451 lines
8.5 KiB
PHP
451 lines
8.5 KiB
PHP
|
|
; note:
;  to avoid interfering with code resolving, all external input must stay unchanged
;  in subsequent passes of the assembly; for this reason all data read from files is
;  permanently cached (source texts are cached in tokenized form)
|
|
|
|
; Per-file record kept in [file_data_cache]; created by get_file_data.
struct FileData
	length	dq ?		; file size, taken from the position lseek reports for the end of file
	cache	dd ?		; pointer to FileCache (head of the list of cached pieces, null when empty)
ends
|
|
|
|
; Header of one cached piece of a file; the cached bytes are stored directly
; after this header (at sizeof.FileCache from the start of the block).
struct FileCache
	offset	dq ?		; position in the file of the first cached byte (set to a multiple of 1000h)
	length	dd ?		; number of cached bytes that follow the header
	next	dd ?		; pointer to another FileCache (null at the end of the list)
ends
|
|
|
|
read_source:
; Returns the tokenized text of a source file. The file is read and tokenized
; only on the first request for a path; the result is then stored in
; [file_source_cache] and later requests are answered from there.
; in:
;  esi - source path
; out:
;  eax - tokenized source, null when file not found
;  esi - source path in persistent storage
; note:
;  failures are cached too: null is stored for a file that could not be opened
;  and -1 for a file that could not be read, so every later request for the
;  same path reproduces the same outcome
	mov	ebx,[file_source_cache]
	xor	ecx,ecx
	call	get_from_map
	jc	read_source_file		; no entry for this path yet
	cmp	eax,-1				; -1 marks a file that failed to read earlier
	je	get_erroneous_source
	retn
    read_source_file:
	mov	edx,esi
	call	open
	jc	source_file_not_found
	xor	eax,eax
	mov	edx,eax
	mov	cl,2
	call	lseek				; offset 0 with method 2; edx:eax is used as the file size
	jc	unreadable_source_file
	test	edx,edx
	jnz	out_of_memory			; size does not fit in 32 bits
	inc	eax				; one more byte for the terminating zero
	jz	out_of_memory			; size+1 wrapped around to zero
	; the length is kept in memory rather than on the stack, so that the
	; error exit below can return with a balanced stack
	mov	[source_text_length],eax
	xor	eax,eax
	mov	edx,eax
	mov	cl,al
	call	lseek				; offset 0 with method 0: back to the start of file
	jc	unreadable_source_file
	mov	ecx,[source_text_length]
	call	malloc
	mov	[source_text],eax
	mov	edx,eax
	mov	ecx,[source_text_length]
	dec	ecx				; ecx = number of bytes to read from the file
	mov	byte [edx+ecx],0		; terminate the text
	call	read
	jc	unreadable_source_text
	call	close
	push	esi
	call	tokenize_source
	mov	eax,[source_text]
	call	mfree				; the raw text is not needed once tokenized
	pop	esi
	mov	eax,[tokenization_buffer]
	xor	ecx,ecx
	mov	ebx,[file_source_cache]
	call	put_into_map
	mov	eax,[tokenization_buffer]
	retn
    source_file_not_found:
	xor	eax,eax
	xor	ecx,ecx
	mov	ebx,[file_source_cache]
	call	put_into_map			; remember that the file does not exist
	xor	eax,eax
	retn
    unreadable_source_text:
	; the read failed after the text buffer had been allocated:
	; release the file and the buffer before reporting the error
	; (assumes the file handle is still in ebx, as the regular call to close does)
	call	close
	push	esi				; esi is saved around mfree, as on the regular path
	mov	eax,[source_text]
	call	mfree
	pop	esi
	jmp	error_reading_file
    unreadable_source_file:
	; a seek failed while the file was open: release the file
	call	close
    error_reading_file:
	or	eax,-1
	xor	ecx,ecx
	mov	ebx,[file_source_cache]
	call	put_into_map			; remember that the file cannot be read
    get_erroneous_source:
	mov	ebx,esi
	mov	edx,_error_reading_file
	call	register_error
	mov	eax,zero_value			; a substitute is returned in place of the source
	retn
|
|
|
|
use_source:
; Returns the tokenized form of a source text given in memory. The text itself
; (including its terminating zero) is the key in [memory_source_cache], so an
; identical text is tokenized only once.
; in:
;  esi - ASCIIZ source string
; out:
;  eax - tokenized source
;  esi - source text in persistent storage
	mov	edi,esi
	or	ecx,-1
	xor	al,al
	repne	scasb				; scan for the terminating zero
	not	ecx				; ecx = length of text including the terminator
	mov	[source_text_length],ecx
	mov	ebx,[memory_source_cache]
	xor	eax,eax
	call	get_from_map
	jc	adapt_memory_source		; not seen before: tokenize it now
	retn
    adapt_memory_source:
	mov	[source_text],esi
	call	tokenize_source
	mov	esi,[source_text]
	mov	ecx,[source_text_length]
	mov	ebx,[memory_source_cache]
	mov	eax,[tokenization_buffer]
	call	put_into_map
	mov	eax,[tokenization_buffer]
	retn
|
|
|
|
tokenize_source:
; Converts a zero-terminated text into a sequence of tokens.
; in:
;  [source_text] - ASCIIZ text
;  [source_text_length] = length of text (including terminating character)
; out:
;  [tokenization_buffer] - tokenized source
;  [tokenization_buffer_length] = length of tokenized source
; tokens written by this routine:
;  1Ah, dword length, name bytes, dword hash, dword hash of table values, dword 0
;  22h, dword length, string bytes	(27h in place of 22h when the closing quote is missing)
;  20h					(a run of spaces and tabs, dropped at the start of a line)
;  0Ah					(end of line; replaces a whitespace token directly before it)
;  any other byte			(a character token, stored as is)
;  0					(end of the tokenized text)
; registers while scanning:
;  esi - next character to read, edi - next byte to write
	mov	ecx,[source_text_length]
	shl	ecx,1
	add	ecx,18				; first estimate: twice the text plus the reserve
	call	malloc_growable
	mov	[tokenization_buffer],eax
	mov	[tokenization_buffer_length],ecx
	add	eax,ecx
	sub	eax,[source_text]
	sub	eax,[source_text_length]
	mov	[buffer_end_offset],eax		; = end of buffer - end of text
	mov	edi,[tokenization_buffer]
	mov	esi,[source_text]
	mov	[last_token],0Ah		; the text starts as if a line had just ended
    tokenize:
	; eax = (room left in the buffer) - (characters left to read);
	; this margin must be at least 18 before the next token is started
	mov	eax,[buffer_end_offset]
	add	eax,esi
	sub	eax,edi
	cmp	eax,18
	jae	tokenization_buffer_reserve_ok
	; grow the buffer: scale its current length by (whole text)/(text consumed so far)
	mov	ecx,esi
	sub	ecx,[source_text]
	mov	eax,[source_text_length]
	mul	[tokenization_buffer_length]
	div	ecx
	mov	ecx,eax
	add	ecx,18
	mov	eax,[tokenization_buffer]
	call	realloc
	sub	edi,[tokenization_buffer]	; carry the write position over to the new block
	add	edi,eax
	mov	[tokenization_buffer],eax
	mov	[tokenization_buffer_length],ecx
	add	eax,ecx
	sub	eax,[source_text]
	sub	eax,[source_text_length]
	mov	[buffer_end_offset],eax
    tokenization_buffer_reserve_ok:
	movzx	eax,byte [esi]
	inc	esi
	mov	ah,[characters+eax]		; ah = entry of the characters table for this byte
	cmp	ah,20h
	je	control_character		; 20h: whitespace, line break or end of text
	test	ah,ah
	jnz	make_name_token			; non-zero: starts a name (or a quoted string)
    character_token:
	; zero entry: the character is a token on its own
	stosb
	mov	[last_token],al
	jmp	tokenize
    make_string_token:
	mov	dl,al				; dl = the quote character that opened the string
	mov	[last_token],22h
	mov	byte [edi],22h
	add	edi,5				; skip the type byte and the length field
	xor	ecx,ecx				; ecx = number of string bytes stored
    copy_string:
	mov	al,[esi]
	cmp	al,0Dh
	je	broken_string
	cmp	al,0Ah
	je	broken_string
	cmp	al,1Ah
	je	broken_string
	test	al,al
	jz	broken_string			; the offending character is left for the main loop
	inc	esi
	cmp	al,dl
	jne	copy_string_character
	cmp	byte [esi],al
	jne	finish_string_token		; a single quote closes the string
	inc	esi				; a doubled quote stands for one quote character
    copy_string_character:
	mov	[edi+ecx],al
	inc	ecx
	jmp	copy_string
    broken_string:
	mov	byte [edi-5],27h		; mark the string as lacking its closing quote
    finish_string_token:
	mov	al,[edi-5]			; al = final token type (overwritten before any later use here)
	mov	[edi-4],ecx			; fill in the length field
	add	edi,ecx
	jmp	tokenize
    make_name_token:
	cmp	al,22h
	je	make_string_token
	cmp	al,27h
	je	make_string_token
	mov	[last_token],1Ah
	mov	byte [edi],1Ah
	add	edi,5				; skip the type byte and the length field
	xor	ebx,ebx				; ebx = number of name bytes stored
	mov	ecx,FNV_OFFSET			; ecx = hash over the characters as written
	mov	edx,ecx				; edx = hash over their characters table values
    hash_name:
	mov	[edi+ebx],al
	inc	ebx
	xor	cl,al
	xor	dl,ah
	imul	ecx,FNV_PRIME
	imul	edx,FNV_PRIME
	movzx	eax,byte [esi]
	inc	esi
	mov	ah,[characters+eax]
	cmp	ah,20h
	je	finish_name_token
	test	ah,ah
	jnz	hash_name
    finish_name_token:
	; al/ah still describe the character that ended the name; once the
	; name is completed it is handled as the start of the next token
	mov	[edi-4],ebx			; fill in the length field
	add	edi,ebx
	mov	[edi],ecx
	mov	[edi+4],edx
	xor	ecx,ecx
	mov	[edi+8],ecx			; third dword after the name is stored as zero
	add	edi,12
	cmp	ah,20h
	jne	character_token
    control_character:
	cmp	al,20h
	je	whitespace
	cmp	al,9
	je	whitespace
	; every other character of this class ends the line
	cmp	[last_token],20h
	je	mark_end_of_line		; a trailing whitespace token gets overwritten
	inc	edi
    mark_end_of_line:
	mov	byte [edi-1],0Ah
	mov	[last_token],0Ah
	cmp	al,0Dh
	je	cr
	cmp	al,0Ah
	je	lf
	; neither CR nor LF: the text ends here
	xor	al,al
	stosb
	mov	ecx,edi
	mov	eax,[tokenization_buffer]
	sub	ecx,eax
	call	realloc				; resize the buffer to the space actually used
	mov	[tokenization_buffer],eax
	mov	[tokenization_buffer_length],ecx
	retn
    cr:
	cmp	byte [esi],0Ah			; CR LF counts as a single line break
	jne	tokenize
	inc	esi
	jmp	tokenize
    lf:
	cmp	byte [esi],0Dh			; LF CR counts as a single line break
	jne	tokenize
	inc	esi
	jmp	tokenize
    whitespace:
	cmp	[last_token],20h
	je	tokenize			; already inside a run of whitespace
	cmp	[last_token],0Ah
	je	tokenize			; whitespace at the start of a line is dropped
	mov	al,20h
	stosb
	mov	[last_token],al
	jmp	tokenize
|
|
|
|
get_file_data:
; Finds the FileData record of a file in [file_data_cache], creating the record
; (with the file size and an empty cache list) on the first request for a path.
; in:
;  esi - file path
; out:
;  ebx - FileData, null when file not found
;  esi - file path in persistent storage
; preserves: edi
; note:
;  a file that cannot be opened or measured is remembered with a null record,
;  so later requests for the same path give the same answer
	mov	ebx,[file_data_cache]
	xor	ecx,ecx
	call	get_from_map
	jc	initialize_file_data
	mov	ebx,eax
	retn
    initialize_file_data:
	mov	edx,esi
	call	open
	jc	remember_file_not_found
	; measure the file before allocating anything, so that a failure
	; leaves nothing behind except the handle to close
	xor	eax,eax
	mov	edx,eax
	mov	cl,2
	call	lseek				; offset 0 with method 2; edx:eax is used as the file size
	jc	file_not_seekable
	push	edi
	push	edx
	push	eax				; keep the size across the calls below
	call	close
	mov	ecx,sizeof.FileData
	call	malloc_fixed
	mov	edi,eax
	pop	dword [edi+FileData.length]
	pop	dword [edi+FileData.length+4]
	xor	ecx,ecx
	mov	[edi+FileData.cache],ecx	; nothing cached yet
	mov	eax,edi
	mov	ebx,[file_data_cache]
	call	put_into_map
	mov	ebx,edi
	pop	edi
	retn
    file_not_seekable:
	; assumes the file handle is still in ebx, as the regular call to close does
	call	close
    remember_file_not_found:
	xor	eax,eax
	mov	ecx,eax
	mov	ebx,[file_data_cache]
	call	put_into_map
	xor	ebx,ebx
	retn
|
|
|
|
read_file_data:
; Copies a range of a file into a buffer, taking the bytes from the list of
; FileCache entries attached to the FileData record. A part of the range that
; is not cached yet is first read from the file into a new entry, which is
; then kept in the list.
; in:
;  esi - file path
;  ebx - FileData
;  edi - buffer for data
;  [file_offset] = offset of data
;  [data_length] = length of data
; out:
;  cf set when read failed
; preserves: esi
; note:
;  [file_offset] and [data_length] are advanced as data is copied, and edi is
;  moved past the copied bytes
	mov	[file_data],ebx
	lea	eax,[ebx+FileData.cache]
	mov	[file_cache_pointer],eax	; address of the link that leads to the entry in ebx
	mov	ebx,[eax]
    read_from_file_cache:
	mov	ecx,[data_length]
	test	ecx,ecx
	jz	file_data_read			; nothing left to copy
	test	ebx,ebx
	jz	new_trailing_file_cache_entry	; the list ended before the wanted offset
	mov	eax,dword [file_offset]
	mov	edx,dword [file_offset+4]
	sub	eax,dword [ebx+FileCache.offset]
	sbb	edx,dword [ebx+FileCache.offset+4]
	jc	new_file_cache_entry		; wanted offset lies before this entry
	jnz	next_entry			; distance into the entry does not fit in 32 bits
	mov	edx,[ebx+FileCache.length]
	sub	edx,eax				; edx = cached bytes available from the wanted offset
	jbe	next_entry			; wanted offset lies at or past the end of this entry
	cmp	ecx,edx
	jbe	length_to_read_ok
	mov	ecx,edx				; copy no more than this entry holds
    length_to_read_ok:
	sub	[data_length],ecx
	add	dword [file_offset],ecx
	adc	dword [file_offset+4],0
	mov	edx,esi				; esi is borrowed for the copy
	lea	esi,[ebx+sizeof.FileCache+eax]
	rep	movsb
	mov	esi,edx
    next_entry:
	lea	eax,[ebx+FileCache.next]
	mov	[file_cache_pointer],eax
	mov	ebx,[eax]
	jmp	read_from_file_cache
    file_data_read:
	clc
	retn
    new_trailing_file_cache_entry:
	; the new entry may extend up to the end of the file
	mov	ebx,[file_data]
	mov	ecx,dword [ebx+FileData.length]
	mov	edx,dword [ebx+FileData.length+4]
	cmp	ecx,dword [file_offset]
	jne	measure_cache_gap
	cmp	edx,dword [file_offset+4]
	jne	measure_cache_gap
	stc					; data wanted from exactly the end of the file
	retn
    new_file_cache_entry:
	; the new entry may extend up to the start of the entry in ebx
	mov	ecx,dword [ebx+FileCache.offset]
	mov	edx,dword [ebx+FileCache.offset+4]
    measure_cache_gap:
	; edx:ecx = limit - (wanted offset rounded down to a multiple of 1000h)
	mov	eax,dword [file_offset]
	and	eax,not 0FFFh
	sub	ecx,eax
	sbb	edx,dword [file_offset+4]
	jnz	compute_aligned_length
	cmp	ecx,[data_length]
	jbe	read_into_cache			; the gap is no longer than the request: fill it whole
    compute_aligned_length:
	; eax = length covering the request, rounded up to a multiple of 1000h
	mov	eax,dword [file_offset]
	and	eax,0FFFh
	add	eax,[data_length]
	dec	eax
	shr	eax,12
	inc	eax
	shl	eax,12
	test	edx,edx
	jnz	use_aligned_length
	cmp	eax,ecx
	jae	read_into_cache			; never read beyond the gap
    use_aligned_length:
	mov	ecx,eax
    read_into_cache:
	; ecx = number of bytes to read into the new entry
	push	ecx
	add	ecx,sizeof.FileCache
	call	malloc_fixed
	mov	ebx,eax
	pop	[ebx+FileCache.length]
	mov	eax,dword [file_offset]
	and	eax,not 0FFFh
	mov	edx,dword [file_offset+4]
	mov	dword [ebx+FileCache.offset],eax
	mov	dword [ebx+FileCache.offset+4],edx
	push	ebx edi
	mov	edi,ebx				; edi = the new entry while ebx is used for file access
	mov	edx,esi
	call	open
	jc	file_access_error
	mov	eax,dword [edi+FileCache.offset]
	mov	edx,dword [edi+FileCache.offset+4]
	xor	cl,cl
	call	lseek
	jc	cached_file_unreadable
	lea	edx,[edi+sizeof.FileCache]
	mov	ecx,[edi+FileCache.length]
	call	read
	jc	cached_file_unreadable
	call	close
	pop	edi ebx
	; the entry is linked into the list only now that its data is valid,
	; so a failed read can never be served from the cache later
	mov	eax,[file_cache_pointer]
	mov	edx,ebx
	xchg	edx,[eax]
	mov	[ebx+FileCache.next],edx
	jmp	read_from_file_cache
    cached_file_unreadable:
	; assumes the file handle is still in ebx, as the regular call to close does
	call	close
    file_access_error:
	; NOTE(review): the unlinked entry is not released here, because how to free
	; a malloc_fixed block is not visible in this file - confirm and free it
	pop	edi ebx
	stc
	retn
|
|
|