From db2336806ef84dac3768fe6651bf739fc2a7ac22 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 25 May 2025 23:22:17 -0400 Subject: [PATCH] learning --- code/forth.asm | 541 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 356 insertions(+), 185 deletions(-) diff --git a/code/forth.asm b/code/forth.asm index 239fe30..1942dca 100644 --- a/code/forth.asm +++ b/code/forth.asm @@ -201,37 +201,37 @@ COMMENT @/* ; Macros that deal with the return stack PUSH_RSP MACRO reg - lea rbp, [rbp - 8] ; push reg on to return stack - mov [rbp], reg + lea rbp, [rbp - 8] ; push reg on to return stack + mov [rbp], reg ENDM POP_RSP MACRO reg - mov reg, [rbp] ; pop top of return stack to reg - lea rbp, [rbp + 8] + mov reg, [rbp] ; pop top of return stack to reg + lea rbp, [rbp + 8] ENDM ; DOCOL - the interpreter! NOTE(Ed): I'm going to use DO_COLON instead .code ALIGN 8 DO_COLON: - PUSH_RSP rsi ; push rsi on to the return stack - add rax, 8 ; rax points to codeword, so make - mov rsi, rax ; rsi point to first data word - NEXT + PUSH_RSP rsi ; push rsi on to the return stack + add rax, 8 ; rax points to codeword, so make + mov rsi, rax ; rsi point to first data word + NEXT .code PUBLIC main main: - cld - mov var_S0, rsp ; Save the initial data stack pointer in FORTH variable S0. - mov rbp, OFFSET return_stack_top ; Initialise the return stack. - call set_up_data_segment - mov rsi, OFFSET cold_start ; Initialise interpreter. - NEXT ; Run interpreter! + cld + mov var_S0, rsp ; Save the initial data stack pointer in FORTH variable S0. + mov rbp, OFFSET return_stack_top ; Initialise the return stack. + call set_up_data_segment + mov rsi, OFFSET cold_start ; Initialise interpreter. + NEXT ; Run interpreter! .const cold_start: ; High-level code without a codeword. - dq QUIT + dq QUIT ; Flags - these are discussed later. @@ -259,144 +259,144 @@ label: ENDM defcode MACRO name, namelen, flags:=<0>, label - .const - ALIGN 8 - PUBLIC name_&label -name_&label: - dq link ; link - link = name_&label - db flags + namelen ; flags + length byte - db "&name" ; the name - ALIGN 8 ; padding to next 8 byte boundary - PUBLIC label -label: - dq code_&label ; codeword - .code - ;ALIGN 8 - PUBLIC code_&label -code_&label: ; assembler code follows + .const + ALIGN 8 + PUBLIC name_&label&_WORD +name_&label&_WORD: + dq link ; 64-bit link pointer + link = name_&label&_WORD ; Update link to current word + db flags + namelen ; Flags + length byte + db name ; The name (string literal) + ALIGN 8 ; Padding to next 8-byte boundary + PUBLIC &label&_WORD +&label&_WORD: + dq code_&label&_WORD ; 64-bit codeword pointer + .code + ALIGN 8 + PUBLIC code_&label&_WORD +code_&label&_WORD: ; Assembler code follows ENDM ; Now some easy FORTH primitives. These are written in assembly for speed. ; drop top of stack defcode "DROP", 4, , DROP - pop rax - NEXT + pop rax + NEXT ; Swap two elements on stack defcode "SWAP", 4, , SWAP - pop rax - pop rbx - push rax - push rbx - NEXT + pop rax + pop rbx + push rax + push rbx + NEXT ; duplicate top of stack defcode "DUP", 3, , DUP - mov rax, [rsp] - push rax - NEXT + mov rax, [rsp] + push rax + NEXT ; get the second element of the stack and push it on top defcode "OVER", 4, , OVER - mov rax, [rsp + 8] ; get the second element of stack - push rax ; and push it on top - NEXT + mov rax, [rsp + 8] ; get the second element of stack + push rax ; and push it on top + NEXT defcode "ROT", 3, , ROT - pop rax - pop rbx - pop rcx - push rbx - push rax - push rcx - NEXT + pop rax + pop rbx + pop rcx + push rbx + push rax + push rcx + NEXT defcode "-ROT", 4, , NROT - pop rax - pop rbx - pop rcx - push rax - push rcx - push rbx - NEXT + pop rax + pop rbx + pop rcx + push rax + push rcx + push rbx + NEXT ; drop top two elements of stack defcode "2DROP", 5, , TWODROP - pop rax - pop rax - NEXT + pop rax + pop rax + NEXT ; duplicate top two elements of stack defcode "2DUP", 4, , TWODUP - mov rax, [rsp] - mov rbx, [rsp + 8] - push rbx - push rax - NEXT + mov rax, [rsp] + mov rbx, [rsp + 8] + push rbx + push rax + NEXT ; swap top two pairs of elements of stack defcode "2SWAP", 5, , TWOSWAP - pop rax - pop rbx - pop rcx - pop rdx - push rbx - push rax - push rdx - push rcx - NEXT + pop rax + pop rbx + pop rcx + pop rdx + push rbx + push rax + push rdx + push rcx + NEXT ; duplicate top of stack if non-zero defcode "?DUP", 4, , QDUP - mov rax, [rsp] - test rax, rax - jz @F - push rax + mov rax, [rsp] + test rax, rax + jz @F + push rax @@: NEXT ; increment top of stack defcode "1+", 2, , INCR - inc qword ptr [rsp] - NEXT + inc qword ptr [rsp] + NEXT ; decrement top of stack defcode "1-", 2, , DECR - dec qword ptr [rsp] - NEXT + dec qword ptr [rsp] + NEXT ; add 4 to top of stack defcode "4+", 2, , INCR4 - add qword ptr [rsp], 4 - NEXT + add qword ptr [rsp], 4 + NEXT ; subtract 4 from top of stack defcode "4-", 2, , DECR4 - sub qword ptr [rsp], 4 - NEXT + sub qword ptr [rsp], 4 + NEXT ; get top of stack ; and add it to next word on stack defcode "+", 1, , ADD - pop rax - add [rsp], rax - NEXT + pop rax + add [rsp], rax + NEXT ; get top of stack ; and subtract it from next word on stack defcode "-", 1, , SUB - pop rax - sub [rsp], rax - NEXT + pop rax + sub [rsp], rax + NEXT ; ignore overflow defcode "*", 1, , MUL - pop rax - pop rbx - imul rax, rbx - push rax - NEXT + pop rax + pop rbx + imul rax, rbx + push rax + NEXT COMMENT @/* In this FORTH, only /MOD is primitive. Later we will define the / and MOD words in @@ -405,13 +405,13 @@ COMMENT @/* */@ defcode "/MOD", 4, , DIVMOD - xor rdx, rdx - pop rbx - pop rax - idiv rbx - push rdx ; push remainder - push rax ; push quotient - NEXT + xor rdx, rdx + pop rbx + pop rax + idiv rbx + push rdx ; push remainder + push rax ; push quotient + NEXT COMMENT @/* Lots of comparison operations like =, <, >, etc.. @@ -422,7 +422,8 @@ COMMENT @/* 1 meaning TRUE and 0 meaning FALSE. */@ -defcode "=", 1, , EQU ; top two words are equal? +; top two words are equal? +defcode "=", 1, , EQU pop rax pop rbx cmp rbx, rax @@ -431,7 +432,8 @@ defcode "=", 1, , EQU ; top two words are equal? push rax NEXT -defcode "<>", 2, , NEQU ; top two words are not equal? +; top two words are not equal? +defcode "<>", 2, , NEQU pop rax pop rbx cmp rbx, rax @@ -441,113 +443,282 @@ defcode "<>", 2, , NEQU ; top two words are not equal? NEXT defcode "<", 1, , LT - pop rax - pop rbx - cmp rbx, rax - setl al - movzx rax, al - push rax - NEXT + pop rax + pop rbx + cmp rbx, rax + setl al + movzx rax, al + push rax + NEXT defcode ">", 1, , GT - pop rax - pop rbx - cmp rbx, rax - setg al - movzx rax, al - push rax - NEXT + pop rax + pop rbx + cmp rbx, rax + setg al + movzx rax, al + push rax + NEXT defcode "<=", 2, , LE - pop rax - pop rbx - cmp rbx, rax - setle al - movzx rax, al - push rax - NEXT + pop rax + pop rbx + cmp rbx, rax + setle al + movzx rax, al + push rax + NEXT defcode ">=", 2, , GE - pop rax - pop rbx - cmp rbx, rax - setge al - movzx rax, al - push rax - NEXT + pop rax + pop rbx + cmp rbx, rax + setge al + movzx rax, al + push rax + NEXT -defcode "0=", 2, , ZEQU ; top of stack equals 0? - pop rax - test rax, rax - setz al - movzx rax, al - push rax - NEXT +; top of stack equals 0? +defcode "0=", 2, , ZEQU + pop rax + test rax, rax + setz al + movzx rax, al + push rax + NEXT -defcode "0<>", 3, , ZNEQU ; top of stack not 0? - pop rax - test rax, rax - setnz al - movzx rax, al - push rax - NEXT +; top of stack not 0? +defcode "0<>", 3, , ZNEQU + pop rax + test rax, rax + setnz al + movzx rax, al + push rax + NEXT -defcode "0<", 2, , ZLT ; comparisons with 0 - pop rax - test rax, rax - setl al - movzx rax, al - push rax - NEXT +; comparisons with 0 +defcode "0<", 2, , ZLT + pop rax + test rax, rax + setl al + movzx rax, al + push rax + NEXT defcode "0>", 2, , ZGT - pop rax - test rax, rax - setg al - movzx rax, al - push rax - NEXT + pop rax + test rax, rax + setg al + movzx rax, al + push rax + NEXT defcode "0<=", 3, , ZLE - pop rax - test rax, rax - setle al - movzx rax, al - push rax - NEXT + pop rax + test rax, rax + setle al + movzx rax, al + push rax + NEXT defcode "0>=", 3, , ZGE - pop rax - test rax, rax - setge al - movzx rax, al - push rax - NEXT + pop rax + test rax, rax + setge al + movzx rax, al + push rax + NEXT ; bitwise AND defcode "AND", 3, , AND - pop rax - and [rsp], rax - NEXT + pop rax + and [rsp], rax + NEXT ; bitwise OR defcode "OR", 2, , OR - pop rax - or [rsp], rax - NEXT + pop rax + or [rsp], rax + NEXT ; bitwise XOR defcode "XOR", 3, , XOR - pop rax - xor [rsp], rax - NEXT + pop rax + xor [rsp], rax + NEXT ; this is the FORTH bitwise "NOT" function defcode "INVERT", 6, , INVERT - not qword ptr [rsp] - NEXT + not qword ptr [rsp] + NEXT +COMMENT @/* + RETURNING FROM FORTH WORDS ---------------------------------------------------------------------- + Time to talk about what happens when we EXIT a function. In this diagram QUADRUPLE has called + DOUBLE, and DOUBLE is about to exit (look at where %esi is pointing): + + QUADRUPLE + +------------------+ + | codeword | + +------------------+ DOUBLE + | addr of DOUBLE ---------------> +------------------+ + +------------------+ | codeword | + | addr of DOUBLE | +------------------+ + +------------------+ | addr of DUP | + | addr of EXIT | +------------------+ + +------------------+ | addr of + | + +------------------+ + %esi -> | addr of EXIT | + +------------------+ + + What happens when the + function does NEXT? Well, the following code is executed. +*/@ + +; pop return stack into rsi +defcode "EXIT", 4, , EXIT + POP_RSP rsi + NEXT + +COMMENT @/* + EXIT gets the old %esi which we saved from before on the return stack, and puts it in %esi. + So after this (but just before NEXT) we get: + + QUADRUPLE + +------------------+ + | codeword | + +------------------+ DOUBLE + | addr of DOUBLE ---------------> +------------------+ + +------------------+ | codeword | + %esi -> | addr of DOUBLE | +------------------+ + +------------------+ | addr of DUP | + | addr of EXIT | +------------------+ + +------------------+ | addr of + | + +------------------+ + | addr of EXIT | + +------------------+ + + And NEXT just completes the job by, well, in this case just by calling DOUBLE again :-) + + LITERALS ---------------------------------------------------------------------- + + The final point I "glossed over" before was how to deal with functions that do anything + apart from calling other functions. For example, suppose that DOUBLE was defined like this: + + : DOUBLE 2 * ; + + It does the same thing, but how do we compile it since it contains the literal 2? One way + would be to have a function called "2" (which you'd have to write in assembler), but you'd need + a function for every single literal that you wanted to use. + + FORTH solves this by compiling the function using a special word called LIT: + + +---------------------------+-------+-------+-------+-------+-------+ + | (usual header of DOUBLE) | DOCOL | LIT | 2 | * | EXIT | + +---------------------------+-------+-------+-------+-------+-------+ + + LIT is executed in the normal way, but what it does next is definitely not normal. It + looks at %esi (which now points to the number 2), grabs it, pushes it on the stack, then + manipulates %esi in order to skip the number as if it had never been there. + + What's neat is that the whole grab/manipulate can be done using a single byte single + i386 instruction, our old friend LODSL. Rather than me drawing more ASCII-art diagrams, + see if you can find out how LIT works: +*/@ + +defcode "LIT", 3, , LIT + ; rsi points to the next command, but in this case it points to the next + ; literal 64 bit integer. Get that literal into rax and increment rsi. + lodsq + push rax ; push the literal number on to stack + NEXT + +COMMENT @/* + MEMORY ---------------------------------------------------------------------- + + An important point about FORTH is that it gives you direct access to the lowest levels + of the machine. Manipulating memory directly is done frequently in FORTH, and these are + the primitive words for doing it. +*/@ + +defcode "!", 1, , STORE + pop rbx ; address to store at + pop rax ; data to store there + mov [rbx], rax ; store it + NEXT + +defcode "@", 1, , FETCH + pop rbx ; address to fetch + mov rax, [rbx] ; fetch it + push rax ; push value onto stack + NEXT + +defcode "+!", 2, , ADDSTORE + pop rbx ; address + pop rax ; the amount to add + add [rbx], rax ; add it + NEXT + +defcode "-!", 2, , SUBSTORE + pop rbx ; address + pop rax ; the amount to subtract + sub [rbx], rax ; subtract it + NEXT + +COMMENT $/* + ! and @ (STORE and FETCH) store 32-bit words. It's also useful to be able to read and write bytes + so we also define standard words C@ and C!. + + Byte-oriented operations only work on architectures which permit them (i386 is one of those). + */$ + + defcode "C!", 2, , STOREBYTE + pop rbx ; address to store at + pop rax ; data to store there + mov [rbx], al ; store it + NEXT + +defcode "C@", 2, , FETCHBYTE + pop rbx ; address to fetch + xor rax, rax + mov al, [rbx] ; fetch it + push rax ; push value onto stack + NEXT + +; C@C! is a useful byte copy primitive. +defcode "C@C!", 4, , CCOPY + mov rbx, [rsp + 8] ; source address + mov al, [rbx] ; get source character + pop rdi ; destination address + stosb ; copy to destination + push rdi ; increment destination address + inc qword ptr [rsp + 8] ; increment source address + NEXT + +; and CMOVE is a block copy operation. +defcode "CMOVE", 5, , CMOVE + mov rdx, rsi ; preserve rsi + pop rcx ; length + pop rdi ; destination address + pop rsi ; source address + rep movsb ; copy source to destination + mov rsi, rdx ; restore rsi + NEXT + +COMMENT $/* + BUILT-IN VARIABLES ---------------------------------------------------------------------- + + These are some built-in variables and related standard FORTH words. Of these, the only one that we + have discussed so far was LATEST, which points to the last (most recently defined) word in the + FORTH dictionary. LATEST is also a FORTH word which pushes the address of LATEST (the variable) + on to the stack, so you can read or write it using @ and ! operators. For example, to print + the current value of LATEST (and this can apply to any FORTH variable) you would do: + + LATEST @ . CR + + To make defining variables shorter, I'm using a macro called defvar, similar to defword and + defcode above. (In fact the defvar macro uses defcode to do the dictionary header). +*/$ mainCRTStartup proc mainCRTStartup endp