This commit is contained in:
Edward R. Gonzalez 2025-05-25 23:22:17 -04:00
parent a9d87e4797
commit db2336806e

View File

@ -201,37 +201,37 @@ COMMENT @/*
; Macros that deal with the return stack ; Macros that deal with the return stack
PUSH_RSP MACRO reg PUSH_RSP MACRO reg
lea rbp, [rbp - 8] ; push reg on to return stack lea rbp, [rbp - 8] ; push reg on to return stack
mov [rbp], reg mov [rbp], reg
ENDM ENDM
POP_RSP MACRO reg POP_RSP MACRO reg
mov reg, [rbp] ; pop top of return stack to reg mov reg, [rbp] ; pop top of return stack to reg
lea rbp, [rbp + 8] lea rbp, [rbp + 8]
ENDM ENDM
; DOCOL - the interpreter! NOTE(Ed): I'm going to use DO_COLON instead ; DOCOL - the interpreter! NOTE(Ed): I'm going to use DO_COLON instead
.code .code
ALIGN 8 ALIGN 8
DO_COLON: DO_COLON:
PUSH_RSP rsi ; push rsi on to the return stack PUSH_RSP rsi ; push rsi on to the return stack
add rax, 8 ; rax points to codeword, so make add rax, 8 ; rax points to codeword, so make
mov rsi, rax ; rsi point to first data word mov rsi, rax ; rsi point to first data word
NEXT NEXT
.code .code
PUBLIC main PUBLIC main
main: main:
cld cld
mov var_S0, rsp ; Save the initial data stack pointer in FORTH variable S0. mov var_S0, rsp ; Save the initial data stack pointer in FORTH variable S0.
mov rbp, OFFSET return_stack_top ; Initialise the return stack. mov rbp, OFFSET return_stack_top ; Initialise the return stack.
call set_up_data_segment call set_up_data_segment
mov rsi, OFFSET cold_start ; Initialise interpreter. mov rsi, OFFSET cold_start ; Initialise interpreter.
NEXT ; Run interpreter! NEXT ; Run interpreter!
.const .const
cold_start: ; High-level code without a codeword. cold_start: ; High-level code without a codeword.
dq QUIT dq QUIT
; Flags - these are discussed later. ; Flags - these are discussed later.
@ -259,144 +259,144 @@ label:
ENDM ENDM
defcode MACRO name, namelen, flags:=<0>, label defcode MACRO name, namelen, flags:=<0>, label
.const .const
ALIGN 8 ALIGN 8
PUBLIC name_&label PUBLIC name_&label&_WORD
name_&label: name_&label&_WORD:
dq link ; link dq link ; 64-bit link pointer
link = name_&label link = name_&label&_WORD ; Update link to current word
db flags + namelen ; flags + length byte db flags + namelen ; Flags + length byte
db "&name" ; the name db name ; The name (string literal)
ALIGN 8 ; padding to next 8 byte boundary ALIGN 8 ; Padding to next 8-byte boundary
PUBLIC label PUBLIC &label&_WORD
label: &label&_WORD:
dq code_&label ; codeword dq code_&label&_WORD ; 64-bit codeword pointer
.code .code
;ALIGN 8 ALIGN 8
PUBLIC code_&label PUBLIC code_&label&_WORD
code_&label: ; assembler code follows code_&label&_WORD: ; Assembler code follows
ENDM ENDM
; Now some easy FORTH primitives. These are written in assembly for speed. ; Now some easy FORTH primitives. These are written in assembly for speed.
; drop top of stack ; drop top of stack
defcode "DROP", 4, , DROP defcode "DROP", 4, , DROP
pop rax pop rax
NEXT NEXT
; Swap two elements on stack ; Swap two elements on stack
defcode "SWAP", 4, , SWAP defcode "SWAP", 4, , SWAP
pop rax pop rax
pop rbx pop rbx
push rax push rax
push rbx push rbx
NEXT NEXT
; duplicate top of stack ; duplicate top of stack
defcode "DUP", 3, , DUP defcode "DUP", 3, , DUP
mov rax, [rsp] mov rax, [rsp]
push rax push rax
NEXT NEXT
; get the second element of the stack and push it on top ; get the second element of the stack and push it on top
defcode "OVER", 4, , OVER defcode "OVER", 4, , OVER
mov rax, [rsp + 8] ; get the second element of stack mov rax, [rsp + 8] ; get the second element of stack
push rax ; and push it on top push rax ; and push it on top
NEXT NEXT
defcode "ROT", 3, , ROT defcode "ROT", 3, , ROT
pop rax pop rax
pop rbx pop rbx
pop rcx pop rcx
push rbx push rbx
push rax push rax
push rcx push rcx
NEXT NEXT
defcode "-ROT", 4, , NROT defcode "-ROT", 4, , NROT
pop rax pop rax
pop rbx pop rbx
pop rcx pop rcx
push rax push rax
push rcx push rcx
push rbx push rbx
NEXT NEXT
; drop top two elements of stack ; drop top two elements of stack
defcode "2DROP", 5, , TWODROP defcode "2DROP", 5, , TWODROP
pop rax pop rax
pop rax pop rax
NEXT NEXT
; duplicate top two elements of stack ; duplicate top two elements of stack
defcode "2DUP", 4, , TWODUP defcode "2DUP", 4, , TWODUP
mov rax, [rsp] mov rax, [rsp]
mov rbx, [rsp + 8] mov rbx, [rsp + 8]
push rbx push rbx
push rax push rax
NEXT NEXT
; swap top two pairs of elements of stack ; swap top two pairs of elements of stack
defcode "2SWAP", 5, , TWOSWAP defcode "2SWAP", 5, , TWOSWAP
pop rax pop rax
pop rbx pop rbx
pop rcx pop rcx
pop rdx pop rdx
push rbx push rbx
push rax push rax
push rdx push rdx
push rcx push rcx
NEXT NEXT
; duplicate top of stack if non-zero ; duplicate top of stack if non-zero
defcode "?DUP", 4, , QDUP defcode "?DUP", 4, , QDUP
mov rax, [rsp] mov rax, [rsp]
test rax, rax test rax, rax
jz @F jz @F
push rax push rax
@@: NEXT @@: NEXT
; increment top of stack ; increment top of stack
defcode "1+", 2, , INCR defcode "1+", 2, , INCR
inc qword ptr [rsp] inc qword ptr [rsp]
NEXT NEXT
; decrement top of stack ; decrement top of stack
defcode "1-", 2, , DECR defcode "1-", 2, , DECR
dec qword ptr [rsp] dec qword ptr [rsp]
NEXT NEXT
; add 4 to top of stack ; add 4 to top of stack
defcode "4+", 2, , INCR4 defcode "4+", 2, , INCR4
add qword ptr [rsp], 4 add qword ptr [rsp], 4
NEXT NEXT
; subtract 4 from top of stack ; subtract 4 from top of stack
defcode "4-", 2, , DECR4 defcode "4-", 2, , DECR4
sub qword ptr [rsp], 4 sub qword ptr [rsp], 4
NEXT NEXT
; get top of stack ; get top of stack
; and add it to next word on stack ; and add it to next word on stack
defcode "+", 1, , ADD defcode "+", 1, , ADD
pop rax pop rax
add [rsp], rax add [rsp], rax
NEXT NEXT
; get top of stack ; get top of stack
; and subtract it from next word on stack ; and subtract it from next word on stack
defcode "-", 1, , SUB defcode "-", 1, , SUB
pop rax pop rax
sub [rsp], rax sub [rsp], rax
NEXT NEXT
; ignore overflow ; ignore overflow
defcode "*", 1, , MUL defcode "*", 1, , MUL
pop rax pop rax
pop rbx pop rbx
imul rax, rbx imul rax, rbx
push rax push rax
NEXT NEXT
COMMENT @/* COMMENT @/*
In this FORTH, only /MOD is primitive. Later we will define the / and MOD words in In this FORTH, only /MOD is primitive. Later we will define the / and MOD words in
@ -405,13 +405,13 @@ COMMENT @/*
*/@ */@
defcode "/MOD", 4, , DIVMOD defcode "/MOD", 4, , DIVMOD
xor rdx, rdx xor rdx, rdx
pop rbx pop rbx
pop rax pop rax
idiv rbx idiv rbx
push rdx ; push remainder push rdx ; push remainder
push rax ; push quotient push rax ; push quotient
NEXT NEXT
COMMENT @/* COMMENT @/*
Lots of comparison operations like =, <, >, etc.. Lots of comparison operations like =, <, >, etc..
@ -422,7 +422,8 @@ COMMENT @/*
1 meaning TRUE and 0 meaning FALSE. 1 meaning TRUE and 0 meaning FALSE.
*/@ */@
defcode "=", 1, , EQU ; top two words are equal? ; top two words are equal?
defcode "=", 1, , EQU
pop rax pop rax
pop rbx pop rbx
cmp rbx, rax cmp rbx, rax
@ -431,7 +432,8 @@ defcode "=", 1, , EQU ; top two words are equal?
push rax push rax
NEXT NEXT
defcode "<>", 2, , NEQU ; top two words are not equal? ; top two words are not equal?
defcode "<>", 2, , NEQU
pop rax pop rax
pop rbx pop rbx
cmp rbx, rax cmp rbx, rax
@ -441,113 +443,282 @@ defcode "<>", 2, , NEQU ; top two words are not equal?
NEXT NEXT
defcode "<", 1, , LT defcode "<", 1, , LT
pop rax pop rax
pop rbx pop rbx
cmp rbx, rax cmp rbx, rax
setl al setl al
movzx rax, al movzx rax, al
push rax push rax
NEXT NEXT
defcode ">", 1, , GT defcode ">", 1, , GT
pop rax pop rax
pop rbx pop rbx
cmp rbx, rax cmp rbx, rax
setg al setg al
movzx rax, al movzx rax, al
push rax push rax
NEXT NEXT
defcode "<=", 2, , LE defcode "<=", 2, , LE
pop rax pop rax
pop rbx pop rbx
cmp rbx, rax cmp rbx, rax
setle al setle al
movzx rax, al movzx rax, al
push rax push rax
NEXT NEXT
defcode ">=", 2, , GE defcode ">=", 2, , GE
pop rax pop rax
pop rbx pop rbx
cmp rbx, rax cmp rbx, rax
setge al setge al
movzx rax, al movzx rax, al
push rax push rax
NEXT NEXT
defcode "0=", 2, , ZEQU ; top of stack equals 0? ; top of stack equals 0?
pop rax defcode "0=", 2, , ZEQU
test rax, rax pop rax
setz al test rax, rax
movzx rax, al setz al
push rax movzx rax, al
NEXT push rax
NEXT
defcode "0<>", 3, , ZNEQU ; top of stack not 0? ; top of stack not 0?
pop rax defcode "0<>", 3, , ZNEQU
test rax, rax pop rax
setnz al test rax, rax
movzx rax, al setnz al
push rax movzx rax, al
NEXT push rax
NEXT
defcode "0<", 2, , ZLT ; comparisons with 0 ; comparisons with 0
pop rax defcode "0<", 2, , ZLT
test rax, rax pop rax
setl al test rax, rax
movzx rax, al setl al
push rax movzx rax, al
NEXT push rax
NEXT
defcode "0>", 2, , ZGT defcode "0>", 2, , ZGT
pop rax pop rax
test rax, rax test rax, rax
setg al setg al
movzx rax, al movzx rax, al
push rax push rax
NEXT NEXT
defcode "0<=", 3, , ZLE defcode "0<=", 3, , ZLE
pop rax pop rax
test rax, rax test rax, rax
setle al setle al
movzx rax, al movzx rax, al
push rax push rax
NEXT NEXT
defcode "0>=", 3, , ZGE defcode "0>=", 3, , ZGE
pop rax pop rax
test rax, rax test rax, rax
setge al setge al
movzx rax, al movzx rax, al
push rax push rax
NEXT NEXT
; bitwise AND ; bitwise AND
defcode "AND", 3, , AND defcode "AND", 3, , AND
pop rax pop rax
and [rsp], rax and [rsp], rax
NEXT NEXT
; bitwise OR ; bitwise OR
defcode "OR", 2, , OR defcode "OR", 2, , OR
pop rax pop rax
or [rsp], rax or [rsp], rax
NEXT NEXT
; bitwise XOR ; bitwise XOR
defcode "XOR", 3, , XOR defcode "XOR", 3, , XOR
pop rax pop rax
xor [rsp], rax xor [rsp], rax
NEXT NEXT
; this is the FORTH bitwise "NOT" function ; this is the FORTH bitwise "NOT" function
defcode "INVERT", 6, , INVERT defcode "INVERT", 6, , INVERT
not qword ptr [rsp] not qword ptr [rsp]
NEXT NEXT
COMMENT @/*
RETURNING FROM FORTH WORDS ----------------------------------------------------------------------
Time to talk about what happens when we EXIT a function. In this diagram QUADRUPLE has called
DOUBLE, and DOUBLE is about to exit (look at where %esi is pointing):
QUADRUPLE
+------------------+
| codeword |
+------------------+ DOUBLE
| addr of DOUBLE ---------------> +------------------+
+------------------+ | codeword |
| addr of DOUBLE | +------------------+
+------------------+ | addr of DUP |
| addr of EXIT | +------------------+
+------------------+ | addr of + |
+------------------+
%esi -> | addr of EXIT |
+------------------+
What happens when the + function does NEXT? Well, the following code is executed.
*/@
; pop return stack into rsi
defcode "EXIT", 4, , EXIT
POP_RSP rsi
NEXT
COMMENT @/*
EXIT gets the old %esi which we saved from before on the return stack, and puts it in %esi.
So after this (but just before NEXT) we get:
QUADRUPLE
+------------------+
| codeword |
+------------------+ DOUBLE
| addr of DOUBLE ---------------> +------------------+
+------------------+ | codeword |
%esi -> | addr of DOUBLE | +------------------+
+------------------+ | addr of DUP |
| addr of EXIT | +------------------+
+------------------+ | addr of + |
+------------------+
| addr of EXIT |
+------------------+
And NEXT just completes the job by, well, in this case just by calling DOUBLE again :-)
LITERALS ----------------------------------------------------------------------
The final point I "glossed over" before was how to deal with functions that do anything
apart from calling other functions. For example, suppose that DOUBLE was defined like this:
: DOUBLE 2 * ;
It does the same thing, but how do we compile it since it contains the literal 2? One way
would be to have a function called "2" (which you'd have to write in assembler), but you'd need
a function for every single literal that you wanted to use.
FORTH solves this by compiling the function using a special word called LIT:
+---------------------------+-------+-------+-------+-------+-------+
| (usual header of DOUBLE) | DOCOL | LIT | 2 | * | EXIT |
+---------------------------+-------+-------+-------+-------+-------+
LIT is executed in the normal way, but what it does next is definitely not normal. It
looks at %esi (which now points to the number 2), grabs it, pushes it on the stack, then
manipulates %esi in order to skip the number as if it had never been there.
What's neat is that the whole grab/manipulate can be done using a single byte single
i386 instruction, our old friend LODSL. Rather than me drawing more ASCII-art diagrams,
see if you can find out how LIT works:
*/@
defcode "LIT", 3, , LIT
; rsi points to the next command, but in this case it points to the next
; literal 64 bit integer. Get that literal into rax and increment rsi.
lodsq
push rax ; push the literal number on to stack
NEXT
COMMENT @/*
MEMORY ----------------------------------------------------------------------
An important point about FORTH is that it gives you direct access to the lowest levels
of the machine. Manipulating memory directly is done frequently in FORTH, and these are
the primitive words for doing it.
*/@
defcode "!", 1, , STORE
pop rbx ; address to store at
pop rax ; data to store there
mov [rbx], rax ; store it
NEXT
defcode "@", 1, , FETCH
pop rbx ; address to fetch
mov rax, [rbx] ; fetch it
push rax ; push value onto stack
NEXT
defcode "+!", 2, , ADDSTORE
pop rbx ; address
pop rax ; the amount to add
add [rbx], rax ; add it
NEXT
defcode "-!", 2, , SUBSTORE
pop rbx ; address
pop rax ; the amount to subtract
sub [rbx], rax ; subtract it
NEXT
COMMENT $/*
! and @ (STORE and FETCH) store 32-bit words. It's also useful to be able to read and write bytes
so we also define standard words C@ and C!.
Byte-oriented operations only work on architectures which permit them (i386 is one of those).
*/$
defcode "C!", 2, , STOREBYTE
pop rbx ; address to store at
pop rax ; data to store there
mov [rbx], al ; store it
NEXT
defcode "C@", 2, , FETCHBYTE
pop rbx ; address to fetch
xor rax, rax
mov al, [rbx] ; fetch it
push rax ; push value onto stack
NEXT
; C@C! is a useful byte copy primitive.
defcode "C@C!", 4, , CCOPY
mov rbx, [rsp + 8] ; source address
mov al, [rbx] ; get source character
pop rdi ; destination address
stosb ; copy to destination
push rdi ; increment destination address
inc qword ptr [rsp + 8] ; increment source address
NEXT
; and CMOVE is a block copy operation.
defcode "CMOVE", 5, , CMOVE
mov rdx, rsi ; preserve rsi
pop rcx ; length
pop rdi ; destination address
pop rsi ; source address
rep movsb ; copy source to destination
mov rsi, rdx ; restore rsi
NEXT
COMMENT $/*
BUILT-IN VARIABLES ----------------------------------------------------------------------
These are some built-in variables and related standard FORTH words. Of these, the only one that we
have discussed so far was LATEST, which points to the last (most recently defined) word in the
FORTH dictionary. LATEST is also a FORTH word which pushes the address of LATEST (the variable)
on to the stack, so you can read or write it using @ and ! operators. For example, to print
the current value of LATEST (and this can apply to any FORTH variable) you would do:
LATEST @ . CR
To make defining variables shorter, I'm using a macro called defvar, similar to defword and
defcode above. (In fact the defvar macro uses defcode to do the dictionary header).
*/$
mainCRTStartup proc mainCRTStartup proc
mainCRTStartup endp mainCRTStartup endp