learning
This commit is contained in:
parent
a9d87e4797
commit
db2336806e
541
code/forth.asm
541
code/forth.asm
@ -201,37 +201,37 @@ COMMENT @/*
|
||||
; Macros that deal with the return stack
|
||||
|
||||
PUSH_RSP MACRO reg
|
||||
lea rbp, [rbp - 8] ; push reg on to return stack
|
||||
mov [rbp], reg
|
||||
lea rbp, [rbp - 8] ; push reg on to return stack
|
||||
mov [rbp], reg
|
||||
ENDM
|
||||
|
||||
POP_RSP MACRO reg
|
||||
mov reg, [rbp] ; pop top of return stack to reg
|
||||
lea rbp, [rbp + 8]
|
||||
mov reg, [rbp] ; pop top of return stack to reg
|
||||
lea rbp, [rbp + 8]
|
||||
ENDM
|
||||
|
||||
; DOCOL - the interpreter! NOTE(Ed): I'm going to use DO_COLON instead
|
||||
.code
|
||||
ALIGN 8
|
||||
DO_COLON:
|
||||
PUSH_RSP rsi ; push rsi on to the return stack
|
||||
add rax, 8 ; rax points to codeword, so make
|
||||
mov rsi, rax ; rsi point to first data word
|
||||
NEXT
|
||||
PUSH_RSP rsi ; push rsi on to the return stack
|
||||
add rax, 8 ; rax points to codeword, so make
|
||||
mov rsi, rax ; rsi point to first data word
|
||||
NEXT
|
||||
|
||||
.code
|
||||
PUBLIC main
|
||||
main:
|
||||
cld
|
||||
mov var_S0, rsp ; Save the initial data stack pointer in FORTH variable S0.
|
||||
mov rbp, OFFSET return_stack_top ; Initialise the return stack.
|
||||
call set_up_data_segment
|
||||
mov rsi, OFFSET cold_start ; Initialise interpreter.
|
||||
NEXT ; Run interpreter!
|
||||
cld
|
||||
mov var_S0, rsp ; Save the initial data stack pointer in FORTH variable S0.
|
||||
mov rbp, OFFSET return_stack_top ; Initialise the return stack.
|
||||
call set_up_data_segment
|
||||
mov rsi, OFFSET cold_start ; Initialise interpreter.
|
||||
NEXT ; Run interpreter!
|
||||
|
||||
.const
|
||||
cold_start: ; High-level code without a codeword.
|
||||
dq QUIT
|
||||
dq QUIT
|
||||
|
||||
; Flags - these are discussed later.
|
||||
|
||||
@ -259,144 +259,144 @@ label:
|
||||
ENDM
|
||||
|
||||
defcode MACRO name, namelen, flags:=<0>, label
|
||||
.const
|
||||
ALIGN 8
|
||||
PUBLIC name_&label
|
||||
name_&label:
|
||||
dq link ; link
|
||||
link = name_&label
|
||||
db flags + namelen ; flags + length byte
|
||||
db "&name" ; the name
|
||||
ALIGN 8 ; padding to next 8 byte boundary
|
||||
PUBLIC label
|
||||
label:
|
||||
dq code_&label ; codeword
|
||||
.code
|
||||
;ALIGN 8
|
||||
PUBLIC code_&label
|
||||
code_&label: ; assembler code follows
|
||||
.const
|
||||
ALIGN 8
|
||||
PUBLIC name_&label&_WORD
|
||||
name_&label&_WORD:
|
||||
dq link ; 64-bit link pointer
|
||||
link = name_&label&_WORD ; Update link to current word
|
||||
db flags + namelen ; Flags + length byte
|
||||
db name ; The name (string literal)
|
||||
ALIGN 8 ; Padding to next 8-byte boundary
|
||||
PUBLIC &label&_WORD
|
||||
&label&_WORD:
|
||||
dq code_&label&_WORD ; 64-bit codeword pointer
|
||||
.code
|
||||
ALIGN 8
|
||||
PUBLIC code_&label&_WORD
|
||||
code_&label&_WORD: ; Assembler code follows
|
||||
ENDM
|
||||
|
||||
; Now some easy FORTH primitives. These are written in assembly for speed.
|
||||
|
||||
; drop top of stack
|
||||
defcode "DROP", 4, , DROP
|
||||
pop rax
|
||||
NEXT
|
||||
pop rax
|
||||
NEXT
|
||||
|
||||
; Swap two elements on stack
|
||||
defcode "SWAP", 4, , SWAP
|
||||
pop rax
|
||||
pop rbx
|
||||
push rax
|
||||
push rbx
|
||||
NEXT
|
||||
pop rax
|
||||
pop rbx
|
||||
push rax
|
||||
push rbx
|
||||
NEXT
|
||||
|
||||
; duplicate top of stack
|
||||
defcode "DUP", 3, , DUP
|
||||
mov rax, [rsp]
|
||||
push rax
|
||||
NEXT
|
||||
mov rax, [rsp]
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
; get the second element of the stack and push it on top
|
||||
defcode "OVER", 4, , OVER
|
||||
mov rax, [rsp + 8] ; get the second element of stack
|
||||
push rax ; and push it on top
|
||||
NEXT
|
||||
mov rax, [rsp + 8] ; get the second element of stack
|
||||
push rax ; and push it on top
|
||||
NEXT
|
||||
|
||||
defcode "ROT", 3, , ROT
|
||||
pop rax
|
||||
pop rbx
|
||||
pop rcx
|
||||
push rbx
|
||||
push rax
|
||||
push rcx
|
||||
NEXT
|
||||
pop rax
|
||||
pop rbx
|
||||
pop rcx
|
||||
push rbx
|
||||
push rax
|
||||
push rcx
|
||||
NEXT
|
||||
|
||||
defcode "-ROT", 4, , NROT
|
||||
pop rax
|
||||
pop rbx
|
||||
pop rcx
|
||||
push rax
|
||||
push rcx
|
||||
push rbx
|
||||
NEXT
|
||||
pop rax
|
||||
pop rbx
|
||||
pop rcx
|
||||
push rax
|
||||
push rcx
|
||||
push rbx
|
||||
NEXT
|
||||
|
||||
; drop top two elements of stack
|
||||
defcode "2DROP", 5, , TWODROP
|
||||
pop rax
|
||||
pop rax
|
||||
NEXT
|
||||
pop rax
|
||||
pop rax
|
||||
NEXT
|
||||
|
||||
; duplicate top two elements of stack
|
||||
defcode "2DUP", 4, , TWODUP
|
||||
mov rax, [rsp]
|
||||
mov rbx, [rsp + 8]
|
||||
push rbx
|
||||
push rax
|
||||
NEXT
|
||||
mov rax, [rsp]
|
||||
mov rbx, [rsp + 8]
|
||||
push rbx
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
; swap top two pairs of elements of stack
|
||||
defcode "2SWAP", 5, , TWOSWAP
|
||||
pop rax
|
||||
pop rbx
|
||||
pop rcx
|
||||
pop rdx
|
||||
push rbx
|
||||
push rax
|
||||
push rdx
|
||||
push rcx
|
||||
NEXT
|
||||
pop rax
|
||||
pop rbx
|
||||
pop rcx
|
||||
pop rdx
|
||||
push rbx
|
||||
push rax
|
||||
push rdx
|
||||
push rcx
|
||||
NEXT
|
||||
|
||||
; duplicate top of stack if non-zero
|
||||
defcode "?DUP", 4, , QDUP
|
||||
mov rax, [rsp]
|
||||
test rax, rax
|
||||
jz @F
|
||||
push rax
|
||||
mov rax, [rsp]
|
||||
test rax, rax
|
||||
jz @F
|
||||
push rax
|
||||
@@: NEXT
|
||||
|
||||
; increment top of stack
|
||||
defcode "1+", 2, , INCR
|
||||
inc qword ptr [rsp]
|
||||
NEXT
|
||||
inc qword ptr [rsp]
|
||||
NEXT
|
||||
|
||||
; decrement top of stack
|
||||
defcode "1-", 2, , DECR
|
||||
dec qword ptr [rsp]
|
||||
NEXT
|
||||
dec qword ptr [rsp]
|
||||
NEXT
|
||||
|
||||
; add 4 to top of stack
|
||||
defcode "4+", 2, , INCR4
|
||||
add qword ptr [rsp], 4
|
||||
NEXT
|
||||
add qword ptr [rsp], 4
|
||||
NEXT
|
||||
|
||||
; subtract 4 from top of stack
|
||||
defcode "4-", 2, , DECR4
|
||||
sub qword ptr [rsp], 4
|
||||
NEXT
|
||||
sub qword ptr [rsp], 4
|
||||
NEXT
|
||||
|
||||
; get top of stack
|
||||
; and add it to next word on stack
|
||||
defcode "+", 1, , ADD
|
||||
pop rax
|
||||
add [rsp], rax
|
||||
NEXT
|
||||
pop rax
|
||||
add [rsp], rax
|
||||
NEXT
|
||||
|
||||
; get top of stack
|
||||
; and subtract it from next word on stack
|
||||
defcode "-", 1, , SUB
|
||||
pop rax
|
||||
sub [rsp], rax
|
||||
NEXT
|
||||
pop rax
|
||||
sub [rsp], rax
|
||||
NEXT
|
||||
|
||||
; ignore overflow
|
||||
defcode "*", 1, , MUL
|
||||
pop rax
|
||||
pop rbx
|
||||
imul rax, rbx
|
||||
push rax
|
||||
NEXT
|
||||
pop rax
|
||||
pop rbx
|
||||
imul rax, rbx
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
COMMENT @/*
|
||||
In this FORTH, only /MOD is primitive. Later we will define the / and MOD words in
|
||||
@ -405,13 +405,13 @@ COMMENT @/*
|
||||
*/@
|
||||
|
||||
defcode "/MOD", 4, , DIVMOD
|
||||
xor rdx, rdx
|
||||
pop rbx
|
||||
pop rax
|
||||
idiv rbx
|
||||
push rdx ; push remainder
|
||||
push rax ; push quotient
|
||||
NEXT
|
||||
xor rdx, rdx
|
||||
pop rbx
|
||||
pop rax
|
||||
idiv rbx
|
||||
push rdx ; push remainder
|
||||
push rax ; push quotient
|
||||
NEXT
|
||||
|
||||
COMMENT @/*
|
||||
Lots of comparison operations like =, <, >, etc..
|
||||
@ -422,7 +422,8 @@ COMMENT @/*
|
||||
1 meaning TRUE and 0 meaning FALSE.
|
||||
*/@
|
||||
|
||||
defcode "=", 1, , EQU ; top two words are equal?
|
||||
; top two words are equal?
|
||||
defcode "=", 1, , EQU
|
||||
pop rax
|
||||
pop rbx
|
||||
cmp rbx, rax
|
||||
@ -431,7 +432,8 @@ defcode "=", 1, , EQU ; top two words are equal?
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
defcode "<>", 2, , NEQU ; top two words are not equal?
|
||||
; top two words are not equal?
|
||||
defcode "<>", 2, , NEQU
|
||||
pop rax
|
||||
pop rbx
|
||||
cmp rbx, rax
|
||||
@ -441,113 +443,282 @@ defcode "<>", 2, , NEQU ; top two words are not equal?
|
||||
NEXT
|
||||
|
||||
defcode "<", 1, , LT
|
||||
pop rax
|
||||
pop rbx
|
||||
cmp rbx, rax
|
||||
setl al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
pop rax
|
||||
pop rbx
|
||||
cmp rbx, rax
|
||||
setl al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
defcode ">", 1, , GT
|
||||
pop rax
|
||||
pop rbx
|
||||
cmp rbx, rax
|
||||
setg al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
pop rax
|
||||
pop rbx
|
||||
cmp rbx, rax
|
||||
setg al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
defcode "<=", 2, , LE
|
||||
pop rax
|
||||
pop rbx
|
||||
cmp rbx, rax
|
||||
setle al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
pop rax
|
||||
pop rbx
|
||||
cmp rbx, rax
|
||||
setle al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
defcode ">=", 2, , GE
|
||||
pop rax
|
||||
pop rbx
|
||||
cmp rbx, rax
|
||||
setge al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
pop rax
|
||||
pop rbx
|
||||
cmp rbx, rax
|
||||
setge al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
defcode "0=", 2, , ZEQU ; top of stack equals 0?
|
||||
pop rax
|
||||
test rax, rax
|
||||
setz al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
; top of stack equals 0?
|
||||
defcode "0=", 2, , ZEQU
|
||||
pop rax
|
||||
test rax, rax
|
||||
setz al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
defcode "0<>", 3, , ZNEQU ; top of stack not 0?
|
||||
pop rax
|
||||
test rax, rax
|
||||
setnz al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
; top of stack not 0?
|
||||
defcode "0<>", 3, , ZNEQU
|
||||
pop rax
|
||||
test rax, rax
|
||||
setnz al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
defcode "0<", 2, , ZLT ; comparisons with 0
|
||||
pop rax
|
||||
test rax, rax
|
||||
setl al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
; comparisons with 0
|
||||
defcode "0<", 2, , ZLT
|
||||
pop rax
|
||||
test rax, rax
|
||||
setl al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
defcode "0>", 2, , ZGT
|
||||
pop rax
|
||||
test rax, rax
|
||||
setg al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
pop rax
|
||||
test rax, rax
|
||||
setg al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
defcode "0<=", 3, , ZLE
|
||||
pop rax
|
||||
test rax, rax
|
||||
setle al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
pop rax
|
||||
test rax, rax
|
||||
setle al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
defcode "0>=", 3, , ZGE
|
||||
pop rax
|
||||
test rax, rax
|
||||
setge al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
pop rax
|
||||
test rax, rax
|
||||
setge al
|
||||
movzx rax, al
|
||||
push rax
|
||||
NEXT
|
||||
|
||||
; bitwise AND
|
||||
defcode "AND", 3, , AND
|
||||
pop rax
|
||||
and [rsp], rax
|
||||
NEXT
|
||||
pop rax
|
||||
and [rsp], rax
|
||||
NEXT
|
||||
|
||||
; bitwise OR
|
||||
defcode "OR", 2, , OR
|
||||
pop rax
|
||||
or [rsp], rax
|
||||
NEXT
|
||||
pop rax
|
||||
or [rsp], rax
|
||||
NEXT
|
||||
|
||||
; bitwise XOR
|
||||
defcode "XOR", 3, , XOR
|
||||
pop rax
|
||||
xor [rsp], rax
|
||||
NEXT
|
||||
pop rax
|
||||
xor [rsp], rax
|
||||
NEXT
|
||||
|
||||
; this is the FORTH bitwise "NOT" function
|
||||
defcode "INVERT", 6, , INVERT
|
||||
not qword ptr [rsp]
|
||||
NEXT
|
||||
not qword ptr [rsp]
|
||||
NEXT
|
||||
|
||||
COMMENT @/*
|
||||
RETURNING FROM FORTH WORDS ----------------------------------------------------------------------
|
||||
|
||||
Time to talk about what happens when we EXIT a function. In this diagram QUADRUPLE has called
|
||||
DOUBLE, and DOUBLE is about to exit (look at where %esi is pointing):
|
||||
|
||||
QUADRUPLE
|
||||
+------------------+
|
||||
| codeword |
|
||||
+------------------+ DOUBLE
|
||||
| addr of DOUBLE ---------------> +------------------+
|
||||
+------------------+ | codeword |
|
||||
| addr of DOUBLE | +------------------+
|
||||
+------------------+ | addr of DUP |
|
||||
| addr of EXIT | +------------------+
|
||||
+------------------+ | addr of + |
|
||||
+------------------+
|
||||
%esi -> | addr of EXIT |
|
||||
+------------------+
|
||||
|
||||
What happens when the + function does NEXT? Well, the following code is executed.
|
||||
*/@
|
||||
|
||||
; pop return stack into rsi
|
||||
defcode "EXIT", 4, , EXIT
|
||||
POP_RSP rsi
|
||||
NEXT
|
||||
|
||||
COMMENT @/*
|
||||
EXIT gets the old %esi which we saved from before on the return stack, and puts it in %esi.
|
||||
So after this (but just before NEXT) we get:
|
||||
|
||||
QUADRUPLE
|
||||
+------------------+
|
||||
| codeword |
|
||||
+------------------+ DOUBLE
|
||||
| addr of DOUBLE ---------------> +------------------+
|
||||
+------------------+ | codeword |
|
||||
%esi -> | addr of DOUBLE | +------------------+
|
||||
+------------------+ | addr of DUP |
|
||||
| addr of EXIT | +------------------+
|
||||
+------------------+ | addr of + |
|
||||
+------------------+
|
||||
| addr of EXIT |
|
||||
+------------------+
|
||||
|
||||
And NEXT just completes the job by, well, in this case just by calling DOUBLE again :-)
|
||||
|
||||
LITERALS ----------------------------------------------------------------------
|
||||
|
||||
The final point I "glossed over" before was how to deal with functions that do anything
|
||||
apart from calling other functions. For example, suppose that DOUBLE was defined like this:
|
||||
|
||||
: DOUBLE 2 * ;
|
||||
|
||||
It does the same thing, but how do we compile it since it contains the literal 2? One way
|
||||
would be to have a function called "2" (which you'd have to write in assembler), but you'd need
|
||||
a function for every single literal that you wanted to use.
|
||||
|
||||
FORTH solves this by compiling the function using a special word called LIT:
|
||||
|
||||
+---------------------------+-------+-------+-------+-------+-------+
|
||||
| (usual header of DOUBLE) | DOCOL | LIT | 2 | * | EXIT |
|
||||
+---------------------------+-------+-------+-------+-------+-------+
|
||||
|
||||
LIT is executed in the normal way, but what it does next is definitely not normal. It
|
||||
looks at %esi (which now points to the number 2), grabs it, pushes it on the stack, then
|
||||
manipulates %esi in order to skip the number as if it had never been there.
|
||||
|
||||
What's neat is that the whole grab/manipulate can be done using a single byte single
|
||||
i386 instruction, our old friend LODSL. Rather than me drawing more ASCII-art diagrams,
|
||||
see if you can find out how LIT works:
|
||||
*/@
|
||||
|
||||
defcode "LIT", 3, , LIT
|
||||
; rsi points to the next command, but in this case it points to the next
|
||||
; literal 64 bit integer. Get that literal into rax and increment rsi.
|
||||
lodsq
|
||||
push rax ; push the literal number on to stack
|
||||
NEXT
|
||||
|
||||
COMMENT @/*
|
||||
MEMORY ----------------------------------------------------------------------
|
||||
|
||||
An important point about FORTH is that it gives you direct access to the lowest levels
|
||||
of the machine. Manipulating memory directly is done frequently in FORTH, and these are
|
||||
the primitive words for doing it.
|
||||
*/@
|
||||
|
||||
defcode "!", 1, , STORE
|
||||
pop rbx ; address to store at
|
||||
pop rax ; data to store there
|
||||
mov [rbx], rax ; store it
|
||||
NEXT
|
||||
|
||||
defcode "@", 1, , FETCH
|
||||
pop rbx ; address to fetch
|
||||
mov rax, [rbx] ; fetch it
|
||||
push rax ; push value onto stack
|
||||
NEXT
|
||||
|
||||
defcode "+!", 2, , ADDSTORE
|
||||
pop rbx ; address
|
||||
pop rax ; the amount to add
|
||||
add [rbx], rax ; add it
|
||||
NEXT
|
||||
|
||||
defcode "-!", 2, , SUBSTORE
|
||||
pop rbx ; address
|
||||
pop rax ; the amount to subtract
|
||||
sub [rbx], rax ; subtract it
|
||||
NEXT
|
||||
|
||||
COMMENT $/*
|
||||
! and @ (STORE and FETCH) store 32-bit words. It's also useful to be able to read and write bytes
|
||||
so we also define standard words C@ and C!.
|
||||
|
||||
Byte-oriented operations only work on architectures which permit them (i386 is one of those).
|
||||
*/$
|
||||
|
||||
defcode "C!", 2, , STOREBYTE
|
||||
pop rbx ; address to store at
|
||||
pop rax ; data to store there
|
||||
mov [rbx], al ; store it
|
||||
NEXT
|
||||
|
||||
defcode "C@", 2, , FETCHBYTE
|
||||
pop rbx ; address to fetch
|
||||
xor rax, rax
|
||||
mov al, [rbx] ; fetch it
|
||||
push rax ; push value onto stack
|
||||
NEXT
|
||||
|
||||
; C@C! is a useful byte copy primitive.
|
||||
defcode "C@C!", 4, , CCOPY
|
||||
mov rbx, [rsp + 8] ; source address
|
||||
mov al, [rbx] ; get source character
|
||||
pop rdi ; destination address
|
||||
stosb ; copy to destination
|
||||
push rdi ; increment destination address
|
||||
inc qword ptr [rsp + 8] ; increment source address
|
||||
NEXT
|
||||
|
||||
; and CMOVE is a block copy operation.
|
||||
defcode "CMOVE", 5, , CMOVE
|
||||
mov rdx, rsi ; preserve rsi
|
||||
pop rcx ; length
|
||||
pop rdi ; destination address
|
||||
pop rsi ; source address
|
||||
rep movsb ; copy source to destination
|
||||
mov rsi, rdx ; restore rsi
|
||||
NEXT
|
||||
|
||||
COMMENT $/*
|
||||
BUILT-IN VARIABLES ----------------------------------------------------------------------
|
||||
|
||||
These are some built-in variables and related standard FORTH words. Of these, the only one that we
|
||||
have discussed so far was LATEST, which points to the last (most recently defined) word in the
|
||||
FORTH dictionary. LATEST is also a FORTH word which pushes the address of LATEST (the variable)
|
||||
on to the stack, so you can read or write it using @ and ! operators. For example, to print
|
||||
the current value of LATEST (and this can apply to any FORTH variable) you would do:
|
||||
|
||||
LATEST @ . CR
|
||||
|
||||
To make defining variables shorter, I'm using a macro called defvar, similar to defword and
|
||||
defcode above. (In fact the defvar macro uses defcode to do the dictionary header).
|
||||
*/$
|
||||
|
||||
mainCRTStartup proc
|
||||
mainCRTStartup endp
|
||||
|
Loading…
x
Reference in New Issue
Block a user