problably going to go a diff route than masm

I wanted something in assembly to prep for Onat's streams. However assembly for x86 on linux is a pain in the ass to port to x64 masm.
I could try to do fasm instead... but I'm not sure if that port will be any less painful.

I could just do it in odin or C11 first, both can produce a similar runtime to what Onat showed. We don't get access to the registers without inline assembly however.
This commit is contained in:
Edward R. Gonzalez 2025-05-26 01:23:06 -04:00
parent db2336806e
commit f8afbadc33
2 changed files with 145 additions and 66 deletions

View File

@ -1,3 +1,5 @@
TBF_FORTH_VERSION equ 1
COMMENT @/* COMMENT @/*
PUBLIC DOMAIN ---------------------------------------------------------------------- PUBLIC DOMAIN ----------------------------------------------------------------------
@ -210,10 +212,16 @@ POP_RSP MACRO reg
lea rbp, [rbp + 8] lea rbp, [rbp + 8]
ENDM ENDM
; DOCOL - the interpreter! NOTE(Ed): I'm going to use DO_COLON instead .code
set_up_data_segment PROC
; For now, just return - Windows handles memory differently than Linux
ret
set_up_data_segment ENDP
; DOCOL - the interpreter!
.code .code
ALIGN 8 ALIGN 8
DO_COLON: DOCOL:
PUSH_RSP rsi ; push rsi on to the return stack PUSH_RSP rsi ; push rsi on to the return stack
add rax, 8 ; rax points to codeword, so make add rax, 8 ; rax points to codeword, so make
mov rsi, rax ; rsi point to first data word mov rsi, rax ; rsi point to first data word
@ -242,7 +250,7 @@ F_LENMASK equ 1fh ; length mask
; Store the chain of links. ; Store the chain of links.
link = 0 link = 0
defword MACRO name, namelen, flags:=<0>, label def_word MACRO name, namelen, flags:=<0>, label
.const .const
ALIGN 8 ALIGN 8
PUBLIC name_&label PUBLIC name_&label
@ -258,34 +266,34 @@ label:
; list of word pointers follow ; list of word pointers follow
ENDM ENDM
defcode MACRO name, namelen, flags:=<0>, label def_code MACRO name, namelen, flags:=<0>, label
.const .const
ALIGN 8 ALIGN 8
PUBLIC name_&label&_WORD PUBLIC name_&label&_WORD
name_&label&_WORD: name_&label&_WORD:
dq link ; 64-bit link pointer dq link ; 64-bit link pointer
link = name_&label&_WORD ; Update link to current word link = name_&label&_WORD ; Update link to current word
db flags + namelen ; Flags + length byte db flags + namelen ; Flags + length byte
db name ; The name (string literal) db name ; The name (string literal)
ALIGN 8 ; Padding to next 8-byte boundary ALIGN 8 ; Padding to next 8-byte boundary
PUBLIC &label&_WORD PUBLIC &label&_WORD
&label&_WORD: &label&_WORD:
dq code_&label&_WORD ; 64-bit codeword pointer dq code_&label&_WORD ; 64-bit codeword pointer
.code .code
ALIGN 8 ALIGN 8
PUBLIC code_&label&_WORD PUBLIC code_&label&_WORD
code_&label&_WORD: ; Assembler code follows code_&label&_WORD: ; Assembler code follows
ENDM ENDM
; Now some easy FORTH primitives. These are written in assembly for speed. ; Now some easy FORTH primitives. These are written in assembly for speed.
; drop top of stack ; drop top of stack
defcode "DROP", 4, , DROP def_code "DROP", 4, , DROP
pop rax pop rax
NEXT NEXT
; Swap two elements on stack ; Swap two elements on stack
defcode "SWAP", 4, , SWAP def_code "SWAP", 4, , SWAP
pop rax pop rax
pop rbx pop rbx
push rax push rax
@ -293,18 +301,18 @@ defcode "SWAP", 4, , SWAP
NEXT NEXT
; duplicate top of stack ; duplicate top of stack
defcode "DUP", 3, , DUP def_code "DUP", 3, , DUP
mov rax, [rsp] mov rax, [rsp]
push rax push rax
NEXT NEXT
; get the second element of the stack and push it on top ; get the second element of the stack and push it on top
defcode "OVER", 4, , OVER def_code "OVER", 4, , OVER
mov rax, [rsp + 8] ; get the second element of stack mov rax, [rsp + 8] ; get the second element of stack
push rax ; and push it on top push rax ; and push it on top
NEXT NEXT
defcode "ROT", 3, , ROT def_code "ROT", 3, , ROT
pop rax pop rax
pop rbx pop rbx
pop rcx pop rcx
@ -313,7 +321,7 @@ defcode "ROT", 3, , ROT
push rcx push rcx
NEXT NEXT
defcode "-ROT", 4, , NROT def_code "-ROT", 4, , NROT
pop rax pop rax
pop rbx pop rbx
pop rcx pop rcx
@ -323,13 +331,13 @@ defcode "-ROT", 4, , NROT
NEXT NEXT
; drop top two elements of stack ; drop top two elements of stack
defcode "2DROP", 5, , TWODROP def_code "2DROP", 5, , TWODROP
pop rax pop rax
pop rax pop rax
NEXT NEXT
; duplicate top two elements of stack ; duplicate top two elements of stack
defcode "2DUP", 4, , TWODUP def_code "2DUP", 4, , TWODUP
mov rax, [rsp] mov rax, [rsp]
mov rbx, [rsp + 8] mov rbx, [rsp + 8]
push rbx push rbx
@ -337,7 +345,7 @@ defcode "2DUP", 4, , TWODUP
NEXT NEXT
; swap top two pairs of elements of stack ; swap top two pairs of elements of stack
defcode "2SWAP", 5, , TWOSWAP def_code "2SWAP", 5, , TWOSWAP
pop rax pop rax
pop rbx pop rbx
pop rcx pop rcx
@ -349,7 +357,7 @@ defcode "2SWAP", 5, , TWOSWAP
NEXT NEXT
; duplicate top of stack if non-zero ; duplicate top of stack if non-zero
defcode "?DUP", 4, , QDUP def_code "?DUP", 4, , QDUP
mov rax, [rsp] mov rax, [rsp]
test rax, rax test rax, rax
jz @F jz @F
@ -357,41 +365,41 @@ defcode "?DUP", 4, , QDUP
@@: NEXT @@: NEXT
; increment top of stack ; increment top of stack
defcode "1+", 2, , INCR def_code "1+", 2, , INCR
inc qword ptr [rsp] inc qword ptr [rsp]
NEXT NEXT
; decrement top of stack ; decrement top of stack
defcode "1-", 2, , DECR def_code "1-", 2, , DECR
dec qword ptr [rsp] dec qword ptr [rsp]
NEXT NEXT
; add 4 to top of stack ; add 4 to top of stack
defcode "4+", 2, , INCR4 def_code "4+", 2, , INCR4
add qword ptr [rsp], 4 add qword ptr [rsp], 4
NEXT NEXT
; subtract 4 from top of stack ; subtract 4 from top of stack
defcode "4-", 2, , DECR4 def_code "4-", 2, , DECR4
sub qword ptr [rsp], 4 sub qword ptr [rsp], 4
NEXT NEXT
; get top of stack ; get top of stack
; and add it to next word on stack ; and add it to next word on stack
defcode "+", 1, , ADD def_code "+", 1, , ADD
pop rax pop rax
add [rsp], rax add [rsp], rax
NEXT NEXT
; get top of stack ; get top of stack
; and subtract it from next word on stack ; and subtract it from next word on stack
defcode "-", 1, , SUB def_code "-", 1, , SUB
pop rax pop rax
sub [rsp], rax sub [rsp], rax
NEXT NEXT
; ignore overflow ; ignore overflow
defcode "*", 1, , MUL def_code "*", 1, , MUL
pop rax pop rax
pop rbx pop rbx
imul rax, rbx imul rax, rbx
@ -404,7 +412,7 @@ COMMENT @/*
leaves both quotient and remainder makes this the obvious choice. leaves both quotient and remainder makes this the obvious choice.
*/@ */@
defcode "/MOD", 4, , DIVMOD def_code "/MOD", 4, , DIVMOD
xor rdx, rdx xor rdx, rdx
pop rbx pop rbx
pop rax pop rax
@ -423,7 +431,7 @@ COMMENT @/*
*/@ */@
; top two words are equal? ; top two words are equal?
defcode "=", 1, , EQU def_code "=", 1, , EQU
pop rax pop rax
pop rbx pop rbx
cmp rbx, rax cmp rbx, rax
@ -433,7 +441,7 @@ defcode "=", 1, , EQU
NEXT NEXT
; top two words are not equal? ; top two words are not equal?
defcode "<>", 2, , NEQU def_code "<>", 2, , NEQU
pop rax pop rax
pop rbx pop rbx
cmp rbx, rax cmp rbx, rax
@ -442,7 +450,7 @@ defcode "<>", 2, , NEQU
push rax push rax
NEXT NEXT
defcode "<", 1, , LT def_code "<", 1, , LT
pop rax pop rax
pop rbx pop rbx
cmp rbx, rax cmp rbx, rax
@ -451,7 +459,7 @@ defcode "<", 1, , LT
push rax push rax
NEXT NEXT
defcode ">", 1, , GT def_code ">", 1, , GT
pop rax pop rax
pop rbx pop rbx
cmp rbx, rax cmp rbx, rax
@ -460,7 +468,7 @@ defcode ">", 1, , GT
push rax push rax
NEXT NEXT
defcode "<=", 2, , LE def_code "<=", 2, , LE
pop rax pop rax
pop rbx pop rbx
cmp rbx, rax cmp rbx, rax
@ -469,7 +477,7 @@ defcode "<=", 2, , LE
push rax push rax
NEXT NEXT
defcode ">=", 2, , GE def_code ">=", 2, , GE
pop rax pop rax
pop rbx pop rbx
cmp rbx, rax cmp rbx, rax
@ -479,7 +487,7 @@ defcode ">=", 2, , GE
NEXT NEXT
; top of stack equals 0? ; top of stack equals 0?
defcode "0=", 2, , ZEQU def_code "0=", 2, , ZEQU
pop rax pop rax
test rax, rax test rax, rax
setz al setz al
@ -488,7 +496,7 @@ defcode "0=", 2, , ZEQU
NEXT NEXT
; top of stack not 0? ; top of stack not 0?
defcode "0<>", 3, , ZNEQU def_code "0<>", 3, , ZNEQU
pop rax pop rax
test rax, rax test rax, rax
setnz al setnz al
@ -497,7 +505,7 @@ defcode "0<>", 3, , ZNEQU
NEXT NEXT
; comparisons with 0 ; comparisons with 0
defcode "0<", 2, , ZLT def_code "0<", 2, , ZLT
pop rax pop rax
test rax, rax test rax, rax
setl al setl al
@ -505,7 +513,7 @@ defcode "0<", 2, , ZLT
push rax push rax
NEXT NEXT
defcode "0>", 2, , ZGT def_code "0>", 2, , ZGT
pop rax pop rax
test rax, rax test rax, rax
setg al setg al
@ -513,7 +521,7 @@ defcode "0>", 2, , ZGT
push rax push rax
NEXT NEXT
defcode "0<=", 3, , ZLE def_code "0<=", 3, , ZLE
pop rax pop rax
test rax, rax test rax, rax
setle al setle al
@ -521,7 +529,7 @@ defcode "0<=", 3, , ZLE
push rax push rax
NEXT NEXT
defcode "0>=", 3, , ZGE def_code "0>=", 3, , ZGE
pop rax pop rax
test rax, rax test rax, rax
setge al setge al
@ -530,25 +538,25 @@ defcode "0>=", 3, , ZGE
NEXT NEXT
; bitwise AND ; bitwise AND
defcode "AND", 3, , AND def_code "AND", 3, , AND
pop rax pop rax
and [rsp], rax and [rsp], rax
NEXT NEXT
; bitwise OR ; bitwise OR
defcode "OR", 2, , OR def_code "OR", 2, , OR
pop rax pop rax
or [rsp], rax or [rsp], rax
NEXT NEXT
; bitwise XOR ; bitwise XOR
defcode "XOR", 3, , XOR def_code "XOR", 3, , XOR
pop rax pop rax
xor [rsp], rax xor [rsp], rax
NEXT NEXT
; this is the FORTH bitwise "NOT" function ; this is the FORTH bitwise "NOT" function
defcode "INVERT", 6, , INVERT def_code "INVERT", 6, , INVERT
not qword ptr [rsp] not qword ptr [rsp]
NEXT NEXT
@ -576,7 +584,7 @@ COMMENT @/*
*/@ */@
; pop return stack into rsi ; pop return stack into rsi
defcode "EXIT", 4, , EXIT def_code "EXIT", 4, , EXIT
POP_RSP rsi POP_RSP rsi
NEXT NEXT
@ -626,7 +634,7 @@ COMMENT @/*
see if you can find out how LIT works: see if you can find out how LIT works:
*/@ */@
defcode "LIT", 3, , LIT def_code "LIT", 3, , LIT
; rsi points to the next command, but in this case it points to the next ; rsi points to the next command, but in this case it points to the next
; literal 64 bit integer. Get that literal into rax and increment rsi. ; literal 64 bit integer. Get that literal into rax and increment rsi.
lodsq lodsq
@ -641,25 +649,25 @@ COMMENT @/*
the primitive words for doing it. the primitive words for doing it.
*/@ */@
defcode "!", 1, , STORE def_code "!", 1, , STORE
pop rbx ; address to store at pop rbx ; address to store at
pop rax ; data to store there pop rax ; data to store there
mov [rbx], rax ; store it mov [rbx], rax ; store it
NEXT NEXT
defcode "@", 1, , FETCH def_code "@", 1, , FETCH
pop rbx ; address to fetch pop rbx ; address to fetch
mov rax, [rbx] ; fetch it mov rax, [rbx] ; fetch it
push rax ; push value onto stack push rax ; push value onto stack
NEXT NEXT
defcode "+!", 2, , ADDSTORE def_code "+!", 2, , ADDSTORE
pop rbx ; address pop rbx ; address
pop rax ; the amount to add pop rax ; the amount to add
add [rbx], rax ; add it add [rbx], rax ; add it
NEXT NEXT
defcode "-!", 2, , SUBSTORE def_code "-!", 2, , SUBSTORE
pop rbx ; address pop rbx ; address
pop rax ; the amount to subtract pop rax ; the amount to subtract
sub [rbx], rax ; subtract it sub [rbx], rax ; subtract it
@ -672,13 +680,13 @@ COMMENT $/*
Byte-oriented operations only work on architectures which permit them (i386 is one of those). Byte-oriented operations only work on architectures which permit them (i386 is one of those).
*/$ */$
defcode "C!", 2, , STOREBYTE def_code "C!", 2, , STOREBYTE
pop rbx ; address to store at pop rbx ; address to store at
pop rax ; data to store there pop rax ; data to store there
mov [rbx], al ; store it mov [rbx], al ; store it
NEXT NEXT
defcode "C@", 2, , FETCHBYTE def_code "C@", 2, , FETCHBYTE
pop rbx ; address to fetch pop rbx ; address to fetch
xor rax, rax xor rax, rax
mov al, [rbx] ; fetch it mov al, [rbx] ; fetch it
@ -686,7 +694,7 @@ defcode "C@", 2, , FETCHBYTE
NEXT NEXT
; C@C! is a useful byte copy primitive. ; C@C! is a useful byte copy primitive.
defcode "C@C!", 4, , CCOPY def_code "C@C!", 4, , CCOPY
mov rbx, [rsp + 8] ; source address mov rbx, [rsp + 8] ; source address
mov al, [rbx] ; get source character mov al, [rbx] ; get source character
pop rdi ; destination address pop rdi ; destination address
@ -696,7 +704,7 @@ defcode "C@C!", 4, , CCOPY
NEXT NEXT
; and CMOVE is a block copy operation. ; and CMOVE is a block copy operation.
defcode "CMOVE", 5, , CMOVE def_code "CMOVE", 5, , CMOVE
mov rdx, rsi ; preserve rsi mov rdx, rsi ; preserve rsi
pop rcx ; length pop rcx ; length
pop rdi ; destination address pop rdi ; destination address
@ -716,10 +724,82 @@ COMMENT $/*
LATEST @ . CR LATEST @ . CR
To make defining variables shorter, I'm using a macro called defvar, similar to defword and To make defining variables shorter, I'm using a macro called defvar, similar to def_word and
defcode above. (In fact the defvar macro uses defcode to do the dictionary header). def_code above. (In fact the defvar macro uses def_code to do the dictionary header).
*/$ */$
def_var MACRO name, namelen, flags:=<0>, label, initial:=<0>
def_code name, namelen, flags, label
push OFFSET var_&label
NEXT
; Don't switch sections here - define the variable data separately
ENDM
COMMENT @/*
The built-in variables are:
STATE Is the interpreter executing code (0) or compiling a word (non-zero)?
LATEST Points to the latest (most recently defined) word in the dictionary.
HERE Points to the next free byte of memory. When compiling, compiled words go here.
S0 Stores the address of the top of the parameter stack.
BASE The current base for printing and reading numbers.
*/@
def_var "STATE", 5, , STATE
def_var "HERE", 4, , HERE
def_var "LATEST", 6, , LATEST, 0 ; SYSCALL0 must be last in built-in dictionary
def_var "S0", 2, , SZ
def_var "BASE", 4, , BASE, 10
COMMENT @/*
BUILT-IN CONSTANTS ----------------------------------------------------------------------
It's also useful to expose a few constants to FORTH. When the word is executed it pushes a
constant value on the stack.
The built-in constants are:
VERSION Is the current version of this FORTH.
R0 The address of the top of the return stack.
DOCOL Pointer to DOCOL.
F_IMMED The IMMEDIATE flag's actual value.
F_HIDDEN The HIDDEN flag's actual value.
F_LENMASK The length mask in the flags/len byte.
SYS_* and the numeric codes of various Linux syscalls (from <asm/unistd.h>)
*/@
; Define def_const macro
; Macros (assuming NEXT and defcode are already defined)
def_const MACRO name, namelen, flags:=<0>, label, value
def_code name, namelen, flags, label
push value ; Just push the raw value
NEXT
ENDM
; FORTH system constants
; FORTH system constants
def_const "VERSION", 7,, VERSION, TBF_FORTH_VERSION
def_const "R0",2,,RZ,OFFSET return_stack_top
def_const "DOCOL",5,,__DOCOL,OFFSET DOCOL
def_const "F_IMMED", 7, , __F_IMMED, F_IMMED
def_const "F_HIDDEN", 8, , __F_HIDDEN, F_HIDDEN
def_const "F_LENMASK", 9, , __F_LENMASK, F_LENMASK
; Windows-specific constants
def_const "EXIT_SUCCESS", 11, , EXIT_SUCCESS, 0
def_const "EXIT_FAILURE", 11, , EXIT_FAILURE, 1
; Windows file access modes
def_const "GENERIC_READ", 11, , GENERIC_READ, 80000000h
def_const "GENERIC_WRITE", 12, , GENERIC_WRITE, 40000000h
def_const "GENERIC_READ_WRITE", 16, , GENERIC_READ_WRITE, 0C0000000h
; Windows file creation flags
def_const "CREATE_NEW", 10, , CREATE_NEW, 1
def_const "CREATE_ALWAYS", 12, , CREATE_ALWAYS, 2
def_const "OPEN_EXISTING", 12, , OPEN_EXISTING, 3
def_const "OPEN_ALWAYS", 10, , OPEN_ALWAYS, 4
def_const "TRUNCATE_EXISTING", 15, , TRUNCATE_EXISTING, 5
mainCRTStartup proc mainCRTStartup proc
mainCRTStartup endp mainCRTStartup endp
end end

View File

@ -308,8 +308,7 @@
jmp *(%eax) jmp *(%eax)
.endm .endm
/* /* The macro is called NEXT. That's a FORTH-ism. It expands to those two instructions.
The macro is called NEXT. That's a FORTH-ism. It expands to those two instructions.
Every FORTH primitive that we write has to be ended by NEXT. Think of it kind of like Every FORTH primitive that we write has to be ended by NEXT. Think of it kind of like
a return. a return.