learning

2025-05-25 22:35:07 -04:00
parent b32fdffb93
commit a9d87e4797
4 changed files with 2870 additions and 4 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -23,7 +23,7 @@ charset = utf-8

 [*.asm]
 indent_style = tab
-indent_size  = 2
+indent_size  = 8
 charset      = utf-8

 [*.{natvis, natstepfilter}]
--- a/code/forth.asm
+++ b/code/forth.asm
@@ -1,2 +1,554 @@
-; An introduction to forth based on jonesforth.S
+COMMENT @/*
+	PUBLIC DOMAIN ----------------------------------------------------------------------

+	I, the copyright holder of this work, hereby release it into the public domain. This applies worldwide.
+
+	In case this is not legally possible, I grant any entity the right to use this work for any purpose,
+	without any conditions, unless such conditions are required by law.
+
+	INFO -------------------------------------------------------------------------------
+
+	File: forath.asm
+
+	VENDOR TARGETS:
+		OS:        Windows 11 amd64
+		ASSEMBLER: Micorsoft Macro Assembler Version 14.43
+
+	Inspiration to learn FORTH:
+		Metaprogramming VAMP in KYRA, a Next-gen Forth-like language --- Onat Türkçüoğlu -- 2025-04-26
+		https://www.youtube.com/watch?v=J9U_5tjdegY
+
+		Onat's post on his KYRA language: https://onatto.github.io/lang.html
+
+		Timothy Lottes forth-like language: "A"; inspired him with KYRA. 
+		All of which are related to FORTH and COLOR FORTH
+
+	An introduction to forth based on jonesforth.S
+	See: https://github.com/nornagon/jonesforth/blob/master/jonesforth.S
+
+	I will be pasting much of the commentary from the original source into this file,
+	with some edits.
+
+	SETTING UP ----------------------------------------------------------------------
+
+	Let's get a few housekeeping things out of the way.  Firstly because I need to draw lots of
+	ASCII-art diagrams to explain concepts, the best way to look at this is using a window which
+	uses a fixed width font and is at least this wide:
+
+ <------------------------------------------------------------------------------------------------------------------------>
+
+	Secondly make sure TABS are set to 8 characters.  The following should be a vertical
+	line.  If not, sort out your tabs.
+
+		|
+	        |
+	    	|
+
+	Thirdly I assume that your screen is at least 50 characters high.
+
+	THE DICTIONARY ----------------------------------------------------------------------
+
+	In FORTH as you will know, functions are called "words", and just as in other languages they
+	have a name and a definition.  Here are two FORTH words:
+
+	: DOUBLE DUP + ;		\ name is "DOUBLE", definition is "DUP +"
+	: QUADRUPLE DOUBLE DOUBLE ;	\ name is "QUADRUPLE", definition is "DOUBLE DOUBLE"
+
+	Words, both built-in ones and ones which the programmer defines later, are stored in a dictionary
+	which is just a linked list of dictionary entries.
+
+	<--- DICTIONARY ENTRY (HEADER) ----------------------->
+	+------------------------+--------+---------- - - - - +----------- - - - -
+	| LINK POINTER           | LENGTH/| NAME	      | DEFINITION
+	|			 | FLAGS  |     	      |
+	+--- (4 bytes) ----------+- byte -+- n bytes  - - - - +----------- - - - -
+
+	I'll come to the definition of the word later.  For now just look at the header.  The first
+	4 bytes are the link pointer.  This points back to the previous word in the dictionary, or, for
+	the first word in the dictionary it is just a NULL pointer.  Then comes a length/flags byte.
+	The length of the word can be up to 31 characters (5 bits used) and the top three bits are used
+	for various flags which I'll come to later.  This is followed by the name itself, and in this
+	implementation the name is rounded up to a multiple of 4 bytes by padding it with zero bytes.
+	That's just to ensure that the definition starts on a 32 bit boundary.
+
+	A FORTH variable called LATEST contains a pointer to the most recently defined word, in
+	other words, the head of this linked list.
+
+	DOUBLE and QUADRUPLE might look like this:
+
+	  pointer to previous word
+	   ^
+	   |
+	+--|------+---+---+---+---+---+---+---+---+------------- - - - -
+	| LINK    | 6 | D | O | U | B | L | E | 0 | (definition ...)
+	+---------+---+---+---+---+---+---+---+---+------------- - - - -
+           ^       len                         padding
+	   |
+	+--|------+---+---+---+---+---+---+---+---+---+---+---+---+------------- - - - -
+	| LINK    | 9 | Q | U | A | D | R | U | P | L | E | 0 | 0 | (definition ...)
+	+---------+---+---+---+---+---+---+---+---+---+---+---+---+------------- - - - -
+           ^       len                                     padding
+           |
+           |
+	  LATEST
+
+	You should be able to see from this how you might implement functions to find a word in
+	the dictionary (just walk along the dictionary entries starting at LATEST and matching
+	the names until you either find a match or hit the NULL pointer at the end of the dictionary);
+	and add a word to the dictionary (create a new definition, set its LINK to LATEST, and set
+	LATEST to point to the new word).  We'll see precisely these functions implemented in
+	assembly code later on.
+
+	One interesting consequence of using a linked list is that you can redefine words, and
+	a newer definition of a word overrides an older one.  This is an important concept in
+	FORTH because it means that any word (even "built-in" or "standard" words) can be
+	overridden with a new definition, either to enhance it, to make it faster or even to
+	disable it.  However because of the way that FORTH words get compiled, which you'll
+	understand below, words defined using the old definition of a word continue to use
+	the old definition.  Only words defined after the new definition use the new definition.
+*/@
+
+;	NEXT MACRO:
+;	This loads a qword from [rsi] into rax and increments rsi by 8, then
+;	jumps to the address stored at [rax].
+NEXT MACRO
+	lodsq
+	jmp qword ptr [rax]
+ENDM
+
+COMMENT @/*
+	The macro is called NEXT.  That's a FORTH-ism.  It expands to those two instructions.
+	
+	Every FORTH primitive that we write has to be ended by NEXT.  Think of it kind of like
+	a return.
+
+	DIRECT THREADED CODE ----------------------------------------------------------------------
+	
+	Let's talk about what "threaded code" means.  Imagine a peculiar version of C where
+	you are only allowed to call functions without arguments.  (Don't worry for now that such a
+	language would be completely useless!)  So in our peculiar C, code would look like this:
+
+	f ()
+	{
+	  a ();
+	  b ();
+	  c ();
+	}
+
+	and so on.  How would a function, say 'f' above, be compiled by a standard C compiler?
+	Probably into assembly code like this.  On the right hand side I've written the actual
+	x86 machine code.
+
+	f:
+	  CALL a			0E8h, 008h, 000h, 000h, 000h
+	  CALL b			0E8h, 01Ch, 000h, 000h, 000h
+	  CALL c			0E8h, 02Ch, 000h, 000h, 000h
+	  ; ignore the return from the function for now
+
+	"E8h" is the x86 machine code to "CALL" a function.  In the first 20 years of computing
+	memory was hideously expensive and we might have worried about the wasted space being used
+	by the repeated "E8h" bytes.  We can save 20% in code size (and therefore, in expensive memory)
+	by compressing this into just:
+
+	008h, 000h, 000h, 000h		Just the function addresses, without
+	01Ch, 000h, 000h, 000h		the CALL prefix.
+	02Ch, 000h, 000h, 000h
+
+	On a 16-bit machine like the ones which originally ran FORTH the savings are even greater - 33%.
+
+	[Historical note: If the execution model that FORTH uses looks strange from the following
+	paragraphs, then it was motivated entirely by the need to save memory on early computers.
+	This code compression isn't so important now when our machines have more memory in their L1
+	caches than those early computers had in total, but the execution model still has some
+	useful properties].
+
+	Of course this code won't run directly on the CPU any more.  Instead we need to write an
+	interpreter which takes each set of bytes and calls it.
+
+	On an x86 machine it turns out that we can write this interpreter rather easily, in just
+	two assembly instructions which turn into just 3 bytes of machine code.  Let's store the
+	pointer to the next word to execute in the ESI register:
+
+		008h, 000h, 000h, 000h	<- We're executing this one now.  ESI is the _next_ one to execute.
+	ESI ->  01Ch, 000h, 000h, 000h
+		02Ch, 000h, 000h, 000h
+
+	The all-important x86 instruction is called LODSD (or in Intel manuals, LODSD).  It does
+	two things.  Firstly it reads the memory at ESI into the accumulator (EAX).  Secondly it
+	increments ESI by 4 bytes.  So after LODSD, the situation now looks like this:
+
+		008h, 000h, 000h, 000h	<- We're still executing this one
+		01Ch, 000h, 000h, 000h	<- EAX now contains this address (0000001Ch)
+	ESI ->  02Ch, 000h, 000h, 000h
+
+	Now we just need to jump to the address in EAX.  This is again just a single x86 instruction
+	written JMP DWORD PTR [EAX].  And after doing the jump, the situation looks like:
+
+		008h, 000h, 000h, 000h
+		01Ch, 000h, 000h, 000h	<- Now we're executing this subroutine.
+	ESI ->  02Ch, 000h, 000h, 000h
+
+	To make this work, each subroutine is followed by the two instructions 'LODSD; JMP DWORD PTR [EAX]'
+	which literally make the jump to the next subroutine.
+
+	-------------------------------------------------------------------------------------------
+
+	To sum up: We compress our function calls down to a list of addresses and use a somewhat
+	magical macro to act as a "jump to next function in the list".  We also use one register (ESI)
+	to act as a kind of instruction pointer, pointing to the next function in the list.
+*/@
+
+; Macros that deal with the return stack
+
+PUSH_RSP MACRO reg
+    lea  rbp,  [rbp - 8]   ; push reg on to return stack
+    mov [rbp],  reg
+ENDM
+
+POP_RSP MACRO reg
+    mov reg, [rbp]         ; pop top of return stack to reg
+    lea rbp, [rbp + 8]
+ENDM
+
+; DOCOL - the interpreter! NOTE(Ed): I'm going to use DO_COLON instead
+.code
+ALIGN 8
+DO_COLON:
+    PUSH_RSP rsi        ; push rsi on to the return stack
+    add rax, 8          ; rax points to codeword, so make
+    mov rsi, rax        ; rsi point to first data word
+    NEXT
+
+.code
+PUBLIC main
+main:
+    cld
+    mov var_S0, rsp                      ; Save the initial data stack pointer in FORTH variable S0.
+    mov rbp,    OFFSET return_stack_top  ; Initialise the return stack.
+    call set_up_data_segment
+    mov rsi,    OFFSET cold_start        ; Initialise interpreter.
+    NEXT                                 ; Run interpreter!
+
+.const
+cold_start:                      ; High-level code without a codeword.
+    dq QUIT
+
+; Flags - these are discussed later.
+
+F_IMMED     equ 80h
+F_HIDDEN    equ 20h
+F_LENMASK   equ 1fh     ; length mask
+
+; Store the chain of links.
+link = 0
+
+defword MACRO name, namelen, flags:=<0>, label
+	.const
+	ALIGN 8
+	PUBLIC name_&label
+name_&label:
+	dq link             ; link
+	link = name_&label
+	db flags + namelen  ; flags + length byte
+	db "&name"          ; the name
+	ALIGN 8             ; padding to next 8 byte boundary
+	PUBLIC label
+label:
+	dq DOCOL            ; codeword - the interpreter
+	; list of word pointers follow
+ENDM
+
+defcode MACRO name, namelen, flags:=<0>, label
+	.const
+	ALIGN 8
+	PUBLIC name_&label
+name_&label:
+	dq link             ; link
+	link = name_&label
+	db flags + namelen  ; flags + length byte
+	db "&name"          ; the name
+	ALIGN 8             ; padding to next 8 byte boundary
+	PUBLIC label
+label:
+	dq code_&label      ; codeword
+	.code
+	;ALIGN 8
+	PUBLIC code_&label
+code_&label:            ; assembler code follows
+ENDM
+
+; Now some easy FORTH primitives. These are written in assembly for speed.
+
+; drop top of stack
+defcode "DROP", 4, , DROP
+    pop rax
+    NEXT
+
+; Swap two elements on stack
+defcode "SWAP", 4, , SWAP
+    pop  rax
+    pop  rbx
+    push rax
+    push rbx
+    NEXT
+
+; duplicate top of stack
+defcode "DUP", 3, , DUP
+    mov  rax, [rsp] 
+    push rax
+    NEXT
+
+; get the second element of the stack and push it on top
+defcode "OVER", 4, , OVER
+    mov  rax, [rsp + 8] ; get the second element of stack
+    push rax            ; and push it on top
+    NEXT
+
+defcode "ROT", 3, , ROT
+    pop  rax
+    pop  rbx
+    pop  rcx
+    push rbx
+    push rax
+    push rcx
+    NEXT
+
+defcode "-ROT", 4, , NROT
+    pop  rax
+    pop  rbx
+    pop  rcx
+    push rax
+    push rcx
+    push rbx
+    NEXT
+
+; drop top two elements of stack
+defcode "2DROP", 5, , TWODROP
+    pop rax
+    pop rax
+    NEXT
+
+; duplicate top two elements of stack
+defcode "2DUP", 4, , TWODUP
+    mov  rax, [rsp]
+    mov  rbx, [rsp + 8]
+    push rbx
+    push rax
+    NEXT
+
+; swap top two pairs of elements of stack
+defcode "2SWAP", 5, , TWOSWAP
+    pop  rax
+    pop  rbx
+    pop  rcx
+    pop  rdx
+    push rbx
+    push rax
+    push rdx
+    push rcx
+    NEXT
+
+; duplicate top of stack if non-zero
+defcode "?DUP", 4, , QDUP
+    mov  rax, [rsp]
+    test rax,  rax
+    jz   @F
+    push rax
+@@: NEXT
+
+; increment top of stack
+defcode "1+", 2, , INCR
+    inc qword ptr [rsp]
+    NEXT
+
+; decrement top of stack
+defcode "1-", 2, , DECR
+    dec qword ptr [rsp]
+    NEXT
+
+; add 4 to top of stack
+defcode "4+", 2, , INCR4
+    add qword ptr [rsp], 4
+    NEXT
+
+; subtract 4 from top of stack
+defcode "4-", 2, , DECR4
+    sub qword ptr [rsp], 4
+    NEXT
+
+; get top of stack
+; and add it to next word on stack
+defcode "+", 1, , ADD
+    pop  rax             
+    add [rsp], rax
+    NEXT
+
+; get top of stack
+; and subtract it from next word on stack
+defcode "-", 1, , SUB
+    pop  rax
+    sub [rsp], rax      
+    NEXT
+
+;  ignore overflow
+defcode "*", 1, , MUL
+    pop  rax
+    pop  rbx
+    imul rax, rbx
+    push rax
+    NEXT
+
+COMMENT @/*
+	In this FORTH, only /MOD is primitive.  Later we will define the / and MOD words in
+	terms of the primitive /MOD.  The design of the i386 assembly instruction idiv which
+	leaves both quotient and remainder makes this the obvious choice.
+*/@
+
+defcode "/MOD", 4, , DIVMOD
+    xor  rdx, rdx
+    pop  rbx
+    pop  rax
+    idiv rbx
+    push rdx        ; push remainder
+    push rax        ; push quotient
+    NEXT
+
+COMMENT @/*
+	Lots of comparison operations like =, <, >, etc..
+
+	ANS FORTH says that the comparison words should return all (binary) 1's for
+	TRUE and all 0's for FALSE.  However this is a bit of a strange convention
+	so this FORTH breaks it and returns the more normal (for C programmers ...)
+	1 meaning TRUE and 0 meaning FALSE.
+*/@
+
+defcode "=", 1, , EQU       ; top two words are equal?
+	pop   rax
+	pop   rbx
+	cmp   rbx, rax
+	sete  al
+	movzx rax, al
+	push  rax
+	NEXT
+
+defcode "<>", 2, , NEQU     ; top two words are not equal?
+	pop   rax
+	pop   rbx
+	cmp   rbx, rax
+	setne al
+	movzx rax, al
+	push  rax
+	NEXT
+
+defcode "<", 1, , LT
+    pop   rax
+    pop   rbx
+    cmp   rbx, rax
+    setl  al
+    movzx rax, al
+    push  rax
+    NEXT
+
+defcode ">", 1, , GT
+    pop   rax
+    pop   rbx
+    cmp   rbx, rax
+    setg  al
+    movzx rax, al
+    push  rax
+    NEXT
+
+defcode "<=", 2, , LE
+    pop   rax
+    pop   rbx
+    cmp   rbx, rax
+    setle al
+    movzx rax, al
+    push  rax
+    NEXT
+
+defcode ">=", 2, , GE
+    pop   rax
+    pop   rbx
+    cmp   rbx, rax
+    setge al
+    movzx rax, al
+    push  rax
+    NEXT
+
+defcode "0=", 2, , ZEQU     ; top of stack equals 0?
+    pop   rax
+    test  rax, rax
+    setz  al
+    movzx rax, al
+    push  rax
+    NEXT
+
+defcode "0<>", 3, , ZNEQU   ; top of stack not 0?
+    pop   rax
+    test  rax, rax
+    setnz al
+    movzx rax, al
+    push  rax
+    NEXT
+
+defcode "0<", 2, , ZLT      ; comparisons with 0
+    pop   rax
+    test  rax, rax
+    setl  al
+    movzx rax, al
+    push  rax
+    NEXT
+
+defcode "0>", 2, , ZGT
+    pop   rax
+    test  rax, rax
+    setg  al
+    movzx rax, al
+    push  rax
+    NEXT
+
+defcode "0<=", 3, , ZLE
+    pop   rax
+    test  rax, rax
+    setle al
+    movzx rax, al
+    push  rax
+    NEXT
+
+defcode "0>=", 3, , ZGE
+    pop   rax
+    test  rax, rax
+    setge al
+    movzx rax, al
+    push  rax
+    NEXT
+
+; bitwise AND
+defcode "AND", 3, , AND
+    pop  rax
+    and [rsp], rax
+    NEXT
+
+; bitwise OR
+defcode "OR", 2, , OR
+    pop  rax
+    or  [rsp], rax
+    NEXT
+
+; bitwise XOR
+defcode "XOR", 3, , XOR
+    pop  rax
+    xor [rsp], rax
+    NEXT
+
+; this is the FORTH bitwise "NOT" function
+defcode "INVERT", 6, , INVERT
+    not qword ptr [rsp]
+    NEXT
+
+
+
+mainCRTStartup proc
+mainCRTStartup endp
+end
--- a/code/jonesforth.asm
+++ b/code/jonesforth.asm
--- a/scripts/build.ps1
+++ b/scripts/build.ps1
@@ -1,4 +1,4 @@
-$devshell           = Join-Path $PSScriptRoot 'helpers/devshell.ps1'
+$devshell = Join-Path $PSScriptRoot 'helpers/devshell.ps1'
 & $devshell -arch amd64

 $path_root  = split-path -Path $PSScriptRoot -Parent
@@ -16,7 +16,6 @@ $lib_kernel32 = 'kernel32.lib'
 $flag_subsystem_console = '/subsystem:console'
 $flag_link              = '/link'

-
 push-location $path_build
 $unit = join-path $path_code 'forth.asm'