;; -*- nasm -*- ;; ;; Minimal FORTH interpreter for 64-bit Linux systems. ;; Based on jonesforth ;; ;; compile it with: ;; ;; nasm -g -F dwarf -f elf64 -o jombloforth.o jombloforth.asm ;; ld -o jombloforth jombloforth.o ;; ;; ;; UNLICENSE ;; --------- ;; ;; This is free and unencumbered software released into the public ;; domain. ;; ;; Anyone is free to copy, modify, publish, use, compile, sell, or ;; distribute this software, either in source code form or as a compiled ;; binary, for any purpose, commercial or non-commercial, and by any ;; means. ;; ;; In jurisdictions that recognize copyright laws, the author or authors ;; of this software dedicate any and all copyright interest in the ;; software to the public domain. We make this dedication for the benefit ;; of the public at large and to the detriment of our heirs and ;; successors. We intend this dedication to be an overt act of ;; relinquishment in perpetuity of all present and future rights to this ;; software under copyright law. ;; ;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR ;; OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ;; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ;; OTHER DEALINGS IN THE SOFTWARE. ;; ;; For more information, please refer to ;; syscall number, generated using this command: ;; ;; sed 's_#_%_;s_/\*_;_;s_ \*/__' /usr/include/x86_64-linux-gnu/asm/unistd_64.h > unistd_64.inc ;; %include "unistd_64.inc" ;; see buffer allocation %define RETURN_STACK_SIZE 8192 %define BUFFER_SIZE 4096 ;; MACRO DEFINITION ;; ---------------- ;; The version of this program, not to be mistaken as how many years the ;; author has been single. Please increment this number each time one year ;; passed. %assign JOMBLO_VERSION 2 ;; (that's a joke, btw) ;; Our first word %macro NEXT 0 lodsq jmp [rax] %endmacro ;; Helper for pushing/popping from the return stack %macro PUSHRSP 1 lea rbp, [rbp-8] mov [rbp], %1 %endmacro %macro POPRSP 1 mov %1, [rbp] lea rbp, [rbp+8] %endmacro ;; First Non-Macro Word section .text DOCOL: PUSHRSP rsi add rax, 8 mov rsi, rax NEXT global _start _start: cld ; Clear direction flag ; Save the initial data stack pointer in variable S0 mov [var_S0], rsp mov rbp, return_stack_top ; Initialize the return stack call set_up_data_segment mov rsi, cold_start NEXT section .rodata cold_start: dq QUIT ;; Various flags for the dictionary word header %define F_IMMED 0x80 %define F_HIDDEN 0x20 %define F_LENMASK 0x1f ;; Holds previously defined word ;; Starts as null/zero %define link 0 ;; Macro for defining forth word ;; ;; defword name, label, flag ;; %macro defword 2-3 0 %strlen name_len %1 ;; dictionary word header section .rodata align 8, db 0 global name_%2 name_%2: dq link db name_len + %3 db %1 ;; update link to point to this word's header %define link name_%2 ;; word definitions, starts with DOCOL align 8, db 0 global %2 %2: dq DOCOL %endmacro ;; Macro for defining native word ;; ;; defcode name, label, flag ;; %macro defcode 2-3 0 %strlen name_len %1 ;; dictionary word header section .rodata align 8, db 0 global name_%2 name_%2: dq link db name_len + %3 db %1 ;; update link to point to this word's header %define link name_%2 ;; word definition, link to the native code align 8, db 0 global $%2 ; fix error for `WORD` which isn't valid label $%2: dq code_%2 ;; native code section .text align 8 global code_%2 code_%2: %endmacro defcode "DROP", DROP pop rax NEXT defcode "SWAP", SWAP pop rax pop rbx push rax push rbx NEXT defcode "DUP", DUP mov rax, [rsp] push rax NEXT defcode "OVER", OVER mov rax, [rsp+8] push rax NEXT defcode "ROT", ROT pop rax pop rbx pop rcx push rbx push rax push rcx NEXT defcode "-ROT", NROT pop rax pop rbx pop rcx push rax push rcx push rbx NEXT defcode "2DROP", TWODROP pop rax pop rax NEXT defcode "2DUP", TWODUP mov rax, [rsp] mov rbx, [rsp+8] push rbx push rax NEXT defcode "2SWAP", TWOSWAP pop rax pop rbx pop rcx pop rdx push rbx push rax push rdx push rcx NEXT defcode "?DUP", QDUP mov rax, [rsp] test rax, rax jz .next push rax .next NEXT defcode "1+", INCR inc qword [rsp] NEXT defcode "1-", DECR dec qword [rsp] NEXT defcode "8+", INCR8 add qword [rsp], 8 NEXT defcode "8-", DECR8 sub qword [rsp], 8 NEXT defcode "+", ADD pop rax add [rsp], rax NEXT defcode "-", SUB pop rax sub [rsp], rax NEXT defcode "*", MUL pop rax pop rbx imul rax, rbx push rax NEXT defcode "/MOD", DIVMOD xor rdx, rdx pop rbx pop rax idiv rbx push rdx push rax NEXT ;;;; Comparison Words %macro defcmp 3 defcode %1, %2 pop rax pop rbx cmp rbx, rax set%+3 al movzx rax, al push rax NEXT %endmacro defcmp "=", EQU, e defcmp "<>", NEQ, ne defcmp "<", LT, l defcmp ">", GT, g defcmp "<=", LE, le defcmp ">=", GE, ge %macro deftest 3 defcode %1, %2 pop rax test rax, rax set%+3 al movzx rax, al push rax NEXT %endmacro deftest "0=", ZEQU, z deftest "0<>", ZNEQU, nz deftest "0<", ZLT, l deftest "0>", ZGT, g deftest "0<=", ZLE, le deftest "0>=", ZGE, ge defcode "AND", AND pop rax and [rsp], rax NEXT defcode "OR", OR pop rax or [rsp], rax NEXT defcode "XOR", XOR pop rax xor [rsp], rax NEXT defcode "INVERT", INVERT not qword [rsp] NEXT ;;;; Exiting a Word defcode "EXIT", EXIT POPRSP rsi NEXT ;; Literal defcode "LIT", LIT lodsq push rax NEXT ;;;; MEMORY defcode "!", STORE pop rbx pop rax mov [rbx], rax NEXT defcode "@", FETCH pop rbx mov rax, [rbx] push rax NEXT defcode "+!", ADDSTORE pop rbx pop rax add [rbx], rax NEXT defcode "-!", SUBSTORE pop rbx pop rax sub [rbx], rax NEXT defcode "C!", STOREBYTE pop rbx pop rax mov [rbx], al NEXT defcode "C@", FETCHBYTE pop rbx xor rax, rax mov al, [rbx] push rax NEXT defcode "C@C!", CCOPY mov rbx, [rsp+8] mov al, [rbx] pop rdi stosb push rdi inc qword [rsp+8] NEXT defcode "CMOVE", CMOVE mov rdx, rsi pop rcx pop rdi pop rsi rep movsb mov rsi, rdx NEXT ;;;; BUILT-IN VARIABLE %macro defvar 2-4 0, 0 defcode %1, %2, %4 push var_%2 NEXT ;; data storage section .data align 8, db 0 var_%2: dq %3 %endmacro defvar "STATE", STATE defvar "HERE", HERE defvar "LATEST", LATEST, name_SYSCALL0 defvar "S0", S0 defvar "BASE", BASE, 10 %macro defconst 3-4 0 defcode %1, %2, %4 push %3 NEXT %endmacro defconst "VERSION", VERSION, JOMBLO_VERSION defconst "R0", R0, return_stack_top defconst "DOCOL", __DOCOL, DOCOL defconst "F_IMMED", __F_IMMED, F_IMMED defconst "F_HIDDEN", __F_HIDDEN, F_HIDDEN defconst "F_LENMASK", __F_LENMASK, F_LENMASK %macro defsys 2 %defstr name SYS_%1 defconst name, SYS_%1, __NR_%2 %endmacro defsys EXIT, exit defsys OPEN, open defsys CLOSE, close defsys READ, read defsys WRITE, write defsys CREAT, creat defsys BRK, brk %macro defo 2 %defstr name O_%1 defconst name, __O_%1, %2 %endmacro defo RDONLY, 0o defo WRONLY, 1o defo RDWR, 2o defo CREAT, 100o defo EXCL, 200o defo TRUNC, 1000o defo APPEND, 2000o defo NONBLOCK, 4000o ;;;; RETURN STACK defcode ">R", TOR pop rax PUSHRSP rax NEXT defcode "R>", FROMR POPRSP rax push rax NEXT defcode "RSP@", RSPFETCH push rbp NEXT defcode "RSP!", RSPSTORE pop rbp NEXT defcode "RDROP", RDROP add rbp, 8 NEXT ;;;; PARAMETER (DATA) STACK defcode "DSP@", DSPFETCH mov rax, rsp push rax NEXT defcode "DSP!", DSPSTORE pop rsp NEXT ;;;; INPUT OUTPUT defcode "KEY", KEY call _KEY push rax NEXT _KEY: mov rbx, [currkey] cmp rbx, [bufftop] jge .full xor rax, rax mov al, [rbx] inc rbx mov [currkey], rbx ret .full: push rsi ; save rsi temporarily push rdi ; and rdi xor rdi, rdi ; stdin (0) mov rsi, buffer ; pointer to the buffer mov [currkey], rsi mov rdx, BUFFER_SIZE ; how many bytes to read max mov rax, __NR_read ; read(0, buffer, size) syscall test rax, rax jbe .eof add rsi, rax mov [bufftop], rsi pop rdi ; restore pop rsi ; and restore jmp _KEY .eof: xor rdi, rdi mov rax, __NR_exit syscall section .data align 8, db 0 currkey: dq buffer bufftop: dq buffer defcode "EMIT", EMIT pop rax call _EMIT NEXT _EMIT: mov rdi, 1 ; stdout (1) mov [emit_scratch], al ; save the byte to scratch buffer push rsi ; save rsi temporarily mov rsi, emit_scratch mov rdx, 1 ; how many bytes to write mov rax, __NR_write ; write(1, scratch, 1) syscall pop rsi ; restore it ret section .data emit_scratch: db 0 defcode "WORD", WORD call _WORD push rdi push rcx NEXT _WORD: .ws: call _KEY cmp al, '\' je .comment cmp al, ' ' jbe .ws mov rdi, word_buffer .word: stosb call _KEY cmp al, ' ' ja .word sub rdi, word_buffer mov rcx, rdi mov rdi, word_buffer ret .comment: call _KEY cmp al, 0x0A jne .comment jmp .ws section .data word_buffer: times 32 db 0 defcode "NUMBER", NUMBER pop rcx pop rdi call _NUMBER push rax push rcx NEXT _NUMBER: xor rax, rax xor rbx, rbx test rcx, rcx ; trying to parse zero-length string is an error, but will return 0. jz .ret mov rdx, [var_BASE] ; get BASE (in dl) mov bl, [rdi] ; bl = first character in string inc rdi push rax ; push 0 on stack cmp bl, '-' ; negative number? jnz .convert pop rax push rbx ; push <> 0 on stack, indicating negative dec rcx jnz .loop pop rbx mov rcx, 1 ret ; Loop reading digits. .loop: imul rax, rdx ; rax *= BASE mov bl, [rdi] ; bl = next character in string inc rdi .convert: sub bl, '0' ; < '0'? jb .finish cmp bl, 10 ; <= '9'? jb .numeric sub bl, 17 ; < 'A'? (17 is 'A'-'0') jb .finish add bl, 10 .numeric: cmp bl, dl ; >= BASE? jge .finish ; OK, so add it to rax and loop add rax, rbx dec rcx jnz .loop ; Negate the result if the first character was '-' (saved on the stack) .finish: pop rbx test rbx, rbx jz .ret neg rax .ret: ret ;;;; Dictionary Looks Ups defcode "FIND", FIND pop rcx pop rdi call _FIND push rax NEXT _FIND: push rsi ; save rsi so that we can use it in string comparison ; now we start searching the dictionary for this word mov rdx, [var_LATEST] ; LATEST points to name header of the latest word in the dictionary .loop: test rdx, rdx ; NULL pointer? je .notfound ; Compare the length expected and the length of the word. ; Note that if the F_HIDDEN flag is set on the word, then by a bit of trickery ; this won't pick the word (the length will appear to be wrong). xor rax,rax mov al, [rdx+8] ; al = flags+length field and al, F_HIDDEN | F_LENMASK ; al = name length cmp al, cl ; Length is the same? jne .next ; Compare the strings in detail. push rcx ; Save the length push rdi ; Save the address (repe cmpsb will move this pointer) lea rsi, [rdx+9] ; Dictionary string we are checking against. repe cmpsb ; Compare the strings. pop rdi pop rcx jne .next ; Not the same. ; The strings are the same - return the header pointer in rax pop rsi mov rax, rdx ret .next: mov rdx, [rdx] ; Move back through the link field to the previous word jmp .loop ; .. and loop. .notfound: pop rsi xor rax,rax ; Return zero to indicate not found. ret defcode ">CFA", TCFA pop rdi call _TCFA push rdi NEXT _TCFA: xor rax, rax add rdi, 8 ; skip link pointer mov al, [rdi] ; load flags+len into al inc rdi ; skip flags+len byte and al, F_LENMASK ; just the length, not the flags add rdi, rax ; skip the name add rdi, 0b111 ; the codeword is 8-byte aligned and rdi, ~0b111 ret defword ">DFA", TDFA dq TCFA dq INCR8 dq EXIT ;;;; Compiling defcode "CREATE", CREATE ; Get the name length and address. pop rcx ; rcx = length pop rbx ; rbx = address of name ; Link pointer. mov rdi, [var_HERE] ; rdi is the address of the header mov rax, [var_LATEST] ; Get link pointer stosq ; and store it in the header. ; Length byte and the word itself. mov al, cl ; Get the length. stosb ; Store the length/flags byte. push rsi mov rsi, rbx ; rsi = word rep movsb ; Copy the word pop rsi add rdi, 0b111 ; Align to next 8 byte boundary. and rdi, ~0b111 ; Update LATEST and HERE. mov rax, [var_HERE] mov [var_LATEST], rax mov [var_HERE], rdi NEXT defcode ",", COMMA pop rax call _COMMA NEXT _COMMA: mov rdi, [var_HERE] ; HERE stosq ; Store it. mov [var_HERE], rdi ; Update HERE (incremented) ret defcode "[", LBRAC, F_IMMED xor rax, rax mov [var_STATE], rax NEXT defcode "]", RBRAC mov qword [var_STATE], 1 NEXT defword ":", COLON dq $WORD dq CREATE dq LIT, DOCOL, COMMA dq LATEST, FETCH, HIDDEN dq RBRAC dq EXIT defword ";", SEMICOLON, F_IMMED dq LIT, EXIT, COMMA dq LATEST, FETCH, HIDDEN dq LBRAC dq EXIT defcode "IMMEDIATE", IMMEDATE, F_IMMED mov rdi, [var_LATEST] add rdi, 8 xor byte [rdi], F_IMMED NEXT defcode "HIDDEN", HIDDEN pop rdi add rdi, 8 xor byte [rdi], F_HIDDEN NEXT defword "HIDE", HIDE dq $WORD dq FIND dq HIDDEN dq EXIT defcode "'", TICK lodsq push rax NEXT ;;;; Branching defcode "BRANCH", BRANCH add rsi, [rsi] NEXT defcode "0BRANCH", ZBRANCH pop rax test rax, rax jz code_BRANCH lodsq NEXT ;;;; Literal String defcode "LITSTRING", LITSTRING lodsq push rsi push rax add rsi, rax add rsi, 0b111 and rsi, ~0b111 NEXT defcode "TELL", TELL mov rcx, rsi ; save temporarily mov rdi, 1 ; 1st param = stdout(1) pop rdx ; 3nd param = length of string pop rsi ; 2nd param = the string mov rax, __NR_write push rcx ; save previous value of rsi in the stack syscall pop rsi ; restore rsi NEXT ;;;; Part of Testing defword "FORTYTWO", FORTYTWO dq LIT dq 42 dq EXIT ;;;; Quit and Interpret defword "QUIT", QUIT dq R0, RSPSTORE dq INTERPRET dq BRANCH, -16 defcode "INTERPRET", INTERPRET call _WORD ; return rcx = length, rdi = pointer to word ; Is it in the dictionary? xor rax, rax mov [interpret_is_lit], rax ; Not a literal number (not yet anyway ...) call _FIND ; Return rax = pointer to header or 0 if not found test rax, rax ; Found? jz .number ; In the dictionary. Is it an IMMEDIATE codeword? mov rdi, rax ; rdi = dictionary entry mov al, [rdi+8] ; Get name+flags. push ax ; Just save it for now call _TCFA ; Convert dictionary entry in rdi to codeword pointer pop ax and al, F_IMMED ; Is IMMED flag set? mov rax, rdi jnz .exec ; If IMMED, jump straight to executing jmp .main ; Not in the dictionary (not a word) so assume it's a literal number. .number: inc qword [interpret_is_lit] call _NUMBER ; Returns the parsed number in rax, rcx > 0 if error test rcx, rcx jnz .numerror mov rbx, rax mov rax, LIT ; The word is LIT ; Are we compiling or executing? .main: mov rdx, [var_STATE] test rdx, rdx jz .exec ; Jump if executing. ; Compiling - just append the word to the current dictionary definition. call _COMMA mov rcx, [interpret_is_lit] ; Was it a literal? test rcx, rcx jz .next mov rax, rbx ; Yes, so LIT is followed by a number. call _COMMA .next: NEXT ; Executing - run it! .exec: mov rcx, [interpret_is_lit] ; Literal? test rcx, rcx ; Literal? jnz .litexec ; Not a literal, execute it now. This never returns, but the codeword will ; eventually call NEXT which will reenter the loop in QUIT. jmp [rax] ; Executing a literal, which means push it on the stack. .litexec: push rbx NEXT ; Parse error (not a known word or a number in the current BASE). .numerror: ; Print an error message followed by up to 40 characters of context. push rsi mov rdi, 2 ; 1st param: stderr(2) mov rsi, errmsg ; 2nd param: error message mov rdx, errmsglen ; 3rd param: length of string mov rax, __NR_write ; write syscall syscall mov rsi, [currkey] ; the error occurred just before currkey position mov rdx, rsi sub rdx, buffer ; rdx = currkey - buffer (length in buffer before currkey) cmp rdx, 40 ; if > 40, then print only 40 characters jle .le mov rdx, 40 .le: sub rsi, rdx ; rcx = start of area to print, rdx = length mov rax, __NR_write ; write syscall syscall mov rsi, errmsgnl ; newline mov rdx, 1 mov rax, __NR_write ; write syscall syscall pop rsi NEXT section .rodata errmsg: db "PARSE ERROR: " errmsglen: equ $ - errmsg errmsgnl: db 0x0A section .data ; NB: easier to fit in the .data section align 8 interpret_is_lit: dq 0 ; Flag used to record if reading a literal ;;;; Odds and Ends defcode "CHAR", CHAR call _WORD ; Returns rcx = length, rdi = pointer to word. xor rax, rax mov al, [rdi] ; Get the first character of the word. push rax ; Push it onto the stack. NEXT defcode "EXECUTE", EXECUTE pop rax ; Get xt into rax jmp [rax] ; and jump to it. ; After xt runs its NEXT will continue executing the current word. defcode "SYSCALL3", SYSCALL3 mov rcx, rsi ; Save rsi pop rax ; System call number (see ) pop rdi ; First parameter. pop rsi ; Second parameter pop rdx ; Third parameter push rcx ; Save previous value of rsi on stack syscall pop rsi ; restore push rax ; Result (negative for -errno) NEXT defcode "SYSCALL2", SYSCALL2 mov rcx, rsi pop rax ; System call number (see ) pop rdi ; First parameter. pop rsi ; Second parameter push rcx syscall pop rsi push rax ; Result (negative for -errno) NEXT defcode "SYSCALL1", SYSCALL1 pop rax ; System call number (see ) pop rdi ; First parameter. syscall push rax ; Result (negative for -errno) NEXT defcode "SYSCALL0", SYSCALL0 pop rax ; System call number (see ) syscall push rax ; Result (negative for -errno) NEXT ;;;; Data Segment %define INITIAL_DATA_SEGMENT_SIZE 65536 section .text set_up_data_segment: xor rdi, rdi mov rax, __NR_brk ; brk(0) syscall mov [var_HERE], rax add rax, INITIAL_DATA_SEGMENT_SIZE mov rdi, rax mov rax, __NR_brk syscall ret ;;;; buffers allocation section .bss align 4096 return_stack: resb RETURN_STACK_SIZE return_stack_top: align 4096 buffer: resb BUFFER_SIZE