Compare commits

...

31 Commits

Author SHA1 Message Date
ed f18af12968 progress (need to review further) 2026-02-22 18:09:22 -05:00
ed 03fb6a8180 add bootslop sproject toml for manual_slop 2026-02-21 20:58:26 -05:00
ed 67f8639ee7 progress 2026-02-21 14:18:15 -05:00
ed 6953e6b9b3 progress 2026-02-21 13:11:54 -05:00
ed 0a78cd8405 more gud ui 2026-02-21 12:16:32 -05:00
ed fcc4d4a515 gud ui 2026-02-21 12:11:39 -05:00
ed b5e038f909 nice 2026-02-21 11:41:36 -05:00
ed ff49c3eada fixes 2026-02-21 11:34:04 -05:00
ed 68d0a5997f claude updates 2026-02-21 10:52:56 -05:00
ed 67c55a50ce trying out claude 2026-02-21 10:06:50 -05:00
ed 7c1d721272 trash 2026-02-21 00:01:32 -05:00
ed 7ca83267ba fixes 2026-02-20 22:56:19 -05:00
ed 34e7f6017e notes 2026-02-20 22:31:17 -05:00
ed fd132c6efc progress 2026-02-20 22:10:29 -05:00
ed 2db4acd493 dont use let gemini respect .gitignore 2026-02-20 21:41:43 -05:00
ed 5a1f2fd799 remove unused script 2026-02-20 21:39:22 -05:00
ed d387fc4f10 notes 2026-02-20 21:38:52 -05:00
ed 0d96c85012 notes 2026-02-20 21:25:46 -05:00
ed b3984970a8 more 2026-02-20 21:19:59 -05:00
ed e630590065 adjustments 2026-02-20 21:03:37 -05:00
ed 3c99f3f950 fix 2026-02-20 20:59:26 -05:00
ed 9dc4372bf3 cleanup 2026-02-20 20:51:36 -05:00
ed 784f3b9945 cleanup 2026-02-20 20:46:25 -05:00
ed 9db1748249 adjustments 2026-02-20 20:33:39 -05:00
ed bc30206e65 add skill and some adjustments 2026-02-20 19:42:19 -05:00
ed bac294714b adjustments 2026-02-20 19:00:50 -05:00
ed 884deeda4d notes 2026-02-20 18:57:42 -05:00
ed 9567a05697 progress 2026-02-20 16:58:07 -05:00
ed b4055d8067 progress 2026-02-20 16:29:32 -05:00
ed e1a97ae669 progress 2026-02-20 16:12:17 -05:00
ed 7b738e037c progress. 2026-02-20 15:54:29 -05:00
30 changed files with 3969 additions and 736 deletions
+18
View File
@@ -0,0 +1,18 @@
{
"permissions": {
"allow": [
"Bash(head:*)",
"Bash(git:*)",
"Bash(tail:*)",
"Bash(ls:*)",
"Bash(sort:*)",
"Bash(dir:*)",
"Bash(printf %s\\\\n:*)",
"Bash(tee:*)",
"Bash(cmd.exe:*)",
"Bash(pwsh.exe:*)",
"Bash(echo No ANTHROPIC env vars found:*)",
"Bash(powershell:*)"
]
}
}
+50
View File
@@ -0,0 +1,50 @@
root = true
[*.s]
indent_style = tab
indent_size = 2
[*.asm]
indent_style = tab
indent_size = 2
[*.refactor]
indent_style = space
indent_size = 4
[*.md]
indent_style = space
indent_size = 4
[*.c]
indent_style = tab
indent_size = 2
charset = utf-8
[*.cpp]
indent_style = tab
indent_size = 2
charset = utf-8
[*.h]
indent_style = tab
indent_size = 2
charset = utf-8
[*.hpp]
indent_style = tab
indent_size = 2
charset = utf-8
[*.{ps1, psm1}]
indent_style = tab
indent_size = 4
[*.odin]
indent_style = tab
indent_size = 2
charset = utf-8
[*.{natvis, natstepfilter}]
indent_style = tab
indent_size = 4
+7
View File
@@ -0,0 +1,7 @@
{
"context": {
"fileFiltering": {
"respectGitIgnore": false
}
}
}
+1
View File
@@ -3,3 +3,4 @@
references/processed_visuals references/processed_visuals
build build
bootslop.proj bootslop.proj
clay_ui_temp
+172
View File
@@ -0,0 +1,172 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## AI Behavior Rules
- **Do not** create shell scripts, README files, or descriptive files unless explicitly instructed.
- **Do not** do anything beyond what was asked. Suggest extras in text; do not implement them.
- If a task is heavy, use sub-agents (codebase investigator, code editor, pattern analyzer, etc.).
- Screenshots are in `C:\Users\Ed\scoop\apps\sharex\current\ShareX\Screenshots\2026-02` — user will
specify which by last-modified. Manually pasted content goes in `./gallery`.
- Do not use `.gitignore` to infer file relevance for context.
- Goal is guided mentorship: validate architecture, give nudges, provide tactical help when asked.
The user is learning to build this system. Do not auto-generate finished solutions.
## Project Overview
**bootslop** is an experimental x86-64 Windows application: a sourceless, zero-overhead
ColorForth-inspired programming environment. Inspired by Timothy Lottes' "x56-40" / source-less
programming series and Onat Türkçüoğlu's VAMP/KYRA register-stack architecture.
There is no human-readable source — the "source of truth" is a binary token array (the "tape").
It features a modal visual editor (GDI-based), real-time JIT compilation to x86-64 machine code,
and cartridge-based persistence.
Canonical architecture reference: `references/Architectural_Consolidation.md`
Coding conventions: `CONVENTIONS.md`
AI behavior and goal context: `GEMINI.md`
## Build
Two-stage build via PowerShell: compile with clang, link with lld-link.
```powershell
pwsh scripts/build.attempt_1.c.ps1
```
Output goes to `build/attempt_1.exe`. Run the exe manually — it opens a GUI window.
**Toolchain requirements:** `clang` and `lld-link.exe` on PATH. Targets amd64 Windows 11.
Compiler flags: `-std=c23 -O0 -g -Wall -DBUILD_DEBUG=1 -fno-exceptions -fdiagnostics-absolute-paths`
Linker flags: `/MACHINE:X64 /SUBSYSTEM:CONSOLE /DEBUG /INCREMENTAL:NO` + `kernel32.lib user32.lib gdi32.lib`
Note: `-nostdlib` / `-ffreestanding` are commented out in the build script — the CRT is currently
linked but `<stdlib.h>` / `<string.h>` must not be included directly.
No automated tests exist. Verification is interactive via the running GUI.
## Code Architecture
All active source is in `attempt_1/`:
- **`main.c`** — The entire application (~867 lines). Contains: semantic tag definitions (X-macro),
global VM state, the JIT compiler (`compile_action`, `compile_and_run_tape`), the GDI renderer,
keyboard input handling, and cartridge save/load (F1/F2).
- **`duffle.amd64.win32.h`** — The C DSL header. Defines all base types (`U1``U8`, `S1``S8`,
`F4`, `F8`, `B1``B8`, `Str8`, `UTF8`), macros (`global`, `internal`, `LP_`, `I_`, `N_`),
arena allocator (`FArena`, `farena_push`, `farena_reset`), string formatting, and raw WinAPI
bindings.
### Token / Tape Model
- Tokens are `U4` (32-bit): top 4 bits = semantic tag, lower 28 bits = value or annotation index.
- Tags are defined via X-macro `Tag_Entries()`:
`Define` (`:`) · `Call` (`~`) · `Data` (`$`) · `Imm` (`^`) · `Comment` (`.`) · `Format` (` `)
- Two arenas: `tape_arena` (array of `U4` tokens) and `anno_arena` (array of `U8` — one 8-char
name slot per token, space-padded for name resolution).
- Helper macros: `pack_token(tag, val)`, `unpack_tag(token)`, `unpack_val(token)`.
### JIT Compiler
- `compile_action(val)` — emits x86-64 machine code for a single primitive or call. Called by
`compile_and_run_tape` for each token.
- `compile_and_run_tape()` (`IA_` always-inline) — resets `code_arena`, compiles the tape up to
`cursor_idx + 1` (incremental mode, `run_full == false`) or the full tape (`run_full == true`),
then immediately executes the generated code. Called on every relevant keystroke.
- **JIT prologue/epilogue:** The generated function takes `U8* globals_ptr` (= `vm_globals`).
Prologue loads `rax` from `globals_ptr[0x70/8]` = `vm_globals[14]` and `rdx` from
`globals_ptr[0x78/8]` = `vm_globals[15]`. Epilogue stores them back. `vm_rax` / `vm_rdx` are
synced from `vm_globals[14/15]` after execution.
- **The Magenta Pipe:** Every `Define` token emits a `JMP` (to skip over the function body for
inline execution flow) followed by `xchg rax, rdx` at the word entry point. This is the implicit
register-stack rotation at word boundaries — Onat's "magenta pipe".
- **O(1) linker:** `tape_to_code_offset[65536]` maps tape index → byte offset in `code_arena`.
Populated during `compile_and_run_tape` when a `Define` token is encountered.
- The VM uses two global registers (`vm_rax`, `vm_rdx`) and 16 global memory cells
(`vm_globals[16]`). No traditional Forth data stack in memory.
- **13 primitive operations:** `SWAP` · `MULT` · `ADD` · `FETCH` · `STORE` · `DUP` · `DROP` ·
`SUB` · `DEC` · `PRINT` · `RET` · `RET_IF_Z` · `RET_IF_S`
- **32-bit instruction granularity:** All emitted instructions are padded to 4-byte alignment via
NOP bytes (0x90). `pad32()` enforces this after every emit.
- Name resolution: `resolve_name_to_index()` matches 8-char space-padded annotations against
primitives first, then prior `Define` tokens. After edits, `relink_tape()` re-resolves all
`Call`/`Imm` references.
### Editor
- Two modes: `MODE_NAV` (navigate) / `MODE_EDIT` (type into token). Toggled with `E` / `Escape`.
- **Key bindings (NAV mode):**
- `E` — enter MODE_EDIT
- Arrow keys — move cursor (Up/Down navigate by logical lines delimited by `Format` tokens)
- `Tab` — cycle the current token's tag through `STag_*` values
- `Space` — insert a new `Comment` token at cursor
- `Shift+Space` — insert a new `Comment` token after cursor
- `Return` — insert a `Format` (newline) token at cursor
- `Backspace` — delete token before cursor
- `Shift+Backspace` — delete token at cursor
- `PgUp` / `PgDn` — scroll viewport
- `F5` — toggle `run_full` (incremental ↔ full-tape JIT)
- `F1` — save cartridge to `cartridge.bin`
- `F2` — load cartridge from `cartridge.bin` and run
- **Key bindings (EDIT mode):**
- Hex digits (`0-9`, `a-f`) — shift into `Data` token value
- Any printable char — append to annotation name (up to 8 chars)
- `Backspace` — shift `Data` value right or trim annotation name
- `Escape` — exit to MODE_NAV, triggers `relink_tape()`
- Tape renders as colored token boxes, `TOKENS_PER_ROW` (8) per row, each showing a tag prefix
char and either a 6-char hex value (Data) or an 8-char annotation name.
- GDI rendering via `BeginPaint`/`EndPaint`. The HUD (status bar at bottom) shows RAX/RDX state,
global memory cells [0-3], print log, and debug log.
### Persistence
- Cartridge format: `[tape_arena.used : U8][anno_arena.used : U8][cursor_idx : U8]
[tape data][anno data]`
- On load: restores arenas, cursor, calls `relink_tape()` then `compile_and_run_tape()`.
## Current Development Roadmap
Status as of 2026-02-21:
1. **FFI / Tape Drive Argument Scatter** — the PRINT primitive manually aligns RSP and moves rax
into rcx before calling `ms_builtin_print`. R8/R9 args should come from pre-defined `vm_globals`
offsets ("preemptive scatter") rather than being zeroed.
2. **Variable-Length Annotations** — `anno_arena` is fixed at 8 bytes per token. Need a scheme
for longer comments without breaking the `O(1)` `tape_to_code_offset` mapping.
3. ~~**Cartridge Persistence**~~ — DONE (F1/F2 save/load via WinAPI `CreateFileA`/`WriteFile`).
4. **Editor Cursor Refinement** — proper in-token cursor for `Data` and annotation tokens, rather
than backspace-truncation and right-shift append.
5. **Control Flow Expansion** — lambdas or basic block jumps beyond the current conditional-return
primitives (`RET_IF_Z`, `RET_IF_S`).
## C DSL Conventions (from CONVENTIONS.md — strictly enforced)
**Types:** Never use `int`, `long`, `unsigned`, etc. Always use `U1`/`U2`/`U4`/`U8` (unsigned),
`S1`/`S2`/`S4`/`S8` (signed), `F4`/`F8` (float), `B1``B8` (bool).
Use cast macros (`u8_(val)`, `u4_(val)`, `u4_r(ptr)`) — not C-style casts. Standard C casts only
for complex types where no macro exists.
**Naming:** `lower_snake_case` for functions/variables. `PascalCase` for types. WinAPI bindings
prefixed with `ms_` using `asm("SymbolName")` — never declare raw WinAPI names.
**const placement:** Always to the right: `char const*`, not `const char*`.
**Structs/Enums:** Use `typedef Struct_(Name) { ... };` and `typedef Enum_(UnderlyingType, Name) { ... };`.
**X-Macros:** Use for enums coupled with metadata (colors, prefixes, names). Entry names PascalCase,
enum symbols use `tmpl(TypeName, Entry)` → `TypeName_Entry`.
**Memory:** Use `FArena` / `farena_push` / `farena_reset` — no raw malloc. Use `mem_fill`/`mem_copy`
not memset/memcpy. Do not `#include <stdlib.h>` or `<string.h>`.
**Formatting:** Allman braces for complex blocks. Vertical alignment for struct fields and related
declarations. Space between `&` and operand: `& my_var`. `else if` / `else` on new lines. Align
consecutive `while`/`if` keywords vertically where possible.
**Storage class keywords:** `global` (= `static` at file scope), `internal` (= `static` for
functions), `LP_` (= `static` inside a function), `I_` (inline), `N_` (noinline), `IA_`
(always-inline).
**Line length:** 120160 characters per line in scripts.
+48 -13
View File
@@ -10,6 +10,7 @@ This document outlines the strict C style and architectural conventions expected
* Float: `F4`, `F8` * Float: `F4`, `F8`
* Boolean: `B1`, `B2`, `B4`, `B8` (use `true`/`false` primitives) * Boolean: `B1`, `B2`, `B4`, `B8` (use `true`/`false` primitives)
* Strings/Chars: `UTF8` (for characters), `Str8` (for string slices) * Strings/Chars: `UTF8` (for characters), `Str8` (for string slices)
* **Fundamental Type Casts:** Strictly use the provided casting macros (e.g., `u8_(val)`, `u4_r(ptr)`, `s4_(val)`) instead of standard C-style cast syntax like `(U8)val`. Standard casts should only be used for complex types or when an appropriate macro isn't available.
* **WinAPI Structs:** Only use `MS_` prefixed fundamental types (e.g., `MS_LONG`, `MS_DWORD`) *inside* WinAPI struct definitions (`MS_WNDCLASSA`, etc.) to maintain FFI compatibility. Do not use them in general application logic. * **WinAPI Structs:** Only use `MS_` prefixed fundamental types (e.g., `MS_LONG`, `MS_DWORD`) *inside* WinAPI struct definitions (`MS_WNDCLASSA`, etc.) to maintain FFI compatibility. Do not use them in general application logic.
## 2. Declaration Wrappers & X-Macros ## 2. Declaration Wrappers & X-Macros
@@ -18,8 +19,8 @@ This document outlines the strict C style and architectural conventions expected
* `typedef Enum_(UnderlyingType, Name) { ... };` * `typedef Enum_(UnderlyingType, Name) { ... };`
* **X-Macros:** Use X-Macros to tightly couple Enums with their corresponding string representations or metadata. * **X-Macros:** Use X-Macros to tightly couple Enums with their corresponding string representations or metadata.
```c ```c
#define My_Tag_Entries() #define My_Tag_Entries() \
X(Define, "Define") X(Define, "Define") \
X(Call, "Call") X(Call, "Call")
``` ```
@@ -27,17 +28,51 @@ This document outlines the strict C style and architectural conventions expected
* **Case:** Strictly use `lower_snake_case` for all functions and variables. * **Case:** Strictly use `lower_snake_case` for all functions and variables.
* **Types:** Use `PascalCase` for type names (`FArena`, `SWord_Tag`). * **Types:** Use `PascalCase` for type names (`FArena`, `SWord_Tag`).
* **WinAPI Symbols:** When declaring foreign Win32 symbols, prefix the C function name with `ms_` (using `lower_snake_case`) and use the `asm("SymbolName")` attribute to link it to the actual DLL export. * **WinAPI Symbols:** When declaring foreign Win32 symbols, prefix the C function name with `ms_` (using `lower_snake_case`) and use the `asm("SymbolName")` attribute to link it to the actual DLL export.
* *Correct:* `WinAPI U2 ms_register_class(const MS_WNDCLASSA* lpWndClass) asm("RegisterClassA");` * *Correct:* `WinAPI U2 ms_register_class_a(MS_WNDCLASSA const* lpWndClass) asm("RegisterClassA");`
* *Incorrect:* `WinAPI U2 RegisterClassA(...);` * *Incorrect:* `WinAPI U2 RegisterClassA(...);`
## 4. Memory Management ## 4. Formatting & Layout
* **No Standard Library:** The environment is built with `-nostdlib` and `-ffreestanding`. Never include `<stdlib.h>`, `<string.h>`, etc. * **Vertical Alignment:** Align related variable declarations, struct fields, and function prototypes into columns to create a "sheet-like" layout. This improves visual parsing.
* **Arenas over Malloc:** Use `FArena` and its associated macros (`farena_push`, `farena_push_type`, `farena_reset`) for all dynamic memory allocations. Do not use raw pointers with manual arithmetic when an arena can handle it. * Example Struct:
* **Memory Ops:** Use `mem_fill` and `mem_copy` instead of standard `memset`/`memcpy` within the application logic. (A minimal `memset`/`memcpy` shim is only provided to satisfy compiler intrinsic struct zeroing under `-nostdlib`). ```c
typedef struct MS_WNDCLASSA {
U4 style;
S8 (*lpfnWndProc)(void*, U4, U8, S8);
S4 cbClsExtra;
// ...
char const* lpszClassName;
} MS_WNDCLASSA;
```
* **Multi-line Argument Alignment:** For long function signatures, place one argument per line with a single 4-space indent.
* **WinAPI Grouping:** Group foreign procedure declarations by their originating OS library (e.g., Kernel32, User32, GDI32) using comment headers.
* **Brace Style:** Use Allman style (braces on a new line) for function bodies or control blocks (`if`, `for`, `switch`, etc.) that are large or complex. Smaller blocks may use K&R style.
* **Conditionals & Control Flow:** Always place `else if` and `else` statements on a new line. Align control flow parentheses (e.g., between consecutive `while` and `if` blocks) vertically when possible for aesthetic uniformity:
```c
while (len < 8) len ++;
if (len > 0) { ... }
```
* **Address-Of Operator:** Do insert a space between the address-of operator (`&`) and the variable name.
* **Correct:** `& my_var`
* **Incorrect:** `&my_var`
## 5. Memory Management
* **Standard Library:** The C standard library is linked, but headers like `<stdlib.h>` or `<string.h>` should not be included directly. Required functions should be declared manually if needed, or accessed via compiler builtins.
* **Arenas over Malloc:** Use `FArena` and its associated macros (`farena_push`, `farena_push_type`, `farena_reset`) for all dynamic memory allocations. Do not use raw pointers with manual arithmetic when an arena can handle it.
* **Memory Ops:** Use `mem_fill` and `mem_copy` instead of standard `memset`/`memcpy` within the application logic.
## 6. Type Qualifiers
* **`const` Placement:** The `const` keyword must always be placed to the right of the type it modifies. This maintains a consistent right-to-left reading of type declarations.
* **Correct:** `char const* my_ptr;` (Pointer to a constant character)
* **Correct:** `U4 const* const my_ptr;` (Constant pointer to a constant U4)
* **Incorrect:** `const char* my_ptr;`
## 7. Metadata Coupling (X-Macros)
* **Metadata Enums:** Use X-Macros to define Enums that are tightly coupled with static metadata (colors, prefixes, names).
* Example:
```c
#define Tag_Entries() \
X(Define, "Define", 0x0018AEFF, ":") \
X(Call, "Call", 0x00D6A454, "~")
```
* **Naming Conventions:** When using X-Macros for Tags, entry names should be PascalCase, and the Enum symbols should be prefixed with the Enum type name (e.g., `tmpl(STag, Define)` -> `STag_Define`).
## 5. Modifiers
* `internal`: Static functions.
* `global`: Global state variables.
* `IA_`: Internal Always Inline.
* `I_`: Internal Inline.
* Pointers use `*r` (restrict) or `*v` (volatile) macros where applicable.
+43 -11
View File
@@ -2,11 +2,14 @@
## Baseline ## Baseline
DO NOT EVER make a shell script unless told to. DO NOT EVER make a readme or a file describing your changes unless your are told to. If you have commands I should be entering into the command line or if you have something to explain to me, please just use code blocks or normal text output. DO NOT DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TODO. DO NOT EVER, EVER DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TO DO. IF YOU WANT TO DO OTHER THINGS, SIMPLY SUGGEST THEM, AND THEN I WILL REVIEW YOUR CHANGES, AND MAKE THE DECISION ON HOW TO PROCEED. DO NOT EVER make a shell script unless told to. DO NOT EVER make a readme or a file describing your changes unless your are told to. If you have commands I should be entering into the command line or if you have something to explain to me, please just use code blocks or normal text output. DO NOT DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TODO. DO NOT EVER, EVER DO ANYTHING OTHER THAN WHAT YOU WERE TOLD TO DO. IF YOU WANT TO DO OTHER THINGS, SIMPLY SUGGEST THEM, AND THEN I WILL REVIEW YOUR CHANGES, AND MAKE THE DECISION ON HOW TO PROCEED. WHEN WRITING SCRIPTS USE A 120-160 character limit per line. I don't want to see scrunched code.
The user will often screenshot various aspects of the development with ShareX, which will be available in the current months directory: 'C:\Users\Ed\scoop\apps\sharex\current\ShareX\Screenshots\2026-02'
WHEN WRITING SCRIPTS USE A 120-160 character limit per line. I don't want to see scrunched code. You may read fromt his and the user will let you know (by last modified) which of the last screenshots are the most relevant. Otherwise they manually paste relevant content in the './gallery' directory.
Do not use the .gitignore as a reference for WHAT YOU SHOULD IGNORE. THAT IS STRICT FOR THE GIT REPO, NOT FOR INFERENCING FILE RELEVANCE.
If a task is very heavy, use sub-agents (such as a codebase/docs/references investiagor, code editor, specifc pattern or nuance analyzer, etc).
## Coding Conventions ## Coding Conventions
Before writing any C code in this workspace, you MUST review the strict stylistic and architectural guidelines defined in [CONVENTIONS.md](./CONVENTIONS.md). These dictate the usage of byte-width types, X-Macros, WinAPI FFI mapping, and memory arenas. Before writing any C code in this workspace, you MUST review the strict stylistic and architectural guidelines defined in [CONVENTIONS.md](./CONVENTIONS.md). These dictate the usage of byte-width types, X-Macros, WinAPI FFI mapping, and memory arenas.
## Necessary Background for Goal ## Necessary Background for Goal
@@ -39,13 +42,42 @@ Based on the curation in `./references/`, the resulting system MUST adhere to th
5. **No `if/then` branches:** Rely on hardware-level flags like conditional returns (`ret-if-signed`) combined with factored calls to avoid writing complex AST parsers. 5. **No `if/then` branches:** Rely on hardware-level flags like conditional returns (`ret-if-signed`) combined with factored calls to avoid writing complex AST parsers.
6. **No Dependencies:** C implementation must be minimal (`-nostdlib`), ideally running directly against OS APIs (e.g., WinAPI `VirtualAlloc`, `ExitProcess`, `GDI32` for rendering). 6. **No Dependencies:** C implementation must be minimal (`-nostdlib`), ideally running directly against OS APIs (e.g., WinAPI `VirtualAlloc`, `ExitProcess`, `GDI32` for rendering).
## Visual Context Synthesis & Color Semantics ## Current Development Roadmap (attempt_1)
Based on the extracted frame OCR data from the references: The prototype currently implements:
- A functional WinAPI modal editor backed by `microui` for immediate-mode floating panels.
- A 2-register (`RAX`/`RDX`) JIT compiler with an `O(1)` visual linker (`tape_to_code_offset` table).
- x68-style 32-bit instruction padding via `pad32()` using `0x90` NOPs.
- Implicit definition boundaries (Magenta Pipe / `STag_Define`) emitting `JMP rel32` over the body and `xchg rax, rdx` at the entry point.
- An FFI Bridge (`x64_FFI_PROLOGUE`, `x64_FFI_MAP_ARGS`, `x64_FFI_CALL_ABS`, `x64_FFI_EPILOGUE`) for calling WinAPI functions safely from JIT'd code.
- Persistence via F1 (save) / F2 (load) to `cartridge.bin`.
- A Lambda tag (`STag_Lambda`) that compiles a code block out-of-line and leaves its address in `RAX`.
- A well-defined **x64 Emission DSL** (`#pragma region x64 Emission DSL`) with named REX prefixes, register encodings, ModRM/SIB composition macros, opcode constants, and composite instruction inline functions.
- **Red (`<RED>`):** Defines a new word or symbol in the dictionary. This is the entry point for compilation. ### x64 Emission DSL Discipline
- **Green (`<GREEN>`):** Compiles a word into the current definition. All JIT code emission in `main.c` MUST use the x64 Emission DSL defined in the `#pragma region x64 Emission DSL` block. Raw magic bytes are forbidden. The allowed primitives are:
- **Yellow/Orange (`<YELLOW>` / `<ORANGE>`):** Executes a word immediately. Often used for macros or immediate mode evaluation within the editor. - **Composite helpers:** `x64_XCHG_RAX_RDX()`, `x64_MOV_RDX_RAX()`, `x64_MOV_RAX_RDX()`, `x64_ADD_RAX_RDX()`, `x64_SUB_RAX_RDX()`, `x64_IMUL_RAX_RDX()`, `x64_DEC_RAX()`, `x64_TEST_RAX_RAX()`, `x64_RET_IF_ZERO()`, `x64_RET_IF_SIGN()`, `x64_FETCH()`, `x64_STORE()`, `x64_CALL_RAX()`, `x64_RET()`.
- **Cyan/Blue (`<CYAN>` / `<BLUE>`):** Used for variables, memory addresses, or formatting layout (not executable instruction logic). - **Prologue/Epilogue:** `x64_JIT_PROLOGUE()`, `x64_JIT_EPILOGUE()`.
- **White/Dim (`<WHITE>` / `<DIM>`):** Comments, annotations, and UI elements. - **FFI:** `x64_FFI_PROLOGUE()`, `x64_FFI_MAP_ARGS()`, `x64_FFI_CALL_ABS(addr)`, `x64_FFI_EPILOGUE()`.
- **Magenta (`<MAGENTA>`):** Typically used for pointers or state modifiers. - **Raw emission only via named constants:** `emit8(x64_op_*)`, `emit8(x64_REX*)`, `emit8(x64_modrm(*))`, `emit32(val)`, `emit64(val)`.
- **Exception:** Forward jump placeholders (`JMP rel32`, `CALL rel32`) that have no composite helper may use `emit8(x64_op_JMP_rel32)` / `emit8(x64_op_CALL_rel32)` directly with a following `emit32(0)` placeholder, pending a dedicated DSL wrapper.
Here is a breakdown of the next steps to advance the `attempt_1` implementation towards a complete ColorForth derivative:
1. ~~**Refine the FFI / Tape Drive Argument Scatter:**~~ (Completed)
2. ~~**Implement the Self-Modifying Cartridge (Persistence):**~~ (Completed via F1/F2 save/load)
3. ~~**Refine Visual Editor Interactions:**~~ (Completed via `microui` integration)
4. ~~**Audit and enforce x64 Emission DSL usage throughout `main.c`:**~~ (Completed — all raw magic bytes replaced with named DSL constants and composite helpers)
5. **Add DSL wrappers for forward jump placeholders:**
- `x64_JMP_fwd_placeholder(U4* offset_out)` — emits `E9 00000000` and writes the patch offset.
- `x64_patch_fwd(U4 offset)` — patches a previously emitted placeholder with the current code position.
- This will eliminate the last remaining raw `emit8`/`emit32` pairs in `compile_and_run_tape`.
6. **Expanded Annotation Layer (Variable-Length Comments):**
- The current `anno_arena` strictly allocates 8 bytes (a `U8`) per token.
- Refactor the visual editor and annotation memory management to allow for arbitrarily long text blocks (comments) to be attached to specific tokens without disrupting the `O(1)` compilation mapping.
7. **Continuous Validation & Complex Control Flow:**
- Expand the primitive set to allow for more complex, AST-less control flow (e.g., handling Basic Block jumps `[ ]`).
- Investigate adding a `RET_IF_ZERO` + tail-call pattern for loops without explicit branch instructions.
+25
View File
@@ -0,0 +1,25 @@
# Bootslop: A Sourceless ColorForth Derivative
This repository contains the curation materials and prototype implementation for building a zero-overhead, sourceless ColorForth-derivative for x86-64, specifically modeled after the architectures of Timothy Lottes and Onat Türkçüoğlu.
## Project Goal
The objective is to *learn* how to build this architecture from scratch, with the AI acting as a highly contextualized mentor.
## Current State
The `attempt_1/` directory contains a working C prototype that successfully implements the core architectural pillars:
* A "sourceless" editor that manipulates a 32-bit token array (`Tape Drive`) and a parallel 64-bit annotation array.
* A modal, interactive GUI built with raw Win32 GDI calls.
* A handmade Just-In-Time (JIT) compiler that translates tokens into executable x86-64 machine code on every keypress.
* An execution model based on Onat's 2-register stack (`RAX`/`RDX`) and a global memory tape.
## Helper Scripts
This repository contains several Python scripts used during the initial curation and content-gathering phase:
* `process_visuals.py`: Downloads videos from YouTube, extracts frames based on transcript timestamps, performs OCR on the frames, and uses color analysis to generate semantically-tagged markdown logs of the visual content. It also crops out relevant code blocks and diagrams.
* `fetch_blog.py`: Parses `TimothyLottesBlog.csv` and scrapes the HTML content of each blog post, converting it to clean markdown for local archival.
* `fetch_notes.py`: Parses `FORTH_NOTES.csv`, filters out irrelevant or already-processed links, and scrapes the remaining pages into markdown files.
* `estimate_context.py`: A utility to scan the `references/` directory and provide a rough estimate of the total token count to ensure it fits within the AI model's context window.
* `ocr_interaction.py`: A small utility to perform OCR on single image files.
+106
View File
@@ -0,0 +1,106 @@
# Technical Outline: Attempt 1
## Overview
`attempt_1` is a minimal C program that serves as a proof-of-concept for the "Lottes/Onat" sourceless ColorForth paradigm. It successfully integrates a visual editor, a live JIT compiler, and an execution environment into a single, cohesive Win32 application that links against the C runtime but avoids direct includes of standard headers, using manually declared functions instead.
The application presents a visual grid of 32-bit tokens rendered via `microui` floating panels and allows the user to navigate and edit them directly. On every keypress, the token array is re-compiled into x86-64 machine code and executed, with the results (register states and global memory) displayed instantly in the HUD.
## Core Concepts Implemented
1. **Sourceless Token Array (`FArena` tape):**
* The "source code" is a contiguous block of `U4` (32-bit) integers allocated by `VirtualAlloc` and managed by the `FArena` from `duffle.h`.
* Each token is packed with a 4-bit "Color" tag and a 28-bit payload, adhering to the core design.
2. **Annotation Layer (`FArena` anno):**
* A parallel `FArena` of `U8` (64-bit) integers stores an 8-character string for each corresponding token on the tape.
* The UI renderer prioritizes displaying this string, but the compiler only ever sees the indices packed into the 32-bit token.
3. **2-Register Stack & Global Memory:**
* The JIT compiler emits x86-64 that strictly adheres to Onat's `RAX`/`RDX` register stack.
* A `vm_globals` array (16 x `U8`) is passed by pointer into the JIT'd code via `RCX` (Win64 calling convention), held in `RBX` for the duration of execution.
* `vm_globals[14]` and `vm_globals[15]` serve as the `RAX` and `RDX` save/restore slots across JIT entry and exit.
* Indices 013 are available as the "tape drive" global memory for `FETCH`/`STORE` primitives.
4. **Handmade x86-64 JIT Emitter with Named DSL:**
* A small set of `emit8`/`emit32`/`emit64` functions write raw x86-64 opcodes into a `VirtualAlloc` block marked `PAGE_EXECUTE_READWRITE`.
* All emission is done through a well-defined **x64 Emission DSL** (`#pragma region x64 Emission DSL`) consisting of:
* Named REX prefix constants (`x64_REX`, `x64_REX_R`, `x64_REX_B`, etc.).
* Named register encoding constants (`x64_reg_RAX`, `x64_reg_RDX`, etc.).
* ModRM and SIB composition macros (`x64_modrm(mod, reg, rm)`, `x64_sib(scale, index, base)`).
* Named opcode constants (`x64_op_MOV_reg_rm`, `x64_op_CALL_rel32`, etc.).
* Composite inline instruction helpers (`x64_XCHG_RAX_RDX()`, `x64_ADD_RAX_RDX()`, `x64_RET_IF_ZERO()`, `x64_FETCH()`, `x64_STORE()`, etc.).
* Prologue/Epilogue helpers (`x64_JIT_PROLOGUE()`, `x64_JIT_EPILOGUE()`).
* FFI helpers (`x64_FFI_PROLOGUE()`, `x64_FFI_MAP_ARGS()`, `x64_FFI_CALL_ABS(addr)`, `x64_FFI_EPILOGUE()`).
* **Raw magic bytes are forbidden** in `compile_and_run_tape` and `compile_action`. All emission uses the DSL.
5. **Modal Editor (Win32 GDI + microui):**
* The UI is built with `microui` rendered via raw Win32 GDI calls defined in `duffle.h`.
* It features two modes: `Navigation` (blue cursor, arrow key movement) and `Edit` (orange cursor, text input).
* The editor correctly handles token insertion, deletion (Vim-style backspace), tag cycling (Tab), and value editing, all while re-compiling and re-executing on every keystroke.
* Four floating panels: **ColorForth Source Tape**, **Compiler & Status**, **Registers & Globals**, **Print Log**.
6. **O(1) Dictionary & Visual Linking:**
* The dictionary relies on an edit-time visual linker. When the tape is modified, `relink_tape` resolves names to absolute source memory indices.
* The compiler resolves references in `O(1)` time by indexing into `tape_to_code_offset[65536]`.
7. **Implicit Definition Boundaries (STag_Define):**
* A `STag_Define` token causes the JIT to:
1. Emit `RET` to close the prior block (via `x64_RET()`).
2. Emit a `JMP rel32` placeholder to skip over the new definition body.
3. Record the entry point in `tape_to_code_offset[i]`.
4. Emit `xchg rax, rdx` (via `x64_XCHG_RAX_RDX()`) as the definition's first instruction, rotating the 2-register stack.
8. **Lambda Tag (STag_Lambda):**
* A `STag_Lambda` token compiles a code block out-of-line and leaves its absolute 64-bit address in `RAX` for use with `STORE` or `EXECUTE`.
* Implemented via `x64_MOV_RDX_RAX()` to save the prior TOS, a `mov rax, imm64` with a patched-in address, and a `JMP rel32` to skip the body.
9. **x68 Instruction Padding:**
* `pad32()` pads every logical block/instruction to exact 32-bit multiples using `0x90` (NOPs), aligning with the visual token grid.
10. **The FFI Bridge:**
* `x64_FFI_PROLOGUE()` pushes `RDX`, aligns `RSP` to 16 bytes, and allocates 32 bytes of shadow space. * x64_FFI_MAP_ARGS() maps the 2-register stack and globals into Win64 ABI registers (RCX=RAX, R8=globals[0], R9=globals[1]). * x64_FFI_CALL_ABS(addr) loads the absolute 64-bit function address into R10 and calls it. * x64_FFI_EPILOGUE() restores RSP and pops RDX.
Persistence (Cartridge Save/Load):
F1 saves the tape and annotation arenas (with metadata) to cartridge.bin via WriteFile.
F2 loads from cartridge.bin, re-runs relink_tape() and compile_and_run_tape() to restore full live state.
Primitive Instruction Set
```md
ID Name Emitted x86-64 (via DSL)
1 SWAP x64_XCHG_RAX_RDX()
2 MULT x64_IMUL_RAX_RDX()
3 ADD x64_ADD_RAX_RDX()
4 FETCH x64_FETCH() — mov rax, [rbx + rax*8]
5 DEC x64_DEC_RAX()
6 STORE x64_STORE() — mov [rbx + rax*8], rdx
7 RET_IF_Z x64_RET_IF_ZERO()
8 RETURN x64_RET()
9 PRINT FFI dance → ms_builtin_print
10 RET_IF_S x64_RET_IF_SIGN()
11 DUP x64_MOV_RDX_RAX()
12 DROP x64_MOV_RAX_RDX()
13 SUB x64_SUB_RAX_RDX()
14 EXECUTE x64_CALL_RAX()
```
## Whats Missing (TODO)
- DSL wrappers for forward jump placeholders: The JMP rel32 and CALL rel32 forward-jump patterns in compile_and_run_tape still use bare emit8(x64_op_JMP_rel32) + emit32(0) pairs. Dedicated x64_JMP_fwd_placeholder(U4* offset_out) and x64_patch_fwd(U4 offset) helpers should be added to the DSL to eliminate this last gap.
- Expanded Annotation Layer (Variable-Length Comments): The anno_arena strictly allocates 8 bytes per token. Arbitrarily long comment blocks need a separate indirection layer without disrupting the O(1) compile mapping.
- Expanded Instruction Set: No floating point. No multi-way branching beyond RET_IF_Z / RET_IF_S.
- Basic Block Jumps [ ]: Lottes-style scoped jump targets for structured control flow without an AST are not yet implemented.
- Tape Drive / Preemptive Scatter Improvements: The FFI argument mapping reads globals[0] and globals[1] for R8/R9. A proper scatter model that pre-places arguments into named slots before a call is not yet formalized.
- Self-Hosting Bootstrap: The editor and JIT are written in C. The long-term goal is to rewrite the core inside the custom language itself, discarding the C host.
## References Utilized
### Heavily Utilized:
- Onats Talks: The core architecture (2-register stack, global memory tape, JIT philosophy) is a direct implementation of the concepts from his VAMP/KYRA presentations.
Lottes Twitter Notes: The 2-character mapped dictionary, ret-if-signed (RET_IF_ZERO), and annotation layer concepts were taken directly from his tweets.
- Users duffle.h & fortish-study: The C coding conventions (X-Macros, FArena, byte-width types, ms_ prefixes) were adopted from these sources.
### Lightly Utilized:
- Lottes Blog: Provided the high-level “sourceless” philosophy and inspiration.
- Grok Searches: Served to validate our understanding and provide parallels (like Wasms linear memory), but did not provide direct implementation details.
+116 -15
View File
@@ -159,10 +159,11 @@ IA_ U8 atm_swap_u8(U8*r addr, U8 value){asm volatile("lock xchgq %0,%1":"=r"(val
#pragma endregion Thread Coherence #pragma endregion Thread Coherence
#pragma region Debug #pragma region Debug
WinAPI void process_exit(U4 status) asm("ExitProcess"); WinAPI void ms_exit_process(U4 uExitCode) asm("ExitProcess"); // Kernel 32
#define debug_trap() __builtin_debugtrap() #define debug_trap() __builtin_debugtrap()
#if BUILD_DEBUG #if BUILD_DEBUG
IA_ void assert(U8 cond) { if(cond){return;} else{debug_trap(); process_exit(1);} } IA_ void assert(U8 cond) { if(cond){return;} else{debug_trap(); ms_exit_process(1);} }
#else #else
#define assert(cond) #define assert(cond)
#endif #endif
@@ -182,6 +183,24 @@ IA_ U8 align_pow2(U8 x, U8 b) {
return ((x + b - 1) & (~(b - 1))); return ((x + b - 1) & (~(b - 1)));
} }
#if 0
#pragma clang optimize off
// TODO(Ed): Replace these later (only matters if CRT is not used)
void* memset(void* dest, int c, U8 count) {
U1* bytes = (U1*)dest;
while (count--) *bytes++ = (U1)c;
return dest;
}
void* memcpy(void* dest, const void* src, U8 count) {
U1* d = (U1*)dest;
const U1* s = (const U1*)src;
while (count--) *d++ = *s++;
return dest;
}
#pragma clang optimize on
#endif
IA_ U8 mem_copy (U8 dest, U8 src, U8 len) { return (U8)(__builtin_memcpy ((void*)dest, (void const*)src, len)); } IA_ U8 mem_copy (U8 dest, U8 src, U8 len) { return (U8)(__builtin_memcpy ((void*)dest, (void const*)src, len)); }
IA_ U8 mem_copy_overlapping(U8 dest, U8 src, U8 len) { return (U8)(__builtin_memmove((void*)dest, (void const*)src, len)); } IA_ U8 mem_copy_overlapping(U8 dest, U8 src, U8 len) { return (U8)(__builtin_memmove((void*)dest, (void const*)src, len)); }
IA_ U8 mem_fill (U8 dest, U8 value, U8 len) { return (U8)(__builtin_memset ((void*)dest, (int) value, len)); } IA_ U8 mem_fill (U8 dest, U8 value, U8 len) { return (U8)(__builtin_memset ((void*)dest, (int) value, len)); }
@@ -224,6 +243,13 @@ IA_ Slice mem_bump(U8 start, U8 cap, U8*r used, U8 amount) {
} }
#pragma endregion Memory #pragma endregion Memory
#pragma region Encoding
IA_ void u64_to_hex(U8 val, char* buf, S4 chars) {
static const char hex_chars[] = "0123456789ABCDEF";
for(S1 i = chars - 1; i >= 0; --i) { buf[i] = hex_chars[val & 0xF]; val >>= 4; }
}
#pragma endregion Encoding
#pragma region Math #pragma region Math
#define u8_max 0xffffffffffffffffull #define u8_max 0xffffffffffffffffull
@@ -332,9 +358,6 @@ IA_ U8 hash64_fnv1a_ret(Slice data, U8 seed) { U8 h = 0; hash64_fnv1a(& h, data,
#define MS_STD_INPUT u4_(-10) #define MS_STD_INPUT u4_(-10)
#define MS_STD_OUTPUT u4_(-11) #define MS_STD_OUTPUT u4_(-11)
typedef Struct_(MS_Handle){U8 id;}; typedef Struct_(MS_Handle){U8 id;};
WinAPI MS_Handle ms_get_std_handle(U4 handle_type) asm("GetStdHandle");
WinAPI B4 ms_read_console(MS_Handle handle, UTF8*r buffer, U4 to_read, U4*r num_read, U8 reserved_input_control) asm("ReadConsoleA");
WinAPI B4 ms_write_console(MS_Handle handle, UTF8 const*r buffer, U4 chars_to_write, U4*v chars_written, U8 reserved) asm("WriteConsoleA");
#pragma endregion IO #pragma endregion IO
#pragma region Key Table Linear (KTL) #pragma region Key Table Linear (KTL)
@@ -581,40 +604,107 @@ typedef struct MS_WNDCLASSA {
char const* lpszMenuName; char const* lpszMenuName;
char const* lpszClassName; char const* lpszClassName;
} MS_WNDCLASSA; } MS_WNDCLASSA;
typedef struct MS_POINT { S4 x, y; } MS_POINT; typedef struct MS_POINT { S4 x, y; } MS_POINT;
typedef struct MS_MSG { void* hwnd; U4 message; U8 wParam; S8 lParam; U4 time; MS_POINT pt; } MS_MSG; typedef struct MS_MSG { void* hwnd; U4 message; U8 wParam; S8 lParam; U4 time; MS_POINT pt; } MS_MSG;
typedef struct MS_RECT { S4 left, top, right, bottom; } MS_RECT; typedef struct MS_RECT { S4 left, top, right, bottom; } MS_RECT;
typedef struct MS_PAINTSTRUCT { void* hdc; S4 fErase; MS_RECT rcPaint; S4 fRestore; S4 fIncUpdate; U1 rgbReserved[32]; } MS_PAINTSTRUCT; typedef struct MS_PAINTSTRUCT { void* hdc; S4 fErase; MS_RECT rcPaint; S4 fRestore; S4 fIncUpdate; U1 rgbReserved[32]; } MS_PAINTSTRUCT;
// Win32 API declarations // --- Kernel32 ---
WinAPI void* ms_virtual_alloc(void* lpAddress, U8 dwSize, U4 flAllocationType, U4 flProtect) asm("VirtualAlloc");
WinAPI void ms_exit_process(U4 uExitCode) asm("ExitProcess"); WinAPI void ms_exit_process(U4 uExitCode) asm("ExitProcess");
WinAPI U2 ms_register_class_a(const MS_WNDCLASSA* lpWndClass) asm("RegisterClassA"); WinAPI MS_Handle ms_get_std_handle(U4 handle_type) asm("GetStdHandle");
WinAPI void* ms_create_window_ex_a(U4 dwExStyle, char const* lpClassName, char const* lpWindowName, U4 dwStyle, S4 X, S4 Y, S4 nWidth, S4 nHeight, void* hWndParent, void* hMenu, void* hInstance, void* lpParam) asm("CreateWindowExA"); WinAPI void* ms_virtual_alloc(void* lpAddress, U8 dwSize, U4 flAllocationType, U4 flProtect) asm("VirtualAlloc");
WinAPI B4 ms_read_console(
MS_Handle handle,
UTF8*r buffer,
U4 to_read,
U4*r num_read,
U8 reserved_input_control
) asm("ReadConsoleA");
WinAPI B4 ms_write_console(
MS_Handle handle,
UTF8 const*r buffer,
U4 chars_to_write,
U4*v chars_written,
U8 reserved
) asm("WriteConsoleA");
// --- User32 ---
WinAPI U2 ms_register_class_a(MS_WNDCLASSA const* lpWndClass) asm("RegisterClassA");
WinAPI void* ms_create_window_ex_a(
U4 dwExStyle,
char const* lpClassName,
char const* lpWindowName,
U4 dwStyle,
S4 X,
S4 Y,
S4 nWidth,
S4 nHeight,
void* hWndParent,
void* hMenu,
void* hInstance,
void* lpParam
) asm("CreateWindowExA");
WinAPI S4 ms_show_window(void* hWnd, S4 nCmdShow) asm("ShowWindow"); WinAPI S4 ms_show_window(void* hWnd, S4 nCmdShow) asm("ShowWindow");
WinAPI S4 ms_get_message_a(MS_MSG* lpMsg, void* hWnd, U4 wMsgFilterMin, U4 wMsgFilterMax) asm("GetMessageA"); WinAPI S4 ms_get_message_a(MS_MSG* lpMsg, void* hWnd, U4 wMsgFilterMin, U4 wMsgFilterMax) asm("GetMessageA");
WinAPI S4 ms_translate_message(const MS_MSG* lpMsg) asm("TranslateMessage"); WinAPI S4 ms_translate_message(MS_MSG const* lpMsg) asm("TranslateMessage");
WinAPI S8 ms_dispatch_message_a(const MS_MSG* lpMsg) asm("DispatchMessageA"); WinAPI S8 ms_dispatch_message_a(MS_MSG const* lpMsg) asm("DispatchMessageA");
WinAPI S8 ms_def_window_proc_a(void* hWnd, U4 Msg, U8 wParam, S8 lParam) asm("DefWindowProcA"); WinAPI S8 ms_def_window_proc_a(void* hWnd, U4 Msg, U8 wParam, S8 lParam) asm("DefWindowProcA");
WinAPI void ms_post_quit_message(S4 nExitCode) asm("PostQuitMessage"); WinAPI void ms_post_quit_message(S4 nExitCode) asm("PostQuitMessage");
WinAPI S4 ms_invalidate_rect(void* hWnd, const MS_RECT* lpRect, S4 bErase) asm("InvalidateRect"); WinAPI S4 ms_invalidate_rect(void* hWnd, MS_RECT const* lpRect, S4 bErase) asm("InvalidateRect");
WinAPI S2 ms_get_async_key_state(S4 vKey) asm("GetAsyncKeyState");
// --- GDI32 ---
WinAPI void* ms_begin_paint(void* hWnd, MS_PAINTSTRUCT* lpPaint) asm("BeginPaint"); WinAPI void* ms_begin_paint(void* hWnd, MS_PAINTSTRUCT* lpPaint) asm("BeginPaint");
WinAPI S4 ms_end_paint(void* hWnd, const MS_PAINTSTRUCT* lpPaint) asm("EndPaint"); WinAPI S4 ms_end_paint(void* hWnd, MS_PAINTSTRUCT const* lpPaint) asm("EndPaint");
WinAPI U4 ms_set_text_color(void* hdc, U4 color) asm("SetTextColor"); WinAPI U4 ms_set_text_color(void* hdc, U4 color) asm("SetTextColor");
WinAPI U4 ms_set_bk_color(void* hdc, U4 color) asm("SetBkColor"); WinAPI U4 ms_set_bk_color(void* hdc, U4 color) asm("SetBkColor");
WinAPI S4 ms_text_out_a(void* hdc, S4 x, S4 y, char const* lpString, S4 c) asm("TextOutA"); WinAPI S4 ms_text_out_a(void* hdc, S4 x, S4 y, char const* lpString, S4 c) asm("TextOutA");
WinAPI void* ms_get_stock_object(S4 i) asm("GetStockObject"); WinAPI void* ms_get_stock_object(S4 i) asm("GetStockObject");
WinAPI void* ms_create_font_a(S4 cHeight, S4 cWidth, S4 cEscapement, S4 cOrientation, S4 cWeight, U4 bItalic, U4 bUnderline, U4 bStrikeOut, U4 iCharSet, U4 iOutPrecision, U4 iClipPrecision, U4 iQuality, U4 iPitchAndFamily, char const* pszFaceName) asm("CreateFontA"); WinAPI void* ms_create_font_a(
S4 cHeight,
S4 cWidth,
S4 cEscapement,
S4 cOrientation,
S4 cWeight,
U4 bItalic,
U4 bUnderline,
U4 bStrikeOut,
U4 iCharSet,
U4 iOutPrecision,
U4 iClipPrecision,
U4 iQuality,
U4 iPitchAndFamily,
char const* pszFaceName
) asm("CreateFontA");
WinAPI void* ms_create_compatible_dc(void* hdc) asm("CreateCompatibleDC");
WinAPI void* ms_create_compatible_bitmap(void* hdc, S4 cx, S4 cy) asm("CreateCompatibleBitmap");
WinAPI B4 ms_bit_blt(void* hdcDest, S4 x, S4 y, S4 w, S4 h, void* hdcSrc, S4 xSrc, S4 ySrc, U4 rop) asm("BitBlt");
WinAPI B4 ms_delete_dc(void* hdc) asm("DeleteDC");
WinAPI B4 ms_get_client_rect(void* hwnd, MS_RECT* lpRect) asm("GetClientRect");
WinAPI void* ms_select_object(void* hdc, void* h) asm("SelectObject"); WinAPI void* ms_select_object(void* hdc, void* h) asm("SelectObject");
WinAPI S4 ms_rectangle(void* hdc, S4 left, S4 top, S4 right, S4 bottom) asm("Rectangle"); WinAPI S4 ms_rectangle(void* hdc, S4 left, S4 top, S4 right, S4 bottom) asm("Rectangle");
WinAPI S4 ms_set_bk_mode(void* hdc, S4 mode) asm("SetBkMode");
WinAPI void* ms_create_solid_brush(U4 color) asm("CreateSolidBrush");
WinAPI S4 ms_delete_object(void* ho) asm("DeleteObject");
#define MS_MEM_COMMIT 0x00001000 #define MS_MEM_COMMIT 0x00001000
#define MS_MEM_RESERVE 0x00002000 #define MS_MEM_RESERVE 0x00002000
#define MS_PAGE_READWRITE 0x04 #define MS_PAGE_READWRITE 0x04
#define MS_SRCCOPY 0x00CC0020
#define MS_WM_DESTROY 0x0002 #define MS_WM_DESTROY 0x0002
#define MS_WM_SIZE 0x0005
#define MS_WM_PAINT 0x000F #define MS_WM_PAINT 0x000F
#define MS_WM_ERASEBKGND 0x0014
#define MS_WM_KEYDOWN 0x0100 #define MS_WM_KEYDOWN 0x0100
#define MS_WM_KEYUP 0x0101
#define MS_WM_MOUSEMOVE 0x0200
#define MS_WM_LBUTTONDOWN 0x0201
#define MS_WM_LBUTTONUP 0x0202
#define MS_WM_RBUTTONDOWN 0x0204
#define MS_WM_RBUTTONUP 0x0205
#define MS_WM_MBUTTONDOWN 0x0207
#define MS_WM_MBUTTONUP 0x0208
#define MS_WM_MOUSEWHEEL 0x020A
#define MS_WS_OVERLAPPEDWINDOW 0x00CF0000 #define MS_WS_OVERLAPPEDWINDOW 0x00CF0000
#define MS_WS_VISIBLE 0x10000000 #define MS_WS_VISIBLE 0x10000000
#define MS_VK_LEFT 0x25 #define MS_VK_LEFT 0x25
@@ -623,4 +713,15 @@ WinAPI S4 ms_rectangle(void* hdc, S4 left, S4 top, S4 right, S4 bottom) a
#define MS_VK_DOWN 0x28 #define MS_VK_DOWN 0x28
#define MS_PAGE_EXECUTE_READWRITE 0x40 #define MS_PAGE_EXECUTE_READWRITE 0x40
#define MS_WM_CHAR 0x0102
#define MS_VK_RETURN 0x0D
#define MS_VK_BACK 0x08
#define MS_VK_TAB 0x09
#define MS_VK_SPACE 0x20
#define MS_VK_F5 0x74
#define MS_VK_PRIOR 0x21
#define MS_VK_NEXT 0x22
#define MS_VK_SHIFT 0x10
#pragma endregion OS_GDI_And_Minimal #pragma endregion OS_GDI_And_Minimal
+1218 -247
View File
File diff suppressed because it is too large Load Diff
+1208
View File
File diff suppressed because it is too large Load Diff
+296
View File
@@ -0,0 +1,296 @@
/*
** Copyright (c) 2024 rxi
**
** This library is free software; you can redistribute it and/or modify it
** under the terms of the MIT license. See `microui.c` for details.
*/
#ifndef MICROUI_H
#define MICROUI_H
#define MU_VERSION "2.02"
#define MU_COMMANDLIST_SIZE (256 * 1024)
#define MU_ROOTLIST_SIZE 32
#define MU_CONTAINERSTACK_SIZE 32
#define MU_CLIPSTACK_SIZE 32
#define MU_IDSTACK_SIZE 32
#define MU_LAYOUTSTACK_SIZE 16
#define MU_CONTAINERPOOL_SIZE 48
#define MU_TREENODEPOOL_SIZE 48
#define MU_MAX_WIDTHS 16
#define MU_REAL float
#define MU_REAL_FMT "%.3g"
#define MU_SLIDER_FMT "%.2f"
#define MU_MAX_FMT 127
#define mu_stack(T, n) struct { int idx; T items[n]; }
#define mu_min(a, b) ((a) < (b) ? (a) : (b))
#define mu_max(a, b) ((a) > (b) ? (a) : (b))
#define mu_clamp(x, a, b) mu_min(b, mu_max(a, x))
enum {
MU_CLIP_PART = 1,
MU_CLIP_ALL
};
enum {
MU_COMMAND_JUMP = 1,
MU_COMMAND_CLIP,
MU_COMMAND_RECT,
MU_COMMAND_TEXT,
MU_COMMAND_ICON,
MU_COMMAND_MAX
};
enum {
MU_COLOR_TEXT,
MU_COLOR_BORDER,
MU_COLOR_WINDOWBG,
MU_COLOR_TITLEBG,
MU_COLOR_TITLETEXT,
MU_COLOR_PANELBG,
MU_COLOR_BUTTON,
MU_COLOR_BUTTONHOVER,
MU_COLOR_BUTTONFOCUS,
MU_COLOR_BASE,
MU_COLOR_BASEHOVER,
MU_COLOR_BASEFOCUS,
MU_COLOR_SCROLLBASE,
MU_COLOR_SCROLLTHUMB,
MU_COLOR_MAX
};
enum {
MU_ICON_CLOSE = 1,
MU_ICON_CHECK,
MU_ICON_COLLAPSED,
MU_ICON_EXPANDED,
MU_ICON_MAX
};
enum {
MU_RES_ACTIVE = (1 << 0),
MU_RES_SUBMIT = (1 << 1),
MU_RES_CHANGE = (1 << 2)
};
enum {
MU_OPT_ALIGNCENTER = (1 << 0),
MU_OPT_ALIGNRIGHT = (1 << 1),
MU_OPT_NOINTERACT = (1 << 2),
MU_OPT_NOFRAME = (1 << 3),
MU_OPT_NORESIZE = (1 << 4),
MU_OPT_NOSCROLL = (1 << 5),
MU_OPT_NOCLOSE = (1 << 6),
MU_OPT_NOTITLE = (1 << 7),
MU_OPT_HOLDFOCUS = (1 << 8),
MU_OPT_AUTOSIZE = (1 << 9),
MU_OPT_POPUP = (1 << 10),
MU_OPT_CLOSED = (1 << 11),
MU_OPT_EXPANDED = (1 << 12)
};
enum {
MU_MOUSE_LEFT = (1 << 0),
MU_MOUSE_RIGHT = (1 << 1),
MU_MOUSE_MIDDLE = (1 << 2)
};
enum {
MU_KEY_SHIFT = (1 << 0),
MU_KEY_CTRL = (1 << 1),
MU_KEY_ALT = (1 << 2),
MU_KEY_BACKSPACE = (1 << 3),
MU_KEY_RETURN = (1 << 4)
};
typedef struct mu_Context mu_Context;
typedef unsigned mu_Id;
typedef MU_REAL mu_Real;
typedef void* mu_Font;
typedef struct { int x, y; } mu_Vec2;
typedef struct { int x, y, w, h; } mu_Rect;
typedef struct { unsigned char r, g, b, a; } mu_Color;
typedef struct { mu_Id id; int last_update; } mu_PoolItem;
typedef struct { int type, size; } mu_BaseCommand;
typedef struct { mu_BaseCommand base; void *dst; } mu_JumpCommand;
typedef struct { mu_BaseCommand base; mu_Rect rect; } mu_ClipCommand;
typedef struct { mu_BaseCommand base; mu_Rect rect; mu_Color color; } mu_RectCommand;
typedef struct { mu_BaseCommand base; mu_Font font; mu_Vec2 pos; mu_Color color; char str[1]; } mu_TextCommand;
typedef struct { mu_BaseCommand base; mu_Rect rect; int id; mu_Color color; } mu_IconCommand;
typedef union {
int type;
mu_BaseCommand base;
mu_JumpCommand jump;
mu_ClipCommand clip;
mu_RectCommand rect;
mu_TextCommand text;
mu_IconCommand icon;
} mu_Command;
typedef struct {
mu_Rect body;
mu_Rect next;
mu_Vec2 position;
mu_Vec2 size;
mu_Vec2 max;
int widths[MU_MAX_WIDTHS];
int items;
int item_index;
int next_row;
int next_type;
int indent;
} mu_Layout;
typedef struct {
mu_Command *head, *tail;
mu_Rect rect;
mu_Rect body;
mu_Vec2 content_size;
mu_Vec2 scroll;
int zindex;
int open;
} mu_Container;
typedef struct {
mu_Font font;
mu_Vec2 size;
int padding;
int spacing;
int indent;
int title_height;
int scrollbar_size;
int thumb_size;
mu_Color colors[MU_COLOR_MAX];
} mu_Style;
struct mu_Context {
/* callbacks */
int (*text_width)(mu_Font font, const char *str, int len);
int (*text_height)(mu_Font font);
void (*draw_frame)(mu_Context *ctx, mu_Rect rect, int colorid);
/* core state */
mu_Style _style;
mu_Style *style;
mu_Id hover;
mu_Id focus;
mu_Id last_id;
mu_Rect last_rect;
int last_zindex;
int updated_focus;
int frame;
mu_Container *hover_root;
mu_Container *next_hover_root;
mu_Container *scroll_target;
char number_edit_buf[MU_MAX_FMT];
mu_Id number_edit;
/* stacks */
mu_stack(char, MU_COMMANDLIST_SIZE) command_list;
mu_stack(mu_Container*, MU_ROOTLIST_SIZE) root_list;
mu_stack(mu_Container*, MU_CONTAINERSTACK_SIZE) container_stack;
mu_stack(mu_Rect, MU_CLIPSTACK_SIZE) clip_stack;
mu_stack(mu_Id, MU_IDSTACK_SIZE) id_stack;
mu_stack(mu_Layout, MU_LAYOUTSTACK_SIZE) layout_stack;
/* retained state pools */
mu_PoolItem container_pool[MU_CONTAINERPOOL_SIZE];
mu_Container containers[MU_CONTAINERPOOL_SIZE];
mu_PoolItem treenode_pool[MU_TREENODEPOOL_SIZE];
/* input state */
mu_Vec2 mouse_pos;
mu_Vec2 last_mouse_pos;
mu_Vec2 mouse_delta;
mu_Vec2 scroll_delta;
int mouse_down;
int mouse_pressed;
int key_down;
int key_pressed;
char input_text[32];
};
mu_Vec2 mu_vec2(int x, int y);
mu_Rect mu_rect(int x, int y, int w, int h);
mu_Color mu_color(int r, int g, int b, int a);
void mu_init(mu_Context *ctx);
void mu_begin(mu_Context *ctx);
void mu_end(mu_Context *ctx);
void mu_set_focus(mu_Context *ctx, mu_Id id);
mu_Id mu_get_id(mu_Context *ctx, const void *data, int size);
void mu_push_id(mu_Context *ctx, const void *data, int size);
void mu_pop_id(mu_Context *ctx);
void mu_push_clip_rect(mu_Context *ctx, mu_Rect rect);
void mu_pop_clip_rect(mu_Context *ctx);
mu_Rect mu_get_clip_rect(mu_Context *ctx);
int mu_check_clip(mu_Context *ctx, mu_Rect r);
mu_Container* mu_get_current_container(mu_Context *ctx);
mu_Container* mu_get_container(mu_Context *ctx, const char *name);
void mu_bring_to_front(mu_Context *ctx, mu_Container *cnt);
int mu_pool_init(mu_Context *ctx, mu_PoolItem *items, int len, mu_Id id);
int mu_pool_get(mu_Context *ctx, mu_PoolItem *items, int len, mu_Id id);
void mu_pool_update(mu_Context *ctx, mu_PoolItem *items, int idx);
void mu_input_mousemove(mu_Context *ctx, int x, int y);
void mu_input_mousedown(mu_Context *ctx, int x, int y, int btn);
void mu_input_mouseup(mu_Context *ctx, int x, int y, int btn);
void mu_input_scroll(mu_Context *ctx, int x, int y);
void mu_input_keydown(mu_Context *ctx, int key);
void mu_input_keyup(mu_Context *ctx, int key);
void mu_input_text(mu_Context *ctx, const char *text);
mu_Command* mu_push_command(mu_Context *ctx, int type, int size);
int mu_next_command(mu_Context *ctx, mu_Command **cmd);
void mu_set_clip(mu_Context *ctx, mu_Rect rect);
void mu_draw_rect(mu_Context *ctx, mu_Rect rect, mu_Color color);
void mu_draw_box(mu_Context *ctx, mu_Rect rect, mu_Color color);
void mu_draw_text(mu_Context *ctx, mu_Font font, const char *str, int len, mu_Vec2 pos, mu_Color color);
void mu_draw_icon(mu_Context *ctx, int id, mu_Rect rect, mu_Color color);
void mu_layout_row(mu_Context *ctx, int items, const int *widths, int height);
void mu_layout_width(mu_Context *ctx, int width);
void mu_layout_height(mu_Context *ctx, int height);
void mu_layout_begin_column(mu_Context *ctx);
void mu_layout_end_column(mu_Context *ctx);
void mu_layout_set_next(mu_Context *ctx, mu_Rect r, int relative);
mu_Rect mu_layout_next(mu_Context *ctx);
void mu_draw_control_frame(mu_Context *ctx, mu_Id id, mu_Rect rect, int colorid, int opt);
void mu_draw_control_text(mu_Context *ctx, const char *str, mu_Rect rect, int colorid, int opt);
int mu_mouse_over(mu_Context *ctx, mu_Rect rect);
void mu_update_control(mu_Context *ctx, mu_Id id, mu_Rect rect, int opt);
#define mu_button(ctx, label) mu_button_ex(ctx, label, 0, MU_OPT_ALIGNCENTER)
#define mu_textbox(ctx, buf, bufsz) mu_textbox_ex(ctx, buf, bufsz, 0)
#define mu_slider(ctx, value, lo, hi) mu_slider_ex(ctx, value, lo, hi, 0, MU_SLIDER_FMT, MU_OPT_ALIGNCENTER)
#define mu_number(ctx, value, step) mu_number_ex(ctx, value, step, MU_SLIDER_FMT, MU_OPT_ALIGNCENTER)
#define mu_header(ctx, label) mu_header_ex(ctx, label, 0)
#define mu_begin_treenode(ctx, label) mu_begin_treenode_ex(ctx, label, 0)
#define mu_begin_window(ctx, title, rect) mu_begin_window_ex(ctx, title, rect, 0)
#define mu_begin_panel(ctx, name) mu_begin_panel_ex(ctx, name, 0)
void mu_text(mu_Context *ctx, const char *text);
void mu_label(mu_Context *ctx, const char *text);
int mu_button_ex(mu_Context *ctx, const char *label, int icon, int opt);
int mu_checkbox(mu_Context *ctx, const char *label, int *state);
int mu_textbox_raw(mu_Context *ctx, char *buf, int bufsz, mu_Id id, mu_Rect r, int opt);
int mu_textbox_ex(mu_Context *ctx, char *buf, int bufsz, int opt);
int mu_slider_ex(mu_Context *ctx, mu_Real *value, mu_Real low, mu_Real high, mu_Real step, const char *fmt, int opt);
int mu_number_ex(mu_Context *ctx, mu_Real *value, mu_Real step, const char *fmt, int opt);
int mu_header_ex(mu_Context *ctx, const char *label, int opt);
int mu_begin_treenode_ex(mu_Context *ctx, const char *label, int opt);
void mu_end_treenode(mu_Context *ctx);
int mu_begin_window_ex(mu_Context *ctx, const char *title, mu_Rect rect, int opt);
void mu_end_window(mu_Context *ctx);
void mu_open_popup(mu_Context *ctx, const char *name);
int mu_begin_popup(mu_Context *ctx, const char *name);
void mu_end_popup(mu_Context *ctx);
void mu_begin_panel_ex(mu_Context *ctx, const char *name, int opt);
void mu_end_panel(mu_Context *ctx);
#endif
-98
View File
@@ -1,98 +0,0 @@
#pragma region OS
#pragma warning(push)
#pragma warning(disable: 4820)
#pragma comment(lib, "Kernel32.lib")
#pragma comment(lib, "Advapi32.lib")
#define MS_INVALID_HANDLE_VALUE ((MS_HANDLE)(S8)-1)
#define MS_ANYSIZE_ARRAY 1
#define MS_MEM_COMMIT 0x00001000
#define MS_MEM_RESERVE 0x00002000
#define MS_MEM_RELEASE 0x00008000
#define MS_MEM_LARGE_PAGES 0x20000000
#define MS_PAGE_READWRITE 0x04
#define MS_TOKEN_ADJUST_PRIVILEGES (0x0020)
#define MS_SE_PRIVILEGE_ENABLED (0x00000002L)
#define MS_TOKEN_QUERY (0x0008)
#define MS__TEXT(quote) L ## quote // r_winnt
#define MS_TEXT(quote) MS__TEXT(quote) // r_winnt
#define MS_SE_LOCK_MEMORY_NAME MS_TEXT("SeLockMemoryPrivilege")
typedef int MS_BOOL;
typedef unsigned long MS_DWORD;
typedef MS_DWORD* MS_PDWORD;
typedef void* MS_HANDLE;
typedef MS_HANDLE* MS_PHANDLE;
typedef long MS_LONG;
typedef S8 MS_LONGLONG;
typedef char const* MS_LPCSTR;
typedef unsigned short* MS_LPWSTR, *MS_PWSTR;
typedef void* MS_LPVOID;
typedef MS_DWORD* MS_LPDWORD;
typedef U8 MS_ULONG_PTR, *MS_PULONG_PTR;
typedef void const* MS_LPCVOID;
typedef struct MS_SECURITY_ATTRIBUTES *MS_PSECURITY_ATTRIBUTES, *MS_LPSECURITY_ATTRIBUTES;
typedef struct MS_OVERLAPPED *MS_LPOVERLAPPED;
typedef def_union(MS_LARGE_INTEGER) { struct { MS_DWORD LowPart; MS_LONG HighPart; } _; struct { MS_DWORD LowPart; MS_LONG HighPart; } u; MS_LONGLONG QuadPart; };
typedef def_struct(MS_FILE) { void* _Placeholder; };
typedef def_struct(MS_SECURITY_ATTRIBUTES) { MS_DWORD nLength; A4_B1 _PAD_; MS_LPVOID lpSecurityDescriptor; MS_BOOL bInheritHandle; };
typedef def_struct(MS_OVERLAPPED) { MS_ULONG_PTR Internal; MS_ULONG_PTR InternalHigh; union { struct { MS_DWORD Offset; MS_DWORD OffsetHigh; } _; void* Pointer; } _; MS_HANDLE hEvent; };
typedef struct MS_LUID* MS_PLUID;
typedef struct MS_LUID_AND_ATTRIBUTES* MS_PLUID_AND_ATTRIBUTES;
typedef struct MS_TOKEN_PRIVILEGES* MS_PTOKEN_PRIVILEGES;
typedef def_struct(MS_LUID) { MS_DWORD LowPart; MS_LONG HighPart; };
typedef def_struct(MS_LUID_AND_ATTRIBUTES) { MS_LUID Luid; MS_DWORD Attributes; };
typedef def_struct(MS_TOKEN_PRIVILEGES) { MS_DWORD PrivilegeCount; MS_LUID_AND_ATTRIBUTES Privileges[MS_ANYSIZE_ARRAY]; };
WinAPI MS_BOOL CloseHandle(MS_HANDLE hObject);
WinAPI MS_BOOL AdjustTokenPrivileges(MS_HANDLE TokenHandle, MS_BOOL DisableAllPrivileges, MS_PTOKEN_PRIVILEGES NewState, MS_DWORD BufferLength, MS_PTOKEN_PRIVILEGES PreviousState, MS_PDWORD ReturnLength);
WinAPI MS_HANDLE GetCurrentProcess(void);
WinAPI U8 GetLargePageMinimum(void);
WinAPI MS_BOOL LookupPrivilegeValueW(MS_LPWSTR lpSystemName, MS_LPWSTR lpName, MS_PLUID lpLuid);
WinAPI MS_BOOL OpenProcessToken(MS_HANDLE ProcessHandle, MS_DWORD DesiredAccess, MS_PHANDLE TokenHandle);
WinAPI MS_LPVOID VirtualAlloc(MS_LPVOID lpAddress, U8 dwSize, MS_DWORD flAllocationType, MS_DWORD flProtect);
WinAPI MS_BOOL VirtualFree (MS_LPVOID lpAddress, U8 dwSize, MS_DWORD dwFreeType);
#pragma warning(pop)
typedef def_struct(OS_Windows_State) { OS_SystemInfo system_info; };
global OS_Windows_State os__windows_info;
IA_ OS_SystemInfo* os_system_info(void) { return & os__windows_info.system_info; }
I_
void os__enable_large_pages(void) {
MS_HANDLE token;
if (OpenProcessToken(GetCurrentProcess(), MS_TOKEN_ADJUST_PRIVILEGES | MS_TOKEN_QUERY, &token))
{
MS_LUID luid;
if (LookupPrivilegeValueW(0, MS_SE_LOCK_MEMORY_NAME, &luid))
{
MS_TOKEN_PRIVILEGES priv;
priv.PrivilegeCount = 1;
priv.Privileges[0].Luid = luid;
priv.Privileges[0].Attributes = MS_SE_PRIVILEGE_ENABLED;
AdjustTokenPrivileges(token, 0, & priv, size_of(priv), 0, 0);
}
CloseHandle(token);
}
}
I_
void os_init(void) {
os__enable_large_pages();
OS_SystemInfo*R_ info = & os__windows_info.system_info;
info->target_page_size = (U8)GetLargePageMinimum();
}
// TODO(Ed): Large pages disabled for now... (not failing gracefully)
IA_ U8 os__vmem_reserve(U8 size, Opts_vmem*R_ opts) {
assert(opts != nullptr);
void*R_ result = VirtualAlloc(cast(void*R_, opts->base_addr), size
, MS_MEM_RESERVE
// |MS_MEM_COMMIT|(opts->no_large_pages == false ? MS_MEM_LARGE_PAGES : 0)
, MS_PAGE_READWRITE
);
return u8_(result);
}
IA_ B4 os__vmem_commit(U8 vm, U8 size, Opts_vmem*R_ opts) {
assert(opts != nullptr);
// if (opts->no_large_pages == false ) { return 1; }
B4 result = (VirtualAlloc(cast(MS_LPVOID, vm), size, MS_MEM_COMMIT, MS_PAGE_READWRITE) != 0);
return result;
}
I_ void os_vmem_release(U8 vm, U8 size) { VirtualFree(cast(MS_LPVOID, vm), 0, MS_MEM_RELEASE); }
#pragma endregion OS
+61
View File
@@ -0,0 +1,61 @@
[project]
name = "bootslop"
git_dir = "C:/projects/forth/bootslop"
word_wrap = true
system_prompt = "Avoid reading bootslop.md. It has full discussion histories, its very expnesive for your token limits."
main_context = "C:/projects/forth/bootslop/GEMINI.md"
[output]
namespace = "bootslop"
output_dir = "./md_gen"
[files]
base_dir = "."
paths = [
"C:/projects/forth/bootslop/.editorconfig",
"C:/projects/forth/bootslop/CONVENTIONS.md",
"C:/projects/forth/bootslop/GEMINI.md",
"C:/projects/forth/bootslop/Readme.md",
"C:/projects/forth/bootslop/references/Architectural_Consolidation.md",
"C:/projects/forth/bootslop/references/kyra_in-depth.md",
"C:/projects/forth/bootslop/references/neokineogfx_in-depth.md",
"C:/projects/forth/bootslop/scripts/build.attempt_1.c.ps1",
"C:/projects/forth/bootslop/attempt_1/attempt_1.md",
"C:/projects/forth/bootslop/attempt_1/duffle.amd64.win32.h",
"C:/projects/forth/bootslop/attempt_1/main.c",
"C:/projects/forth/bootslop/attempt_1/microui.h",
]
[screenshots]
base_dir = "C:/Users/Ed/scoop/apps/sharex/current/ShareX/Screenshots/2026-02"
paths = []
[discussion]
roles = [
"User",
"AI",
"Vendor API",
"System",
]
active = "test"
auto_add = true
[discussion.discussions.main]
git_commit = ""
last_updated = "2026-02-22T11:09:05"
history = [
"@2026-02-21T20:58:31\nUser:\nwhat should we do next with attempt_1",
"@2026-02-21T20:58:32\nAI:\nNow I have a clear picture. Let me give you a prioritized assessment:\n\n---\n\n## Assessment: What to Do Next with `attempt_1`\n\nBased on the roadmap, the code, and the gap analysis, here's what I'd recommend in priority order:\n\n### 1. **Fix the JMP rel32 Hack & Add Forward-Jump DSL Wrappers** (Quick Win — Immediate)\n\nThis is listed as item #5 in the roadmap and it's the most mechanically obvious fix. Right now you have this abomination on line 657:\n\n```c\nemit8(x64_op_CALL_rel32 - 3); // E9 = JMP rel32\n```\n\n`0xE8 - 3 = 0xE5`, which is **wrong**. `JMP rel32` is `0xE9`, and `0xE8 - 3 = 0xE5` (which is `IN EAX, imm8`). This is a **live bug** — the define-skip jump is emitting garbage. It only works by accident if that code path is never actually taken, or if it's being masked by some other behavior.\n\nThe fix is trivial: you already have `x64_op_JMP_rel32` defined as `0xE9` in the DSL. Plus, adding the two helpers (`x64_JMP_fwd_placeholder` / `x64_CALL_fwd_placeholder` / `x64_patch_fwd`) will clean up ~6 raw emit patterns in `compile_and_run_tape`.\n\n**Also needed:** A `x64_MOV_RAX_IMM64()` helper and a `x64_MOV_RAX_IMM32()` helper for the Data and Lambda tag emission, which are currently still raw.\n\n### 2. **Add a `x64_MOV_RAX_IMM32(val)` / `x64_MOV_RAX_IMM64_placeholder()` DSL wrapper** (Quick Win)\n\nLines 683-686 and 707-710 are raw emit sequences for `mov rax, imm64` and `mov r/m, imm32` that should have named DSL wrappers. This is the last gap before you can honestly say \"all emission uses the DSL.\"\n\n### 3. **Basic Block Jumps `[ ]`** (The Real Next Feature)\n\nThis is item #7 from the roadmap and is the **most architecturally significant next step**. Right now your control flow is limited to:\n- `RET_IF_ZERO` / `RET_IF_SIGN` (conditional early returns)\n- Lambdas (out-of-line code blocks)\n- Unconditional `CALL`\n\nYou have no way to express a loop or a conditional branch *within* a definition. The `[ ]` basic block mechanism from Onat would give you:\n- `[` — mark the top of a basic block (save the code offset)\n- `]` — emit a conditional or unconditional backward jump to the `[`\n- Or: `[` emits a forward-jump placeholder, and `]` patches it (for if-style blocks)\n\nThis would let you write factorial as a loop inside a single definition instead of relying on repeated `Imm` calls to `F_STEP`.\n\n### 4. **Expanded Annotation Layer** (Important but Can Wait)\n\nThe 8-byte annotation limit per token is functional but restrictive. This is item #6 — refactoring to variable-length comments. I'd defer this until after `[ ]` blocks work, because it's a UI/data-structure change rather than a compiler feature.\n\n---\n\n## My Recommendation: Do Items 1 + 2 Right Now\n\nFix the JMP bug, add the forward-jump DSL wrappers, and add the remaining immediate-move wrappers. This is ~30 minutes of clean mechanical work, eliminates every remaining raw emit in `compile_and_run_tape`, and gives you the exact `x64_patch_fwd` infrastructure you'll need for `[ ]` basic blocks immediately after.\n\nWant me to implement items 1 and 2 now?",
]
[discussion.discussions.test]
git_commit = "03fb6a818060e71e53e1c240e5f869d4d7e64c49"
last_updated = "2026-02-22T17:03:58"
history = [
"@2026-02-22T11:15:32\nUser:\nWhat should we do next with attempt_1?",
"@2026-02-22T11:15:34\nAI:\nLooking at the `GEMINI.md` roadmap, items 1-4 are completed. The remaining items are:\n\n**5. DSL wrappers for forward jump placeholders** — The `JMP rel32` and `CALL rel32` forward-jump patterns in `compile_and_run_tape` still use bare `emit8(x64_op_JMP_rel32) + emit32(0)` pairs. This is the most surgical, self-contained next step.\n\n**6. Expanded Annotation Layer** — The `anno_arena` is fixed at 8 bytes per token. Arbitrary-length comments need an indirection layer.\n\n**7. Continuous Validation & Complex Control Flow** — Basic Block jumps `[ ]`, more conditional patterns, loop primitives.\n\n---\n\nMy recommendation is to tackle **#5 first**, because:\n\n- It's the smallest, most well-defined gap. The exact API is already specced in the roadmap:\n - `x64_JMP_fwd_placeholder(U4* offset_out)` — emits `E9 00000000`, writes the patch offset.\n - `x64_patch_fwd(U4 offset)` — patches the placeholder with the current code position.\n- It closes the last \"raw magic bytes\" hole in the DSL discipline, making the emitter fully consistent.\n- It's a prerequisite for **#7** (Basic Block `[ ]` jumps need exactly this mechanism to resolve forward targets cleanly).\n\nAfter #5, **#7 (Basic Blocks `[ ]`)** is the highest-leverage architectural feature — it's what unlocks structured control flow without an AST, which is the core of the Lottes/Onat paradigm. **#6** (variable-length annotations) is more of a quality-of-life editor feature and can wait.\n\nWant me to implement **#5** now?",
"@2026-02-22T11:15:45\nUser:\nAlright lets do 5.",
"@2026-02-22T11:17:41\nAI:\nLooking at `main.c`, I need to add two DSL helpers to the `x64 Emission DSL` region and then replace the bare `emit8`/`emit32` pairs in `compile_and_run_tape`. Let me identify exactly what needs changing.\n\nThere are three raw forward-jump sites in `compile_and_run_tape`:\n\n1. **`STag_Define` block** — `emit8(x64_op_CALL_rel32 - 3)` + `emit32(0)` (note: `E9` is `JMP rel32`, written as `CALL_rel32 - 3` which is a bug-prone magic expression)\n2. **`STag_Lambda` block** — `emit8(x64_op_JMP_rel32)` + `emit32(0)`\n3. **Both patch sites** — `u4_r(code_arena.start + <offset>)[0] = current - (<offset> + 4)`\n\nI'll add `x64_JMP_fwd_placeholder` and `x64_patch_fwd` to the DSL, then clean up all three sites.\n\nI can see the file was saved but the PowerShell string replacement missed the `STag_Define` and both patch sites inside the loop — the old raw patterns are still there in `compile_and_run_tape`. The new DSL helpers were added correctly to the DSL region, but the call sites in the loop body weren't updated. Let me fix those now.\n\nThe DSL helpers were added correctly but the call sites in `compile_and_run_tape` still have the old raw patterns. The string replacement is failing because of whitespace/tab differences. I'll use a Python helper script to do the targeted replacements reliably.\n\nThe file looks correct. Let me do a final verification build to confirm it compiles and links cleanly.\n\nItem 5 is done and clean. Here's a summary of what was accomplished:\n\n**Two new DSL helpers added** to the `#pragma region x64 Emission DSL` block, immediately after `x64_RET()`:\n\n- `x64_JMP_fwd_placeholder(U4* offset_out)` — emits `E9 00000000` and records the displacement field's offset for later patching.\n- `x64_patch_fwd(U4 offset)` — writes the correct `rel32` displacement back into the previously emitted placeholder, computed as `current_pos - (offset + 4)`.\n\n**Six raw emission sites eliminated** in `compile_and_run_tape`:\n\n| Site | Old | New |\n|---|---|---|\n| `STag_Define` JMP emission | `emit8(x64_op_CALL_rel32 - 3)` + `emit32(0)` | `x64_JMP_fwd_placeholder(& def_jmp_offset)` |\n| `STag_Lambda` JMP emission | `emit8(x64_op_JMP_rel32)` + `emit32(0)` | `x64_JMP_fwd_placeholder(& lambda_jmp_offset)` |\n| Lambda patch (in loop) | `u4_r(...)[0] = current - (offset + 4)` | `x64_patch_fwd(lambda_jmp_offset)` |\n| Define patch (in loop) | `u4_r(...)[0] = current - (offset + 4)` | `x64_patch_fwd(def_jmp_offset)` |\n| Lambda patch (tail) | `u4_r(...)[0] = current - (offset + 4)` | `x64_patch_fwd(lambda_jmp_offset)` |\n| Define patch (tail) | `u4_r(...)[0] = current - (offset + 4)` | `x64_patch_fwd(def_jmp_offset)` |\n\nThe DSL is now fully consistent — no raw forward-jump patterns remain anywhere in `compile_and_run_tape`. The `x64_op_JMP_rel32` exception note in `GEMINI.md` can now be retired as well. Ready to move on to **#7 (Basic Block jumps `[ ]`)** whenever you are.",
"@2026-02-22T11:41:36\nUser:\nMove on to 7. (basic block jumps) sure.",
"@2026-02-22T11:45:25\nAI:\nI have successfully implemented the Basic Block jumps (`[ ]`) and the `RET_IF_ZERO` + tail-call pattern for loops. \n\nHere is a breakdown of what was added to `attempt_1/main.c`:\n\n1. **New Primitives:**\n - `[ ` (`PRIM_IF`): Emits a conditional forward jump (`JZ rel32`) that skips the block if `RAX == 0`. It pushes the patch offset to a new `ctrl_stack` managed by the JIT compiler.\n - `] ` (`PRIM_END`): Pops the offset from `ctrl_stack` and patches the forward jump with the correct relative displacement.\n - `RECURSE ` (`PRIM_RECURSE`): Emits an unconditional backward jump (`JMP rel32`) to the entry point of the current definition (`tape_to_code_offset[current_def_idx]`).\n\n2. **Tail-Call Loop Pattern:**\n The `Factorial` test was refactored. Instead of manually calling `F_STEP` five times, `F_STEP` now ends with a `RECURSE` primitive. Because `F_STEP` already uses `RET_IF_Z` to break out when the counter reaches zero, this effectively creates a zero-overhead `while` loop using the tail-call pattern.\n\n3. **AST-Less Control Flow (`[ ]`):**\n A new `IF TEST` was added to the tape initialization. It demonstrates the `[` and `]` block. It pushes `0` to `RAX`, hits `[`, and correctly skips the inner block. Then it pushes `1` to `RAX`, hits `[`, and correctly executes the inner block.\n\nThe compiler logic for these constructs is extremely simple and operates strictly in `O(1)` time during the single JIT pass, adhering to the Lottes/Onat \"zero-overhead\" philosophy. The project builds cleanly and the executable is ready to run.\n\nWould you like to move on to **#6 (Expanded Annotation Layer)** next, or is there anything you want to tweak with the control flow?",
]
Binary file not shown.
+31
View File
@@ -0,0 +1,31 @@
---
name: colorforth-nudge
description: "Interactive mentor for building a sourceless, zero-overhead ColorForth derivative. Use when the user wants to work on the 'bootslop' project, providing guided nudges and architectural validation based on the Lottes/Onat paradigm."
---
# ColorForth Nudge & Review Skill
This skill transforms Gemini CLI into a highly contextualized mentor for building a specific type of zero-overhead, sourceless ColorForth derivative.
## Workflow Trigger
This skill should be activated when the user expresses intent to work on the "bootslop" or "ColorForth" project.
## Your Role: The Mentor
Your primary goal is to *guide*, not *do*. The user is learning how to build this system from scratch. Your task is to provide architectural validation, specific tactical assistance when requested, and "guided nudges" to help them get to the next step.
## Session Start Procedure
On activation, immediately perform the following steps:
1. **Refresh Context:** Read the following two files from the project root to load the complete architectural blueprint into your context:
* `C:/projects/forth/bootslop/CONVENTIONS.md`
* `C:/projects/forth/bootslop/references/Architectural_Consolidation.md`
2. **Analyze Current State:**
* List the contents of the `attempt_1/` directory.
* Read the `attempt_1/main.c` file.
3. **Prompt for Nudge:** Conclude your first response with a summary of the project's current state and ask the user for the next step.
* **Example:** *"The context is loaded. The current prototype has a working JIT compiler, a modal editor, and a 2-character dictionary resolver. The editor supports keyboard input and visualizes the sourceless token array. What is the next implementation step you'd like me to guide you through?"*
Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Before

Width:  |  Height:  |  Size: 428 KiB

After

Width:  |  Height:  |  Size: 428 KiB

Before

Width:  |  Height:  |  Size: 62 KiB

After

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

+2 -1
View File
@@ -19,10 +19,11 @@ This document serves as the master blueprint for the research and curation phase
## 3. Onat's VAMP/KYRA Architecture (The Runtime/Codegen) ## 3. Onat's VAMP/KYRA Architecture (The Runtime/Codegen)
* **2-Item Register Stack:** Uses `RAX` and `RDX` as a tiny, hardware-resident stack. * **2-Item Register Stack:** Uses `RAX` and `RDX` as a tiny, hardware-resident stack.
* **The Swap:** `xchg rax, rdx` (1-byte: `48 87 C2`) is emitted to rotate the "top of stack". * **The Swap / Magenta Pipe:** A definition boundary implicitly emits `RET` (to close the last block) followed by `xchg rax, rdx` (1-byte: `48 87 C2` or `48 92`) to rotate the "top of stack" for the new block.
* **Aliased Global Namespace:** The CPU register file is treated as a shared, aliased memory space for functions. * **Aliased Global Namespace:** The CPU register file is treated as a shared, aliased memory space for functions.
* **Functions as Blocks:** Words are "free of arguments and returns" in the traditional sense. * **Functions as Blocks:** Words are "free of arguments and returns" in the traditional sense.
* **Preemptive Scatter ("Tape Drive"):** Arguments are pre-placed into fixed, contiguous memory slots ("the tape") by the compiler/loader before execution. This eliminates "argument gathering" during function calls. * **Preemptive Scatter ("Tape Drive"):** Arguments are pre-placed into fixed, contiguous memory slots ("the tape") by the compiler/loader before execution. This eliminates "argument gathering" during function calls.
* **The FFI Dance (C-ABI Integration):** To call OS APIs (like WinAPI or Vulkan), the hardware stack pointer (`RSP`) must be strictly 16-byte aligned. Custom macros (like `CCALL`) must save state, align `RSP`, map the 2-register stack into C-ABI registers (`RCX`, `RDX`, `R8`, `R9`), execute the `CALL`, and restore `RSP`.
## 4. Implementation Components ## 4. Implementation Components
* **Emitter:** **Zydis Encoder API**. Zero-allocation, sub-5ms instruction generation. * **Emitter:** **Zydis Encoder API**. Zero-allocation, sub-5ms instruction generation.
+92
View File
@@ -0,0 +1,92 @@
# In-Depth Chronological Breakdown of Source-Less Programming Reference Videos
This document provides an exhaustive, highly detailed chronological paraphrase of the technical specifics, screen visuals, and mechanical explanations provided by Timothy Lottes and Onat Türkçüoğlu.
---
## 1. "Forth Day 2020 - Preview of x64 & ColorForth & SPIR V" (Onat, 2020)
**0:00 - 3:00 | Introduction & The Editor Visuals**
Onat introduces his 1-month-old iteration of Forth, inspired by ColorForth.
* **Screen Details:** A custom 3-pane UI rendered in C and Vulkan. Left/center panes show block-based colored tokens; the right pane displays live x64 assembly output that updates instantly as he edits.
* The editor treats code blocks as tracked state objects, supporting native undo/redo.
**3:00 - 6:00 | O(1) Dictionary Lookup & Execution Tracing**
* To avoid hashing, his compiler allocates an extra 4 bytes per character strictly to store the *source memory location* of the currently compiled word.
* **Visual Feature:** "Jump to Definition" and an "Execution Trace" overlay. He demonstrates invoking a command that instantly numbers every occurrence of a word across the codebase in the exact chronological order of execution, providing a "compile-time call graph" without running the program.
**6:00 - 11:00 | The High-Level x64 Macro Assembler & SPIR-V**
* **Screen Details:** Syntax like `AX to BX` or `CX + offset`. Toggling a "direction register" macro changes `from AX to BX register, let's move an unsigned` into a 32-bit `mov ebx, eax`. Modifiers like `long` emit 64-bit `mov rbx, rax`.
* He uses this same macro-assembler to generate SPIR-V. He notes x64 was actually less complicated than SPIR-V because x64 is a flat instruction stream, whereas SPIR-V requires strict sections, type declarations, and capabilities, forcing him to introduce "sections" into his JIT.
---
## 2. "4th And Beyond" (Timothy Lottes, NeoKineoGfx, 2026)
**0:00 - 8:00 | HP48 Evolution & ColorForth Mechanics**
* Lottes advocates removing compilers, linkers, and debuggers. He starts with HP48's RPN as the baseline.
* **Screen Details:** He defines a red word `4K` pointing to the next item on the data stack. Typing `1024 4 *` computes `4096`. `4K` acts as a variable.
* He defines `DROP` pointing to `add esi, -4` and `ret`. `4K 1 2 + DROP` yields 4096.
* He reviews ColorForth: Code compiles onto the data stack. Yellow = Execute, Red = Define, Green = Compile, Magenta = Variable. A Yellow-to-Green transition pops the stack and emits a `push` instruction.
* **Screen Details:** Disassembly of Block 24/26 shows `168B 2 , C28B0689 ,`. This pushes bytes onto the stack, disassembling to `mov edx, dword ptr esi` and `mov dword ptr esi, eax` (literally byte-banging machine code).
**8:00 - 20:00 | Branch Misprediction, Folded Interpreter, & x68**
* Standard Forth causes 16-clock branch misprediction stalls due to tag branching.
* **The Folded Interpreter:** Lottes fixes this by folding a 5-byte interpreter into the end of every word: `LODSD`, lookup, `JMP RAX`. Every transition gets its own branch predictor slot.
* **x68 Architecture:** Forces all instructions to 32-bit boundaries. `RET` (`C3`) is padded with three `NOP`s (`90 90 90`). `MOV ESI, imm32` is padded with a `3E` ignored DS prefix.
* This makes relative offsets (`CALL`, `JMP`) align perfectly. The editor auto-relinks offsets as tokens are inserted/deleted.
* **Assembly Shorthand:** Editor maps `add rcx, qword ptr [rdx + 0x8]` to visual `h + at i08`.
**20:00 - End | Live Execution (SteamOS/Linux)**
* Lottes targets a mix of high-level JIT and raw x68 sourceless.
* **Cartridge execution:** The binary copies itself to `cart.back`, maps into memory at a fixed address (bypassing ASLR), and provides a zero-fill space. 32-bit tokens act as direct absolute memory pointers, removing lookup overhead.
---
## 3. "Metaprogramming VAMP in KYRA" (Onat, SVFIG, 2025-04-26)
This presentation contains the most explicit, hardcore low-level details regarding Onat's binary-encoded compiler (VAMP).
**0:00 - 10:00 | The Binary Editor, Compilation Speed, & The 2-Item Stack**
* VAMP compiles the entire program (Vulkan renderers, UI) in **8.24 milliseconds** on Windows/Linux. His previous text-based Forth took 16-17.8ms just to compile the editor.
* **Hardware Locality & The Stack:** Traditional Forth is "runtime opinionated" with a memory data stack, making GPU compute shaders difficult. Onat strictly restricts the stack to two CPU registers: **`RAX` and `RDX`**.
* **Screen Details:** The stack state is constantly visualized in the top left corner.
* **Magenta Pipe `|`:** There are no `begin` or `end` definition words. A magenta pipe token implicitly signals the end of the previous definition (compiling a `ret`) and starts the new one. Spaces between words imply execution.
**10:00 - 18:00 | Dictionary Management, UX, & Indexing**
* **Dictionary Encoding:** Words are stored as 24-bit indices pointing to 8-byte cells, packed with an 8-bit color tag. (He notes the next iteration will use 32-bit indices + a separate 1-byte tag block for faster skipping of empty blocks).
* This pure index mapping eliminates hashing and string parsing. It allows IP-protection: you can ship the source indices without the symbols/dictionary. Core language is just 2 to 4 KB.
* **Screen Details:** Words are organized explicitly into 16-word horizontal "scrolls" (e.g., "Vulkan API", "FFMPEG", "x64 Assembly"). He presses `Ctrl+Space` to manually redefine a word in a specific scroll.
* **Comments:** A comment (Blue tag) is encoded as a string directly inside the 24-bit payload (3 characters).
**18:00 - 28:00 | Data Flow Visualization & Global Memory**
* **Free Printf:** Hovering over a word injects code to record `RAX` and `RDX`. Pressing Previous/Next steps through the execution flow visually.
* **Global Variables vs. Stacks:** To pass complex state (since the stack only holds two items), he relies entirely on global memory. He explicitly critiques Rust's "safe programming" for forcing developers to pass state through 30 layers of call stacks.
* **Single-Register Memory Access:** He dedicates a single CPU register to act as the base pointer for all program memory, giving instant access to "gigabytes of state".
**28:00 - 45:00 | Syntax, Tags, and JIT Assembly Mechanics**
* He demonstrates compiling Vulcan commands. Instead of typing `vkGetSwapchainImagesKHR`, he defines a word `get swap chain images` in the `vk device` scroll.
* **The `xchg` Trick (`48 92`):** Because the stack is just `RAX` and `RDX`, keeping `RAX` as the Top of Stack is vital. He explicitly notes that `xchg rax, rdx` compiles to just two bytes: `48 92` (REX.W + xchg eax, edx). Before starting a definition or making a call, the JIT emits `48 92` to ensure `RAX` is correctly aligned as the top.
* **Color Semantics:**
* **White (Call):** Emits a `CALL` or `JMP RAX` (e.g., `FFE0`).
* **Green (Load):** Emits `mov rax, [global_offset]`.
* **Red (Store):** Emits `mov [global_offset], rax`.
* **Yellow (Immediate/Execute):** Used heavily. For a number, emits `mov rax, imm`. Also used to invoke a lambda block.
* **Blue (Comment):** Ignored.
* **Cyan (Number):** Data literal.
**45:00 - 55:00 | Lambdas `{ }` & Basic Blocks `[ ]`**
* He explicitly eliminates `if/else` ASTs.
* **Lambdas `{ }`:** Defining a lambda block (Yellow `{`) does not execute it. It compiles the block elsewhere and leaves its executable memory address in `RAX`.
* **Basic Blocks `[ ]`:** These define a constrained range of assembly with implicit begin, link, and end jump targets.
* **Conditionals in Blocks:** He shows checking `if luma > 0.6`. He explicitly creates a `condition` variable (e.g., `26E`). The `>` operator consumes the values and writes the boolean to `condition`. The conditional word then reads `condition` and consumes the lambda address from `RAX`, emitting a `cmp condition, 0` and `jz lambda_address`.
**55:00 - 1:10:00 | FFI, Stack Pointers, and OS Interop**
* **`RSP` Alignment:** The hardware stack pointer (`RSP`) is exclusively used for the call stack, eliminating buffer overflows. When calling OS APIs (like FFMPEG), he explicitly reads `RSP` into a variable to align it to 16 bytes (required by C ABI), makes the call, and restores it.
* **Filling Structs:** For `VkImageCreateInfo`, he uses a temporary variable `$` (Dollar sign). He doesn't use C headers. He knows `14` is the Type ID, manually pushing offsets into the contiguous memory space (e.g., `info + offset`).
**1:10:00 - End | SPIR-V, Bug Triage, and Implicit Registers**
* **SPIR-V Generation:** VAMP directly emits SPIR-V. He shows the spec (Opcode 194 is Shift Right Logical) and demonstrates a 4-line definition that writes exactly `194` and its operands into a binary vector, replacing a 100MB `glslang` compiler with ~256KB of VAMP code.
* **Bug Triage:** He does not use tests or asserts. He triages bugs by commenting out blocks of code (disabling them) and hitting compile (8ms) until the crash stops.
* **Implicit Register Passing:** He shows UI hover logic where the `slot ID` is implicitly passed in register `R12D` across functions, completely avoiding pushing it to the data stack.
* **Lock Prefix:** Writing concurrent code is handled by the macro assembler. Placing the word `lock` before an `inc` token simply emits the `F0` prefix byte.
+97
View File
@@ -0,0 +1,97 @@
# In-Depth Analysis: Timothy Lottes's Development Blogs (2007 - 2016)
This document synthesizes the architectural paradigms, implementation details, and philosophical shifts documented in Timothy Lottes's blogs over a decade of building minimal, high-performance Forth-like operating environments. This knowledge is crucial for understanding the "Lottes/Onat Paradigm" and successfully implementing the `bootslop` project.
---
## 1. The Core Philosophy: "Vintage Programming"
Lottes advocates for returning to a "stone-age" development methodology reminiscent of the Commodore 64 or HP48, but applied to modern x86-64 hardware and GPUs.
* **Rejection of Modern Complexity:** He explicitly rejects the "NO" of modern operating systems—compilers, linkers, debuggers, memory protection, paging, and bloated ABIs. He aims for an environment that says "YES" to direct hardware access.
* **The OS IS the Editor:** The system boots directly into a visual editor. This editor functions simultaneously as an IDE, assembler, disassembler, hex editor, debugger, and live-coding environment.
* **Instant Iteration:** The primary goal is a sub-5ms edit-compile-run loop. Debugging is done via instant visual feedback and "printf" style memory peeking within the editor itself, rendering traditional debuggers obsolete.
* **Extreme Minimalism:** His compilers and core runtimes often fit within 1.5KB to 4KB (e.g., the 1536-byte bootloader/interpreter project).
## 2. The Evolution to "Source-Less" Programming
The most critical architectural shift in Lottes's work is the move from text-based source files (like his 2014 "A" language) to **Source-Less Programming** (2015).
### Why Source-Less?
Parsing text (lexical analysis, string hashing, AST generation) is slow and complex. In a source-less model, the "source code" *is* the binary executable image (or a direct structured representation of it).
### The Architecture of Source-Less (x68)
1. **32-Bit Granularity:** Every token in the system is exactly 32 bits (4 bytes).
* To accommodate variable-length x86-64 instructions, Lottes invented "x68".
* **Padding:** Standard x86 instructions are padded to exactly 32 bits (or multiples of 32 bits) using ignored segment override prefixes (like `2E` or `3E`) and multi-byte NOPs.
* Example: A `RET` instruction (`C3`) becomes `C3 90 90 90`.
* *Why?* This keeps immediate values (like 32-bit addresses or constants) 32-bit aligned, drastically simplifying the editor and the assembler.
2. **The Token Types:** A 32-bit word in memory represents one of four things:
* **DAT (Data):** Hexadecimal data or an immediate value.
* **OP (Opcode):** A padded 32-bit x86-64 machine instruction.
* **ABS (Absolute Address):** A direct 32-bit memory pointer.
* **REL (Relative Address):** An `[RIP + imm32]` relative offset used for branching.
3. **The Annotation Overlay (The "Shadow" Memory):**
* Because raw 32-bit hex values are unreadable to humans, the editor maintains a *parallel array* of 64-bit annotations for every 32-bit token.
* **Annotation Layout (64-bit):**
* `Tag` (4 to 8 bits): Defines how the editor should display and treat the 32-bit value (e.g., display as a signed int, an opcode name, a relative address, or a specific color).
* `Label / Name`: A short string (e.g., 5 to 8 characters, often compressed using 6-bit or 7-bit encodings to fit) that acts as the human-readable name for the memory address.
* *The Magic:* The editor reads the binary array and the annotation array. It uses the tags to dynamically format the screen. There is **zero string parsing** at runtime.
4. **Edit-Time Relinking (The Visual Linker):**
* When you insert or delete a token in the editor, all tokens tagged as `ABS` or `REL` (addresses) are automatically recalculated and updated in real-time. The editor *is* the linker.
5. **Live State vs. Edit State:**
* Memory is split: The live running program, and the edit buffer.
* When edits are made and confirmed (e.g., hitting ESC or Enter), the editor atomically swaps or patches the live image with the edited image.
## 3. Language Paradigms: "Ear" and "Toe"
In his "Random Holiday 2015" post, Lottes solidifies the specific DSLs used within this source-less framework:
* **"Toe" (The Low-Level Assembler):** This is the subset of x86-64 with 32-bit padded opcodes. It is heavily macro-driven to assemble machine code.
* **"Ear" (The High-Level Macro/Forth Language):** A zero-operand, Forth-like language embedded directly into the binary form.
* Instead of a traditional Forth interpreter searching a dictionary at runtime, the dictionary is resolved at *edit-time* or *import-time*.
* A token is just an index or a direct `CALL` instruction to the compiled word.
### The 2-Item Stack (Implicit Registers)
While early experiments used a traditional Forth data stack in memory, Lottes's later architectures (and Onat's derived work) map the stack directly to hardware registers to eliminate memory overhead:
* `RAX` = Top of Stack (TOS)
* `RBX` (or `RDX` in Onat's VAMP) = Second item on stack (NOS)
* **The xchg Trick:** Stack rotation is often handled by `xchg rax, rbx` (or `rdx`), which compiles to a tiny 2-3 byte instruction, keeping execution entirely within the CPU cache.
## 4. Bootstrapping "The Chicken Without an Egg"
How do you build a system that requires a custom binary editor to write code, when you don't have the editor yet?
1. **C Prototype First:** Lottes explicitly states he builds the first iteration of the visual editor and virtual machine in C (using WinAPI or standard libraries). This allows rapid iteration of the visual layout and the memory arena logic.
2. **Hand-Assembling Bootstraps:** He uses standard assemblers (like NASM) or hexadecimal byte-banging (using tools like `objdump -d`) to figure out the exact padded 32-bit opcode bytes.
3. **Embed Opcode Definitions:** The C prototype includes hardcoded arrays of bytes that represent the base opcodes (e.g., `MOV`, `ADD`, `CALL`, `RET`).
4. **Self-Hosting:** Once the C editor is stable and can generate binary code into an arena, he rewrites the editor *inside* the custom language within the C editor, eventually discarding the C host.
## 5. UI and Visual Design
The UI is not an afterthought; it is integral to the architecture.
* **The Grid:** The editor displays memory as a strict grid. Typical layout: 8 tokens per line (fitting half a 64-byte cache line).
* **Two Rows per Token:**
* Top Row: The Annotation (Label/Name), color-coded.
* Bottom Row: The 32-bit Data (Hex value, or a resolved symbol name if tagged as an address).
* **Colors (ColorForth Inspired):**
* Colors dictate semantic meaning (e.g., Red = Define, Green = Compile, Yellow = Execute/Immediate, White/Grey = Comment/Format). This visual syntax replaces traditional language keywords.
* **Pixel-Perfect Fonts:** Lottes builds custom, fixed-width raster fonts (e.g., 6x11 or 8x8) to ensure perfect readability without anti-aliasing blurring, often treating specific characters (like `_`, `-`, `=`) as line-drawing characters to structure the UI.
## Summary for the `bootslop` Implementation
Our current `attempt_1/main.c` is perfectly aligned with Phase 1 of the Lottes bootstrapping process:
1. We have a C-based WinAPI editor.
2. We have a token array (`tape_arena`) and an annotation array (`anno_arena`).
3. We have 32-bit tokens packed with a 4-bit semantic tag and a 28-bit payload.
4. We have a basic JIT emitter targeting a 2-register (`RAX`/`RDX`) virtual machine.
**Next Immediate Priorities based on Lottes's path:**
* Move away from string-based dictionary lookups at runtime to **Edit-Time Relinking** (resolving addresses when the token is typed or modified in the UI).
* Implement the **Padding Strategy** for the x86-64 JIT emitter to ensure all emitted logical blocks align cleanly, paving the way for 1:1 token-to-machine-code mapping.
* Refine the Editor Grid to show the two-row (Annotation / Data) layout clearly.
+58
View File
@@ -0,0 +1,58 @@
# In-Depth Analysis: Onat's Forth Day 2020 Presentation
This document provides an exhaustive breakdown of the technical specifics, screen visuals, and mechanical explanations from Onat Türkçüoğlu's "Preview of x64 & ColorForth & SPIR V" presentation at Forth Day 2020, synthesizing both the video transcript and the OCR analysis of the editor's visual state.
---
## 1. The Environment and Editor UI
Onat introduces a custom 3-pane UI built entirely from scratch in C and Vulkan. This editor serves as the primary IDE, compiler, and visual debugger.
### Visual Layout (from OCR & Video)
* **The Three Panes:** Left/center panes display the block-based, colorized Forth/macro tokens. The right pane displays live x86-64 assembly output (or SPIR-V binary data) that updates instantly as the user edits the source.
* **Color Semantics (Observed in OCR):**
* **Cyan:** Low-level x86-64 opcodes or API functions (`mov`, `jmp`, `xorpd`, `CCALL1`, `ide_syscmd`).
* **Yellow:** Line numbers, specific execution tokens, or immediate jump labels/blocks.
* **Magenta:** High-level struct definitions, bitwise layouts, and basic block delineations (`Structs`, `vars`, `bits`).
* **Red:** Literal numbers (`32`, `64`), format strings, or specific SPIR-V instruction IDs and properties.
* **Orange/Green:** UI and control flow modifiers.
* **State Tracking:** The editor treats code blocks as tracked state objects, which allows for native, robust Undo/Redo operations without relying on a traditional text file format.
## 2. O(1) Dictionary Lookup & "Compile-Time Call Graph"
Traditional Forth systems (and even Lottes's early systems) relied on hashing strings or linear searches to resolve words. Onat eliminated this overhead entirely.
* **Source Memory Mapping:** Instead of hashing, the compiler allocates an extra 4 bytes per character in the visual block to store the *exact source memory location* of the currently compiled word.
* **Instant Resolution:** Because the token itself points to its origin, "Jump to Definition" is instantaneous.
* **Execution Tracing:** He demonstrates a command that instantly numbers every occurrence of a word across the codebase in the exact chronological order of execution. This provides a "compile-time call graph" without actually running the program, allowing the programmer to visualize the data flow statically.
## 3. The High-Level x64 Macro Assembler
The core of the system is not a traditional Forth interpreter, but a high-level macro assembler that compiles words directly into x64 machine code.
* **Syntax & Abstraction:**
* The syntax is designed to be readable and fluid: `AX to BX` or `CX + offset`.
* A "direction register" macro allows toggling the flow of data. For instance, `from AX to BX register, let's move an unsigned` emits a 32-bit `mov ebx, eax`.
* Modifiers like `long` change the emission to a 64-bit `mov rbx, rax`.
* **Low-Level Control (OCR Insights):** The OCR reveals exact x64 instructions embedded in the blocks:
* `xorpd xm15, xm15` and `movups [rsi], xm15` show direct, native access to SSE/AVX registers for vectorized operations.
* Macros like `PUSH2 rsi, rdi` and `POP2 rsi, rdi` are used instead of traditional C-style prologues/epilogues, maintaining tight control over the stack pointer and register preservation.
* **C-ABI Integration:** The OCR shows words like `CCALL1 ide_p` and `CCALL3 ide_syscmd`. This indicates a custom FFI (Foreign Function Interface) macro set (`CCALL0`, `CCALL1`, `CCALL2`, `CCALL3`) designed to automatically align the stack (`RSP` to 16 bytes) and map registers to the C-ABI (e.g., `RCX`, `RDX`, `R8`, `R9` on Windows) to call out to the C-based host/Vulkan engine.
## 4. SPIR-V Generation
A significant portion of the presentation focuses on using this same macro-assembler foundation to generate SPIR-V (the intermediate representation for Vulkan compute/graphics shaders) entirely from scratch, replacing massive compiler toolchains like `glslang`.
* **x64 vs. SPIR-V Complexity:** Onat notes that x64 assembly was actually *less* complicated to generate than SPIR-V.
* x64 is a flat, linear instruction stream.
* SPIR-V is strictly structured. It requires rigid sections for Capabilities, Extensions, Memory Models, Entry Points, Execution Modes, Types, and Function Definitions before any actual logic can be emitted.
* **SPIR-V Macros (OCR Insights):** The OCR captures the exact implementation of the SPIR-V generator:
* Words like `opTypeInt 32`, `opTypeVector 4`, `opTypeFloat` map directly to the SPIR-V specification binary IDs.
* Memory addresses and types are explicitly laid out: `PhysicalStorageBuffer64`.
* This proves that the "sourceless" environment scales perfectly from raw CPU machine code to structured GPU bytecodes by just changing the underlying byte-emission macros.
## 5. Key Takeaways for the `bootslop` Implementation
1. **Immediate x64 Access:** The system shouldn't hide the CPU. It should expose it via macros (like `CCALL`) that handle the tedious parts of the ABI while letting the programmer write `movups` if they want to.
2. **Visual Over Text:** The implementation of 4 extra bytes per character to store "source location" reinforces that the visual grid *is* the data structure. It's not text being parsed; it's a spatial array of tokens pointing to each other.
3. **The FFI Bridge:** We will need a macro pattern equivalent to `CCALL` in our JIT emitter to talk to WinAPI functions without trashing the 2-item (`RAX`/`RDX`) stack or violating the 16-byte `RSP` alignment required by Windows.
+86
View File
@@ -0,0 +1,86 @@
# In-Depth Analysis: Metaprogramming KYRA in KYRA (Onat Türkçüoğlu)
This document provides a comprehensive synthesis of the "Metaprogramming KYRA in KYRA" presentation given by Onat Türkçüoğlu at the Silicon Valley Forth Interest Group (SVFIG) on April 26, 2025. It integrates insights from the video transcript and the extensive OCR analysis of his visual editor.
This presentation is the most explicit, hardcore low-level deep dive into Onat's binary-encoded compiler (KYRA) and serves as the definitive mechanical blueprint for our `bootslop` project.
---
## 1. Performance and "Runtime-Opinionated" Languages
Onat's primary critique of traditional Forth (and languages like C or Rust) is that they are "runtime opinionated." Standard Forth dictates a memory-based data stack and return stack. This makes it fundamentally incompatible with environments like GPU compute shaders.
* **Compilation Speed:** KYRA compiles its entire program (including a custom editor, Vulkan renderers, and FFMPEG integrations) in **8.24 milliseconds** natively on Windows/Linux.
* **The 2-Item Hardware Stack:** To achieve hardware locality and GPU compatibility, KYRA strictly restricts the data stack to exactly two CPU registers: **`RAX` (Top of Stack)** and **`RDX` (Next on Stack)**.
* **Zero Stack Overhead:** By having no memory data stack, KYRA eliminates the push/pop overhead that plagues standard Forth implementations.
## 2. The Mechanics of the KYRA Emitter
KYRA is not an interpreter; it is a high-level macro assembler that generates direct x86-64 machine code via JIT compilation.
### The `xchg` Trick (The Magenta Pipe `|`)
* Because the stack is just `RAX` and `RDX`, ensuring `RAX` is the active "Top of Stack" before executing a word is vital.
* The `xchg rax, rdx` instruction compiles to a tiny 2-byte opcode: `48 92`.
* **Definitions:** There are no `begin` or `end` words. A magenta pipe token (`|`) implicitly signals the start of a new definition. The JIT reacts to this by:
1. Emitting a `RET` (`C3`) to close the *previous* definition.
2. Emitting `48 92` (`xchg rax, rdx`) to ensure proper stack alignment for the *new* definition.
### Color Semantics and Code Generation (From Transcript & OCR)
* **Magenta (`|`):** Definition boundary (`RET` + `xchg rax, rdx`).
* **White (Call):** A compile-time call. Emits a direct `CALL` instruction or a `JMP RAX` (e.g., `FFE0`) if optimizing a tail call.
* **Green (Load):** Emits a read from memory: `mov rax, [global_offset]`.
* **Red (Store):** Emits a write to memory: `mov [global_offset], rax`.
* **Yellow (Execute/Immediate):** A highly overloaded color used for runtime execution, immediate invocation of lambdas, or prefix accessors (like struct member reading).
* **Cyan (Literal):** Compiles an immediate value load: `mov rax, imm`.
* **Blue (Comment):** Stored directly in the token payload (3 characters per 24-bit payload) without polluting the global dictionary.
## 3. Global Memory vs. Local Variables
Onat heavily critiques the conventional wisdom of avoiding global variables, specifically calling out Rust for forcing developers to pass state through 30 layers of call stacks.
* **Implicit Register Passing:** For passing transient state (like the active UI element's `slot ID`), he implicitly passes the value in a dedicated register (e.g., `R12D`) across functions, completely bypassing any need to push it to a stack.
* **Single-Register Memory Base:** He dedicates a single CPU register to act as the base pointer for all program memory. This gives instant `[BASE_REG + offset]` access to "gigabytes of state."
* **The "Tape Drive" in Practice:** Instead of a stack, data needed for complex API calls (like Vulkan initialization) is pre-scattered into these known global offsets using Red (Store) words, and then passed via a single pointer.
## 4. Dictionary Management and The "Deck"
Unlike text-based Forths that require hashing, KYRA uses a pure binary index map.
* **24-Bit Indices:** Words are stored as 24-bit indices pointing to 8-byte cells. (Onat notes his next iteration moves to 32-bit indices + a separate 1-byte tag array, exactly matching Lottes's `x68` annotation model).
* **Visual Organization (The "Scrolls"):** The dictionary is explicitly organized by the programmer into 16-word horizontal "scrolls" (e.g., one scroll for "Vulkan API", another for "Math").
* **IP Protection:** Because the dictionary mapping is separate from the source array, you can ship the binary source indices without the dictionary symbols, effectively stripping the symbols while retaining the executable structure.
## 5. Control Flow: Basic Blocks `[ ]` and Lambdas `{ }`
KYRA eliminates standard Abstract Syntax Trees (ASTs) and `if/else/then` branching.
* **Basic Blocks `[ ]`:** These visually constrain the assembly output. They provide implicit begin, link (else), and end jump targets for the JIT to resolve relative offsets within a limited scope.
* **Lambdas `{ }`:** A lambda (colored Yellow `{`) does not execute inline. The JIT compiles the block of code elsewhere in the arena and leaves its executable memory address in `RAX`.
* **Conditionals:** To perform an `IF`:
1. Evaluate a condition (e.g., `luma > 0.6`).
2. Write the boolean result to a dedicated global `condition` variable.
3. Define a lambda block containing the "true" branch (leaving its address in `RAX`).
4. Call an execution word that reads the `condition` variable, emits a `cmp condition, 0`, and executes a `jz` (jump if zero) to skip the lambda address stored in `RAX`.
## 6. FFI: Bridging to C and Vulkan (WinAPI equivalent)
Dealing with OS APIs and standard C libraries (like Vulkan and FFMPEG) requires satisfying the C Application Binary Interface (ABI).
* **RSP Alignment:** The hardware stack pointer (`RSP`) is exclusively used for the call stack (return addresses), eliminating buffer overflow vulnerabilities.
* **The FFI Dance:** When calling external C functions, Onat's macros explicitly read `RSP` into a temporary variable, align `RSP` to 16-bytes (a strict requirement for Windows/Linux x64 C ABI), execute the `CALL`, and then restore `RSP`.
* *(Note for Bootslop: We saw `CCALL1`, `CCALL2`, etc., in the OCR, confirming he uses specialized macro words to map the `RAX`/`RDX` stack and global variables into the standard `RCX`, `RDX`, `R8`, `R9` C-ABI registers before triggering the OS call).*
## 7. Development Workflow
* **Bug Triage over Asserts:** There are no unit tests or assertions. Bugs are found by commenting out blocks of code (disabling them) and hitting compile. Because compilation takes 8ms, binary searching for the crash point is faster than writing tests.
* **Free Printf / Data Flow:** By hovering over a word in the editor, the system automatically injects code to record `RAX` and `RDX` at that exact execution step, allowing the programmer to step through the data flow visually without running traditional debuggers.
---
### Conclusion for `bootslop`
The "Metaprogramming KYRA" talk confirms that our 2-register stack and "preemptive scatter" global memory model in `attempt_1/main.c` is the exact correct path.
The next major hurdles for `bootslop` will be:
1. Implementing the `xchg rax, rdx` definition boundary logic.
2. Creating an FFI bridge (like Onat's `CCALL`) that aligns `RSP` to 16 bytes and maps globals to WinAPI registers, allowing our minimal Forth to summon full OS windows and graphics.
3. Transitioning dictionary definitions from string-parsing to direct array index resolution.
+62
View File
@@ -0,0 +1,62 @@
# In-Depth Analysis: Neokineogfx - 4th And Beyond (Timothy Lottes)
This document synthesizes the insights extracted from the transcript and OCR analysis of Timothy Lottes's "4th And Beyond" presentation video (released under his Neokineogfx channel in 2026). It details the evolution of his Forth derivatives, the specifics of his "x68" encoding, and the mechanics of his "5th" system.
---
## 1. Evolution from Calculator to Forth
Lottes traces the ideal interactive tool back to Reverse Polish Notation (RPN) calculators like the HP48.
* **The Baseline:** Start with simple RPN math on a stack.
* **The Dictionary:** Introduce a dictionary that points to positions on the data stack or to executable code.
* **Color Semantics (ColorForth Inspired):**
* **Yellow (Execute):** Push numbers to the stack, or execute dictionary words.
* **Red (Define):** Define a word.
* **Green (Compile):** Compile words or push values during compilation.
* **Magenta (Variable):** Define a variable.
## 2. The Branch Misprediction Problem
Standard Forth causes severe CPU pipeline stalls (averaging 16-clock stalls on architectures like Zen 2) due to constant branch misprediction when interpreting tags or navigating the dictionary lookup loop.
* **Solution - The Folded Interpreter:** Lottes mitigates this by folding a tiny (5-byte) interpreter directly into the end of every compiled word.
* By ending every word with its own fetch/dispatch logic (e.g., `LODSD`, lookup, `JMP`), the CPU's branch predictor gets unique slots for every transition, drastically improving execution speed.
## 3. The Architecture of "Source-Less" (x68)
To make manipulating binary data as easy as text, Lottes invented "x68"—a subset of x86-64 designed purely around 32-bit boundaries.
* **32-Bit Instruction Granularity:** Every x86-64 instruction is padded to exactly 4 bytes (or multiples of 4).
* **Prefix Padding:** x86-64 allows ignored prefixes (like `3E`, the DS segment override) and multi-byte NOPs to pad instructions.
* *Example (RET):* `C3` padded to `f0f c3` or `C3 90 90 90` (RET + NOPs).
* *Example (Inline Data):* Moving a 32-bit immediate is padded with `3E`s to ensure the immediate value is perfectly 32-bit aligned in the next memory slot.
* **Why?** This removes the complexity of variable-length instructions, turning compilation into an edit-time operation where the user simply copies and pastes 32-bit words.
## 4. Editor Mechanics & Annotation Overlay
The editor is an "Advanced 32-bit Hex Editor". The source code is literally the binary array.
* **Structure:** The file is split into blocks. For every 32-bit source word, there are 64 bits of annotation memory.
* **64-bit Annotation Layout:**
* 8 characters encoded in 7 bits each (56 bits total) acting as the human-readable Label/Note.
* 8-bit Tag. This tag dictates how the 32-bit value in memory is formatted in the editor (e.g., Hex Data, Absolute Address, Relative Address).
* **Visual Layout:** The editor displays lines with two elements per cell:
* Top: The Annotation string (color-coded by tag).
* Bottom: The 32-bit interpreted value.
* **Auto-Relinking:** The editor dynamically recalculates `CALL`/`JMP` 32-bit relative offsets and 8-bit conditional jump offsets when tokens are inserted or deleted. The editor is the linker.
## 5. Free-Form Source & Argument Fetching
Lottes diverges from strict zero-operand Forth by introducing "preemptive scatter" arguments directly in the source stream.
* **Source is the Dictionary:** The 32-bit words are direct absolute memory pointers into the binary.
* **Argument Fetching:** Instead of pushing to a data stack before calling, words can read ahead in the instruction stream.
* `[RSI]` points to the current word.
* `[RSI+4]`, `[RSI+8]` can be fetched directly into registers (like `RCX`, `RDX`) within the word's implementation.
* **Benefits:** This reduces branch granularity and eliminates stack shuffling overhead, making it much faster for heavy code-generation tasks (like JITing GPU shaders).
## 6. The Self-Modifying OS Cartridge
To handle persistent storage and live updates without complex OS APIs, Lottes leverages Linux's memory mapping and dirty page writeback.
* **The Execution Loop:**
1. Launch `cart` (the binary).
2. The binary copies itself to `cart.bck` and launches `cart.bck`.
3. `cart.bck` maps the original `cart` file into memory (e.g., at the 6MiB mark) with Read/Write/Execute (RWE) permissions.
4. It maps an adjustable zero-fill memory space immediately following it.
5. It jumps into the interpreter.
* **Persistence:** Because the file is mapped into memory, any changes made in the editor modify the file in RAM. Linux's kernel automatically flushes "dirty pages" to the physical disk (e.g., every 30 seconds on SteamOS/SteamDeck). There is no "Save File" code required; data and code reside together and persist implicitly.
+3 -4
View File
@@ -51,8 +51,8 @@ $compiler_args += $flag_no_optimization
$compiler_args += $flag_diagnostics_absolute_paths $compiler_args += $flag_diagnostics_absolute_paths
$compiler_args += $flag_exceptions_disabled $compiler_args += $flag_exceptions_disabled
$compiler_args += ($flag_include + (join-path $path_root "attempt_1")) $compiler_args += ($flag_include + (join-path $path_root "attempt_1"))
$compiler_args += "-nostdlib" # $compiler_args += "-nostdlib"
$compiler_args += "-ffreestanding" # $compiler_args += "-ffreestanding"
$compiler_args += $flag_compile $compiler_args += $flag_compile
$compiler_args += $flag_path_output, $object $compiler_args += $flag_path_output, $object
$compiler_args += $unit_source $compiler_args += $unit_source
@@ -73,11 +73,10 @@ $linker_args += $flag_link_win_debug
$linker_args += $flag_link_win_pdb + $pdb $linker_args += $flag_link_win_pdb + $pdb
$linker_args += $flag_link_mapfile + $map $linker_args += $flag_link_mapfile + $map
$linker_args += $flag_link_win_subsystem_console $linker_args += $flag_link_win_subsystem_console
$linker_args += "/nodefaultlib" # $linker_args += "/nodefaultlib"
$linker_args += "kernel32.lib" $linker_args += "kernel32.lib"
$linker_args += "user32.lib" $linker_args += "user32.lib"
$linker_args += "gdi32.lib" $linker_args += "gdi32.lib"
$linker_args += "/entry:main"
$linker_args += $object $linker_args += $object
$linker_args | ForEach-Object { Write-Host $_ } $linker_args | ForEach-Object { Write-Host $_ }
-178
View File
@@ -1,178 +0,0 @@
$path_root = split-path -Path $PSScriptRoot -Parent
$misc = join-path $PSScriptRoot 'helpers/misc.ps1'
. $misc
$path_toolchain = join-path $path_root 'toolchain'
$path_rad = join-path $path_toolchain 'rad'
# --- Toolchain Executable Paths ---
$compiler = 'clang'
$optimizer = 'opt.exe'
$linker = 'lld-link.exe'
$archiver = 'llvm-lib.exe'
$radbin = join-path $path_rad 'radbin.exe'
$radlink = join-path $path_rad 'radlink.exe'
# https://clang.llvm.org/docs/ClangCommandLineReference.html
$flag_all_c = @('-x', 'c')
$flag_c11 = '-std=c11'
$flag_c23 = '-std=c23'
$flag_all_cpp = '-x c++'
$flag_charset_utf8 = '-fexec-charset=utf-8'
$flag_compile = '-c'
$flag_color_diagnostics = '-fcolor-diagnostics'
$flag_no_builtin_includes = '-nobuiltininc'
$flag_no_color_diagnostics = '-fno-color-diagnostics'
$flag_debug = '-g'
$flag_debug_codeview = '-gcodeview'
$flag_define = '-D'
$flag_emit_llvm = '-emit-llvm'
$flag_stop_after_gen = '-S'
$flag_exceptions_disabled = '-fno-exceptions'
$flag_rtti_disabled = '-fno-rtti'
$flag_diagnostics_absolute_paths = '-fdiagnostics-absolute-paths'
$flag_preprocess = '-E'
$flag_include = '-I'
$flag_section_data = '-fdata-sections'
$flag_section_functions = '-ffunction-sections'
$flag_library = '-l'
$flag_library_path = '-L'
$flag_linker = '-Wl,'
$flag_link_dll = '/DLL'
$flag_link_mapfile = '/MAP:'
$flag_link_optimize_references = '/OPT:REF'
$flag_link_win_subsystem_console = '/SUBSYSTEM:CONSOLE'
$flag_link_win_subsystem_windows = '/SUBSYSTEM:WINDOWS'
$flag_link_win_machine_32 = '/MACHINE:X86'
$flag_link_win_machine_64 = '/MACHINE:X64'
$flag_link_win_debug = '/DEBUG'
$flag_link_win_pdb = '/PDB:'
$flag_link_win_path_output = '/OUT:'
$flag_link_no_incremental = '/INCREMENTAL:NO'
$flag_no_optimization = '-O0'
$flag_optimize_fast = '-O2'
$flag_optimize_size = '-O1'
$flag_optimize_intrinsics = '-Oi'
$flag_path_output = '-o'
$flag_preprocess_non_intergrated = '-no-integrated-cpp'
$flag_profiling_debug = '-fdebug-info-for-profiling'
$flag_set_stack_size = '-stack='
$flag_syntax_only = '-fsyntax-only'
$flag_target_arch = '-target'
$flag_time_trace = '-ftime-trace'
$flag_verbose = '-v'
$flag_wall = '-Wall'
$flag_warning = '-W'
$flag_warnings_as_errors = '-Werror'
$flag_nologo = '/nologo'
$path_build = join-path $path_root 'build'
if ( -not(test-path -Path $path_build) ) {
new-item -ItemType Directory -Path $path_build
}
push-location $path_build
# --- File Paths ---
$unit_name = "simple"
$unit_source = join-path $path_root "code\C\$unit_name.c"
$ir_unoptimized = join-path $path_build "$unit_name.ll"
$ir_optimized = join-path $path_build "$unit_name.opt.ll"
$object = join-path $path_build "$unit_name.obj"
$binary = join-path $path_build "$unit_name.exe"
$pdb = join-path $path_build "$unit_name.pdb"
$map = join-path $path_build "$unit_name.map"
# --- Stage 1: Compile C to LLVM IR ---
write-host "Stage 1: Compiling C to LLVM IR"
$compiler_args = @()
# $compiler_args += $flag_stop_after_gen
# $compiler_args += $flag_emit_llvm
$compiler_args += ($flag_define + 'BUILD_DEBUG=1')
$compiler_args += $flag_debug
# $compiler_args += $flag_debug_codeview
$compiler_args += $flag_wall
# $compiler_args += $flag_charset_utf8
$compiler_args += $flag_c23
$compiler_args += $flag_no_optimization
# $compiler_args += $flag_no_builtin_includes
$compiler_args += $flag_diagnostics_absolute_paths
$compiler_args += $flag_rtti_disabled
$compiler_args += $flag_exceptions_disabled
$compiler_args += ($flag_include + $path_root)
$compiler_args += $flag_compile
$compiler_args += $flag_path_output, $object
$compiler_args += $unit_source
$compiler_args | ForEach-Object { Write-Host $_ }
$stage1_time = Measure-Command { & $compiler $compiler_args }
write-host "Compilation took $($stage1_time.TotalMilliseconds)ms"
# write-host "IR generation took $($stage1_time.TotalMilliseconds)ms"
write-host
# --- Stage 2: Manually Optimize LLVM IR ---
if ($false) {
write-host "Manually Optimizing LLVM IR with 'opt'"
$optimization_passes = @(
'-sroa', # Scalar Replacement Of Aggregates
'-early-cse', # Early Common Subexpression Elimination
'-instcombine' # Instruction Combining
)
$optimizer_args = @(
$optimization_passes,
$ir_unoptimized,
$flag_path_output,
$ir_optimized
)
$optimizer_args | ForEach-Object { Write-Host $_ }
$stage2_time = Measure-Command { & $optimizer $optimizer_args }
write-host "Optimization took $($stage2_time.TotalMilliseconds)ms"
write-hosts
write-host "Compiling LLVM IR to Object File with 'clang'"
$ir_to_obj_args = @()
$ir_to_obj_args += $flag_compile
$ir_to_obj_args += $flag_path_output, $object
$ir_to_obj_args += $ir_optimized
$ir_to_obj_args | ForEach-Object { Write-Host $_ }
$stage3_time = Measure-Command { & $compiler $ir_to_obj_args }
write-host "Object file generation took $($stage3_time.TotalMilliseconds)ms"
write-host
}
if ($true) {
# write-host "Linking with lld-link"
$linker_args = @()
$linker_args += $flag_nologo
$linker_args += $flag_link_win_machine_64
$linker_args += $flag_link_no_incremental
$linker_args += ($flag_link_win_path_output + $binary)
$linker_args += "$flag_link_win_debug"
$linker_args += $flag_link_win_pdb + $pdb
$linker_args += $flag_link_mapfile + $map
$linker_args += $flag_link_win_subsystem_console
$linker_args += $object
# Diagnoistc print for the args
$linker_args | ForEach-Object { Write-Host $_ }
$linking_time = Measure-Command { & $linker $linker_args }
write-host "Linking took $($linking_time.TotalMilliseconds)ms"
write-host
}
if ($false) {
write-host "Dumping Debug Info"
$rbin_out = '--out:'
$rbin_dump = '--dump'
$rdi = join-path $path_build "$unit_name.rdi"
$rdi_listing = join-path $path_build "$unit_name.rdi.list"
$nargs = @($pdb, ($rbin_out + $rdi))
& $radbin $nargs
$nargs = @($rbin_dump, $rdi)
$dump = & $radbin $nargs
$dump > $rdi_listing
}
Pop-Location
BIN
View File
Binary file not shown.