# Reconstructed from a collapsed git diff: new file estimate_context.py
# (mode 100644, index 0000000..ae3d4cb).
import os

REFERENCES_DIR = "C:/projects/forth/bootslop/references"


def estimate_tokens(references_dir=None, *, chars_per_token=4, tokens_per_image=258):
    """Estimate the context-token cost of a directory of reference material.

    Walks ``references_dir`` recursively. Files ending in ``.txt``, ``.md`` or
    ``.csv`` are read as UTF-8 (undecodable bytes are dropped) and their
    characters are counted; files ending in ``.jpg``, ``.jpeg`` or ``.png``
    are only counted, never opened. Extension matching is case-insensitive.
    Every other file is ignored. A summary is printed to stdout.

    Args:
        references_dir: Directory to scan. ``None`` (the default) scans the
            module-level ``REFERENCES_DIR``.
        chars_per_token: Characters assumed per text token. The default of 4
            is a rough approximation for English text.
        tokens_per_image: Flat token cost charged per image. The default of
            258 is the approximate Gemini 1.5 per-image cost.

    Returns:
        The estimated total token count (text tokens plus image tokens).

    Raises:
        ValueError: If ``chars_per_token`` is not positive.
    """
    if chars_per_token <= 0:
        raise ValueError("chars_per_token must be positive")
    if references_dir is None:
        # Resolved at call time so the module constant can be overridden.
        references_dir = REFERENCES_DIR

    text_extensions = {".txt", ".md", ".csv"}
    image_extensions = {".jpg", ".jpeg", ".png"}

    total_text_chars = 0
    total_images = 0

    for root, _, files in os.walk(references_dir):
        for file in files:
            ext = os.path.splitext(file)[1].lower()

            if ext in image_extensions:
                total_images += 1
                continue
            if ext not in text_extensions:
                continue

            filepath = os.path.join(root, file)
            try:
                with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                    total_text_chars += len(f.read())
            except OSError as e:
                # Best effort: an unreadable file is reported and skipped so
                # one bad file does not abort the whole scan.
                print(f"Error reading {filepath}: {e}")

    estimated_text_tokens = total_text_chars // chars_per_token
    estimated_image_tokens = total_images * tokens_per_image
    total_tokens = estimated_text_tokens + estimated_image_tokens

    print(f"Total Text Files Scanned: {total_text_chars} characters (~{estimated_text_tokens} tokens)")
    print(f"Total Images Scanned: {total_images} images (~{estimated_image_tokens} tokens)")
    print("---")
    print(f"Estimated Total Context Cost: ~{total_tokens} tokens")

    return total_tokens


if __name__ == "__main__":
    estimate_tokens()

# --- Remainder of the same commit, kept verbatim as a record. ---
# --- It is not part of estimate_context.py.                   ---
# diff --git a/ocr_interaction.py b/ocr_interaction.py
# index 0e45ae9..22d9b27 100644
# --- a/ocr_interaction.py
# +++ b/ocr_interaction.py
# @@ -35,5 +35,5 @@ async def ocr_single_image(img_path):
#      print(f"OCR result saved to {out_path}")
#
#  if __name__ == "__main__":
# -    img = r"C:\projects\forth\bootslop\references\X.com - Onat & Lottes Interaction 1.png"
# +    img = r"C:\projects\forth\bootslop\references\X.com - Lottes note on dictionary.png"
#      asyncio.run(ocr_single_image(img))
# diff --git a/references/Architectural_Consolidation.md b/references/Architectural_Consolidation.md
# new file
mode 100644 index 0000000..0a6f916 --- /dev/null +++ b/references/Architectural_Consolidation.md @@ -0,0 +1,43 @@ +# Architectural Consolidation: Zero-Overhead Sourceless ColorForth + +This document serves as the master blueprint for the research and curation phase, synthesizing the findings from Timothy Lottes, Onat Türkçüoğlu, and related high-performance minimalist systems. + +## 1. Core Philosophy +* **Sourceless:** The "source of truth" is a 32-bit token array, not a text file. No string parsing occurs at runtime. +* **Zero-Overhead:** Instant iteration (<5ms compilation) by emitting machine code directly from tokens. +* **Bounded Complexity:** Force complexity into data structures rather than code logic. +* **Hardware Locality:** Treat the register file as a global namespace; minimize or eliminate the data stack. + +## 2. Lottes' x68 Architecture (The Frontend/Editor) +* **32-Bit Instruction Granularity:** Every x86-64 instruction is padded to exactly 4 bytes (or multiples thereof) using ignored prefixes and multi-byte NOPs. + * *Example:* `RET` (0xC3) -> `C3 90 90 90`. +* **Token Format:** 32-bit words consisting of: + * **28 Bits:** Compressed name/string or value. + * **4 Bits:** Semantic Tag (Opcode, Abs Addr, Rel Addr, Immediate, etc.). +* **Annotation Overlay:** A parallel memory layer (e.g., 64-bit per token) stores metadata for the editor (colors, names, formatting tags) without polluting the executable. +* **Tooling Recommendation:** **ImHex** with a custom `.hexpat` pattern language can serve as the visual frontend for this annotation overlay. + +## 3. Onat's VAMP/KYRA Architecture (The Runtime/Codegen) +* **2-Item Register Stack:** Uses `RAX` and `RDX` as a tiny, hardware-resident stack. + * **The Swap:** `xchg rax, rdx` (3 bytes: `48 87 C2`) is emitted to rotate the "top of stack". +* **Aliased Global Namespace:** The CPU register file is treated as a shared, aliased memory space for functions. 
+* **Functions as Blocks:** Words are "free of arguments and returns" in the traditional sense. +* **Preemptive Scatter ("Tape Drive"):** Arguments are pre-placed into fixed, contiguous memory slots ("the tape") by the compiler/loader before execution. This eliminates "argument gathering" during function calls. + +## 4. Implementation Components +* **Emitter:** **Zydis Encoder API**. Zero-allocation, sub-5ms instruction generation. +* **Live Reload:** **Hot Runtime Linking** (Fredriksson style). Atomic pointer swapping at main-loop "safe points" to patch code in-place. +* **Threading Model:** **Direct Threaded Code (DTC)** for the initial dictionary/execution token (`xt`) baseline. +* **Wasm Parallels:** WebAssembly's linear memory and binary sectioning provide a modern reference for the "tape drive" and fixed-offset load/store model. + +## 5. Visual Semantics (ColorForth Mapping) +* **RED:** Define new word (Dictionary entry). +* **GREEN:** Compile word into current definition. +* **YELLOW/ORANGE:** Immediate execution (Macros/Editor commands). +* **CYAN/BLUE:** Variables, Addresses, Layout. +* **WHITE/DIM:** Comments, Annotations, UI. +* **MAGENTA:** Pointers, State modifiers. + +--- +*Curation Phase Status: COMPLETE* +*Ready for Strategy Phase: Pending Directive* diff --git a/references/Web_Search_Gather_Pass.md b/references/Web_Search_Gather_Pass.md new file mode 100644 index 0000000..8f8eaba --- /dev/null +++ b/references/Web_Search_Gather_Pass.md @@ -0,0 +1,26 @@ +# Last Ditch Web Search Gather Pass: AMD64 / x86-64 ColorForth Implementations + +While explicit source code dumps from Timothy Lottes (x68) and Onat Türkçüoğlu (VAMP/KYRA) remain mostly private or embedded in their video talks, a final scavenge of the web for "ColorForth x86-64" and "AMD64" revealed several open-source community attempts to bring Chuck Moore's ColorForth paradigms to modern 64-bit architectures. 
+ +These repositories can serve as structural references for bootstrapping the dictionary and memory layouts, even if they don't use the exact `xchg` register stack or 32-bit token array padding of the Lottes/Onat model. + +### Relevant GitHub Repositories Found: + +1. **`cthulhuology/forth64`** + - **Description:** "64bit forth because why not" + - **Relevance:** Explicitly states that "a lot of the instruction design and behavior is incorporating design elements from Chuck Moore's later work including Colorforth and Okad." It aims for native compilation on x86_64. This is a strong modern reference for 64-bit ColorForth-inspired compilation. + +2. **`narke/Einherjar`** + - **Description:** A "colorForth computing environment for amd64 and PowerPC Macs." + - **Relevance:** A direct attempt at building a ColorForth environment specifically targeting the AMD64 (x86-64) architecture. + +3. **`phreda4/r3js`** + - **Description:** "r4, colorforth, 64 bits, web." + - **Relevance:** While javascript/web-focused, it implements 64-bit ColorForth concepts (R3/R4 dialects are minimalist concatenative derivations). + +4. **`ljmccarthy/crunchyforth`** + - **Description:** "Forth compiler written in x86 assembly (inspired by colorForth)." + - **Relevance:** Archived, but provides a baseline for how colorForth inspiration translates directly into raw x86 assembly. + +### Synthesis Notes: +The broader web confirms that the extreme "sourceless" 32-bit hex-editor frontend (Lottes) and the 2-register `xchg` stack (Onat) are highly bespoke, proprietary innovations built *on top* of the ColorForth philosophy. The open-source community provides the base x86-64 ColorForth building blocks (like `forth64`), but the "zero-overhead / UI-as-OS" paradigm will need to be synthesized from the blueprints we've consolidated in `Architectural_Consolidation.md`. 
diff --git a/references/X.com - Lottes note on dictionary.png b/references/X.com - Lottes note on dictionary.png new file mode 100644 index 0000000..60258b8 Binary files /dev/null and b/references/X.com - Lottes note on dictionary.png differ diff --git a/references/X.com - Lottes note on dictionary.png.ocr.md b/references/X.com - Lottes note on dictionary.png.ocr.md new file mode 100644 index 0000000..2c3d496 --- /dev/null +++ b/references/X.com - Lottes note on dictionary.png.ocr.md @@ -0,0 +1,17 @@ +# OCR Thread: X.com - Lottes note on dictionary.png + +NOTimothyLottes +@NOTimothyLottes +Woke up with the realization that comments as ignored identifier prefix +characters might solve the radically limited 2 character direct mapped +dictionary problem for a minimal forth like language +10:21 AM • Sep 8, 2025 1,173 +Views +02 +Post your reply +NOTimothyLottes @NOTimothyLottes Sep 8, 2025 +Reply +Also one could replace return with conditional return if signed, pair that +with call and that removes the need for other branching constructs like +'if/then'. +948 \ No newline at end of file