From 1b71b748dbc5eb03f1a3b3942e87fa76d6dd1ab5 Mon Sep 17 00:00:00 2001
From: Ed_
Date: Sun, 22 Feb 2026 11:22:08 -0500
Subject: [PATCH] wip docs

---
 MainContext.md             | 22 +++++++++++++---------
 config.toml                |  6 +++---
 docs/Readme.md             |  2 ++
 docs/guide_architecture.md | 11 ++++++-----
 docs/guide_tools.md        |  7 +++++--
 manual_slop.toml           |  4 ++--
 6 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/MainContext.md b/MainContext.md
index 2833022..841ad25 100644
--- a/MainContext.md
+++ b/MainContext.md
@@ -87,9 +87,9 @@ Is a local GUI tool for manually curating and sending context to AI APIs. It agg
 - All tool calls (script + result/rejection) are appended to `_tool_log` and displayed in the Tool Calls panel

 **Dynamic file context refresh (ai_client.py):**
-- After every tool call round, all project files from `file_items` are re-read from disk via `_reread_file_items()`
-- For Anthropic: the refreshed file contents are injected as a `text` block appended to the `tool_results` user message, prefixed with `[FILES UPDATED]` and an instruction not to re-read them
-- For Gemini: files are re-read (updating the `file_items` list in place) but cannot be injected into tool results due to Gemini's structured function response format
+- After the last tool call in each round, all project files from `file_items` are re-read from disk via `_reread_file_items()`. The `file_items` variable is reassigned so subsequent rounds see fresh content.
+- For Anthropic: the refreshed file contents are injected as a `text` block appended to the `tool_results` user message, prefixed with `[FILES UPDATED]` and an instruction not to re-read them.
+- For Gemini: refreshed file contents are appended to the last function response's `output` string as a `[SYSTEM: FILES UPDATED]` block. On the next tool round, stale `[FILES UPDATED]` blocks are stripped from history and old tool outputs are truncated to `_history_trunc_limit` characters to control token growth.
 - `_build_file_context_text(file_items)` formats the refreshed files as markdown code blocks (same format as the original context)
 - The `tool_result_send` comms log entry filters out the injected text block (only logs actual `tool_result` entries) to keep the comms panel clean
 - `file_items` flows from `aggregate.build_file_items()` → `gui.py` `self.last_file_items` → `ai_client.send(file_items=...)` → `_send_anthropic(file_items=...)` / `_send_gemini(file_items=...)`
@@ -142,9 +142,11 @@ Entry layout: index + timestamp + direction + kind + provider/model header row,
 - `close_session()` flushes and closes both file handles; called just before `dpg.destroy_context()`

 **Anthropic prompt caching:**
-- System prompt sent as an array with `cache_control: ephemeral` on the text block
-- Last tool in `_ANTHROPIC_TOOLS` has `cache_control: ephemeral`; system + tools prefix is cached together after the first request
-- First user message content[0] is the context block with `cache_control: ephemeral`; content[1] is the user question without cache control
+- System prompt + context are combined into one string, chunked into <=120k char blocks, and sent as the `system=` parameter array. Only the LAST chunk gets `cache_control: ephemeral`, so the entire system prefix is cached as one unit.
+- Last tool in `_ANTHROPIC_TOOLS` (`run_powershell`) has `cache_control: ephemeral`; this means the tools prefix is cached together with the system prefix after the first request.
+- The user message is sent as a plain `[{"type": "text", "text": user_message}]` block with NO cache_control. The context lives in `system=`, not in the first user message.
+- The tools list is built once per session via `_get_anthropic_tools()` and reused across all API calls within the tool loop, avoiding redundant Python-side reconstruction.
+- `_strip_cache_controls()` removes stale `cache_control` markers from all history entries before each API call, ensuring only the stable system/tools prefix consumes cache breakpoint slots.
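+
+A minimal sketch of the chunking rule (`CHUNK_LIMIT` and `build_system_blocks` are illustrative names, not the actual ai_client.py identifiers):
+
+```python
+# Sketch only; CHUNK_LIMIT and build_system_blocks are assumed names.
+CHUNK_LIMIT = 120_000  # chars per system block
+
+def build_system_blocks(system_prompt: str, md_content: str) -> list[dict]:
+    combined = f"{system_prompt}\n\n{md_content}"
+    blocks = [
+        {"type": "text", "text": combined[i:i + CHUNK_LIMIT]}
+        for i in range(0, len(combined), CHUNK_LIMIT)
+    ]
+    # Only the LAST block carries cache_control, so the whole
+    # system prefix is cached as a single unit.
+    blocks[-1]["cache_control"] = {"type": "ephemeral"}
+    return blocks
+```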
 - Cache stats (creation tokens, read tokens) are surfaced in the comms log usage dict and displayed in the Comms History panel

 **Data flow:**
@@ -190,15 +192,17 @@ Entry layout: index + timestamp + direction + kind + provider/model header row,
 **Known extension points:**
 - Add more providers by adding a section to `credentials.toml`, a `_list_*` and `_send_*` function in `ai_client.py`, and the provider name to the `PROVIDERS` list in `gui.py`
-- System prompt support could be added as a field in the project `.toml` and passed in `ai_client.send()`
 - Discussion history excerpts could be individually toggleable for inclusion in the generated md
 - `MAX_TOOL_ROUNDS` in `ai_client.py` caps agentic loops at 10 rounds; adjustable
 - `COMMS_CLAMP_CHARS` in `gui.py` controls the character threshold for clamping heavy payload fields in the Comms History panel
 - Additional project metadata (description, tags, created date) could be added to `[project]` in the per-project toml

 ### Gemini Context Management
-- Investigating ways to prevent context duplication in _gemini_chat history, as currently {md_content} is prepended to the user message on every single request, causing history bloat.
-- Discussing explicit Gemini Context Caching API (client.caches.create()) to store read-only file context and avoid re-reading files across sessions.
+- Gemini uses explicit caching via `client.caches.create()` to store the `system_instruction` + tools as an immutable cached prefix with a 1-hour TTL. The cache is created once per chat session.
+- When context changes (detected via `md_content` hash), the old cache is deleted, a new cache is created, and chat history is migrated to a fresh chat session pointing at the new cache.
+- If cache creation fails (e.g., content is under the minimum token threshold: 1024 for Flash, 4096 for Pro), the system falls back to inline `system_instruction` in the chat config. Implicit caching may still provide cost savings in this case.
+- The context block lives inside `system_instruction`, NOT in user messages, preventing history bloat across turns.
+- On cleanup/exit, active caches are deleted via `ai_client.cleanup()` to prevent orphaned billing.
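+
+A hedged sketch of the cache lifecycle using the google-genai SDK (the `ensure_cache` helper and its `state` dict are assumptions, not the actual ai_client.py code):
+
+```python
+import hashlib
+
+from google import genai
+from google.genai import types
+
+# Assumed helper, not ai_client.py's implementation.
+def ensure_cache(client: genai.Client, model: str, system_text: str, state: dict):
+    digest = hashlib.sha256(system_text.encode()).hexdigest()
+    if state.get("hash") == digest:
+        return state.get("cache")  # context unchanged: reuse the cache
+    if state.get("cache"):
+        client.caches.delete(name=state["cache"])  # drop the stale cache
+    try:
+        cache = client.caches.create(
+            model=model,
+            config=types.CreateCachedContentConfig(
+                system_instruction=system_text,
+                ttl="3600s",  # 1-hour TTL
+            ),
+        )
+        state.update(hash=digest, cache=cache.name)
+        return cache.name
+    except Exception:
+        # Content under the minimum cacheable size is rejected;
+        # the caller falls back to inline system_instruction.
+        state.update(hash=digest, cache=None)
+        return None
+```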
 ### Latest Changes
 - Removed `Config` panel from the GUI to streamline per-project configuration.
diff --git a/config.toml b/config.toml
index d1ee746..d175f70 100644
--- a/config.toml
+++ b/config.toml
@@ -1,6 +1,6 @@
 [ai]
-provider = "gemini"
-model = "gemini-3.1-pro-preview"
+provider = "anthropic"
+model = "claude-sonnet-4-6"
 temperature = 0.6000000238418579
 max_tokens = 12000
 history_trunc_limit = 8000
@@ -17,4 +17,4 @@ paths = [
   "manual_slop.toml",
   "C:/projects/forth/bootslop/bootslop.toml",
 ]
-active = "manual_slop.toml"
+active = "C:/projects/forth/bootslop/bootslop.toml"
diff --git a/docs/Readme.md b/docs/Readme.md
index 9b558f4..555e325 100644
--- a/docs/Readme.md
+++ b/docs/Readme.md
@@ -8,6 +8,8 @@ A GUI orchestrator for local LLM-driven coding sessions, built to prevent the AI
 The heart of context management.

+> **Note:** The Config panel has been removed. Output directory and auto-add history settings are now integrated into the Projects and Discussion History panels respectively.
+
 - **Configuration:** You specify the Git Directory (for commit tracking) and a Main Context File (the markdown file containing your project's notes and schema).
 - **Word-Wrap Toggle:** Dynamically swaps text rendering in large read-only panels (Responses, Comms Log) between unwrapped (ideal for viewing precise code formatting) and wrapped (ideal for prose).
 - **Project Switching:** Switch between different .toml profiles to instantly swap out your entire active file list, discussion history, and settings.
diff --git a/docs/guide_architecture.md b/docs/guide_architecture.md
index d72c47e..39529aa 100644
--- a/docs/guide_architecture.md
+++ b/docs/guide_architecture.md
@@ -44,14 +44,15 @@ The communication model is unified under ai_client.py, which normalizes the Gemi
 The loop is defined as follows:

-1. **Prompt Injection:** The aggregated Markdown context and system prompt are injected. (Gemini injects this directly into system_instruction at chat instantiation to prevent history bloat; Anthropic chunks this into cache_control: ephemeral blocks).
-2. **Execution Loop:** A MAX_TOOL_ROUNDS (default 10) bounded loop begins.
+1. **Prompt Injection:** The aggregated Markdown context and system prompt are injected. For Gemini, the `system_instruction` and tools are stored in an explicit cache via `client.caches.create()` with a 1-hour TTL; if cache creation fails (under the minimum token threshold), it falls back to inline `system_instruction`. When context changes mid-session, the old cache is deleted and a new one is created. For Anthropic, the system prompt + context are sent as `system=` blocks with `cache_control: ephemeral` on the last chunk, and the last tool definition carries `cache_control: ephemeral`.
+2. **Execution Loop:** A bounded loop of `MAX_TOOL_ROUNDS` (default 10) begins. The tools list for Anthropic is built once per session and reused.
 3. The AI provider is polled.
-4. If the provider's stop_reason is ool_use:
+4. If the provider's stop_reason is `tool_use`:
    1. The loop parses the requested tool (either a read-only MCP tool or the destructive PowerShell tool).
    2. If PowerShell, it dispatches a blocking event to the Main Thread (see *On Tool Execution & Concurrency*).
-   3. Once the result is retrieved, the loop executes a **Dynamic Refresh** (_reread_file_items). Any files currently tracked by the project are pulled from the disk fresh.
-   4. The tool result, appended with the fresh [FILES UPDATED] block, is sent back to the provider.
+   3. Once the last tool result in the batch is retrieved, the loop executes a **Dynamic Refresh** (`_reread_file_items`). Any files currently tracked by the project are pulled from disk fresh. The `file_items` variable is reassigned so subsequent tool rounds see the updated content.
+   4. For Anthropic: the refreshed file contents are appended as a text block to the tool_results user message. For Gemini: the refreshed contents are appended to the last function response's output string. In both cases, the block is prefixed with `[FILES UPDATED]` / `[SYSTEM: FILES UPDATED]`.
+   5. On subsequent rounds, stale file-refresh blocks from previous turns are stripped from history to prevent token accumulation. For Gemini, old tool outputs exceeding `_history_trunc_limit` characters are also truncated.
 5. Once the model outputs standard text, the loop terminates and yields the string back to the GUI callback.
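+
+The refresh-and-strip cycle could look like this sketch (`FILES_UPDATED_TAG`, `strip_stale_refresh`, and `append_refresh` are assumed names; `_build_file_context_text` is the real formatter named in MainContext.md):
+
+```python
+FILES_UPDATED_TAG = "[FILES UPDATED]"  # assumed constant name
+
+def strip_stale_refresh(history: list[dict]) -> None:
+    """Drop file-refresh text blocks injected during earlier rounds."""
+    for msg in history:
+        content = msg.get("content")
+        if isinstance(content, list):
+            msg["content"] = [
+                block for block in content
+                if not (block.get("type") == "text"
+                        and block.get("text", "").startswith(FILES_UPDATED_TAG))
+            ]
+
+def append_refresh(tool_results: list[dict], file_items: list) -> None:
+    """Attach one fresh snapshot after the last tool result of the round."""
+    snapshot = _build_file_context_text(file_items)  # real helper per MainContext.md
+    tool_results.append({"type": "text", "text": f"{FILES_UPDATED_TAG}\n{snapshot}"})
+```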
 ### On Tool Execution & Concurrency
diff --git a/docs/guide_tools.md b/docs/guide_tools.md
index ec02adc..34e9c7e 100644
--- a/docs/guide_tools.md
+++ b/docs/guide_tools.md
@@ -46,5 +46,8 @@ The core system prompt explicitly guides the AI on how to use this tool safely:
 ### Synthetic Context Refresh

-Immediately after **any** tool call turn finishes, ai_client runs _reread_file_items. It fetches the latest disk state of all files in the current project context and appends them as a synthetic [FILES UPDATED] message to the tool result.
-This means if the AI writes to a file, it instantly "sees" the modification in its next turn without having to waste a cycle calling read_file.
+After the **last** tool call in each round finishes (when multiple tools are called in a single round, the refresh happens once after all of them), ai_client runs `_reread_file_items`. It fetches the latest disk state of all files in the current project context. The `file_items` variable is reassigned so subsequent tool rounds within the same request use the fresh content.
+
+For Anthropic, the refreshed contents are injected as a text block in the `tool_results` user message. For Gemini, they are appended to the last function response's output string. In both cases, the block is prefixed with `[FILES UPDATED]` / `[SYSTEM: FILES UPDATED]`.
+
+On the next tool round, stale file-refresh blocks from previous rounds are stripped from history to prevent token accumulation. This means if the AI writes to a file, it instantly "sees" the modification in its next turn without wasting a cycle on `read_file`, and the cost of carrying the full file snapshot is limited to one round.
diff --git a/manual_slop.toml b/manual_slop.toml
index 3036cee..6bd81fe 100644
--- a/manual_slop.toml
+++ b/manual_slop.toml
@@ -1,7 +1,7 @@
 [project]
 name = "manual_slop"
 git_dir = "C:/projects/manual_slop"
-system_prompt = "Make sure to update MainContext.md every time.\nMake destructive modifications to the project, ITS OK, I HAVE GIT HISTORY TO MANAGE THE PROJECTS."
+system_prompt = "Make sure to update MainContext.md every time.\nMake destructive modifications to the project, ITS OK, I HAVE GIT HISTORY TO MANAGE THE PROJECTS.\nAvoid reading manual_slop.toml; it's expensive as it holds the history of multiple discussions.\n"
 main_context = "C:/projects/manual_slop/MainContext.md"
 word_wrap = true
@@ -147,7 +147,7 @@ history = [

 [discussion.discussions."docs writeup"]
 git_commit = "bf2d09f3fd817d64fbf6b4aa667e2b635b6fbc0e"
-last_updated = "2026-02-22T10:34:24"
+last_updated = "2026-02-22T11:08:58"
 history = [
   "@2026-02-22T08:56:39\nUser:\nLets write extensive documentation in the same style that I used for my VEFontCache-Oodin project.\nI added it's directories to your context.",
   "@2026-02-22T08:56:58\nAI:\n(No text returned)",
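
For reference, a sketch of the Gemini tool-output truncation described in guide_tools.md and MainContext.md (the function name and the `[TRUNCATED ...]` marker are illustrative; the 8000-char default mirrors `history_trunc_limit` in config.toml):

```python
def truncate_tool_output(output: str, limit: int = 8000) -> str:
    """Clamp an old tool output to `limit` chars, per _history_trunc_limit."""
    if len(output) <= limit:
        return output
    dropped = len(output) - limit
    # The marker format is an assumption, not the actual ai_client.py string.
    return output[:limit] + f"\n[TRUNCATED {dropped} chars]"
```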