From 1b71b748dbc5eb03f1a3b3942e87fa76d6dd1ab5 Mon Sep 17 00:00:00 2001
From: Ed_
Date: Sun, 22 Feb 2026 11:22:08 -0500
Subject: [PATCH] wip docs

---
 MainContext.md             | 22 +++++++++++++---------
 config.toml                |  6 +++---
 docs/Readme.md             |  2 ++
 docs/guide_architecture.md | 11 ++++++-----
 docs/guide_tools.md        |  7 +++++--
 manual_slop.toml           |  4 ++--
 6 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/MainContext.md b/MainContext.md
index 2833022..841ad25 100644
--- a/MainContext.md
+++ b/MainContext.md
@@ -87,9 +87,9 @@ Is a local GUI tool for manually curating and sending context to AI APIs. It agg
 - All tool calls (script + result/rejection) are appended to `_tool_log` and displayed in the Tool Calls panel

 **Dynamic file context refresh (ai_client.py):**
-- After every tool call round, all project files from `file_items` are re-read from disk via `_reread_file_items()`
-- For Anthropic: the refreshed file contents are injected as a `text` block appended to the `tool_results` user message, prefixed with `[FILES UPDATED]` and an instruction not to re-read them
-- For Gemini: files are re-read (updating the `file_items` list in place) but cannot be injected into tool results due to Gemini's structured function response format
+- After the last tool call in each round, all project files from `file_items` are re-read from disk via `_reread_file_items()`. The `file_items` variable is reassigned so subsequent rounds see fresh content.
+- For Anthropic: the refreshed file contents are injected as a `text` block appended to the `tool_results` user message, prefixed with `[FILES UPDATED]` and an instruction not to re-read them.
+- For Gemini: refreshed file contents are appended to the last function response's `output` string as a `[SYSTEM: FILES UPDATED]` block. On the next tool round, stale `[FILES UPDATED]` blocks are stripped from history and old tool outputs are truncated to `_history_trunc_limit` characters to control token growth.
 - `_build_file_context_text(file_items)` formats the refreshed files as markdown code blocks (same format as the original context)
 - The `tool_result_send` comms log entry filters out the injected text block (only logs actual `tool_result` entries) to keep the comms panel clean
 - `file_items` flows from `aggregate.build_file_items()` → `gui.py` `self.last_file_items` → `ai_client.send(file_items=...)` → `_send_anthropic(file_items=...)` / `_send_gemini(file_items=...)`
@@ -142,9 +142,11 @@ Entry layout: index + timestamp + direction + kind + provider/model header row,
 - `close_session()` flushes and closes both file handles; called just before `dpg.destroy_context()`

 **Anthropic prompt caching:**
-- System prompt sent as an array with `cache_control: ephemeral` on the text block
-- Last tool in `_ANTHROPIC_TOOLS` has `cache_control: ephemeral`; system + tools prefix is cached together after the first request
-- First user message content[0] is the context block with `cache_control: ephemeral`; content[1] is the user question without cache control
+- System prompt + context are combined into one string, chunked into <=120k char blocks, and sent as the `system=` parameter array. Only the LAST chunk gets `cache_control: ephemeral`, so the entire system prefix is cached as one unit.
+- Last tool in `_ANTHROPIC_TOOLS` (`run_powershell`) has `cache_control: ephemeral`; this means the tools prefix is cached together with the system prefix after the first request.
+- The user message is sent as a plain `[{"type": "text", "text": user_message}]` block with NO cache_control. The context lives in `system=`, not in the first user message.
+- The tools list is built once per session via `_get_anthropic_tools()` and reused across all API calls within the tool loop, avoiding redundant Python-side reconstruction.
+- `_strip_cache_controls()` removes stale `cache_control` markers from all history entries before each API call, ensuring only the stable system/tools prefix consumes cache breakpoint slots.
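+
+A minimal sketch of the chunking rule (`CHUNK_LIMIT` and `build_system_blocks` are illustrative names, not the actual ai_client.py identifiers):
+
+```python
+# Sketch only; CHUNK_LIMIT and build_system_blocks are assumed names.
+CHUNK_LIMIT = 120_000  # chars per system block
+
+def build_system_blocks(system_prompt: str, md_content: str) -> list[dict]:
+    combined = f"{system_prompt}\n\n{md_content}"
+    blocks = [
+        {"type": "text", "text": combined[i:i + CHUNK_LIMIT]}
+        for i in range(0, len(combined), CHUNK_LIMIT)
+    ]
+    # Only the LAST block carries cache_control, so the whole
+    # system prefix is cached as a single unit.
+    blocks[-1]["cache_control"] = {"type": "ephemeral"}
+    return blocks
+```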
 - Cache stats (creation tokens, read tokens) are surfaced in the comms log usage dict and displayed in the Comms History panel

 **Data flow:**
@@ -190,15 +192,17 @@ Entry layout: index + timestamp + direction + kind + provider/model header row,
 **Known extension points:**
 - Add more providers by adding a section to `credentials.toml`, a `_list_*` and `_send_*` function in `ai_client.py`, and the provider name to the `PROVIDERS` list in `gui.py`
-- System prompt support could be added as a field in the project `.toml` and passed in `ai_client.send()`
 - Discussion history excerpts could be individually toggleable for inclusion in the generated md
 - `MAX_TOOL_ROUNDS` in `ai_client.py` caps agentic loops at 10 rounds; adjustable
 - `COMMS_CLAMP_CHARS` in `gui.py` controls the character threshold for clamping heavy payload fields in the Comms History panel
 - Additional project metadata (description, tags, created date) could be added to `[project]` in the per-project toml

 ### Gemini Context Management
-- Investigating ways to prevent context duplication in _gemini_chat history, as currently {md_content} is prepended to the user message on every single request, causing history bloat.
-- Discussing explicit Gemini Context Caching API (client.caches.create()) to store read-only file context and avoid re-reading files across sessions.
+- Gemini uses explicit caching via `client.caches.create()` to store the `system_instruction` + tools as an immutable cached prefix with a 1-hour TTL. The cache is created once per chat session.
+- When context changes (detected via `md_content` hash), the old cache is deleted, a new cache is created, and chat history is migrated to a fresh chat session pointing at the new cache.
+- If cache creation fails (e.g., content is under the minimum token threshold: 1024 for Flash, 4096 for Pro), the system falls back to inline `system_instruction` in the chat config. Implicit caching may still provide cost savings in this case.
+- The context block lives inside `system_instruction`, NOT in user messages, preventing history bloat across turns.
+- On cleanup/exit, active caches are deleted via `ai_client.cleanup()` to prevent orphaned billing.
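+
+A hedged sketch of the cache lifecycle using the google-genai SDK (the `ensure_cache` helper and its `state` dict are assumptions, not the actual ai_client.py code):
+
+```python
+import hashlib
+
+from google import genai
+from google.genai import types
+
+# Assumed helper, not ai_client.py's implementation.
+def ensure_cache(client: genai.Client, model: str, system_text: str, state: dict):
+    digest = hashlib.sha256(system_text.encode()).hexdigest()
+    if state.get("hash") == digest:
+        return state.get("cache")  # context unchanged: reuse the cache
+    if state.get("cache"):
+        client.caches.delete(name=state["cache"])  # drop the stale cache
+    try:
+        cache = client.caches.create(
+            model=model,
+            config=types.CreateCachedContentConfig(
+                system_instruction=system_text,
+                ttl="3600s",  # 1-hour TTL
+            ),
+        )
+        state.update(hash=digest, cache=cache.name)
+        return cache.name
+    except Exception:
+        # Content under the minimum cacheable size is rejected;
+        # the caller falls back to inline system_instruction.
+        state.update(hash=digest, cache=None)
+        return None
+```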
 ### Latest Changes
 - Removed `Config` panel from the GUI to streamline per-project configuration.
diff --git a/config.toml b/config.toml
index d1ee746..d175f70 100644
--- a/config.toml
+++ b/config.toml
@@ -1,6 +1,6 @@
 [ai]
-provider = "gemini"
-model = "gemini-3.1-pro-preview"
+provider = "anthropic"
+model = "claude-sonnet-4-6"
 temperature = 0.6000000238418579
 max_tokens = 12000
 history_trunc_limit = 8000
@@ -17,4 +17,4 @@ paths = [
   "manual_slop.toml",
   "C:/projects/forth/bootslop/bootslop.toml",
 ]
-active = "manual_slop.toml"
+active = "C:/projects/forth/bootslop/bootslop.toml"
diff --git a/docs/Readme.md b/docs/Readme.md
index 9b558f4..555e325 100644
--- a/docs/Readme.md
+++ b/docs/Readme.md
@@ -8,6 +8,8 @@ A GUI orchestrator for local LLM-driven coding sessions, built to prevent the AI
 The heart of context management.

+> **Note:** The Config panel has been removed. Output directory and auto-add history settings are now integrated into the Projects and Discussion History panels respectively.
+
 - **Configuration:** You specify the Git Directory (for commit tracking) and a Main Context File (the markdown file containing your project's notes and schema).
 - **Word-Wrap Toggle:** Dynamically swaps text rendering in large read-only panels (Responses, Comms Log) between unwrapped (ideal for viewing precise code formatting) and wrapped (ideal for prose).
 - **Project Switching:** Switch between different .toml profiles to instantly swap out your entire active file list, discussion history, and settings.
diff --git a/docs/guide_architecture.md b/docs/guide_architecture.md
index d72c47e..39529aa 100644
--- a/docs/guide_architecture.md
+++ b/docs/guide_architecture.md
@@ -44,14 +44,15 @@ The communication model is unified under ai_client.py, which normalizes the Gemi
 The loop is defined as follows:

-1. **Prompt Injection:** The aggregated Markdown context and system prompt are injected. (Gemini injects this directly into system_instruction at chat instantiation to prevent history bloat; Anthropic chunks this into cache_control: ephemeral blocks).
-2. **Execution Loop:** A MAX_TOOL_ROUNDS (default 10) bounded loop begins.
+1. **Prompt Injection:** The aggregated Markdown context and system prompt are injected. For Gemini, the `system_instruction` and tools are stored in an explicit cache via `client.caches.create()` with a 1-hour TTL; if cache creation fails (under the minimum token threshold), it falls back to inline `system_instruction`. When context changes mid-session, the old cache is deleted and a new one is created. For Anthropic, the system prompt + context are sent as `system=` blocks with `cache_control: ephemeral` on the last chunk, and the last tool definition carries `cache_control: ephemeral`.
+2. **Execution Loop:** A bounded loop of `MAX_TOOL_ROUNDS` (default 10) begins. The tools list for Anthropic is built once per session and reused.
 3. The AI provider is polled.
-4. If the provider's stop_reason is ool_use:
+4. If the provider's stop_reason is `tool_use`:
    1. The loop parses the requested tool (either a read-only MCP tool or the destructive PowerShell tool).
    2. If PowerShell, it dispatches a blocking event to the Main Thread (see *On Tool Execution & Concurrency*).
-   3. Once the result is retrieved, the loop executes a **Dynamic Refresh** (_reread_file_items). Any files currently tracked by the project are pulled from the disk fresh.
-   4. The tool result, appended with the fresh [FILES UPDATED] block, is sent back to the provider.
+   3. Once the last tool result in the batch is retrieved, the loop executes a **Dynamic Refresh** (`_reread_file_items`). Any files currently tracked by the project are pulled from disk fresh. The `file_items` variable is reassigned so subsequent tool rounds see the updated content.
+   4. For Anthropic: the refreshed file contents are appended as a text block to the tool_results user message. For Gemini: the refreshed contents are appended to the last function response's output string. In both cases, the block is prefixed with `[FILES UPDATED]` / `[SYSTEM: FILES UPDATED]`.
+   5. On subsequent rounds, stale file-refresh blocks from previous turns are stripped from history to prevent token accumulation. For Gemini, old tool outputs exceeding `_history_trunc_limit` characters are also truncated.
 5. Once the model outputs standard text, the loop terminates and yields the string back to the GUI callback.
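+
+The refresh-and-strip cycle could look like this sketch (`FILES_UPDATED_TAG`, `strip_stale_refresh`, and `append_refresh` are assumed names; `_build_file_context_text` is the real formatter named in MainContext.md):
+
+```python
+FILES_UPDATED_TAG = "[FILES UPDATED]"  # assumed constant name
+
+def strip_stale_refresh(history: list[dict]) -> None:
+    """Drop file-refresh text blocks injected during earlier rounds."""
+    for msg in history:
+        content = msg.get("content")
+        if isinstance(content, list):
+            msg["content"] = [
+                block for block in content
+                if not (block.get("type") == "text"
+                        and block.get("text", "").startswith(FILES_UPDATED_TAG))
+            ]
+
+def append_refresh(tool_results: list[dict], file_items: list) -> None:
+    """Attach one fresh snapshot after the last tool result of the round."""
+    snapshot = _build_file_context_text(file_items)  # real helper per MainContext.md
+    tool_results.append({"type": "text", "text": f"{FILES_UPDATED_TAG}\n{snapshot}"})
+```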
 ### On Tool Execution & Concurrency
diff --git a/docs/guide_tools.md b/docs/guide_tools.md
index ec02adc..34e9c7e 100644
--- a/docs/guide_tools.md
+++ b/docs/guide_tools.md
@@ -46,5 +46,8 @@ The core system prompt explicitly guides the AI on how to use this tool safely:
 ### Synthetic Context Refresh

-Immediately after **any** tool call turn finishes, ai_client runs _reread_file_items. It fetches the latest disk state of all files in the current project context and appends them as a synthetic [FILES UPDATED] message to the tool result.
-This means if the AI writes to a file, it instantly "sees" the modification in its next turn without having to waste a cycle calling read_file.
+After the **last** tool call in each round finishes (when multiple tools are called in a single round, the refresh happens once after all of them), ai_client runs `_reread_file_items`. It fetches the latest disk state of all files in the current project context. The `file_items` variable is reassigned so subsequent tool rounds within the same request use the fresh content.
+
+For Anthropic, the refreshed contents are injected as a text block in the `tool_results` user message. For Gemini, they are appended to the last function response's output string. In both cases, the block is prefixed with `[FILES UPDATED]` / `[SYSTEM: FILES UPDATED]`.
+
+On the next tool round, stale file-refresh blocks from previous rounds are stripped from history to prevent token accumulation. This means if the AI writes to a file, it instantly "sees" the modification in its next turn without wasting a cycle on `read_file`, and the cost of carrying the full file snapshot is limited to one round.
diff --git a/manual_slop.toml b/manual_slop.toml
index 3036cee..6bd81fe 100644
--- a/manual_slop.toml
+++ b/manual_slop.toml
@@ -1,7 +1,7 @@
 [project]
 name = "manual_slop"
 git_dir = "C:/projects/manual_slop"
-system_prompt = "Make sure to update MainContext.md every time.\nMake destructive modifications to the project, ITS OK, I HAVE GIT HISTORY TO MANAGE THE PROJECTS."
+system_prompt = "Make sure to update MainContext.md every time.\nMake destructive modifications to the project, ITS OK, I HAVE GIT HISTORY TO MANAGE THE PROJECTS.\nAvoid reading manual_slop.toml; it's expensive as it holds the history of multiple discussions.\n"
 main_context = "C:/projects/manual_slop/MainContext.md"
 word_wrap = true
@@ -147,7 +147,7 @@ history = [

 [discussion.discussions."docs writeup"]
 git_commit = "bf2d09f3fd817d64fbf6b4aa667e2b635b6fbc0e"
-last_updated = "2026-02-22T10:34:24"
+last_updated = "2026-02-22T11:08:58"
 history = [
   "@2026-02-22T08:56:39\nUser:\nLets write extensive documentation in the same style that I used for my VEFontCache-Oodin project.\nI added it's directories to your context.",
   "@2026-02-22T08:56:58\nAI:\n(No text returned)",
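
For reference, a sketch of the Gemini tool-output truncation described in guide_tools.md and MainContext.md (the function name and the `[TRUNCATED ...]` marker are illustrative; the 8000-char default mirrors `history_trunc_limit` in config.toml):

```python
def truncate_tool_output(output: str, limit: int = 8000) -> str:
    """Clamp an old tool output to `limit` chars, per _history_trunc_limit."""
    if len(output) <= limit:
        return output
    dropped = len(output) - limit
    # The marker format is an assumption, not the actual ai_client.py string.
    return output[:limit] + f"\n[TRUNCATED {dropped} chars]"
```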