From 1581380a43fbc947be934858df4260b2f5c91331 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Sun, 22 Feb 2026 11:57:23 -0500 Subject: [PATCH] final updates --- MainContext.md | 20 ++++++++++++++++++++ aggregate.py | 30 +++++++++++++++++++++++++----- ai_client.py | 39 +++++++++++++++++++++++++++------------ config.toml | 4 ++-- manual_slop.toml | 12 ++++++++++-- 5 files changed, 84 insertions(+), 21 deletions(-) diff --git a/MainContext.md b/MainContext.md index 841ad25..e67393a 100644 --- a/MainContext.md +++ b/MainContext.md @@ -244,3 +244,23 @@ Documentation has been completely rewritten matching the strict, structural form - `docs/guide_architecture.md`: Details the Python implementation algorithms, queue management for UI rendering, the specific AST heuristics used for context aggregation, and the distinct algorithms for trimming Anthropic history vs Gemini state caching. - `docs/Readme.md`: The core interface manual. - `docs/guide_tools.md`: Security architecture for `_is_allowed` paths and definitions of the read-only vs destructive tool pipeline. + +## Branch Analysis: master vs not_sure (2026-02-22) + +### Summary +The `not_sure` branch introduces a static/dynamic context split in the `send()` API signature, separating files+screenshots (cacheable, stable) from discussion history (changes every turn). This improves cache hit rates for both Anthropic and Gemini. + +### Current master branch API correctness +- **Anthropic**: Correct. System blocks with cache_control, SDK content block serialisation, history repair, stale file refresh stripping all work properly. +- **Gemini**: Correct after `patch_gemini_history.py` was applied. Uses `_get_gemini_history_list()` for safe SDK access, drops history in pairs to maintain alternating roles, explicit caching via `caches.create()`. + +### not_sure branch improvements +- **Anthropic**: Puts discussion history as a separate uncached system block after the cached static context. Better cache hit rates when discussion changes between turns. +- **Gemini**: Wraps discussion in `` tags in user messages and strips old ones from history via regex. Prevents discussion duplication across turns. + +### not_sure branch bugs (not merged from master) +- Uses `_gemini_chat.history` directly instead of `_get_gemini_history_list()` — will crash on newer google-genai SDK versions where `.history` was removed. +- Missing the pair-wise history dropping fix (drops single messages, breaking Gemini's alternating role requirement). + +### Recommended merge path +Cherry-pick the static/dynamic split from `not_sure` into `master` while keeping master's SDK safety fixes (`_get_gemini_history_list`, pair-wise dropping, `_content_block_to_dict`). \ No newline at end of file diff --git a/aggregate.py b/aggregate.py index 304ebc8..32b8585 100644 --- a/aggregate.py +++ b/aggregate.py @@ -126,6 +126,24 @@ def build_summary_section(base_dir: Path, files: list[str]) -> str: items = build_file_items(base_dir, files) return summarize.build_summary_markdown(items) +def build_static_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str: + """Build the static (cacheable) portion of the context: files + screenshots.""" + parts = [] + if files: + if summary_only: + parts.append("## Files (Summary)\n\n" + build_summary_section(base_dir, files)) + else: + parts.append("## Files\n\n" + build_files_section(base_dir, files)) + if screenshots: + parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots)) + return "\n\n---\n\n".join(parts) + +def build_dynamic_markdown(history: list[str]) -> str: + """Build the dynamic (changes every turn) portion: discussion history.""" + if history: + return "## Discussion History\n\n" + build_discussion_section(history) + return "" + def build_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str: parts = [] # STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits @@ -155,18 +173,20 @@ def run(config: dict) -> tuple[str, Path]: output_dir.mkdir(parents=True, exist_ok=True) increment = find_next_increment(output_dir, namespace) output_file = output_dir / f"{namespace}_{increment:03d}.md" - # Provide full files to trigger Gemini's 32k cache threshold and give the AI immediate context - markdown = build_markdown(base_dir, files, screenshot_base_dir, screenshots, history, - summary_only=False) + # Build static (files+screenshots) and dynamic (discussion) portions separately for better caching + static_md = build_static_markdown(base_dir, files, screenshot_base_dir, screenshots, summary_only=False) + dynamic_md = build_dynamic_markdown(history) + # Write combined markdown to disk for archival + markdown = f"{static_md}\n\n---\n\n{dynamic_md}" if static_md and dynamic_md else static_md or dynamic_md output_file.write_text(markdown, encoding="utf-8") file_items = build_file_items(base_dir, files) - return markdown, output_file, file_items + return static_md, dynamic_md, output_file, file_items def main(): with open("config.toml", "rb") as f: import tomllib config = tomllib.load(f) - markdown, output_file, _ = run(config) + static_md, dynamic_md, output_file, _ = run(config) print(f"Written: {output_file}") if __name__ == "__main__": diff --git a/ai_client.py b/ai_client.py index c32a4a1..ee8a9ad 100644 --- a/ai_client.py +++ b/ai_client.py @@ -465,16 +465,16 @@ def _get_gemini_history_list(chat): return chat.get_history() return [] -def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str: +def _send_gemini(static_md: str, dynamic_md: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str: global _gemini_chat, _gemini_cache from google.genai import types try: _ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir]) - sys_instr = f"{_get_combined_system_prompt()}\n\n\n{md_content}\n" + sys_instr = f"{_get_combined_system_prompt()}\n\n\n{static_md}\n" tools_decl = [_gemini_tool_declaration()] # DYNAMIC CONTEXT: Check if files/context changed mid-session - current_md_hash = hash(md_content) + current_md_hash = hash(static_md) old_history = None if _gemini_chat and getattr(_gemini_chat, "_last_md_hash", None) != current_md_hash: old_history = list(_get_gemini_history_list(_gemini_chat)) if _get_gemini_history_list(_gemini_chat) else [] @@ -520,10 +520,20 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: _gemini_chat = _gemini_client.chats.create(**kwargs) _gemini_chat._last_md_hash = current_md_hash - _append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"}) - payload, all_text = user_message, [] + # Build user message: prepend dynamic context (discussion) so it's NOT cached in system_instruction + full_user_msg = f"\n{dynamic_md}\n\n\n{user_message}" if dynamic_md.strip() else user_message + _append_comms("OUT", "request", {"message": f"[ctx {len(static_md)} static + {len(dynamic_md)} dynamic + msg {len(user_message)}]"}) + payload, all_text = full_user_msg, [] for r_idx in range(MAX_TOOL_ROUNDS + 2): + # Strip stale blocks from old user messages so they don't accumulate + import re as _re + if _gemini_chat and _get_gemini_history_list(_gemini_chat): + for msg in _get_gemini_history_list(_gemini_chat): + if msg.role == "user" and hasattr(msg, "parts"): + for p in msg.parts: + if hasattr(p, "text") and p.text and "" in p.text: + p.text = _re.sub(r".*?\n\n", "", p.text, flags=_re.DOTALL) # Strip stale file refreshes and truncate old tool outputs in Gemini history if _gemini_chat and _get_gemini_history_list(_gemini_chat): for msg in _get_gemini_history_list(_gemini_chat): @@ -818,13 +828,16 @@ def _repair_anthropic_history(history: list[dict]): }) -def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str: +def _send_anthropic(static_md: str, dynamic_md: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str: try: _ensure_anthropic_client() mcp_client.configure(file_items or [], [base_dir]) - system_text = _get_combined_system_prompt() + f"\n\n\n{md_content}\n" + system_text = _get_combined_system_prompt() + f"\n\n\n{static_md}\n" system_blocks = _build_chunked_context_blocks(system_text) + # Dynamic context (discussion history) goes after the cached static prefix, without cache_control + if dynamic_md.strip(): + system_blocks.append({"type": "text", "text": f"\n{dynamic_md}\n"}) user_content = [{"type": "text", "text": user_message}] @@ -844,7 +857,7 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item n_chunks = len(system_blocks) _append_comms("OUT", "request", { "message": ( - f"[system {n_chunks} chunk(s), {len(md_content)} chars context] " + f"[system {n_chunks} chunk(s), {len(static_md)} static + {len(dynamic_md)} dynamic chars] " f"{user_message[:200]}{'...' if len(user_message) > 200 else ''}" ), }) @@ -997,7 +1010,8 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item # ------------------------------------------------------------------ unified send def send( - md_content: str, + static_md: str, + dynamic_md: str, user_message: str, base_dir: str = ".", file_items: list[dict] | None = None, @@ -1005,14 +1019,15 @@ def send( """ Send a message to the active provider. - md_content : aggregated markdown string from aggregate.run() + static_md : cacheable context (files + screenshots) from aggregate.run() + dynamic_md : volatile context (discussion history) that changes every turn user_message: the user question / instruction base_dir : project base directory (for PowerShell tool calls) file_items : list of file dicts from aggregate.build_file_items() for dynamic context refresh after tool calls """ if _provider == "gemini": - return _send_gemini(md_content, user_message, base_dir, file_items) + return _send_gemini(static_md, dynamic_md, user_message, base_dir, file_items) elif _provider == "anthropic": - return _send_anthropic(md_content, user_message, base_dir, file_items) + return _send_anthropic(static_md, dynamic_md, user_message, base_dir, file_items) raise ValueError(f"unknown provider: {_provider}") \ No newline at end of file diff --git a/config.toml b/config.toml index d175f70..d5dcd7c 100644 --- a/config.toml +++ b/config.toml @@ -1,6 +1,6 @@ [ai] provider = "anthropic" -model = "claude-sonnet-4-6" +model = "claude-opus-4-6" temperature = 0.6000000238418579 max_tokens = 12000 history_trunc_limit = 8000 @@ -17,4 +17,4 @@ paths = [ "manual_slop.toml", "C:/projects/forth/bootslop/bootslop.toml", ] -active = "C:/projects/forth/bootslop/bootslop.toml" +active = "manual_slop.toml" diff --git a/manual_slop.toml b/manual_slop.toml index 6bd81fe..5bdd757 100644 --- a/manual_slop.toml +++ b/manual_slop.toml @@ -38,7 +38,7 @@ roles = [ "Vendor API", "System", ] -active = "docs writeup" +active = "review ai_client.py" auto_add = true [discussion.discussions.main] @@ -147,7 +147,7 @@ history = [ [discussion.discussions."docs writeup"] git_commit = "bf2d09f3fd817d64fbf6b4aa667e2b635b6fbc0e" -last_updated = "2026-02-22T11:08:58" +last_updated = "2026-02-22T11:47:11" history = [ "@2026-02-22T08:56:39\nUser:\nLets write extensive documentation in the same style that I used for my VEFontCache-Oodin project.\nI added it's directories to your context.", "@2026-02-22T08:56:58\nAI:\n(No text returned)", @@ -158,3 +158,11 @@ history = [ "@2026-02-22T09:11:01\nUser:\nThe docs aren't detailed enough, the code isn't commented like the vefontcache code is. etc. My docs were straight to the point, in-depth, old school provides extensive insight into what was done.", "@2026-02-22T09:17:39\nVendor API:\n[GEMINI QUOTA EXHAUSTED]\n\n429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. \\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 1000000, model: gemini-3.1-pro\\nPlease retry in 20.333361679s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count', 'quotaId': 'GenerateContentPaidTierInputTokensPerModelPerMinute', 'quotaDimensions': {'location': 'global', 'model': 'gemini-3.1-pro'}, 'quotaValue': '1000000'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '20s'}]}}", ] + +[discussion.discussions."review ai_client.py"] +git_commit = "" +last_updated = "2026-02-22T11:49:36" +history = [ + "@2026-02-22T11:48:29\nUser:\nLook at ai_client.py on this branch and the git branch not_sure. This current branch had a fix applied for gemini's uses that you'll find in: ./patch_gemini_history.py\n\nIs our usage of the apis correct for gemini? Is the not_sure better at using ai apis for anthropic or google?\n", + "@2026-02-22T11:49:12\nVendor API:\n[GEMINI QUOTA EXHAUSTED]\n\n429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'Resource has been exhausted (e.g. check quota).', 'status': 'RESOURCE_EXHAUSTED'}}", +]