@@ -244,23 +244,3 @@ Documentation has been completely rewritten matching the strict, structural form
|
|||||||
- `docs/guide_architecture.md`: Details the Python implementation algorithms, queue management for UI rendering, the specific AST heuristics used for context aggregation, and the distinct algorithms for trimming Anthropic history vs Gemini state caching.
|
- `docs/guide_architecture.md`: Details the Python implementation algorithms, queue management for UI rendering, the specific AST heuristics used for context aggregation, and the distinct algorithms for trimming Anthropic history vs Gemini state caching.
|
||||||
- `docs/Readme.md`: The core interface manual.
|
- `docs/Readme.md`: The core interface manual.
|
||||||
- `docs/guide_tools.md`: Security architecture for `_is_allowed` paths and definitions of the read-only vs destructive tool pipeline.
|
- `docs/guide_tools.md`: Security architecture for `_is_allowed` paths and definitions of the read-only vs destructive tool pipeline.
|
||||||
|
|
||||||
## Branch Analysis: master vs not_sure (2026-02-22)
|
|
||||||
|
|
||||||
### Summary
|
|
||||||
The `not_sure` branch introduces a static/dynamic context split in the `send()` API signature, separating files+screenshots (cacheable, stable) from discussion history (changes every turn). This improves cache hit rates for both Anthropic and Gemini.
|
|
||||||
|
|
||||||
### Current master branch API correctness
|
|
||||||
- **Anthropic**: Correct. System blocks with cache_control, SDK content block serialisation, history repair, stale file refresh stripping all work properly.
|
|
||||||
- **Gemini**: Correct after `patch_gemini_history.py` was applied. Uses `_get_gemini_history_list()` for safe SDK access, drops history in pairs to maintain alternating roles, explicit caching via `caches.create()`.
|
|
||||||
|
|
||||||
### not_sure branch improvements
|
|
||||||
- **Anthropic**: Puts discussion history as a separate uncached system block after the cached static context. Better cache hit rates when discussion changes between turns.
|
|
||||||
- **Gemini**: Wraps discussion in `<discussion>` tags in user messages and strips old ones from history via regex. Prevents discussion duplication across turns.
|
|
||||||
|
|
||||||
### not_sure branch bugs (not merged from master)
|
|
||||||
- Uses `_gemini_chat.history` directly instead of `_get_gemini_history_list()` — will crash on newer google-genai SDK versions where `.history` was removed.
|
|
||||||
- Missing the pair-wise history dropping fix (drops single messages, breaking Gemini's alternating role requirement).
|
|
||||||
|
|
||||||
### Recommended merge path
|
|
||||||
Cherry-pick the static/dynamic split from `not_sure` into `master` while keeping master's SDK safety fixes (`_get_gemini_history_list`, pair-wise dropping, `_content_block_to_dict`).
|
|
||||||
30
aggregate.py
30
aggregate.py
@@ -126,24 +126,6 @@ def build_summary_section(base_dir: Path, files: list[str]) -> str:
|
|||||||
items = build_file_items(base_dir, files)
|
items = build_file_items(base_dir, files)
|
||||||
return summarize.build_summary_markdown(items)
|
return summarize.build_summary_markdown(items)
|
||||||
|
|
||||||
def build_static_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
|
|
||||||
"""Build the static (cacheable) portion of the context: files + screenshots."""
|
|
||||||
parts = []
|
|
||||||
if files:
|
|
||||||
if summary_only:
|
|
||||||
parts.append("## Files (Summary)\n\n" + build_summary_section(base_dir, files))
|
|
||||||
else:
|
|
||||||
parts.append("## Files\n\n" + build_files_section(base_dir, files))
|
|
||||||
if screenshots:
|
|
||||||
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
|
|
||||||
return "\n\n---\n\n".join(parts)
|
|
||||||
|
|
||||||
def build_dynamic_markdown(history: list[str]) -> str:
|
|
||||||
"""Build the dynamic (changes every turn) portion: discussion history."""
|
|
||||||
if history:
|
|
||||||
return "## Discussion History\n\n" + build_discussion_section(history)
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def build_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
|
def build_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
|
||||||
parts = []
|
parts = []
|
||||||
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
|
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
|
||||||
@@ -173,20 +155,18 @@ def run(config: dict) -> tuple[str, Path]:
|
|||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
increment = find_next_increment(output_dir, namespace)
|
increment = find_next_increment(output_dir, namespace)
|
||||||
output_file = output_dir / f"{namespace}_{increment:03d}.md"
|
output_file = output_dir / f"{namespace}_{increment:03d}.md"
|
||||||
# Build static (files+screenshots) and dynamic (discussion) portions separately for better caching
|
# Provide full files to trigger Gemini's 32k cache threshold and give the AI immediate context
|
||||||
static_md = build_static_markdown(base_dir, files, screenshot_base_dir, screenshots, summary_only=False)
|
markdown = build_markdown(base_dir, files, screenshot_base_dir, screenshots, history,
|
||||||
dynamic_md = build_dynamic_markdown(history)
|
summary_only=False)
|
||||||
# Write combined markdown to disk for archival
|
|
||||||
markdown = f"{static_md}\n\n---\n\n{dynamic_md}" if static_md and dynamic_md else static_md or dynamic_md
|
|
||||||
output_file.write_text(markdown, encoding="utf-8")
|
output_file.write_text(markdown, encoding="utf-8")
|
||||||
file_items = build_file_items(base_dir, files)
|
file_items = build_file_items(base_dir, files)
|
||||||
return static_md, dynamic_md, output_file, file_items
|
return markdown, output_file, file_items
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
with open("config.toml", "rb") as f:
|
with open("config.toml", "rb") as f:
|
||||||
import tomllib
|
import tomllib
|
||||||
config = tomllib.load(f)
|
config = tomllib.load(f)
|
||||||
static_md, dynamic_md, output_file, _ = run(config)
|
markdown, output_file, _ = run(config)
|
||||||
print(f"Written: {output_file}")
|
print(f"Written: {output_file}")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
39
ai_client.py
39
ai_client.py
@@ -465,16 +465,16 @@ def _get_gemini_history_list(chat):
|
|||||||
return chat.get_history()
|
return chat.get_history()
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _send_gemini(static_md: str, dynamic_md: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
|
def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
|
||||||
global _gemini_chat, _gemini_cache
|
global _gemini_chat, _gemini_cache
|
||||||
from google.genai import types
|
from google.genai import types
|
||||||
try:
|
try:
|
||||||
_ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
|
_ensure_gemini_client(); mcp_client.configure(file_items or [], [base_dir])
|
||||||
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{static_md}\n</context>"
|
sys_instr = f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"
|
||||||
tools_decl = [_gemini_tool_declaration()]
|
tools_decl = [_gemini_tool_declaration()]
|
||||||
|
|
||||||
# DYNAMIC CONTEXT: Check if files/context changed mid-session
|
# DYNAMIC CONTEXT: Check if files/context changed mid-session
|
||||||
current_md_hash = hash(static_md)
|
current_md_hash = hash(md_content)
|
||||||
old_history = None
|
old_history = None
|
||||||
if _gemini_chat and getattr(_gemini_chat, "_last_md_hash", None) != current_md_hash:
|
if _gemini_chat and getattr(_gemini_chat, "_last_md_hash", None) != current_md_hash:
|
||||||
old_history = list(_get_gemini_history_list(_gemini_chat)) if _get_gemini_history_list(_gemini_chat) else []
|
old_history = list(_get_gemini_history_list(_gemini_chat)) if _get_gemini_history_list(_gemini_chat) else []
|
||||||
@@ -520,20 +520,10 @@ def _send_gemini(static_md: str, dynamic_md: str, user_message: str, base_dir: s
|
|||||||
_gemini_chat = _gemini_client.chats.create(**kwargs)
|
_gemini_chat = _gemini_client.chats.create(**kwargs)
|
||||||
_gemini_chat._last_md_hash = current_md_hash
|
_gemini_chat._last_md_hash = current_md_hash
|
||||||
|
|
||||||
# Build user message: prepend dynamic context (discussion) so it's NOT cached in system_instruction
|
_append_comms("OUT", "request", {"message": f"[ctx {len(md_content)} + msg {len(user_message)}]"})
|
||||||
full_user_msg = f"<discussion>\n{dynamic_md}\n</discussion>\n\n{user_message}" if dynamic_md.strip() else user_message
|
payload, all_text = user_message, []
|
||||||
_append_comms("OUT", "request", {"message": f"[ctx {len(static_md)} static + {len(dynamic_md)} dynamic + msg {len(user_message)}]"})
|
|
||||||
payload, all_text = full_user_msg, []
|
|
||||||
|
|
||||||
for r_idx in range(MAX_TOOL_ROUNDS + 2):
|
for r_idx in range(MAX_TOOL_ROUNDS + 2):
|
||||||
# Strip stale <discussion> blocks from old user messages so they don't accumulate
|
|
||||||
import re as _re
|
|
||||||
if _gemini_chat and _get_gemini_history_list(_gemini_chat):
|
|
||||||
for msg in _get_gemini_history_list(_gemini_chat):
|
|
||||||
if msg.role == "user" and hasattr(msg, "parts"):
|
|
||||||
for p in msg.parts:
|
|
||||||
if hasattr(p, "text") and p.text and "<discussion>" in p.text:
|
|
||||||
p.text = _re.sub(r"<discussion>.*?</discussion>\n\n", "", p.text, flags=_re.DOTALL)
|
|
||||||
# Strip stale file refreshes and truncate old tool outputs in Gemini history
|
# Strip stale file refreshes and truncate old tool outputs in Gemini history
|
||||||
if _gemini_chat and _get_gemini_history_list(_gemini_chat):
|
if _gemini_chat and _get_gemini_history_list(_gemini_chat):
|
||||||
for msg in _get_gemini_history_list(_gemini_chat):
|
for msg in _get_gemini_history_list(_gemini_chat):
|
||||||
@@ -828,16 +818,13 @@ def _repair_anthropic_history(history: list[dict]):
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
def _send_anthropic(static_md: str, dynamic_md: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
|
def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
|
||||||
try:
|
try:
|
||||||
_ensure_anthropic_client()
|
_ensure_anthropic_client()
|
||||||
mcp_client.configure(file_items or [], [base_dir])
|
mcp_client.configure(file_items or [], [base_dir])
|
||||||
|
|
||||||
system_text = _get_combined_system_prompt() + f"\n\n<context>\n{static_md}\n</context>"
|
system_text = _get_combined_system_prompt() + f"\n\n<context>\n{md_content}\n</context>"
|
||||||
system_blocks = _build_chunked_context_blocks(system_text)
|
system_blocks = _build_chunked_context_blocks(system_text)
|
||||||
# Dynamic context (discussion history) goes after the cached static prefix, without cache_control
|
|
||||||
if dynamic_md.strip():
|
|
||||||
system_blocks.append({"type": "text", "text": f"<discussion>\n{dynamic_md}\n</discussion>"})
|
|
||||||
|
|
||||||
user_content = [{"type": "text", "text": user_message}]
|
user_content = [{"type": "text", "text": user_message}]
|
||||||
|
|
||||||
@@ -857,7 +844,7 @@ def _send_anthropic(static_md: str, dynamic_md: str, user_message: str, base_dir
|
|||||||
n_chunks = len(system_blocks)
|
n_chunks = len(system_blocks)
|
||||||
_append_comms("OUT", "request", {
|
_append_comms("OUT", "request", {
|
||||||
"message": (
|
"message": (
|
||||||
f"[system {n_chunks} chunk(s), {len(static_md)} static + {len(dynamic_md)} dynamic chars] "
|
f"[system {n_chunks} chunk(s), {len(md_content)} chars context] "
|
||||||
f"{user_message[:200]}{'...' if len(user_message) > 200 else ''}"
|
f"{user_message[:200]}{'...' if len(user_message) > 200 else ''}"
|
||||||
),
|
),
|
||||||
})
|
})
|
||||||
@@ -1010,8 +997,7 @@ def _send_anthropic(static_md: str, dynamic_md: str, user_message: str, base_dir
|
|||||||
# ------------------------------------------------------------------ unified send
|
# ------------------------------------------------------------------ unified send
|
||||||
|
|
||||||
def send(
|
def send(
|
||||||
static_md: str,
|
md_content: str,
|
||||||
dynamic_md: str,
|
|
||||||
user_message: str,
|
user_message: str,
|
||||||
base_dir: str = ".",
|
base_dir: str = ".",
|
||||||
file_items: list[dict] | None = None,
|
file_items: list[dict] | None = None,
|
||||||
@@ -1019,15 +1005,14 @@ def send(
|
|||||||
"""
|
"""
|
||||||
Send a message to the active provider.
|
Send a message to the active provider.
|
||||||
|
|
||||||
static_md : cacheable context (files + screenshots) from aggregate.run()
|
md_content : aggregated markdown string from aggregate.run()
|
||||||
dynamic_md : volatile context (discussion history) that changes every turn
|
|
||||||
user_message: the user question / instruction
|
user_message: the user question / instruction
|
||||||
base_dir : project base directory (for PowerShell tool calls)
|
base_dir : project base directory (for PowerShell tool calls)
|
||||||
file_items : list of file dicts from aggregate.build_file_items() for
|
file_items : list of file dicts from aggregate.build_file_items() for
|
||||||
dynamic context refresh after tool calls
|
dynamic context refresh after tool calls
|
||||||
"""
|
"""
|
||||||
if _provider == "gemini":
|
if _provider == "gemini":
|
||||||
return _send_gemini(static_md, dynamic_md, user_message, base_dir, file_items)
|
return _send_gemini(md_content, user_message, base_dir, file_items)
|
||||||
elif _provider == "anthropic":
|
elif _provider == "anthropic":
|
||||||
return _send_anthropic(static_md, dynamic_md, user_message, base_dir, file_items)
|
return _send_anthropic(md_content, user_message, base_dir, file_items)
|
||||||
raise ValueError(f"unknown provider: {_provider}")
|
raise ValueError(f"unknown provider: {_provider}")
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
[ai]
|
[ai]
|
||||||
provider = "anthropic"
|
provider = "anthropic"
|
||||||
model = "claude-opus-4-6"
|
model = "claude-sonnet-4-6"
|
||||||
temperature = 0.6000000238418579
|
temperature = 0.6000000238418579
|
||||||
max_tokens = 12000
|
max_tokens = 12000
|
||||||
history_trunc_limit = 8000
|
history_trunc_limit = 8000
|
||||||
@@ -17,4 +17,4 @@ paths = [
|
|||||||
"manual_slop.toml",
|
"manual_slop.toml",
|
||||||
"C:/projects/forth/bootslop/bootslop.toml",
|
"C:/projects/forth/bootslop/bootslop.toml",
|
||||||
]
|
]
|
||||||
active = "manual_slop.toml"
|
active = "C:/projects/forth/bootslop/bootslop.toml"
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ roles = [
|
|||||||
"Vendor API",
|
"Vendor API",
|
||||||
"System",
|
"System",
|
||||||
]
|
]
|
||||||
active = "review ai_client.py"
|
active = "docs writeup"
|
||||||
auto_add = true
|
auto_add = true
|
||||||
|
|
||||||
[discussion.discussions.main]
|
[discussion.discussions.main]
|
||||||
@@ -147,7 +147,7 @@ history = [
|
|||||||
|
|
||||||
[discussion.discussions."docs writeup"]
|
[discussion.discussions."docs writeup"]
|
||||||
git_commit = "bf2d09f3fd817d64fbf6b4aa667e2b635b6fbc0e"
|
git_commit = "bf2d09f3fd817d64fbf6b4aa667e2b635b6fbc0e"
|
||||||
last_updated = "2026-02-22T11:47:11"
|
last_updated = "2026-02-22T11:08:58"
|
||||||
history = [
|
history = [
|
||||||
"@2026-02-22T08:56:39\nUser:\nLets write extensive documentation in the same style that I used for my VEFontCache-Oodin project.\nI added it's directories to your context.",
|
"@2026-02-22T08:56:39\nUser:\nLets write extensive documentation in the same style that I used for my VEFontCache-Oodin project.\nI added it's directories to your context.",
|
||||||
"@2026-02-22T08:56:58\nAI:\n(No text returned)",
|
"@2026-02-22T08:56:58\nAI:\n(No text returned)",
|
||||||
@@ -158,11 +158,3 @@ history = [
|
|||||||
"@2026-02-22T09:11:01\nUser:\nThe docs aren't detailed enough, the code isn't commented like the vefontcache code is. etc. My docs were straight to the point, in-depth, old school provides extensive insight into what was done.",
|
"@2026-02-22T09:11:01\nUser:\nThe docs aren't detailed enough, the code isn't commented like the vefontcache code is. etc. My docs were straight to the point, in-depth, old school provides extensive insight into what was done.",
|
||||||
"@2026-02-22T09:17:39\nVendor API:\n[GEMINI QUOTA EXHAUSTED]\n\n429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. \\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 1000000, model: gemini-3.1-pro\\nPlease retry in 20.333361679s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count', 'quotaId': 'GenerateContentPaidTierInputTokensPerModelPerMinute', 'quotaDimensions': {'location': 'global', 'model': 'gemini-3.1-pro'}, 'quotaValue': '1000000'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '20s'}]}}",
|
"@2026-02-22T09:17:39\nVendor API:\n[GEMINI QUOTA EXHAUSTED]\n\n429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. \\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count, limit: 1000000, model: gemini-3.1-pro\\nPlease retry in 20.333361679s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_paid_tier_input_token_count', 'quotaId': 'GenerateContentPaidTierInputTokensPerModelPerMinute', 'quotaDimensions': {'location': 'global', 'model': 'gemini-3.1-pro'}, 'quotaValue': '1000000'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '20s'}]}}",
|
||||||
]
|
]
|
||||||
|
|
||||||
[discussion.discussions."review ai_client.py"]
|
|
||||||
git_commit = ""
|
|
||||||
last_updated = "2026-02-22T11:49:36"
|
|
||||||
history = [
|
|
||||||
"@2026-02-22T11:48:29\nUser:\nLook at ai_client.py on this branch and the git branch not_sure. This current branch had a fix applied for gemini's uses that you'll find in: ./patch_gemini_history.py\n\nIs our usage of the apis correct for gemini? Is the not_sure better at using ai apis for anthropic or google?\n",
|
|
||||||
"@2026-02-22T11:49:12\nVendor API:\n[GEMINI QUOTA EXHAUSTED]\n\n429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'Resource has been exhausted (e.g. check quota).', 'status': 'RESOURCE_EXHAUSTED'}}",
|
|
||||||
]
|
|
||||||
|
|||||||
Reference in New Issue
Block a user