checkpoint: finished test curation

This commit is contained in:
2026-02-25 21:58:18 -05:00
parent e0b9ab997a
commit 56025a84e9
33 changed files with 546 additions and 356 deletions

View File

@@ -20,6 +20,7 @@ import difflib
import threading
from pathlib import Path
import os
import project_manager
import file_cache
import mcp_client
import anthropic
@@ -44,6 +45,13 @@ def set_model_params(temp: float, max_tok: int, trunc_limit: int = 8000):
_max_tokens = max_tok
_history_trunc_limit = trunc_limit
def get_history_trunc_limit() -> int:
    """Return the current history truncation limit.

    Reads the module-global ``_history_trunc_limit`` (set via
    ``set_model_params`` or ``set_history_trunc_limit``). Callers treat a
    value <= 0 as "no custom limit" and fall back to the provider default.
    """
    return _history_trunc_limit
def set_history_trunc_limit(val: int) -> None:
    """Set the module-global history truncation limit.

    Args:
        val: New truncation limit (in tokens). A value <= 0 causes
            consumers (e.g. ``get_history_bleed_stats``) to fall back to
            the provider's default maximum input size.
    """
    global _history_trunc_limit
    _history_trunc_limit = val
_gemini_client = None
_gemini_chat = None
_gemini_cache = None
@@ -800,11 +808,10 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
try:
if _gemini_cli_adapter is None:
_gemini_cli_adapter = GeminiCliAdapter(binary_path="gemini")
events.emit("request_start", payload={"provider": "gemini_cli", "model": _model, "round": 0})
mcp_client.configure(file_items or [], [base_dir])
# If it's a new session (session_id is None), we should ideally send the context.
# For now, following the simple pattern:
payload = user_message
if _gemini_cli_adapter.session_id is None:
# Prepend context and discussion history to the first message
@@ -814,23 +821,104 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
full_prompt += user_message
payload = full_prompt
_append_comms("OUT", "request", {"message": f"[CLI] [msg {len(payload)}]"})
result_text = _gemini_cli_adapter.send(payload)
usage = _gemini_cli_adapter.last_usage or {}
latency = _gemini_cli_adapter.last_latency
events.emit("response_received", payload={"provider": "gemini_cli", "model": _model, "usage": usage, "latency": latency, "round": 0})
_append_comms("IN", "response", {
"round": 0,
"stop_reason": "STOP",
"text": result_text,
"tool_calls": [],
"usage": usage
})
return result_text
all_text = []
_cumulative_tool_bytes = 0
for r_idx in range(MAX_TOOL_ROUNDS + 2):
events.emit("request_start", payload={"provider": "gemini_cli", "model": _model, "round": r_idx})
_append_comms("OUT", "request", {"message": f"[CLI] [round {r_idx}] [msg {len(payload)}]"})
resp_data = _gemini_cli_adapter.send(payload)
txt = resp_data.get("text", "")
if txt: all_text.append(txt)
calls = resp_data.get("tool_calls", [])
usage = _gemini_cli_adapter.last_usage or {}
latency = _gemini_cli_adapter.last_latency
events.emit("response_received", payload={"provider": "gemini_cli", "model": _model, "usage": usage, "latency": latency, "round": r_idx})
# Clean up the tool calls format to match comms log expectation
log_calls = []
for c in calls:
log_calls.append({"name": c.get("name"), "args": c.get("args")})
_append_comms("IN", "response", {
"round": r_idx,
"stop_reason": "TOOL_USE" if calls else "STOP",
"text": txt,
"tool_calls": log_calls,
"usage": usage
})
# If there's text and we're not done, push it to the history immediately
# so it appears as a separate entry in the GUI.
if txt and calls and comms_log_callback:
# Use kind='history_add' to push a new entry into the disc_entries list
comms_log_callback({
"ts": project_manager.now_ts(),
"direction": "IN",
"kind": "history_add",
"payload": {
"role": "AI",
"content": txt
}
})
if not calls or r_idx > MAX_TOOL_ROUNDS:
break
tool_results_for_cli = []
for i, fc in enumerate(calls):
name = fc.get("name")
args = fc.get("args", {})
call_id = fc.get("id")
events.emit("tool_execution", payload={"status": "started", "tool": name, "args": args, "round": r_idx})
if name in mcp_client.TOOL_NAMES:
_append_comms("OUT", "tool_call", {"name": name, "id": call_id, "args": args})
out = mcp_client.dispatch(name, args)
elif name == TOOL_NAME:
scr = args.get("script", "")
_append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": call_id, "script": scr})
out = _run_script(scr, base_dir)
else:
out = f"ERROR: unknown tool '{name}'"
if i == len(calls) - 1:
if file_items:
file_items, changed = _reread_file_items(file_items)
ctx = _build_file_diff_text(changed)
if ctx:
out += f"\n\n[SYSTEM: FILES UPDATED]\n\n{ctx}"
if r_idx == MAX_TOOL_ROUNDS:
out += "\n\n[SYSTEM: MAX ROUNDS. PROVIDE FINAL ANSWER.]"
out = _truncate_tool_output(out)
_cumulative_tool_bytes += len(out)
tool_results_for_cli.append({
"role": "tool",
"tool_call_id": call_id,
"name": name,
"content": out
})
_append_comms("IN", "tool_result", {"name": name, "id": call_id, "output": out})
events.emit("tool_execution", payload={"status": "completed", "tool": name, "result": out, "round": r_idx})
if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES:
_append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
# We should ideally tell the model here, but for CLI we just append to payload
# For Gemini CLI, we send the tool results as a JSON array of messages (or similar)
# The adapter expects a string, so we'll pass the JSON string of the results.
payload = json.dumps(tool_results_for_cli)
# Return only the text from the last round, because intermediate
# text chunks were already pushed to history via comms_log_callback.
final_text = all_text[-1] if all_text else "(No text returned)"
return final_text
except Exception as e:
# Basic error classification for CLI
raise ProviderError("unknown", "gemini_cli", e)
@@ -1348,6 +1436,7 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict:
"percentage": percentage,
}
elif _provider == "gemini":
effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
if _gemini_chat:
try:
_ensure_gemini_client()
@@ -1368,7 +1457,7 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict:
print("[DEBUG] Gemini count_tokens skipped: no history or md_content")
return {
"provider": "gemini",
"limit": _GEMINI_MAX_INPUT_TOKENS,
"limit": effective_limit,
"current": 0,
"percentage": 0,
}
@@ -1379,12 +1468,11 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict:
contents=history
)
current_tokens = resp.total_tokens
limit_tokens = _GEMINI_MAX_INPUT_TOKENS
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
print(f"[DEBUG] Gemini current_tokens={current_tokens}, percentage={percentage:.4f}%")
return {
"provider": "gemini",
"limit": limit_tokens,
"limit": effective_limit,
"current": current_tokens,
"percentage": percentage,
}
@@ -1400,12 +1488,11 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict:
contents=[types.Content(role="user", parts=[types.Part.from_text(text=md_content)])]
)
current_tokens = resp.total_tokens
limit_tokens = _GEMINI_MAX_INPUT_TOKENS
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
print(f"[DEBUG] Gemini (MD ONLY) current_tokens={current_tokens}, percentage={percentage:.4f}%")
return {
"provider": "gemini",
"limit": limit_tokens,
"limit": effective_limit,
"current": current_tokens,
"percentage": percentage,
}
@@ -1415,10 +1502,28 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict:
return {
"provider": "gemini",
"limit": _GEMINI_MAX_INPUT_TOKENS,
"limit": effective_limit,
"current": 0,
"percentage": 0,
}
elif _provider == "gemini_cli":
effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
# For Gemini CLI, we don't have direct count_tokens access without making a call,
# so we report the limit and current usage from the last run if available.
limit_tokens = effective_limit
current_tokens = 0
if _gemini_cli_adapter and _gemini_cli_adapter.last_usage:
# Stats from CLI use 'input_tokens' or 'input'
u = _gemini_cli_adapter.last_usage
current_tokens = u.get("input_tokens") or u.get("input", 0)
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
return {
"provider": "gemini_cli",
"limit": limit_tokens,
"current": current_tokens,
"percentage": percentage,
}
# Default empty state
return {