# ai_client.py
import tomllib
import json
import datetime
from pathlib import Path

import file_cache
import mcp_client


_provider: str = "gemini"
_model: str = "gemini-2.0-flash"

_gemini_client = None
_gemini_chat = None

_anthropic_client = None
_anthropic_history: list[dict] = []

# Injected by gui.py - called when AI wants to run a command.
# Signature: (script: str, base_dir: str) -> str | None
confirm_and_run_callback = None

# Injected by gui.py - called whenever a comms entry is appended.
# Signature: (entry: dict) -> None
comms_log_callback = None

# Injected by gui.py - called whenever a tool call completes.
# Signature: (script: str, result: str) -> None
tool_log_callback = None

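# Illustrative wiring sketch for the three callbacks above (not executed here;
# the names used by the real gui.py may differ - this only shows the expected
# signatures):
#
#   import ai_client
#
#   def confirm_and_run(script: str, base_dir: str) -> str | None:
#       # Show `script` to the user; return combined stdout/stderr on approval,
#       # or None if the user rejects the command.
#       ...
#
#   ai_client.confirm_and_run_callback = confirm_and_run
#   ai_client.comms_log_callback = lambda entry: print(entry["kind"], entry["ts"])
#   ai_client.tool_log_callback = lambda script, result: print(result)
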
# Increased to allow thorough code exploration before forcing a summary
MAX_TOOL_ROUNDS = 10

# Maximum characters per text chunk sent to Anthropic.
# Kept well under the ~200k token API limit.
_ANTHROPIC_CHUNK_SIZE = 120_000

_SYSTEM_PROMPT = (
    "You are a helpful coding assistant with access to a PowerShell tool and MCP tools "
    "(file access: read_file, list_directory, search_files, get_file_summary; web access: web_search, fetch_url). "
    "When asked to create or edit files, prefer targeted edits over full rewrites. "
    "Always explain what you are doing before invoking the tool.\n\n"
    "When writing or rewriting large files (especially those containing quotes, backticks, or special characters), "
    "avoid python -c with inline strings. Instead: (1) write a .py helper script to disk using a PS here-string "
    "(@'...'@ for literal content), (2) run it with `python <script>`, (3) delete the helper. "
    "For small targeted edits, use PowerShell's (Get-Content) / .Replace() / Set-Content or Add-Content directly.\n\n"
    "When making function calls to tools that accept array or object parameters, "
    "ensure those arguments are structured as JSON.\n\n"
    "When you need to verify a change, rely on the exit code and stdout/stderr from the tool \u2014 "
    "the user's context files are automatically refreshed after every tool call, so you do NOT "
    "need to re-read files that are already provided in the <context> block."
)

_custom_system_prompt: str = ""


def set_custom_system_prompt(prompt: str):
    global _custom_system_prompt
    _custom_system_prompt = prompt


def _get_combined_system_prompt() -> str:
    if _custom_system_prompt.strip():
        return f"{_SYSTEM_PROMPT}\n\n[USER SYSTEM PROMPT]\n{_custom_system_prompt}"
    return _SYSTEM_PROMPT

# ------------------------------------------------------------------ comms log

_comms_log: list[dict] = []

COMMS_CLAMP_CHARS = 300


def _append_comms(direction: str, kind: str, payload: dict):
    entry = {
        "ts": datetime.datetime.now().strftime("%H:%M:%S"),
        "direction": direction,
        "kind": kind,
        "provider": _provider,
        "model": _model,
        "payload": payload,
    }
    _comms_log.append(entry)
    if comms_log_callback is not None:
        comms_log_callback(entry)


def get_comms_log() -> list[dict]:
    return list(_comms_log)


def clear_comms_log():
    _comms_log.clear()


def _load_credentials() -> dict:
    with open("credentials.toml", "rb") as f:
        return tomllib.load(f)

# ------------------------------------------------------------------ provider errors

class ProviderError(Exception):
    def __init__(self, kind: str, provider: str, original: Exception):
        self.kind = kind
        self.provider = provider
        self.original = original
        super().__init__(str(original))

    def ui_message(self) -> str:
        labels = {
            "quota": "QUOTA EXHAUSTED",
            "rate_limit": "RATE LIMITED",
            "auth": "AUTH / API KEY ERROR",
            "balance": "BALANCE / BILLING ERROR",
            "network": "NETWORK / CONNECTION ERROR",
            "unknown": "API ERROR",
        }
        label = labels.get(self.kind, "API ERROR")
        return f"[{self.provider.upper()} {label}]\n\n{self.original}"

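# Example of the message shape ui_message() produces (illustrative values):
#
#   err = ProviderError("quota", "gemini", Exception("429 Resource has been exhausted"))
#   err.ui_message()
#   # -> "[GEMINI QUOTA EXHAUSTED]\n\n429 Resource has been exhausted"
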
def _classify_anthropic_error(exc: Exception) -> ProviderError:
    try:
        import anthropic
        if isinstance(exc, anthropic.RateLimitError):
            return ProviderError("rate_limit", "anthropic", exc)
        if isinstance(exc, anthropic.AuthenticationError):
            return ProviderError("auth", "anthropic", exc)
        if isinstance(exc, anthropic.PermissionDeniedError):
            return ProviderError("auth", "anthropic", exc)
        if isinstance(exc, anthropic.APIConnectionError):
            return ProviderError("network", "anthropic", exc)
        if isinstance(exc, anthropic.APIStatusError):
            status = getattr(exc, "status_code", 0)
            body = str(exc).lower()
            if status == 429:
                return ProviderError("rate_limit", "anthropic", exc)
            if status in (401, 403):
                return ProviderError("auth", "anthropic", exc)
            if status == 402:
                return ProviderError("balance", "anthropic", exc)
            if "credit" in body or "balance" in body or "billing" in body:
                return ProviderError("balance", "anthropic", exc)
            if "quota" in body or "limit" in body or "exceeded" in body:
                return ProviderError("quota", "anthropic", exc)
    except ImportError:
        pass
    return ProviderError("unknown", "anthropic", exc)


def _classify_gemini_error(exc: Exception) -> ProviderError:
    body = str(exc).lower()
    try:
        from google.api_core import exceptions as gac
        if isinstance(exc, gac.ResourceExhausted):
            return ProviderError("quota", "gemini", exc)
        if isinstance(exc, gac.TooManyRequests):
            return ProviderError("rate_limit", "gemini", exc)
        if isinstance(exc, (gac.Unauthenticated, gac.PermissionDenied)):
            return ProviderError("auth", "gemini", exc)
        if isinstance(exc, gac.ServiceUnavailable):
            return ProviderError("network", "gemini", exc)
    except ImportError:
        pass
    if "429" in body or "quota" in body or "resource exhausted" in body:
        return ProviderError("quota", "gemini", exc)
    if "rate" in body and "limit" in body:
        return ProviderError("rate_limit", "gemini", exc)
    if "401" in body or "403" in body or "api key" in body or "unauthenticated" in body:
        return ProviderError("auth", "gemini", exc)
    if "402" in body or "billing" in body or "balance" in body or "payment" in body:
        return ProviderError("balance", "gemini", exc)
    if "connection" in body or "timeout" in body or "unreachable" in body:
        return ProviderError("network", "gemini", exc)
    return ProviderError("unknown", "gemini", exc)

# ------------------------------------------------------------------ provider setup

def set_provider(provider: str, model: str):
    global _provider, _model
    _provider = provider
    _model = model


def reset_session():
    global _gemini_client, _gemini_chat
    global _anthropic_client, _anthropic_history
    _gemini_client = None
    _gemini_chat = None
    _anthropic_client = None
    _anthropic_history = []
    file_cache.reset_client()

# ------------------------------------------------------------------ model listing

def list_models(provider: str) -> list[str]:
    creds = _load_credentials()
    if provider == "gemini":
        return _list_gemini_models(creds["gemini"]["api_key"])
    elif provider == "anthropic":
        return _list_anthropic_models()
    return []


def _list_gemini_models(api_key: str) -> list[str]:
    from google import genai
    try:
        client = genai.Client(api_key=api_key)
        models = []
        for m in client.models.list():
            name = m.name
            if name.startswith("models/"):
                name = name[len("models/"):]
            if "gemini" in name.lower():
                models.append(name)
        return sorted(models)
    except Exception as exc:
        raise _classify_gemini_error(exc) from exc


def _list_anthropic_models() -> list[str]:
    import anthropic
    try:
        creds = _load_credentials()
        client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])
        models = []
        for m in client.models.list():
            models.append(m.id)
        return sorted(models)
    except Exception as exc:
        raise _classify_anthropic_error(exc) from exc

# ------------------------------------------------------------------ tool definition

TOOL_NAME = "run_powershell"


def _build_anthropic_tools() -> list[dict]:
    """Build the full Anthropic tools list: run_powershell + MCP file tools."""
    mcp_tools = []
    for spec in mcp_client.MCP_TOOL_SPECS:
        mcp_tools.append({
            "name": spec["name"],
            "description": spec["description"],
            "input_schema": spec["parameters"],
        })
    powershell_tool = {
        "name": TOOL_NAME,
        "description": (
            "Run a PowerShell script within the project base_dir. "
            "Use this to create, edit, rename, or delete files and directories. "
            "The working directory is set to base_dir automatically. "
            "Always prefer targeted edits over full rewrites where possible. "
            "stdout and stderr are returned to you as the result."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "script": {
                    "type": "string",
                    "description": "The PowerShell script to execute."
                }
            },
            "required": ["script"]
        },
        "cache_control": {"type": "ephemeral"},
    }
    return mcp_tools + [powershell_tool]


_ANTHROPIC_TOOLS = _build_anthropic_tools()

def _gemini_tool_declaration():
    from google.genai import types

    declarations = []

    # MCP file tools
    for spec in mcp_client.MCP_TOOL_SPECS:
        props = {}
        for pname, pdef in spec["parameters"].get("properties", {}).items():
            props[pname] = types.Schema(
                type=types.Type.STRING,
                description=pdef.get("description", ""),
            )
        declarations.append(types.FunctionDeclaration(
            name=spec["name"],
            description=spec["description"],
            parameters=types.Schema(
                type=types.Type.OBJECT,
                properties=props,
                required=spec["parameters"].get("required", []),
            ),
        ))

    # PowerShell tool
    declarations.append(types.FunctionDeclaration(
        name=TOOL_NAME,
        description=(
            "Run a PowerShell script within the project base_dir. "
            "Use this to create, edit, rename, or delete files and directories. "
            "The working directory is set to base_dir automatically. "
            "stdout and stderr are returned to you as the result."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties={
                "script": types.Schema(
                    type=types.Type.STRING,
                    description="The PowerShell script to execute."
                )
            },
            required=["script"]
        ),
    ))

    return types.Tool(function_declarations=declarations)

def _run_script(script: str, base_dir: str) -> str:
    if confirm_and_run_callback is None:
        return "ERROR: no confirmation handler registered"
    result = confirm_and_run_callback(script, base_dir)
    if result is None:
        output = "USER REJECTED: command was not executed"
    else:
        output = result
    if tool_log_callback is not None:
        tool_log_callback(script, output)
    return output

# ------------------------------------------------------------------ dynamic file context refresh

def _reread_file_items(file_items: list[dict]) -> list[dict]:
    """
    Re-read every file in file_items from disk, returning a fresh list.
    This is called after tool calls so the AI sees updated file contents.
    """
    refreshed = []
    for item in file_items:
        path = item.get("path")
        if path is None:
            refreshed.append(item)
            continue
        p = path if isinstance(path, Path) else Path(path)
        try:
            content = p.read_text(encoding="utf-8")
            refreshed.append({**item, "content": content, "error": False})
        except Exception as e:
            refreshed.append({**item, "content": f"ERROR re-reading {p}: {e}", "error": True})
    return refreshed

def _build_file_context_text(file_items: list[dict]) -> str:
    """
    Build a compact text summary of all files from file_items, suitable for
    injecting into a tool_result message so the AI sees current file contents.
    """
    if not file_items:
        return ""
    parts = []
    for item in file_items:
        path = item.get("path") or item.get("entry", "unknown")
        suffix = str(path).rsplit(".", 1)[-1] if "." in str(path) else "text"
        content = item.get("content", "")
        parts.append(f"### `{path}`\n\n```{suffix}\n{content}\n```")
    return "\n\n---\n\n".join(parts)

# ------------------------------------------------------------------ content block serialisation

def _content_block_to_dict(block) -> dict:
    """
    Convert an Anthropic SDK content block object to a plain dict.
    This ensures history entries are always JSON-serialisable dicts,
    not opaque SDK objects that may fail on re-serialisation.
    """
    if isinstance(block, dict):
        return block
    if hasattr(block, "model_dump"):
        return block.model_dump()
    if hasattr(block, "to_dict"):
        return block.to_dict()
    # Fallback: manually construct based on type
    block_type = getattr(block, "type", None)
    if block_type == "text":
        return {"type": "text", "text": block.text}
    if block_type == "tool_use":
        return {"type": "tool_use", "id": block.id, "name": block.name, "input": block.input}
    return {"type": "text", "text": str(block)}

# ------------------------------------------------------------------ gemini

def _ensure_gemini_client():
    global _gemini_client
    if _gemini_client is None:
        from google import genai
        creds = _load_credentials()
        _gemini_client = genai.Client(api_key=creds["gemini"]["api_key"])

def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
    global _gemini_chat
    from google.genai import types

    try:
        _ensure_gemini_client()
        mcp_client.configure(file_items or [], [base_dir])

        system_text = _get_combined_system_prompt() + f"\n\n<context>\n{md_content}\n</context>"
        if _gemini_chat is None:
            _gemini_chat = _gemini_client.chats.create(
                model=_model,
                config=types.GenerateContentConfig(
                    system_instruction=system_text,
                    tools=[_gemini_tool_declaration()]
                )
            )
        else:
            _gemini_chat = _gemini_client.chats.create(
                model=_model,
                config=types.GenerateContentConfig(
                    system_instruction=system_text,
                    tools=[_gemini_tool_declaration()]
                ),
                history=_gemini_chat.get_history()
            )

        payload_to_send = user_message

        _append_comms("OUT", "request", {
            "message": f"[context {len(md_content)} chars + user message {len(user_message)} chars]",
        })

        all_text_parts = []

        # We allow MAX_TOOL_ROUNDS, plus 1 final loop to get the text synthesis
        for round_idx in range(MAX_TOOL_ROUNDS + 2):
            response = _gemini_chat.send_message(payload_to_send)

            text_parts_raw = [
                part.text
                for candidate in response.candidates
                for part in candidate.content.parts
                if hasattr(part, "text") and part.text
            ]
            if text_parts_raw:
                all_text_parts.append("\n".join(text_parts_raw))

            tool_calls = [
                part.function_call
                for candidate in response.candidates
                for part in candidate.content.parts
                if hasattr(part, "function_call") and part.function_call is not None
            ]

            usage_dict = {}
            if hasattr(response, "usage_metadata") and response.usage_metadata:
                meta = response.usage_metadata
                if hasattr(meta, "prompt_token_count") and meta.prompt_token_count is not None:
                    usage_dict["input_tokens"] = meta.prompt_token_count
                if hasattr(meta, "candidates_token_count") and meta.candidates_token_count is not None:
                    usage_dict["output_tokens"] = meta.candidates_token_count
                if hasattr(meta, "cached_content_token_count") and meta.cached_content_token_count:
                    usage_dict["cache_read_input_tokens"] = meta.cached_content_token_count

            stop_reason = ""
            if response.candidates and hasattr(response.candidates[0], "finish_reason"):
                fr = response.candidates[0].finish_reason
                stop_reason = str(fr.name) if hasattr(fr, "name") else str(fr)

            _append_comms("IN", "response", {
                "round": round_idx,
                "stop_reason": stop_reason,
                "text": "\n".join(text_parts_raw),
                "tool_calls": [{"name": fc.name, "args": dict(fc.args)} for fc in tool_calls],
                "usage": usage_dict,
            })

            if not tool_calls:
                break

            if round_idx > MAX_TOOL_ROUNDS:
                # The model ignored the MAX ROUNDS warning and kept calling tools.
                # Force abort to prevent infinite loop.
                break

            function_responses = []
            sent_results_log = []

            for i, fc in enumerate(tool_calls):
                fc_name = fc.name
                fc_args = dict(fc.args)

                if fc_name in mcp_client.TOOL_NAMES:
                    _append_comms("OUT", "tool_call", {"name": fc_name, "args": fc_args})
                    output = mcp_client.dispatch(fc_name, fc_args)
                    _append_comms("IN", "tool_result", {"name": fc_name, "output": output})
                elif fc_name == TOOL_NAME:
                    script = fc_args.get("script", "")
                    _append_comms("OUT", "tool_call", {"name": TOOL_NAME, "script": script})
                    output = _run_script(script, base_dir)
                    _append_comms("IN", "tool_result", {"name": TOOL_NAME, "output": output})
                else:
                    output = f"ERROR: unknown tool '{fc_name}'"

                # Inject dynamic updates directly into the LAST tool's output string.
                # Gemini strictly expects function_responses only, so we piggyback on the string.
                if i == len(tool_calls) - 1:
                    if file_items:
                        file_items = _reread_file_items(file_items)
                        refreshed_ctx = _build_file_context_text(file_items)
                        if refreshed_ctx:
                            output += f"\n\n[SYSTEM: FILES UPDATED — current contents below. Do NOT re-read these files.]\n\n{refreshed_ctx}"

                    if round_idx == MAX_TOOL_ROUNDS:
                        output += "\n\n[SYSTEM WARNING: MAX TOOL ROUNDS REACHED. YOU MUST PROVIDE YOUR FINAL ANSWER NOW WITHOUT CALLING ANY MORE TOOLS.]"

                function_responses.append(
                    types.Part.from_function_response(name=fc_name, response={"output": output})
                )
                sent_results_log.append({"tool_use_id": fc_name, "content": output})

            _append_comms("OUT", "tool_result_send", {"results": sent_results_log})
            payload_to_send = function_responses

        final_text = "\n\n".join(all_text_parts)
        return final_text if final_text.strip() else "(No text returned by the model)"

    except ProviderError:
        raise
    except Exception as exc:
        raise _classify_gemini_error(exc) from exc

# ------------------------------------------------------------------ anthropic history management

# Rough chars-per-token ratio. Anthropic tokeniser averages ~3.5-4 chars/token.
# We use 3.5 to be conservative (overestimate token count = safer).
_CHARS_PER_TOKEN = 3.5

# Maximum token budget for the entire prompt (system + tools + messages).
# Anthropic's limit is 200k. We leave headroom for the response + tool schemas.
_ANTHROPIC_MAX_PROMPT_TOKENS = 180_000

# Marker prefix used to identify stale file-refresh injections in history
_FILE_REFRESH_MARKER = "[FILES UPDATED"

def _estimate_message_tokens(msg: dict) -> int:
    """Rough token estimate for a single Anthropic message dict."""
    total_chars = 0
    content = msg.get("content", "")
    if isinstance(content, str):
        total_chars += len(content)
    elif isinstance(content, list):
        for block in content:
            if isinstance(block, dict):
                text = block.get("text", "") or block.get("content", "")
                if isinstance(text, str):
                    total_chars += len(text)
                # tool_use input
                inp = block.get("input")
                if isinstance(inp, dict):
                    total_chars += len(json.dumps(inp, ensure_ascii=False))
            elif isinstance(block, str):
                total_chars += len(block)
    return max(1, int(total_chars / _CHARS_PER_TOKEN))


def _estimate_prompt_tokens(system_blocks: list[dict], history: list[dict]) -> int:
    """Estimate total prompt tokens: system + tools + all history messages."""
    total = 0
    # System blocks
    for block in system_blocks:
        text = block.get("text", "")
        total += max(1, int(len(text) / _CHARS_PER_TOKEN))
    # Tool definitions (rough fixed estimate — they're ~2k tokens for our set)
    total += 2500
    # History messages
    for msg in history:
        total += _estimate_message_tokens(msg)
    return total

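# Worked example of the heuristic above (illustrative only):
#
#   msg = {"role": "user", "content": "x" * 7000}
#   _estimate_message_tokens(msg)   # -> int(7000 / 3.5) = 2000
#
# So a history totalling roughly 630,000 characters estimates to about
# 180,000 tokens, i.e. right at _ANTHROPIC_MAX_PROMPT_TOKENS.
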
def _strip_stale_file_refreshes(history: list[dict]):
    """
    Remove [FILES UPDATED ...] text blocks from all history turns EXCEPT
    the very last user message. These are stale snapshots from previous
    tool rounds that bloat the context without providing value.
    """
    if len(history) < 2:
        return
    # Find the index of the last user message — we keep its file refresh intact
    last_user_idx = -1
    for i in range(len(history) - 1, -1, -1):
        if history[i].get("role") == "user":
            last_user_idx = i
            break
    for i, msg in enumerate(history):
        if msg.get("role") != "user" or i == last_user_idx:
            continue
        content = msg.get("content")
        if not isinstance(content, list):
            continue
        cleaned = []
        for block in content:
            if isinstance(block, dict) and block.get("type") == "text":
                text = block.get("text", "")
                if text.startswith(_FILE_REFRESH_MARKER):
                    continue  # drop this stale file refresh block
            cleaned.append(block)
        if len(cleaned) < len(content):
            msg["content"] = cleaned

def _trim_anthropic_history(system_blocks: list[dict], history: list[dict]):
    """
    Trim the Anthropic history to fit within the token budget.
    Strategy:
      1. Strip stale file-refresh injections from old turns.
      2. If still over budget, drop oldest turn pairs (user + assistant).
    Returns the number of messages dropped.
    """
    # Phase 1: strip stale file refreshes
    _strip_stale_file_refreshes(history)

    est = _estimate_prompt_tokens(system_blocks, history)
    if est <= _ANTHROPIC_MAX_PROMPT_TOKENS:
        return 0

    # Phase 2: drop oldest turn pairs until within budget
    dropped = 0
    while len(history) > 2 and est > _ANTHROPIC_MAX_PROMPT_TOKENS:
        # Always drop from the front in pairs (user, assistant) to maintain alternation.
        # But be careful: the first message might be user, followed by assistant.
        if history[0].get("role") == "user" and len(history) > 1 and history[1].get("role") == "assistant":
            removed_user = history.pop(0)
            removed_asst = history.pop(0)
            dropped += 2
            est -= _estimate_message_tokens(removed_user)
            est -= _estimate_message_tokens(removed_asst)
            # If the next message is a user tool_result that belonged to the dropped assistant,
            # we need to drop it too to avoid dangling tool_results
            while history and history[0].get("role") == "user":
                content = history[0].get("content", [])
                if isinstance(content, list) and content and isinstance(content[0], dict) and content[0].get("type") == "tool_result":
                    removed_tr = history.pop(0)
                    dropped += 1
                    est -= _estimate_message_tokens(removed_tr)
                    # And the assistant reply that followed it
                    if history and history[0].get("role") == "assistant":
                        removed_a2 = history.pop(0)
                        dropped += 1
                        est -= _estimate_message_tokens(removed_a2)
                else:
                    break
        else:
            # Edge case: history starts with something unexpected. Drop one message.
            removed = history.pop(0)
            dropped += 1
            est -= _estimate_message_tokens(removed)

    return dropped

# ------------------------------------------------------------------ anthropic

def _ensure_anthropic_client():
    global _anthropic_client
    if _anthropic_client is None:
        import anthropic
        creds = _load_credentials()
        _anthropic_client = anthropic.Anthropic(api_key=creds["anthropic"]["api_key"])


def _chunk_text(text: str, chunk_size: int) -> list[str]:
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

def _build_chunked_context_blocks(md_content: str) -> list[dict]:
    """
    Split md_content into <=_ANTHROPIC_CHUNK_SIZE char chunks.
    cache_control:ephemeral is placed only on the LAST block so the whole
    prefix is cached as one unit.
    """
    chunks = _chunk_text(md_content, _ANTHROPIC_CHUNK_SIZE)
    blocks = []
    for i, chunk in enumerate(chunks):
        block: dict = {"type": "text", "text": chunk}
        if i == len(chunks) - 1:
            block["cache_control"] = {"type": "ephemeral"}
        blocks.append(block)
    return blocks

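# Illustrative example of the resulting chunk/cache layout (not executed here):
#
#   blocks = _build_chunked_context_blocks("x" * 250_000)
#   len(blocks)                    # -> 3 blocks (120k + 120k + 10k chars)
#   "cache_control" in blocks[0]   # -> False
#   "cache_control" in blocks[-1]  # -> True (only the final block is marked)
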
def _strip_cache_controls(history: list[dict]):
    """
    Remove cache_control from all content blocks in message history.
    Anthropic allows max 4 cache_control blocks total across system + tools +
    messages. We reserve those slots for the stable system/tools prefix and
    the current turn's context block, so all older history entries must be clean.
    """
    for msg in history:
        content = msg.get("content")
        if isinstance(content, list):
            for block in content:
                if isinstance(block, dict):
                    block.pop("cache_control", None)

def _repair_anthropic_history(history: list[dict]):
    """
    If history ends with an assistant message that contains tool_use blocks
    without a following user tool_result message, append a synthetic tool_result
    message so the history is valid before the next request.
    """
    if not history:
        return
    last = history[-1]
    if last.get("role") != "assistant":
        return
    content = last.get("content", [])
    tool_use_ids = []
    for block in content:
        if isinstance(block, dict):
            if block.get("type") == "tool_use":
                tool_use_ids.append(block["id"])
    if not tool_use_ids:
        return
    history.append({
        "role": "user",
        "content": [
            {
                "type": "tool_result",
                "tool_use_id": tid,
                "content": "Tool call was not completed (session interrupted).",
            }
            for tid in tool_use_ids
        ],
    })

def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_items: list[dict] | None = None) -> str:
    try:
        _ensure_anthropic_client()
        mcp_client.configure(file_items or [], [base_dir])

        system_text = _get_combined_system_prompt() + f"\n\n<context>\n{md_content}\n</context>"
        system_blocks = _build_chunked_context_blocks(system_text)

        user_content = [{"type": "text", "text": user_message}]

        _strip_cache_controls(_anthropic_history)
        _repair_anthropic_history(_anthropic_history)
        _anthropic_history.append({"role": "user", "content": user_content})

        n_chunks = len(system_blocks)
        _append_comms("OUT", "request", {
            "message": (
                f"[system {n_chunks} chunk(s), {len(md_content)} chars context] "
                f"{user_message[:200]}{'...' if len(user_message) > 200 else ''}"
            ),
        })

        all_text_parts = []

        # We allow MAX_TOOL_ROUNDS, plus 1 final loop to get the text synthesis
        for round_idx in range(MAX_TOOL_ROUNDS + 2):
            # Trim history to fit within token budget before each API call
            dropped = _trim_anthropic_history(system_blocks, _anthropic_history)
            if dropped > 0:
                est_tokens = _estimate_prompt_tokens(system_blocks, _anthropic_history)
                _append_comms("OUT", "request", {
                    "message": (
                        f"[HISTORY TRIMMED: dropped {dropped} old messages to fit token budget. "
                        f"Estimated {est_tokens} tokens remaining. {len(_anthropic_history)} messages in history.]"
                    ),
                })

            response = _anthropic_client.messages.create(
                model=_model,
                max_tokens=16384,
                system=system_blocks,
                tools=_build_anthropic_tools(),
                messages=_anthropic_history,
            )

            # Convert SDK content block objects to plain dicts before storing in history
            serialised_content = [_content_block_to_dict(b) for b in response.content]

            _anthropic_history.append({
                "role": "assistant",
                "content": serialised_content,
            })

            text_blocks = [b.text for b in response.content if hasattr(b, "text") and b.text]
            if text_blocks:
                all_text_parts.append("\n".join(text_blocks))

            tool_use_blocks = [
                {"id": b.id, "name": b.name, "input": b.input}
                for b in response.content
                if getattr(b, "type", None) == "tool_use"
            ]

            usage_dict: dict = {}
            if response.usage:
                usage_dict["input_tokens"] = response.usage.input_tokens
                usage_dict["output_tokens"] = response.usage.output_tokens
                cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
                cache_read = getattr(response.usage, "cache_read_input_tokens", None)
                if cache_creation is not None:
                    usage_dict["cache_creation_input_tokens"] = cache_creation
                if cache_read is not None:
                    usage_dict["cache_read_input_tokens"] = cache_read

            _append_comms("IN", "response", {
                "round": round_idx,
                "stop_reason": response.stop_reason,
                "text": "\n".join(text_blocks),
                "tool_calls": tool_use_blocks,
                "usage": usage_dict,
            })

            if response.stop_reason != "tool_use" or not tool_use_blocks:
                break

            if round_idx > MAX_TOOL_ROUNDS:
                # The model ignored the MAX ROUNDS warning and kept calling tools.
                # Force abort to prevent infinite loop.
                break

            tool_results = []
            for block in response.content:
                if getattr(block, "type", None) != "tool_use":
                    continue
                b_name = getattr(block, "name", None)
                b_id = getattr(block, "id", "")
                b_input = getattr(block, "input", {})
                if b_name in mcp_client.TOOL_NAMES:
                    _append_comms("OUT", "tool_call", {"name": b_name, "id": b_id, "args": b_input})
                    output = mcp_client.dispatch(b_name, b_input)
                    _append_comms("IN", "tool_result", {"name": b_name, "id": b_id, "output": output})
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": b_id,
                        "content": output,
                    })
                elif b_name == TOOL_NAME:
                    script = b_input.get("script", "")
                    _append_comms("OUT", "tool_call", {
                        "name": TOOL_NAME,
                        "id": b_id,
                        "script": script,
                    })
                    output = _run_script(script, base_dir)
                    _append_comms("IN", "tool_result", {
                        "name": TOOL_NAME,
                        "id": b_id,
                        "output": output,
                    })
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": b_id,
                        "content": output,
                    })

            # Refresh file context after tool calls and inject into tool result message
            if file_items:
                file_items = _reread_file_items(file_items)
                refreshed_ctx = _build_file_context_text(file_items)
                if refreshed_ctx:
                    tool_results.append({
                        "type": "text",
                        "text": (
                            "[FILES UPDATED — current contents below. "
                            "Do NOT re-read these files with PowerShell.]\n\n"
                            + refreshed_ctx
                        ),
                    })

            if round_idx == MAX_TOOL_ROUNDS:
                tool_results.append({
                    "type": "text",
                    "text": "SYSTEM WARNING: MAX TOOL ROUNDS REACHED. YOU MUST PROVIDE YOUR FINAL ANSWER NOW WITHOUT CALLING ANY MORE TOOLS."
                })

            _anthropic_history.append({
                "role": "user",
                "content": tool_results,
            })

            _append_comms("OUT", "tool_result_send", {
                "results": [
                    {"tool_use_id": r["tool_use_id"], "content": r["content"]}
                    for r in tool_results if r.get("type") == "tool_result"
                ],
            })

        final_text = "\n\n".join(all_text_parts)
        return final_text if final_text.strip() else "(No text returned by the model)"

    except ProviderError:
        raise
    except Exception as exc:
        raise _classify_anthropic_error(exc) from exc

# ------------------------------------------------------------------ unified send

def send(
    md_content: str,
    user_message: str,
    base_dir: str = ".",
    file_items: list[dict] | None = None,
) -> str:
    """
    Send a message to the active provider.

    md_content  : aggregated markdown string from aggregate.run()
    user_message: the user question / instruction
    base_dir    : project base directory (for PowerShell tool calls)
    file_items  : list of file dicts from aggregate.build_file_items() for
                  dynamic context refresh after tool calls
    """
    if _provider == "gemini":
        return _send_gemini(md_content, user_message, base_dir, file_items)
    elif _provider == "anthropic":
        return _send_anthropic(md_content, user_message, base_dir, file_items)
    raise ValueError(f"unknown provider: {_provider}")
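

# Minimal end-to-end usage sketch (illustrative; assumes credentials.toml holds
# the relevant API key and that gui.py, or a stand-in, has registered a
# confirmation handler):
#
#   import ai_client
#
#   ai_client.set_provider("anthropic", "<model id from list_models('anthropic')>")
#   ai_client.confirm_and_run_callback = my_confirm_and_run  # hypothetical handler
#
#   md_content = ...  # aggregated markdown from aggregate.run()
#   try:
#       reply = ai_client.send(md_content, "Summarise the project", base_dir=".")
#   except ai_client.ProviderError as err:
#       reply = err.ui_message()
#   print(reply)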