diff --git a/ai_client.py b/ai_client.py
index 277e6f2..4ca350d 100644
--- a/ai_client.py
+++ b/ai_client.py
@@ -32,7 +32,7 @@ MAX_TOOL_ROUNDS = 10
 
 # Maximum characters per text chunk sent to Anthropic.
 # Kept well under the ~200k token API limit.
-_ANTHROPIC_CHUNK_SIZE = 180_000
+_ANTHROPIC_CHUNK_SIZE = 120_000
 
 _SYSTEM_PROMPT = (
     "You are a helpful coding assistant with access to a PowerShell tool and MCP tools (file access: read_file, list_directory, search_files, get_file_summary, web access: web_search, fetch_url). "
@@ -538,6 +538,139 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str, file_items:
         raise _classify_gemini_error(exc) from exc
 
 
+
+# ------------------------------------------------------------------ anthropic history management
+
+# Rough chars-per-token ratio. The Anthropic tokeniser averages ~3.5-4 chars/token.
+# We use 3.5 to be conservative (overestimating the token count is the safe direction).
+_CHARS_PER_TOKEN = 3.5
+
+# Maximum token budget for the entire prompt (system + tools + messages).
+# Anthropic's limit is 200k. We leave headroom for the response + tool schemas.
+_ANTHROPIC_MAX_PROMPT_TOKENS = 180_000
+
+# Marker prefix used to identify stale file-refresh injections in history.
+_FILE_REFRESH_MARKER = "[FILES UPDATED"
+
+
+def _estimate_message_tokens(msg: dict) -> int:
+    """Rough token estimate for a single Anthropic message dict."""
+    total_chars = 0
+    content = msg.get("content", "")
+    if isinstance(content, str):
+        total_chars += len(content)
+    elif isinstance(content, list):
+        for block in content:
+            if isinstance(block, dict):
+                text = block.get("text", "") or block.get("content", "")
+                if isinstance(text, str):
+                    total_chars += len(text)
+                # tool_use input is JSON-encoded on the wire, so count it that way
+                inp = block.get("input")
+                if isinstance(inp, dict):
+                    import json as _json
+                    total_chars += len(_json.dumps(inp, ensure_ascii=False))
+            elif isinstance(block, str):
+                total_chars += len(block)
+    return max(1, int(total_chars / _CHARS_PER_TOKEN))
+
+
+def _estimate_prompt_tokens(system_blocks: list[dict], history: list[dict]) -> int:
+    """Estimate total prompt tokens: system + tools + all history messages."""
+    total = 0
+    # System blocks
+    for block in system_blocks:
+        text = block.get("text", "")
+        total += max(1, int(len(text) / _CHARS_PER_TOKEN))
+    # Tool definitions (rough fixed estimate; they're ~2k tokens for our set)
+    total += 2500
+    # History messages
+    for msg in history:
+        total += _estimate_message_tokens(msg)
+    return total
+
+
+def _strip_stale_file_refreshes(history: list[dict]):
+    """
+    Remove [FILES UPDATED ...] text blocks from all history turns EXCEPT
+    the very last user message. These are stale snapshots from previous
+    tool rounds that bloat the context without providing value.
+    """
+    if len(history) < 2:
+        return
+    # Find the index of the last user message; its file refresh is kept intact
+    last_user_idx = -1
+    for i in range(len(history) - 1, -1, -1):
+        if history[i].get("role") == "user":
+            last_user_idx = i
+            break
+    for i, msg in enumerate(history):
+        if msg.get("role") != "user" or i == last_user_idx:
+            continue
+        content = msg.get("content")
+        if not isinstance(content, list):
+            continue
+        cleaned = []
+        for block in content:
+            if isinstance(block, dict) and block.get("type") == "text":
+                text = block.get("text", "")
+                if text.startswith(_FILE_REFRESH_MARKER):
+                    continue  # drop this stale file-refresh block
+            cleaned.append(block)
+        if len(cleaned) < len(content):
+            msg["content"] = cleaned or [{"type": "text", "text": "[stale file refresh removed]"}]  # the API rejects empty content
+
+
+def _trim_anthropic_history(system_blocks: list[dict], history: list[dict]):
+    """
+    Trim the Anthropic history to fit within the token budget.
+    Strategy:
+      1. Strip stale file-refresh injections from old turns.
+      2. If still over budget, drop oldest turn pairs (user + assistant).
+    Returns the number of messages dropped.
+    """
+    # Phase 1: strip stale file refreshes
+    _strip_stale_file_refreshes(history)
+
+    est = _estimate_prompt_tokens(system_blocks, history)
+    if est <= _ANTHROPIC_MAX_PROMPT_TOKENS:
+        return 0
+
+    # Phase 2: drop oldest turn pairs until within budget
+    dropped = 0
+    while len(history) > 2 and est > _ANTHROPIC_MAX_PROMPT_TOKENS:
+        # Drop from the front in pairs (user + assistant) to preserve role alternation.
+        # Normal case: a user turn followed by its assistant reply.
+        if history[0].get("role") == "user" and history[1].get("role") == "assistant":
+            removed_user = history.pop(0)
+            removed_asst = history.pop(0)
+            dropped += 2
+            est -= _estimate_message_tokens(removed_user)
+            est -= _estimate_message_tokens(removed_asst)
+            # If the next message is a user tool_result belonging to the dropped
+            # assistant turn, drop it too so no dangling tool_result is left behind
+            while history and history[0].get("role") == "user":
+                content = history[0].get("content", [])
+                if isinstance(content, list) and content and isinstance(content[0], dict) and content[0].get("type") == "tool_result":
+                    removed_tr = history.pop(0)
+                    dropped += 1
+                    est -= _estimate_message_tokens(removed_tr)
+                    # And the assistant reply that followed it
+                    if history and history[0].get("role") == "assistant":
+                        removed_a2 = history.pop(0)
+                        dropped += 1
+                        est -= _estimate_message_tokens(removed_a2)
+                else:
+                    break
+        else:
+            # Edge case: history starts with something unexpected. Drop one message.
+            removed = history.pop(0)
+            dropped += 1
+            est -= _estimate_message_tokens(removed)
+
+    return dropped
+
+
 # ------------------------------------------------------------------ anthropic
 
 def _ensure_anthropic_client():
@@ -640,9 +773,20 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
 
     # We allow MAX_TOOL_ROUNDS, plus 1 final loop to get the text synthesis
     for round_idx in range(MAX_TOOL_ROUNDS + 2):
+        # Trim history to fit within the token budget before each API call
+        dropped = _trim_anthropic_history(system_blocks, _anthropic_history)
+        if dropped > 0:
+            est_tokens = _estimate_prompt_tokens(system_blocks, _anthropic_history)
+            _append_comms("OUT", "request", {
+                "message": (
+                    f"[HISTORY TRIMMED: dropped {dropped} old messages to fit token budget. "
+                    f"Estimated {est_tokens} tokens remaining. {len(_anthropic_history)} messages in history.]"
+                ),
+            })
+
         response = _anthropic_client.messages.create(
             model=_model,
-            max_tokens=8096,
+            max_tokens=16384,
             system=system_blocks,
             tools=_build_anthropic_tools(),
             messages=_anthropic_history,
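
A quick sanity check of the char-based estimator in the first hunk. This is a minimal sketch, assuming ai_client imports cleanly outside the app; the message mirrors Anthropic's content-block format, and the tool_use block (its id, name, and input) is fabricated for illustration.

from ai_client import _estimate_message_tokens

msg = {
    "role": "assistant",
    "content": [
        {"type": "text", "text": "x" * 700},
        # Hypothetical tool call; its input dict is JSON-encoded and counted too.
        {"type": "tool_use", "id": "toolu_1", "name": "read_file",
         "input": {"path": "ai_client.py"}},
    ],
}

print(_estimate_message_tokens(msg))  # 206 = int((700 + 24) / 3.5)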
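
The stripping helper keeps only the newest [FILES UPDATED ...] snapshot. A minimal sketch under the same import assumption; the history payloads and dates below are fabricated.

from ai_client import _strip_stale_file_refreshes

history = [
    {"role": "user", "content": [
        {"type": "text", "text": "[FILES UPDATED 2024-01-01]\nfoo.py ..."},
        {"type": "text", "text": "please refactor foo"},
    ]},
    {"role": "assistant", "content": [{"type": "text", "text": "done"}]},
    {"role": "user", "content": [
        {"type": "text", "text": "[FILES UPDATED 2024-01-02]\nfoo.py ..."},
        {"type": "text", "text": "now add tests"},
    ]},
]

_strip_stale_file_refreshes(history)

assert len(history[0]["content"]) == 1                                # old snapshot dropped
assert history[2]["content"][0]["text"].startswith("[FILES UPDATED")  # latest snapshot kept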
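
And the end-to-end trim: once the estimate exceeds _ANTHROPIC_MAX_PROMPT_TOKENS, the oldest user/assistant pair goes first. A sketch under the same assumptions; the oversized payload is synthetic.

import ai_client

history = [
    {"role": "user", "content": "old question " * 50_000},  # ~650k chars, well over the 180k-token budget
    {"role": "assistant", "content": "old answer"},
    {"role": "user", "content": "current question"},
]
system_blocks = [{"type": "text", "text": "You are a helpful coding assistant."}]

dropped = ai_client._trim_anthropic_history(system_blocks, history)
print(dropped)       # 2: the oldest user/assistant pair was removed
print(len(history))  # 1: only the current question remains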
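
If the 3.5 chars/token heuristic ever needs validating, the Anthropic Python SDK also exposes a server-side counting endpoint. A hedged sketch, not something this patch relies on: it assumes a current SDK, an API key in the environment, and the model alias below is illustrative.

import anthropic

client = anthropic.Anthropic()
count = client.messages.count_tokens(
    model="claude-3-5-sonnet-latest",
    system=[{"type": "text", "text": "You are a helpful coding assistant."}],
    messages=[{"role": "user", "content": "hello"}],
)
print(count.input_tokens)  # exact prompt token count, for comparison against the estimate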