feat(token-viz): Phase 1 — token budget panel with color bar and breakdown table

2026-03-02 11:16:32 -05:00
parent 80ebc9c4b1
commit 5bfb20f06f
5 changed files with 206 additions and 24 deletions
--- a/ai_client.py
+++ b/ai_client.py
@@ -1715,6 +1715,19 @@ def send(
  else:
   raise ValueError(f"Unknown provider: {_provider}")

+def _add_bleed_derived(d: dict[str, Any], sys_tok: int = 0, tool_tok: int = 0) -> dict[str, Any]:  # Adds derived token-budget fields to d in place and returns it.
+ cur = d.get("current", 0)  # value of the "current" key; 0 when the key is absent
+ lim = d.get("limit", 0)  # value of the "limit" key; 0 when the key is absent
+ d["estimated_prompt_tokens"] = cur  # same value as "current" under a second key
+ d["max_prompt_tokens"] = lim  # same value as "limit" under a second key
+ d["utilization_pct"] = d.get("percentage", 0.0)  # copy of "percentage"; 0.0 when the key is absent
+ d["headroom_tokens"] = max(0, lim - cur)  # limit minus current, clamped so it is never negative
+ d["would_trim"] = (lim - cur) < 20000  # True when limit - current is below 20000; NOTE(review): also True when limit is 0 - confirm intended
+ d["system_tokens"] = sys_tok  # stored exactly as passed by the caller
+ d["tools_tokens"] = tool_tok  # stored exactly as passed by the caller
+ d["history_tokens"] = max(0, cur - sys_tok - tool_tok)  # current minus system and tools, clamped at 0
+ return d  # the same dict object that was passed in, now mutated
+
 def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
 """
    Calculates how close the current conversation history is to the token limit.
@@ -1724,17 +1737,19 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
 # For Anthropic, we have a robust estimator
  with _anthropic_history_lock:
   history_snapshot = list(_anthropic_history)
+  hist_only = _estimate_prompt_tokens([], history_snapshot) - 2500  # subtract fixed tools
+  sys_tok = max(1, int(len(md_content) / _CHARS_PER_TOKEN)) if md_content else 0
  current_tokens = _estimate_prompt_tokens([], history_snapshot)
  if md_content:
   current_tokens += max(1, int(len(md_content) / _CHARS_PER_TOKEN))
  limit_tokens = _ANTHROPIC_MAX_PROMPT_TOKENS
  percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
-  return {
+  return _add_bleed_derived({
   "provider": "anthropic",
   "limit": limit_tokens,
   "current": current_tokens,
   "percentage": percentage,
-  }
+  }, sys_tok=sys_tok, tool_tok=2500)
 elif _provider == "gemini":
  effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
  if _gemini_chat:
@@ -1751,24 +1766,24 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
    # Prepend context as a user part for counting
     history.insert(0, types.Content(role="user", parts=[types.Part.from_text(text=md_content)]))
    if not history:
-     return {
+     return _add_bleed_derived({
      "provider": "gemini",
      "limit": effective_limit,
      "current": 0,
      "percentage": 0,
-     }
+     })
    resp = _gemini_client.models.count_tokens(
     model=_model,
     contents=history
    )
    current_tokens = resp.total_tokens
    percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
-    return {
+    return _add_bleed_derived({
     "provider": "gemini",
     "limit": effective_limit,
     "current": current_tokens,
     "percentage": percentage,
-    }
+    }, sys_tok=0, tool_tok=0)
   except Exception as e:
    pass
  elif md_content:
@@ -1780,20 +1795,20 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
    )
    current_tokens = resp.total_tokens
    percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
-    return {
+    return _add_bleed_derived({
     "provider": "gemini",
     "limit": effective_limit,
     "current": current_tokens,
     "percentage": percentage,
-    }
+    })
   except Exception as e:
    pass
-  return {
+  return _add_bleed_derived({
   "provider": "gemini",
   "limit": effective_limit,
   "current": 0,
   "percentage": 0,
-  }
+  })
 elif _provider == "gemini_cli":
  effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
  limit_tokens = effective_limit
@@ -1802,12 +1817,12 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
   u = _gemini_cli_adapter.last_usage
   current_tokens = u.get("input_tokens") or u.get("input", 0)
  percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
-  return {
+  return _add_bleed_derived({
   "provider": "gemini_cli",
   "limit": limit_tokens,
   "current": current_tokens,
   "percentage": percentage,
-  }
+  })
 elif _provider == "deepseek":
  limit_tokens = 64000
  current_tokens = 0
@@ -1829,15 +1844,15 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
  if md_content: current_tokens += len(md_content)
  current_tokens = max(1, int(current_tokens / _CHARS_PER_TOKEN))
  percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
-  return {
+  return _add_bleed_derived({
   "provider": "deepseek",
   "limit": limit_tokens,
   "current": current_tokens,
   "percentage": percentage,
-  }
- return {
+  })
+ return _add_bleed_derived({
  "provider": _provider,
  "limit": 0,
  "current": 0,
  "percentage": 0,
- }
+ })