feat(token-viz): Phase 1 — token budget panel with color bar and breakdown table

This commit is contained in:
2026-03-02 11:16:32 -05:00
parent 80ebc9c4b1
commit 5bfb20f06f
5 changed files with 206 additions and 24 deletions

View File

@@ -1715,6 +1715,19 @@ def send(
else:
raise ValueError(f"Unknown provider: {_provider}")
def _add_bleed_derived(d: dict[str, Any], sys_tok: int = 0, tool_tok: int = 0) -> dict[str, Any]:
cur = d.get("current", 0)
lim = d.get("limit", 0)
d["estimated_prompt_tokens"] = cur
d["max_prompt_tokens"] = lim
d["utilization_pct"] = d.get("percentage", 0.0)
d["headroom_tokens"] = max(0, lim - cur)
d["would_trim"] = (lim - cur) < 20000
d["system_tokens"] = sys_tok
d["tools_tokens"] = tool_tok
d["history_tokens"] = max(0, cur - sys_tok - tool_tok)
return d
def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
"""
Calculates how close the current conversation history is to the token limit.
@@ -1724,17 +1737,19 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
# For Anthropic, we have a robust estimator
with _anthropic_history_lock:
history_snapshot = list(_anthropic_history)
hist_only = _estimate_prompt_tokens([], history_snapshot) - 2500 # subtract fixed tools
sys_tok = max(1, int(len(md_content) / _CHARS_PER_TOKEN)) if md_content else 0
current_tokens = _estimate_prompt_tokens([], history_snapshot)
if md_content:
current_tokens += max(1, int(len(md_content) / _CHARS_PER_TOKEN))
limit_tokens = _ANTHROPIC_MAX_PROMPT_TOKENS
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
return {
return _add_bleed_derived({
"provider": "anthropic",
"limit": limit_tokens,
"current": current_tokens,
"percentage": percentage,
}
}, sys_tok=sys_tok, tool_tok=2500)
elif _provider == "gemini":
effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
if _gemini_chat:
@@ -1751,24 +1766,24 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
# Prepend context as a user part for counting
history.insert(0, types.Content(role="user", parts=[types.Part.from_text(text=md_content)]))
if not history:
return {
return _add_bleed_derived({
"provider": "gemini",
"limit": effective_limit,
"current": 0,
"percentage": 0,
}
})
resp = _gemini_client.models.count_tokens(
model=_model,
contents=history
)
current_tokens = resp.total_tokens
percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
return {
return _add_bleed_derived({
"provider": "gemini",
"limit": effective_limit,
"current": current_tokens,
"percentage": percentage,
}
}, sys_tok=0, tool_tok=0)
except Exception as e:
pass
elif md_content:
@@ -1780,20 +1795,20 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
)
current_tokens = resp.total_tokens
percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
return {
return _add_bleed_derived({
"provider": "gemini",
"limit": effective_limit,
"current": current_tokens,
"percentage": percentage,
}
})
except Exception as e:
pass
return {
return _add_bleed_derived({
"provider": "gemini",
"limit": effective_limit,
"current": 0,
"percentage": 0,
}
})
elif _provider == "gemini_cli":
effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
limit_tokens = effective_limit
@@ -1802,12 +1817,12 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
u = _gemini_cli_adapter.last_usage
current_tokens = u.get("input_tokens") or u.get("input", 0)
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
return {
return _add_bleed_derived({
"provider": "gemini_cli",
"limit": limit_tokens,
"current": current_tokens,
"percentage": percentage,
}
})
elif _provider == "deepseek":
limit_tokens = 64000
current_tokens = 0
@@ -1829,15 +1844,15 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
if md_content: current_tokens += len(md_content)
current_tokens = max(1, int(current_tokens / _CHARS_PER_TOKEN))
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
return {
return _add_bleed_derived({
"provider": "deepseek",
"limit": limit_tokens,
"current": current_tokens,
"percentage": percentage,
}
return {
})
return _add_bleed_derived({
"provider": _provider,
"limit": 0,
"current": 0,
"percentage": 0,
}
})