feat(token-viz): Phase 1 — token budget panel with color bar and breakdown table
This commit is contained in:
47
ai_client.py
47
ai_client.py
@@ -1715,6 +1715,19 @@ def send(
|
||||
else:
|
||||
raise ValueError(f"Unknown provider: {_provider}")
|
||||
|
||||
def _add_bleed_derived(d: dict[str, Any], sys_tok: int = 0, tool_tok: int = 0) -> dict[str, Any]:
|
||||
cur = d.get("current", 0)
|
||||
lim = d.get("limit", 0)
|
||||
d["estimated_prompt_tokens"] = cur
|
||||
d["max_prompt_tokens"] = lim
|
||||
d["utilization_pct"] = d.get("percentage", 0.0)
|
||||
d["headroom_tokens"] = max(0, lim - cur)
|
||||
d["would_trim"] = (lim - cur) < 20000
|
||||
d["system_tokens"] = sys_tok
|
||||
d["tools_tokens"] = tool_tok
|
||||
d["history_tokens"] = max(0, cur - sys_tok - tool_tok)
|
||||
return d
|
||||
|
||||
def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
"""
|
||||
Calculates how close the current conversation history is to the token limit.
|
||||
@@ -1724,17 +1737,19 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
# For Anthropic, we have a robust estimator
|
||||
with _anthropic_history_lock:
|
||||
history_snapshot = list(_anthropic_history)
|
||||
hist_only = _estimate_prompt_tokens([], history_snapshot) - 2500 # subtract fixed tools
|
||||
sys_tok = max(1, int(len(md_content) / _CHARS_PER_TOKEN)) if md_content else 0
|
||||
current_tokens = _estimate_prompt_tokens([], history_snapshot)
|
||||
if md_content:
|
||||
current_tokens += max(1, int(len(md_content) / _CHARS_PER_TOKEN))
|
||||
limit_tokens = _ANTHROPIC_MAX_PROMPT_TOKENS
|
||||
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
|
||||
return {
|
||||
return _add_bleed_derived({
|
||||
"provider": "anthropic",
|
||||
"limit": limit_tokens,
|
||||
"current": current_tokens,
|
||||
"percentage": percentage,
|
||||
}
|
||||
}, sys_tok=sys_tok, tool_tok=2500)
|
||||
elif _provider == "gemini":
|
||||
effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
|
||||
if _gemini_chat:
|
||||
@@ -1751,24 +1766,24 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
# Prepend context as a user part for counting
|
||||
history.insert(0, types.Content(role="user", parts=[types.Part.from_text(text=md_content)]))
|
||||
if not history:
|
||||
return {
|
||||
return _add_bleed_derived({
|
||||
"provider": "gemini",
|
||||
"limit": effective_limit,
|
||||
"current": 0,
|
||||
"percentage": 0,
|
||||
}
|
||||
})
|
||||
resp = _gemini_client.models.count_tokens(
|
||||
model=_model,
|
||||
contents=history
|
||||
)
|
||||
current_tokens = resp.total_tokens
|
||||
percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
|
||||
return {
|
||||
return _add_bleed_derived({
|
||||
"provider": "gemini",
|
||||
"limit": effective_limit,
|
||||
"current": current_tokens,
|
||||
"percentage": percentage,
|
||||
}
|
||||
}, sys_tok=0, tool_tok=0)
|
||||
except Exception as e:
|
||||
pass
|
||||
elif md_content:
|
||||
@@ -1780,20 +1795,20 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
)
|
||||
current_tokens = resp.total_tokens
|
||||
percentage = (current_tokens / effective_limit) * 100 if effective_limit > 0 else 0
|
||||
return {
|
||||
return _add_bleed_derived({
|
||||
"provider": "gemini",
|
||||
"limit": effective_limit,
|
||||
"current": current_tokens,
|
||||
"percentage": percentage,
|
||||
}
|
||||
})
|
||||
except Exception as e:
|
||||
pass
|
||||
return {
|
||||
return _add_bleed_derived({
|
||||
"provider": "gemini",
|
||||
"limit": effective_limit,
|
||||
"current": 0,
|
||||
"percentage": 0,
|
||||
}
|
||||
})
|
||||
elif _provider == "gemini_cli":
|
||||
effective_limit = _history_trunc_limit if _history_trunc_limit > 0 else _GEMINI_MAX_INPUT_TOKENS
|
||||
limit_tokens = effective_limit
|
||||
@@ -1802,12 +1817,12 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
u = _gemini_cli_adapter.last_usage
|
||||
current_tokens = u.get("input_tokens") or u.get("input", 0)
|
||||
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
|
||||
return {
|
||||
return _add_bleed_derived({
|
||||
"provider": "gemini_cli",
|
||||
"limit": limit_tokens,
|
||||
"current": current_tokens,
|
||||
"percentage": percentage,
|
||||
}
|
||||
})
|
||||
elif _provider == "deepseek":
|
||||
limit_tokens = 64000
|
||||
current_tokens = 0
|
||||
@@ -1829,15 +1844,15 @@ def get_history_bleed_stats(md_content: str | None = None) -> dict[str, Any]:
|
||||
if md_content: current_tokens += len(md_content)
|
||||
current_tokens = max(1, int(current_tokens / _CHARS_PER_TOKEN))
|
||||
percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
|
||||
return {
|
||||
return _add_bleed_derived({
|
||||
"provider": "deepseek",
|
||||
"limit": limit_tokens,
|
||||
"current": current_tokens,
|
||||
"percentage": percentage,
|
||||
}
|
||||
return {
|
||||
})
|
||||
return _add_bleed_derived({
|
||||
"provider": _provider,
|
||||
"limit": 0,
|
||||
"current": 0,
|
||||
"percentage": 0,
|
||||
}
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user