feat(tokens): Implement accurate token counting for Gemini history
This commit is contained in:
24
ai_client.py
24
ai_client.py
@@ -1192,9 +1192,27 @@ def get_history_bleed_stats() -> dict:
             "percentage": percentage,
         }
     elif _provider == "gemini":
         # Ask the Gemini server for an accurate token count of the current
         # chat history via count_tokens; fall back to a limit-only response
         # below if the client/history is unavailable or the call fails.
         if _gemini_chat:
             try:
                 _ensure_gemini_client()
                 history = _get_gemini_history_list(_gemini_chat)
                 if history:
                     resp = _gemini_client.models.count_tokens(
                         model=_model,
                         contents=history
                     )
                     current_tokens = resp.total_tokens
                     limit_tokens = _GEMINI_MAX_INPUT_TOKENS
                     percentage = (current_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0
                     return {
                         "provider": "gemini",
                         "limit": limit_tokens,
                         "current": current_tokens,
                         "percentage": percentage,
                     }
             except Exception:
                 pass

         return {
             "provider": "gemini",
             "limit": _GEMINI_MAX_INPUT_TOKENS,
||||
Reference in New Issue
Block a user