feat(token-viz): Phase 2 — trim warning, Gemini/Anthropic cache status display

This commit is contained in:
2026-03-02 11:23:57 -05:00
parent 1c8b094a77
commit 7b5d9b1212
3 changed files with 80 additions and 4 deletions

View File

@@ -11,10 +11,10 @@ Architecture reference: [docs/guide_architecture.md](../../docs/guide_architectu
## Phase 2: Trimming Preview & Cache Status
- [ ] Task 2.1: When `stats.get('would_trim')` is True, render a warning: `imgui.text_colored(ImVec4(1,0.3,0,1), "WARNING: Next call will trim history")`. Below it, show `f"Trimmable turns: {stats['trimmable_turns']}"`. If `stats` contains per-message breakdown, render the first 3 trimmable messages with their role and token count in a compact list.
- [ ] Task 2.2: Add Gemini cache status display. Read `ai_client._gemini_cache` (check `is not None`), `ai_client._gemini_cache_created_at`, and `ai_client._GEMINI_CACHE_TTL`. If cache exists, show: `"Gemini Cache: ACTIVE | Age: {age_seconds}s / {ttl}s | Renews at: {ttl * 0.9:.0f}s"`. If not, show `"Gemini Cache: INACTIVE"`. Guard with `if ai_client._provider == "gemini":`.
- [ ] Task 2.3: Add Anthropic cache hint. When provider is `"anthropic"`, show: `"Anthropic: 4-breakpoint ephemeral caching (auto-managed)"` with the number of history turns and whether the latest response used cache reads (check last comms log entry for `cache_read_input_tokens`).
- [ ] Task 2.4: Write tests for trimming warning visibility and cache status display.
- [x] Task 2.1: When `stats.get('would_trim')` is True, render a warning: `imgui.text_colored(ImVec4(1,0.3,0,1), "WARNING: Next call will trim history")`. Below it, show `f"Trimmable turns: {stats['trimmable_turns']}"`. If `stats` contains per-message breakdown, render the first 3 trimmable messages with their role and token count in a compact list.
- [x] Task 2.2: Add Gemini cache status display. Read `ai_client._gemini_cache` (check `is not None`), `ai_client._gemini_cache_created_at`, and `ai_client._GEMINI_CACHE_TTL`. If cache exists, show: `"Gemini Cache: ACTIVE | Age: {age_seconds}s / {ttl}s | Renews at: {ttl * 0.9:.0f}s"`. If not, show `"Gemini Cache: INACTIVE"`. Guard with `if ai_client._provider == "gemini":`.
- [x] Task 2.3: Add Anthropic cache hint. When provider is `"anthropic"`, show: `"Anthropic: 4-breakpoint ephemeral caching (auto-managed)"` with the number of history turns and whether the latest response used cache reads (check last comms log entry for `cache_read_input_tokens`).
- [~] Task 2.4: Write tests for trimming warning visibility and cache status display.
## Phase 3: Auto-Refresh & Integration

View File

@@ -2765,6 +2765,41 @@ class App:
imgui.table_set_column_index(1); imgui.text(f"{tok:,}")
imgui.table_set_column_index(2); imgui.text(f"{tok / total_tok * 100:.0f}%")
imgui.end_table()
if stats.get("would_trim"):
imgui.text_colored(imgui.ImVec4(1.0, 0.3, 0.0, 1.0), "WARNING: Next call will trim history")
trimmable = stats.get("trimmable_turns", 0)
if trimmable:
imgui.text_disabled(f"Trimmable turns: {trimmable}")
msgs = stats.get("messages")
if msgs:
shown = 0
for msg in msgs:
if shown >= 3:
break
if msg.get("trimmable"):
role = msg.get("role", "?")
toks = msg.get("tokens", 0)
imgui.text_disabled(f" [{role}] ~{toks:,} tokens")
shown += 1
imgui.separator()
if ai_client._provider == "gemini":
if ai_client._gemini_cache is not None:
age = time.time() - (ai_client._gemini_cache_created_at or time.time())
ttl = ai_client._GEMINI_CACHE_TTL
imgui.text_colored(C_LBL, f"Gemini Cache: ACTIVE | Age: {age:.0f}s / {ttl}s | Renews at: {ttl * 0.9:.0f}s")
else:
imgui.text_disabled("Gemini Cache: INACTIVE")
elif ai_client._provider == "anthropic":
with ai_client._anthropic_history_lock:
turns = len(ai_client._anthropic_history)
cache_reads = 0
for entry in reversed(ai_client.get_comms_log()):
if entry.get("kind") == "response":
cache_reads = (entry.get("payload") or {}).get("usage", {}).get("cache_read_input_tokens") or 0
break
imgui.text_disabled("Anthropic: 4-breakpoint ephemeral caching (auto-managed)")
imgui.text_disabled(f" {turns} history turns | Cache reads last call: {cache_reads:,}")
def _render_message_panel(self) -> None:
# LIVE indicator

View File

@@ -113,3 +113,44 @@ def test_render_token_budget_panel_empty_stats_no_crash(app_instance: App) -> No
# We can't render ImGui in tests, so just verify the guard condition logic
# by checking the method exists and _token_stats is empty (early-return path)
assert not app_instance._token_stats # falsy — method would return early
# --- Trim warning logic ---
def test_would_trim_boundary_exact() -> None:
    """A headroom of exactly 20000 tokens must not raise the trim flag.

    The trim check is a strict ``headroom < 20000`` comparison, so the
    boundary value itself stays on the "no trim" side.
    """
    stats_in = {
        "provider": "test",
        "limit": 100000,
        "current": 80000,
        "percentage": 80.0,
    }
    derived = _add_bleed_derived(stats_in)
    # Expected headroom for these inputs sits exactly on the threshold.
    assert derived["headroom_tokens"] == 20000
    # Exactly-at-threshold is not "below threshold".
    assert derived["would_trim"] is False
def test_would_trim_just_below_threshold() -> None:
    """One token under the 20000 headroom threshold must set ``would_trim``."""
    stats_in = {
        "provider": "test",
        "limit": 100000,
        "current": 80001,
        "percentage": 80.0,
    }
    derived = _add_bleed_derived(stats_in)
    # Expected headroom for these inputs is one token short of the threshold.
    assert derived["headroom_tokens"] == 19999
    assert derived["would_trim"] is True
def test_would_trim_just_above_threshold() -> None:
    """One token over the 20000 headroom threshold must leave ``would_trim`` off."""
    stats_in = {
        "provider": "test",
        "limit": 100000,
        "current": 79999,
        "percentage": 80.0,
    }
    derived = _add_bleed_derived(stats_in)
    # Expected headroom for these inputs is one token past the threshold.
    assert derived["headroom_tokens"] == 20001
    assert derived["would_trim"] is False
# --- Cache status fields available from ai_client ---
def test_gemini_cache_fields_accessible() -> None:
    """Check that ``ai_client`` exposes the Gemini cache attributes.

    The attributes checked are ``_gemini_cache``,
    ``_gemini_cache_created_at`` and ``_GEMINI_CACHE_TTL``; the TTL must
    additionally be a positive integer.
    """
    for attr_name in (
        "_gemini_cache",
        "_gemini_cache_created_at",
        "_GEMINI_CACHE_TTL",
    ):
        assert hasattr(ai_client, attr_name)
    ttl = ai_client._GEMINI_CACHE_TTL
    assert isinstance(ttl, int)
    assert ttl > 0
def test_anthropic_history_lock_accessible() -> None:
    """Check that ``ai_client`` exposes the Anthropic history and its lock.

    Both ``_anthropic_history_lock`` and ``_anthropic_history`` must be
    present for the cache hint rendering.
    """
    for attr_name in ("_anthropic_history_lock", "_anthropic_history"):
        assert hasattr(ai_client, attr_name)