feat(token-viz): Phase 2 — trim warning, Gemini/Anthropic cache status display
This commit is contained in:
@@ -11,10 +11,10 @@ Architecture reference: [docs/guide_architecture.md](../../docs/guide_architectu
|
||||
|
||||
## Phase 2: Trimming Preview & Cache Status
|
||||
|
||||
- [ ] Task 2.1: When `stats.get('would_trim')` is True, render a warning: `imgui.text_colored(ImVec4(1,0.3,0,1), "WARNING: Next call will trim history")`. Below it, show `f"Trimmable turns: {stats['trimmable_turns']}"`. If `stats` contains per-message breakdown, render the first 3 trimmable messages with their role and token count in a compact list.
|
||||
- [ ] Task 2.2: Add Gemini cache status display. Read `ai_client._gemini_cache` (check `is not None`), `ai_client._gemini_cache_created_at`, and `ai_client._GEMINI_CACHE_TTL`. If cache exists, show: `"Gemini Cache: ACTIVE | Age: {age_seconds}s / {ttl}s | Renews at: {ttl * 0.9:.0f}s"`. If not, show `"Gemini Cache: INACTIVE"`. Guard with `if ai_client._provider == "gemini":`.
|
||||
- [ ] Task 2.3: Add Anthropic cache hint. When provider is `"anthropic"`, show: `"Anthropic: 4-breakpoint ephemeral caching (auto-managed)"` with the number of history turns and whether the latest response used cache reads (check last comms log entry for `cache_read_input_tokens`).
|
||||
- [ ] Task 2.4: Write tests for trimming warning visibility and cache status display.
|
||||
- [x] Task 2.1: When `stats.get('would_trim')` is True, render a warning: `imgui.text_colored(ImVec4(1,0.3,0,1), "WARNING: Next call will trim history")`. Below it, show `f"Trimmable turns: {stats['trimmable_turns']}"`. If `stats` contains per-message breakdown, render the first 3 trimmable messages with their role and token count in a compact list.
|
||||
- [x] Task 2.2: Add Gemini cache status display. Read `ai_client._gemini_cache` (check `is not None`), `ai_client._gemini_cache_created_at`, and `ai_client._GEMINI_CACHE_TTL`. If cache exists, show: `"Gemini Cache: ACTIVE | Age: {age_seconds}s / {ttl}s | Renews at: {ttl * 0.9:.0f}s"`. If not, show `"Gemini Cache: INACTIVE"`. Guard with `if ai_client._provider == "gemini":`.
|
||||
- [x] Task 2.3: Add Anthropic cache hint. When provider is `"anthropic"`, show: `"Anthropic: 4-breakpoint ephemeral caching (auto-managed)"` with the number of history turns and whether the latest response used cache reads (check last comms log entry for `cache_read_input_tokens`).
|
||||
- [~] Task 2.4: Write tests for trimming warning visibility and cache status display.
|
||||
|
||||
## Phase 3: Auto-Refresh & Integration
|
||||
|
||||
|
||||
35
gui_2.py
35
gui_2.py
@@ -2765,6 +2765,41 @@ class App:
|
||||
imgui.table_set_column_index(1); imgui.text(f"{tok:,}")
|
||||
imgui.table_set_column_index(2); imgui.text(f"{tok / total_tok * 100:.0f}%")
|
||||
imgui.end_table()
|
||||
if stats.get("would_trim"):
|
||||
imgui.text_colored(imgui.ImVec4(1.0, 0.3, 0.0, 1.0), "WARNING: Next call will trim history")
|
||||
trimmable = stats.get("trimmable_turns", 0)
|
||||
if trimmable:
|
||||
imgui.text_disabled(f"Trimmable turns: {trimmable}")
|
||||
msgs = stats.get("messages")
|
||||
if msgs:
|
||||
shown = 0
|
||||
for msg in msgs:
|
||||
if shown >= 3:
|
||||
break
|
||||
if msg.get("trimmable"):
|
||||
role = msg.get("role", "?")
|
||||
toks = msg.get("tokens", 0)
|
||||
imgui.text_disabled(f" [{role}] ~{toks:,} tokens")
|
||||
shown += 1
|
||||
|
||||
imgui.separator()
|
||||
if ai_client._provider == "gemini":
|
||||
if ai_client._gemini_cache is not None:
|
||||
age = time.time() - (ai_client._gemini_cache_created_at or time.time())
|
||||
ttl = ai_client._GEMINI_CACHE_TTL
|
||||
imgui.text_colored(C_LBL, f"Gemini Cache: ACTIVE | Age: {age:.0f}s / {ttl}s | Renews at: {ttl * 0.9:.0f}s")
|
||||
else:
|
||||
imgui.text_disabled("Gemini Cache: INACTIVE")
|
||||
elif ai_client._provider == "anthropic":
|
||||
with ai_client._anthropic_history_lock:
|
||||
turns = len(ai_client._anthropic_history)
|
||||
cache_reads = 0
|
||||
for entry in reversed(ai_client.get_comms_log()):
|
||||
if entry.get("kind") == "response":
|
||||
cache_reads = (entry.get("payload") or {}).get("usage", {}).get("cache_read_input_tokens") or 0
|
||||
break
|
||||
imgui.text_disabled("Anthropic: 4-breakpoint ephemeral caching (auto-managed)")
|
||||
imgui.text_disabled(f" {turns} history turns | Cache reads last call: {cache_reads:,}")
|
||||
|
||||
def _render_message_panel(self) -> None:
|
||||
# LIVE indicator
|
||||
|
||||
@@ -113,3 +113,44 @@ def test_render_token_budget_panel_empty_stats_no_crash(app_instance: App) -> No
|
||||
# We can't render ImGui in tests, so just verify the guard condition logic
|
||||
# by checking the method exists and _token_stats is empty (early-return path)
|
||||
assert not app_instance._token_stats # falsy — method would return early
|
||||
|
||||
|
||||
# --- Trim warning logic ---
|
||||
|
||||
def test_would_trim_boundary_exact() -> None:
    """would_trim is False when headroom == 20000 (threshold is strictly < 20000)."""
    raw = {"provider": "test", "limit": 100000, "current": 80000, "percentage": 80.0}
    derived = _add_bleed_derived(raw)
    # 100000 - 80000 leaves exactly 20000 tokens of headroom.
    assert derived["headroom_tokens"] == 20000
    # The strict `< 20000` comparison is False right at the boundary.
    assert derived["would_trim"] is False
|
||||
|
||||
|
||||
def test_would_trim_just_below_threshold() -> None:
    """would_trim flips to True one token past the boundary (headroom 19999)."""
    derived = _add_bleed_derived(
        {"provider": "test", "limit": 100000, "current": 80001, "percentage": 80.0}
    )
    assert derived["headroom_tokens"] == 19999
    assert derived["would_trim"] is True
|
||||
|
||||
|
||||
def test_would_trim_just_above_threshold() -> None:
    """would_trim stays False with one token of headroom to spare (20001)."""
    derived = _add_bleed_derived(
        {"provider": "test", "limit": 100000, "current": 79999, "percentage": 80.0}
    )
    assert derived["headroom_tokens"] == 20001
    assert derived["would_trim"] is False
|
||||
|
||||
|
||||
# --- Cache status fields available from ai_client ---
|
||||
|
||||
def test_gemini_cache_fields_accessible() -> None:
    """_gemini_cache, _gemini_cache_created_at, _GEMINI_CACHE_TTL must be accessible."""
    # The cache-status panel reads all three attributes directly off ai_client.
    for attr in ("_gemini_cache", "_gemini_cache_created_at", "_GEMINI_CACHE_TTL"):
        assert hasattr(ai_client, attr)
    # TTL feeds the "Age: {age}s / {ttl}s" display, so it must be a positive int.
    ttl = ai_client._GEMINI_CACHE_TTL
    assert isinstance(ttl, int)
    assert ttl > 0
|
||||
|
||||
|
||||
def test_anthropic_history_lock_accessible() -> None:
    """_anthropic_history_lock must be accessible for cache hint rendering."""
    # The Anthropic branch takes the lock before reading len(_anthropic_history),
    # so both names must exist on ai_client.
    for attr in ("_anthropic_history_lock", "_anthropic_history"):
        assert hasattr(ai_client, attr)
|
||||
|
||||
Reference in New Issue
Block a user