feat(token-viz): Phase 2 — trim warning, Gemini/Anthropic cache status display
This commit is contained in:
@@ -11,10 +11,10 @@ Architecture reference: [docs/guide_architecture.md](../../docs/guide_architectu
|
||||
|
||||
## Phase 2: Trimming Preview & Cache Status
|
||||
|
||||
- [ ] Task 2.1: When `stats.get('would_trim')` is True, render a warning: `imgui.text_colored(ImVec4(1,0.3,0,1), "WARNING: Next call will trim history")`. Below it, show `f"Trimmable turns: {stats['trimmable_turns']}"`. If `stats` contains per-message breakdown, render the first 3 trimmable messages with their role and token count in a compact list.
|
||||
- [ ] Task 2.2: Add Gemini cache status display. Read `ai_client._gemini_cache` (check `is not None`), `ai_client._gemini_cache_created_at`, and `ai_client._GEMINI_CACHE_TTL`. If cache exists, show: `"Gemini Cache: ACTIVE | Age: {age_seconds}s / {ttl}s | Renews at: {ttl * 0.9:.0f}s"`. If not, show `"Gemini Cache: INACTIVE"`. Guard with `if ai_client._provider == "gemini":`.
|
||||
- [ ] Task 2.3: Add Anthropic cache hint. When provider is `"anthropic"`, show: `"Anthropic: 4-breakpoint ephemeral caching (auto-managed)"` with the number of history turns and whether the latest response used cache reads (check last comms log entry for `cache_read_input_tokens`).
|
||||
- [ ] Task 2.4: Write tests for trimming warning visibility and cache status display.
|
||||
- [x] Task 2.1: When `stats.get('would_trim')` is True, render a warning: `imgui.text_colored(ImVec4(1,0.3,0,1), "WARNING: Next call will trim history")`. Below it, show `f"Trimmable turns: {stats['trimmable_turns']}"`. If `stats` contains per-message breakdown, render the first 3 trimmable messages with their role and token count in a compact list.
|
||||
- [x] Task 2.2: Add Gemini cache status display. Read `ai_client._gemini_cache` (check `is not None`), `ai_client._gemini_cache_created_at`, and `ai_client._GEMINI_CACHE_TTL`. If cache exists, show: `"Gemini Cache: ACTIVE | Age: {age_seconds}s / {ttl}s | Renews at: {ttl * 0.9:.0f}s"`. If not, show `"Gemini Cache: INACTIVE"`. Guard with `if ai_client._provider == "gemini":`.
|
||||
- [x] Task 2.3: Add Anthropic cache hint. When provider is `"anthropic"`, show: `"Anthropic: 4-breakpoint ephemeral caching (auto-managed)"` with the number of history turns and whether the latest response used cache reads (check last comms log entry for `cache_read_input_tokens`).
|
||||
- [~] Task 2.4: Write tests for trimming warning visibility and cache status display.
|
||||
|
||||
## Phase 3: Auto-Refresh & Integration
|
||||
|
||||
|
||||
35
gui_2.py
35
gui_2.py
@@ -2765,6 +2765,41 @@ class App:
|
||||
imgui.table_set_column_index(1); imgui.text(f"{tok:,}")
|
||||
imgui.table_set_column_index(2); imgui.text(f"{tok / total_tok * 100:.0f}%")
|
||||
imgui.end_table()
|
||||
if stats.get("would_trim"):
|
||||
imgui.text_colored(imgui.ImVec4(1.0, 0.3, 0.0, 1.0), "WARNING: Next call will trim history")
|
||||
trimmable = stats.get("trimmable_turns", 0)
|
||||
if trimmable:
|
||||
imgui.text_disabled(f"Trimmable turns: {trimmable}")
|
||||
msgs = stats.get("messages")
|
||||
if msgs:
|
||||
shown = 0
|
||||
for msg in msgs:
|
||||
if shown >= 3:
|
||||
break
|
||||
if msg.get("trimmable"):
|
||||
role = msg.get("role", "?")
|
||||
toks = msg.get("tokens", 0)
|
||||
imgui.text_disabled(f" [{role}] ~{toks:,} tokens")
|
||||
shown += 1
|
||||
|
||||
imgui.separator()
|
||||
if ai_client._provider == "gemini":
|
||||
if ai_client._gemini_cache is not None:
|
||||
age = time.time() - (ai_client._gemini_cache_created_at or time.time())
|
||||
ttl = ai_client._GEMINI_CACHE_TTL
|
||||
imgui.text_colored(C_LBL, f"Gemini Cache: ACTIVE | Age: {age:.0f}s / {ttl}s | Renews at: {ttl * 0.9:.0f}s")
|
||||
else:
|
||||
imgui.text_disabled("Gemini Cache: INACTIVE")
|
||||
elif ai_client._provider == "anthropic":
|
||||
with ai_client._anthropic_history_lock:
|
||||
turns = len(ai_client._anthropic_history)
|
||||
cache_reads = 0
|
||||
for entry in reversed(ai_client.get_comms_log()):
|
||||
if entry.get("kind") == "response":
|
||||
cache_reads = (entry.get("payload") or {}).get("usage", {}).get("cache_read_input_tokens") or 0
|
||||
break
|
||||
imgui.text_disabled("Anthropic: 4-breakpoint ephemeral caching (auto-managed)")
|
||||
imgui.text_disabled(f" {turns} history turns | Cache reads last call: {cache_reads:,}")
|
||||
|
||||
def _render_message_panel(self) -> None:
|
||||
# LIVE indicator
|
||||
|
||||
@@ -113,3 +113,44 @@ def test_render_token_budget_panel_empty_stats_no_crash(app_instance: App) -> No
|
||||
# We can't render ImGui in tests, so just verify the guard condition logic
|
||||
# by checking the method exists and _token_stats is empty (early-return path)
|
||||
assert not app_instance._token_stats # falsy — method would return early
|
||||
|
||||
|
||||
# --- Trim warning logic ---
|
||||
|
||||
def test_would_trim_boundary_exact() -> None:
    """would_trim is False when headroom == 20000 (threshold is strictly < 20000)."""
    raw = {"provider": "test", "limit": 100000, "current": 80000, "percentage": 80.0}
    derived = _add_bleed_derived(raw)
    # 100000 - 80000 leaves exactly 20000 tokens of headroom.
    assert derived["headroom_tokens"] == 20000
    # The strict `< 20000` comparison is False right at the boundary.
    assert derived["would_trim"] is False
|
||||
|
||||
|
||||
def test_would_trim_just_below_threshold() -> None:
    """would_trim flips to True one token past the boundary (headroom 19999)."""
    derived = _add_bleed_derived(
        {"provider": "test", "limit": 100000, "current": 80001, "percentage": 80.0}
    )
    assert derived["headroom_tokens"] == 19999
    assert derived["would_trim"] is True
|
||||
|
||||
|
||||
def test_would_trim_just_above_threshold() -> None:
    """would_trim stays False with one token of headroom to spare (20001)."""
    derived = _add_bleed_derived(
        {"provider": "test", "limit": 100000, "current": 79999, "percentage": 80.0}
    )
    assert derived["headroom_tokens"] == 20001
    assert derived["would_trim"] is False
|
||||
|
||||
|
||||
# --- Cache status fields available from ai_client ---
|
||||
|
||||
def test_gemini_cache_fields_accessible() -> None:
    """_gemini_cache, _gemini_cache_created_at, _GEMINI_CACHE_TTL must be accessible."""
    # The cache-status panel reads all three attributes directly off ai_client.
    for attr in ("_gemini_cache", "_gemini_cache_created_at", "_GEMINI_CACHE_TTL"):
        assert hasattr(ai_client, attr)
    # TTL feeds the "Age: {age}s / {ttl}s" display, so it must be a positive int.
    ttl = ai_client._GEMINI_CACHE_TTL
    assert isinstance(ttl, int)
    assert ttl > 0
|
||||
|
||||
|
||||
def test_anthropic_history_lock_accessible() -> None:
    """_anthropic_history_lock must be accessible for cache hint rendering."""
    # The Anthropic branch takes the lock before reading len(_anthropic_history),
    # so both names must exist on ai_client.
    for attr in ("_anthropic_history_lock", "_anthropic_history"):
        assert hasattr(ai_client, attr)
|
||||
|
||||
Reference in New Issue
Block a user