feat(gui): Implement per-response token metrics and AI discussion compression

- Display token metrics (input/output/cache) per response in Discussion Hub.
- Add total Discussion Token usage in the panel header.
- Implement 'Compress' feature to intelligently summarize and replace exhausted discussion histories using an AI subagent.
2026-06-02 01:36:57 -04:00
parent b3b9baf91f
commit 5b7b818ed2
5 changed files with 106 additions and 24 deletions
@@ -308,7 +308,7 @@ This file tracks all major tracks for the project. Each track has its own detail
 ---
- [ ] **Track: Add per-response token metrics and AI-assisted history compression**
+- [x] **Track: Add per-response token metrics and AI-assisted history compression**
 *Link: [./tracks/discussion_metrics_and_compression_20260601/](./tracks/discussion_metrics_and_compression_20260601/)*
 ---
@@ -1,19 +1,19 @@
 # Implementation Plan: Discussion Metrics and Compression
 ## Phase 1: Metrics Visibility
- [ ] Task: Update UI for Token Metrics
+- [x] Task: Update UI for Token Metrics
-    - [ ] Modify `_render_comms_history_panel` and `render_discussion_entry` in `src/gui_2.py` to extract and prominently display `usage` stats (input, output, cache) from the entry payloads.
+    - [x] Modify `_render_comms_history_panel` and `render_discussion_entry` in `src/gui_2.py` to extract and prominently display `usage` stats (input, output, cache) from the entry payloads.
 ## Phase 2: Compression Helper Agent
- [ ] Task: Implement Compression Agent
+- [x] Task: Implement Compression Agent
-    - [ ] Create a new agent definition or function in `src/ai_client.py` (or a dedicated module) capable of receiving a discussion history and a system prompt instructing it to summarize and compact the history.
+    - [x] Create a new agent definition or function in `src/ai_client.py` (or a dedicated module) capable of receiving a discussion history and a system prompt instructing it to summarize and compact the history.
- [ ] Task: Implement UI Triggers
+- [x] Task: Implement UI Triggers
-    - [ ] Add a "Compress Discussion" button to the Discussion Hub UI.
+    - [x] Add a "Compress Discussion" button to the Discussion Hub UI.
-    - [ ] Wire the button to dispatch the compression task to the background executor and display a loading indicator.
+    - [x] Wire the button to dispatch the compression task to the background executor and display a loading indicator.
-    - [ ] Upon completion, replace the older entries with the generated summary block.
+    - [x] Upon completion, replace the older entries with the generated summary block.
 ## Phase 3: Verification
- [ ] Task: Verification
+- [x] Task: Verification
-    - [ ] Verify token metrics are visible per response.
+    - [x] Verify token metrics are visible per response.
-    - [ ] Run the "Compress Discussion" tool on a heavy discussion and verify the history is successfully summarized without losing core context.
+    - [x] Run the "Compress Discussion" tool on a heavy discussion and verify the history is successfully summarized without losing core context.
- [ ] Task: Conductor - User Manual Verification 'Phase 3: Verification' (Protocol in workflow.md)
+- [x] Task: Conductor - User Manual Verification 'Phase 3: Verification' (Protocol in workflow.md)
@@ -2623,4 +2623,39 @@ def run_subagent_summarization(file_path: str, content: str, is_code: bool, outl
  return resp_data.get("text", "")
 return "ERROR: Unsupported provider for sub-agent summarization"
 def run_discussion_compression(discussion_text: str) -> str:
  """
  Ask the currently selected provider to condense a discussion history into a dense summary.

  Args:
   discussion_text: The formatted conversation history to summarize.

  Returns:
   The summary text produced by the provider, or a string starting with
   "ERROR:" when the provider is unsupported, its client is unavailable,
   the DeepSeek API key is missing, or the DeepSeek request fails.

  Exceptions raised by the Gemini, Anthropic and Gemini CLI calls are not
  caught here; they propagate to the caller.
  """
  # Real newlines are required here: a doubled backslash would send the model
  # the literal two-character text "\n" instead of line breaks.
  prompt = (
   "The following is a long conversation history.\n"
   "Please provide a highly compact, dense summary of the key facts, decisions, bugs encountered, and outcomes that should be retained for context going forward. Categorize into User intent, Tool outputs, and AI reasoning. Omit pleasantries and redundant thoughts.\n\n"
   "[HISTORY]\n"
   f"{discussion_text}"
  )
  if _provider == "gemini":
   _ensure_gemini_client()
   if not _gemini_client:
    # Report the real cause instead of falling through to "Unsupported provider".
    return "ERROR: Gemini client unavailable for discussion compression"
   resp = _gemini_client.models.generate_content(
    model=_model,
    contents=prompt,
    config=types.GenerateContentConfig(temperature=0.0, max_output_tokens=2048)
   )
   return resp.text or ""
  if _provider == "anthropic":
   _ensure_anthropic_client()
   if not _anthropic_client:
    # Report the real cause instead of falling through to "Unsupported provider".
    return "ERROR: Anthropic client unavailable for discussion compression"
   resp = _anthropic_client.messages.create(
    model=_model, max_tokens=2048,
    messages=[{"role": "user", "content": prompt}]
   )
   # Only content blocks that carry non-empty text contribute to the summary.
   return "".join(b.text for b in resp.content if hasattr(b, "text") and b.text)
  if _provider == "deepseek":
   creds = _load_credentials()
   api_key = creds.get("deepseek", {}).get("api_key")
   if not api_key:
    return "ERROR: DeepSeek API key missing"
   try:
    r = requests.post(
     "https://api.deepseek.com/chat/completions",
     headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
     json={"model": _model, "messages": [{"role": "user", "content": prompt}], "temperature": 0.0},
     timeout=60
    )
    r.raise_for_status()
    return r.json()["choices"][0]["message"]["content"]
   except Exception as e:
    # Any failure (network, HTTP status, unexpected payload) becomes an ERROR string.
    return f"ERROR: DeepSeek compression failed: {e}"
  if _provider == "gemini_cli":
   adapter = GeminiCliAdapter(binary_path="gemini")
   resp_data = adapter.send(prompt, model=_model)
   return resp_data.get("text", "")
  return "ERROR: Unsupported provider for discussion compression"
 #endregion: Subagent Summarization
@@ -2323,6 +2323,8 @@ class AppController:
      "collapsed": True,
      "ts": entry.get("ts", project_manager.now_ts())
     }
     if "usage" in payload:
      entry_obj["usage"] = payload["usage"]
     if segments:
      entry_obj["thinking_segments"] = [{"content": s.content, "marker": s.marker} for s in segments]
@@ -3259,6 +3261,26 @@ class AppController:
    self.ai_status = f"error: {e}"
  threading.Thread(target=worker, daemon=True).start()
 def _handle_compress_discussion(self) -> None:
  """
  Start a daemon thread that replaces the discussion entries with an AI-generated summary.

  The thread formats `self.disc_entries` into text, sends it to
  `ai_client.run_discussion_compression`, and on success swaps the entry list
  for a single "System" entry holding the summary. Progress and failures are
  reported through `self.ai_status`; the entries are left untouched on failure.
  """
  def _compress_job() -> None:
   try:
    self.ai_status = "compressing discussion..."
    history_text = project_manager.format_discussion(self.disc_entries)
    if not history_text.strip():
     self.ai_status = "discussion is empty"
     return
    summary = ai_client.run_discussion_compression(history_text)
    # An empty result or an "ERROR:"-prefixed string both count as failure.
    if not summary or summary.startswith("ERROR:"):
     self.ai_status = f"compression failed: {summary}"
     return
    compressed_entry = {
     "role": "System",
     "content": f"[COMPRESSED HISTORY]\n{summary}",
     "collapsed": False,
     "ts": project_manager.now_ts(),
    }
    self.disc_entries = [compressed_entry]
    self.ai_status = "compression complete"
   except Exception as exc:
    self.ai_status = f"compression error: {exc}"
  threading.Thread(target=_compress_job, daemon=True).start()
 def _handle_generate_send(self) -> None:
  """
@@ -3416,17 +3416,29 @@ def render_discussion_entry(app: App, entry: dict, index: int) -> None:
   imgui.same_line()
   if imgui.button("[Edit]" if read_mode else "[Read]"): entry["read_mode"] = not read_mode
  ts_str = entry.get("ts", "")
-  if ts_str:
+  usage = entry.get("usage", {})
-   imgui.same_line(); imgui.text_colored(vec4(120, 120, 100), str(ts_str)); e_dt = project_manager.parse_ts(ts_str)
+  if ts_str or usage:
-   if e_dt:
+   imgui.same_line()
-    e_unix, next_unix = e_dt.timestamp(), float('inf')
+   if ts_str:
-    if index + 1 < len(app.disc_entries):
+    imgui.text_colored(vec4(120, 120, 100), str(ts_str))
-     n_ts = app.disc_entries[index+1].get("ts", ""); n_dt = project_manager.parse_ts(n_ts)
+    e_dt = project_manager.parse_ts(ts_str)
-     if n_dt: next_unix = n_dt.timestamp()
+    if e_dt:
-    injected = [f for f in app.files if hasattr(f, 'injected_at') and f.injected_at and e_unix <= f.injected_at < next_unix]
+     e_unix, next_unix = e_dt.timestamp(), float('inf')
-    if injected:
+     if index + 1 < len(app.disc_entries):
-     imgui.same_line(); imgui.text_colored(vec4(100, 255, 100), f"[{len(injected)}+]")
+      n_ts = app.disc_entries[index+1].get("ts", ""); n_dt = project_manager.parse_ts(n_ts)
-     if imgui.is_item_hovered(): imgui.set_tooltip("Files injected at this point:\n" + "\n".join([f.path for f in injected]))
+      if n_dt: next_unix = n_dt.timestamp()
     injected = [f for f in app.files if hasattr(f, 'injected_at') and f.injected_at and e_unix <= f.injected_at < next_unix]
     if injected:
      imgui.same_line(); imgui.text_colored(vec4(100, 255, 100), f"[{len(injected)}+]")
      if imgui.is_item_hovered(): imgui.set_tooltip("Files injected at this point:\n" + "\n".join([f.path for f in injected]))
   if usage:
    inp = usage.get("input_tokens", 0)
    out = usage.get("output_tokens", 0)
    cache = usage.get("cache_read_input_tokens", 0)
    usage_str = f" in:{inp} out:{out}"
    if cache: usage_str += f" cache:{cache}"
    imgui.same_line()
    imgui.text_colored(vec4(100, 150, 180), usage_str)
  if collapsed:
   imgui.same_line()
   if imgui.button("Ins"): app.disc_entries.insert(index, {"role": "User", "content": "", "collapsed": True, "ts": project_manager.now_ts()})
@@ -3794,6 +3806,8 @@ def render_discussion_entry_controls(app: App) -> None:
 if imgui.button("Clear All"): app.disc_entries.clear()
 imgui.same_line()
 if imgui.button("Save"): app._flush_to_project(); app._flush_to_config(); models.save_config(app.config); app.ai_status = "discussion saved"
 imgui.same_line()
 if imgui.button("Compress"): app.controller._handle_compress_discussion()
 _, app.ui_auto_add_history = imgui.checkbox("Auto-add message & response to history", app.ui_auto_add_history)
 imgui.text("Keep Pairs:"); imgui.same_line(); imgui.set_next_item_width(80)
 ch, app.ui_disc_truncate_pairs = imgui.input_int("##trunc_pairs", app.ui_disc_truncate_pairs, 1)
@@ -3806,6 +3820,17 @@ def render_discussion_entry_controls(app: App) -> None:
 def render_discussion_metadata(app: App) -> None:
 disc_data = app.project.get("discussion", {}).get("discussions", {}).get(app.active_discussion, {})
 git_commit, last_updated = disc_data.get("git_commit", ""), disc_data.get("last_updated", "")
 total_in, total_out, total_cache = 0, 0, 0
 for entry in app.disc_entries:
  if "usage" in entry:
   total_in += entry["usage"].get("input_tokens", 0)
   total_out += entry["usage"].get("output_tokens", 0)
   total_cache += entry["usage"].get("cache_read_input_tokens", 0)
 if total_in > 0 or total_out > 0:
  imgui.text_colored(vec4(100, 150, 180), f"Discussion Tokens: {total_in} In | {total_out} Out | {total_cache} Cache")
  imgui.separator()
 imgui.text_colored(C_LBL, "commit:"); imgui.same_line()
 render_selectable_label(app, 'git_commit_val', git_commit[:12] if git_commit else '(none)', width=100, color=(C_IN if git_commit else C_LBL))
 imgui.same_line()