From 5b7b818ed2d03a7f1a4e84513a3bf920199ea812 Mon Sep 17 00:00:00 2001
From: Ed_ <edwardgz@gmail.com>
Date: Tue, 2 Jun 2026 01:36:57 -0400
Subject: [PATCH] feat(gui): Implement per-response token metrics and AI
 discussion compression

- Display token metrics (input/output/cache) per response in Discussion Hub.
- Add total Discussion Token usage in the panel header.
- Implement a 'Compress' feature that uses an AI subagent to summarize a long discussion history and replace it with the compact summary.
---
 conductor/tracks.md                           |  2 +-
 .../plan.md                                   | 24 +++++-----
 src/ai_client.py                              | 35 ++++++++++++++
 src/app_controller.py                         | 22 +++++++++
 src/gui_2.py                                  | 47 ++++++++++++++-----
 5 files changed, 106 insertions(+), 24 deletions(-)

diff --git a/conductor/tracks.md b/conductor/tracks.md
index b050c993..603b1630 100644
--- a/conductor/tracks.md
+++ b/conductor/tracks.md
@@ -308,7 +308,7 @@ This file tracks all major tracks for the project. Each track has its own detail
 
 ---
 
-- [ ] **Track: Add per-response token metrics and AI-assisted history compression**
+- [x] **Track: Add per-response token metrics and AI-assisted history compression**
 *Link: [./tracks/discussion_metrics_and_compression_20260601/](./tracks/discussion_metrics_and_compression_20260601/)*
 
 ---
diff --git a/conductor/tracks/discussion_metrics_and_compression_20260601/plan.md b/conductor/tracks/discussion_metrics_and_compression_20260601/plan.md
index ef1010a1..0ce72377 100644
--- a/conductor/tracks/discussion_metrics_and_compression_20260601/plan.md
+++ b/conductor/tracks/discussion_metrics_and_compression_20260601/plan.md
@@ -1,19 +1,19 @@
 # Implementation Plan: Discussion Metrics and Compression
 
 ## Phase 1: Metrics Visibility
-- [ ] Task: Update UI for Token Metrics
-    - [ ] Modify `_render_comms_history_panel` and `render_discussion_entry` in `src/gui_2.py` to extract and prominently display `usage` stats (input, output, cache) from the entry payloads.
+- [x] Task: Update UI for Token Metrics
+    - [x] Modify `_render_comms_history_panel` and `render_discussion_entry` in `src/gui_2.py` to extract and prominently display `usage` stats (input, output, cache) from the entry payloads.
 
 ## Phase 2: Compression Helper Agent
-- [ ] Task: Implement Compression Agent
-    - [ ] Create a new agent definition or function in `src/ai_client.py` (or a dedicated module) capable of receiving a discussion history and a system prompt instructing it to summarize and compact the history.
-- [ ] Task: Implement UI Triggers
-    - [ ] Add a "Compress Discussion" button to the Discussion Hub UI.
-    - [ ] Wire the button to dispatch the compression task to the background executor and display a loading indicator.
-    - [ ] Upon completion, replace the older entries with the generated summary block.
+- [x] Task: Implement Compression Agent
+    - [x] Create a new agent definition or function in `src/ai_client.py` (or a dedicated module) capable of receiving a discussion history and a system prompt instructing it to summarize and compact the history.
+- [x] Task: Implement UI Triggers
+    - [x] Add a "Compress Discussion" button to the Discussion Hub UI.
+    - [x] Wire the button to dispatch the compression task to the background executor and display a loading indicator.
+    - [x] Upon completion, replace the older entries with the generated summary block.
 
 ## Phase 3: Verification
-- [ ] Task: Verification
-    - [ ] Verify token metrics are visible per response.
-    - [ ] Run the "Compress Discussion" tool on a heavy discussion and verify the history is successfully summarized without losing core context.
-- [ ] Task: Conductor - User Manual Verification 'Phase 3: Verification' (Protocol in workflow.md)
\ No newline at end of file
+- [x] Task: Verification
+    - [x] Verify token metrics are visible per response.
+    - [x] Run the "Compress Discussion" tool on a heavy discussion and verify the history is successfully summarized without losing core context.
+- [x] Task: Conductor - User Manual Verification 'Phase 3: Verification' (Protocol in workflow.md)
\ No newline at end of file
diff --git a/src/ai_client.py b/src/ai_client.py
index 6eb482b6..2033cf84 100644
--- a/src/ai_client.py
+++ b/src/ai_client.py
@@ -2623,4 +2623,39 @@ def run_subagent_summarization(file_path: str, content: str, is_code: bool, outl
   return resp_data.get("text", "")
  return "ERROR: Unsupported provider for sub-agent summarization"
 
+def run_discussion_compression(discussion_text: str) -> str:
+ """Return a dense summary of discussion_text from the active provider, or a string starting with "ERROR:" for DeepSeek failures or an unhandled provider."""
+ # Single-backslash escapes so the model receives real line breaks, not a literal backslash followed by "n".
+ prompt = f"The following is a long conversation history.\nPlease provide a highly compact, dense summary of the key facts, decisions, bugs encountered, and outcomes that should be retained for context going forward. Categorize into User intent, Tool outputs, and AI reasoning. Omit pleasantries and redundant thoughts.\n\n[HISTORY]\n{discussion_text}"
+ if _provider == "gemini":
+  _ensure_gemini_client()
+  if _gemini_client:
+   resp = _gemini_client.models.generate_content(
+    model=_model, contents=prompt,
+    config=types.GenerateContentConfig(temperature=0.0, max_output_tokens=2048)
+   )
+   return resp.text or ""
+ elif _provider == "anthropic":
+  _ensure_anthropic_client()
+  if _anthropic_client:
+   resp = _anthropic_client.messages.create(
+    model=_model, max_tokens=2048,
+    messages=[{"role": "user", "content": prompt}]
+   )
+   return "".join([b.text for b in resp.content if hasattr(b, "text") and b.text])
+ elif _provider == "deepseek":
+  api_key = _load_credentials().get("deepseek", {}).get("api_key")
+  if not api_key: return "ERROR: DeepSeek API key missing"
+  try:
+   r = requests.post("https://api.deepseek.com/chat/completions", headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}, json={"model": _model, "messages": [{"role": "user", "content": prompt}], "temperature": 0.0}, timeout=60)
+   r.raise_for_status()
+   return r.json()["choices"][0]["message"]["content"]
+  except Exception as e:
+   return f"ERROR: DeepSeek compression failed: {e}"
+ elif _provider == "gemini_cli":
+  adapter = GeminiCliAdapter(binary_path="gemini")
+  resp_data = adapter.send(prompt, model=_model)
+  return resp_data.get("text", "")
+ return "ERROR: Unsupported provider for discussion compression"
+
 #endregion: Subagent Summarization
diff --git a/src/app_controller.py b/src/app_controller.py
index 6c4e3f60..aefe53a0 100644
--- a/src/app_controller.py
+++ b/src/app_controller.py
@@ -2323,6 +2323,8 @@ class AppController:
       "collapsed": True,
       "ts": entry.get("ts", project_manager.now_ts())
      }
+     if "usage" in payload:
+      entry_obj["usage"] = payload["usage"]
      if segments:
       entry_obj["thinking_segments"] = [{"content": s.content, "marker": s.marker} for s in segments]
 
@@ -3259,6 +3261,26 @@ class AppController:
     self.ai_status = f"error: {e}"
   threading.Thread(target=worker, daemon=True).start()
 
+ def _handle_compress_discussion(self) -> None:
+  """Summarize the current discussion on a daemon thread and swap the entries for the single summary entry."""
+  def worker():
+   try:
+    self.ai_status = "compressing discussion..."
+    history_text = project_manager.format_discussion(self.disc_entries)
+    if not history_text.strip():
+     self.ai_status = "discussion is empty"
+     return
+    summary = ai_client.run_discussion_compression(history_text)
+    # An empty reply or an "ERROR:"-prefixed reply is reported and leaves the entries untouched.
+    if not summary or summary.startswith("ERROR:"):
+     self.ai_status = f"compression failed: {summary}"
+     return
+    self.disc_entries = [{"role": "System", "content": f"[COMPRESSED HISTORY]\n{summary}", "collapsed": False, "ts": project_manager.now_ts()}]
+    self.ai_status = "compression complete"
+   except Exception as e:
+    self.ai_status = f"compression error: {e}"
+  threading.Thread(target=worker, daemon=True).start()
+
  def _handle_generate_send(self) -> None:
   """
   
diff --git a/src/gui_2.py b/src/gui_2.py
index 1c8ef2b0..da8dc320 100644
--- a/src/gui_2.py
+++ b/src/gui_2.py
@@ -3416,17 +3416,29 @@ def render_discussion_entry(app: App, entry: dict, index: int) -> None:
    imgui.same_line()
    if imgui.button("[Edit]" if read_mode else "[Read]"): entry["read_mode"] = not read_mode
   ts_str = entry.get("ts", "")
-  if ts_str:
-   imgui.same_line(); imgui.text_colored(vec4(120, 120, 100), str(ts_str)); e_dt = project_manager.parse_ts(ts_str)
-   if e_dt:
-    e_unix, next_unix = e_dt.timestamp(), float('inf')
-    if index + 1 < len(app.disc_entries):
-     n_ts = app.disc_entries[index+1].get("ts", ""); n_dt = project_manager.parse_ts(n_ts)
-     if n_dt: next_unix = n_dt.timestamp()
-    injected = [f for f in app.files if hasattr(f, 'injected_at') and f.injected_at and e_unix <= f.injected_at < next_unix]
-    if injected:
-     imgui.same_line(); imgui.text_colored(vec4(100, 255, 100), f"[{len(injected)}+]")
-     if imgui.is_item_hovered(): imgui.set_tooltip("Files injected at this point:\n" + "\n".join([f.path for f in injected]))
+  usage = entry.get("usage", {})
+  if ts_str or usage:
+   imgui.same_line()
+   if ts_str:
+    imgui.text_colored(vec4(120, 120, 100), str(ts_str))
+    e_dt = project_manager.parse_ts(ts_str)
+    if e_dt:
+     e_unix, next_unix = e_dt.timestamp(), float('inf')
+     if index + 1 < len(app.disc_entries):
+      n_ts = app.disc_entries[index+1].get("ts", ""); n_dt = project_manager.parse_ts(n_ts)
+      if n_dt: next_unix = n_dt.timestamp()
+     injected = [f for f in app.files if hasattr(f, 'injected_at') and f.injected_at and e_unix <= f.injected_at < next_unix]
+     if injected:
+      imgui.same_line(); imgui.text_colored(vec4(100, 255, 100), f"[{len(injected)}+]")
+      if imgui.is_item_hovered(): imgui.set_tooltip("Files injected at this point:\n" + "\n".join([f.path for f in injected]))
+   if usage:
+    inp = usage.get("input_tokens", 0)
+    out = usage.get("output_tokens", 0)
+    cache = usage.get("cache_read_input_tokens", 0)
+    usage_str = f" in:{inp} out:{out}"
+    if cache: usage_str += f" cache:{cache}"
+    imgui.same_line()
+    imgui.text_colored(vec4(100, 150, 180), usage_str)
   if collapsed:
    imgui.same_line()
    if imgui.button("Ins"): app.disc_entries.insert(index, {"role": "User", "content": "", "collapsed": True, "ts": project_manager.now_ts()})
@@ -3794,6 +3806,8 @@ def render_discussion_entry_controls(app: App) -> None:
  if imgui.button("Clear All"): app.disc_entries.clear()
  imgui.same_line()
  if imgui.button("Save"): app._flush_to_project(); app._flush_to_config(); models.save_config(app.config); app.ai_status = "discussion saved"
+ imgui.same_line()
+ if imgui.button("Compress"): app.controller._handle_compress_discussion()
  _, app.ui_auto_add_history = imgui.checkbox("Auto-add message & response to history", app.ui_auto_add_history)
  imgui.text("Keep Pairs:"); imgui.same_line(); imgui.set_next_item_width(80)
  ch, app.ui_disc_truncate_pairs = imgui.input_int("##trunc_pairs", app.ui_disc_truncate_pairs, 1)
@@ -3806,6 +3820,17 @@ def render_discussion_entry_controls(app: App) -> None:
 def render_discussion_metadata(app: App) -> None:
  disc_data = app.project.get("discussion", {}).get("discussions", {}).get(app.active_discussion, {})
  git_commit, last_updated = disc_data.get("git_commit", ""), disc_data.get("last_updated", "")
+ 
+ total_in, total_out, total_cache = 0, 0, 0
+ for entry in app.disc_entries:
+  if "usage" in entry:
+   total_in += entry["usage"].get("input_tokens", 0)
+   total_out += entry["usage"].get("output_tokens", 0)
+   total_cache += entry["usage"].get("cache_read_input_tokens", 0)
+ if total_in > 0 or total_out > 0:
+  imgui.text_colored(vec4(100, 150, 180), f"Discussion Tokens: {total_in} In | {total_out} Out | {total_cache} Cache")
+  imgui.separator()
+
  imgui.text_colored(C_LBL, "commit:"); imgui.same_line()
  render_selectable_label(app, 'git_commit_val', git_commit[:12] if git_commit else '(none)', width=100, color=(C_IN if git_commit else C_LBL))
  imgui.same_line()