feat(gui): Implement per-response token metrics and AI discussion compression
- Display token metrics (input/output/cache) per response in Discussion Hub.
- Add total Discussion Token usage in the panel header.
- Implement 'Compress' feature to intelligently summarize and replace exhausted discussion histories using an AI subagent.
This commit is contained in:
+1
-1
@@ -308,7 +308,7 @@ This file tracks all major tracks for the project. Each track has its own detail
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
- [ ] **Track: Add per-response token metrics and AI-assisted history compression**
|
- [x] **Track: Add per-response token metrics and AI-assisted history compression**
|
||||||
*Link: [./tracks/discussion_metrics_and_compression_20260601/](./tracks/discussion_metrics_and_compression_20260601/)*
|
*Link: [./tracks/discussion_metrics_and_compression_20260601/](./tracks/discussion_metrics_and_compression_20260601/)*
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
@@ -1,19 +1,19 @@
|
|||||||
# Implementation Plan: Discussion Metrics and Compression
|
# Implementation Plan: Discussion Metrics and Compression
|
||||||
|
|
||||||
## Phase 1: Metrics Visibility
|
## Phase 1: Metrics Visibility
|
||||||
- [ ] Task: Update UI for Token Metrics
|
- [x] Task: Update UI for Token Metrics
|
||||||
- [ ] Modify `_render_comms_history_panel` and `render_discussion_entry` in `src/gui_2.py` to extract and prominently display `usage` stats (input, output, cache) from the entry payloads.
|
- [x] Modify `_render_comms_history_panel` and `render_discussion_entry` in `src/gui_2.py` to extract and prominently display `usage` stats (input, output, cache) from the entry payloads.
|
||||||
|
|
||||||
## Phase 2: Compression Helper Agent
|
## Phase 2: Compression Helper Agent
|
||||||
- [ ] Task: Implement Compression Agent
|
- [x] Task: Implement Compression Agent
|
||||||
- [ ] Create a new agent definition or function in `src/ai_client.py` (or a dedicated module) capable of receiving a discussion history and a system prompt instructing it to summarize and compact the history.
|
- [x] Create a new agent definition or function in `src/ai_client.py` (or a dedicated module) capable of receiving a discussion history and a system prompt instructing it to summarize and compact the history.
|
||||||
- [ ] Task: Implement UI Triggers
|
- [x] Task: Implement UI Triggers
|
||||||
- [ ] Add a "Compress Discussion" button to the Discussion Hub UI.
|
- [x] Add a "Compress Discussion" button to the Discussion Hub UI.
|
||||||
- [ ] Wire the button to dispatch the compression task to the background executor and display a loading indicator.
|
- [x] Wire the button to dispatch the compression task to the background executor and display a loading indicator.
|
||||||
- [ ] Upon completion, replace the older entries with the generated summary block.
|
- [x] Upon completion, replace the older entries with the generated summary block.
|
||||||
|
|
||||||
## Phase 3: Verification
|
## Phase 3: Verification
|
||||||
- [ ] Task: Verification
|
- [x] Task: Verification
|
||||||
- [ ] Verify token metrics are visible per response.
|
- [x] Verify token metrics are visible per response.
|
||||||
- [ ] Run the "Compress Discussion" tool on a heavy discussion and verify the history is successfully summarized without losing core context.
|
- [x] Run the "Compress Discussion" tool on a heavy discussion and verify the history is successfully summarized without losing core context.
|
||||||
- [ ] Task: Conductor - User Manual Verification 'Phase 3: Verification' (Protocol in workflow.md)
|
- [x] Task: Conductor - User Manual Verification 'Phase 3: Verification' (Protocol in workflow.md)
|
||||||
@@ -2623,4 +2623,39 @@ def run_subagent_summarization(file_path: str, content: str, is_code: bool, outl
|
|||||||
return resp_data.get("text", "")
|
return resp_data.get("text", "")
|
||||||
return "ERROR: Unsupported provider for sub-agent summarization"
|
return "ERROR: Unsupported provider for sub-agent summarization"
|
||||||
|
|
||||||
|
def run_discussion_compression(discussion_text: str) -> str:
    """
    Ask the active provider for a dense summary of a discussion history.

    The provider and model are taken from the module-level `_provider` and
    `_model` settings. The request is a single user message containing a
    fixed summarization instruction followed by `discussion_text`.

    Args:
        discussion_text: The formatted discussion history to summarize.

    Returns:
        The summary text returned by the provider (possibly an empty string),
        or a message starting with "ERROR:" when the input is blank, the
        provider is unsupported or not usable, or the request fails.
        Failures are reported through the return value rather than raised.
    """
    if not discussion_text.strip():
        # Nothing to summarize; skip the provider round-trip entirely.
        return "ERROR: Discussion history is empty"

    # Use real newlines: a doubled backslash ("\\n") would put a literal
    # backslash-n into the prompt instead of a line break.
    prompt = (
        "The following is a long conversation history.\n"
        "Please provide a highly compact, dense summary of the key facts, decisions, "
        "bugs encountered, and outcomes that should be retained for context going forward. "
        "Categorize into User intent, Tool outputs, and AI reasoning. "
        "Omit pleasantries and redundant thoughts.\n\n"
        f"[HISTORY]\n{discussion_text}"
    )

    # Human-readable provider names for error messages.
    labels = {
        "gemini": "Gemini",
        "anthropic": "Anthropic",
        "deepseek": "DeepSeek",
        "gemini_cli": "Gemini CLI",
    }

    try:
        if _provider == "gemini":
            _ensure_gemini_client()
            if not _gemini_client:
                # Distinguish "client missing" from "provider unsupported".
                return "ERROR: Gemini client is not initialized"
            resp = _gemini_client.models.generate_content(
                model=_model,
                contents=prompt,
                config=types.GenerateContentConfig(temperature=0.0, max_output_tokens=2048),
            )
            return resp.text or ""

        if _provider == "anthropic":
            _ensure_anthropic_client()
            if not _anthropic_client:
                return "ERROR: Anthropic client is not initialized"
            resp = _anthropic_client.messages.create(
                model=_model,
                max_tokens=2048,
                messages=[{"role": "user", "content": prompt}],
            )
            # Only blocks that carry non-empty text contribute to the summary.
            return "".join(b.text for b in resp.content if getattr(b, "text", None))

        if _provider == "deepseek":
            creds = _load_credentials()
            api_key = creds.get("deepseek", {}).get("api_key")
            if not api_key:
                return "ERROR: DeepSeek API key missing"
            r = requests.post(
                "https://api.deepseek.com/chat/completions",
                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
                json={"model": _model, "messages": [{"role": "user", "content": prompt}], "temperature": 0.0},
                timeout=60,
            )
            r.raise_for_status()
            # Guard against a null content field so the return type stays str.
            return r.json()["choices"][0]["message"]["content"] or ""

        if _provider == "gemini_cli":
            adapter = GeminiCliAdapter(binary_path="gemini")
            resp_data = adapter.send(prompt, model=_model)
            return resp_data.get("text", "")
    except Exception as e:
        # Every provider reports failure the same way: as an "ERROR:" string.
        return f"ERROR: {labels.get(_provider, _provider)} compression failed: {e}"

    return "ERROR: Unsupported provider for discussion compression"
|
||||||
|
|
||||||
#endregion: Subagent Summarization
|
#endregion: Subagent Summarization
|
||||||
|
|||||||
@@ -2323,6 +2323,8 @@ class AppController:
|
|||||||
"collapsed": True,
|
"collapsed": True,
|
||||||
"ts": entry.get("ts", project_manager.now_ts())
|
"ts": entry.get("ts", project_manager.now_ts())
|
||||||
}
|
}
|
||||||
|
if "usage" in payload:
|
||||||
|
entry_obj["usage"] = payload["usage"]
|
||||||
if segments:
|
if segments:
|
||||||
entry_obj["thinking_segments"] = [{"content": s.content, "marker": s.marker} for s in segments]
|
entry_obj["thinking_segments"] = [{"content": s.content, "marker": s.marker} for s in segments]
|
||||||
|
|
||||||
@@ -3259,6 +3261,26 @@ class AppController:
|
|||||||
self.ai_status = f"error: {e}"
|
self.ai_status = f"error: {e}"
|
||||||
threading.Thread(target=worker, daemon=True).start()
|
threading.Thread(target=worker, daemon=True).start()
|
||||||
|
|
||||||
|
def _handle_compress_discussion(self) -> None:
    """
    Replace the current discussion with an AI-generated summary entry.

    The discussion is formatted with `project_manager.format_discussion` and
    sent to `ai_client.run_discussion_compression` on a daemon thread.
    Progress and outcome are reported through `self.ai_status`. On success,
    the entries that were summarized are replaced by a single "System" entry
    holding the summary; entries appended while the request was running are
    kept after it. On failure the discussion is left untouched.
    """
    # Snapshot on the calling thread so the worker never iterates a list
    # that may be mutated elsewhere while the request is in flight.
    snapshot = list(self.disc_entries)

    def worker() -> None:
        try:
            self.ai_status = "compressing discussion..."
            disc_text = project_manager.format_discussion(snapshot)
            if not disc_text.strip():
                self.ai_status = "discussion is empty"
                return

            response_text = ai_client.run_discussion_compression(disc_text)

            if not response_text:
                # An empty summary must not wipe the history.
                self.ai_status = "compression failed: empty response"
                return
            if response_text.startswith("ERROR:"):
                self.ai_status = f"compression failed: {response_text}"
                return

            summary_entry = {
                "role": "System",
                "content": f"[COMPRESSED HISTORY]\n{response_text}",
                "collapsed": False,
                "ts": project_manager.now_ts(),
            }
            # Keep whatever was appended after the snapshot was taken instead
            # of silently discarding it together with the summarized entries.
            appended_since = list(self.disc_entries[len(snapshot):])
            self.disc_entries = [summary_entry, *appended_since]
            self.ai_status = "compression complete"
        except Exception as e:
            self.ai_status = f"compression error: {e}"

    threading.Thread(target=worker, daemon=True).start()
|
||||||
|
|
||||||
def _handle_generate_send(self) -> None:
|
def _handle_generate_send(self) -> None:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|||||||
+36
-11
@@ -3416,17 +3416,29 @@ def render_discussion_entry(app: App, entry: dict, index: int) -> None:
|
|||||||
imgui.same_line()
|
imgui.same_line()
|
||||||
if imgui.button("[Edit]" if read_mode else "[Read]"): entry["read_mode"] = not read_mode
|
if imgui.button("[Edit]" if read_mode else "[Read]"): entry["read_mode"] = not read_mode
|
||||||
ts_str = entry.get("ts", "")
|
ts_str = entry.get("ts", "")
|
||||||
if ts_str:
|
usage = entry.get("usage", {})
|
||||||
imgui.same_line(); imgui.text_colored(vec4(120, 120, 100), str(ts_str)); e_dt = project_manager.parse_ts(ts_str)
|
if ts_str or usage:
|
||||||
if e_dt:
|
imgui.same_line()
|
||||||
e_unix, next_unix = e_dt.timestamp(), float('inf')
|
if ts_str:
|
||||||
if index + 1 < len(app.disc_entries):
|
imgui.text_colored(vec4(120, 120, 100), str(ts_str))
|
||||||
n_ts = app.disc_entries[index+1].get("ts", ""); n_dt = project_manager.parse_ts(n_ts)
|
e_dt = project_manager.parse_ts(ts_str)
|
||||||
if n_dt: next_unix = n_dt.timestamp()
|
if e_dt:
|
||||||
injected = [f for f in app.files if hasattr(f, 'injected_at') and f.injected_at and e_unix <= f.injected_at < next_unix]
|
e_unix, next_unix = e_dt.timestamp(), float('inf')
|
||||||
if injected:
|
if index + 1 < len(app.disc_entries):
|
||||||
imgui.same_line(); imgui.text_colored(vec4(100, 255, 100), f"[{len(injected)}+]")
|
n_ts = app.disc_entries[index+1].get("ts", ""); n_dt = project_manager.parse_ts(n_ts)
|
||||||
if imgui.is_item_hovered(): imgui.set_tooltip("Files injected at this point:\n" + "\n".join([f.path for f in injected]))
|
if n_dt: next_unix = n_dt.timestamp()
|
||||||
|
injected = [f for f in app.files if hasattr(f, 'injected_at') and f.injected_at and e_unix <= f.injected_at < next_unix]
|
||||||
|
if injected:
|
||||||
|
imgui.same_line(); imgui.text_colored(vec4(100, 255, 100), f"[{len(injected)}+]")
|
||||||
|
if imgui.is_item_hovered(): imgui.set_tooltip("Files injected at this point:\n" + "\n".join([f.path for f in injected]))
|
||||||
|
if usage:
|
||||||
|
inp = usage.get("input_tokens", 0)
|
||||||
|
out = usage.get("output_tokens", 0)
|
||||||
|
cache = usage.get("cache_read_input_tokens", 0)
|
||||||
|
usage_str = f" in:{inp} out:{out}"
|
||||||
|
if cache: usage_str += f" cache:{cache}"
|
||||||
|
imgui.same_line()
|
||||||
|
imgui.text_colored(vec4(100, 150, 180), usage_str)
|
||||||
if collapsed:
|
if collapsed:
|
||||||
imgui.same_line()
|
imgui.same_line()
|
||||||
if imgui.button("Ins"): app.disc_entries.insert(index, {"role": "User", "content": "", "collapsed": True, "ts": project_manager.now_ts()})
|
if imgui.button("Ins"): app.disc_entries.insert(index, {"role": "User", "content": "", "collapsed": True, "ts": project_manager.now_ts()})
|
||||||
@@ -3794,6 +3806,8 @@ def render_discussion_entry_controls(app: App) -> None:
|
|||||||
if imgui.button("Clear All"): app.disc_entries.clear()
|
if imgui.button("Clear All"): app.disc_entries.clear()
|
||||||
imgui.same_line()
|
imgui.same_line()
|
||||||
if imgui.button("Save"): app._flush_to_project(); app._flush_to_config(); models.save_config(app.config); app.ai_status = "discussion saved"
|
if imgui.button("Save"): app._flush_to_project(); app._flush_to_config(); models.save_config(app.config); app.ai_status = "discussion saved"
|
||||||
|
imgui.same_line()
|
||||||
|
if imgui.button("Compress"): app.controller._handle_compress_discussion()
|
||||||
_, app.ui_auto_add_history = imgui.checkbox("Auto-add message & response to history", app.ui_auto_add_history)
|
_, app.ui_auto_add_history = imgui.checkbox("Auto-add message & response to history", app.ui_auto_add_history)
|
||||||
imgui.text("Keep Pairs:"); imgui.same_line(); imgui.set_next_item_width(80)
|
imgui.text("Keep Pairs:"); imgui.same_line(); imgui.set_next_item_width(80)
|
||||||
ch, app.ui_disc_truncate_pairs = imgui.input_int("##trunc_pairs", app.ui_disc_truncate_pairs, 1)
|
ch, app.ui_disc_truncate_pairs = imgui.input_int("##trunc_pairs", app.ui_disc_truncate_pairs, 1)
|
||||||
@@ -3806,6 +3820,17 @@ def render_discussion_entry_controls(app: App) -> None:
|
|||||||
def render_discussion_metadata(app: App) -> None:
|
def render_discussion_metadata(app: App) -> None:
|
||||||
disc_data = app.project.get("discussion", {}).get("discussions", {}).get(app.active_discussion, {})
|
disc_data = app.project.get("discussion", {}).get("discussions", {}).get(app.active_discussion, {})
|
||||||
git_commit, last_updated = disc_data.get("git_commit", ""), disc_data.get("last_updated", "")
|
git_commit, last_updated = disc_data.get("git_commit", ""), disc_data.get("last_updated", "")
|
||||||
|
|
||||||
|
total_in, total_out, total_cache = 0, 0, 0
|
||||||
|
for entry in app.disc_entries:
|
||||||
|
if "usage" in entry:
|
||||||
|
total_in += entry["usage"].get("input_tokens", 0)
|
||||||
|
total_out += entry["usage"].get("output_tokens", 0)
|
||||||
|
total_cache += entry["usage"].get("cache_read_input_tokens", 0)
|
||||||
|
if total_in > 0 or total_out > 0:
|
||||||
|
imgui.text_colored(vec4(100, 150, 180), f"Discussion Tokens: {total_in} In | {total_out} Out | {total_cache} Cache")
|
||||||
|
imgui.separator()
|
||||||
|
|
||||||
imgui.text_colored(C_LBL, "commit:"); imgui.same_line()
|
imgui.text_colored(C_LBL, "commit:"); imgui.same_line()
|
||||||
render_selectable_label(app, 'git_commit_val', git_commit[:12] if git_commit else '(none)', width=100, color=(C_IN if git_commit else C_LBL))
|
render_selectable_label(app, 'git_commit_val', git_commit[:12] if git_commit else '(none)', width=100, color=(C_IN if git_commit else C_LBL))
|
||||||
imgui.same_line()
|
imgui.same_line()
|
||||||
|
|||||||
Reference in New Issue
Block a user