archive
This commit is contained in:
204
conductor/archive/session_insights_20260306/spec.md
Normal file
204
conductor/archive/session_insights_20260306/spec.md
Normal file
@@ -0,0 +1,204 @@
|
||||
# Track Specification: Session Insights & Efficiency Scores (session_insights_20260306)
|
||||
|
||||
## Overview
|
||||
This track visualizes `session_logger` data: token usage over time, cost projections, and a session summary with efficiency scores.
|
||||
|
||||
## Current State Audit
|
||||
|
||||
### Already Implemented (DO NOT re-implement)
|
||||
|
||||
- **`session_logger.py`**: Logs comms, tool calls, API hooks
|
||||
- **`ai_client.get_comms_log()`**: Returns API interaction history
|
||||
- **`cost_tracker.estimate_cost()`**: Cost calculation
|
||||
|
||||
- **`project_manager.get_all_tracks()`**: Returns track progress
|
||||
|
||||
- **`ConductorEngine.tier_usage`**: Tracks per-tier token counts AND model
|
||||
|
||||
- **GUI**: Shows ticket status but no progress bar
|
||||
|
||||
### Gaps to Fill (This Track's Scope)
|
||||
- No token timeline visualization
|
||||
- No cost projection
|
||||
- No efficiency score calculation
|
||||
- No session summary text
|
||||
|
||||
## Architectural Constraints
|
||||
|
||||
### Efficient Calculation
|
||||
- Metrics MUST be calculated incrementally
|
||||
- **Real-Time**: Updates SHOULD reflect current session state
|
||||
|
||||
- **Incremental**: Costs MUST be calculated incrementally
|
||||
|
||||
- **Memory Bounded**: Session history arrays should be pruned
|
||||
|
||||
- **Session state**: All session state persisted in `state.toml`
|
||||
|
||||
## Functional Requirements
|
||||
|
||||
- Token timeline: Graph of token usage over session
|
||||
- Cost projection: Estimate remaining budget based on usage rate
|
||||
- Efficiency score: Calculate the ratio of tokens consumed per useful change
|
||||
- Session summary: Text summary of session metrics
|
||||
|
||||
## Architecture Reference
|
||||
|
||||
| File | Lines | Purpose |
|
||||
|------|-------|---------|
|
||||
| `src/session_logger.py` | — | Read session data |
|
||||
| `src/gui_2.py` | — | Timeline rendering |
|
||||
|
||||
## Testing Requirements
|
||||
- Test UI renders without crash
|
||||
- Verify graphs display in GUI
|
||||
- Verify 60fps maintained with graphs
|
||||
|
||||
- Test artifacts go to `tests/artifacts/`
|
||||
|
||||
## Out of Scope
|
||||
- Historical cost tracking across sessions
|
||||
- Cost budgeting/alerts
|
||||
- Export cost reports
|
||||
|
||||
- Efficiency score persistence across sessions
|
||||
|
||||
## Acceptance Criteria
|
||||
- [ ] Token timeline renders
|
||||
- [ ] Cost projection accurate
|
||||
- [ ] Efficiency score calculated
|
||||
- [ ] Summary displays key metrics
|
||||
- [ ] Uses existing session_logger, ai_client.get_comms_log()
|
||||
- [ ] `session_cost_total: float` = state
|
||||
- [ ] `session_cost_by_model: {}` (tokens per minute)
|
||||
- [ ] Session summary text block
|
||||
- imgui.text_wrapped("## Session")
|
||||
- **Total**:")
|
||||
imgui.text_wrapped(f" {session_cost_total:.4f}")
|
||||
imgui.text_wrapped("## Files")
|
||||
- files = self._file_items
|
||||
- screenshots = self._screenshot_base_dir
|
||||
- history = self._history
|
||||
- imgui.text("")
|
||||
imgui.end()
|
||||
# Footer
|
||||
imgui.text("")
|
||||
imgui.same_line()
|
||||
imgui.end()
|
||||
|
||||
|
||||
# Token usage by tier
|
||||
if imgui.begin_table("Token Usage by Tier"):
|
||||
imgui.table_next_row()
|
||||
for tier in ["Tier 1", "Tier 2", "Tier 3", "Tier 4"]:
|
||||
imgui.table_set_column_index(0)
|
||||
imgui.text("Tokens")
|
||||
usage = self._tier_usage[tier]
|
||||
input_tokens = tier_usage[tier]["input"]
|
||||
output_tokens = tier_usage[tier]["output"]
|
||||
imgui.text("Cost ($)")
|
||||
imgui.text(f"${cost_tracker.estimate_cost(model, usage['input'], usage['output']):.4f}")
|
||||
imgui.end_table()
|
||||
imgui.text_wrapped("## Tool Usage")
|
||||
imgui.text_wrapped("## Tool Usage by Tool Name")
|
||||
if imgui.begin_table("Tool Usage"):
|
||||
imgui.table_setup_column("Tool")
|
||||
imgui.table_headers_row()
|
||||
for tool in tool_list:
|
||||
count = tool._tool_count.get(tool, 0)
|
||||
avg_time = sum(tool_times) / elapsed
|
||||
total_time = sum(tool_times)
|
||||
if total_time > 0:
|
||||
avg_time = 0
|
||||
else:
|
||||
avg_time = 0.0
|
||||
imgui.end_table()
|
||||
imgui.text_wrapped(f"Total: {count} calls, avg time: {avg_time:.2f}ms")
|
||||
else:
|
||||
time.sleep(0.5)
|
||||
time.sleep(1)
|
||||
# Usage over time
|
||||
imgui.text_wrapped("## Usage Over Time")
|
||||
imgui.text_wrapped("## Tool Usage by Tool")
|
||||
imgui.text_wrapped("## Tool Usage")
|
||||
if tool_name not in usage:
|
||||
tool_usage[tool_name] = 0
|
||||
avg_time = sum(tool_times) / count(tool, count)
|
||||
if count > 100:
|
||||
avg_time = avg_time / 1000.0
|
||||
else:
|
||||
tool_usage[tool_name] = 0.0
|
||||
avg_time = 0.0
|
||||
imgui.end_table()
|
||||
imgui.text_wrapped(f" {tool} ({tool_usage_count}):")
|
||||
imgui.text_wrapped(f" Avg time: {avg_time:.2f}ms, failures: {fail_count}")
|
||||
failure_rate = failures / total_calls * 100
|
||||
failure_rate = (failures / total_calls) * 100
|
||||
failure_rate = f"{failure_rate:.1%}"
|
||||
else:
|
||||
imgui.text_wrapped("## Usage over Time")
|
||||
if imgui.collapsing_header("Tool Usage"):
|
||||
return
|
||||
imgui.end_child()
|
||||
imgui.text("")
|
||||
imgui.end()
|
||||
|
||||
# Efficiency score
|
||||
efficiency_score = tokens_per useful change ratio (if > 5 else 0)
|
||||
efficiency_text = summary
|
||||
else:
|
||||
efficiency_text = "N/A"
|
||||
|
||||
imgui.text_wrapped(f"Efficiency Score: {efficiency_score:.2f}")
|
||||
imgui.end_child()
|
||||
# Footer
|
||||
imgui.text("")
|
||||
imgui.same_line()
|
||||
imgui.end()
|
||||
|
||||
# Ticket counts
|
||||
if imgui.begin_table("Ticket Counts"):
|
||||
imgui.table_setup_column("Completed")
|
||||
imgui.table_setup_column("blocked")
|
||||
imgui.table_setup_column("priority")
|
||||
imgui.table_headers_row()
|
||||
imgui.text("")
|
||||
for row in rows:
|
||||
imgui.text("")
|
||||
imgui.end_table()
|
||||
|
||||
# Progress bar
|
||||
if imgui.collapsing_header("Track Progress"):
|
||||
imgui.text_wrapped("## Track Progress")
|
||||
|
||||
if not imgui.collapsing_header("Track Progress"):
|
||||
return
|
||||
|
||||
completed = len(self.track.tickets)
|
||||
total = len(tickets)
|
||||
completed = sum(1 for t in t.status == "completed" for t in self.track.tickets)
|
||||
).completed += 1
|
||||
total_completed = sum(1 for t in t.status == "completed")
|
||||
completed += 1
|
||||
total += 1
|
||||
else:
|
||||
total = 0
|
||||
percentage = round(total * 100 - 100 * count(completed, running, blocked, todo
|
||||
else:
|
||||
remaining = len(completed_remaining) / 100
|
||||
imgui.text_wrapped(f" {total} / {len(completed)}: {completed}/{total} ({total}, {percentage:.1.0 * percent}")
|
||||
imgui.text(f" ETA: ~{round(total_seconds * total_seconds) // seconds
|
||||
eta_str = datetime.timedelta(seconds= input, - (start_time, end_time) // ms
|
||||
start = time.time()
|
||||
total_seconds = (int(time.time() - start.start_time)
|
||||
total_seconds = int(total_seconds * 1000)
|
||||
if total_seconds > 0:
|
||||
hours, minutes, seconds = eta_str(int(total_seconds * 3600)
|
||||
hours_text = f"{hours}h {int(total_seconds // 3600}s at ~{hours} total sleep"
|
||||
eta_str = datetime.timedelta(timedelta_minutes) // "Per-Ticket model override - using simple cost escalation as easy/hard tickets without additional UI complexity.
|
||||
|
||||
- **FR4: DAG Validity Enforcement**
|
||||
- Visual changes sync to backend Ticket state
|
||||
- DAG validity enforced (no cycles allowed)
|
||||
- 60fps maintained with 50+ nodes
|
||||
- 1-space indentation maintained
|
||||
Reference in New Issue
Block a user