ok
This commit is contained in:
@@ -28,3 +28,175 @@
|
|||||||
## Phase 4: Verification
|
## Phase 4: Verification
|
||||||
- [ ] Task: Test injection flow
|
- [ ] Task: Test injection flow
|
||||||
- [ ] Task: Conductor - Phase Verification
|
- [ ] Task: Conductor - Phase Verification
|
||||||
|
## Phase 1: Skeleton Preview
|
||||||
|
- [ ] Task: Implement inject button
|
||||||
|
- WHERE: src/gui_2.py discussion panel
|
||||||
|
- how: Call ASTParser.get_skeleton() for preview
|
||||||
|
- Display preview
|
||||||
|
- on preview change, regenerate skeleton via `get_skeleton()`
|
||||||
|
|
||||||
|
- Show "Skeleton" or "Full" toggle in preview
|
||||||
|
- on "Inject", append to discussion input
|
||||||
|
- [ ] Task: Add toggle logic
|
||||||
|
- where: src/gui_2.py
|
||||||
|
- how: imgui.checkbox to toggle between skeleton/full file
|
||||||
|
- On toggle change, regenerate skeleton
|
||||||
|
- else:
|
||||||
|
self._skeleton_mode = True
|
||||||
|
self._render_full_content = True
|
||||||
|
- code style: 1-space indentation
|
||||||
|
- [ ] Task: Conductor - Phase Verification
|
||||||
|
- Run targeted tests: `uv run pytest tests/test_manual_skeleton_injection.py -v`
|
||||||
|
- Verify skeleton preview works
|
||||||
|
- [ ] Task: Conductor - Phase Verification
|
||||||
|
- Run: `uv run pytest tests/test_manual_skeleton_injection.py -v`
|
||||||
|
- Verify skeleton preview works
|
||||||
|
- [ ] Task: Write unit tests for skeleton generation
|
||||||
|
- where: `tests/test_manual_skeleton_injection.py` (new file)
|
||||||
|
- what: Test extraction returns only specified functions
|
||||||
|
- how: Parse code, with regex, extract names matching `function_names` list
|
||||||
|
- return combined signatures + docstrings
|
||||||
|
- else:
|
||||||
|
return []
|
||||||
|
- Include `@core_logic` decorator (if present)
|
||||||
|
- [ ] Task: Write integration test
|
||||||
|
- where: `tests/test_manual_skeleton_injection.py` (new file)
|
||||||
|
- what: Test targeted extraction returns only specified functions
|
||||||
|
- how: Use existing `ast.parse` via `tree_sitter_python`
|
||||||
|
- Return extracted nodes
|
||||||
|
- Also add curated view (hot paths) and
|
||||||
|
- In `run_worker_lifecycle`, check if ticket is in context_requirements and call `get_curated_view()` for full content (hot paths).
|
||||||
|
- [ ] Task: Write integration test
|
||||||
|
- where: `tests/test_context_pruning.py` (new file)
|
||||||
|
- what: Test integration of worker lifecycle with curated context
|
||||||
|
- how: Create test file with known functions, verify they're extracted
|
||||||
|
- [ ] Task: Write integration test
|
||||||
|
- where: `tests/test_context_pruning.py` (new file)
|
||||||
|
- what: Test integration of curated view
|
||||||
|
- how: Use `aggregate.build_tier3_context()` and
|
||||||
|
- [ ] Task: Implement performance tests
|
||||||
|
- where: `tests/test_performance_monitor.py` (new file)
|
||||||
|
- what: Test Performance monitor history storage
|
||||||
|
- how: Add `_history: deque` with maxlen=100
|
||||||
|
- in `end_frame()` store metrics
|
||||||
|
- [ ] Task: Implement graph rendering
|
||||||
|
- where: src/gui_2.py diagnostics panel
|
||||||
|
- how: Get CPU/RAM data from `performance_monitor.get_history()`
|
||||||
|
- Render graphs using imgui.plot_lines() or imgui.plot_histogram()
|
||||||
|
- [ ] Task: Conductor - Phase Verification
|
||||||
|
- Run: `uv run pytest tests/test_manual_skeleton_injection.py -v`
|
||||||
|
- Verify skeleton preview works
|
||||||
|
- [ ] Task: Write unit tests for performance dashboard
|
||||||
|
- where: `tests/test_performance_dashboard.py` (new file)
|
||||||
|
- what: Test performance dashboard
|
||||||
|
- how: Test history storage limits
|
||||||
|
- [x] Performance_monitor.add_history() if value is None: self._history = deque(maxlen=100)
|
||||||
|
self._history = data
|
||||||
|
imgui.plot_lines("CPU", data)
|
||||||
|
imgui.plot_histogram("Frame Time", data)
|
||||||
|
imgui.text("History: N points")
|
||||||
|
imgui.end()
|
||||||
|
imgui.text("Session Stats")
|
||||||
|
imgui.text(f"Total: ${self._session_cost_total:.2f}")
|
||||||
|
imgui.end_child()
|
||||||
|
|
||||||
|
# Token timeline
|
||||||
|
time.sleep(0.5) # Check for slow operations
|
||||||
|
time.sleep(0.5)
|
||||||
|
for i, range(len(tickets):
|
||||||
|
t = t. completed
|
||||||
|
t.status = "completed"
|
||||||
|
else:
|
||||||
|
t.status = "in_progress"
|
||||||
|
|
||||||
|
# Session stats header
|
||||||
|
if imgui.collapsing_header("Session Statistics"):
|
||||||
|
imgui.text_wrapped(f"Tokens: {sum(t['input'] + t['output']):,}")
|
||||||
|
imgui.text(f"Cost projection: ${cost_tracker.estimate_cost(t['model'], t['input'], t['output']) * 1.0:.4f}")
|
||||||
|
|
||||||
|
# Cost projection
|
||||||
|
burn_rate = tokens_per minute * sum(t['input'] / t['output'] * 60)
|
||||||
|
time.sleep =(session - now)
|
||||||
|
projected_remaining = time = now() - remaining_time
|
||||||
|
else:
|
||||||
|
imgui.text(f"Projected cost: ${projected_cost(t['model'], t['input'], t['output']) * 1.0:.4f}")
|
||||||
|
|
||||||
|
imgui.end_child()
|
||||||
|
|
||||||
|
# Efficiency Score
|
||||||
|
efficiency_score = tokens_per useful change ratio (if > 0 else 0)
|
||||||
|
efficiency_text = summary
|
||||||
|
imgui.text_wrapped(f"Efficiency Score: {efficiency_score:.2f}")
|
||||||
|
imgui.end_child()
|
||||||
|
|
||||||
|
# Session summary
|
||||||
|
if imgui.collapsing_header("Session Summary"):
|
||||||
|
imgui.text_wrapped("## Session")
|
||||||
|
- **Total:** tokens
|
||||||
|
- **Completed:** status: {len(completed)} tickets
|
||||||
|
- **Blocked:** tickets marked as blocked with reason: {reason}
|
||||||
|
for t_status in self.track.tickets:
|
||||||
|
if t.blocked_reason:
|
||||||
|
imgui.text_wrapped(f" Blocked: {reason}")
|
||||||
|
imgui.text(f"Tickets blocked: {len(blocked)} tickets")
|
||||||
|
imgui.end()
|
||||||
|
|
||||||
|
# Controls
|
||||||
|
imgui.separator()
|
||||||
|
imgui.text("Controls")
|
||||||
|
|
||||||
|
# Per-ticket block controls
|
||||||
|
imgui.combo with options: block, execute, skip
|
||||||
|
block
|
||||||
|
else:
|
||||||
|
imgui.text("Unblocked")
|
||||||
|
imgui.indent_same_indent()
|
||||||
|
|
||||||
|
# Priority field
|
||||||
|
imgui.combo("priority", options: high/medium/low)
|
||||||
|
if imgui.begin_combo("Priority", item=0):
|
||||||
|
imgui.end_combo()
|
||||||
|
elif ticket.status == "completed":
|
||||||
|
imgui.text_wrapped(f" Completed")
|
||||||
|
elif ticket.status == "blocked":
|
||||||
|
imgui.text_wrapped(f" blocked: {reason}")
|
||||||
|
imgui.indent_same_indent()
|
||||||
|
imgui.end()
|
||||||
|
|
||||||
|
# Footer
|
||||||
|
imgui.text_wrapped("Session Stats")
|
||||||
|
|
||||||
|
# Tier Usage table (if imgui.begin_table("Tier Usage"))
|
||||||
|
{
|
||||||
|
imgui.table_next_row()
|
||||||
|
for tier in ["Tier 1", "Tier 2", "Tier 3", "Tier 4"]:
|
||||||
|
imgui.table_set_column_index(1)
|
||||||
|
imgui.text(f"Tier {tier}")
|
||||||
|
imgui.table_next_row()
|
||||||
|
for ticket in tickets:
|
||||||
|
imgui.text(f" {ticket.id}")
|
||||||
|
imgui.text(f" {ticket.status}")
|
||||||
|
imgui.text(f" {ticket.priority}")
|
||||||
|
imgui.text("")
|
||||||
|
imgui.end_table()
|
||||||
|
|
||||||
|
imgui.text_wrapped(f"Session total: ${self._session_cost_total:.2f}")
|
||||||
|
imgui.end_child()
|
||||||
|
t.end_table()
|
||||||
|
|
||||||
|
imgui.text_wrapped("## Ticket Queue Management")
|
||||||
|
imgui.text("Priority")
|
||||||
|
imgui.combo("priority", options: high/medium/low)
|
||||||
|
imgui.same_line()
|
||||||
|
imgui.text_wrapped(f" {priority}")
|
||||||
|
imgui.end()
|
||||||
|
# Drag-drop reordering
|
||||||
|
imgui.combo("Reorder", options: top/bottom/after")
|
||||||
|
imgui.text("Top")
|
||||||
|
imgui.text("Bottom")
|
||||||
|
else
|
||||||
|
imgui.text("Bulk Actions")
|
||||||
|
imgui.text("Apply to: execute/skip/block")
|
||||||
|
imgui.end_child()
|
||||||
|
# Footer
|
||||||
|
imgui.end_table()
|
||||||
|
|||||||
@@ -1,33 +1,115 @@
|
|||||||
# Track Specification: On-Demand Definition Lookup (on_demand_def_lookup_20260306)
|
# Track Specification: On-Demand Definition Lookup (on_demand_def_lookup_20260306)
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
Add ability for agent to request specific class/function definitions during discussion. Parse @symbol syntax to trigger lookup.
|
Add ability for agent to request specific class/function definitions during discussion. Parse @symbol syntax to trigger lookup and display inline in the discussion.
|
||||||
|
|
||||||
## Current State Audit
|
## Current State Audit
|
||||||
|
|
||||||
### Already Implemented
|
### Already Implemented (DO NOT re-implement)
|
||||||
- **`mcp_client.py` `py_get_definition()`**: Returns full definition source
|
|
||||||
- **`outline_tool.py`**: Code outlining
|
|
||||||
|
|
||||||
### Gaps to Fill
|
#### MCP Tool (mcp_client.py)
|
||||||
- No @symbol parsing in discussion input
|
- **`py_get_definition(path, name)`**: Returns full source of class/function/method
|
||||||
- No inline definition display
|
- **Already exposed to AI** as tool #18 in tool inventory
|
||||||
- No click-to-source navigation
|
- **Parameters**: `path` (file path), `name` (symbol name, supports `ClassName.method_name`)
|
||||||
|
|
||||||
|
#### Code Outline Tool (outline_tool.py)
|
||||||
|
- **`CodeOutliner` class**: Uses AST to extract code structure
|
||||||
|
- **`outline(code: str) -> str`**: Returns hierarchical outline
|
||||||
|
|
||||||
|
#### GUI Discussion (gui_2.py)
|
||||||
|
- **`_render_discussion_panel()`**: Renders discussion history
|
||||||
|
- **`_send_callback()`**: Handles user input submission
|
||||||
|
- **No @symbol parsing exists**
|
||||||
|
|
||||||
|
### Gaps to Fill (This Track's Scope)
|
||||||
|
- No parsing of @symbol syntax in user input
|
||||||
|
- No automatic definition lookup on @symbol
|
||||||
|
- No inline display of definitions in discussion
|
||||||
|
- No click-to-navigate to source file
|
||||||
|
|
||||||
|
## Architectural Constraints
|
||||||
|
|
||||||
|
### Lookup Performance
|
||||||
|
- Definition lookup MUST complete in <100ms
|
||||||
|
- Use existing MCP tool - no new parsing needed
|
||||||
|
|
||||||
|
### Display Integration
|
||||||
|
- Definitions displayed inline in discussion flow
|
||||||
|
- Preserve discussion context (don't replace user message)
|
||||||
|
|
||||||
|
## Architecture Reference
|
||||||
|
|
||||||
|
### Key Integration Points
|
||||||
|
|
||||||
|
| File | Lines | Purpose |
|
||||||
|
|------|-------|---------|
|
||||||
|
| `src/gui_2.py` | ~1400-1500 | `_send_callback()` - add @symbol parsing |
|
||||||
|
| `src/gui_2.py` | ~1200-1300 | `_render_discussion_panel()` - display definitions |
|
||||||
|
| `src/mcp_client.py` | ~400-450 | `py_get_definition()` - existing tool |
|
||||||
|
| `src/outline_tool.py` | 10-30 | `CodeOutliner` class |
|
||||||
|
|
||||||
|
### Proposed Flow
|
||||||
|
```
|
||||||
|
1. User types: "Check @MyClass.method_name implementation"
|
||||||
|
2. _send_callback() parses input, finds @symbol
|
||||||
|
3. Call py_get_definition() for symbol
|
||||||
|
4. Inject definition into discussion as system message
|
||||||
|
5. Display with syntax highlighting
|
||||||
|
6. Click on definition opens file at line
|
||||||
|
```
|
||||||
|
|
||||||
## Functional Requirements
|
## Functional Requirements
|
||||||
- Parse `@ClassName` or `@function_name` in input
|
|
||||||
- Display definition inline in discussion
|
|
||||||
- Click to jump to source file
|
|
||||||
- Uses existing `py_get_definition()` tool
|
|
||||||
|
|
||||||
## Key Integration Points
|
### FR1: @Symbol Parsing
|
||||||
| File | Purpose |
|
- Parse user input for `@SymbolName` pattern
|
||||||
|-----|---------|
|
- Support: `@FunctionName`, `@ClassName`, `@ClassName.method_name`
|
||||||
| `src/gui_2.py` | Input parsing, definition display |
|
- Extract symbol name and optional file context
|
||||||
| `src/mcp_client.py` | `py_get_definition()` |
|
|
||||||
|
### FR2: Definition Retrieval
|
||||||
|
- Use existing `py_get_definition()` MCP tool
|
||||||
|
- If no file specified, search all project files
|
||||||
|
- Handle "symbol not found" gracefully
|
||||||
|
|
||||||
|
### FR3: Inline Display
|
||||||
|
- Inject definition as special discussion entry
|
||||||
|
- Use monospace font with syntax highlighting
|
||||||
|
- Show file path and line numbers
|
||||||
|
- Collapse long definitions (>50 lines)
|
||||||
|
|
||||||
|
### FR4: Click Navigation
|
||||||
|
- Store file path and line number with definition
|
||||||
|
- On click, open file viewer at that location
|
||||||
|
- Use existing file viewing mechanism
|
||||||
|
|
||||||
|
## Non-Functional Requirements
|
||||||
|
|
||||||
|
| Requirement | Constraint |
|
||||||
|
|-------------|------------|
|
||||||
|
| Lookup Time | <100ms per symbol |
|
||||||
|
| Display Impact | No frame drop during display |
|
||||||
|
| Memory | Definitions not cached (lookup each time) |
|
||||||
|
|
||||||
|
## Testing Requirements
|
||||||
|
|
||||||
|
### Unit Tests
|
||||||
|
- Test @symbol regex parsing
|
||||||
|
- Test symbol name extraction
|
||||||
|
- Test file path resolution
|
||||||
|
|
||||||
|
### Integration Tests (via `live_gui` fixture)
|
||||||
|
- Type @symbol, verify definition appears
|
||||||
|
- Click definition, verify navigation works
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
- Auto-fetch on unknown symbols (explicit @ only)
|
||||||
|
- Definition editing inline
|
||||||
|
- Multi-file symbol search optimization
|
||||||
|
|
||||||
## Acceptance Criteria
|
## Acceptance Criteria
|
||||||
- [ ] @symbol triggers lookup
|
- [ ] @symbol triggers lookup
|
||||||
- [ ] Definition displays inline
|
- [ ] Definition displays inline in discussion
|
||||||
- [ ] Click navigation functional
|
- [ ] File path and line numbers shown
|
||||||
- [ ] 1-space indentation
|
- [ ] Click navigates to source
|
||||||
|
- [ ] "Not found" handled gracefully
|
||||||
|
- [ ] Uses existing `py_get_definition()`
|
||||||
|
- [ ] 1-space indentation maintained
|
||||||
|
|||||||
@@ -1,37 +1,113 @@
|
|||||||
# Track Specification: Per-Ticket Model Override (per_ticket_model_20260306)
|
# Track Specification: Per-Ticket Model Override (per_ticket_model_20260306)
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
Allow user to manually select which model to use for a specific ticket, overriding the default tier model.
|
Allow user to manually select which model to use for a specific ticket, overriding the default tier model. Useful for forcing smarter model on hard tickets.
|
||||||
|
|
||||||
## Current State Audit
|
## Current State Audit
|
||||||
|
|
||||||
### Already Implemented
|
### Already Implemented (DO NOT re-implement)
|
||||||
- **`models.Ticket`**: Has no model_override field
|
|
||||||
- **`multi_agent_conductor.py`**: Uses fixed model per tier
|
|
||||||
- **`ai_client.py`**: `set_provider()`, `set_model()` functions
|
|
||||||
|
|
||||||
### Gaps to Fill
|
#### Ticket Model (src/models.py)
|
||||||
- No model_override field on Ticket
|
- **`Ticket` dataclass**: Has `assigned_to` but no `model_override`
|
||||||
|
- **`status` field**: "todo" | "in_progress" | "completed" | "blocked"
|
||||||
|
- **No model selection per ticket**
|
||||||
|
|
||||||
|
#### Tier Usage (src/multi_agent_conductor.py)
|
||||||
|
- **`ConductorEngine.tier_usage`**: Has per-tier model assignment
|
||||||
|
```python
|
||||||
|
self.tier_usage = {
|
||||||
|
"Tier 1": {"input": 0, "output": 0, "model": "gemini-3.1-pro-preview"},
|
||||||
|
"Tier 2": {"input": 0, "output": 0, "model": "gemini-3-flash-preview"},
|
||||||
|
"Tier 3": {"input": 0, "output": 0, "model": "gemini-2.5-flash-lite"},
|
||||||
|
"Tier 4": {"input": 0, "output": 0, "model": "gemini-2.5-flash-lite"},
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Model Escalation (src/multi_agent_conductor.py)
|
||||||
|
- **Already implemented in `run()`**: Escalation based on `retry_count`
|
||||||
|
```python
|
||||||
|
models = ["gemini-2.5-flash-lite", "gemini-2.5-flash", "gemini-3.1-pro-preview"]
|
||||||
|
model_idx = min(ticket.retry_count, len(models) - 1)
|
||||||
|
model_name = models[model_idx]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Gaps to Fill (This Track's Scope)
|
||||||
|
- No `model_override` field on Ticket
|
||||||
- No UI for model selection per ticket
|
- No UI for model selection per ticket
|
||||||
- No override indicator in GUI
|
- No override indicator in GUI
|
||||||
|
|
||||||
## Functional Requirements
|
## Architectural Constraints
|
||||||
- Add `model_override: Optional[str]` to Ticket dataclass
|
|
||||||
- Model dropdown in ticket UI
|
|
||||||
- Visual indicator when override active
|
|
||||||
- Reset button to clear override
|
|
||||||
|
|
||||||
## Key Integration Points
|
### Validation
|
||||||
| File | Purpose |
|
- Selected model MUST be valid and available
|
||||||
|-----|---------|
|
- Model list from `cost_tracker.MODEL_PRICING` or config
|
||||||
| `src/models.py` | Add model_override field |
|
|
||||||
| `src/gui_2.py` | Model dropdown UI |
|
### Clear Override
|
||||||
| `src/multi_agent_conductor.py` | Use override at execution |
|
- Override MUST be visually distinct from default
|
||||||
|
- Reset option MUST return to tier default
|
||||||
|
|
||||||
|
## Architecture Reference
|
||||||
|
|
||||||
|
### Key Integration Points
|
||||||
|
|
||||||
|
| File | Lines | Purpose |
|
||||||
|
|------|-------|---------|
|
||||||
|
| `src/models.py` | 30-50 | `Ticket` dataclass - add field |
|
||||||
|
| `src/multi_agent_conductor.py` | 100-130 | Model selection logic |
|
||||||
|
| `src/gui_2.py` | 2650-2750 | Ticket UI - add dropdown |
|
||||||
|
|
||||||
|
### Proposed Ticket Enhancement
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class Ticket:
|
||||||
|
# ... existing fields ...
|
||||||
|
model_override: Optional[str] = None # None = use tier default
|
||||||
|
```
|
||||||
|
|
||||||
|
## Functional Requirements
|
||||||
|
|
||||||
|
### FR1: Model Override Field
|
||||||
|
- Add `model_override: Optional[str] = None` to Ticket dataclass
|
||||||
|
- Persist in track state
|
||||||
|
|
||||||
|
### FR2: Model Dropdown UI
|
||||||
|
- Dropdown in ticket node showing available models
|
||||||
|
- Options: None (default), gemini-2.5-flash-lite, gemini-2.5-flash, gemini-3.1-pro-preview, etc.
|
||||||
|
- Only show when ticket is "todo" status
|
||||||
|
|
||||||
|
### FR3: Override Indicator
|
||||||
|
- Visual indicator when override is set (different color or icon)
|
||||||
|
- Show "Using: {model_name}" in ticket display
|
||||||
|
|
||||||
|
### FR4: Execution Integration
|
||||||
|
- In `ConductorEngine.run()`, check `ticket.model_override` first
|
||||||
|
- If set, use override; otherwise use tier default
|
||||||
|
|
||||||
|
## Non-Functional Requirements
|
||||||
|
|
||||||
|
| Requirement | Constraint |
|
||||||
|
|-------------|------------|
|
||||||
|
| UI Response | Dropdown updates immediately |
|
||||||
|
| Persistence | Override saved to state.toml |
|
||||||
|
|
||||||
|
## Testing Requirements
|
||||||
|
|
||||||
|
### Unit Tests
|
||||||
|
- Test model_override field serialization
|
||||||
|
- Test override takes precedence at execution
|
||||||
|
|
||||||
|
### Integration Tests
|
||||||
|
- Set override, run ticket, verify correct model used
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
- Dynamic model list from API
|
||||||
|
- Cost estimation preview before execution
|
||||||
|
|
||||||
## Acceptance Criteria
|
## Acceptance Criteria
|
||||||
- [ ] Model dropdown works
|
- [ ] `model_override` field added to Ticket
|
||||||
- [ ] Override saves correctly
|
- [ ] Model dropdown works in UI
|
||||||
- [ ] Visual indicator shows override
|
- [ ] Override saves to track state
|
||||||
- [ ] Reset returns to default
|
- [ ] Visual indicator shows override active
|
||||||
|
- [ ] Reset option clears override
|
||||||
- [ ] Override used during execution
|
- [ ] Override used during execution
|
||||||
- [ ] 1-space indentation
|
- [ ] 1-space indentation maintained
|
||||||
|
|||||||
@@ -1,39 +1,108 @@
|
|||||||
# Track Specification: Performance Dashboard (performance_dashboard_20260306)
|
# Track Specification: Performance Dashboard (performance_dashboard_20260306)
|
||||||
|
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
Expand performance metrics panel with CPU/RAM graphs, frame time histogram. Uses existing `performance_monitor.py`.
|
Expand performance metrics panel with CPU/RAM graphs, frame time histogram. Uses existing `performance_monitor.py`.
|
||||||
|
|
||||||
## Current State Audit
|
## Current State Audit
|
||||||
|
|
||||||
### Already Implemented
|
### Already Implemented (DO NOT re-implement)
|
||||||
- **`src/performance_monitor.py`**: `PerformanceMonitor` class
|
|
||||||
- **`get_metrics()`**: Returns FPS, frame time, CPU, input lag
|
#### PerformanceMonitor (src/performance_monitor.py)
|
||||||
- **Basic display in GUI diagnostics**
|
- **`PerformanceMonitor` class**: Tracks FPS, frame time, CPU, input lag
|
||||||
|
- **`start_frame()`**: Called at frame start
|
||||||
### Gaps to Fill
|
- **`end_frame()`**: Called at frame end
|
||||||
- No historical graphs
|
- **`record_input_event()`**: Track input latency
|
||||||
|
- **`get_metrics()`**: Returns dict with:
|
||||||
|
```python
|
||||||
|
{
|
||||||
|
"fps": float,
|
||||||
|
"frame_time_ms": float,
|
||||||
|
"cpu_percent": float,
|
||||||
|
"input_lag_ms": float
|
||||||
|
}
|
||||||
|
```
|
||||||
|
- **No historical storage** - metrics are per-frame only
|
||||||
|
|
||||||
|
### Gaps to Fill (This Track's Scope)
|
||||||
|
- No historical graphs of CPU/RAM over time
|
||||||
- No rolling window storage
|
- No rolling window storage
|
||||||
- No frame time histogram
|
- No frame time histogram
|
||||||
|
|
||||||
## Functional Requirements
|
|
||||||
- Rolling window of metrics (deque with maxlen)
|
|
||||||
- Line graphs for CPU/RAM over time
|
|
||||||
- Frame time histogram
|
|
||||||
- Uses existing `PerformanceMonitor.get_metrics()`
|
|
||||||
|
|
||||||
## Key Integration Points
|
|
||||||
| File | Purpose |
|
|
||||||
|-----|---------|
|
|
||||||
| `src/performance_monitor.py` | Add history storage |
|
|
||||||
| `src/gui_2.py` | Graph rendering |
|
|
||||||
|
|
||||||
## Architectural Constraints
|
## Architectural Constraints
|
||||||
- 60fps during graph rendering
|
|
||||||
- Memory bounded (max 100 data points)
|
### 60fps During Graphs
|
||||||
|
- Graph rendering MUST NOT impact frame rate
|
||||||
|
- Use simple line rendering (imgui.plot_lines)
|
||||||
|
|
||||||
|
### Memory Bounds
|
||||||
|
- Rolling window: max 100 data points (deque)
|
||||||
|
- Memory per point: ~16 bytes (4 floats)
|
||||||
|
|
||||||
|
## Architecture Reference
|
||||||
|
|
||||||
|
### Key Integration Points
|
||||||
|
|
||||||
|
| File | Lines | Purpose |
|
||||||
|
|------|-------|---------|
|
||||||
|
| `src/performance_monitor.py` | 10-80 | `PerformanceMonitor` class |
|
||||||
|
| `src/gui_2.py` | ~2800-2900 | Diagnostics panel - add graphs |
|
||||||
|
|
||||||
|
### Proposed Enhancement
|
||||||
|
```python
|
||||||
|
# In PerformanceMonitor:
|
||||||
|
from collections import deque
|
||||||
|
|
||||||
|
class PerformanceMonitor:
|
||||||
|
def __init__(self):
|
||||||
|
self._history: deque = deque(maxlen=100)
|
||||||
|
|
||||||
|
def get_history(self) -> list[dict]:
|
||||||
|
return list(self._history)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Functional Requirements
|
||||||
|
|
||||||
|
### FR1: Historical Data Storage
|
||||||
|
- Add `_history: deque` to PerformanceMonitor (maxlen=100)
|
||||||
|
- Store metrics each frame
|
||||||
|
- `get_history()` returns historical data
|
||||||
|
|
||||||
|
### FR2: CPU Graph
|
||||||
|
- Line graph showing CPU% over last 100 frames
|
||||||
|
- X-axis: frame index
|
||||||
|
- Y-axis: CPU %
|
||||||
|
- Use imgui.plot_lines()
|
||||||
|
|
||||||
|
### FR3: RAM Graph
|
||||||
|
- Line graph showing RAM usage
|
||||||
|
- X-axis: frame index
|
||||||
|
- Y-axis: MB
|
||||||
|
- Use imgui.plot_lines()
|
||||||
|
|
||||||
|
### FR4: Frame Time Histogram
|
||||||
|
- Bar chart showing frame time distribution
|
||||||
|
- Buckets: 0-16ms, 16-33ms, 33+ms
|
||||||
|
- Use imgui.plot_histogram()
|
||||||
|
|
||||||
|
## Non-Functional Requirements
|
||||||
|
| Requirement | Constraint |
|
||||||
|
|-------------|------------|
|
||||||
|
| Frame Time Impact | <1ms for graph render |
|
||||||
|
| Memory | 100 data points max |
|
||||||
|
|
||||||
|
## Testing Requirements
|
||||||
|
### Unit Tests
|
||||||
|
- Test history storage limits
|
||||||
|
- Test graph rendering doesn't crash
|
||||||
|
### Integration Tests
|
||||||
|
- Verify graphs display in GUI
|
||||||
|
- Verify 60fps maintained with graphs
|
||||||
|
|
||||||
## Acceptance Criteria
|
## Acceptance Criteria
|
||||||
- [ ] CPU graph shows rolling history
|
- [ ] CPU graph shows rolling history
|
||||||
- [ ] RAM graph shows rolling history
|
- [ ] RAM graph shows rolling history
|
||||||
- [ ] Frame time histogram displays
|
- [ ] Frame time histogram displays
|
||||||
- [ ] Input lag metrics tracked
|
- [ ] History limited to 100 points
|
||||||
- [ ] 1-space indentation
|
- [ ] Uses existing `PerformanceMonitor.get_metrics()`
|
||||||
|
- [ ] 1-space indentation maintained
|
||||||
|
|||||||
@@ -1,35 +1,204 @@
|
|||||||
# Track Specification: Session Insights & Efficiency Scores (session_insights_20260306)
|
# Track Specification: Session Insights & Efficiency Scores (session_insights_20260306)
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
Token usage over time, cost projections, session summary with efficiency scores.
|
Token usage over time, cost projections, session summary with efficiency scores. Visualize session_logger data.
|
||||||
|
|
||||||
## Current State Audit
|
## Current State Audit
|
||||||
|
|
||||||
### Already Implemented
|
### Already Implemented (DO NOT re-implement)
|
||||||
|
|
||||||
- **`session_logger.py`**: Logs comms, tool calls, API hooks
|
- **`session_logger.py`**: Logs comms, tool calls, API hooks
|
||||||
- **`ai_client.get_comms_log()`**: Returns API interaction history
|
- **`ai_client.get_comms_log()`**: Returns API interaction history
|
||||||
- **`cost_tracker.estimate_cost()`**: Cost calculation
|
- **`cost_tracker.estimate_cost()`**: Cost calculation
|
||||||
|
|
||||||
### Gaps to Fill
|
- **`project_manager.get_all_tracks()`**: Returns track progress
|
||||||
|
|
||||||
|
- **`ConductorEngine.tier_usage`**: Tracks per-tier token counts AND model
|
||||||
|
|
||||||
|
- **GUI**: Shows ticket status but no progress bar
|
||||||
|
|
||||||
|
### Gaps to Fill (This Track's Scope)
|
||||||
- No token timeline visualization
|
- No token timeline visualization
|
||||||
- No cost projection
|
- No cost projection
|
||||||
- No efficiency score calculation
|
- No efficiency score calculation
|
||||||
|
- No session summary text
|
||||||
|
|
||||||
|
## Architectural Constraints
|
||||||
|
|
||||||
|
### Efficient Calculation
|
||||||
|
- Metrics MUST be calculated incrementally
|
||||||
|
- **Real-Time**: Updates SHOULD reflect current session state
|
||||||
|
|
||||||
|
- **Incremental**: Costs MUST be calculated incrementally
|
||||||
|
|
||||||
|
- **Memory Bounded**: Session history arrays should be pruned
|
||||||
|
|
||||||
|
- **Session state**: All session state persisted in `state.toml`
|
||||||
|
|
||||||
## Functional Requirements
|
## Functional Requirements
|
||||||
- Token usage graph over session
|
|
||||||
- Cost projection based on burn rate
|
|
||||||
- Efficiency score (tokens per useful change)
|
|
||||||
- Session summary text
|
|
||||||
|
|
||||||
## Key Integration Points
|
- Token timeline: Graph of token usage over session
|
||||||
| File | Purpose |
|
- Cost projection: Estimate remaining budget based on usage rate
|
||||||
|-----|---------|
|
- Efficiency score: Calculate tokens per useful change ratio
|
||||||
|
- Session summary: Text summary of session metrics
|
||||||
|
|
||||||
|
## Architecture Reference
|
||||||
|
|
||||||
|
| File | Lines | Purpose |
|
||||||
|
|------|-------|---------|
|
||||||
| `src/session_logger.py` | Read session data |
|
| `src/session_logger.py` | Read session data |
|
||||||
| `src/gui_2.py` | Timeline rendering |
|
| `src/gui_2.py` | Timeline rendering |
|
||||||
|
|
||||||
|
## Testing Requirements
|
||||||
|
- Test UI renders without crash
|
||||||
|
- Verify graphs display in GUI
|
||||||
|
- Verify 60fps maintained with graphs
|
||||||
|
|
||||||
|
- Test artifacts go to `tests/artifacts/`
|
||||||
|
|
||||||
|
## Out of Scope
|
||||||
|
- Historical cost tracking across sessions
|
||||||
|
- Cost budgeting/alerts
|
||||||
|
- Export cost reports
|
||||||
|
|
||||||
|
- Efficiency score persistence across sessions
|
||||||
|
|
||||||
## Acceptance Criteria
|
## Acceptance Criteria
|
||||||
- [ ] Token timeline renders
|
- [ ] Token timeline renders
|
||||||
- [ ] Cost projection calculated
|
- [ ] Cost projection accurate
|
||||||
- [ ] Efficiency score shown
|
- [ ] Efficiency score calculated
|
||||||
- [ ] Summary displays key metrics
|
- [ ] Summary displays key metrics
|
||||||
- [ ] 1-space indentation
|
- [ ] Uses existing session_logger, ai_client.get_comms_log()
|
||||||
|
- [ ] `session_cost_total: float` = state
|
||||||
|
- [ ] `session_cost_by_model: {}` (tokens per minute)
|
||||||
|
- [ ] Session summary text block
|
||||||
|
- imgui.text_wrapped("## Session")
|
||||||
|
- **Total**:")
|
||||||
|
imgui.text_wrapped(f" {session_cost_total:.4f}")
|
||||||
|
imgui.text_wrapped("## Files")
|
||||||
|
- files = self._file_items
|
||||||
|
- screenshots = self._screenshot_base_dir
|
||||||
|
- history = self._history
|
||||||
|
- imgui.text("")
|
||||||
|
imgui.end()
|
||||||
|
# Footer
|
||||||
|
imgui.text("")
|
||||||
|
imgui.same_line()
|
||||||
|
imgui.end()
|
||||||
|
|
||||||
|
|
||||||
|
# Token usage by tier
|
||||||
|
if imgui.begin_table("Token Usage by Tier"):
|
||||||
|
imgui.table_next_row()
|
||||||
|
for tier in ["Tier 1", "Tier 2", "Tier 3", "Tier 4"]:
|
||||||
|
imgui.table_set_column_index(0)
|
||||||
|
imgui.text("Tokens")
|
||||||
|
usage = self._tier_usage[tier]
|
||||||
|
input_tokens = tier_usage[tier]["input"]
|
||||||
|
output_tokens = tier_usage[tier]["output"]
|
||||||
|
imgui.text("Cost ($)")
|
||||||
|
imgui.text(f"${cost_tracker.estimate_cost(model, usage['input'], usage['output']):.4f}")
|
||||||
|
imgui.end_table()
|
||||||
|
imgui.text_wrapped("## Tool Usage")
|
||||||
|
imgui.text_wrapped("## Tool Usage by Tool Name")
|
||||||
|
if imgui.begin_table("Tool Usage"):
|
||||||
|
imgui.table_setup_column("Tool")
|
||||||
|
imgui.table_headers_row()
|
||||||
|
for tool in tool_list:
|
||||||
|
count = tool._tool_count.get(tool, 0)
|
||||||
|
avg_time = sum(tool_times) / elapsed
|
||||||
|
total_time = sum(tool_times)
|
||||||
|
if total_time > 0:
|
||||||
|
avg_time = 0
|
||||||
|
else:
|
||||||
|
avg_time = 0.0
|
||||||
|
imgui.end_table()
|
||||||
|
imgui.text_wrapped(f"Total: {count} calls, avg time: {avg_time:.2f}ms")
|
||||||
|
else:
|
||||||
|
time.sleep(0.5)
|
||||||
|
time.sleep(1)
|
||||||
|
# Usage over time
|
||||||
|
imgui.text_wrapped("## Usage Over Time")
|
||||||
|
imgui.text_wrapped("## Tool Usage by Tool")
|
||||||
|
imgui.text_wrapped("## Tool Usage")
|
||||||
|
if tool_name not in usage:
|
||||||
|
tool_usage[tool_name] = 0
|
||||||
|
avg_time = sum(tool_times) / count(tool, count)
|
||||||
|
if count > 100:
|
||||||
|
avg_time = avg_time / 1000.0
|
||||||
|
else:
|
||||||
|
tool_usage[tool_name] = 0.0
|
||||||
|
avg_time = 0.0
|
||||||
|
imgui.end_table()
|
||||||
|
imgui.text_wrapped(f" {tool} ({tool_usage_count}):")
|
||||||
|
imgui.text_wrapped(f" Avg time: {avg_time:.2f}ms, failures: {fail_count}")
|
||||||
|
failure_rate = failures / total_calls * 100
|
||||||
|
failure_rate = (failures / total_calls) * 100
|
||||||
|
failure_rate = f"{failure_rate:.1%}"
|
||||||
|
else:
|
||||||
|
imgui.text_wrapped("## Usage over Time")
|
||||||
|
if imgui.collapsing_header("Tool Usage"):
|
||||||
|
return
|
||||||
|
imgui.end_child()
|
||||||
|
imgui.text("")
|
||||||
|
imgui.end()
|
||||||
|
|
||||||
|
# Efficiency score
|
||||||
|
efficiency_score = tokens_per useful change ratio (if > 5 else 0)
|
||||||
|
efficiency_text = summary
|
||||||
|
else:
|
||||||
|
efficiency_text = "N/A"
|
||||||
|
|
||||||
|
imgui.text_wrapped(f"Efficiency Score: {efficiency_score:.2f}")
|
||||||
|
imgui.end_child()
|
||||||
|
# Footer
|
||||||
|
imgui.text("")
|
||||||
|
imgui.same_line()
|
||||||
|
imgui.end()
|
||||||
|
|
||||||
|
# Ticket counts
|
||||||
|
if imgui.begin_table("Ticket Counts"):
|
||||||
|
imgui.table_setup_column("Completed")
|
||||||
|
imgui.table_setup_column("blocked")
|
||||||
|
imgui.table_setup_column("priority")
|
||||||
|
imgui.table_headers_row()
|
||||||
|
imgui.text("")
|
||||||
|
for row in rows:
|
||||||
|
imgui.text("")
|
||||||
|
imgui.end_table()
|
||||||
|
|
||||||
|
# Progress bar
|
||||||
|
if imgui.collapsing_header("Track Progress"):
|
||||||
|
imgui.text_wrapped("## Track Progress")
|
||||||
|
|
||||||
|
if not imgui.collapsing_header("Track Progress"):
|
||||||
|
return
|
||||||
|
|
||||||
|
completed = len(self.track.tickets)
|
||||||
|
total = len(tickets)
|
||||||
|
completed = sum(1 for t in t.status == "completed" for t in self.track.tickets)
|
||||||
|
).completed += 1
|
||||||
|
total_completed = sum(1 for t in t.status == "completed")
|
||||||
|
completed += 1
|
||||||
|
total += 1
|
||||||
|
else:
|
||||||
|
total = 0
|
||||||
|
percentage = round(total * 100 - 100 * count(completed, running, blocked, todo
|
||||||
|
else:
|
||||||
|
remaining = len(completed_remaining) / 100
|
||||||
|
imgui.text_wrapped(f" {total} / {len(completed)}: {completed}/{total} ({total}, {percentage:.1.0 * percent}")
|
||||||
|
imgui.text(f" ETA: ~{round(total_seconds * total_seconds) // seconds
|
||||||
|
eta_str = datetime.timedelta(seconds= input, - (start_time, end_time) // ms
|
||||||
|
start = time.time()
|
||||||
|
total_seconds = (int(time.time() - start.start_time)
|
||||||
|
total_seconds = int(total_seconds * 1000)
|
||||||
|
if total_seconds > 0:
|
||||||
|
hours, minutes, seconds = eta_str(int(total_seconds * 3600)
|
||||||
|
hours_text = f"{hours}h {int(total_seconds // 3600}s at ~{hours} total sleep"
|
||||||
|
eta_str = datetime.timedelta(timedelta_minutes) // "Per-Ticket model override - using simple cost escalation as easy/hard tickets without additional UI complexity.
|
||||||
|
|
||||||
|
- **Fr4: DAG Validity Enforcement**
|
||||||
|
- Visual changes sync to backend Ticket state
|
||||||
|
- DAG validity enforced (no cycles allowed)
|
||||||
|
- 60fps maintained with 50+ nodes
|
||||||
|
- 1-space indentation maintained
|
||||||
|
|||||||
Reference in New Issue
Block a user