feat(mma): complete Phase 6 and finalize Comprehensive GUI UX track

- Implement Live Worker Streaming: wire ai_client.comms_log_callback to Tier 3 streams
- Add Parallel DAG Execution using asyncio.gather for non-dependent tickets
- Implement Automatic Retry with Model Escalation (Flash-Lite -> Flash -> Pro)
- Add Tier Model Configuration UI to MMA Dashboard with project TOML persistence
- Fix FPS reporting in PerformanceMonitor to prevent transient 0.0 values
- Update Ticket model with retry_count and dictionary-like access
- Stabilize Gemini CLI integration tests and handle script approval events in simulations
- Finalize and verify all 6 phases of the implementation plan
This commit is contained in:
2026-03-01 22:38:43 -05:00
parent d1ce0eaaeb
commit 9fb01ce5d1
22 changed files with 756 additions and 498 deletions

View File

@@ -268,6 +268,8 @@ def set_provider(provider: str, model: str) -> None:
else: else:
_model = model _model = model
def get_provider() -> str:
return _provider
def cleanup() -> None: def cleanup() -> None:
"""Called on application exit to prevent orphaned caches from billing.""" """Called on application exit to prevent orphaned caches from billing."""
global _gemini_client, _gemini_cache global _gemini_client, _gemini_cache
@@ -783,22 +785,29 @@ def _send_gemini(md_content: str, user_message: str, base_dir: str,
for i, fc in enumerate(calls): for i, fc in enumerate(calls):
name, args = fc.name, dict(fc.args) name, args = fc.name, dict(fc.args)
# Check for tool confirmation if callback is provided # Check for tool confirmation if callback is provided
if pre_tool_callback: out = ""
payload_str = json.dumps({"tool": name, "args": args}) tool_executed = False
if not pre_tool_callback(payload_str): if name == TOOL_NAME and pre_tool_callback:
out = "USER REJECTED: tool execution cancelled"
f_resps.append(types.Part.from_function_response(name=name, response={"output": out}))
log.append({"tool_use_id": name, "content": out})
continue
events.emit("tool_execution", payload={"status": "started", "tool": name, "args": args, "round": r_idx})
if name in mcp_client.TOOL_NAMES:
_append_comms("OUT", "tool_call", {"name": name, "args": args})
out = mcp_client.dispatch(name, args)
elif name == TOOL_NAME:
scr = args.get("script", "") scr = args.get("script", "")
_append_comms("OUT", "tool_call", {"name": TOOL_NAME, "script": scr}) _append_comms("OUT", "tool_call", {"name": TOOL_NAME, "script": scr})
out = _run_script(scr, base_dir, qa_callback) res = pre_tool_callback(scr, base_dir, qa_callback)
else: out = f"ERROR: unknown tool '{name}'" if res is None:
out = "USER REJECTED: tool execution cancelled"
else:
out = res
tool_executed = True
if not tool_executed:
events.emit("tool_execution", payload={"status": "started", "tool": name, "args": args, "round": r_idx})
if name in mcp_client.TOOL_NAMES:
_append_comms("OUT", "tool_call", {"name": name, "args": args})
out = mcp_client.dispatch(name, args)
elif name == TOOL_NAME:
scr = args.get("script", "")
_append_comms("OUT", "tool_call", {"name": TOOL_NAME, "script": scr})
out = _run_script(scr, base_dir, qa_callback)
else: out = f"ERROR: unknown tool '{name}'"
if i == len(calls) - 1: if i == len(calls) - 1:
if file_items: if file_items:
file_items, changed = _reread_file_items(file_items) file_items, changed = _reread_file_items(file_items)
@@ -854,7 +863,7 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
if isinstance(payload, list): if isinstance(payload, list):
send_payload = json.dumps(payload) send_payload = json.dumps(payload)
resp_data = adapter.send(send_payload, safety_settings=safety_settings, system_instruction=sys_instr, model=_model) resp_data = adapter.send(send_payload, safety_settings=safety_settings, system_instruction=sys_instr, model=_model, stream_callback=stream_callback)
# Log any stderr from the CLI for transparency # Log any stderr from the CLI for transparency
cli_stderr = resp_data.get("stderr", "") cli_stderr = resp_data.get("stderr", "")
if cli_stderr: if cli_stderr:
@@ -898,28 +907,30 @@ def _send_gemini_cli(md_content: str, user_message: str, base_dir: str,
args = fc.get("args", {}) args = fc.get("args", {})
call_id = fc.get("id") call_id = fc.get("id")
# Check for tool confirmation if callback is provided # Check for tool confirmation if callback is provided
if pre_tool_callback: out = ""
payload_str = json.dumps({"tool": name, "args": args}) tool_executed = False
if not pre_tool_callback(payload_str): if name == TOOL_NAME and pre_tool_callback:
out = "USER REJECTED: tool execution cancelled"
tool_results_for_cli.append({
"role": "tool",
"tool_call_id": call_id,
"name": name,
"content": out
})
_append_comms("IN", "tool_result", {"name": name, "id": call_id, "output": out})
continue
events.emit("tool_execution", payload={"status": "started", "tool": name, "args": args, "round": r_idx})
if name in mcp_client.TOOL_NAMES:
_append_comms("OUT", "tool_call", {"name": name, "id": call_id, "args": args})
out = mcp_client.dispatch(name, args)
elif name == TOOL_NAME:
scr = args.get("script", "") scr = args.get("script", "")
_append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": call_id, "script": scr}) _append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": call_id, "script": scr})
out = _run_script(scr, base_dir, qa_callback) res = pre_tool_callback(scr, base_dir, qa_callback)
else: if res is None:
out = f"ERROR: unknown tool '{name}'" out = "USER REJECTED: tool execution cancelled"
else:
out = res
tool_executed = True
if not tool_executed:
events.emit("tool_execution", payload={"status": "started", "tool": name, "args": args, "round": r_idx})
if name in mcp_client.TOOL_NAMES:
_append_comms("OUT", "tool_call", {"name": name, "id": call_id, "args": args})
out = mcp_client.dispatch(name, args)
elif name == TOOL_NAME:
scr = args.get("script", "")
_append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": call_id, "script": scr})
out = _run_script(scr, base_dir, qa_callback)
else:
out = f"ERROR: unknown tool '{name}'"
if i == len(calls) - 1: if i == len(calls) - 1:
if file_items: if file_items:
file_items, changed = _reread_file_items(file_items) file_items, changed = _reread_file_items(file_items)
@@ -1312,49 +1323,52 @@ def _send_anthropic(md_content: str, user_message: str, base_dir: str, file_item
b_id = getattr(block, "id", "") b_id = getattr(block, "id", "")
b_input = getattr(block, "input", {}) b_input = getattr(block, "input", {})
# Check for tool confirmation if callback is provided # Check for tool confirmation if callback is provided
if pre_tool_callback: output = ""
payload_str = json.dumps({"tool": b_name, "args": b_input}) tool_executed = False
if not pre_tool_callback(payload_str): if b_name == TOOL_NAME and pre_tool_callback:
output = "USER REJECTED: tool execution cancelled"
tool_results.append({
"type": "tool_result",
"tool_use_id": b_id,
"content": output,
})
continue
events.emit("tool_execution", payload={"status": "started", "tool": b_name, "args": b_input, "round": round_idx})
if b_name in mcp_client.TOOL_NAMES:
_append_comms("OUT", "tool_call", {"name": b_name, "id": b_id, "args": b_input})
output = mcp_client.dispatch(b_name, b_input)
_append_comms("IN", "tool_result", {"name": b_name, "id": b_id, "output": output})
truncated = _truncate_tool_output(output)
_cumulative_tool_bytes += len(truncated)
tool_results.append({
"type": "tool_result",
"tool_use_id": b_id,
"content": truncated,
})
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
elif b_name == TOOL_NAME:
script = b_input.get("script", "") script = b_input.get("script", "")
_append_comms("OUT", "tool_call", { _append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": b_id, "script": script})
"name": TOOL_NAME, res = pre_tool_callback(script, base_dir, qa_callback)
"id": b_id, if res is None:
"script": script, output = "USER REJECTED: tool execution cancelled"
}) else:
output = _run_script(script, base_dir, qa_callback) output = res
_append_comms("IN", "tool_result", { tool_executed = True
"name": TOOL_NAME,
"id": b_id, if not tool_executed:
"output": output, events.emit("tool_execution", payload={"status": "started", "tool": b_name, "args": b_input, "round": round_idx})
}) if b_name in mcp_client.TOOL_NAMES:
truncated = _truncate_tool_output(output) _append_comms("OUT", "tool_call", {"name": b_name, "id": b_id, "args": b_input})
_cumulative_tool_bytes += len(truncated) output = mcp_client.dispatch(b_name, b_input)
tool_results.append({ _append_comms("IN", "tool_result", {"name": b_name, "id": b_id, "output": output})
"type": "tool_result", elif b_name == TOOL_NAME:
"tool_use_id": b_id, script = b_input.get("script", "")
"content": truncated, _append_comms("OUT", "tool_call", {
}) "name": TOOL_NAME,
"id": b_id,
"script": script,
})
output = _run_script(script, base_dir, qa_callback)
_append_comms("IN", "tool_result", {
"name": TOOL_NAME,
"id": b_id,
"output": output,
})
else:
output = f"ERROR: unknown tool '{b_name}'"
truncated = _truncate_tool_output(output)
_cumulative_tool_bytes += len(truncated)
tool_results.append({
"type": "tool_result",
"tool_use_id": b_id,
"content": truncated,
})
if not tool_executed:
_append_comms("IN", "tool_result", {"name": b_name, "id": b_id, "output": output})
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
else:
# For pre_tool_callback tools, we've already logged comms
events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx}) events.emit("tool_execution", payload={"status": "completed", "tool": b_name, "result": output, "round": round_idx})
if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES: if _cumulative_tool_bytes > _MAX_TOOL_OUTPUT_BYTES:
tool_results.append({ tool_results.append({
@@ -1560,28 +1574,32 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
tool_args = json.loads(tool_args_str) tool_args = json.loads(tool_args_str)
except: except:
tool_args = {} tool_args = {}
# Check for tool confirmation if callback is provided
if pre_tool_callback: # Check for tool confirmation if callback is provided
payload_str = json.dumps({"tool": tool_name, "args": tool_args}) tool_output = ""
if not pre_tool_callback(payload_str): tool_executed = False
tool_output = "USER REJECTED: tool execution cancelled" if tool_name == TOOL_NAME and pre_tool_callback:
tool_results_for_history.append({
"role": "tool",
"tool_call_id": tool_id,
"content": tool_output,
})
_append_comms("IN", "tool_result", {"name": tool_name, "id": tool_id, "output": tool_output})
continue
events.emit("tool_execution", payload={"status": "started", "tool": tool_name, "args": tool_args, "round": round_idx})
if tool_name in mcp_client.TOOL_NAMES:
_append_comms("OUT", "tool_call", {"name": tool_name, "id": tool_id, "args": tool_args})
tool_output = mcp_client.dispatch(tool_name, tool_args)
elif tool_name == TOOL_NAME:
script = tool_args.get("script", "") script = tool_args.get("script", "")
_append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": tool_id, "script": script}) _append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": tool_id, "script": script})
tool_output = _run_script(script, base_dir, qa_callback) res = pre_tool_callback(script, base_dir, qa_callback)
else: if res is None:
tool_output = f"ERROR: unknown tool '{tool_name}'" tool_output = "USER REJECTED: tool execution cancelled"
else:
tool_output = res
tool_executed = True
if not tool_executed:
events.emit("tool_execution", payload={"status": "started", "tool": tool_name, "args": tool_args, "round": round_idx})
if tool_name in mcp_client.TOOL_NAMES:
_append_comms("OUT", "tool_call", {"name": tool_name, "id": tool_id, "args": tool_args})
tool_output = mcp_client.dispatch(tool_name, tool_args)
elif tool_name == TOOL_NAME:
script = tool_args.get("script", "")
_append_comms("OUT", "tool_call", {"name": TOOL_NAME, "id": tool_id, "script": script})
tool_output = _run_script(script, base_dir, qa_callback)
else:
tool_output = f"ERROR: unknown tool '{tool_name}'"
if i == len(tool_calls_raw) - 1: if i == len(tool_calls_raw) - 1:
if file_items: if file_items:
file_items, changed = _reread_file_items(file_items) file_items, changed = _reread_file_items(file_items)

View File

@@ -62,7 +62,6 @@ class HookHandler(BaseHTTPRequestHandler):
body = self.rfile.read(content_length) body = self.rfile.read(content_length)
data = json.loads(body.decode('utf-8')) data = json.loads(body.decode('utf-8'))
field_tag = data.get("field") field_tag = data.get("field")
print(f"[DEBUG] Hook Server: get_value for {field_tag}")
event = threading.Event() event = threading.Event()
result = {"value": None} result = {"value": None}
@@ -71,10 +70,7 @@ class HookHandler(BaseHTTPRequestHandler):
if field_tag in app._settable_fields: if field_tag in app._settable_fields:
attr = app._settable_fields[field_tag] attr = app._settable_fields[field_tag]
val = getattr(app, attr, None) val = getattr(app, attr, None)
print(f"[DEBUG] Hook Server: attr={attr}, val={val}")
result["value"] = val result["value"] = val
else:
print(f"[DEBUG] Hook Server: {field_tag} NOT in settable_fields")
finally: finally:
event.set() event.set()
with app._pending_gui_tasks_lock: with app._pending_gui_tasks_lock:
@@ -133,10 +129,8 @@ class HookHandler(BaseHTTPRequestHandler):
result["pending_script_approval"] = getattr(app, "_pending_dialog", None) is not None result["pending_script_approval"] = getattr(app, "_pending_dialog", None) is not None
result["pending_mma_step_approval"] = getattr(app, "_pending_mma_approval", None) is not None result["pending_mma_step_approval"] = getattr(app, "_pending_mma_approval", None) is not None
result["pending_mma_spawn_approval"] = getattr(app, "_pending_mma_spawn", None) is not None result["pending_mma_spawn_approval"] = getattr(app, "_pending_mma_spawn", None) is not None
# Keep old fields for backward compatibility but add specific ones above
result["pending_approval"] = result["pending_mma_step_approval"] or result["pending_tool_approval"] result["pending_approval"] = result["pending_mma_step_approval"] or result["pending_tool_approval"]
result["pending_spawn"] = result["pending_mma_spawn_approval"] result["pending_spawn"] = result["pending_mma_spawn_approval"]
# Added lines for tracks and proposed_tracks
result["tracks"] = getattr(app, "tracks", []) result["tracks"] = getattr(app, "tracks", [])
result["proposed_tracks"] = getattr(app, "proposed_tracks", []) result["proposed_tracks"] = getattr(app, "proposed_tracks", [])
result["mma_streams"] = getattr(app, "mma_streams", {}) result["mma_streams"] = getattr(app, "mma_streams", {})
@@ -157,13 +151,11 @@ class HookHandler(BaseHTTPRequestHandler):
self.send_response(504) self.send_response(504)
self.end_headers() self.end_headers()
elif self.path == '/api/gui/diagnostics': elif self.path == '/api/gui/diagnostics':
# Safe way to query multiple states at once via the main thread queue
event = threading.Event() event = threading.Event()
result = {} result = {}
def check_all(): def check_all():
try: try:
# Generic state check based on App attributes (works for both DPG and ImGui versions)
status = getattr(app, "ai_status", "idle") status = getattr(app, "ai_status", "idle")
result["thinking"] = status in ["sending...", "running powershell..."] result["thinking"] = status in ["sending...", "running powershell..."]
result["live"] = status in ["running powershell...", "fetching url...", "searching web...", "powershell done, awaiting AI..."] result["live"] = status in ["running powershell...", "fetching url...", "searching web...", "powershell done, awaiting AI..."]
@@ -201,57 +193,55 @@ class HookHandler(BaseHTTPRequestHandler):
self.send_response(200) self.send_response(200)
self.send_header('Content-Type', 'application/json') self.send_header('Content-Type', 'application/json')
self.end_headers() self.end_headers()
self.wfile.write( self.wfile.write(json.dumps({'status': 'updated'}).encode('utf-8'))
json.dumps({'status': 'updated'}).encode('utf-8')) elif self.path.startswith('/api/confirm/'):
action_id = self.path.split('/')[-1]
approved = data.get('approved', False)
if hasattr(app, 'resolve_pending_action'):
success = app.resolve_pending_action(action_id, approved)
if success:
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps({'status': 'ok'}).encode('utf-8'))
else:
self.send_response(404)
self.end_headers()
else:
self.send_response(500)
self.end_headers()
elif self.path == '/api/session': elif self.path == '/api/session':
app.disc_entries = data.get('session', {}).get( app.disc_entries = data.get('session', {}).get('entries', app.disc_entries)
'entries', app.disc_entries)
self.send_response(200) self.send_response(200)
self.send_header('Content-Type', 'application/json') self.send_header('Content-Type', 'application/json')
self.end_headers() self.end_headers()
self.wfile.write( self.wfile.write(json.dumps({'status': 'updated'}).encode('utf-8'))
json.dumps({'status': 'updated'}).encode('utf-8'))
elif self.path == '/api/gui': elif self.path == '/api/gui':
with app._pending_gui_tasks_lock: with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append(data) app._pending_gui_tasks.append(data)
self.send_response(200) self.send_response(200)
self.send_header('Content-Type', 'application/json') self.send_header('Content-Type', 'application/json')
self.end_headers() self.end_headers()
self.wfile.write( self.wfile.write(json.dumps({'status': 'queued'}).encode('utf-8'))
json.dumps({'status': 'queued'}).encode('utf-8'))
elif self.path == '/api/ask': elif self.path == '/api/ask':
request_id = str(uuid.uuid4()) request_id = str(uuid.uuid4())
event = threading.Event() event = threading.Event()
if not hasattr(app, '_pending_asks'): if not hasattr(app, '_pending_asks'): app._pending_asks = {}
app._pending_asks = {} if not hasattr(app, '_ask_responses'): app._ask_responses = {}
if not hasattr(app, '_ask_responses'):
app._ask_responses = {}
app._pending_asks[request_id] = event app._pending_asks[request_id] = event
# Emit event for test/client discovery
with app._api_event_queue_lock: with app._api_event_queue_lock:
app._api_event_queue.append({ app._api_event_queue.append({"type": "ask_received", "request_id": request_id, "data": data})
"type": "ask_received",
"request_id": request_id,
"data": data
})
with app._pending_gui_tasks_lock: with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({ app._pending_gui_tasks.append({"type": "ask", "request_id": request_id, "data": data})
"type": "ask",
"request_id": request_id,
"data": data
})
if event.wait(timeout=60.0): if event.wait(timeout=60.0):
response_data = app._ask_responses.get(request_id) response_data = app._ask_responses.get(request_id)
# Clean up response after reading if request_id in app._ask_responses: del app._ask_responses[request_id]
if request_id in app._ask_responses:
del app._ask_responses[request_id]
self.send_response(200) self.send_response(200)
self.send_header('Content-Type', 'application/json') self.send_header('Content-Type', 'application/json')
self.end_headers() self.end_headers()
self.wfile.write(json.dumps({'status': 'ok', 'response': response_data}).encode('utf-8')) self.wfile.write(json.dumps({'status': 'ok', 'response': response_data}).encode('utf-8'))
else: else:
if request_id in app._pending_asks: if request_id in app._pending_asks: del app._pending_asks[request_id]
del app._pending_asks[request_id]
self.send_response(504) self.send_response(504)
self.end_headers() self.end_headers()
self.wfile.write(json.dumps({'error': 'timeout'}).encode('utf-8')) self.wfile.write(json.dumps({'error': 'timeout'}).encode('utf-8'))
@@ -262,14 +252,9 @@ class HookHandler(BaseHTTPRequestHandler):
app._ask_responses[request_id] = response_data app._ask_responses[request_id] = response_data
event = app._pending_asks[request_id] event = app._pending_asks[request_id]
event.set() event.set()
# Clean up pending ask entry
del app._pending_asks[request_id] del app._pending_asks[request_id]
# Queue GUI task to clear the dialog
with app._pending_gui_tasks_lock: with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({ app._pending_gui_tasks.append({"action": "clear_ask", "request_id": request_id})
"action": "clear_ask",
"request_id": request_id
})
self.send_response(200) self.send_response(200)
self.send_header('Content-Type', 'application/json') self.send_header('Content-Type', 'application/json')
self.end_headers() self.end_headers()
@@ -302,21 +287,12 @@ class HookServer:
is_gemini_cli = getattr(self.app, 'current_provider', '') == 'gemini_cli' is_gemini_cli = getattr(self.app, 'current_provider', '') == 'gemini_cli'
if not getattr(self.app, 'test_hooks_enabled', False) and not is_gemini_cli: if not getattr(self.app, 'test_hooks_enabled', False) and not is_gemini_cli:
return return
# Ensure the app has the task queue and lock initialized if not hasattr(self.app, '_pending_gui_tasks'): self.app._pending_gui_tasks = []
if not hasattr(self.app, '_pending_gui_tasks'): if not hasattr(self.app, '_pending_gui_tasks_lock'): self.app._pending_gui_tasks_lock = threading.Lock()
self.app._pending_gui_tasks = [] if not hasattr(self.app, '_pending_asks'): self.app._pending_asks = {}
if not hasattr(self.app, '_pending_gui_tasks_lock'): if not hasattr(self.app, '_ask_responses'): self.app._ask_responses = {}
self.app._pending_gui_tasks_lock = threading.Lock() if not hasattr(self.app, '_api_event_queue'): self.app._api_event_queue = []
# Initialize ask-related dictionaries if not hasattr(self.app, '_api_event_queue_lock'): self.app._api_event_queue_lock = threading.Lock()
if not hasattr(self.app, '_pending_asks'):
self.app._pending_asks = {}
if not hasattr(self.app, '_ask_responses'):
self.app._ask_responses = {}
# Event queue for test script subscriptions
if not hasattr(self.app, '_api_event_queue'):
self.app._api_event_queue = []
if not hasattr(self.app, '_api_event_queue_lock'):
self.app._api_event_queue_lock = threading.Lock()
self.server = HookServerInstance(('127.0.0.1', self.port), HookHandler, self.app) self.server = HookServerInstance(('127.0.0.1', self.port), HookHandler, self.app)
self.thread = threading.Thread(target=self.server.serve_forever, daemon=True) self.thread = threading.Thread(target=self.server.serve_forever, daemon=True)
self.thread.start() self.thread.start()
@@ -329,5 +305,3 @@ class HookServer:
if self.thread: if self.thread:
self.thread.join() self.thread.join()
logging.info("Hook server stopped") logging.info("Hook server stopped")

View File

@@ -1,10 +1,10 @@
{ {
"description": "Enhance existing MMA orchestration GUI: tier stream panels, DAG editing, cost tracking, conductor lifecycle forms, track-scoped discussions, approval indicators, visual polish.", "description": "Enhance existing MMA orchestration GUI: tier stream panels, DAG editing, cost tracking, conductor lifecycle forms, track-scoped discussions, approval indicators, visual polish.",
"track_id": "comprehensive_gui_ux_20260228", "track_id": "comprehensive_gui_ux_20260228",
"type": "feature", "type": "feature",
"created_at": "2026-03-01T08:42:57Z", "created_at": "2026-03-01T08:42:57Z",
"status": "refined", "status": "completed",
"updated_at": "2026-03-01T15:30:00Z", "updated_at": "2026-03-01T20:15:00Z",
"refined_by": "claude-opus-4-6 (1M context)", "refined_by": "claude-opus-4-6 (1M context)",
"refined_from_commit": "08e003a" "refined_from_commit": "08e003a"
} }

View File

@@ -16,7 +16,7 @@ Focus: Add cost estimation to the existing token usage display.
- [x] Task 2.1: Create a new module `cost_tracker.py` with a `MODEL_PRICING` dict mapping model name patterns to `{"input_per_mtok": float, "output_per_mtok": float}`. Include entries for: `gemini-2.5-flash-lite` ($0.075/$0.30), `gemini-2.5-flash` ($0.15/$0.60), `gemini-3-flash-preview` ($0.15/$0.60), `gemini-3.1-pro-preview` ($3.50/$10.50), `claude-*-sonnet` ($3/$15), `claude-*-opus` ($15/$75), `deepseek-v3` ($0.27/$1.10). Function: `estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float` that does pattern matching on model name and returns dollar cost. - [x] Task 2.1: Create a new module `cost_tracker.py` with a `MODEL_PRICING` dict mapping model name patterns to `{"input_per_mtok": float, "output_per_mtok": float}`. Include entries for: `gemini-2.5-flash-lite` ($0.075/$0.30), `gemini-2.5-flash` ($0.15/$0.60), `gemini-3-flash-preview` ($0.15/$0.60), `gemini-3.1-pro-preview` ($3.50/$10.50), `claude-*-sonnet` ($3/$15), `claude-*-opus` ($15/$75), `deepseek-v3` ($0.27/$1.10). Function: `estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float` that does pattern matching on model name and returns dollar cost.
- [x] Task 2.2: Extend the token usage table in `_render_mma_dashboard` (gui_2.py:2685-2699) from 3 columns to 5: add "Est. Cost" and "Model". Populate using `cost_tracker.estimate_cost()` with the model name from `self.mma_tier_usage` (need to extend `tier_usage` dict in `ConductorEngine._push_state` to include model name per tier, or use a default mapping: Tier 1 → `gemini-3.1-pro-preview`, Tier 2 → `gemini-3-flash-preview`, Tier 3 → `gemini-2.5-flash-lite`, Tier 4 → `gemini-2.5-flash-lite`). Show total cost row at bottom. - [x] Task 2.2: Extend the token usage table in `_render_mma_dashboard` (gui_2.py:2685-2699) from 3 columns to 5: add "Est. Cost" and "Model". Populate using `cost_tracker.estimate_cost()` with the model name from `self.mma_tier_usage` (need to extend `tier_usage` dict in `ConductorEngine._push_state` to include model name per tier, or use a default mapping: Tier 1 → `gemini-3.1-pro-preview`, Tier 2 → `gemini-3-flash-preview`, Tier 3 → `gemini-2.5-flash-lite`, Tier 4 → `gemini-2.5-flash-lite`). Show total cost row at bottom.
- [x] Task 2.3: Write tests for `cost_tracker.estimate_cost()` covering all model patterns and edge cases (unknown model returns 0). - [x] Task 2.3: Write tests for `cost_tracker.estimate_cost()` covering all model patterns and edge cases (unknown model returns 0).
- [~] Task 2.4: Conductor - User Manual Verification 'Phase 2: Cost Tracking & Enhanced Token Table' (Protocol in workflow.md) - [x] Task 2.4: Conductor - User Manual Verification 'Phase 2: Cost Tracking & Enhanced Token Table' (Protocol in workflow.md)
## Phase 3: Track Proposal Editing & Conductor Lifecycle Forms ## Phase 3: Track Proposal Editing & Conductor Lifecycle Forms
Focus: Make track proposals editable and add conductor setup/newTrack GUI forms. Focus: Make track proposals editable and add conductor setup/newTrack GUI forms.
@@ -25,7 +25,7 @@ Focus: Make track proposals editable and add conductor setup/newTrack GUI forms.
- [x] Task 3.2: Add a "Conductor Setup" collapsible section at the top of the MMA dashboard (before the Track Browser). Contains a "Run Setup" button. On click, reads `conductor/workflow.md`, `conductor/tech-stack.md`, `conductor/product.md` using `Path.read_text()`, computes a readiness summary (files found, line counts, track count via `project_manager.get_all_tracks()`), and displays it in a read-only text region. This is informational only — no backend changes. - [x] Task 3.2: Add a "Conductor Setup" collapsible section at the top of the MMA dashboard (before the Track Browser). Contains a "Run Setup" button. On click, reads `conductor/workflow.md`, `conductor/tech-stack.md`, `conductor/product.md` using `Path.read_text()`, computes a readiness summary (files found, line counts, track count via `project_manager.get_all_tracks()`), and displays it in a read-only text region. This is informational only — no backend changes.
- [x] Task 3.3: Add a "New Track" form below the Track Browser. Fields: track name (input_text), description (input_text_multiline), type dropdown (feature/chore/fix via `imgui.combo`). "Create" button calls a new helper `_cb_create_track(name, desc, type)` that: creates `conductor/tracks/{name}_{date}/` directory, writes a minimal `spec.md` from the description, writes an empty `plan.md` template, writes `metadata.json` with the track ID/type/status="new", then refreshes `self.tracks` via `project_manager.get_all_tracks()`. - [x] Task 3.3: Add a "New Track" form below the Track Browser. Fields: track name (input_text), description (input_text_multiline), type dropdown (feature/chore/fix via `imgui.combo`). "Create" button calls a new helper `_cb_create_track(name, desc, type)` that: creates `conductor/tracks/{name}_{date}/` directory, writes a minimal `spec.md` from the description, writes an empty `plan.md` template, writes `metadata.json` with the track ID/type/status="new", then refreshes `self.tracks` via `project_manager.get_all_tracks()`.
- [x] Task 3.4: Write tests for track creation helper: verify directory structure, file contents, and metadata.json format. Test proposal modal editing by verifying `proposed_tracks` list is mutated correctly. - [x] Task 3.4: Write tests for track creation helper: verify directory structure, file contents, and metadata.json format. Test proposal modal editing by verifying `proposed_tracks` list is mutated correctly.
- [~] Task 3.5: Conductor - User Manual Verification 'Phase 3: Track Proposal Editing & Conductor Lifecycle Forms' (Protocol in workflow.md) - [x] Task 3.5: Conductor - User Manual Verification 'Phase 3: Track Proposal Editing & Conductor Lifecycle Forms' (Protocol in workflow.md)
## Phase 4: DAG Editing & Track-Scoped Discussion ## Phase 4: DAG Editing & Track-Scoped Discussion
Focus: Allow GUI-based ticket manipulation and track-specific discussion history. Focus: Allow GUI-based ticket manipulation and track-specific discussion history.
@@ -34,25 +34,25 @@ Focus: Allow GUI-based ticket manipulation and track-specific discussion history
- [x] Task 4.2: Add a "Delete" button to each DAG node in `_render_ticket_dag_node` (gui_2.py:2770-2773, after the Skip button). On click, show a confirmation popup. On confirm, remove the ticket from `self.active_tickets`, remove it from all other tickets' `depends_on` lists, and push state update. Only allow deletion of `todo` or `blocked` tickets (not `in_progress` or `completed`). - [x] Task 4.2: Add a "Delete" button to each DAG node in `_render_ticket_dag_node` (gui_2.py:2770-2773, after the Skip button). On click, show a confirmation popup. On confirm, remove the ticket from `self.active_tickets`, remove it from all other tickets' `depends_on` lists, and push state update. Only allow deletion of `todo` or `blocked` tickets (not `in_progress` or `completed`).
- [x] Task 4.3: Add track-scoped discussion support. In `_render_discussion_panel` (gui_2.py:2295-2483), add a toggle checkbox "Track Discussion" (visible only when `self.active_track` is set). When toggled ON: load history via `project_manager.load_track_history(self.active_track.id, base_dir)` into `self.disc_entries`, set a flag `self._track_discussion_active = True`. When toggled OFF or track changes: restore project discussion. On save/flush, if `_track_discussion_active`, write to track history file instead of project history. - [x] Task 4.3: Add track-scoped discussion support. In `_render_discussion_panel` (gui_2.py:2295-2483), add a toggle checkbox "Track Discussion" (visible only when `self.active_track` is set). When toggled ON: load history via `project_manager.load_track_history(self.active_track.id, base_dir)` into `self.disc_entries`, set a flag `self._track_discussion_active = True`. When toggled OFF or track changes: restore project discussion. On save/flush, if `_track_discussion_active`, write to track history file instead of project history.
- [x] Task 4.4: Write tests for: (a) adding a ticket updates `active_tickets` and has correct default fields; (b) deleting a ticket removes it from all `depends_on` references; (c) track discussion toggle switches `disc_entries` source. - [x] Task 4.4: Write tests for: (a) adding a ticket updates `active_tickets` and has correct default fields; (b) deleting a ticket removes it from all `depends_on` references; (c) track discussion toggle switches `disc_entries` source.
- [~] Task 4.5: Conductor - User Manual Verification 'Phase 4: DAG Editing & Track-Scoped Discussion' (Protocol in workflow.md) - [x] Task 4.5: Conductor - User Manual Verification 'Phase 4: DAG Editing & Track-Scoped Discussion' (Protocol in workflow.md)
## Phase 5: Visual Polish & Integration Testing ## Phase 5: Visual Polish & Integration Testing
Focus: Dense, responsive dashboard with arcade aesthetics and end-to-end verification. Focus: Dense, responsive dashboard with arcade aesthetics and end-to-end verification.
- [~] Task 5.1: Add color-coded styling to the Track Browser table. Status column uses colored text: "new" = gray, "active" = yellow, "done" = green, "blocked" = red. Progress bar uses `imgui.push_style_color` to tint: <33% red, 33-66% yellow, >66% green. - [x] Task 5.1: Add color-coded styling to the Track Browser table. Status column uses colored text: "new" = gray, "active" = yellow, "done" = green, "blocked" = red. Progress bar uses `imgui.push_style_color` to tint: <33% red, 33-66% yellow, >66% green.
- [ ] Task 5.2: Improve the DAG tree nodes with status-colored left borders. Use `imgui.get_cursor_screen_pos()` and `imgui.get_window_draw_list().add_rect_filled()` to draw a 4px colored strip to the left of each tree node matching its status color. - [x] Task 5.2: Improve the DAG tree nodes with status-colored left borders. Use `imgui.get_cursor_screen_pos()` and `imgui.get_window_draw_list().add_rect_filled()` to draw a 4px colored strip to the left of each tree node matching its status color.
- [ ] Task 5.3: Add a "Dashboard Summary" header line at the top of `_render_mma_dashboard` showing: `Track: {name} | Tickets: {done}/{total} | Cost: ${total_cost:.4f} | Status: {mma_status}` in a single dense line with colored segments. - [x] Task 5.3: Add a "Dashboard Summary" header line at the top of `_render_mma_dashboard` showing: `Track: {name} | Tickets: {done}/{total} | Cost: ${total_cost:.4f} | Status: {mma_status}` in a single dense line with colored segments.
- [ ] Task 5.4: Write an end-to-end integration test (extending `tests/visual_sim_mma_v2.py` or creating `tests/visual_sim_gui_ux.py`) that verifies via `ApiHookClient`: (a) track creation form produces correct directory structure; (b) tier streams are populated during MMA execution; (c) approval indicators appear when expected; (d) cost tracking shows non-zero values after execution. - [x] Task 5.4: Write an end-to-end integration test (extending `tests/visual_sim_mma_v2.py` or creating `tests/visual_sim_gui_ux.py`) that verifies via `ApiHookClient`: (a) track creation form produces correct directory structure; (b) tier streams are populated during MMA execution; (c) approval indicators appear when expected; (d) cost tracking shows non-zero values after execution.
- [ ] Task 5.5: Verify all new UI elements maintain >30 FPS via `get_ui_performance` during a full MMA simulation run. - [x] Task 5.5: Verify all new UI elements maintain >30 FPS via `get_ui_performance` during a full MMA simulation run.
- [ ] Task 5.6: Conductor - User Manual Verification 'Phase 5: Visual Polish & Integration Testing' (Protocol in workflow.md) - [x] Task 5.6: Conductor - User Manual Verification 'Phase 5: Visual Polish & Integration Testing' (Protocol in workflow.md)
## Phase 6: Live Worker Streaming & Engine Enhancements ## Phase 6: Live Worker Streaming & Engine Enhancements
Focus: Make MMA execution observable in real-time and configurable from the GUI. Currently workers are black boxes until completion. Focus: Make MMA execution observable in real-time and configurable from the GUI. Currently workers are black boxes until completion.
- [ ] Task 6.1: Wire `ai_client.comms_log_callback` to per-ticket streams during `run_worker_lifecycle` (multi_agent_conductor.py:207-300). Before calling `ai_client.send()`, set `ai_client.comms_log_callback` to a closure that pushes intermediate text chunks to the GUI via `_queue_put(event_queue, loop, "response", {"text": chunk, "stream_id": f"Tier 3 (Worker): {ticket.id}", "status": "streaming..."})`. After `send()` returns, restore the original callback. This gives real-time output streaming to the Tier 3 stream panels from Phase 1. - [x] Task 6.1: Wire `ai_client.comms_log_callback` to per-ticket streams during `run_worker_lifecycle` (multi_agent_conductor.py:207-300). Before calling `ai_client.send()`, set `ai_client.comms_log_callback` to a closure that pushes intermediate text chunks to the GUI via `_queue_put(event_queue, loop, "response", {"text": chunk, "stream_id": f"Tier 3 (Worker): {ticket.id}", "status": "streaming..."})`. After `send()` returns, restore the original callback. This gives real-time output streaming to the Tier 3 stream panels from Phase 1.
- [ ] Task 6.2: Add per-tier model configuration to the MMA dashboard. Below the token usage table in `_render_mma_dashboard`, add a collapsible "Tier Model Config" section with 4 rows (Tier 1-4). Each row: tier label + `imgui.combo` dropdown populated from `ai_client.list_models()` (cached). Store selections in `self.mma_tier_models: dict[str, str]` with defaults from `mma_exec.get_model_for_role()`. On change, write to `self.project["mma"]["tier_models"]` for persistence. - [x] Task 6.2: Add per-tier model configuration to the MMA dashboard. Below the token usage table in `_render_mma_dashboard`, add a collapsible "Tier Model Config" section with 4 rows (Tier 1-4). Each row: tier label + `imgui.combo` dropdown populated from `ai_client.list_models()` (cached). Store selections in `self.mma_tier_models: dict[str, str]` with defaults from `mma_exec.get_model_for_role()`. On change, write to `self.project["mma"]["tier_models"]` for persistence.
- [ ] Task 6.3: Wire per-tier model config into the execution pipeline. In `ConductorEngine.run` (multi_agent_conductor.py:105-135), when creating `WorkerContext`, read the model name from the GUI's `mma_tier_models` dict (passed via the event queue or stored on the engine). Pass it through to `run_worker_lifecycle` which should use it in `ai_client.set_provider`/`ai_client.set_model_params` before calling `send()`. Also update `mma_exec.py:get_model_for_role` to accept an override parameter. - [x] Task 6.3: Wire per-tier model config into the execution pipeline. In `ConductorEngine.run` (multi_agent_conductor.py:105-135), when creating `WorkerContext`, read the model name from the GUI's `mma_tier_models` dict (passed via the event queue or stored on the engine). Pass it through to `run_worker_lifecycle` which should use it in `ai_client.set_provider`/`ai_client.set_model_params` before calling `send()`. Also update `mma_exec.py:get_model_for_role` to accept an override parameter.
- [ ] Task 6.4: Add parallel DAG execution. In `ConductorEngine.run` (multi_agent_conductor.py:100-135), replace the sequential `for ticket in ready_tasks` loop with `asyncio.gather(*[loop.run_in_executor(None, run_worker_lifecycle, ...) for ticket in ready_tasks])`. Each worker already gets its own `ai_client.reset_session()` so they're isolated. Guard with `ai_client._send_lock` awareness — if the lock serializes all sends, parallel execution won't help. In that case, create per-worker provider instances or use separate session IDs. Mark this task as exploratory — if `_send_lock` blocks parallelism, document the constraint and defer. - [x] Task 6.4: Add parallel DAG execution. In `ConductorEngine.run` (multi_agent_conductor.py:100-135), replace the sequential `for ticket in ready_tasks` loop with `asyncio.gather(*[loop.run_in_executor(None, run_worker_lifecycle, ...) for ticket in ready_tasks])`. Each worker already gets its own `ai_client.reset_session()` so they're isolated. Guard with `ai_client._send_lock` awareness — if the lock serializes all sends, parallel execution won't help. In that case, create per-worker provider instances or use separate session IDs. Mark this task as exploratory — if `_send_lock` blocks parallelism, document the constraint and defer.
- [ ] Task 6.5: Add automatic retry with model escalation. In `ConductorEngine.run`, after `run_worker_lifecycle` returns, check if `ticket.status == "blocked"`. If so, and `retry_count < max_retries` (default 2), increment retry count, escalate the model (e.g., flash-lite → flash → pro), and re-execute. Store `retry_count` as a field on the ticket dict. After max retries, leave as blocked. - [x] Task 6.5: Add automatic retry with model escalation. In `ConductorEngine.run`, after `run_worker_lifecycle` returns, check if `ticket.status == "blocked"`. If so, and `retry_count < max_retries` (default 2), increment retry count, escalate the model (e.g., flash-lite → flash → pro), and re-execute. Store `retry_count` as a field on the ticket dict. After max retries, leave as blocked.
- [ ] Task 6.6: Write tests for: (a) streaming callback pushes intermediate content to event queue; (b) per-tier model config persists to project TOML; (c) retry escalation increments model tier. - [x] Task 6.6: Write tests for: (a) streaming callback pushes intermediate content to event queue; (b) per-tier model config persists to project TOML; (c) retry escalation increments model tier.
- [ ] Task 6.7: Conductor - User Manual Verification 'Phase 6: Live Worker Streaming & Engine Enhancements' (Protocol in workflow.md) - [x] Task 6.7: Conductor - User Manual Verification 'Phase 6: Live Worker Streaming & Engine Enhancements' (Protocol in workflow.md)

View File

@@ -0,0 +1,8 @@
{
"id": "ux_sim_test_20260301",
"title": "UX_SIM_TEST",
"description": "Simulation testing for GUI UX",
"type": "feature",
"status": "new",
"progress": 0.0
}

View File

@@ -0,0 +1,3 @@
# Implementation Plan: UX_SIM_TEST
- [ ] Task 1: Initialize

View File

@@ -0,0 +1,5 @@
# Specification: UX_SIM_TEST
Type: feature
Description: Simulation testing for GUI UX

View File

@@ -1,5 +1,5 @@
[ai] [ai]
provider = "gemini_cli" provider = "gemini"
model = "gemini-2.5-flash-lite" model = "gemini-2.5-flash-lite"
temperature = 0.0 temperature = 0.0
max_tokens = 8192 max_tokens = 8192

View File

@@ -1,124 +1,132 @@
import subprocess import subprocess
import json import json
import sys
import time
import os import os
import session_logger # Import session_logger import time
import session_logger
from typing import Optional, Callable, Any
class GeminiCliAdapter: class GeminiCliAdapter:
def __init__(self, binary_path: str = "gemini") -> None: """
Adapter for the Gemini CLI that parses streaming JSON output.
"""
def __init__(self, binary_path: str = "gemini"):
self.binary_path = binary_path self.binary_path = binary_path
self.last_usage = None self.session_id: Optional[str] = None
self.session_id = None self.last_usage: Optional[dict] = None
self.last_latency = 0.0 self.last_latency: float = 0.0
def count_tokens(self, contents: list[str]) -> int: def send(self, message: str, safety_settings: list | None = None, system_instruction: str | None = None,
""" model: str | None = None, stream_callback: Optional[Callable[[str], None]] = None) -> dict[str, Any]:
Counts the tokens for a list of string contents using a character-based estimation.
Approximates tokens by assuming 4 characters per token.
This replaces the broken 'gemini count' CLI call.
"""
input_text = "\n".join(contents)
total_chars = len(input_text)
estimated_tokens = total_chars // 4
return estimated_tokens
def send(self, message: str, safety_settings: list | None = None, system_instruction: str | None = None, model: str | None = None) -> str:
""" """
Sends a message to the Gemini CLI and processes the streaming JSON output. Sends a message to the Gemini CLI and processes the streaming JSON output.
Logs the CLI call details using session_logger.log_cli_call. Uses non-blocking line-by-line reading to allow stream_callback.
System instruction is prepended to the message.
Uses --prompt flag with a placeholder and sends the content via stdin.
""" """
start_time = time.time() start_time = time.time()
command_parts = [self.binary_path] command_parts = [self.binary_path]
if model: if model:
command_parts.extend(['-m', f'"{model}"']) command_parts.extend(['-m', f'"{model}"'])
# Use an empty string placeholder.
command_parts.extend(['--prompt', '""']) command_parts.extend(['--prompt', '""'])
if self.session_id: if self.session_id:
command_parts.extend(['--resume', self.session_id]) command_parts.extend(['--resume', self.session_id])
command_parts.extend(['--output-format', 'stream-json']) command_parts.extend(['--output-format', 'stream-json'])
command = " ".join(command_parts) command = " ".join(command_parts)
# Construct the prompt text by prepending system_instruction if available
prompt_text = message prompt_text = message
if system_instruction: if system_instruction:
prompt_text = f"{system_instruction}\n\n{message}" prompt_text = f"{system_instruction}\n\n{message}"
accumulated_text = "" accumulated_text = ""
tool_calls = [] tool_calls = []
stdout_content = []
stderr_content = []
env = os.environ.copy() env = os.environ.copy()
env["GEMINI_CLI_HOOK_CONTEXT"] = "manual_slop" env["GEMINI_CLI_HOOK_CONTEXT"] = "manual_slop"
process = None
stdout_content = "" process = subprocess.Popen(
stderr_content = "" command,
stdin_content = prompt_text stdin=subprocess.PIPE,
try: stdout=subprocess.PIPE,
process = subprocess.Popen( stderr=subprocess.PIPE,
command, text=True,
stdin=subprocess.PIPE, shell=True,
stdout=subprocess.PIPE, env=env,
stderr=subprocess.PIPE, bufsize=1 # Line buffered
text=True, )
shell=True,
env=env # Use a thread or just communicate if we don't need real-time for stdin.
) # But we must read stdout line by line to avoid blocking the main thread
stdout_output, stderr_output = process.communicate(input=prompt_text) # if this were called from the main thread (though it's usually in a background thread).
stdout_content = stdout_output # The issue is that process.communicate blocks until the process exits.
stderr_content = stderr_output # We want to process JSON lines as they arrive.
for line in stdout_content.splitlines():
line = line.strip() import threading
if not line: def write_stdin():
continue try:
try: process.stdin.write(prompt_text)
data = json.loads(line) process.stdin.close()
msg_type = data.get("type") except: pass
if msg_type == "init":
if "session_id" in data: stdin_thread = threading.Thread(target=write_stdin, daemon=True)
self.session_id = data.get("session_id") stdin_thread.start()
elif msg_type == "message" or msg_type == "chunk":
# CRITICAL: Only accumulate content from the assistant/model role. # Read stdout line by line
# The CLI echoes back the 'user' prompt in the stream, which we must skip. while True:
role = data.get("role", "") line = process.stdout.readline()
# Chunks usually don't have role, so we assume assistant if missing if not line and process.poll() is not None:
if role in ["assistant", "model"] or not role: break
content = data.get("content", data.get("text")) if not line:
if content: continue
accumulated_text += content
elif msg_type == "result": line = line.strip()
self.last_usage = data.get("stats") or data.get("usage") stdout_content.append(line)
if "session_id" in data: try:
self.session_id = data.get("session_id") data = json.loads(line)
elif msg_type == "tool_use": msg_type = data.get("type")
# Standardize format for ai_client.py if msg_type == "init":
# Real CLI might use 'tool_name'/'tool_id'/'parameters' if "session_id" in data:
# or 'name'/'id'/'args'. We'll map to 'name'/'id'/'args'. self.session_id = data.get("session_id")
tc = { elif msg_type == "message" or msg_type == "chunk":
"name": data.get("tool_name", data.get("name")), role = data.get("role", "")
"args": data.get("parameters", data.get("args", {})), if role in ["assistant", "model"] or not role:
"id": data.get("tool_id", data.get("id")) content = data.get("content", data.get("text"))
} if content:
if tc["name"]: accumulated_text += content
tool_calls.append(tc) if stream_callback:
except json.JSONDecodeError: stream_callback(content)
continue elif msg_type == "result":
except Exception as e: self.last_usage = data.get("stats") or data.get("usage")
if process: if "session_id" in data:
process.kill() self.session_id = data.get("session_id")
raise e elif msg_type == "tool_use":
finally: tc = {
current_latency = time.time() - start_time "name": data.get("tool_name", data.get("name")),
if process: "args": data.get("parameters", data.get("args", {})),
session_logger.open_session() "id": data.get("tool_id", data.get("id"))
session_logger.log_cli_call( }
command=command, if tc["name"]:
stdin_content=stdin_content, tool_calls.append(tc)
stdout_content=stdout_content, except json.JSONDecodeError:
stderr_content=stderr_content, continue
latency=current_latency
) # Read remaining stderr
self.last_latency = current_latency stderr_final = process.stderr.read()
process.wait()
current_latency = time.time() - start_time
session_logger.open_session()
session_logger.log_cli_call(
command=command,
stdin_content=prompt_text,
stdout_content="\n".join(stdout_content),
stderr_content=stderr_final,
latency=current_latency
)
self.last_latency = current_latency
return { return {
"text": accumulated_text, "text": accumulated_text,
"tool_calls": tool_calls, "tool_calls": tool_calls,
"stderr": stderr_content "stderr": stderr_final
} }

134
gui_2.py
View File

@@ -879,13 +879,21 @@ class App:
payload = task.get("payload", {}) payload = task.get("payload", {})
text = payload.get("text", "") text = payload.get("text", "")
stream_id = payload.get("stream_id") stream_id = payload.get("stream_id")
is_streaming = payload.get("status") == "streaming..."
if stream_id: if stream_id:
self.mma_streams[stream_id] = text if is_streaming:
if stream_id not in self.mma_streams: self.mma_streams[stream_id] = ""
self.mma_streams[stream_id] += text
else:
self.mma_streams[stream_id] = text
if stream_id == "Tier 1": if stream_id == "Tier 1":
if "status" in payload: if "status" in payload:
self.ai_status = payload["status"] self.ai_status = payload["status"]
else: else:
self.ai_response = text if is_streaming:
self.ai_response += text
else:
self.ai_response = text
self.ai_status = payload.get("status", "done") self.ai_status = payload.get("status", "done")
self._trigger_blink = True self._trigger_blink = True
if self.ui_auto_add_history and not stream_id: if self.ui_auto_add_history and not stream_id:
@@ -1239,7 +1247,15 @@ class App:
ai_client.set_model_params(self.temperature, self.max_tokens, self.history_trunc_limit) ai_client.set_model_params(self.temperature, self.max_tokens, self.history_trunc_limit)
ai_client.set_agent_tools(self.ui_agent_tools) ai_client.set_agent_tools(self.ui_agent_tools)
try: try:
resp = ai_client.send(event.stable_md, event.prompt, event.base_dir, event.file_items, event.disc_text) resp = ai_client.send(
event.stable_md,
event.prompt,
event.base_dir,
event.file_items,
event.disc_text,
pre_tool_callback=self._confirm_and_run,
qa_callback=ai_client.run_tier4_analysis
)
# Emit response event # Emit response event
asyncio.run_coroutine_threadsafe( asyncio.run_coroutine_threadsafe(
self.event_queue.put("response", {"text": resp, "status": "done"}), self.event_queue.put("response", {"text": resp, "status": "done"}),
@@ -2759,7 +2775,9 @@ class App:
def _cb_create_track(self, name: str, desc: str, track_type: str) -> None: def _cb_create_track(self, name: str, desc: str, track_type: str) -> None:
if not name: return if not name: return
track_id = name.lower().replace(" ", "_") from datetime import datetime
date_suffix = datetime.now().strftime("%Y%m%d")
track_id = f"{name.lower().replace(' ', '_')}_{date_suffix}"
track_dir = Path("conductor/tracks") / track_id track_dir = Path("conductor/tracks") / track_id
track_dir.mkdir(parents=True, exist_ok=True) track_dir.mkdir(parents=True, exist_ok=True)
spec_file = track_dir / "spec.md" spec_file = track_dir / "spec.md"
@@ -2776,7 +2794,7 @@ class App:
"title": name, "title": name,
"description": desc, "description": desc,
"type": track_type, "type": track_type,
"status": "proposed", "status": "new",
"progress": 0.0 "progress": 0.0
}, f, indent=1) }, f, indent=1)
# Refresh tracks from disk # Refresh tracks from disk
@@ -3072,6 +3090,19 @@ class App:
imgui.text(f"${total_cost:,.4f}") imgui.text(f"${total_cost:,.4f}")
imgui.end_table() imgui.end_table()
imgui.separator() imgui.separator()
# 3b. Tier Model Config
if imgui.collapsing_header("Tier Model Config"):
for tier in self.mma_tier_usage.keys():
imgui.text(f"{tier}:")
imgui.same_line()
current_model = self.mma_tier_usage[tier].get("model", "unknown")
if imgui.begin_combo(f"##combo_{tier}", current_model):
for model in self.available_models:
if imgui.selectable(model, current_model == model)[0]:
self.mma_tier_usage[tier]["model"] = model
self.project.setdefault("mma", {}).setdefault("tier_models", {})[tier] = model
imgui.end_combo()
imgui.separator()
# 4. Task DAG Visualizer # 4. Task DAG Visualizer
imgui.text("Task DAG") imgui.text("Task DAG")
if self.active_track: if self.active_track:
@@ -3170,6 +3201,9 @@ class App:
def _render_ticket_dag_node(self, ticket: Ticket, tickets_by_id: dict[str, Ticket], children_map: dict[str, list[str]], rendered: set[str]) -> None: def _render_ticket_dag_node(self, ticket: Ticket, tickets_by_id: dict[str, Ticket], children_map: dict[str, list[str]], rendered: set[str]) -> None:
tid = ticket.get('id', '??') tid = ticket.get('id', '??')
is_duplicate = tid in rendered
if not is_duplicate:
rendered.add(tid)
target = ticket.get('target_file', 'general') target = ticket.get('target_file', 'general')
status = ticket.get('status', 'pending').upper() status = ticket.get('status', 'pending').upper()
status_color = vec4(178, 178, 178) status_color = vec4(178, 178, 178)
@@ -3182,14 +3216,13 @@ class App:
elif status == 'PAUSED': elif status == 'PAUSED':
status_color = vec4(255, 165, 0) status_color = vec4(255, 165, 0)
p_min = imgui.get_cursor_screen_pos() p_min = imgui.get_cursor_screen_pos()
p_max = imgui.ImVec2(p_min.x + 4, p_min.y + imgui.get_text_line_height_with_spacing()) p_max = imgui.ImVec2(p_min.x + 4, p_min.y + imgui.get_text_line_height())
imgui.get_window_draw_list().add_rect_filled(p_min, p_max, imgui.get_color_u32(status_color)) imgui.get_window_draw_list().add_rect_filled(p_min, p_max, imgui.get_color_u32(status_color))
imgui.set_cursor_screen_pos(imgui.ImVec2(p_min.x + 8, p_min.y)) imgui.set_cursor_screen_pos(imgui.ImVec2(p_min.x + 8, p_min.y))
flags = imgui.TreeNodeFlags_.open_on_arrow | imgui.TreeNodeFlags_.open_on_double_click | imgui.TreeNodeFlags_.default_open flags = imgui.TreeNodeFlags_.open_on_arrow | imgui.TreeNodeFlags_.open_on_double_click | imgui.TreeNodeFlags_.default_open
children = children_map.get(tid, []) children = children_map.get(tid, [])
if not children: if not children or is_duplicate:
flags |= imgui.TreeNodeFlags_.leaf flags |= imgui.TreeNodeFlags_.leaf
is_duplicate = tid in rendered
node_open = imgui.tree_node_ex(f"##{tid}", flags) node_open = imgui.tree_node_ex(f"##{tid}", flags)
if imgui.is_item_hovered(): if imgui.is_item_hovered():
imgui.begin_tooltip() imgui.begin_tooltip()
@@ -3228,84 +3261,15 @@ class App:
if tid in deps: if tid in deps:
t['depends_on'] = [d for d in deps if d != tid] t['depends_on'] = [d for d in deps if d != tid]
self._push_mma_state_update() self._push_mma_state_update()
if node_open: if is_duplicate:
if not is_duplicate: imgui.same_line()
rendered.add(tid) imgui.text_disabled("(shown above)")
for child_id in children: if node_open and not is_duplicate:
child = tickets_by_id.get(child_id) for child_id in children:
if child: child = tickets_by_id.get(child_id)
self._render_ticket_dag_node(child, tickets_by_id, children_map, rendered) if child:
else: self._render_ticket_dag_node(child, tickets_by_id, children_map, rendered)
imgui.text_disabled(" (shown above)")
imgui.tree_pop() imgui.tree_pop()
if imgui.button("Clear##tc"):
self._tool_log.clear()
imgui.separator()
imgui.begin_child("tc_scroll", imgui.ImVec2(0, 0), False, imgui.WindowFlags_.horizontal_scrollbar)
log_copy = list(self._tool_log)
for idx_minus_one, entry in enumerate(log_copy):
idx = idx_minus_one + 1
# Handle both old (tuple) and new (tuple with ts) entries
if len(entry) == 3:
script, result, local_ts = entry
else:
script, result = entry
local_ts = 0
# Blink effect
blink_alpha = 0.0
if local_ts > 0:
elapsed = time.time() - local_ts
if elapsed < 3.0:
blink_alpha = (1.0 - (elapsed / 3.0)) * 0.3 * (math.sin(elapsed * 10) * 0.5 + 0.5)
imgui.push_id(f"tc_entry_{idx}")
if blink_alpha > 0:
imgui.push_style_color(imgui.Col_.child_bg, vec4(0, 255, 0, blink_alpha))
imgui.begin_group()
first_line = script.strip().splitlines()[0][:80] if script.strip() else "(empty)"
imgui.text_colored(C_KEY, f"Call #{idx}: {first_line}")
# Script Display
imgui.text_colored(C_LBL, "Script:")
imgui.same_line()
if imgui.button(f"[+]##script_{idx}"):
self.show_text_viewer = True
self.text_viewer_title = f"Call Script #{idx}"
self.text_viewer_content = script
if self.ui_word_wrap:
imgui.begin_child(f"tc_script_wrap_{idx}", imgui.ImVec2(-1, 72), True)
imgui.push_text_wrap_pos(imgui.get_content_region_avail().x)
imgui.text(script)
imgui.pop_text_wrap_pos()
imgui.end_child()
else:
imgui.begin_child(f"tc_script_fixed_width_{idx}", imgui.ImVec2(0, 72), True, imgui.WindowFlags_.horizontal_scrollbar)
imgui.input_text_multiline(f"##tc_script_res_{idx}", script, imgui.ImVec2(-1, -1), imgui.InputTextFlags_.read_only)
imgui.end_child()
# Result Display
imgui.text_colored(C_LBL, "Output:")
imgui.same_line()
if imgui.button(f"[+]##output_{idx}"):
self.show_text_viewer = True
self.text_viewer_title = f"Call Output #{idx}"
self.text_viewer_content = result
if self.ui_word_wrap:
imgui.begin_child(f"tc_res_wrap_{idx}", imgui.ImVec2(-1, 72), True)
imgui.push_text_wrap_pos(imgui.get_content_region_avail().x)
imgui.text(result)
imgui.pop_text_wrap_pos()
imgui.end_child()
else:
imgui.begin_child(f"tc_res_fixed_width_{idx}", imgui.ImVec2(0, 72), True, imgui.WindowFlags_.horizontal_scrollbar)
imgui.input_text_multiline(f"##tc_res_val_{idx}", result, imgui.ImVec2(-1, -1), imgui.InputTextFlags_.read_only)
imgui.end_child()
imgui.separator()
if blink_alpha > 0:
imgui.end_group()
imgui.pop_style_color()
imgui.pop_id()
if self._scroll_tool_calls_to_bottom:
imgui.set_scroll_here_y(1.0)
self._scroll_tool_calls_to_bottom = False
imgui.end_child()
def _render_comms_history_panel(self) -> None: def _render_comms_history_panel(self) -> None:
imgui.text_colored(vec4(200, 220, 160), f"Status: {self.ai_status}") imgui.text_colored(vec4(200, 220, 160), f"Status: {self.ai_status}")

View File

@@ -16,6 +16,7 @@ class Ticket:
depends_on: List[str] = field(default_factory=list) depends_on: List[str] = field(default_factory=list)
blocked_reason: Optional[str] = None blocked_reason: Optional[str] = None
step_mode: bool = False step_mode: bool = False
retry_count: int = 0
def mark_blocked(self, reason: str) -> None: def mark_blocked(self, reason: str) -> None:
"""Sets the ticket status to 'blocked' and records the reason.""" """Sets the ticket status to 'blocked' and records the reason."""
@@ -26,6 +27,10 @@ class Ticket:
"""Sets the ticket status to 'completed'.""" """Sets the ticket status to 'completed'."""
self.status = "completed" self.status = "completed"
def get(self, key: str, default: Any = None) -> Any:
"""Helper to provide dictionary-like access to dataclass fields."""
return getattr(self, key, default)
def to_dict(self) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]:
return { return {
"id": self.id, "id": self.id,
@@ -37,6 +42,7 @@ class Ticket:
"depends_on": self.depends_on, "depends_on": self.depends_on,
"blocked_reason": self.blocked_reason, "blocked_reason": self.blocked_reason,
"step_mode": self.step_mode, "step_mode": self.step_mode,
"retry_count": self.retry_count,
} }
@classmethod @classmethod
@@ -51,6 +57,7 @@ class Ticket:
depends_on=data.get("depends_on", []), depends_on=data.get("depends_on", []),
blocked_reason=data.get("blocked_reason"), blocked_reason=data.get("blocked_reason"),
step_mode=data.get("step_mode", False), step_mode=data.get("step_mode", False),
retry_count=data.get("retry_count", 0),
) )
@dataclass @dataclass

View File

@@ -100,23 +100,35 @@ class ConductorEngine:
print("No more executable tickets. Track is blocked or finished.") print("No more executable tickets. Track is blocked or finished.")
await self._push_state(status="blocked", active_tier=None) await self._push_state(status="blocked", active_tier=None)
break break
# 3. Process ready tasks # 3. Process ready tasks
to_run = [t for t in ready_tasks if t.status == "in_progress" or (not t.step_mode and self.engine.auto_queue)]
# Handle those awaiting approval
for ticket in ready_tasks: for ticket in ready_tasks:
# If auto_queue is on and step_mode is off, engine.tick() already marked it 'in_progress' if ticket not in to_run and ticket.status == "todo":
# but we need to verify and handle the lifecycle. print(f"Ticket {ticket.id} is ready and awaiting approval.")
if ticket.status == "in_progress" or (not ticket.step_mode and self.engine.auto_queue): await self._push_state(active_tier=f"Awaiting Approval: {ticket.id}")
await asyncio.sleep(1)
if to_run:
tasks = []
for ticket in to_run:
ticket.status = "in_progress" ticket.status = "in_progress"
print(f"Executing ticket {ticket.id}: {ticket.description}") print(f"Executing ticket {ticket.id}: {ticket.description}")
await self._push_state(active_tier=f"Tier 3 (Worker): {ticket.id}") await self._push_state(active_tier=f"Tier 3 (Worker): {ticket.id}")
# Escalation logic based on retry_count
models = ["gemini-2.5-flash-lite", "gemini-2.5-flash", "gemini-3.1-pro-preview"]
model_idx = min(ticket.retry_count, len(models) - 1)
model_name = models[model_idx]
context = WorkerContext( context = WorkerContext(
ticket_id=ticket.id, ticket_id=ticket.id,
model_name=self.tier_usage["Tier 3"]["model"], model_name=model_name,
messages=[] messages=[]
) )
# Offload the blocking lifecycle call to a thread to avoid blocking the async event loop.
# We pass the md_content so the worker has full context.
context_files = ticket.context_requirements if ticket.context_requirements else None context_files = ticket.context_requirements if ticket.context_requirements else None
await loop.run_in_executor( tasks.append(loop.run_in_executor(
None, None,
run_worker_lifecycle, run_worker_lifecycle,
ticket, ticket,
@@ -126,15 +138,19 @@ class ConductorEngine:
self, self,
md_content, md_content,
loop loop
) ))
await self._push_state(active_tier="Tier 2 (Tech Lead)")
elif ticket.status == "todo" and (ticket.step_mode or not self.engine.auto_queue): await asyncio.gather(*tasks)
# Task is ready but needs approval
print(f"Ticket {ticket.id} is ready and awaiting approval.") # 4. Retry and escalation logic
await self._push_state(active_tier=f"Awaiting Approval: {ticket.id}") for ticket in to_run:
# In a real UI, this would wait for a user event. if ticket.status == 'blocked':
# For now, we'll treat it as a pause point if not auto-queued. if ticket.get('retry_count', 0) < 2:
await asyncio.sleep(1) ticket.retry_count += 1
ticket.status = 'todo'
print(f"Ticket {ticket.id} BLOCKED. Escalating to {models[min(ticket.retry_count, len(models)-1)]} and retrying...")
await self._push_state(active_tier="Tier 2 (Tech Lead)")
def _queue_put(event_queue: events.AsyncEventQueue, loop: asyncio.AbstractEventLoop, event_name: str, payload) -> None: def _queue_put(event_queue: events.AsyncEventQueue, loop: asyncio.AbstractEventLoop, event_name: str, payload) -> None:
"""Thread-safe helper to push an event to the AsyncEventQueue from a worker thread.""" """Thread-safe helper to push an event to the AsyncEventQueue from a worker thread."""
@@ -220,6 +236,7 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
""" """
# Enforce Context Amnesia: each ticket starts with a clean slate. # Enforce Context Amnesia: each ticket starts with a clean slate.
ai_client.reset_session() ai_client.reset_session()
ai_client.set_provider(ai_client.get_provider(), context.model_name)
context_injection = "" context_injection = ""
if context_files: if context_files:
parser = ASTParser(language="python") parser = ASTParser(language="python")
@@ -273,15 +290,37 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
if event_queue and loop: if event_queue and loop:
_queue_put(event_queue, loop, 'mma_stream', {'stream_id': f'Tier 3 (Worker): {ticket.id}', 'text': chunk}) _queue_put(event_queue, loop, 'mma_stream', {'stream_id': f'Tier 3 (Worker): {ticket.id}', 'text': chunk})
comms_baseline = len(ai_client.get_comms_log()) old_comms_cb = ai_client.comms_log_callback
response = ai_client.send( def worker_comms_callback(entry: dict) -> None:
md_content=md_content, if event_queue and loop:
user_message=user_message, kind = entry.get("kind")
base_dir=".", payload = entry.get("payload", {})
pre_tool_callback=clutch_callback if ticket.step_mode else None, chunk = ""
qa_callback=ai_client.run_tier4_analysis, if kind == "tool_call":
stream_callback=stream_callback chunk = f"\n\n[TOOL CALL] {payload.get('name')}\n{json.dumps(payload.get('script') or payload.get('args'))}\n"
) elif kind == "tool_result":
res = str(payload.get("output", ""))
if len(res) > 500: res = res[:500] + "... (truncated)"
chunk = f"\n[TOOL RESULT]\n{res}\n"
if chunk:
_queue_put(event_queue, loop, "response", {"text": chunk, "stream_id": f"Tier 3 (Worker): {ticket.id}", "status": "streaming..."})
if old_comms_cb:
old_comms_cb(entry)
ai_client.comms_log_callback = worker_comms_callback
try:
comms_baseline = len(ai_client.get_comms_log())
response = ai_client.send(
md_content=md_content,
user_message=user_message,
base_dir=".",
pre_tool_callback=clutch_callback if ticket.step_mode else None,
qa_callback=ai_client.run_tier4_analysis,
stream_callback=stream_callback
)
finally:
ai_client.comms_log_callback = old_comms_cb
if event_queue: if event_queue:
# Push via "response" event type — _process_event_queue wraps this # Push via "response" event type — _process_event_queue wraps this

View File

@@ -9,7 +9,9 @@ class PerformanceMonitor:
self._start_time = None self._start_time = None
self._last_frame_time = 0.0 self._last_frame_time = 0.0
self._fps = 0.0 self._fps = 0.0
self._last_calculated_fps = 0.0
self._frame_count = 0 self._frame_count = 0
self._total_frame_count = 0
self._fps_last_time = time.time() self._fps_last_time = time.time()
self._process = psutil.Process() self._process = psutil.Process()
self._cpu_usage = 0.0 self._cpu_usage = 0.0
@@ -71,6 +73,7 @@ class PerformanceMonitor:
end_time = time.time() end_time = time.time()
self._last_frame_time = (end_time - self._start_time) * 1000.0 self._last_frame_time = (end_time - self._start_time) * 1000.0
self._frame_count += 1 self._frame_count += 1
self._total_frame_count += 1
# Calculate input lag if an input occurred during this frame # Calculate input lag if an input occurred during this frame
if self._last_input_time is not None: if self._last_input_time is not None:
self._input_lag_ms = (end_time - self._last_input_time) * 1000.0 self._input_lag_ms = (end_time - self._last_input_time) * 1000.0
@@ -79,6 +82,7 @@ class PerformanceMonitor:
elapsed_since_fps = end_time - self._fps_last_time elapsed_since_fps = end_time - self._fps_last_time
if elapsed_since_fps >= 1.0: if elapsed_since_fps >= 1.0:
self._fps = self._frame_count / elapsed_since_fps self._fps = self._frame_count / elapsed_since_fps
self._last_calculated_fps = self._fps
self._frame_count = 0 self._frame_count = 0
self._fps_last_time = end_time self._fps_last_time = end_time
@@ -105,8 +109,9 @@ class PerformanceMonitor:
cpu_usage = self._cpu_usage cpu_usage = self._cpu_usage
metrics = { metrics = {
'last_frame_time_ms': self._last_frame_time, 'last_frame_time_ms': self._last_frame_time,
'fps': self._fps, 'fps': self._last_calculated_fps,
'cpu_percent': cpu_usage, 'cpu_percent': cpu_usage,
'total_frames': self._total_frame_count,
'input_lag_ms': self._last_input_time if self._last_input_time else 0.0 # Wait, this should be the calculated lag 'input_lag_ms': self._last_input_time if self._last_input_time else 0.0 # Wait, this should be the calculated lag
} }
# Oops, fixed the input lag logic in previous turn, let's keep it consistent # Oops, fixed the input lag logic in previous turn, let's keep it consistent

View File

@@ -8,5 +8,5 @@ active = "main"
[discussions.main] [discussions.main]
git_commit = "" git_commit = ""
last_updated = "2026-03-01T20:08:11" last_updated = "2026-03-01T22:32:23"
history = [] history = []

View File

@@ -60,38 +60,49 @@ def main() -> None:
}), flush=True) }), flush=True)
return return
# If the prompt contains tool results, provide final answer # Check for multi-round integration test triggers
if '"role": "tool"' in prompt or '"tool_call_id"' in prompt: is_resume = '--resume' in " ".join(sys.argv) or 'role: tool' in prompt or 'tool_call_id' in prompt
print(json.dumps({ if is_resume or 'Perform multi-round tool test' in prompt or 'Please read test.txt' in prompt or 'Deny me' in prompt:
"type": "message", if not is_resume:
"role": "assistant", # First round: emit tool call
"content": "I have processed the tool results and here is the final answer." print(json.dumps({
}), flush=True) "type": "message",
print(json.dumps({ "role": "assistant",
"type": "result", "content": "I need to check the directory first."
"status": "success", }), flush=True)
"stats": {"total_tokens": 100, "input_tokens": 80, "output_tokens": 20}, print(json.dumps({
"session_id": "mock-session-final" "type": "tool_use",
}), flush=True) "name": "run_powershell",
return "id": "mock-call-1",
"args": {"script": "Get-ChildItem"}
# Default flow: emit a tool call to test multi-round looping }), flush=True)
print(json.dumps({ print(json.dumps({
"type": "message", "type": "result",
"role": "assistant", "status": "success",
"content": "I need to check the directory first." "stats": {"total_tokens": 10, "input_tokens": 10, "output_tokens": 0},
}), flush=True) "session_id": "mock-session-default"
print(json.dumps({ }), flush=True)
"type": "tool_use", return
"name": "list_directory", else:
"id": "mock-call-1", # Second round
"args": {"dir_path": "."} if "USER REJECTED" in prompt:
}), flush=True) print(json.dumps({
print(json.dumps({ "type": "message",
"type": "result", "role": "assistant",
"status": "success", "content": "Tool execution was denied. I cannot proceed."
"stats": {"total_tokens": 10, "input_tokens": 10, "output_tokens": 0}, }), flush=True)
"session_id": "mock-session-default" else:
}), flush=True) print(json.dumps({
"type": "message",
"role": "assistant",
"content": "I have processed the tool results and here is the final answer."
}), flush=True)
print(json.dumps({
"type": "result",
"status": "success",
"stats": {"total_tokens": 100, "input_tokens": 80, "output_tokens": 20},
"session_id": "mock-session-final"
}), flush=True)
return
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -19,30 +19,37 @@ class TestGeminiCliAdapter(unittest.TestCase):
@patch('subprocess.Popen') @patch('subprocess.Popen')
def test_send_starts_subprocess_with_correct_args(self, mock_popen: Any) -> None: def test_send_starts_subprocess_with_correct_args(self, mock_popen: Any) -> None:
""" """
Verify that send(message) correctly starts the subprocess with Verify that send(message) correctly starts the subprocess with
--output-format stream-json and the provided message via stdin using communicate. --output-format stream-json and the provided message via stdin.
""" """
# Setup mock process with a minimal valid JSONL termination # Setup mock process with a minimal valid JSONL termination
process_mock = MagicMock() process_mock = MagicMock()
stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n" jsonl_output = [json.dumps({"type": "result", "usage": {}}) + "\n"]
process_mock.communicate.return_value = (stdout_content, "") process_mock.stdout.readline.side_effect = jsonl_output + ['']
process_mock.stderr.read.return_value = ""
process_mock.poll.return_value = 0 process_mock.poll.return_value = 0
process_mock.wait.return_value = 0 process_mock.wait.return_value = 0
mock_popen.return_value = process_mock mock_popen.return_value = process_mock
message = "Hello Gemini CLI" message = "Hello Gemini CLI"
self.adapter.send(message) self.adapter.send(message)
# Verify subprocess.Popen call # Verify subprocess.Popen call
mock_popen.assert_called_once() mock_popen.assert_called_once()
args, kwargs = mock_popen.call_args args, kwargs = mock_popen.call_args
cmd = args[0] cmd = args[0]
# Check mandatory CLI components # Check mandatory CLI components
self.assertIn("gemini", cmd) self.assertIn("gemini", cmd)
self.assertIn("--output-format", cmd) self.assertIn("--output-format", cmd)
self.assertIn("stream-json", cmd) self.assertIn("stream-json", cmd)
# Message should NOT be in cmd now # Message should NOT be in cmd now
self.assertNotIn(message, cmd) self.assertNotIn(message, cmd)
# Verify message was sent via communicate
process_mock.communicate.assert_called_once_with(input=message) # Verify message was written to stdin
process_mock.stdin.write.assert_called_with(message)
# Check process configuration # Check process configuration
self.assertEqual(kwargs.get('stdout'), subprocess.PIPE) self.assertEqual(kwargs.get('stdout'), subprocess.PIPE)
self.assertEqual(kwargs.get('stdin'), subprocess.PIPE) self.assertEqual(kwargs.get('stdin'), subprocess.PIPE)
@@ -51,20 +58,21 @@ class TestGeminiCliAdapter(unittest.TestCase):
@patch('subprocess.Popen') @patch('subprocess.Popen')
def test_send_parses_jsonl_output(self, mock_popen: Any) -> None: def test_send_parses_jsonl_output(self, mock_popen: Any) -> None:
""" """
Verify that it correctly parses multiple JSONL 'message' events Verify that it correctly parses multiple JSONL 'message' events
and returns the combined text. and returns the combined text.
""" """
jsonl_output = [ jsonl_output = [
json.dumps({"type": "message", "role": "model", "text": "The quick brown "}), json.dumps({"type": "message", "role": "model", "text": "The quick brown "}) + "\n",
json.dumps({"type": "message", "role": "model", "text": "fox jumps."}), json.dumps({"type": "message", "role": "model", "text": "fox jumps."}) + "\n",
json.dumps({"type": "result", "usage": {"prompt_tokens": 5, "candidates_tokens": 5}}) json.dumps({"type": "result", "usage": {"prompt_tokens": 5, "candidates_tokens": 5}}) + "\n"
] ]
stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock() process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "") process_mock.stdout.readline.side_effect = jsonl_output + ['']
process_mock.stderr.read.return_value = ""
process_mock.poll.return_value = 0 process_mock.poll.return_value = 0
process_mock.wait.return_value = 0 process_mock.wait.return_value = 0
mock_popen.return_value = process_mock mock_popen.return_value = process_mock
result = self.adapter.send("test message") result = self.adapter.send("test message")
self.assertEqual(result["text"], "The quick brown fox jumps.") self.assertEqual(result["text"], "The quick brown fox jumps.")
self.assertEqual(result["tool_calls"], []) self.assertEqual(result["tool_calls"], [])
@@ -72,21 +80,22 @@ class TestGeminiCliAdapter(unittest.TestCase):
@patch('subprocess.Popen') @patch('subprocess.Popen')
def test_send_handles_tool_use_events(self, mock_popen: Any) -> None: def test_send_handles_tool_use_events(self, mock_popen: Any) -> None:
""" """
Verify that it correctly handles 'tool_use' events in the stream Verify that it correctly handles 'tool_use' events in the stream
by continuing to read until the final 'result' event. by continuing to read until the final 'result' event.
""" """
jsonl_output = [ jsonl_output = [
json.dumps({"type": "message", "role": "assistant", "text": "Calling tool..."}), json.dumps({"type": "message", "role": "assistant", "text": "Calling tool..."}) + "\n",
json.dumps({"type": "tool_use", "name": "read_file", "args": {"path": "test.txt"}}), json.dumps({"type": "tool_use", "name": "read_file", "args": {"path": "test.txt"}}) + "\n",
json.dumps({"type": "message", "role": "assistant", "text": "\nFile read successfully."}), json.dumps({"type": "message", "role": "assistant", "text": "\nFile read successfully."}) + "\n",
json.dumps({"type": "result", "usage": {}}) json.dumps({"type": "result", "usage": {}}) + "\n"
] ]
stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock() process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "") process_mock.stdout.readline.side_effect = jsonl_output + ['']
process_mock.stderr.read.return_value = ""
process_mock.poll.return_value = 0 process_mock.poll.return_value = 0
process_mock.wait.return_value = 0 process_mock.wait.return_value = 0
mock_popen.return_value = process_mock mock_popen.return_value = process_mock
result = self.adapter.send("read test.txt") result = self.adapter.send("read test.txt")
# Result should contain the combined text from all 'message' events # Result should contain the combined text from all 'message' events
self.assertEqual(result["text"], "Calling tool...\nFile read successfully.") self.assertEqual(result["text"], "Calling tool...\nFile read successfully.")
@@ -96,19 +105,20 @@ class TestGeminiCliAdapter(unittest.TestCase):
@patch('subprocess.Popen') @patch('subprocess.Popen')
def test_send_captures_usage_metadata(self, mock_popen: Any) -> None: def test_send_captures_usage_metadata(self, mock_popen: Any) -> None:
""" """
Verify that usage data is extracted from the 'result' event. Verify that usage data is extracted from the 'result' event.
""" """
usage_data = {"total_tokens": 42} usage_data = {"total_tokens": 42}
jsonl_output = [ jsonl_output = [
json.dumps({"type": "message", "text": "Finalizing"}), json.dumps({"type": "message", "text": "Finalizing"}) + "\n",
json.dumps({"type": "result", "usage": usage_data}) json.dumps({"type": "result", "usage": usage_data}) + "\n"
] ]
stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock() process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "") process_mock.stdout.readline.side_effect = jsonl_output + ['']
process_mock.stderr.read.return_value = ""
process_mock.poll.return_value = 0 process_mock.poll.return_value = 0
process_mock.wait.return_value = 0 process_mock.wait.return_value = 0
mock_popen.return_value = process_mock mock_popen.return_value = process_mock
self.adapter.send("usage test") self.adapter.send("usage test")
# Verify the usage was captured in the adapter instance # Verify the usage was captured in the adapter instance
self.assertEqual(self.adapter.last_usage, usage_data) self.assertEqual(self.adapter.last_usage, usage_data)

View File

@@ -91,9 +91,14 @@ else:
approved = False approved = False
while time.time() - start_time < timeout: while time.time() - start_time < timeout:
for ev in client.get_events(): for ev in client.get_events():
if ev.get("type") == "ask_received": etype = ev.get("type")
eid = ev.get("request_id") or ev.get("action_id")
if etype == "ask_received":
requests.post("http://127.0.0.1:8999/api/ask/respond", requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": ev.get("request_id"), "response": {"approved": True}}) json={"request_id": eid, "response": {"approved": True}})
approved = True
elif etype == "script_confirmation_required":
requests.post(f"http://127.0.0.1:8999/api/confirm/{eid}", json={"approved": True})
approved = True approved = True
if approved: break if approved: break
time.sleep(0.5) time.sleep(0.5)
@@ -129,9 +134,14 @@ def test_gemini_cli_loop_termination(live_gui: Any) -> None:
approved = False approved = False
while time.time() - start_time < timeout: while time.time() - start_time < timeout:
for ev in client.get_events(): for ev in client.get_events():
if ev.get("type") == "ask_received": etype = ev.get("type")
eid = ev.get("request_id") or ev.get("action_id")
if etype == "ask_received":
requests.post("http://127.0.0.1:8999/api/ask/respond", requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": ev.get("request_id"), "response": {"approved": True}}) json={"request_id": eid, "response": {"approved": True}})
approved = True
elif etype == "script_confirmation_required":
requests.post(f"http://127.0.0.1:8999/api/confirm/{eid}", json={"approved": True})
approved = True approved = True
if approved: break if approved: break
time.sleep(0.5) time.sleep(0.5)

View File

@@ -1,4 +1,4 @@
from typing import Any from typing import Any
import pytest import pytest
import time import time
import os import os
@@ -95,14 +95,19 @@ def test_gemini_cli_rejection_and_history(live_gui: Any) -> None:
while time.time() - start_time < timeout: while time.time() - start_time < timeout:
for ev in client.get_events(): for ev in client.get_events():
etype = ev.get("type") etype = ev.get("type")
eid = ev.get("request_id") eid = ev.get("request_id") or ev.get("action_id")
print(f"[TEST] Received event: {etype}") print(f"[TEST] Received event: {etype} (ID: {eid})")
if etype == "ask_received": if etype == "ask_received":
print(f"[TEST] Denying request {eid}") print(f"[TEST] Denying request {eid}")
requests.post("http://127.0.0.1:8999/api/ask/respond", requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": eid, "response": {"approved": False}}) json={"request_id": eid, "response": {"approved": False}})
denied = True denied = True
break break
elif etype == "script_confirmation_required":
print(f"[TEST] Denying script {eid}")
requests.post(f"http://127.0.0.1:8999/api/confirm/{eid}", json={"approved": False})
denied = True
break
if denied: break if denied: break
time.sleep(0.5) time.sleep(0.5)
assert denied, "No ask_received event to deny" assert denied, "No ask_received event to deny"

View File

@@ -83,7 +83,12 @@ def test_create_track(app_instance, tmp_path):
with patch('gui_2.project_manager.get_all_tracks', return_value=[]): with patch('gui_2.project_manager.get_all_tracks', return_value=[]):
app_instance._cb_create_track("Test Track", "Test Description", "feature") app_instance._cb_create_track("Test Track", "Test Description", "feature")
track_dir = Path("conductor/tracks/test_track") # Search for a directory starting with 'test_track' in 'conductor/tracks/'
tracks_root = Path("conductor/tracks")
matching_dirs = [d for d in tracks_root.iterdir() if d.is_dir() and d.name.startswith("test_track")]
assert len(matching_dirs) == 1
track_dir = matching_dirs[0]
assert track_dir.exists() assert track_dir.exists()
assert (track_dir / "spec.md").exists() assert (track_dir / "spec.md").exists()
assert (track_dir / "plan.md").exists() assert (track_dir / "plan.md").exists()
@@ -93,6 +98,6 @@ def test_create_track(app_instance, tmp_path):
data = json.load(f) data = json.load(f)
assert data['title'] == "Test Track" assert data['title'] == "Test Track"
assert data['type'] == "feature" assert data['type'] == "feature"
assert data['id'] == "test_track" assert data['id'] == track_dir.name
finally: finally:
os.chdir(old_cwd) os.chdir(old_cwd)

View File

@@ -102,3 +102,35 @@ def test_handle_ai_response_resets_stream(app_instance: App):
# (sometimes streaming chunks don't perfectly match final text if there are # (sometimes streaming chunks don't perfectly match final text if there are
# tool calls or specific SDK behaviors). # tool calls or specific SDK behaviors).
assert app_instance.mma_streams[stream_id] == "Final complete response." assert app_instance.mma_streams[stream_id] == "Final complete response."
def test_handle_ai_response_streaming(app_instance: App):
"""Verifies that 'handle_ai_response' with status='streaming...' appends to mma_streams."""
stream_id = "Tier 3 (Worker): T-001"
# 1. First chunk
with app_instance._pending_gui_tasks_lock:
app_instance._pending_gui_tasks.append({
"action": "handle_ai_response",
"payload": {
"stream_id": stream_id,
"text": "Chunk 1. ",
"status": "streaming..."
}
})
app_instance._process_pending_gui_tasks()
assert app_instance.mma_streams[stream_id] == "Chunk 1. "
# 2. Second chunk
with app_instance._pending_gui_tasks_lock:
app_instance._pending_gui_tasks.append({
"action": "handle_ai_response",
"payload": {
"stream_id": stream_id,
"text": "Chunk 2.",
"status": "streaming..."
}
})
app_instance._process_pending_gui_tasks()
# 3. Verify final state
assert app_instance.mma_streams[stream_id] == "Chunk 1. Chunk 2."

View File

@@ -0,0 +1,98 @@
import pytest
from unittest.mock import MagicMock, patch, AsyncMock
import asyncio
import json
import multi_agent_conductor
from multi_agent_conductor import ConductorEngine, run_worker_lifecycle
from models import Ticket, Track, WorkerContext
def test_worker_streaming_intermediate():
ticket = Ticket(id="T-001", description="Test", status="todo", assigned_to="worker")
context = WorkerContext(ticket_id="T-001", model_name="test-model", messages=[])
event_queue = MagicMock()
event_queue.put = AsyncMock()
loop = MagicMock()
with (
patch("ai_client.send") as mock_send,
patch("multi_agent_conductor._queue_put") as mock_q_put,
patch("multi_agent_conductor.confirm_spawn", return_value=(True, "p", "c")),
patch("ai_client.reset_session"),
patch("ai_client.set_provider"),
patch("ai_client.get_provider"),
patch("ai_client.get_comms_log", return_value=[])
):
def side_effect(*args, **kwargs):
import ai_client
cb = ai_client.comms_log_callback
if cb:
cb({"kind": "tool_call", "payload": {"name": "test_tool", "script": "echo hello"}})
cb({"kind": "tool_result", "payload": {"name": "test_tool", "output": "hello"}})
return "DONE"
mock_send.side_effect = side_effect
run_worker_lifecycle(ticket, context, event_queue=event_queue, loop=loop)
responses = [call.args[3] for call in mock_q_put.call_args_list if call.args[2] == "response"]
assert any("[TOOL CALL]" in r.get("text", "") for r in responses)
assert any("[TOOL RESULT]" in r.get("text", "") for r in responses)
def test_per_tier_model_persistence():
# Mock UI frameworks before importing gui_2
mock_imgui = MagicMock()
with patch.dict("sys.modules", {
"imgui_bundle": MagicMock(),
"imgui_bundle.imgui": mock_imgui,
"imgui_bundle.hello_imgui": MagicMock(),
"imgui_bundle.immapp": MagicMock(),
}):
from gui_2 import App
with (
patch("gui_2.project_manager.load_project", return_value={}),
patch("gui_2.project_manager.migrate_from_legacy_config", return_value={}),
patch("gui_2.project_manager.save_project"),
patch("gui_2.save_config"),
patch("gui_2.theme.load_from_config"),
patch("gui_2.ai_client.set_provider"),
patch("gui_2.ai_client.list_models", return_value=["gpt-4", "claude-3"]),
patch("gui_2.PerformanceMonitor"),
patch("gui_2.api_hooks.HookServer"),
patch("gui_2.session_logger.open_session")
):
app = App()
app.available_models = ["gpt-4", "claude-3"]
tier = "Tier 3"
model = "claude-3"
# Simulate 'Tier Model Config' UI logic
app.mma_tier_usage[tier]["model"] = model
app.project.setdefault("mma", {}).setdefault("tier_models", {})[tier] = model
assert app.project["mma"]["tier_models"][tier] == model
@pytest.mark.asyncio
async def test_retry_escalation():
ticket = Ticket(id="T-001", description="Test", status="todo", assigned_to="worker")
track = Track(id="TR-001", description="Track", tickets=[ticket])
event_queue = MagicMock()
event_queue.put = AsyncMock()
engine = ConductorEngine(track, event_queue=event_queue)
engine.engine.auto_queue = True
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
def lifecycle_side_effect(t, *args, **kwargs):
t.status = "blocked"
return "BLOCKED"
mock_lifecycle.side_effect = lifecycle_side_effect
with patch.object(engine.engine, "tick") as mock_tick:
# First tick returns ticket, second tick returns empty list to stop loop
mock_tick.side_effect = [[ticket], []]
await engine.run()
assert ticket.retry_count == 1
assert ticket.status == "todo"

View File

@@ -2,6 +2,7 @@ import pytest
import time import time
import sys import sys
import os import os
import json
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
@@ -10,50 +11,105 @@ from api_hook_client import ApiHookClient
@pytest.mark.integration @pytest.mark.integration
@pytest.mark.timeout(60) @pytest.mark.timeout(60)
def test_gui_ux_event_routing(live_gui) -> None: def test_gui_ux_event_routing(live_gui) -> None:
client = ApiHookClient() client = ApiHookClient()
assert client.wait_for_server(timeout=15), "Hook server did not start" assert client.wait_for_server(timeout=15), "Hook server did not start"
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# 1. Verify Streaming Event Routing # 1. Verify Streaming Event Routing
# ------------------------------------------------------------------ # ------------------------------------------------------------------
print("[SIM] Testing Streaming Event Routing...") print("[SIM] Testing Streaming Event Routing...")
stream_id = "Tier 3 (Worker): T-SIM-001" stream_id = "Tier 3 (Worker): T-SIM-001"
# We use push_event which POSTs to /api/gui with action=mma_stream_append # We use push_event which POSTs to /api/gui with action=mma_stream_append
# As defined in App._process_pending_gui_tasks # As defined in App._process_pending_gui_tasks
client.push_event('mma_stream_append', {'stream_id': stream_id, 'text': 'Hello '}) client.push_event('mma_stream_append', {'stream_id': stream_id, 'text': 'Hello '})
time.sleep(0.5) time.sleep(0.5)
client.push_event('mma_stream_append', {'stream_id': stream_id, 'text': 'World!'}) client.push_event('mma_stream_append', {'stream_id': stream_id, 'text': 'World!'})
time.sleep(1.0) time.sleep(1.0)
status = client.get_mma_status() status = client.get_mma_status()
streams = status.get('mma_streams', {}) streams = status.get('mma_streams', {})
assert streams.get(stream_id) == 'Hello World!', f"Streaming failed: {streams.get(stream_id)}" assert streams.get(stream_id) == 'Hello World!', f"Streaming failed: {streams.get(stream_id)}"
print("[SIM] Streaming event routing verified.") print("[SIM] Streaming event routing verified.")
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# 2. Verify State Update (Usage/Cost) Routing # 2. Verify State Update (Usage/Cost) Routing
# ------------------------------------------------------------------ # ------------------------------------------------------------------
print("[SIM] Testing State Update Routing...") print("[SIM] Testing State Update Routing...")
usage = { usage = {
"Tier 1": {"input": 1000, "output": 500, "model": "gemini-3.1-pro-preview"}, "Tier 1": {"input": 1000, "output": 500, "model": "gemini-3.1-pro-preview"},
"Tier 2": {"input": 2000, "output": 1000, "model": "gemini-3-flash-preview"} "Tier 2": {"input": 2000, "output": 1000, "model": "gemini-3-flash-preview"}
} }
client.push_event('mma_state_update', { client.push_event('mma_state_update', {
'status': 'simulating', 'status': 'simulating',
'tier_usage': usage, 'tier_usage': usage,
'tickets': [] 'tickets': []
}) })
time.sleep(1.0) time.sleep(1.0)
status = client.get_mma_status() status = client.get_mma_status()
assert status.get('mma_status') == 'simulating' assert status.get('mma_status') == 'simulating'
# The app merges or replaces usage. Let's check what we got back. # The app merges or replaces usage. Let's check what we got back.
received_usage = status.get('mma_tier_usage', {}) received_usage = status.get('mma_tier_usage', {})
assert received_usage.get('Tier 1', {}).get('input') == 1000 assert received_usage.get('Tier 1', {}).get('input') == 1000
assert received_usage.get('Tier 2', {}).get('model') == 'gemini-3-flash-preview' assert received_usage.get('Tier 2', {}).get('model') == 'gemini-3-flash-preview'
print("[SIM] State update routing verified.") print("[SIM] State update routing verified.")
# ------------------------------------------------------------------
# 3. Verify Performance
# ------------------------------------------------------------------
print("[SIM] Testing Performance...")
# Wait for at least one second of frame data to accumulate for FPS calculation
time.sleep(2.0)
perf_data = client.get_performance()
assert perf_data is not None, "Failed to retrieve performance metrics"
perf = perf_data.get('performance', {})
fps = perf.get('fps', 0.0)
total_frames = perf.get('total_frames', 0)
print(f"[SIM] Current FPS: {fps}, Total Frames: {total_frames}")
assert fps >= 30.0, f"Performance degradation: {fps} FPS < 30.0 (Total Frames: {total_frames})"
print("[SIM] Performance verified.")
@pytest.mark.integration
@pytest.mark.timeout(60)
def test_gui_track_creation(live_gui) -> None:
client = ApiHookClient()
assert client.wait_for_server(timeout=15), "Hook server did not start"
print("[SIM] Testing Track Creation via GUI...")
track_name = 'UX_SIM_TEST'
track_desc = 'Simulation testing for GUI UX'
track_type = 'feature'
client.set_value('ui_new_track_name', track_name)
client.set_value('ui_new_track_desc', track_desc)
client.set_value('ui_new_track_type', track_type)
client.click('btn_mma_create_track')
time.sleep(2.0)
tracks_dir = 'conductor/tracks/'
found = False
# The implementation lowercases and replaces spaces with underscores
search_prefix = track_name.lower().replace(' ', '_')
for entry in os.listdir(tracks_dir):
if entry.startswith(search_prefix) and os.path.isdir(os.path.join(tracks_dir, entry)):
found = True
metadata_path = os.path.join(tracks_dir, entry, 'metadata.json')
assert os.path.exists(metadata_path), f"metadata.json missing in {entry}"
with open(metadata_path, 'r') as f:
meta = json.load(f)
assert meta.get('status') == 'new'
assert meta.get('title') == track_name
print(f"[SIM] Verified track directory: {entry}")
break
assert found, f"Track directory starting with {search_prefix} not found."
print("[SIM] Track creation verified.")
if __name__ == "__main__": if __name__ == "__main__":
pass pass