feat(api): implement phase 4 headless refinement and verification

2026-03-11 23:17:57 -04:00
parent 930b833055
commit 036c2f360a
3 changed files with 304 additions and 181 deletions
@@ -35,10 +35,10 @@
 - [x] Task: Conductor - User Manual Verification 'Phase 3: Comprehensive Control Endpoints' (Protocol in workflow.md)
 ## Phase 4: Headless Refinement & Verification
- [ ] Task: Improve error reporting.
+- [x] Task: Improve error reporting.
-    - [ ] Refactor `HookHandler` to catch and wrap all internal exceptions in JSON error responses.
+    - [x] Refactor `HookHandler` to catch and wrap all internal exceptions in JSON error responses.
- [ ] Task: Conduct a full headless simulation.
+- [x] Task: Conduct a full headless simulation.
-    - [ ] Create a specialized simulation script that replicates a full MMA track lifecycle (planning, worker spawn, DAG mutation, completion) using ONLY the Hook API.
+    - [x] Create a specialized simulation script that replicates a full MMA track lifecycle (planning, worker spawn, DAG mutation, completion) using ONLY the Hook API.
- [ ] Task: Final performance audit.
+- [x] Task: Final performance audit.
-    - [ ] Ensure that active WebSocket clients and large state dumps do not cause GUI frame drops.
+    - [x] Ensure that active WebSocket clients and large state dumps do not cause GUI frame drops.
 - [ ] Task: Conductor - User Manual Verification 'Phase 4: Headless Refinement & Verification' (Protocol in workflow.md)
@@ -81,201 +81,207 @@ def _serialize_for_api(obj: Any) -> Any:
 class HookHandler(BaseHTTPRequestHandler):
 """Handles incoming HTTP requests for the API hooks."""
 def do_GET(self) -> None:
-  app = self.server.app
+  try:
-  session_logger.log_api_hook("GET", self.path, "")
+   app = self.server.app
-  if self.path == "/status":
+   session_logger.log_api_hook("GET", self.path, "")
-   self.send_response(200)
+   if self.path == "/status":
-   self.send_header("Content-Type", "application/json")
+    self.send_response(200)
-   self.end_headers()
+    self.send_header("Content-Type", "application/json")
-   self.wfile.write(json.dumps({"status": "ok"}).encode("utf-8"))
+    self.end_headers()
-  elif self.path == "/api/project":
+    self.wfile.write(json.dumps({"status": "ok"}).encode("utf-8"))
-   from src import project_manager
+   elif self.path == "/api/project":
-   self.send_response(200)
+    from src import project_manager
-   self.send_header("Content-Type", "application/json")
+    self.send_response(200)
-   self.end_headers()
+    self.send_header("Content-Type", "application/json")
-   flat = project_manager.flat_config(_get_app_attr(app, "project"))
+    self.end_headers()
-   self.wfile.write(json.dumps({"project": flat}).encode("utf-8"))
+    flat = project_manager.flat_config(_get_app_attr(app, "project"))
-  elif self.path == "/api/session":
+    self.wfile.write(json.dumps({"project": flat}).encode("utf-8"))
-   self.send_response(200)
+   elif self.path == "/api/session":
-   self.send_header("Content-Type", "application/json")
+    self.send_response(200)
-   self.end_headers()
+    self.send_header("Content-Type", "application/json")
-   lock = _get_app_attr(app, "_disc_entries_lock")
+    self.end_headers()
-   entries = _get_app_attr(app, "disc_entries", [])
+    lock = _get_app_attr(app, "_disc_entries_lock")
-   if lock:
+    entries = _get_app_attr(app, "disc_entries", [])
    with lock: entries_snapshot = list(entries)
   else:
    entries_snapshot = list(entries)
   self.wfile.write(json.dumps({"session": {"entries": entries_snapshot}}).encode("utf-8"))
  elif self.path == "/api/performance":
   self.send_response(200)
   self.send_header("Content-Type", "application/json")
   self.end_headers()
   metrics = {}
   perf = _get_app_attr(app, "perf_monitor")
   if perf: metrics = perf.get_metrics()
   self.wfile.write(json.dumps({"performance": metrics}).encode("utf-8"))
  elif self.path == "/api/events":
   self.send_response(200)
   self.send_header("Content-Type", "application/json")
   self.end_headers()
   events = []
   if _has_app_attr(app, "_api_event_queue"):
    lock = _get_app_attr(app, "_api_event_queue_lock")
    queue = _get_app_attr(app, "_api_event_queue")
    if lock:
-     with lock:
+     with lock: entries_snapshot = list(entries)
    else:
     entries_snapshot = list(entries)
    self.wfile.write(json.dumps({"session": {"entries": entries_snapshot}}).encode("utf-8"))
   elif self.path == "/api/performance":
    self.send_response(200)
    self.send_header("Content-Type", "application/json")
    self.end_headers()
    metrics = {}
    perf = _get_app_attr(app, "perf_monitor")
    if perf: metrics = perf.get_metrics()
    self.wfile.write(json.dumps({"performance": metrics}).encode("utf-8"))
   elif self.path == "/api/events":
    self.send_response(200)
    self.send_header("Content-Type", "application/json")
    self.end_headers()
    events = []
    if _has_app_attr(app, "_api_event_queue"):
     lock = _get_app_attr(app, "_api_event_queue_lock")
     queue = _get_app_attr(app, "_api_event_queue")
     if lock:
      with lock:
       events = list(queue)
       queue.clear()
     else:
      events = list(queue)
      queue.clear()
    self.wfile.write(json.dumps({"events": events}).encode("utf-8"))
   elif self.path.startswith("/api/gui/value/"):
    field_tag = self.path.split("/")[-1]
    event = threading.Event()
    result = {"value": None}
    def get_val():
     try:
      settable = _get_app_attr(app, "_settable_fields", {})
      gettable = _get_app_attr(app, "_gettable_fields", {})
      combined = {**settable, **gettable}
      if field_tag in combined:
       attr = combined[field_tag]
       result["value"] = _get_app_attr(app, attr, None)
      else:
       sys.stderr.write(f"[DEBUG] Hook API: field {field_tag} not found in settable or gettable\n")
       sys.stderr.flush()
     finally: event.set()
    lock = _get_app_attr(app, "_pending_gui_tasks_lock")
    tasks = _get_app_attr(app, "_pending_gui_tasks")
    if lock and tasks is not None:
     with lock: tasks.append({"action": "custom_callback", "callback": get_val})
    if event.wait(timeout=10):
     self.send_response(200)
     self.send_header("Content-Type", "application/json")
     self.end_headers()
     self.wfile.write(json.dumps(result).encode("utf-8"))
    else:
-     events = list(queue)
+     self.send_response(504)
-     queue.clear()
+     self.end_headers()
-   self.wfile.write(json.dumps({"events": events}).encode("utf-8"))
+   elif self.path == "/api/gui/mma_status":
-  elif self.path.startswith("/api/gui/value/"):
+    event = threading.Event()
-   field_tag = self.path.split("/")[-1]
+    result = {}
-   event = threading.Event()
+    def get_mma():
-   result = {"value": None}
+     try:
-   def get_val():
+      result["mma_status"] = _get_app_attr(app, "mma_status", "idle")
-    try:
+      result["ai_status"] = _get_app_attr(app, "ai_status", "idle")
-     settable = _get_app_attr(app, "_settable_fields", {})
+      result["active_tier"] = _get_app_attr(app, "active_tier", None)
-     gettable = _get_app_attr(app, "_gettable_fields", {})
+      at = _get_app_attr(app, "active_track", None)
-     combined = {**settable, **gettable}
+      result["active_track"] = at.id if hasattr(at, "id") else at
-     if field_tag in combined:
+      result["active_tickets"] = _get_app_attr(app, "active_tickets", [])
-      attr = combined[field_tag]
+      result["mma_step_mode"] = _get_app_attr(app, "mma_step_mode", False)
-      result["value"] = _get_app_attr(app, attr, None)
+      result["pending_tool_approval"] = _get_app_attr(app, "_pending_ask_dialog", False)
-     else:
+      result["pending_script_approval"] = _get_app_attr(app, "_pending_dialog", None) is not None
-      sys.stderr.write(f"[DEBUG] Hook API: field {field_tag} not found in settable or gettable\n")
+      result["pending_mma_step_approval"] = _get_app_attr(app, "_pending_mma_approval", None) is not None
-      sys.stderr.flush()
+      result["pending_mma_spawn_approval"] = _get_app_attr(app, "_pending_mma_spawn", None) is not None
-    finally: event.set()
+      result["pending_approval"] = result["pending_mma_step_approval"] or result["pending_tool_approval"]
-   lock = _get_app_attr(app, "_pending_gui_tasks_lock")
+      result["pending_spawn"] = result["pending_mma_spawn_approval"]
-   tasks = _get_app_attr(app, "_pending_gui_tasks")
+      result["tracks"] = _get_app_attr(app, "tracks", [])
-   if lock and tasks is not None:
+      result["proposed_tracks"] = _get_app_attr(app, "proposed_tracks", [])
-    with lock: tasks.append({"action": "custom_callback", "callback": get_val})
+      result["mma_streams"] = _get_app_attr(app, "mma_streams", {})
-   if event.wait(timeout=10):
+      result["mma_tier_usage"] = _get_app_attr(app, "mma_tier_usage", {})
     finally: event.set()
    lock = _get_app_attr(app, "_pending_gui_tasks_lock")
    tasks = _get_app_attr(app, "_pending_gui_tasks")
    if lock and tasks is not None:
     with lock: tasks.append({"action": "custom_callback", "callback": get_mma})
    if event.wait(timeout=10):
     self.send_response(200)
     self.send_header("Content-Type", "application/json")
     self.end_headers()
     self.wfile.write(json.dumps(result).encode("utf-8"))
    else:
     self.send_response(504)
     self.end_headers()
   elif self.path == "/api/gui/diagnostics":
    event = threading.Event()
    result = {}
    def check_all():
     try:
      status = _get_app_attr(app, "ai_status", "idle")
      result["thinking"] = status in ["sending...", "running powershell..."]
      result["live"] = status in ["running powershell...", "fetching url...", "searching web...", "powershell done, awaiting AI..."]
      result["prior"] = _get_app_attr(app, "is_viewing_prior_session", False)
     finally: event.set()
    lock = _get_app_attr(app, "_pending_gui_tasks_lock")
    tasks = _get_app_attr(app, "_pending_gui_tasks")
    if lock and tasks is not None:
     with lock: tasks.append({"action": "custom_callback", "callback": check_all})
    if event.wait(timeout=10):
     self.send_response(200)
     self.send_header("Content-Type", "application/json")
     self.end_headers()
     self.wfile.write(json.dumps(result).encode("utf-8"))
    else:
     self.send_response(504)
     self.end_headers()
   elif self.path == '/api/gui/state':
    event = threading.Event()
    result = {}
    def get_state():
     try:
      gettable = _get_app_attr(app, "_gettable_fields", {})
      for key, attr in gettable.items():
       val = _get_app_attr(app, attr, None)
       result[key] = _serialize_for_api(val)
     finally: event.set()
    lock = _get_app_attr(app, "_pending_gui_tasks_lock")
    tasks = _get_app_attr(app, "_pending_gui_tasks")
    if lock and tasks is not None:
     with lock: tasks.append({"action": "custom_callback", "callback": get_state})
    if event.wait(timeout=10):
     self.send_response(200)
     self.send_header("Content-Type", "application/json")
     self.end_headers()
     self.wfile.write(json.dumps(result).encode("utf-8"))
    else:
     self.send_response(504)
     self.end_headers()
   elif self.path == "/api/mma/workers":
    self.send_response(200)
    self.send_header("Content-Type", "application/json")
    self.end_headers()
-    self.wfile.write(json.dumps(result).encode("utf-8"))
+    mma_streams = _get_app_attr(app, "mma_streams", {})
-   else:
+    self.wfile.write(json.dumps({"workers": _serialize_for_api(mma_streams)}).encode("utf-8"))
-    self.send_response(504)
+   elif self.path == "/api/context/state":
    self.end_headers()
  elif self.path == "/api/gui/mma_status":
   event = threading.Event()
   result = {}
   def get_mma():
    try:
     result["mma_status"] = _get_app_attr(app, "mma_status", "idle")
     result["ai_status"] = _get_app_attr(app, "ai_status", "idle")
     result["active_tier"] = _get_app_attr(app, "active_tier", None)
     at = _get_app_attr(app, "active_track", None)
     result["active_track"] = at.id if hasattr(at, "id") else at
     result["active_tickets"] = _get_app_attr(app, "active_tickets", [])
     result["mma_step_mode"] = _get_app_attr(app, "mma_step_mode", False)
     result["pending_tool_approval"] = _get_app_attr(app, "_pending_ask_dialog", False)
     result["pending_script_approval"] = _get_app_attr(app, "_pending_dialog", None) is not None
     result["pending_mma_step_approval"] = _get_app_attr(app, "_pending_mma_approval", None) is not None
     result["pending_mma_spawn_approval"] = _get_app_attr(app, "_pending_mma_spawn", None) is not None
     result["pending_approval"] = result["pending_mma_step_approval"] or result["pending_tool_approval"]
     result["pending_spawn"] = result["pending_mma_spawn_approval"]
     result["tracks"] = _get_app_attr(app, "tracks", [])
     result["proposed_tracks"] = _get_app_attr(app, "proposed_tracks", [])
     result["mma_streams"] = _get_app_attr(app, "mma_streams", {})
     result["mma_tier_usage"] = _get_app_attr(app, "mma_tier_usage", {})
    finally: event.set()
   lock = _get_app_attr(app, "_pending_gui_tasks_lock")
   tasks = _get_app_attr(app, "_pending_gui_tasks")
   if lock and tasks is not None:
    with lock: tasks.append({"action": "custom_callback", "callback": get_mma})
   if event.wait(timeout=10):
    self.send_response(200)
    self.send_header("Content-Type", "application/json")
    self.end_headers()
-    self.wfile.write(json.dumps(result).encode("utf-8"))
+    files = _get_app_attr(app, "files", [])
-   else:
+    screenshots = _get_app_attr(app, "screenshots", [])
-    self.send_response(504)
+    self.wfile.write(json.dumps({"files": files, "screenshots": screenshots}).encode("utf-8"))
-    self.end_headers()
+   elif self.path == "/api/metrics/financial":
  elif self.path == "/api/gui/diagnostics":
   event = threading.Event()
   result = {}
   def check_all():
    try:
     status = _get_app_attr(app, "ai_status", "idle")
     result["thinking"] = status in ["sending...", "running powershell..."]
     result["live"] = status in ["running powershell...", "fetching url...", "searching web...", "powershell done, awaiting AI..."]
     result["prior"] = _get_app_attr(app, "is_viewing_prior_session", False)
    finally: event.set()
   lock = _get_app_attr(app, "_pending_gui_tasks_lock")
   tasks = _get_app_attr(app, "_pending_gui_tasks")
   if lock and tasks is not None:
    with lock: tasks.append({"action": "custom_callback", "callback": check_all})
   if event.wait(timeout=10):
    self.send_response(200)
    self.send_header("Content-Type", "application/json")
    self.end_headers()
-    self.wfile.write(json.dumps(result).encode("utf-8"))
+    usage = _get_app_attr(app, "mma_tier_usage", {})
-   else:
+    metrics = {}
-    self.send_response(504)
+    for tier, data in usage.items():
-    self.end_headers()
+     model = data.get("model", "")
-  elif self.path == '/api/gui/state':
+     in_t = data.get("input", 0)
-   event = threading.Event()
+     out_t = data.get("output", 0)
-   result = {}
+     cost = cost_tracker.estimate_cost(model, in_t, out_t)
-   def get_state():
+     metrics[tier] = {**data, "estimated_cost": cost}
-    try:
+    self.wfile.write(json.dumps({"financial": metrics}).encode("utf-8"))
-     gettable = _get_app_attr(app, "_gettable_fields", {})
+   elif self.path == "/api/system/telemetry":
     for key, attr in gettable.items():
      val = _get_app_attr(app, attr, None)
      result[key] = _serialize_for_api(val)
    finally: event.set()
   lock = _get_app_attr(app, "_pending_gui_tasks_lock")
   tasks = _get_app_attr(app, "_pending_gui_tasks")
   if lock and tasks is not None:
    with lock: tasks.append({"action": "custom_callback", "callback": get_state})
   if event.wait(timeout=10):
    self.send_response(200)
    self.send_header("Content-Type", "application/json")
    self.end_headers()
-    self.wfile.write(json.dumps(result).encode("utf-8"))
+    threads = [t.name for t in threading.enumerate()]
    queue_size = 0
    if _has_app_attr(app, "_api_event_queue"):
     queue = _get_app_attr(app, "_api_event_queue")
     if queue: queue_size = len(queue)
    self.wfile.write(json.dumps({"threads": threads, "event_queue_size": queue_size}).encode("utf-8"))
   else:
-    self.send_response(504)
+    self.send_response(404)
    self.end_headers()
-  elif self.path == "/api/mma/workers":
+  except Exception as e:
-   self.send_response(200)
+   self.send_response(500)
   self.send_header("Content-Type", "application/json")
   self.end_headers()
-   mma_streams = _get_app_attr(app, "mma_streams", {})
+   self.wfile.write(json.dumps({"error": str(e)}).encode("utf-8"))
   self.wfile.write(json.dumps({"workers": _serialize_for_api(mma_streams)}).encode("utf-8"))
  elif self.path == "/api/context/state":
   self.send_response(200)
   self.send_header("Content-Type", "application/json")
   self.end_headers()
   files = _get_app_attr(app, "files", [])
   screenshots = _get_app_attr(app, "screenshots", [])
   self.wfile.write(json.dumps({"files": files, "screenshots": screenshots}).encode("utf-8"))
  elif self.path == "/api/metrics/financial":
   self.send_response(200)
   self.send_header("Content-Type", "application/json")
   self.end_headers()
   usage = _get_app_attr(app, "mma_tier_usage", {})
   metrics = {}
   for tier, data in usage.items():
    model = data.get("model", "")
    in_t = data.get("input", 0)
    out_t = data.get("output", 0)
    cost = cost_tracker.estimate_cost(model, in_t, out_t)
    metrics[tier] = {**data, "estimated_cost": cost}
   self.wfile.write(json.dumps({"financial": metrics}).encode("utf-8"))
  elif self.path == "/api/system/telemetry":
   self.send_response(200)
   self.send_header("Content-Type", "application/json")
   self.end_headers()
   threads = [t.name for t in threading.enumerate()]
   queue_size = 0
   if _has_app_attr(app, "_api_event_queue"):
    queue = _get_app_attr(app, "_api_event_queue")
    if queue: queue_size = len(queue)
   self.wfile.write(json.dumps({"threads": threads, "event_queue_size": queue_size}).encode("utf-8"))
  else:
   self.send_response(404)
   self.end_headers()
 def do_POST(self) -> None:
  app = self.server.app
@@ -0,0 +1,117 @@
 import pytest
 from unittest.mock import patch, MagicMock
 from src.api_hook_client import ApiHookClient
@pytest.mark.asyncio
 async def test_mma_track_lifecycle_simulation():
 """
 This test simulates the sequence of API calls an external orchestrator
 would make to manage an MMA track lifecycle via the Hook API.
 It verifies that ApiHookClient correctly routes requests to the 
 corresponding endpoints in src/api_hooks.py.
 """
 client = ApiHookClient("http://localhost:8999")
 with patch('requests.get') as mock_get, patch('requests.post') as mock_post:
  # --- PHASE 1: Initialization & Discovery ---
  # Mock successful status check
  mock_get.return_value.status_code = 200
  mock_get.return_value.json.return_value = {"status": "ok"}
  assert client.get_status()["status"] == "ok"
  # Mock project state retrieval
  mock_get.return_value.json.return_value = {"project": {"name": "test_project"}}
  project = client.get_project()
  assert project["project"]["name"] == "test_project"
  # --- PHASE 2: Track Planning & Initialization ---
  # Inject some files into context for the AI to work with
  mock_post.return_value.status_code = 200
  mock_post.return_value.json.return_value = {"status": "queued"}
  inject_data = {"files": ["src/app_controller.py", "tests/test_basic.py"]}
  res = client.inject_context(inject_data)
  assert res["status"] == "queued"
  mock_post.assert_called_with("http://localhost:8999/api/context/inject", json=inject_data, headers={}, timeout=5.0)
  # --- PHASE 3: Worker Spawn & Execution ---
  # Spawn a worker to start a ticket
  spawn_data = {
   "track_id": "track_20260311",
   "ticket_id": "TKT-001",
   "role": "tier3-worker",
   "prompt": "Implement the new logging feature"
  }
  res = client.spawn_mma_worker(spawn_data)
  assert res["status"] == "queued"
  mock_post.assert_called_with("http://localhost:8999/api/mma/workers/spawn", json=spawn_data, headers={}, timeout=5.0)
  # --- PHASE 4: DAG Mutation & Dependency Management ---
  # Add a second ticket that depends on the first one
  dag_mutation = {
   "action": "add_ticket",
   "ticket": {
    "id": "TKT-002",
    "deps": ["TKT-001"],
    "role": "tier4-qa",
    "prompt": "Verify the logging feature"
   }
  }
  res = client.mutate_mma_dag(dag_mutation)
  assert res["status"] == "queued"
  mock_post.assert_called_with("http://localhost:8999/api/mma/dag/mutate", json=dag_mutation, headers={}, timeout=5.0)
  # --- PHASE 5: Monitoring & Status Polling ---
  # Poll for MMA status
  mock_get.return_value.json.return_value = {
   "mma_status": "running",
   "active_tickets": ["TKT-001"],
   "active_tier": "Tier 3",
   "tracks": [{"id": "track_20260311", "status": "active"}]
  }
  mma_status = client.get_mma_status()
  assert mma_status["mma_status"] == "running"
  assert "TKT-001" in mma_status["active_tickets"]
  # Check worker stream status
  mock_get.return_value.json.return_value = {
   "workers": {
    "TKT-001": {"status": "running", "output": "Starting work..."}
   }
  }
  workers = client.get_mma_workers()
  assert workers["workers"]["TKT-001"]["status"] == "running"
  # --- PHASE 6: Human-in-the-Loop Interaction ---
  # Mock a tool approval request
  # In a real scenario, this would block until a POST to /api/ask/respond occurs
  mock_post.return_value.json.return_value = {"status": "ok", "response": True}
  approved = client.request_confirmation("run_powershell", {"script": "ls -Recurse"})
  assert approved is True
  # --- PHASE 7: Completion & Cleanup ---
  # Mock completion status
  mock_get.return_value.json.return_value = {
   "mma_status": "idle",
   "active_tickets": [],
   "tracks": [{"id": "track_20260311", "status": "completed"}]
  }
  final_status = client.get_mma_status()
  assert final_status["mma_status"] == "idle"
  assert len(final_status["active_tickets"]) == 0
  # Reset session to clean up
  client.reset_session()
  # Verify reset click was pushed
  mock_post.assert_called_with("http://localhost:8999/api/gui", json={"action": "click", "item": "btn_reset", "user_data": None}, headers={}, timeout=5.0)
 if __name__ == "__main__":
 import asyncio
 asyncio.run(test_mma_track_lifecycle_simulation())