WIP: Regression hell

This commit is contained in:
2026-03-06 21:22:21 -05:00
parent 528f0a04c3
commit f65e9b40b2
14 changed files with 359 additions and 97 deletions

View File

@@ -252,7 +252,12 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
return ProviderError("unknown", "gemini", exc)
def _classify_deepseek_error(exc: Exception) -> ProviderError:
body = str(exc).lower()
body = ""
if isinstance(exc, requests.exceptions.HTTPError) and exc.response is not None:
body = exc.response.text.lower()
else:
body = str(exc).lower()
if "429" in body or "rate" in body:
return ProviderError("rate_limit", "deepseek", exc)
if "401" in body or "403" in body or "auth" in body or "api key" in body:
@@ -263,6 +268,13 @@ def _classify_deepseek_error(exc: Exception) -> ProviderError:
return ProviderError("quota", "deepseek", exc)
if "connection" in body or "timeout" in body or "network" in body:
return ProviderError("network", "deepseek", exc)
if "400" in body or "bad request" in body:
# Try to wrap the original error with the response body for better debugging
if body:
new_exc = Exception(f"Bad Request (400): {body}")
return ProviderError("unknown", "deepseek", new_exc)
return ProviderError("unknown", "deepseek", exc)
def set_provider(provider: str, model: str) -> None:
@@ -637,6 +649,54 @@ def _build_file_diff_text(changed_items: list[dict[str, Any]]) -> str:
parts.append(f"### `{path}` (no changes detected)")
return "\n\n---\n\n".join(parts)
def _build_deepseek_tools() -> list[dict[str, Any]]:
    """Assemble the OpenAI-style tool schema list for DeepSeek requests.

    Every MCP tool spec that is enabled in ``_agent_tools`` is converted to a
    ``{"type": "function", ...}`` entry; the PowerShell execution tool is
    appended last when it, too, is enabled. Tools toggled off are omitted.
    """
    tools: list[dict[str, Any]] = [
        {
            "type": "function",
            "function": {
                "name": spec["name"],
                "description": spec["description"],
                "parameters": spec["parameters"],
            },
        }
        for spec in mcp_client.MCP_TOOL_SPECS
        if _agent_tools.get(spec["name"], True)
    ]
    if _agent_tools.get(TOOL_NAME, True):
        tools.append({
            "type": "function",
            "function": {
                "name": TOOL_NAME,
                "description": (
                    "Run a PowerShell script within the project base_dir. "
                    "Use this to create, edit, rename, or delete files and directories. "
                    "The working directory is set to base_dir automatically. "
                    "Always prefer targeted edits over full rewrites where possible. "
                    "stdout and stderr are returned to you as the result."
                ),
                "parameters": {
                    "type": "object",
                    "properties": {
                        "script": {
                            "type": "string",
                            "description": "The PowerShell script to execute.",
                        },
                    },
                    "required": ["script"],
                },
            },
        })
    return tools
# Lazily-built, process-wide cache of the DeepSeek tool schemas.
_CACHED_DEEPSEEK_TOOLS: Optional[list[dict[str, Any]]] = None


def _get_deepseek_tools() -> list[dict[str, Any]]:
    """Return the DeepSeek tool schemas, building them once on first use."""
    global _CACHED_DEEPSEEK_TOOLS
    cached = _CACHED_DEEPSEEK_TOOLS
    if cached is None:
        cached = _build_deepseek_tools()
        _CACHED_DEEPSEEK_TOOLS = cached
    return cached
def _content_block_to_dict(block: Any) -> dict[str, Any]:
if isinstance(block, dict):
return block
@@ -1356,33 +1416,56 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
}
current_api_messages: list[dict[str, Any]] = []
is_reasoner = _model == "deepseek-reasoner"
# Update history following Anthropic pattern
with _deepseek_history_lock:
for msg in _deepseek_history:
current_api_messages.append(msg)
initial_user_message_content = user_message
if discussion_history:
initial_user_message_content = f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"
current_api_messages.append({"role": "user", "content": initial_user_message_content})
request_payload: dict[str, Any] = {
"model": _model,
"messages": current_api_messages,
"temperature": _temperature,
"max_tokens": _max_tokens,
"stream": stream,
}
sys_msg = {"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}
request_payload["messages"].insert(0, sys_msg)
if discussion_history and not _deepseek_history:
user_content = f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"
else:
user_content = user_message
_deepseek_history.append({"role": "user", "content": user_content})
all_text_parts: list[str] = []
_cumulative_tool_bytes = 0
round_idx = 0
while round_idx <= MAX_TOOL_ROUNDS + 1:
for round_idx in range(MAX_TOOL_ROUNDS + 2):
current_api_messages: list[dict[str, Any]] = []
with _deepseek_history_lock:
for msg in _deepseek_history:
current_api_messages.append(msg)
sys_msg = {"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}
current_api_messages.insert(0, sys_msg)
request_payload: dict[str, Any] = {
"model": _model,
"messages": current_api_messages,
"stream": stream,
}
if not is_reasoner:
request_payload["temperature"] = _temperature
request_payload["max_tokens"] = _max_tokens
tools = _get_deepseek_tools()
if tools:
request_payload["tools"] = tools
events.emit("request_start", payload={"provider": "deepseek", "model": _model, "round": round_idx, "streaming": stream})
try:
response = requests.post(api_url, headers=headers, json=request_payload, timeout=60, stream=stream)
response = requests.post(api_url, headers=headers, json=request_payload, timeout=120, stream=stream)
response.raise_for_status()
except requests.exceptions.RequestException as e:
raise _classify_deepseek_error(e) from e
assistant_text = ""
tool_calls_raw = []
reasoning_content = ""
finish_reason = "stop"
usage = {}
if stream:
aggregated_content = ""
aggregated_tool_calls: list[dict[str, Any]] = []
@@ -1443,10 +1526,12 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
reasoning_content = message.get("reasoning_content", "")
finish_reason = choice.get("finish_reason", "stop")
usage = response_data.get("usage", {})
thinking_tags = ""
if reasoning_content:
thinking_tags = f"<thinking>\n{reasoning_content}\n</thinking>\n"
full_assistant_text = thinking_tags + assistant_text
with _deepseek_history_lock:
msg_to_store: dict[str, Any] = {"role": "assistant", "content": assistant_text}
if reasoning_content:
@@ -1454,8 +1539,10 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
if tool_calls_raw:
msg_to_store["tool_calls"] = tool_calls_raw
_deepseek_history.append(msg_to_store)
if full_assistant_text:
all_text_parts.append(full_assistant_text)
_append_comms("IN", "response", {
"round": round_idx,
"stop_reason": finish_reason,
@@ -1464,6 +1551,7 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
"usage": usage,
"streaming": stream
})
if finish_reason != "tool_calls" and not tool_calls_raw:
break
if round_idx > MAX_TOOL_ROUNDS:
@@ -1507,16 +1595,11 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
"content": f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now."
})
_append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
with _deepseek_history_lock:
for tr in tool_results_for_history:
_deepseek_history.append(tr)
next_messages: list[dict[str, Any]] = []
with _deepseek_history_lock:
for msg in _deepseek_history:
next_messages.append(msg)
next_messages.insert(0, sys_msg)
request_payload["messages"] = next_messages
round_idx += 1
return "\n\n".join(all_text_parts) if all_text_parts else "(No text returned)"
except Exception as e:
raise _classify_deepseek_error(e) from e

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import json
import threading
import uuid
import sys
from http.server import ThreadingHTTPServer, BaseHTTPRequestHandler
from typing import Any
import logging
@@ -9,9 +10,15 @@ from src import session_logger
def _get_app_attr(app: Any, name: str, default: Any = None) -> Any:
if hasattr(app, name):
return getattr(app, name)
val = getattr(app, name)
sys.stderr.write(f"[DEBUG] _get_app_attr: found {name} in app -> {val}\n")
sys.stderr.flush()
return val
if hasattr(app, 'controller') and hasattr(app.controller, name):
return getattr(app.controller, name)
val = getattr(app.controller, name)
sys.stderr.write(f"[DEBUG] _get_app_attr: found {name} in controller -> {val}\n")
sys.stderr.flush()
return val
return default
def _has_app_attr(app: Any, name: str) -> bool:
@@ -94,7 +101,10 @@ class HookHandler(BaseHTTPRequestHandler):
settable = _get_app_attr(app, "_settable_fields", {})
if field_tag in settable:
attr = settable[field_tag]
result["value"] = _get_app_attr(app, attr, None)
val = _get_app_attr(app, attr, "MISSING")
sys.stderr.write(f"[DEBUG] Hook API: get_value {field_tag} -> attr {attr} -> {val}\n")
sys.stderr.flush()
result["value"] = val if val != "MISSING" else None
finally: event.set()
lock = _get_app_attr(app, "_pending_gui_tasks_lock")
tasks = _get_app_attr(app, "_pending_gui_tasks")

View File

@@ -361,12 +361,16 @@ class AppController:
def _process_pending_gui_tasks(self) -> None:
if not self._pending_gui_tasks:
return
sys.stderr.write(f"[DEBUG] _process_pending_gui_tasks: processing {len(self._pending_gui_tasks)} tasks\n")
sys.stderr.flush()
with self._pending_gui_tasks_lock:
tasks = self._pending_gui_tasks[:]
self._pending_gui_tasks.clear()
for task in tasks:
try:
action = task.get("action")
sys.stderr.write(f"[DEBUG] Processing GUI task: action={action}\n")
sys.stderr.flush()
if action:
session_logger.log_api_hook("PROCESS_TASK", action, str(task))
# ...
@@ -428,14 +432,18 @@ class AppController:
self._show_track_proposal_modal = True
elif action == "mma_state_update":
# Handle both internal (nested) and hook-server (flattened) payloads
payload = task.get("payload")
if not isinstance(payload, dict):
payload = task # Fallback to task if payload missing or wrong type
self.mma_status = payload.get("status", "idle")
self.active_tier = payload.get("active_tier")
p = task.get("payload")
if not isinstance(p, dict):
p = task # Fallback to task itself if payload is missing or wrong type
sys.stderr.write(f"[DEBUG] mma_state_update: status={p.get('status')} active_tier={p.get('active_tier')}\n")
sys.stderr.flush()
self.mma_status = p.get("status", self.mma_status)
self.active_tier = p.get("active_tier", self.active_tier)
# Preserve existing model/provider config if not explicitly in payload
new_usage = payload.get("tier_usage", {})
new_usage = p.get("tier_usage", {})
for tier, data in new_usage.items():
if tier in self.mma_tier_usage:
# Update usage counts but keep selected model/provider if not in update
@@ -446,12 +454,18 @@ class AppController:
else:
self.mma_tier_usage[tier] = data
self.active_tickets = payload.get("tickets", [])
track_data = payload.get("track")
self.active_tickets = p.get("tickets", [])
track_data = p.get("track")
if track_data:
tickets = []
for t_data in self.active_tickets:
tickets.append(models.Ticket(**t_data))
if isinstance(t_data, models.Ticket):
tickets.append(t_data)
else:
# Map 'goal' from Godot format to 'description' if needed
if "goal" in t_data and "description" not in t_data:
t_data["description"] = t_data["goal"]
tickets.append(models.Ticket.from_dict(t_data))
self.active_track = models.Track(
id=track_data.get("id"),
description=track_data.get("title", ""),

View File

@@ -1793,22 +1793,23 @@ class App:
imgui.separator()
# 4. Task DAG Visualizer
imgui.text("Task DAG")
if self.active_track:
if self.active_track and self.node_editor_ctx:
ed.set_current_editor(self.node_editor_ctx)
ed.begin('Visual DAG')
# Selection detection
selected = ed.get_selected_nodes()
if selected:
for node_id in selected:
node_val = node_id.id()
for t in self.active_tickets:
if abs(hash(str(t.get('id', '')))) == node_id:
if abs(hash(str(t.get('id', '')))) == node_val:
self.ui_selected_ticket_id = str(t.get('id', ''))
break
break
for t in self.active_tickets:
tid = str(t.get('id', '??'))
int_id = abs(hash(tid))
ed.begin_node(int_id)
ed.begin_node(ed.NodeId(int_id))
imgui.text_colored(C_KEY, f"Ticket: {tid}")
status = t.get('status', 'todo')
s_col = C_VAL
@@ -1819,18 +1820,18 @@ class App:
imgui.same_line()
imgui.text_colored(s_col, status)
imgui.text(f"Target: {t.get('target_file','')}")
ed.begin_pin(abs(hash(tid + "_in")), ed.PinKind.input)
ed.begin_pin(ed.PinId(abs(hash(tid + "_in"))), ed.PinKind.input)
imgui.text("->")
ed.end_pin()
imgui.same_line()
ed.begin_pin(abs(hash(tid + "_out")), ed.PinKind.output)
ed.begin_pin(ed.PinId(abs(hash(tid + "_out"))), ed.PinKind.output)
imgui.text("->")
ed.end_pin()
ed.end_node()
for t in self.active_tickets:
tid = str(t.get('id', '??'))
for dep in t.get('depends_on', []):
ed.link(abs(hash(dep + "_" + tid)), abs(hash(dep + "_out")), abs(hash(tid + "_in")))
ed.link(ed.LinkId(abs(hash(dep + "_" + tid))), ed.PinId(abs(hash(dep + "_out"))), ed.PinId(abs(hash(tid + "_in"))))
# Handle link creation
if ed.begin_create():
@@ -1855,7 +1856,7 @@ class App:
t.setdefault('depends_on', []).append(source_tid)
self._push_mma_state_update()
break
ed.end_create()
ed.end_create()
# Handle link deletion
if ed.begin_delete():
@@ -1870,7 +1871,7 @@ class App:
t['depends_on'] = [dep for dep in deps if abs(hash(dep + "_" + tid)) != lid_val]
self._push_mma_state_update()
break
ed.end_delete()
ed.end_delete()
# Validate DAG after any changes
try:
from src.dag_engine import TrackDAG
@@ -1881,7 +1882,6 @@ class App:
except Exception:
pass
ed.end()
ed.set_current_editor(None)
# 5. Add Ticket Form
imgui.separator()
if imgui.button("Add Ticket"):

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import tomllib
import datetime
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any, Union
from pathlib import Path
@@ -179,12 +180,24 @@ class Metadata:
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "Metadata":
    """Build a Metadata instance from a plain dict.

    ISO-8601 strings in ``created_at``/``updated_at`` are parsed into
    ``datetime`` objects; values that fail to parse are kept as the raw
    string rather than aborting the whole load. ``id`` is required;
    ``name`` and ``status`` fall back to ``""`` and ``"todo"``.

    NOTE(review): the original text passed ``created_at``/``updated_at``
    twice (stale pre-edit lines next to the new ones) — a SyntaxError as
    written; only the parsed values are passed here.
    """
    created = data.get("created_at")
    updated = data.get("updated_at")
    if isinstance(created, str):
        try:
            created = datetime.datetime.fromisoformat(created)
        except ValueError:
            pass  # keep the unparsable string as-is
    if isinstance(updated, str):
        try:
            updated = datetime.datetime.fromisoformat(updated)
        except ValueError:
            pass  # keep the unparsable string as-is
    return cls(
        id=data["id"],
        name=data.get("name", ""),
        status=data.get("status", "todo"),
        created_at=created,
        updated_at=updated,
    )