REGRESSSIOSSSOONNNNSSSS

2026-03-06 21:39:50 -05:00
parent b88fdfde03
commit 12dba31c1d
3 changed files with 95 additions and 41 deletions
@@ -254,28 +254,35 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
 def _classify_deepseek_error(exc: Exception) -> ProviderError:
+ """Map an exception raised by a DeepSeek request to a ProviderError.
+
+ For an HTTPError that carries a response, the text that gets classified is
+ the "message" field of the JSON "error" object when one is present, and the
+ raw response text otherwise. For any other exception it is str(exc).
+
+ The kind ("rate_limit", "auth", "balance", "quota", "network" or "unknown")
+ is chosen by case-insensitive substring checks on that text, tested in that
+ order. The returned ProviderError wraps a new Exception built from the
+ extracted text, not the original exception object.
+ """
 body = ""
 if isinstance(exc, requests.exceptions.HTTPError) and exc.response is not None:
-  body = exc.response.text.lower()
+  try:
+   # Try to get the detailed error from DeepSeek's JSON response
+   err_data = exc.response.json()
+   if "error" in err_data:
+    body = str(err_data["error"].get("message", exc.response.text))
+   else:
+    body = exc.response.text
+  except Exception:
+   # Best-effort fallback: the body was not JSON, or the JSON was not shaped
+   # like {"error": {...}}. Catching Exception (not a bare except) keeps the
+   # fallback but lets KeyboardInterrupt / SystemExit propagate.
+   body = exc.response.text
 else:
-  body = str(exc).lower()
+  body = str(exc)
 
- if "429" in body or "rate" in body:
-  return ProviderError("rate_limit", "deepseek", exc)
- if "401" in body or "403" in body or "auth" in body or "api key" in body:
-  return ProviderError("auth", "deepseek", exc)
- if "402" in body or "balance" in body or "billing" in body:
-  return ProviderError("balance", "deepseek", exc)
- if "quota" in body or "limit exceeded" in body:
-  return ProviderError("quota", "deepseek", exc)
- if "connection" in body or "timeout" in body or "network" in body:
-  return ProviderError("network", "deepseek", exc)
+ # Match on a lowered copy so the wrapped message keeps its original casing
+ body_l = body.lower()
+ if "429" in body_l or "rate" in body_l:
+  return ProviderError("rate_limit", "deepseek", Exception(body))
+ if "401" in body_l or "403" in body_l or "auth" in body_l or "api key" in body_l:
+  return ProviderError("auth", "deepseek", Exception(body))
+ if "402" in body_l or "balance" in body_l or "billing" in body_l:
+  return ProviderError("balance", "deepseek", Exception(body))
+ if "quota" in body_l or "limit exceeded" in body_l:
+  return ProviderError("quota", "deepseek", Exception(body))
+ if "connection" in body_l or "timeout" in body_l or "network" in body_l:
+  return ProviderError("network", "deepseek", Exception(body))
 
- if "400" in body or "bad request" in body:
-  # Try to wrap the original error with the response body for better debugging
-  if body:
-   new_exc = Exception(f"Bad Request (400): {body}")
-   return ProviderError("unknown", "deepseek", new_exc)
+ # 400 / bad-request text is still kind "unknown"; only the message gets a prefix
+ if "400" in body_l or "bad request" in body_l:
+  return ProviderError("unknown", "deepseek", Exception(f"DeepSeek Bad Request: {body}"))

- return ProviderError("unknown", "deepseek", exc)
+ return ProviderError("unknown", "deepseek", Exception(body))

 def set_provider(provider: str, model: str) -> None:
 global _provider, _model
@@ -393,7 +400,7 @@ def _list_anthropic_models() -> list[str]:
  raise _classify_anthropic_error(exc) from exc

 def _list_deepseek_models(api_key: str) -> list[str]:
- return ["deepseek-chat", "deepseek-reasoner", "deepseek-v3", "deepseek-r1"]
+ """Return the fixed list of DeepSeek model names; api_key is not used."""
+ model_names: list[str] = ["deepseek-chat", "deepseek-reasoner"]
+ return model_names

 TOOL_NAME: str = "run_powershell"

@@ -1417,7 +1424,7 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
   "Content-Type": "application/json",
  }
  
-  is_reasoner = _model == "deepseek-reasoner"
+  is_reasoner = _model in ("deepseek-reasoner", "deepseek-r1")
  
  # Update history following Anthropic pattern
  with _deepseek_history_lock:
@@ -1432,12 +1439,41 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
  
  for round_idx in range(MAX_TOOL_ROUNDS + 2):
   current_api_messages: list[dict[str, Any]] = []
-   with _deepseek_history_lock:
-    for msg in _deepseek_history:
-     current_api_messages.append(msg)
   
-   sys_msg = {"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}
-   current_api_messages.insert(0, sys_msg)
+   # DeepSeek R1 (Reasoner) can be extremely strict about the 'system' role.
+   # For maximum compatibility, we'll only use 'system' for non-reasoner models.
+   if not is_reasoner:
+    sys_msg = {"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}
+    current_api_messages.append(sys_msg)
+   
+   with _deepseek_history_lock:
+    for i, msg in enumerate(_deepseek_history):
+     # Create a clean copy of the message for the API
+     role = msg.get("role")
+     api_msg = {"role": role}
+     
+     content = msg.get("content")
+     if i == 0 and is_reasoner:
+      # Prepend system instructions to the first user message for R1
+      content = f"System Instructions:\n{_get_combined_system_prompt()}\n\nContext:\n{md_content}\n\n---\n\n{content}"
+     
+     if role == "assistant":
+      # OpenAI/DeepSeek: content MUST be a string if tool_calls is absent
+      # If tool_calls is present, content can be null
+      if msg.get("tool_calls"):
+       api_msg["content"] = content or None
+       api_msg["tool_calls"] = msg["tool_calls"]
+      else:
+       api_msg["content"] = content or ""
+      if msg.get("reasoning_content"):
+       api_msg["reasoning_content"] = msg["reasoning_content"]
+     elif role == "tool":
+      api_msg["content"] = content or ""
+      api_msg["tool_call_id"] = msg.get("tool_call_id")
+     else:
+      api_msg["content"] = content or ""
+     
+     current_api_messages.append(api_msg)
   
   request_payload: dict[str, Any] = {
    "model": _model,
@@ -1445,9 +1481,13 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
    "stream": stream,
   }
   
+   if stream:
+    request_payload["stream_options"] = {"include_usage": True}
+   
   if not is_reasoner:
    request_payload["temperature"] = _temperature
-    request_payload["max_tokens"] = _max_tokens
+    # DeepSeek max_tokens is for the output, clamp to 8192 which is their hard limit for V3/Chat
+    request_payload["max_tokens"] = min(_max_tokens, 8192)
    tools = _get_deepseek_tools()
    if tools:
     request_payload["tools"] = tools
@@ -1482,6 +1522,10 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
       continue
      try:
       chunk = json.loads(chunk_str)
+       if not chunk.get("choices"):
+        if chunk.get("usage"):
+         current_usage = cast(dict[str, Any], chunk["usage"])
+        continue
       delta = cast(dict[str, Any], chunk.get("choices", [{}])[0].get("delta", {}))
       if delta.get("content"):
        content_chunk = cast(str, delta["content"])
@@ -1533,7 +1577,8 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
   full_assistant_text = thinking_tags + assistant_text
   
   with _deepseek_history_lock:
-    msg_to_store: dict[str, Any] = {"role": "assistant", "content": assistant_text}
+    # DeepSeek/OpenAI: If tool_calls are present, content can be null but should usually be present
+    msg_to_store: dict[str, Any] = {"role": "assistant", "content": assistant_text or None}
    if reasoning_content:
     msg_to_store["reasoning_content"] = reasoning_content
    if tool_calls_raw:
@@ -1583,7 +1628,6 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
    tool_results_for_history.append({
      "role": "tool",
      "tool_call_id": call_id,
-      "name": name,
      "content": truncated,
     })
    _append_comms("IN", "tool_result", {"name": name, "id": call_id, "output": out})
@@ -40,6 +40,15 @@ class HookServerInstance(ThreadingHTTPServer):
  super().__init__(server_address, RequestHandlerClass)
  self.app = app

+def _serialize_for_api(obj: Any) -> Any:
+ """Recursively convert a value before it is placed in an API result.
+
+ An object exposing a to_dict attribute is replaced by the result of calling
+ it (that result is not processed further). Lists and dicts are rebuilt with
+ every element / value converted the same way; dict keys are kept as-is.
+ Any other value is returned unchanged.
+ """
+ # The to_dict check comes first so it wins even for list/dict subclasses
+ if hasattr(obj, "to_dict"):
+  converted = obj.to_dict()
+ elif isinstance(obj, list):
+  converted = []
+  for item in obj:
+   converted.append(_serialize_for_api(item))
+ elif isinstance(obj, dict):
+  converted = {}
+  for key, value in obj.items():
+   converted[key] = _serialize_for_api(value)
+ else:
+  converted = obj
+ return converted
+
 class HookHandler(BaseHTTPRequestHandler):
 """Handles incoming HTTP requests for the API hooks."""
 def do_GET(self) -> None:
@@ -184,7 +193,8 @@ class HookHandler(BaseHTTPRequestHandler):
    try:
     gettable = _get_app_attr(app, "_gettable_fields", {})
     for key, attr in gettable.items():
-      result[key] = _get_app_attr(app, attr, None)
+      val = _get_app_attr(app, attr, None)
+      result[key] = _serialize_for_api(val)
    finally: event.set()
   lock = _get_app_attr(app, "_pending_gui_tasks_lock")
   tasks = _get_app_attr(app, "_pending_gui_tasks")