WIP: Regression hell

This commit is contained in:
2026-03-06 21:22:21 -05:00
parent 528f0a04c3
commit f65e9b40b2
14 changed files with 359 additions and 97 deletions

View File

@@ -1,6 +1,6 @@
[ai] [ai]
provider = "gemini" provider = "deepseek"
model = "gemini-2.5-flash-lite" model = "deepseek-v3"
temperature = 0.0 temperature = 0.0
max_tokens = 8192 max_tokens = 8192
history_trunc_limit = 8000 history_trunc_limit = 8000

View File

@@ -84,10 +84,10 @@ Size=900,700
Collapsed=0 Collapsed=0
[Window][Diagnostics] [Window][Diagnostics]
Pos=519,17 Pos=2989,1760
Size=1256,379 Size=851,377
Collapsed=0 Collapsed=0
DockId=0x0000001A,0 DockId=0x00000002,0
[Window][Context Hub] [Window][Context Hub]
Pos=0,17 Pos=0,17
@@ -102,16 +102,16 @@ Collapsed=0
DockId=0x0000000D,0 DockId=0x0000000D,0
[Window][Discussion Hub] [Window][Discussion Hub]
Pos=1777,17 Pos=1750,17
Size=1210,1883 Size=1237,1270
Collapsed=0 Collapsed=0
DockId=0x00000013,0 DockId=0x00000013,0
[Window][Operations Hub] [Window][Operations Hub]
Pos=519,398 Pos=519,17
Size=1256,1502 Size=1229,1270
Collapsed=0 Collapsed=0
DockId=0x0000001B,0 DockId=0x00000012,0
[Window][Files & Media] [Window][Files & Media]
Pos=0,979 Pos=0,979
@@ -132,15 +132,15 @@ Collapsed=0
[Window][MMA Dashboard] [Window][MMA Dashboard]
Pos=2989,17 Pos=2989,17
Size=851,2120 Size=851,1741
Collapsed=0 Collapsed=0
DockId=0x00000004,0 DockId=0x00000001,0
[Window][Log Management] [Window][Log Management]
Pos=2989,17 Pos=2989,17
Size=851,2120 Size=851,1741
Collapsed=0 Collapsed=0
DockId=0x00000004,1 DockId=0x00000001,1
[Window][Track Proposal] [Window][Track Proposal]
Pos=709,326 Pos=709,326
@@ -148,26 +148,26 @@ Size=262,209
Collapsed=0 Collapsed=0
[Window][Tier 1: Strategy] [Window][Tier 1: Strategy]
Pos=519,1902 Pos=519,1289
Size=1836,235 Size=513,848
Collapsed=0 Collapsed=0
DockId=0x00000014,0 DockId=0x00000014,0
[Window][Tier 2: Tech Lead] [Window][Tier 2: Tech Lead]
Pos=2357,1902 Pos=1034,1289
Size=253,235 Size=714,848
Collapsed=0 Collapsed=0
DockId=0x00000016,0 DockId=0x00000016,0
[Window][Tier 4: QA] [Window][Tier 4: QA]
Pos=2803,1902 Pos=2576,1289
Size=184,235 Size=411,848
Collapsed=0 Collapsed=0
DockId=0x00000019,0 DockId=0x00000019,0
[Window][Tier 3: Workers] [Window][Tier 3: Workers]
Pos=2612,1902 Pos=1750,1289
Size=189,235 Size=824,848
Collapsed=0 Collapsed=0
DockId=0x00000018,0 DockId=0x00000018,0
@@ -265,20 +265,20 @@ DockSpace ID=0xAFC85805 Window=0x079D3A04 Pos=0,17 Size=3840,2120
DockNode ID=0x00000005 Parent=0x00000007 SizeRef=295,960 Selected=0xF4139CA2 DockNode ID=0x00000005 Parent=0x00000007 SizeRef=295,960 Selected=0xF4139CA2
DockNode ID=0x00000006 Parent=0x00000007 SizeRef=295,1158 CentralNode=1 Selected=0x7BD57D6A DockNode ID=0x00000006 Parent=0x00000007 SizeRef=295,1158 CentralNode=1 Selected=0x7BD57D6A
DockNode ID=0x0000000E Parent=0x0000000B SizeRef=2468,858 Split=Y Selected=0x418C7449 DockNode ID=0x0000000E Parent=0x0000000B SizeRef=2468,858 Split=Y Selected=0x418C7449
DockNode ID=0x00000010 Parent=0x0000000E SizeRef=868,1883 Split=X Selected=0x418C7449 DockNode ID=0x00000010 Parent=0x0000000E SizeRef=868,1270 Split=X Selected=0x418C7449
DockNode ID=0x00000012 Parent=0x00000010 SizeRef=1256,402 Split=Y Selected=0xB4CBF21A DockNode ID=0x00000012 Parent=0x00000010 SizeRef=1229,402 Selected=0x418C7449
DockNode ID=0x0000001A Parent=0x00000012 SizeRef=1141,379 Selected=0xB4CBF21A DockNode ID=0x00000013 Parent=0x00000010 SizeRef=1237,402 Selected=0x6F2B5B04
DockNode ID=0x0000001B Parent=0x00000012 SizeRef=1141,1502 Selected=0x418C7449 DockNode ID=0x00000011 Parent=0x0000000E SizeRef=868,848 Split=X Selected=0x5CDB7A4B
DockNode ID=0x00000013 Parent=0x00000010 SizeRef=1210,402 Selected=0x6F2B5B04 DockNode ID=0x00000014 Parent=0x00000011 SizeRef=513,837 Selected=0xBB346584
DockNode ID=0x00000011 Parent=0x0000000E SizeRef=868,235 Split=X Selected=0x5CDB7A4B DockNode ID=0x00000015 Parent=0x00000011 SizeRef=1953,837 Split=X Selected=0x5CDB7A4B
DockNode ID=0x00000014 Parent=0x00000011 SizeRef=1836,837 Selected=0xBB346584 DockNode ID=0x00000016 Parent=0x00000015 SizeRef=714,837 Selected=0x390E7942
DockNode ID=0x00000015 Parent=0x00000011 SizeRef=630,837 Split=X Selected=0x5CDB7A4B DockNode ID=0x00000017 Parent=0x00000015 SizeRef=1237,837 Split=X Selected=0x655BC6E9
DockNode ID=0x00000016 Parent=0x00000015 SizeRef=730,837 Selected=0x390E7942 DockNode ID=0x00000018 Parent=0x00000017 SizeRef=824,874 Selected=0x655BC6E9
DockNode ID=0x00000017 Parent=0x00000015 SizeRef=1083,837 Split=X Selected=0x655BC6E9 DockNode ID=0x00000019 Parent=0x00000017 SizeRef=411,874 Selected=0x5CDB7A4B
DockNode ID=0x00000018 Parent=0x00000017 SizeRef=547,874 Selected=0x655BC6E9
DockNode ID=0x00000019 Parent=0x00000017 SizeRef=534,874 Selected=0x5CDB7A4B
DockNode ID=0x0000000D Parent=0x00000003 SizeRef=435,1186 Selected=0x363E93D6 DockNode ID=0x0000000D Parent=0x00000003 SizeRef=435,1186 Selected=0x363E93D6
DockNode ID=0x00000004 Parent=0xAFC85805 SizeRef=851,1183 Selected=0x3AEC3498 DockNode ID=0x00000004 Parent=0xAFC85805 SizeRef=851,1183 Split=Y Selected=0x3AEC3498
DockNode ID=0x00000001 Parent=0x00000004 SizeRef=851,1741 Selected=0x3AEC3498
DockNode ID=0x00000002 Parent=0x00000004 SizeRef=851,377 Selected=0xB4CBF21A
;;;<<<Layout_655921752_Default>>>;;; ;;;<<<Layout_655921752_Default>>>;;;
;;;<<<HelloImGui_Misc>>>;;; ;;;<<<HelloImGui_Misc>>>;;;

View File

@@ -632,3 +632,71 @@ When you need to verify a change, rely on the exit code and stdout/stderr from t
1+ 1? 1+ 1?
------------------ ------------------
--- MOCK INVOKED ---
ARGS: ['C:\\projects\\manual_slop\\tests\\mock_gemini_cli.py', '-m', 'gemini-2.5-flash-lite', '--prompt', '', '--output-format', 'stream-json']
PROMPT:
You are a helpful coding assistant with access to a PowerShell tool (run_powershell) and MCP tools (file access: read_file, list_directory, search_files, get_file_summary, web access: web_search, fetch_url). When calling file/directory tools, always use the 'path' parameter for the target path. When asked to create or edit files, prefer targeted edits over full rewrites. Always explain what you are doing before invoking the tool.
When writing or rewriting large files (especially those containing quotes, backticks, or special characters), avoid python -c with inline strings. Instead: (1) write a .py helper script to disk using a PS here-string (@'...'@ for literal content), (2) run it with `python <script>`, (3) delete the helper. For small targeted edits, use PowerShell's (Get-Content) / .Replace() / Set-Content or Add-Content directly.
When making function calls using tools that accept array or object parameters ensure those are structured using JSON. For example:
When you need to verify a change, rely on the exit code and stdout/stderr from the tool — the user's context files are automatically refreshed after every tool call, so you do NOT need to re-read files that are already provided in the <context> block.
<context>
</context>
[DISCUSSION HISTORY]
## Discussion History
### Discussion Excerpt 1
@2026-03-06T20:40:49
System:
[PERFORMANCE ALERT] CPU usage high: 91.0%. Please consider optimizing recent changes or reducing load.
---
### Discussion Excerpt 2
@2026-03-06T20:47:26
System:
[PERFORMANCE ALERT] CPU usage high: 95.5%. Please consider optimizing recent changes or reducing load.
---
### Discussion Excerpt 3
@2026-03-06T20:48:08
System:
[PERFORMANCE ALERT] CPU usage high: 93.4%. Please consider optimizing recent changes or reducing load.
---
### Discussion Excerpt 4
@2026-03-06T20:49:49
System:
[PERFORMANCE ALERT] CPU usage high: 85.4%. Please consider optimizing recent changes or reducing load.
---
### Discussion Excerpt 5
@2026-03-06T20:51:47
System:
[PERFORMANCE ALERT] CPU usage high: 86.7%. Please consider optimizing recent changes or reducing load.
---
### Discussion Excerpt 6
@2026-03-06T20:53:31
System:
[PERFORMANCE ALERT] CPU usage high: 96.6%. Please consider optimizing recent changes or reducing load.
---
testing gemini cli
------------------

View File

@@ -252,7 +252,12 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
return ProviderError("unknown", "gemini", exc) return ProviderError("unknown", "gemini", exc)
def _classify_deepseek_error(exc: Exception) -> ProviderError: def _classify_deepseek_error(exc: Exception) -> ProviderError:
body = str(exc).lower() body = ""
if isinstance(exc, requests.exceptions.HTTPError) and exc.response is not None:
body = exc.response.text.lower()
else:
body = str(exc).lower()
if "429" in body or "rate" in body: if "429" in body or "rate" in body:
return ProviderError("rate_limit", "deepseek", exc) return ProviderError("rate_limit", "deepseek", exc)
if "401" in body or "403" in body or "auth" in body or "api key" in body: if "401" in body or "403" in body or "auth" in body or "api key" in body:
@@ -263,6 +268,13 @@ def _classify_deepseek_error(exc: Exception) -> ProviderError:
return ProviderError("quota", "deepseek", exc) return ProviderError("quota", "deepseek", exc)
if "connection" in body or "timeout" in body or "network" in body: if "connection" in body or "timeout" in body or "network" in body:
return ProviderError("network", "deepseek", exc) return ProviderError("network", "deepseek", exc)
if "400" in body or "bad request" in body:
# Try to wrap the original error with the response body for better debugging
if body:
new_exc = Exception(f"Bad Request (400): {body}")
return ProviderError("unknown", "deepseek", new_exc)
return ProviderError("unknown", "deepseek", exc) return ProviderError("unknown", "deepseek", exc)
def set_provider(provider: str, model: str) -> None: def set_provider(provider: str, model: str) -> None:
@@ -637,6 +649,54 @@ def _build_file_diff_text(changed_items: list[dict[str, Any]]) -> str:
parts.append(f"### `{path}` (no changes detected)") parts.append(f"### `{path}` (no changes detected)")
return "\n\n---\n\n".join(parts) return "\n\n---\n\n".join(parts)
def _build_deepseek_tools() -> list[dict[str, Any]]:
mcp_tools: list[dict[str, Any]] = []
for spec in mcp_client.MCP_TOOL_SPECS:
if _agent_tools.get(spec["name"], True):
mcp_tools.append({
"type": "function",
"function": {
"name": spec["name"],
"description": spec["description"],
"parameters": spec["parameters"],
}
})
tools_list = mcp_tools
if _agent_tools.get(TOOL_NAME, True):
powershell_tool: dict[str, Any] = {
"type": "function",
"function": {
"name": TOOL_NAME,
"description": (
"Run a PowerShell script within the project base_dir. "
"Use this to create, edit, rename, or delete files and directories. "
"The working directory is set to base_dir automatically. "
"Always prefer targeted edits over full rewrites where possible. "
"stdout and stderr are returned to you as the result."
),
"parameters": {
"type": "object",
"properties": {
"script": {
"type": "string",
"description": "The PowerShell script to execute."
}
},
"required": ["script"]
}
}
}
tools_list.append(powershell_tool)
return tools_list
_CACHED_DEEPSEEK_TOOLS: Optional[list[dict[str, Any]]] = None
def _get_deepseek_tools() -> list[dict[str, Any]]:
global _CACHED_DEEPSEEK_TOOLS
if _CACHED_DEEPSEEK_TOOLS is None:
_CACHED_DEEPSEEK_TOOLS = _build_deepseek_tools()
return _CACHED_DEEPSEEK_TOOLS
def _content_block_to_dict(block: Any) -> dict[str, Any]: def _content_block_to_dict(block: Any) -> dict[str, Any]:
if isinstance(block, dict): if isinstance(block, dict):
return block return block
@@ -1356,33 +1416,56 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
"Authorization": f"Bearer {api_key}", "Authorization": f"Bearer {api_key}",
"Content-Type": "application/json", "Content-Type": "application/json",
} }
current_api_messages: list[dict[str, Any]] = []
is_reasoner = _model == "deepseek-reasoner"
# Update history following Anthropic pattern
with _deepseek_history_lock: with _deepseek_history_lock:
for msg in _deepseek_history: if discussion_history and not _deepseek_history:
current_api_messages.append(msg) user_content = f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"
initial_user_message_content = user_message else:
if discussion_history: user_content = user_message
initial_user_message_content = f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}" _deepseek_history.append({"role": "user", "content": user_content})
current_api_messages.append({"role": "user", "content": initial_user_message_content})
request_payload: dict[str, Any] = {
"model": _model,
"messages": current_api_messages,
"temperature": _temperature,
"max_tokens": _max_tokens,
"stream": stream,
}
sys_msg = {"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}
request_payload["messages"].insert(0, sys_msg)
all_text_parts: list[str] = [] all_text_parts: list[str] = []
_cumulative_tool_bytes = 0 _cumulative_tool_bytes = 0
round_idx = 0
while round_idx <= MAX_TOOL_ROUNDS + 1: for round_idx in range(MAX_TOOL_ROUNDS + 2):
current_api_messages: list[dict[str, Any]] = []
with _deepseek_history_lock:
for msg in _deepseek_history:
current_api_messages.append(msg)
sys_msg = {"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}
current_api_messages.insert(0, sys_msg)
request_payload: dict[str, Any] = {
"model": _model,
"messages": current_api_messages,
"stream": stream,
}
if not is_reasoner:
request_payload["temperature"] = _temperature
request_payload["max_tokens"] = _max_tokens
tools = _get_deepseek_tools()
if tools:
request_payload["tools"] = tools
events.emit("request_start", payload={"provider": "deepseek", "model": _model, "round": round_idx, "streaming": stream}) events.emit("request_start", payload={"provider": "deepseek", "model": _model, "round": round_idx, "streaming": stream})
try: try:
response = requests.post(api_url, headers=headers, json=request_payload, timeout=60, stream=stream) response = requests.post(api_url, headers=headers, json=request_payload, timeout=120, stream=stream)
response.raise_for_status() response.raise_for_status()
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
raise _classify_deepseek_error(e) from e raise _classify_deepseek_error(e) from e
assistant_text = ""
tool_calls_raw = []
reasoning_content = ""
finish_reason = "stop"
usage = {}
if stream: if stream:
aggregated_content = "" aggregated_content = ""
aggregated_tool_calls: list[dict[str, Any]] = [] aggregated_tool_calls: list[dict[str, Any]] = []
@@ -1443,10 +1526,12 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
reasoning_content = message.get("reasoning_content", "") reasoning_content = message.get("reasoning_content", "")
finish_reason = choice.get("finish_reason", "stop") finish_reason = choice.get("finish_reason", "stop")
usage = response_data.get("usage", {}) usage = response_data.get("usage", {})
thinking_tags = "" thinking_tags = ""
if reasoning_content: if reasoning_content:
thinking_tags = f"<thinking>\n{reasoning_content}\n</thinking>\n" thinking_tags = f"<thinking>\n{reasoning_content}\n</thinking>\n"
full_assistant_text = thinking_tags + assistant_text full_assistant_text = thinking_tags + assistant_text
with _deepseek_history_lock: with _deepseek_history_lock:
msg_to_store: dict[str, Any] = {"role": "assistant", "content": assistant_text} msg_to_store: dict[str, Any] = {"role": "assistant", "content": assistant_text}
if reasoning_content: if reasoning_content:
@@ -1454,8 +1539,10 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
if tool_calls_raw: if tool_calls_raw:
msg_to_store["tool_calls"] = tool_calls_raw msg_to_store["tool_calls"] = tool_calls_raw
_deepseek_history.append(msg_to_store) _deepseek_history.append(msg_to_store)
if full_assistant_text: if full_assistant_text:
all_text_parts.append(full_assistant_text) all_text_parts.append(full_assistant_text)
_append_comms("IN", "response", { _append_comms("IN", "response", {
"round": round_idx, "round": round_idx,
"stop_reason": finish_reason, "stop_reason": finish_reason,
@@ -1464,6 +1551,7 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
"usage": usage, "usage": usage,
"streaming": stream "streaming": stream
}) })
if finish_reason != "tool_calls" and not tool_calls_raw: if finish_reason != "tool_calls" and not tool_calls_raw:
break break
if round_idx > MAX_TOOL_ROUNDS: if round_idx > MAX_TOOL_ROUNDS:
@@ -1507,16 +1595,11 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
"content": f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now." "content": f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now."
}) })
_append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"}) _append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
with _deepseek_history_lock: with _deepseek_history_lock:
for tr in tool_results_for_history: for tr in tool_results_for_history:
_deepseek_history.append(tr) _deepseek_history.append(tr)
next_messages: list[dict[str, Any]] = []
with _deepseek_history_lock:
for msg in _deepseek_history:
next_messages.append(msg)
next_messages.insert(0, sys_msg)
request_payload["messages"] = next_messages
round_idx += 1
return "\n\n".join(all_text_parts) if all_text_parts else "(No text returned)" return "\n\n".join(all_text_parts) if all_text_parts else "(No text returned)"
except Exception as e: except Exception as e:
raise _classify_deepseek_error(e) from e raise _classify_deepseek_error(e) from e

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import json import json
import threading import threading
import uuid import uuid
import sys
from http.server import ThreadingHTTPServer, BaseHTTPRequestHandler from http.server import ThreadingHTTPServer, BaseHTTPRequestHandler
from typing import Any from typing import Any
import logging import logging
@@ -9,9 +10,15 @@ from src import session_logger
def _get_app_attr(app: Any, name: str, default: Any = None) -> Any: def _get_app_attr(app: Any, name: str, default: Any = None) -> Any:
if hasattr(app, name): if hasattr(app, name):
return getattr(app, name) val = getattr(app, name)
sys.stderr.write(f"[DEBUG] _get_app_attr: found {name} in app -> {val}\n")
sys.stderr.flush()
return val
if hasattr(app, 'controller') and hasattr(app.controller, name): if hasattr(app, 'controller') and hasattr(app.controller, name):
return getattr(app.controller, name) val = getattr(app.controller, name)
sys.stderr.write(f"[DEBUG] _get_app_attr: found {name} in controller -> {val}\n")
sys.stderr.flush()
return val
return default return default
def _has_app_attr(app: Any, name: str) -> bool: def _has_app_attr(app: Any, name: str) -> bool:
@@ -94,7 +101,10 @@ class HookHandler(BaseHTTPRequestHandler):
settable = _get_app_attr(app, "_settable_fields", {}) settable = _get_app_attr(app, "_settable_fields", {})
if field_tag in settable: if field_tag in settable:
attr = settable[field_tag] attr = settable[field_tag]
result["value"] = _get_app_attr(app, attr, None) val = _get_app_attr(app, attr, "MISSING")
sys.stderr.write(f"[DEBUG] Hook API: get_value {field_tag} -> attr {attr} -> {val}\n")
sys.stderr.flush()
result["value"] = val if val != "MISSING" else None
finally: event.set() finally: event.set()
lock = _get_app_attr(app, "_pending_gui_tasks_lock") lock = _get_app_attr(app, "_pending_gui_tasks_lock")
tasks = _get_app_attr(app, "_pending_gui_tasks") tasks = _get_app_attr(app, "_pending_gui_tasks")

View File

@@ -361,12 +361,16 @@ class AppController:
def _process_pending_gui_tasks(self) -> None: def _process_pending_gui_tasks(self) -> None:
if not self._pending_gui_tasks: if not self._pending_gui_tasks:
return return
sys.stderr.write(f"[DEBUG] _process_pending_gui_tasks: processing {len(self._pending_gui_tasks)} tasks\n")
sys.stderr.flush()
with self._pending_gui_tasks_lock: with self._pending_gui_tasks_lock:
tasks = self._pending_gui_tasks[:] tasks = self._pending_gui_tasks[:]
self._pending_gui_tasks.clear() self._pending_gui_tasks.clear()
for task in tasks: for task in tasks:
try: try:
action = task.get("action") action = task.get("action")
sys.stderr.write(f"[DEBUG] Processing GUI task: action={action}\n")
sys.stderr.flush()
if action: if action:
session_logger.log_api_hook("PROCESS_TASK", action, str(task)) session_logger.log_api_hook("PROCESS_TASK", action, str(task))
# ... # ...
@@ -428,14 +432,18 @@ class AppController:
self._show_track_proposal_modal = True self._show_track_proposal_modal = True
elif action == "mma_state_update": elif action == "mma_state_update":
# Handle both internal (nested) and hook-server (flattened) payloads # Handle both internal (nested) and hook-server (flattened) payloads
payload = task.get("payload") p = task.get("payload")
if not isinstance(payload, dict): if not isinstance(p, dict):
payload = task # Fallback to task if payload missing or wrong type p = task # Fallback to task itself if payload is missing or wrong type
self.mma_status = payload.get("status", "idle")
self.active_tier = payload.get("active_tier") sys.stderr.write(f"[DEBUG] mma_state_update: status={p.get('status')} active_tier={p.get('active_tier')}\n")
sys.stderr.flush()
self.mma_status = p.get("status", self.mma_status)
self.active_tier = p.get("active_tier", self.active_tier)
# Preserve existing model/provider config if not explicitly in payload # Preserve existing model/provider config if not explicitly in payload
new_usage = payload.get("tier_usage", {}) new_usage = p.get("tier_usage", {})
for tier, data in new_usage.items(): for tier, data in new_usage.items():
if tier in self.mma_tier_usage: if tier in self.mma_tier_usage:
# Update usage counts but keep selected model/provider if not in update # Update usage counts but keep selected model/provider if not in update
@@ -446,12 +454,18 @@ class AppController:
else: else:
self.mma_tier_usage[tier] = data self.mma_tier_usage[tier] = data
self.active_tickets = payload.get("tickets", []) self.active_tickets = p.get("tickets", [])
track_data = payload.get("track") track_data = p.get("track")
if track_data: if track_data:
tickets = [] tickets = []
for t_data in self.active_tickets: for t_data in self.active_tickets:
tickets.append(models.Ticket(**t_data)) if isinstance(t_data, models.Ticket):
tickets.append(t_data)
else:
# Map 'goal' from Godot format to 'description' if needed
if "goal" in t_data and "description" not in t_data:
t_data["description"] = t_data["goal"]
tickets.append(models.Ticket.from_dict(t_data))
self.active_track = models.Track( self.active_track = models.Track(
id=track_data.get("id"), id=track_data.get("id"),
description=track_data.get("title", ""), description=track_data.get("title", ""),

View File

@@ -1793,22 +1793,23 @@ class App:
imgui.separator() imgui.separator()
# 4. Task DAG Visualizer # 4. Task DAG Visualizer
imgui.text("Task DAG") imgui.text("Task DAG")
if self.active_track: if self.active_track and self.node_editor_ctx:
ed.set_current_editor(self.node_editor_ctx) ed.set_current_editor(self.node_editor_ctx)
ed.begin('Visual DAG') ed.begin('Visual DAG')
# Selection detection # Selection detection
selected = ed.get_selected_nodes() selected = ed.get_selected_nodes()
if selected: if selected:
for node_id in selected: for node_id in selected:
node_val = node_id.id()
for t in self.active_tickets: for t in self.active_tickets:
if abs(hash(str(t.get('id', '')))) == node_id: if abs(hash(str(t.get('id', '')))) == node_val:
self.ui_selected_ticket_id = str(t.get('id', '')) self.ui_selected_ticket_id = str(t.get('id', ''))
break break
break break
for t in self.active_tickets: for t in self.active_tickets:
tid = str(t.get('id', '??')) tid = str(t.get('id', '??'))
int_id = abs(hash(tid)) int_id = abs(hash(tid))
ed.begin_node(int_id) ed.begin_node(ed.NodeId(int_id))
imgui.text_colored(C_KEY, f"Ticket: {tid}") imgui.text_colored(C_KEY, f"Ticket: {tid}")
status = t.get('status', 'todo') status = t.get('status', 'todo')
s_col = C_VAL s_col = C_VAL
@@ -1819,18 +1820,18 @@ class App:
imgui.same_line() imgui.same_line()
imgui.text_colored(s_col, status) imgui.text_colored(s_col, status)
imgui.text(f"Target: {t.get('target_file','')}") imgui.text(f"Target: {t.get('target_file','')}")
ed.begin_pin(abs(hash(tid + "_in")), ed.PinKind.input) ed.begin_pin(ed.PinId(abs(hash(tid + "_in"))), ed.PinKind.input)
imgui.text("->") imgui.text("->")
ed.end_pin() ed.end_pin()
imgui.same_line() imgui.same_line()
ed.begin_pin(abs(hash(tid + "_out")), ed.PinKind.output) ed.begin_pin(ed.PinId(abs(hash(tid + "_out"))), ed.PinKind.output)
imgui.text("->") imgui.text("->")
ed.end_pin() ed.end_pin()
ed.end_node() ed.end_node()
for t in self.active_tickets: for t in self.active_tickets:
tid = str(t.get('id', '??')) tid = str(t.get('id', '??'))
for dep in t.get('depends_on', []): for dep in t.get('depends_on', []):
ed.link(abs(hash(dep + "_" + tid)), abs(hash(dep + "_out")), abs(hash(tid + "_in"))) ed.link(ed.LinkId(abs(hash(dep + "_" + tid))), ed.PinId(abs(hash(dep + "_out"))), ed.PinId(abs(hash(tid + "_in"))))
# Handle link creation # Handle link creation
if ed.begin_create(): if ed.begin_create():
@@ -1855,7 +1856,7 @@ class App:
t.setdefault('depends_on', []).append(source_tid) t.setdefault('depends_on', []).append(source_tid)
self._push_mma_state_update() self._push_mma_state_update()
break break
ed.end_create() ed.end_create()
# Handle link deletion # Handle link deletion
if ed.begin_delete(): if ed.begin_delete():
@@ -1870,7 +1871,7 @@ class App:
t['depends_on'] = [dep for dep in deps if abs(hash(dep + "_" + tid)) != lid_val] t['depends_on'] = [dep for dep in deps if abs(hash(dep + "_" + tid)) != lid_val]
self._push_mma_state_update() self._push_mma_state_update()
break break
ed.end_delete() ed.end_delete()
# Validate DAG after any changes # Validate DAG after any changes
try: try:
from src.dag_engine import TrackDAG from src.dag_engine import TrackDAG
@@ -1881,7 +1882,6 @@ class App:
except Exception: except Exception:
pass pass
ed.end() ed.end()
ed.set_current_editor(None)
# 5. Add Ticket Form # 5. Add Ticket Form
imgui.separator() imgui.separator()
if imgui.button("Add Ticket"): if imgui.button("Add Ticket"):

View File

@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import tomllib import tomllib
import datetime
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any, Union from typing import List, Optional, Dict, Any, Union
from pathlib import Path from pathlib import Path
@@ -179,12 +180,24 @@ class Metadata:
@classmethod @classmethod
def from_dict(cls, data: Dict[str, Any]) -> "Metadata": def from_dict(cls, data: Dict[str, Any]) -> "Metadata":
created = data.get("created_at")
updated = data.get("updated_at")
if isinstance(created, str):
try:
created = datetime.datetime.fromisoformat(created)
except ValueError:
pass
if isinstance(updated, str):
try:
updated = datetime.datetime.fromisoformat(updated)
except ValueError:
pass
return cls( return cls(
id=data["id"], id=data["id"],
name=data.get("name", ""), name=data.get("name", ""),
status=data.get("status", "todo"), status=data.get("status", "todo"),
created_at=data.get("created_at"), created_at=created,
updated_at=data.get("updated_at"), updated_at=updated,
) )

View File

@@ -123,7 +123,7 @@ def mock_app() -> Generator[App, None, None]:
'projects': {'paths': [], 'active': ''}, 'projects': {'paths': [], 'active': ''},
'gui': {'show_windows': {}} 'gui': {'show_windows': {}}
}), }),
patch('src.gui_2.save_config'), patch('src.models.save_config'),
patch('src.gui_2.project_manager'), patch('src.gui_2.project_manager'),
patch('src.gui_2.session_logger'), patch('src.gui_2.session_logger'),
patch('src.gui_2.immapp.run'), patch('src.gui_2.immapp.run'),
@@ -155,7 +155,7 @@ def app_instance() -> Generator[App, None, None]:
'projects': {'paths': [], 'active': ''}, 'projects': {'paths': [], 'active': ''},
'gui': {'show_windows': {}} 'gui': {'show_windows': {}}
}), }),
patch('src.gui_2.save_config'), patch('src.models.save_config'),
patch('src.gui_2.project_manager'), patch('src.gui_2.project_manager'),
patch('src.gui_2.session_logger'), patch('src.gui_2.session_logger'),
patch('src.gui_2.immapp.run'), patch('src.gui_2.immapp.run'),

View File

@@ -34,10 +34,15 @@ def test_conductor_engine_run_executes_tickets_in_order(monkeypatch: pytest.Monk
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved # Mocking lifecycle to mark ticket as complete so dependencies can be resolved
def side_effect(ticket, context, *args, **kwargs): def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete() import threading
def do_work():
import time
time.sleep(0.1)
ticket.mark_complete()
threading.Thread(target=do_work).start()
return "Success" return "Success"
mock_lifecycle.side_effect = side_effect mock_lifecycle.side_effect = side_effect
engine.run() engine.run(max_ticks=20)
vlogger.log_state("T1 Status Final", "todo", ticket1.status) vlogger.log_state("T1 Status Final", "todo", ticket1.status)
vlogger.log_state("T2 Status Final", "todo", ticket2.status) vlogger.log_state("T2 Status Final", "todo", ticket2.status)
@@ -108,8 +113,8 @@ def test_run_worker_lifecycle_context_injection(monkeypatch: pytest.MonkeyPatch)
run_worker_lifecycle(ticket, context, context_files=context_files) run_worker_lifecycle(ticket, context, context_files=context_files)
# Verify ASTParser calls: # Verify ASTParser calls:
# First file (primary) should get curated view, others (secondary) get skeleton # First file (primary) should get curated view, others (secondary) get skeleton
mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass") mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass", path="primary.py")
mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass") mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass", path="secondary.py")
# Verify user_message contains the views # Verify user_message contains the views
_, kwargs = mock_send.call_args _, kwargs = mock_send.call_args
user_message = kwargs["user_message"] user_message = kwargs["user_message"]
@@ -238,10 +243,15 @@ def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch: pytest.Monk
# Mock run_worker_lifecycle to mark tickets as complete # Mock run_worker_lifecycle to mark tickets as complete
with patch("src.multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle: with patch("src.multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
def side_effect(ticket, context, *args, **kwargs): def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete() import threading
def do_work():
import time
time.sleep(0.1)
ticket.mark_complete()
threading.Thread(target=do_work).start()
return "Success" return "Success"
mock_lifecycle.side_effect = side_effect mock_lifecycle.side_effect = side_effect
engine.run() engine.run(max_ticks=20)
assert mock_lifecycle.call_count == 3 assert mock_lifecycle.call_count == 3
# Verify dependency order: T1 must be called before T2 # Verify dependency order: T1 must be called before T2
calls = [call[0][0].id for call in mock_lifecycle.call_args_list] calls = [call[0][0].id for call in mock_lifecycle.call_args_list]

View File

@@ -111,3 +111,55 @@ def test_deepseek_streaming(mock_post: MagicMock) -> None:
result = ai_client.send(md_content="Context", user_message="Stream test", base_dir=".", stream=True) result = ai_client.send(md_content="Context", user_message="Stream test", base_dir=".", stream=True)
assert result == "Hello World" assert result == "Hello World"
@patch("requests.post")
def test_deepseek_payload_verification(mock_post: MagicMock) -> None:
    """
    Verifies that the correct JSON payload (tools, history, params) is sent to DeepSeek.

    Configures the client for the standard deepseek-chat model, issues a single
    send() with discussion history, and inspects the JSON body handed to
    requests.post.
    """
    ai_client.set_provider("deepseek", "deepseek-chat")
    ai_client.reset_session()

    fake_credentials = {"deepseek": {"api_key": "test-key"}}
    with patch("src.ai_client._load_credentials", return_value=fake_credentials):
        # Stub a minimal successful HTTP response so send() completes normally.
        fake_response = MagicMock()
        fake_response.status_code = 200
        fake_response.json.return_value = {
            "choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}]
        }
        mock_post.return_value = fake_response

        ai_client.send(
            md_content="Context",
            user_message="Message 1",
            base_dir=".",
            discussion_history="History",
        )

        # The request body is passed to requests.post via the `json` kwarg.
        _, call_kwargs = mock_post.call_args
        payload = call_kwargs["json"]

        assert payload["model"] == "deepseek-chat"
        assert "tools" in payload
        # Exactly two messages are expected: system prompt plus the user turn.
        assert len(payload["messages"]) == 2
        assert "[DISCUSSION HISTORY]\n\nHistory" in payload["messages"][1]["content"]
        assert "temperature" in payload
        assert "max_tokens" in payload
@patch("requests.post")
def test_deepseek_reasoner_payload_verification(mock_post: MagicMock) -> None:
    """
    Verifies that deepseek-reasoner payload excludes tools and temperature.

    The reasoner model takes a reduced payload; this test switches the client
    to deepseek-reasoner, performs one send(), and checks that the sampling and
    tool-calling keys are absent from the JSON body.
    """
    ai_client.set_provider("deepseek", "deepseek-reasoner")
    ai_client.reset_session()

    fake_credentials = {"deepseek": {"api_key": "test-key"}}
    with patch("src.ai_client._load_credentials", return_value=fake_credentials):
        # Minimal successful response stub.
        fake_response = MagicMock()
        fake_response.status_code = 200
        fake_response.json.return_value = {
            "choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}]
        }
        mock_post.return_value = fake_response

        ai_client.send(md_content="Context", user_message="Message 1", base_dir=".")

        _, call_kwargs = mock_post.call_args
        payload = call_kwargs["json"]

        assert payload["model"] == "deepseek-reasoner"
        # None of these keys may be present for the reasoner model.
        for excluded_key in ("tools", "temperature", "max_tokens"):
            assert excluded_key not in payload

View File

@@ -19,11 +19,12 @@ def test_gui2_hubs_exist_in_show_windows(app_instance: App) -> None:
def test_gui2_old_windows_removed_from_show_windows(app_instance: App) -> None:
    """
    Verifies that the old fragmented windows are removed from show_windows.
    Note: Message, Response, and Tool Calls are kept as they are now optional standalone windows.
    """
    # Window titles that must no longer be registered in show_windows.
    retired_windows = (
        "Projects",
        "Files",
        "Screenshots",
        "Provider",
        "System Prompts",
        "Comms History",
    )
    for window_title in retired_windows:
        assert window_title not in app_instance.show_windows, f"Old window '{window_title}' should have been removed from show_windows"

View File

@@ -77,7 +77,11 @@ def test_render_log_management_logic(app_instance: App) -> None:
patch("src.gui_2.imgui.table_headers_row"), \ patch("src.gui_2.imgui.table_headers_row"), \
patch("src.gui_2.imgui.table_next_row"), \ patch("src.gui_2.imgui.table_next_row"), \
patch("src.gui_2.imgui.table_next_column"), \ patch("src.gui_2.imgui.table_next_column"), \
patch("src.gui_2.imgui.same_line"), \
patch("src.gui_2.imgui.text_colored"), \
patch("src.gui_2.imgui.separator"), \
patch("src.gui_2.imgui.button"): patch("src.gui_2.imgui.button"):
mock_reg = MockRegistry.return_value mock_reg = MockRegistry.return_value
mock_reg.data = { mock_reg.data = {
"session_1": { "session_1": {

View File

@@ -39,11 +39,18 @@ def test_visual_mma_components(live_gui):
client.push_event("mma_state_update", payload) client.push_event("mma_state_update", payload)
print(" - MMA state update pushed.") print(" - MMA state update pushed.")
# Use ApiHookClient.wait_for_event if we had a specific event to wait for, # Poll for state update
# but here we just want to verify state. success = False
time.sleep(1) # Small sleep for UI to catch up with event queue for _ in range(50): # 10 seconds total
if client.get_value("mma_active_tier") == "Tier 3":
success = True
break
time.sleep(0.2)
assert success, f"State did not update to Tier 3. Current: {client.get_value('mma_active_tier')}"
# 3. Trigger HITL modal # 3. Trigger HITL modal
print("Pushing 'mma_step_approval' event to trigger HITL modal...") print("Pushing 'mma_step_approval' event to trigger HITL modal...")
approval_payload = { approval_payload = {
"ticket_id": "TICKET-002", "ticket_id": "TICKET-002",