WIP: Regression hell

This commit is contained in:
2026-03-06 21:22:21 -05:00
parent 528f0a04c3
commit f65e9b40b2
14 changed files with 359 additions and 97 deletions

View File

@@ -1,6 +1,6 @@
[ai]
provider = "gemini"
model = "gemini-2.5-flash-lite"
provider = "deepseek"
model = "deepseek-v3"
temperature = 0.0
max_tokens = 8192
history_trunc_limit = 8000

View File

@@ -84,10 +84,10 @@ Size=900,700
Collapsed=0
[Window][Diagnostics]
Pos=519,17
Size=1256,379
Pos=2989,1760
Size=851,377
Collapsed=0
DockId=0x0000001A,0
DockId=0x00000002,0
[Window][Context Hub]
Pos=0,17
@@ -102,16 +102,16 @@ Collapsed=0
DockId=0x0000000D,0
[Window][Discussion Hub]
Pos=1777,17
Size=1210,1883
Pos=1750,17
Size=1237,1270
Collapsed=0
DockId=0x00000013,0
[Window][Operations Hub]
Pos=519,398
Size=1256,1502
Pos=519,17
Size=1229,1270
Collapsed=0
DockId=0x0000001B,0
DockId=0x00000012,0
[Window][Files & Media]
Pos=0,979
@@ -132,15 +132,15 @@ Collapsed=0
[Window][MMA Dashboard]
Pos=2989,17
Size=851,2120
Size=851,1741
Collapsed=0
DockId=0x00000004,0
DockId=0x00000001,0
[Window][Log Management]
Pos=2989,17
Size=851,2120
Size=851,1741
Collapsed=0
DockId=0x00000004,1
DockId=0x00000001,1
[Window][Track Proposal]
Pos=709,326
@@ -148,26 +148,26 @@ Size=262,209
Collapsed=0
[Window][Tier 1: Strategy]
Pos=519,1902
Size=1836,235
Pos=519,1289
Size=513,848
Collapsed=0
DockId=0x00000014,0
[Window][Tier 2: Tech Lead]
Pos=2357,1902
Size=253,235
Pos=1034,1289
Size=714,848
Collapsed=0
DockId=0x00000016,0
[Window][Tier 4: QA]
Pos=2803,1902
Size=184,235
Pos=2576,1289
Size=411,848
Collapsed=0
DockId=0x00000019,0
[Window][Tier 3: Workers]
Pos=2612,1902
Size=189,235
Pos=1750,1289
Size=824,848
Collapsed=0
DockId=0x00000018,0
@@ -265,20 +265,20 @@ DockSpace ID=0xAFC85805 Window=0x079D3A04 Pos=0,17 Size=3840,2120
DockNode ID=0x00000005 Parent=0x00000007 SizeRef=295,960 Selected=0xF4139CA2
DockNode ID=0x00000006 Parent=0x00000007 SizeRef=295,1158 CentralNode=1 Selected=0x7BD57D6A
DockNode ID=0x0000000E Parent=0x0000000B SizeRef=2468,858 Split=Y Selected=0x418C7449
DockNode ID=0x00000010 Parent=0x0000000E SizeRef=868,1883 Split=X Selected=0x418C7449
DockNode ID=0x00000012 Parent=0x00000010 SizeRef=1256,402 Split=Y Selected=0xB4CBF21A
DockNode ID=0x0000001A Parent=0x00000012 SizeRef=1141,379 Selected=0xB4CBF21A
DockNode ID=0x0000001B Parent=0x00000012 SizeRef=1141,1502 Selected=0x418C7449
DockNode ID=0x00000013 Parent=0x00000010 SizeRef=1210,402 Selected=0x6F2B5B04
DockNode ID=0x00000011 Parent=0x0000000E SizeRef=868,235 Split=X Selected=0x5CDB7A4B
DockNode ID=0x00000014 Parent=0x00000011 SizeRef=1836,837 Selected=0xBB346584
DockNode ID=0x00000015 Parent=0x00000011 SizeRef=630,837 Split=X Selected=0x5CDB7A4B
DockNode ID=0x00000016 Parent=0x00000015 SizeRef=730,837 Selected=0x390E7942
DockNode ID=0x00000017 Parent=0x00000015 SizeRef=1083,837 Split=X Selected=0x655BC6E9
DockNode ID=0x00000018 Parent=0x00000017 SizeRef=547,874 Selected=0x655BC6E9
DockNode ID=0x00000019 Parent=0x00000017 SizeRef=534,874 Selected=0x5CDB7A4B
DockNode ID=0x00000010 Parent=0x0000000E SizeRef=868,1270 Split=X Selected=0x418C7449
DockNode ID=0x00000012 Parent=0x00000010 SizeRef=1229,402 Selected=0x418C7449
DockNode ID=0x00000013 Parent=0x00000010 SizeRef=1237,402 Selected=0x6F2B5B04
DockNode ID=0x00000011 Parent=0x0000000E SizeRef=868,848 Split=X Selected=0x5CDB7A4B
DockNode ID=0x00000014 Parent=0x00000011 SizeRef=513,837 Selected=0xBB346584
DockNode ID=0x00000015 Parent=0x00000011 SizeRef=1953,837 Split=X Selected=0x5CDB7A4B
DockNode ID=0x00000016 Parent=0x00000015 SizeRef=714,837 Selected=0x390E7942
DockNode ID=0x00000017 Parent=0x00000015 SizeRef=1237,837 Split=X Selected=0x655BC6E9
DockNode ID=0x00000018 Parent=0x00000017 SizeRef=824,874 Selected=0x655BC6E9
DockNode ID=0x00000019 Parent=0x00000017 SizeRef=411,874 Selected=0x5CDB7A4B
DockNode ID=0x0000000D Parent=0x00000003 SizeRef=435,1186 Selected=0x363E93D6
DockNode ID=0x00000004 Parent=0xAFC85805 SizeRef=851,1183 Selected=0x3AEC3498
DockNode ID=0x00000004 Parent=0xAFC85805 SizeRef=851,1183 Split=Y Selected=0x3AEC3498
DockNode ID=0x00000001 Parent=0x00000004 SizeRef=851,1741 Selected=0x3AEC3498
DockNode ID=0x00000002 Parent=0x00000004 SizeRef=851,377 Selected=0xB4CBF21A
;;;<<<Layout_655921752_Default>>>;;;
;;;<<<HelloImGui_Misc>>>;;;

View File

@@ -632,3 +632,71 @@ When you need to verify a change, rely on the exit code and stdout/stderr from t
1+ 1?
------------------
--- MOCK INVOKED ---
ARGS: ['C:\\projects\\manual_slop\\tests\\mock_gemini_cli.py', '-m', 'gemini-2.5-flash-lite', '--prompt', '', '--output-format', 'stream-json']
PROMPT:
You are a helpful coding assistant with access to a PowerShell tool (run_powershell) and MCP tools (file access: read_file, list_directory, search_files, get_file_summary, web access: web_search, fetch_url). When calling file/directory tools, always use the 'path' parameter for the target path. When asked to create or edit files, prefer targeted edits over full rewrites. Always explain what you are doing before invoking the tool.
When writing or rewriting large files (especially those containing quotes, backticks, or special characters), avoid python -c with inline strings. Instead: (1) write a .py helper script to disk using a PS here-string (@'...'@ for literal content), (2) run it with `python <script>`, (3) delete the helper. For small targeted edits, use PowerShell's (Get-Content) / .Replace() / Set-Content or Add-Content directly.
When making function calls using tools that accept array or object parameters ensure those are structured using JSON. For example:
When you need to verify a change, rely on the exit code and stdout/stderr from the tool — the user's context files are automatically refreshed after every tool call, so you do NOT need to re-read files that are already provided in the <context> block.
<context>
</context>
[DISCUSSION HISTORY]
## Discussion History
### Discussion Excerpt 1
@2026-03-06T20:40:49
System:
[PERFORMANCE ALERT] CPU usage high: 91.0%. Please consider optimizing recent changes or reducing load.
---
### Discussion Excerpt 2
@2026-03-06T20:47:26
System:
[PERFORMANCE ALERT] CPU usage high: 95.5%. Please consider optimizing recent changes or reducing load.
---
### Discussion Excerpt 3
@2026-03-06T20:48:08
System:
[PERFORMANCE ALERT] CPU usage high: 93.4%. Please consider optimizing recent changes or reducing load.
---
### Discussion Excerpt 4
@2026-03-06T20:49:49
System:
[PERFORMANCE ALERT] CPU usage high: 85.4%. Please consider optimizing recent changes or reducing load.
---
### Discussion Excerpt 5
@2026-03-06T20:51:47
System:
[PERFORMANCE ALERT] CPU usage high: 86.7%. Please consider optimizing recent changes or reducing load.
---
### Discussion Excerpt 6
@2026-03-06T20:53:31
System:
[PERFORMANCE ALERT] CPU usage high: 96.6%. Please consider optimizing recent changes or reducing load.
---
testing gemini cli
------------------

View File

@@ -252,7 +252,12 @@ def _classify_gemini_error(exc: Exception) -> ProviderError:
return ProviderError("unknown", "gemini", exc)
def _classify_deepseek_error(exc: Exception) -> ProviderError:
body = str(exc).lower()
body = ""
if isinstance(exc, requests.exceptions.HTTPError) and exc.response is not None:
body = exc.response.text.lower()
else:
body = str(exc).lower()
if "429" in body or "rate" in body:
return ProviderError("rate_limit", "deepseek", exc)
if "401" in body or "403" in body or "auth" in body or "api key" in body:
@@ -263,6 +268,13 @@ def _classify_deepseek_error(exc: Exception) -> ProviderError:
return ProviderError("quota", "deepseek", exc)
if "connection" in body or "timeout" in body or "network" in body:
return ProviderError("network", "deepseek", exc)
if "400" in body or "bad request" in body:
# Try to wrap the original error with the response body for better debugging
if body:
new_exc = Exception(f"Bad Request (400): {body}")
return ProviderError("unknown", "deepseek", new_exc)
return ProviderError("unknown", "deepseek", exc)
def set_provider(provider: str, model: str) -> None:
@@ -637,6 +649,54 @@ def _build_file_diff_text(changed_items: list[dict[str, Any]]) -> str:
parts.append(f"### `{path}` (no changes detected)")
return "\n\n---\n\n".join(parts)
def _build_deepseek_tools() -> list[dict[str, Any]]:
    """Assemble the tool definitions advertised to the DeepSeek API.

    Every enabled MCP tool spec is converted into the OpenAI-style
    function-calling schema, and the PowerShell execution tool is appended
    when it is enabled. A tool counts as enabled unless `_agent_tools`
    explicitly maps its name to a falsy value.
    """
    # Enabled MCP tools, wrapped in the {"type": "function", ...} envelope.
    tools: list[dict[str, Any]] = [
        {
            "type": "function",
            "function": {
                "name": spec["name"],
                "description": spec["description"],
                "parameters": spec["parameters"],
            },
        }
        for spec in mcp_client.MCP_TOOL_SPECS
        if _agent_tools.get(spec["name"], True)
    ]
    if _agent_tools.get(TOOL_NAME, True):
        tools.append({
            "type": "function",
            "function": {
                "name": TOOL_NAME,
                "description": (
                    "Run a PowerShell script within the project base_dir. "
                    "Use this to create, edit, rename, or delete files and directories. "
                    "The working directory is set to base_dir automatically. "
                    "Always prefer targeted edits over full rewrites where possible. "
                    "stdout and stderr are returned to you as the result."
                ),
                "parameters": {
                    "type": "object",
                    "properties": {
                        "script": {
                            "type": "string",
                            "description": "The PowerShell script to execute.",
                        }
                    },
                    "required": ["script"],
                },
            },
        })
    return tools
# Module-level memo for the DeepSeek tool list; filled in on first request.
_CACHED_DEEPSEEK_TOOLS: Optional[list[dict[str, Any]]] = None

def _get_deepseek_tools() -> list[dict[str, Any]]:
    """Return the DeepSeek tool definitions, building them at most once."""
    global _CACHED_DEEPSEEK_TOOLS
    if _CACHED_DEEPSEEK_TOOLS is not None:
        return _CACHED_DEEPSEEK_TOOLS
    _CACHED_DEEPSEEK_TOOLS = _build_deepseek_tools()
    return _CACHED_DEEPSEEK_TOOLS
def _content_block_to_dict(block: Any) -> dict[str, Any]:
if isinstance(block, dict):
return block
@@ -1356,33 +1416,56 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
}
current_api_messages: list[dict[str, Any]] = []
is_reasoner = _model == "deepseek-reasoner"
# Update history following Anthropic pattern
with _deepseek_history_lock:
for msg in _deepseek_history:
current_api_messages.append(msg)
initial_user_message_content = user_message
if discussion_history:
initial_user_message_content = f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"
current_api_messages.append({"role": "user", "content": initial_user_message_content})
request_payload: dict[str, Any] = {
"model": _model,
"messages": current_api_messages,
"temperature": _temperature,
"max_tokens": _max_tokens,
"stream": stream,
}
sys_msg = {"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}
request_payload["messages"].insert(0, sys_msg)
if discussion_history and not _deepseek_history:
user_content = f"[DISCUSSION HISTORY]\n\n{discussion_history}\n\n---\n\n{user_message}"
else:
user_content = user_message
_deepseek_history.append({"role": "user", "content": user_content})
all_text_parts: list[str] = []
_cumulative_tool_bytes = 0
round_idx = 0
while round_idx <= MAX_TOOL_ROUNDS + 1:
for round_idx in range(MAX_TOOL_ROUNDS + 2):
current_api_messages: list[dict[str, Any]] = []
with _deepseek_history_lock:
for msg in _deepseek_history:
current_api_messages.append(msg)
sys_msg = {"role": "system", "content": f"{_get_combined_system_prompt()}\n\n<context>\n{md_content}\n</context>"}
current_api_messages.insert(0, sys_msg)
request_payload: dict[str, Any] = {
"model": _model,
"messages": current_api_messages,
"stream": stream,
}
if not is_reasoner:
request_payload["temperature"] = _temperature
request_payload["max_tokens"] = _max_tokens
tools = _get_deepseek_tools()
if tools:
request_payload["tools"] = tools
events.emit("request_start", payload={"provider": "deepseek", "model": _model, "round": round_idx, "streaming": stream})
try:
response = requests.post(api_url, headers=headers, json=request_payload, timeout=60, stream=stream)
response = requests.post(api_url, headers=headers, json=request_payload, timeout=120, stream=stream)
response.raise_for_status()
except requests.exceptions.RequestException as e:
raise _classify_deepseek_error(e) from e
assistant_text = ""
tool_calls_raw = []
reasoning_content = ""
finish_reason = "stop"
usage = {}
if stream:
aggregated_content = ""
aggregated_tool_calls: list[dict[str, Any]] = []
@@ -1443,10 +1526,12 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
reasoning_content = message.get("reasoning_content", "")
finish_reason = choice.get("finish_reason", "stop")
usage = response_data.get("usage", {})
thinking_tags = ""
if reasoning_content:
thinking_tags = f"<thinking>\n{reasoning_content}\n</thinking>\n"
full_assistant_text = thinking_tags + assistant_text
with _deepseek_history_lock:
msg_to_store: dict[str, Any] = {"role": "assistant", "content": assistant_text}
if reasoning_content:
@@ -1454,8 +1539,10 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
if tool_calls_raw:
msg_to_store["tool_calls"] = tool_calls_raw
_deepseek_history.append(msg_to_store)
if full_assistant_text:
all_text_parts.append(full_assistant_text)
_append_comms("IN", "response", {
"round": round_idx,
"stop_reason": finish_reason,
@@ -1464,6 +1551,7 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
"usage": usage,
"streaming": stream
})
if finish_reason != "tool_calls" and not tool_calls_raw:
break
if round_idx > MAX_TOOL_ROUNDS:
@@ -1507,16 +1595,11 @@ def _send_deepseek(md_content: str, user_message: str, base_dir: str,
"content": f"SYSTEM WARNING: Cumulative tool output exceeded {_MAX_TOOL_OUTPUT_BYTES // 1000}KB budget. Provide your final answer now."
})
_append_comms("OUT", "request", {"message": f"[TOOL OUTPUT BUDGET EXCEEDED: {_cumulative_tool_bytes} bytes]"})
with _deepseek_history_lock:
for tr in tool_results_for_history:
_deepseek_history.append(tr)
next_messages: list[dict[str, Any]] = []
with _deepseek_history_lock:
for msg in _deepseek_history:
next_messages.append(msg)
next_messages.insert(0, sys_msg)
request_payload["messages"] = next_messages
round_idx += 1
return "\n\n".join(all_text_parts) if all_text_parts else "(No text returned)"
except Exception as e:
raise _classify_deepseek_error(e) from e

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import json
import threading
import uuid
import sys
from http.server import ThreadingHTTPServer, BaseHTTPRequestHandler
from typing import Any
import logging
@@ -9,9 +10,15 @@ from src import session_logger
def _get_app_attr(app: Any, name: str, default: Any = None) -> Any:
if hasattr(app, name):
return getattr(app, name)
val = getattr(app, name)
sys.stderr.write(f"[DEBUG] _get_app_attr: found {name} in app -> {val}\n")
sys.stderr.flush()
return val
if hasattr(app, 'controller') and hasattr(app.controller, name):
return getattr(app.controller, name)
val = getattr(app.controller, name)
sys.stderr.write(f"[DEBUG] _get_app_attr: found {name} in controller -> {val}\n")
sys.stderr.flush()
return val
return default
def _has_app_attr(app: Any, name: str) -> bool:
@@ -94,7 +101,10 @@ class HookHandler(BaseHTTPRequestHandler):
settable = _get_app_attr(app, "_settable_fields", {})
if field_tag in settable:
attr = settable[field_tag]
result["value"] = _get_app_attr(app, attr, None)
val = _get_app_attr(app, attr, "MISSING")
sys.stderr.write(f"[DEBUG] Hook API: get_value {field_tag} -> attr {attr} -> {val}\n")
sys.stderr.flush()
result["value"] = val if val != "MISSING" else None
finally: event.set()
lock = _get_app_attr(app, "_pending_gui_tasks_lock")
tasks = _get_app_attr(app, "_pending_gui_tasks")

View File

@@ -361,12 +361,16 @@ class AppController:
def _process_pending_gui_tasks(self) -> None:
if not self._pending_gui_tasks:
return
sys.stderr.write(f"[DEBUG] _process_pending_gui_tasks: processing {len(self._pending_gui_tasks)} tasks\n")
sys.stderr.flush()
with self._pending_gui_tasks_lock:
tasks = self._pending_gui_tasks[:]
self._pending_gui_tasks.clear()
for task in tasks:
try:
action = task.get("action")
sys.stderr.write(f"[DEBUG] Processing GUI task: action={action}\n")
sys.stderr.flush()
if action:
session_logger.log_api_hook("PROCESS_TASK", action, str(task))
# ...
@@ -428,14 +432,18 @@ class AppController:
self._show_track_proposal_modal = True
elif action == "mma_state_update":
# Handle both internal (nested) and hook-server (flattened) payloads
payload = task.get("payload")
if not isinstance(payload, dict):
payload = task # Fallback to task if payload missing or wrong type
self.mma_status = payload.get("status", "idle")
self.active_tier = payload.get("active_tier")
p = task.get("payload")
if not isinstance(p, dict):
p = task # Fallback to task itself if payload is missing or wrong type
sys.stderr.write(f"[DEBUG] mma_state_update: status={p.get('status')} active_tier={p.get('active_tier')}\n")
sys.stderr.flush()
self.mma_status = p.get("status", self.mma_status)
self.active_tier = p.get("active_tier", self.active_tier)
# Preserve existing model/provider config if not explicitly in payload
new_usage = payload.get("tier_usage", {})
new_usage = p.get("tier_usage", {})
for tier, data in new_usage.items():
if tier in self.mma_tier_usage:
# Update usage counts but keep selected model/provider if not in update
@@ -446,12 +454,18 @@ class AppController:
else:
self.mma_tier_usage[tier] = data
self.active_tickets = payload.get("tickets", [])
track_data = payload.get("track")
self.active_tickets = p.get("tickets", [])
track_data = p.get("track")
if track_data:
tickets = []
for t_data in self.active_tickets:
tickets.append(models.Ticket(**t_data))
if isinstance(t_data, models.Ticket):
tickets.append(t_data)
else:
# Map 'goal' from Godot format to 'description' if needed
if "goal" in t_data and "description" not in t_data:
t_data["description"] = t_data["goal"]
tickets.append(models.Ticket.from_dict(t_data))
self.active_track = models.Track(
id=track_data.get("id"),
description=track_data.get("title", ""),

View File

@@ -1793,22 +1793,23 @@ class App:
imgui.separator()
# 4. Task DAG Visualizer
imgui.text("Task DAG")
if self.active_track:
if self.active_track and self.node_editor_ctx:
ed.set_current_editor(self.node_editor_ctx)
ed.begin('Visual DAG')
# Selection detection
selected = ed.get_selected_nodes()
if selected:
for node_id in selected:
node_val = node_id.id()
for t in self.active_tickets:
if abs(hash(str(t.get('id', '')))) == node_id:
if abs(hash(str(t.get('id', '')))) == node_val:
self.ui_selected_ticket_id = str(t.get('id', ''))
break
break
for t in self.active_tickets:
tid = str(t.get('id', '??'))
int_id = abs(hash(tid))
ed.begin_node(int_id)
ed.begin_node(ed.NodeId(int_id))
imgui.text_colored(C_KEY, f"Ticket: {tid}")
status = t.get('status', 'todo')
s_col = C_VAL
@@ -1819,18 +1820,18 @@ class App:
imgui.same_line()
imgui.text_colored(s_col, status)
imgui.text(f"Target: {t.get('target_file','')}")
ed.begin_pin(abs(hash(tid + "_in")), ed.PinKind.input)
ed.begin_pin(ed.PinId(abs(hash(tid + "_in"))), ed.PinKind.input)
imgui.text("->")
ed.end_pin()
imgui.same_line()
ed.begin_pin(abs(hash(tid + "_out")), ed.PinKind.output)
ed.begin_pin(ed.PinId(abs(hash(tid + "_out"))), ed.PinKind.output)
imgui.text("->")
ed.end_pin()
ed.end_node()
for t in self.active_tickets:
tid = str(t.get('id', '??'))
for dep in t.get('depends_on', []):
ed.link(abs(hash(dep + "_" + tid)), abs(hash(dep + "_out")), abs(hash(tid + "_in")))
ed.link(ed.LinkId(abs(hash(dep + "_" + tid))), ed.PinId(abs(hash(dep + "_out"))), ed.PinId(abs(hash(tid + "_in"))))
# Handle link creation
if ed.begin_create():
@@ -1855,7 +1856,7 @@ class App:
t.setdefault('depends_on', []).append(source_tid)
self._push_mma_state_update()
break
ed.end_create()
ed.end_create()
# Handle link deletion
if ed.begin_delete():
@@ -1870,7 +1871,7 @@ class App:
t['depends_on'] = [dep for dep in deps if abs(hash(dep + "_" + tid)) != lid_val]
self._push_mma_state_update()
break
ed.end_delete()
ed.end_delete()
# Validate DAG after any changes
try:
from src.dag_engine import TrackDAG
@@ -1881,7 +1882,6 @@ class App:
except Exception:
pass
ed.end()
ed.set_current_editor(None)
# 5. Add Ticket Form
imgui.separator()
if imgui.button("Add Ticket"):

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import tomllib
import datetime
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any, Union
from pathlib import Path
@@ -179,12 +180,24 @@ class Metadata:
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "Metadata":
created = data.get("created_at")
updated = data.get("updated_at")
if isinstance(created, str):
try:
created = datetime.datetime.fromisoformat(created)
except ValueError:
pass
if isinstance(updated, str):
try:
updated = datetime.datetime.fromisoformat(updated)
except ValueError:
pass
return cls(
id=data["id"],
name=data.get("name", ""),
status=data.get("status", "todo"),
created_at=data.get("created_at"),
updated_at=data.get("updated_at"),
created_at=created,
updated_at=updated,
)

View File

@@ -123,7 +123,7 @@ def mock_app() -> Generator[App, None, None]:
'projects': {'paths': [], 'active': ''},
'gui': {'show_windows': {}}
}),
patch('src.gui_2.save_config'),
patch('src.models.save_config'),
patch('src.gui_2.project_manager'),
patch('src.gui_2.session_logger'),
patch('src.gui_2.immapp.run'),
@@ -155,7 +155,7 @@ def app_instance() -> Generator[App, None, None]:
'projects': {'paths': [], 'active': ''},
'gui': {'show_windows': {}}
}),
patch('src.gui_2.save_config'),
patch('src.models.save_config'),
patch('src.gui_2.project_manager'),
patch('src.gui_2.session_logger'),
patch('src.gui_2.immapp.run'),

View File

@@ -34,10 +34,15 @@ def test_conductor_engine_run_executes_tickets_in_order(monkeypatch: pytest.Monk
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved
def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete()
import threading
def do_work():
import time
time.sleep(0.1)
ticket.mark_complete()
threading.Thread(target=do_work).start()
return "Success"
mock_lifecycle.side_effect = side_effect
engine.run()
engine.run(max_ticks=20)
vlogger.log_state("T1 Status Final", "todo", ticket1.status)
vlogger.log_state("T2 Status Final", "todo", ticket2.status)
@@ -108,8 +113,8 @@ def test_run_worker_lifecycle_context_injection(monkeypatch: pytest.MonkeyPatch)
run_worker_lifecycle(ticket, context, context_files=context_files)
# Verify ASTParser calls:
# First file (primary) should get curated view, others (secondary) get skeleton
mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass")
mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass")
mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass", path="primary.py")
mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass", path="secondary.py")
# Verify user_message contains the views
_, kwargs = mock_send.call_args
user_message = kwargs["user_message"]
@@ -238,10 +243,15 @@ def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch: pytest.Monk
# Mock run_worker_lifecycle to mark tickets as complete
with patch("src.multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete()
import threading
def do_work():
import time
time.sleep(0.1)
ticket.mark_complete()
threading.Thread(target=do_work).start()
return "Success"
mock_lifecycle.side_effect = side_effect
engine.run()
engine.run(max_ticks=20)
assert mock_lifecycle.call_count == 3
# Verify dependency order: T1 must be called before T2
calls = [call[0][0].id for call in mock_lifecycle.call_args_list]

View File

@@ -111,3 +111,55 @@ def test_deepseek_streaming(mock_post: MagicMock) -> None:
result = ai_client.send(md_content="Context", user_message="Stream test", base_dir=".", stream=True)
assert result == "Hello World"
@patch("requests.post")
def test_deepseek_payload_verification(mock_post: MagicMock) -> None:
"""
Verifies that the correct JSON payload (tools, history, params) is sent to DeepSeek.
"""
ai_client.set_provider("deepseek", "deepseek-chat")
ai_client.reset_session()
with patch("src.ai_client._load_credentials", return_value={"deepseek": {"api_key": "test-key"}}):
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.json.return_value = {
"choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}]
}
mock_post.return_value = mock_response
ai_client.send(md_content="Context", user_message="Message 1", base_dir=".", discussion_history="History")
args, kwargs = mock_post.call_args
payload = kwargs["json"]
assert payload["model"] == "deepseek-chat"
assert "tools" in payload
assert len(payload["messages"]) == 2 # system + user
assert "[DISCUSSION HISTORY]\n\nHistory" in payload["messages"][1]["content"]
assert "temperature" in payload
assert "max_tokens" in payload
@patch("requests.post")
def test_deepseek_reasoner_payload_verification(mock_post: MagicMock) -> None:
"""
Verifies that deepseek-reasoner payload excludes tools and temperature.
"""
ai_client.set_provider("deepseek", "deepseek-reasoner")
ai_client.reset_session()
with patch("src.ai_client._load_credentials", return_value={"deepseek": {"api_key": "test-key"}}):
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.json.return_value = {
"choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}]
}
mock_post.return_value = mock_response
ai_client.send(md_content="Context", user_message="Message 1", base_dir=".")
args, kwargs = mock_post.call_args
payload = kwargs["json"]
assert payload["model"] == "deepseek-reasoner"
assert "tools" not in payload
assert "temperature" not in payload
assert "max_tokens" not in payload

View File

@@ -19,11 +19,12 @@ def test_gui2_hubs_exist_in_show_windows(app_instance: App) -> None:
def test_gui2_old_windows_removed_from_show_windows(app_instance: App) -> None:
"""
Verifies that the old fragmented windows are removed from show_windows.
Note: Message, Response, and Tool Calls are kept as they are now optional standalone windows.
"""
old_windows = [
"Projects", "Files", "Screenshots",
"Provider", "System Prompts",
"Message", "Response", "Tool Calls", "Comms History"
"Comms History"
]
for old_win in old_windows:
assert old_win not in app_instance.show_windows, f"Old window '{old_win}' should have been removed from show_windows"

View File

@@ -77,7 +77,11 @@ def test_render_log_management_logic(app_instance: App) -> None:
patch("src.gui_2.imgui.table_headers_row"), \
patch("src.gui_2.imgui.table_next_row"), \
patch("src.gui_2.imgui.table_next_column"), \
patch("src.gui_2.imgui.same_line"), \
patch("src.gui_2.imgui.text_colored"), \
patch("src.gui_2.imgui.separator"), \
patch("src.gui_2.imgui.button"):
mock_reg = MockRegistry.return_value
mock_reg.data = {
"session_1": {

View File

@@ -39,11 +39,18 @@ def test_visual_mma_components(live_gui):
client.push_event("mma_state_update", payload)
print(" - MMA state update pushed.")
# Use ApiHookClient.wait_for_event if we had a specific event to wait for,
# but here we just want to verify state.
time.sleep(1) # Small sleep for UI to catch up with event queue
# Poll for state update
success = False
for _ in range(50): # 10 seconds total
if client.get_value("mma_active_tier") == "Tier 3":
success = True
break
time.sleep(0.2)
assert success, f"State did not update to Tier 3. Current: {client.get_value('mma_active_tier')}"
# 3. Trigger HITL modal
print("Pushing 'mma_step_approval' event to trigger HITL modal...")
approval_payload = {
"ticket_id": "TICKET-002",