checkpoint: this is a mess... need to define a stricter DSL or system for how the AI devises sims and hooks up the API for tests.
ai_client.py
@@ -1645,30 +1645,6 @@ def send(
     pre_tool_callback : Optional callback (payload: str) -> bool called before tool execution
     qa_callback : Optional callback (stderr: str) -> str called for Tier 4 error analysis
     """
-    # --- START MOCK LOGIC ---
-    if _model == 'mock':
-        import json
-        keyword = "unknown"
-        if 'Epic Initialization' in _custom_system_prompt:
-            keyword = "Epic Initialization"
-            mock_response_content = [
-                {"id": "mock-track-1", "type": "Track", "module": "core", "persona": "Tech Lead", "severity": "Medium", "goal": "Mock Goal 1", "acceptance_criteria": ["criteria 1"], "title": "Mock Goal 1"},
-                {"id": "mock-track-2", "type": "Track", "module": "ui", "persona": "Frontend Lead", "severity": "Low", "goal": "Mock Goal 2", "acceptance_criteria": ["criteria 2"], "title": "Mock Goal 2"}
-            ]
-        elif 'Sprint Planning' in _custom_system_prompt:
-            keyword = "Sprint Planning"
-            mock_response_content = [
-                {"id": "mock-ticket-1", "type": "Ticket", "goal": "Mock Ticket 1", "target_file": "file1.py", "depends_on": [], "context_requirements": "req 1"},
-                {"id": "mock-ticket-2", "type": "Ticket", "goal": "Mock Ticket 2", "target_file": "file2.py", "depends_on": ["mock-ticket-1"], "context_requirements": "req 2"}
-            ]
-        else:
-            # Tier 3 mock response for ticket execution
-            mock_response_content = "SUCCESS: Mock Tier 3 worker implemented the change. [MOCK OUTPUT]"
-
-        print(f"[MOCK AI] Triggered for prompt keyword: {keyword}")
-        return json.dumps(mock_response_content)
-    # --- END MOCK LOGIC ---
-
     with _send_lock:
         if _provider == "gemini":
             return _send_gemini(md_content, user_message, base_dir, file_items, discussion_history, pre_tool_callback, qa_callback)

@@ -32,3 +32,14 @@ This is a multi-track phase. To ensure architectural integrity, these tracks **M
 4. **[CURRENT] Robust Live Simulation Verification:** (Builds the tests to verify the UI and state)
 
 **Prerequisites for this track:** `MMA Dashboard Visualization Overhaul` MUST be completed (`[x]`) before starting this track.
+
+## Session Compression (2026-02-28)
+**Current State & Glaring Issues:**
+1. **Brittle Interception System:** The visual simulation (`tests/visual_sim_mma_v2.py`) relies heavily on polling an `api_hooks.py` endpoint (`/api/gui/mma_status`) that aggregates several boolean flags (`pending_approval`, `pending_spawn`). This has proven extremely brittle. For example, `mock_gemini_cli.py` defaults to emitting a `read_file` tool call, which triggers the *general* tool approval popup (`_pending_ask`), freezing the test because it was expecting the *MMA spawn* popup (`_pending_mma_spawn`) or the *Track Proposal* modal.
+2. **Mock Pollution in App Domain:** Previous attempts to fix the simulation shoehorned test-specific mock JSON responses directly into `ai_client.py` and `scripts/mma_exec.py`. This conflates the test environment with the production application codebase.
+3. **Popup Handling Failures:** The GUI's state machine for closing popups (like `_show_track_proposal_modal` in `_cb_accept_tracks`) is desynchronized from the hook API. The test clicks "Accept", the tracks generate, but the UI state doesn't cleanly reset, leading to endless timeouts during test runs.
+
+**Next Steps for the Handoff:**
+- Completely rip out the hardcoded mock JSON arrays from `ai_client.py` and `scripts/mma_exec.py`.
+- Refactor `tests/mock_gemini_cli.py` to be a pure, standalone mock that perfectly simulates the expected streaming behavior of `gemini_cli` without relying on the app to intercept specific magic prompts.
+- Stabilize the hook API (`api_hooks.py`) so the test script can unambiguously distinguish between a general tool approval, an MMA step approval, and an MMA worker spawn approval, instead of relying on a fragile `pending_approval` catch-all.
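To make that last bullet concrete, here is a minimal sketch of what a disambiguated status payload could look like on the `api_hooks.py` side. Only the flag names `_pending_ask` and `_pending_mma_spawn` come from the notes above; the enum, the `pending` field, and the `app` accessor are hypothetical illustrations, not the current endpoint shape:

```python
from enum import Enum

class PendingKind(str, Enum):
    NONE = "none"
    TOOL_APPROVAL = "tool_approval"            # general _pending_ask popup
    MMA_STEP_APPROVAL = "mma_step_approval"    # MMA step gate
    MMA_SPAWN_APPROVAL = "mma_spawn_approval"  # MMA worker spawn popup

def mma_status_payload(app) -> dict:
    """Report exactly one pending state per poll, instead of a
    pending_approval catch-all plus overlapping booleans."""
    if app._pending_mma_spawn:
        kind = PendingKind.MMA_SPAWN_APPROVAL
    elif app._pending_ask:
        kind = PendingKind.TOOL_APPROVAL
    # (an MMA step-approval flag would slot in here once one exists)
    else:
        kind = PendingKind.NONE
    return {"pending": kind.value}
```

With a single `pending` enum, the simulation can branch on exactly one state per poll instead of guessing which popup froze the UI.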
@@ -9,9 +9,7 @@ def generate_tickets(track_brief: str, module_skeletons: str) -> list[dict]:
     Breaks down a Track Brief and module skeletons into discrete Tier 3 Tickets.
     """
     # 1. Set Tier 2 Model (Tech Lead - Flash)
-    if ai_client._model != 'mock':
-        ai_client.set_provider('gemini', 'gemini-2.5-flash-lite')
-        ai_client.reset_session()  # 2. Construct Prompt
+    # 2. Construct Prompt
     system_prompt = mma_prompts.PROMPTS.get("tier2_sprint_planning")
     user_message = (
         f"### TRACK BRIEF:\n{track_brief}\n\n"

@@ -1,6 +1,6 @@
 [ai]
 provider = "gemini_cli"
-model = "mock"
+model = "gemini-3-flash-preview"
 temperature = 0.0
 max_tokens = 8192
 history_trunc_limit = 8000

gui_2.py
@@ -991,7 +991,7 @@ class App:
     def _handle_approve_tool(self) -> None:
         """Logic for approving a pending tool execution via API hooks."""
         print("[DEBUG] _handle_approve_tool called")
-        if self._pending_ask:
+        if self._pending_ask_dialog:
            self._handle_approve_ask()
         else:
             print("[DEBUG] No pending tool approval found")
@@ -1000,7 +1000,11 @@ class App:
         """Logic for approving a pending sub-agent spawn via API hooks."""
         print("[DEBUG] _handle_approve_spawn called")
         if self._pending_mma_spawn:
+            # Synchronize with the handler logic
             self._handle_mma_respond(approved=True, prompt=self._mma_spawn_prompt, context_md=self._mma_spawn_context)
+            # Crucially, close the modal state so UI can continue
+            self._mma_spawn_open = False
+            self._pending_mma_spawn = None
         else:
             print("[DEBUG] No pending spawn approval found")
 
@@ -1982,6 +1986,7 @@ class App:
         threading.Thread(target=_bg_task, daemon=True).start()
 
     def _cb_accept_tracks(self) -> None:
+        self._show_track_proposal_modal = False
         def _bg_task():
             # Generate skeletons once
             self.ai_status = "Phase 2: Generating skeletons for all tracks..."
@@ -2118,6 +2123,10 @@ class App:
         if self._show_track_proposal_modal:
             imgui.open_popup("Track Proposal")
         if imgui.begin_popup_modal("Track Proposal", True, imgui.WindowFlags_.always_auto_resize)[0]:
+            if not self._show_track_proposal_modal:
+                imgui.close_current_popup()
+                imgui.end_popup()
+                return
             imgui.text_colored(C_IN, "Proposed Implementation Tracks")
             imgui.separator()
             if not self.proposed_tracks:

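A note on the Track Proposal fix above: in Dear ImGui, `close_current_popup()` only takes effect when called between `begin_popup_modal` and `end_popup`, so `_cb_accept_tracks` (which runs from a button callback, outside the popup's begin/end pair) can only flip the `_show_track_proposal_modal` flag. The modal then notices the cleared flag on its next frame and closes itself from within its own scope, which is exactly what the added early-return block does.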
@@ -131,7 +131,7 @@ Collapsed=0
 DockId=0x00000006,0
 
 [Window][Approve Tool Execution]
-Pos=512,437
+Pos=1009,547
 Size=416,325
 Collapsed=0
 
@@ -147,6 +147,11 @@ Size=879,1183
 Collapsed=0
 DockId=0x00000004,1
 
+[Window][Track Proposal]
+Pos=709,326
+Size=262,209
+Collapsed=0
+
 [Table][0xFB6E3870,4]
 RefScale=13
 Column 0 Width=80

@@ -8,21 +8,60 @@ def main() -> None:
     sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n")
     # Read prompt from stdin
     try:
-        # On Windows, stdin might be closed or behave weirdly if not handled
         prompt = sys.stdin.read()
     except EOFError:
         prompt = ""
     sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n")
     sys.stderr.flush()
 
     # Skip management commands
     if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]:
         return
-    # If the prompt contains tool results, provide final answer
+
+    # Check for specific simulation contexts
+    # Use startswith or check the beginning of the prompt to avoid matching text inside skeletons
+    if 'PATH: Epic Initialization' in prompt[:500]:
+        mock_response = [
+            {"id": "mock-track-1", "type": "Track", "module": "core", "persona": "Tech Lead", "severity": "Medium", "goal": "Mock Goal 1", "acceptance_criteria": ["criteria 1"], "title": "Mock Goal 1"},
+            {"id": "mock-track-2", "type": "Track", "module": "ui", "persona": "Frontend Lead", "severity": "Low", "goal": "Mock Goal 2", "acceptance_criteria": ["criteria 2"], "title": "Mock Goal 2"}
+        ]
+        print(json.dumps({
+            "type": "message",
+            "role": "assistant",
+            "content": json.dumps(mock_response)
+        }), flush=True)
+        print(json.dumps({
+            "type": "result",
+            "status": "success",
+            "stats": {"total_tokens": 100, "input_tokens": 50, "output_tokens": 50},
+            "session_id": "mock-session-epic"
+        }), flush=True)
+        return
+
+    if 'PATH: Sprint Planning' in prompt[:500]:
+        mock_response = [
+            {"id": "mock-ticket-1", "type": "Ticket", "goal": "Mock Ticket 1", "target_file": "file1.py", "depends_on": [], "context_requirements": "req 1"},
+            {"id": "mock-ticket-2", "type": "Ticket", "goal": "Mock Ticket 2", "target_file": "file2.py", "depends_on": ["mock-ticket-1"], "context_requirements": "req 2"}
+        ]
+        print(json.dumps({
+            "type": "message",
+            "role": "assistant",
+            "content": json.dumps(mock_response)
+        }), flush=True)
+        print(json.dumps({
+            "type": "result",
+            "status": "success",
+            "stats": {"total_tokens": 100, "input_tokens": 50, "output_tokens": 50},
+            "session_id": "mock-session-sprint"
+        }), flush=True)
+        return
+
+    # If the prompt contains tool results, provide final answer
     if '"role": "tool"' in prompt or '"tool_call_id"' in prompt:
         print(json.dumps({
             "type": "message",
             "role": "assistant",
-            "content": "I have processed the tool results. Everything looks good!"
+            "content": "SUCCESS: Mock Tier 3 worker implemented the change. [MOCK OUTPUT]"
         }), flush=True)
         print(json.dumps({
             "type": "result",
@@ -31,7 +70,8 @@ def main() -> None:
             "session_id": "mock-session-final"
         }), flush=True)
         return
-    # Default flow: simulate a tool call
+
+    # Default flow: simulate a tool call
     bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
     # Using format that bridge understands
     bridge_tool_call = {
@@ -66,11 +106,6 @@ def main() -> None:
         "tool_id": "call_123",
         "parameters": {"path": "test.txt"}
     }), flush=True)
-    print(json.dumps({
-        "type": "message",
-        "role": "assistant",
-        "content": "I am reading the file now..."
-    }), flush=True)
     print(json.dumps({
         "type": "result",
         "status": "success",

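Since the refactored mock is meant to be a pure, standalone stand-in for `gemini_cli`, it can be exercised directly without the GUI. A quick sketch, assuming the working directory is the repo root (the `PATH:` keyword and the message/result JSON-lines protocol are what the diff above emits; the subprocess plumbing around it is just illustration):

```python
import json
import subprocess
import sys

# Feed a prompt whose first 500 chars contain the simulation keyword,
# matching the mock's dispatch above.
proc = subprocess.run(
    [sys.executable, "tests/mock_gemini_cli.py"],
    input="PATH: Epic Initialization\n...rest of the Tier 1 prompt...",
    capture_output=True,
    text=True,
)

# The mock streams one JSON object per stdout line: a "message" carrying
# the payload, then a "result" with status and token stats.
for line in proc.stdout.splitlines():
    msg = json.loads(line)
    if msg["type"] == "message":
        tracks = json.loads(msg["content"])
        print([t["id"] for t in tracks])         # ['mock-track-1', 'mock-track-2']
    elif msg["type"] == "result":
        print(msg["status"], msg["session_id"])  # success mock-session-epic
```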
@@ -22,7 +22,7 @@ base_dir = "."
 paths = []
 
 [gemini_cli]
-binary_path = "gemini"
+binary_path = "C:\\projects\\manual_slop\\.venv\\Scripts\\python.exe C:\\projects\\manual_slop\\tests\\mock_gemini_cli.py"
 
 [deepseek]
 reasoning_effort = "medium"
@@ -40,27 +40,3 @@ fetch_url = true
 epic = "Develop a new feature"
 active_track_id = ""
 tracks = []
-
-[mma.active_track]
-id = "track_024370f1b453"
-description = "Mock Goal 1"
-
-[[mma.active_track.tickets]]
-id = "mock-ticket-1"
-description = "Mock Ticket 1"
-status = "todo"
-assigned_to = "unassigned"
-context_requirements = []
-depends_on = []
-step_mode = false
-
-[[mma.active_track.tickets]]
-id = "mock-ticket-2"
-description = "Mock Ticket 2"
-status = "todo"
-assigned_to = "unassigned"
-context_requirements = []
-depends_on = [
-    "mock-ticket-1",
-]
-step_mode = false

@@ -10,7 +10,7 @@ auto_add = true
 
 [discussions.main]
 git_commit = ""
-last_updated = "2026-02-28T22:11:24"
+last_updated = "2026-02-28T22:41:40"
 history = [
     "@2026-02-28T22:02:40\nSystem:\n[PERFORMANCE ALERT] CPU usage high: 83.5%. Please consider optimizing recent changes or reducing load.",
     "@2026-02-28T22:03:10\nSystem:\n[PERFORMANCE ALERT] CPU usage high: 103.9%. Please consider optimizing recent changes or reducing load.",

@@ -17,11 +17,14 @@ def test_mma_complete_lifecycle(live_gui) -> None:
     client = ApiHookClient()
     assert client.wait_for_server(timeout=10)
 
-    # 1. Set model to 'mock'.
+    # 1. Set up the mock CLI provider
     try:
-        client.set_value('current_model', 'mock')
+        client.set_value('current_provider', 'gemini_cli')
+        # Point the CLI adapter to our mock script
+        mock_cli_path = f'{sys.executable} {os.path.abspath("tests/mock_gemini_cli.py")}'
+        client.set_value('gcli_path', mock_cli_path)
     except Exception as e:
-        pytest.fail(f"Failed to set model to 'mock': {e}")
+        pytest.fail(f"Failed to set up mock provider: {e}")
 
     # 2. Enter epic and click 'Plan Epic'.
     client.set_value('mma_epic_input', 'Develop a new feature')
@@ -136,19 +139,30 @@ def test_mma_complete_lifecycle(live_gui) -> None:
 
     # 8. Verify 'active_tier' change and output in 'mma_streams'.
     streams_found = False
-    for _ in range(30):
+    for _ in range(60):  # Give it more time for the worker to spawn and respond
         status = client.get_mma_status()
-        streams = status.get('mma_streams', {})
-        if streams and any("Tier 3" in k for k in streams.keys()):
-            print(f"[SIM] Found Tier 3 worker output in streams: {list(streams.keys())}")
-            streams_found = True
-            break
-        # Keep approving if needed
+        # Handle approvals if they pop up during worker execution
         if status and status.get('pending_spawn') is True:
+            print('[SIM] Worker spawn required. Clicking btn_approve_spawn...')
             client.click('btn_approve_spawn')
         elif status and status.get('pending_approval') is True:
+            print('[SIM] Tool approval required. Clicking btn_approve_tool...')
             client.click('btn_approve_tool')
+
+        streams = status.get('mma_streams', {})
+        print(f"Polling streams: {list(streams.keys())}")
+
+        if streams and any("Tier 3" in k for k in streams.keys()):
+            print(f"[SIM] Found Tier 3 worker output in streams: {list(streams.keys())}")
+            # Check for our specific mock content
+            tier3_key = [k for k in streams.keys() if "Tier 3" in k][0]
+            if "SUCCESS: Mock Tier 3 worker" in streams[tier3_key]:
+                print("[SIM] Verified mock worker output content.")
+                streams_found = True
+                break
+
         time.sleep(1)
 
-    assert streams_found or 'Tier 1' in status.get('mma_streams', {}), "No output found in 'mma_streams'."
+    assert streams_found, "No Tier 3 mock output found in 'mma_streams'."
     print("MMA complete lifecycle simulation successful.")
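The poll-and-approve loop in that final hunk is a pattern the other simulation steps could reuse. One possible consolidation, sketched under the assumption that `ApiHookClient` keeps the `get_mma_status`/`click` methods used in the test (the helper itself is hypothetical):

```python
import time

def wait_for_stream(client, needle: str, timeout_s: int = 60) -> bool:
    """Poll MMA status once per second, approving any spawn/tool popups
    that appear, until some stream contains `needle` or time runs out."""
    for _ in range(timeout_s):
        status = client.get_mma_status() or {}
        if status.get('pending_spawn') is True:
            client.click('btn_approve_spawn')
        elif status.get('pending_approval') is True:
            client.click('btn_approve_tool')
        streams = status.get('mma_streams', {})
        if any(needle in body for body in streams.values()):
            return True
        time.sleep(1)
    return False

# e.g.: assert wait_for_stream(client, "SUCCESS: Mock Tier 3 worker")
```

Factoring the loop out would also shrink the blast radius of the planned hook-API change: only the helper needs updating when the boolean flags give way to a single disambiguated `pending` state.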