checkpoint: massive refactor

2026-02-28 09:06:45 -05:00
parent f2512c30e9
commit d36632c21a
149 changed files with 16255 additions and 17722 deletions

View File

@@ -15,82 +15,76 @@ import ai_client
@pytest.fixture(autouse=True)
def reset_ai_client():
"""Reset ai_client global state between every test to prevent state pollution."""
ai_client.reset_session()
# Default to a safe model
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
yield
"""Reset ai_client global state between every test to prevent state pollution."""
ai_client.reset_session()
# Default to a safe model
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
yield
def kill_process_tree(pid):
"""Robustly kills a process and all its children."""
if pid is None:
return
try:
print(f"[Fixture] Attempting to kill process tree for PID {pid}...")
if os.name == 'nt':
# /F is force, /T is tree (includes children)
subprocess.run(["taskkill", "/F", "/T", "/PID", str(pid)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False)
else:
# On Unix, kill the process group
os.killpg(os.getpgid(pid), signal.SIGKILL)
print(f"[Fixture] Process tree {pid} killed.")
except Exception as e:
print(f"[Fixture] Error killing process tree {pid}: {e}")
"""Robustly kills a process and all its children."""
if pid is None:
return
try:
print(f"[Fixture] Attempting to kill process tree for PID {pid}...")
if os.name == 'nt':
# /F is force, /T is tree (includes children)
subprocess.run(["taskkill", "/F", "/T", "/PID", str(pid)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False)
else:
# On Unix, kill the process group
os.killpg(os.getpgid(pid), signal.SIGKILL)
print(f"[Fixture] Process tree {pid} killed.")
except Exception as e:
print(f"[Fixture] Error killing process tree {pid}: {e}")
@pytest.fixture(scope="session")
def live_gui():
"""
"""
Session-scoped fixture that starts gui_2.py with --enable-test-hooks.
"""
gui_script = "gui_2.py"
print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks...")
os.makedirs("logs", exist_ok=True)
log_file = open(f"logs/{gui_script.replace('.', '_')}_test.log", "w", encoding="utf-8")
process = subprocess.Popen(
["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"],
stdout=log_file,
stderr=log_file,
text=True,
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0
)
max_retries = 15 # Slightly more time for gui_2
ready = False
print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...")
start_time = time.time()
while time.time() - start_time < max_retries:
try:
response = requests.get("http://127.0.0.1:8999/status", timeout=0.5)
if response.status_code == 200:
ready = True
print(f"[Fixture] GUI Hook Server for {gui_script} is ready after {round(time.time() - start_time, 2)}s.")
break
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
if process.poll() is not None:
print(f"[Fixture] {gui_script} process died unexpectedly during startup.")
break
time.sleep(0.5)
if not ready:
print(f"[Fixture] TIMEOUT/FAILURE: Hook server for {gui_script} failed to respond.")
kill_process_tree(process.pid)
pytest.fail(f"Failed to start {gui_script} with test hooks.")
try:
yield process, gui_script
finally:
print(f"\n[Fixture] Finally block triggered: Shutting down {gui_script}...")
# Reset the GUI state before shutting down
try:
client = ApiHookClient()
client.reset_session()
time.sleep(0.5)
except: pass
kill_process_tree(process.pid)
log_file.close()
gui_script = "gui_2.py"
print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks...")
os.makedirs("logs", exist_ok=True)
log_file = open(f"logs/{gui_script.replace('.', '_')}_test.log", "w", encoding="utf-8")
process = subprocess.Popen(
["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"],
stdout=log_file,
stderr=log_file,
text=True,
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0
)
max_retries = 15 # Slightly more time for gui_2
ready = False
print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...")
start_time = time.time()
while time.time() - start_time < max_retries:
try:
response = requests.get("http://127.0.0.1:8999/status", timeout=0.5)
if response.status_code == 200:
ready = True
print(f"[Fixture] GUI Hook Server for {gui_script} is ready after {round(time.time() - start_time, 2)}s.")
break
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
if process.poll() is not None:
print(f"[Fixture] {gui_script} process died unexpectedly during startup.")
break
time.sleep(0.5)
if not ready:
print(f"[Fixture] TIMEOUT/FAILURE: Hook server for {gui_script} failed to respond.")
kill_process_tree(process.pid)
pytest.fail(f"Failed to start {gui_script} with test hooks.")
try:
yield process, gui_script
finally:
print(f"\n[Fixture] Finally block triggered: Shutting down {gui_script}...")
# Reset the GUI state before shutting down
try:
client = ApiHookClient()
client.reset_session()
time.sleep(0.5)
except: pass
kill_process_tree(process.pid)
log_file.close()

tests/mock_alias_tool.py Normal file
View File

@@ -0,0 +1,21 @@
import sys, json, os, subprocess
prompt = sys.stdin.read()
if '"role": "tool"' in prompt:
print(json.dumps({"type": "message", "role": "assistant", "content": "Tool worked!"}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 20}}), flush=True)
else:
# We must call the bridge to trigger the GUI approval!
tool_call = {"name": "list_directory", "input": {"dir_path": "."}}
bridge_cmd = [sys.executable, "C:/projects/manual_slop/scripts/cli_tool_bridge.py"]
proc = subprocess.Popen(bridge_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
stdout, _ = proc.communicate(input=json.dumps(tool_call))
# Even if bridge says allow, we emit the tool_use to the adapter
print(json.dumps({"type": "message", "role": "assistant", "content": "I will list the directory."}), flush=True)
print(json.dumps({
"type": "tool_use",
"name": "list_directory",
"id": "alias_call",
"args": {"dir_path": "."}
}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)

View File

@@ -4,104 +4,92 @@ import subprocess
import os
def main():
    # Debug log to stderr
    sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
    sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n")
    # Read prompt from stdin
    try:
        # On Windows, stdin might be closed or behave weirdly if not handled
        prompt = sys.stdin.read()
    except EOFError:
        prompt = ""
    sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n")
    sys.stderr.flush()
    # Skip management commands
    if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]:
        return
    # If the prompt contains tool results, provide final answer
    if '"role": "tool"' in prompt or '"tool_call_id"' in prompt:
        print(json.dumps({
            "type": "message",
            "role": "assistant",
            "content": "I have processed the tool results. Everything looks good!"
        }), flush=True)
        print(json.dumps({
            "type": "result",
            "status": "success",
            "stats": {"total_tokens": 100, "input_tokens": 80, "output_tokens": 20},
            "session_id": "mock-session-final"
        }), flush=True)
        return
    # Default flow: simulate a tool call
    bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
    # Using format that bridge understands
    bridge_tool_call = {
        "name": "read_file",
        "input": {"path": "test.txt"}
    }
    sys.stderr.write(f"DEBUG: Calling bridge at {bridge_path}\n")
    sys.stderr.flush()
    try:
        # CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed
        process = subprocess.Popen(
            [sys.executable, bridge_path],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=os.environ
        )
        stdout, stderr = process.communicate(input=json.dumps(bridge_tool_call))
        sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n")
        sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n")
        decision_data = json.loads(stdout.strip())
        decision = decision_data.get("decision")
    except Exception as e:
        sys.stderr.write(f"DEBUG: Bridge failed: {e}\n")
        decision = "deny"
    if decision == "allow":
        # Simulate REAL CLI field names for adapter normalization test
        print(json.dumps({
            "type": "tool_use",
            "tool_name": "read_file",
            "tool_id": "call_123",
            "parameters": {"path": "test.txt"}
        }), flush=True)
        print(json.dumps({
            "type": "message",
            "role": "assistant",
            "content": "I am reading the file now..."
        }), flush=True)
        print(json.dumps({
            "type": "result",
            "status": "success",
            "stats": {"total_tokens": 50, "input_tokens": 40, "output_tokens": 10},
            "session_id": "mock-session-123"
        }), flush=True)
    else:
        print(json.dumps({
            "type": "message",
            "role": "assistant",
            "content": f"Tool execution was denied. Decision: {decision}"
        }), flush=True)
        print(json.dumps({
            "type": "result",
            "status": "success",
            "stats": {"total_tokens": 10, "input_tokens": 10, "output_tokens": 0},
            "session_id": "mock-session-denied"
        }), flush=True)
if __name__ == "__main__":
    main()

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-27T18:56:53"
last_updated = "2026-02-28T07:35:03"
history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -6,10 +6,10 @@ roles = [
"Reasoning",
]
history = []
active = "TestDisc_1772236592"
active = "TestDisc_1772282083"
auto_add = true
[discussions.TestDisc_1772236592]
[discussions.TestDisc_1772282083]
git_commit = ""
last_updated = "2026-02-27T18:56:46"
last_updated = "2026-02-28T07:34:56"
history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-27T18:57:53"
last_updated = "2026-02-28T07:35:49"
history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-27T18:57:10"
last_updated = "2026-02-28T07:35:20"
history = []

View File

@@ -18,7 +18,5 @@ history = [
[discussions.AutoDisc]
git_commit = ""
last_updated = "2026-02-27T23:54:05"
history = [
"@2026-02-27T19:08:37\nSystem:\n[PERFORMANCE ALERT] Frame time high: 62.2ms. Please consider optimizing recent changes or reducing load.",
]
last_updated = "2026-02-28T07:34:41"
history = []

View File

@@ -8,5 +8,5 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import ai_client
def test_agent_capabilities_listing():
    # Verify that the agent exposes its available tools correctly
    pass

View File

@@ -9,14 +9,14 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from ai_client import set_agent_tools, _build_anthropic_tools
def test_set_agent_tools():
    # Correct usage: pass a dict
    agent_tools = {"read_file": True, "list_directory": False}
    set_agent_tools(agent_tools)
def test_build_anthropic_tools_conversion():
    # _build_anthropic_tools takes no arguments and uses the global _agent_tools
    # We set a tool to True and check if it appears in the output
    set_agent_tools({"read_file": True})
    anthropic_tools = _build_anthropic_tools()
    tool_names = [t["name"] for t in anthropic_tools]
    assert "read_file" in tool_names

View File

@@ -3,39 +3,33 @@ from unittest.mock import MagicMock, patch
import ai_client
def test_ai_client_send_gemini_cli():
"""
"""
Verifies that 'ai_client.send' correctly interacts with 'GeminiCliAdapter'
when the 'gemini_cli' provider is specified.
"""
test_message = "Hello, this is a test prompt for the CLI adapter."
test_response = "This is a dummy response from the Gemini CLI."
# Set provider to gemini_cli
ai_client.set_provider("gemini_cli", "gemini-2.5-flash-lite")
# 1. Mock 'ai_client.GeminiCliAdapter' (which we will add)
with patch('ai_client.GeminiCliAdapter') as MockAdapterClass:
mock_adapter_instance = MockAdapterClass.return_value
mock_adapter_instance.send.return_value = {"text": test_response, "tool_calls": []}
mock_adapter_instance.last_usage = {"total_tokens": 100}
mock_adapter_instance.last_latency = 0.5
mock_adapter_instance.session_id = "test-session"
# Verify that 'events' are emitted correctly
with patch.object(ai_client.events, 'emit') as mock_emit:
response = ai_client.send(
md_content="<context></context>",
user_message=test_message,
base_dir="."
)
# Check that the adapter's send method was called.
mock_adapter_instance.send.assert_called()
# Verify that the expected lifecycle events were emitted.
emitted_event_names = [call.args[0] for call in mock_emit.call_args_list]
assert 'request_start' in emitted_event_names
assert 'response_received' in emitted_event_names
# Verify that the combined text returned by the adapter is returned by 'ai_client.send'.
assert response == test_response
test_message = "Hello, this is a test prompt for the CLI adapter."
test_response = "This is a dummy response from the Gemini CLI."
# Set provider to gemini_cli
ai_client.set_provider("gemini_cli", "gemini-2.5-flash-lite")
# 1. Mock 'ai_client.GeminiCliAdapter' (which we will add)
with patch('ai_client.GeminiCliAdapter') as MockAdapterClass:
mock_adapter_instance = MockAdapterClass.return_value
mock_adapter_instance.send.return_value = {"text": test_response, "tool_calls": []}
mock_adapter_instance.last_usage = {"total_tokens": 100}
mock_adapter_instance.last_latency = 0.5
mock_adapter_instance.session_id = "test-session"
# Verify that 'events' are emitted correctly
with patch.object(ai_client.events, 'emit') as mock_emit:
response = ai_client.send(
md_content="<context></context>",
user_message=test_message,
base_dir="."
)
# Check that the adapter's send method was called.
mock_adapter_instance.send.assert_called()
# Verify that the expected lifecycle events were emitted.
emitted_event_names = [call.args[0] for call in mock_emit.call_args_list]
assert 'request_start' in emitted_event_names
assert 'response_received' in emitted_event_names
# Verify that the combined text returned by the adapter is returned by 'ai_client.send'.
assert response == test_response

View File

@@ -3,15 +3,14 @@ from unittest.mock import patch, MagicMock
import ai_client
def test_list_models_gemini_cli():
"""
"""
Verifies that 'ai_client.list_models' correctly returns a list of models
for the 'gemini_cli' provider.
"""
models = ai_client.list_models("gemini_cli")
assert "gemini-3.1-pro-preview" in models
assert "gemini-3-flash-preview" in models
assert "gemini-2.5-pro" in models
assert "gemini-2.5-flash" in models
assert "gemini-2.5-flash-lite" in models
assert len(models) == 5
models = ai_client.list_models("gemini_cli")
assert "gemini-3.1-pro-preview" in models
assert "gemini-3-flash-preview" in models
assert "gemini-2.5-pro" in models
assert "gemini-2.5-flash" in models
assert "gemini-2.5-flash-lite" in models
assert len(models) == 5

View File

@@ -3,22 +3,22 @@ import textwrap
from scripts.ai_style_formatter import format_code
def test_basic_indentation():
    source = textwrap.dedent("""\
def hello():
print("world")
if True:
print("nested")
""")
    expected = (
        "def hello():\n"
        " print(\"world\")\n"
        " if True:\n"
        " print(\"nested\")\n"
    )
    assert format_code(source) == expected
def test_top_level_blank_lines():
    source = textwrap.dedent("""\
def a():
pass
@@ -26,31 +26,31 @@ def test_top_level_blank_lines():
def b():
pass
""")
    expected = (
        "def a():\n"
        " pass\n"
        "\n"
        "def b():\n"
        " pass\n"
    )
    assert format_code(source) == expected
def test_inner_blank_lines():
    source = textwrap.dedent("""\
def a():
print("start")
print("end")
""")
    expected = (
        "def a():\n"
        " print(\"start\")\n"
        " print(\"end\")\n"
    )
    assert format_code(source) == expected
def test_multiline_string_safety():
    source = textwrap.dedent("""\
def a():
'''
This is a multiline
@@ -60,21 +60,20 @@ def test_multiline_string_safety():
'''
pass
""")
    # Note: the indentation of the ''' itself becomes 1 space.
    # The content inside remains exactly as in source.
    # textwrap.dedent will remove the common leading whitespace from the source.
    # The source's ''' is at 4 spaces. Content is at 4 spaces.
    # After dedent:
    # def a():
    # '''
    # This is a...
    result = format_code(source)
    assert " This is a multiline" in result
    assert result.startswith("def a():\n '''")
def test_continuation_indentation():
    source = textwrap.dedent("""\
def long_func(
a,
b
@@ -84,20 +83,20 @@ def test_continuation_indentation():
b
)
""")
    expected = (
        "def long_func(\n"
        " a,\n"
        " b\n"
        "):\n"
        " return (\n"
        " a +\n"
        " b\n"
        " )\n"
    )
    assert format_code(source) == expected
def test_multiple_top_level_definitions():
    source = textwrap.dedent("""\
class MyClass:
def __init__(self):
self.x = 1
@@ -109,14 +108,14 @@ def test_multiple_top_level_definitions():
def top_level():
pass
""")
    expected = (
        "class MyClass:\n"
        " def __init__(self):\n"
        " self.x = 1\n"
        " def method(self):\n"
        " pass\n"
        "\n"
        "def top_level():\n"
        " pass\n"
    )
    assert format_code(source) == expected

View File

@@ -3,127 +3,104 @@ from unittest.mock import MagicMock, patch
import ai_client
class MockUsage:
    def __init__(self):
        self.prompt_token_count = 10
        self.candidates_token_count = 5
        self.total_token_count = 15
        self.cached_content_token_count = 0
class MockPart:
    def __init__(self, text, function_call):
        self.text = text
        self.function_call = function_call
class MockContent:
    def __init__(self, parts):
        self.parts = parts
class MockCandidate:
    def __init__(self, parts):
        self.content = MockContent(parts)
        self.finish_reason = MagicMock()
        self.finish_reason.name = "STOP"
def test_ai_client_event_emitter_exists():
    # This should fail initially because 'events' won't exist on ai_client
    assert hasattr(ai_client, 'events')
def test_event_emission():
    callback = MagicMock()
    ai_client.events.on("test_event", callback)
    ai_client.events.emit("test_event", payload={"data": 123})
    callback.assert_called_once_with(payload={"data": 123})
def test_send_emits_events():
with patch("ai_client._send_gemini") as mock_send_gemini, \
patch("ai_client._send_anthropic") as mock_send_anthropic:
mock_send_gemini.return_value = "gemini response"
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
# We mocked _send_gemini so it doesn't emit events inside.
# But wait, ai_client.send itself emits request_start and response_received?
# Actually, ai_client.send delegates to _send_gemini.
# Let's mock _gemini_client instead to let _send_gemini run and emit events.
pass
with patch("ai_client._send_gemini") as mock_send_gemini, \
patch("ai_client._send_anthropic") as mock_send_anthropic:
mock_send_gemini.return_value = "gemini response"
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
# We mocked _send_gemini so it doesn't emit events inside.
# But wait, ai_client.send itself emits request_start and response_received?
# Actually, ai_client.send delegates to _send_gemini.
# Let's mock _gemini_client instead to let _send_gemini run and emit events.
pass
def test_send_emits_events_proper():
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client:
mock_chat = MagicMock()
mock_client.chats.create.return_value = mock_chat
mock_response = MagicMock()
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])]
mock_response.usage_metadata = MockUsage()
mock_chat.send_message.return_value = mock_response
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
assert start_callback.called
assert response_callback.called
args, kwargs = start_callback.call_args
assert kwargs['payload']['provider'] == 'gemini'
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client:
mock_chat = MagicMock()
mock_client.chats.create.return_value = mock_chat
mock_response = MagicMock()
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])]
mock_response.usage_metadata = MockUsage()
mock_chat.send_message.return_value = mock_response
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
assert start_callback.called
assert response_callback.called
args, kwargs = start_callback.call_args
assert kwargs['payload']['provider'] == 'gemini'
def test_send_emits_tool_events():
    import mcp_client
    with patch("ai_client._ensure_gemini_client"), \
         patch("ai_client._gemini_client") as mock_client, \
         patch("mcp_client.dispatch") as mock_dispatch:
        mock_chat = MagicMock()
        mock_client.chats.create.return_value = mock_chat
        # 1. Setup mock response with a tool call
        mock_fc = MagicMock()
        mock_fc.name = "read_file"
        mock_fc.args = {"path": "test.txt"}
        mock_response_with_tool = MagicMock()
        mock_response_with_tool.candidates = [MockCandidate([MockPart("tool call text", mock_fc)])]
        mock_response_with_tool.usage_metadata = MockUsage()
        # 2. Setup second mock response (final answer)
        mock_response_final = MagicMock()
        mock_response_final.candidates = [MockCandidate([MockPart("final answer", None)])]
        mock_response_final.usage_metadata = MockUsage()
        mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
        mock_dispatch.return_value = "file content"
        ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
        tool_callback = MagicMock()
        ai_client.events.on("tool_execution", tool_callback)
        ai_client.send("context", "message")
        # Should be called twice: once for 'started', once for 'completed'
        assert tool_callback.call_count == 2
        # Check 'started' call
        args, kwargs = tool_callback.call_args_list[0]
        assert kwargs['payload']['status'] == 'started'
        assert kwargs['payload']['tool'] == 'read_file'
        # Check 'completed' call
        args, kwargs = tool_callback.call_args_list[1]
        assert kwargs['payload']['status'] == 'completed'
        assert kwargs['payload']['result'] == 'file content'

View File

@@ -13,88 +13,84 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def test_get_status_success(live_gui):
"""
"""
Test that get_status successfully retrieves the server status
when the live GUI is running.
"""
client = ApiHookClient()
status = client.get_status()
assert status == {'status': 'ok'}
client = ApiHookClient()
status = client.get_status()
assert status == {'status': 'ok'}
def test_get_project_success(live_gui):
"""
"""
Test successful retrieval of project data from the live GUI.
"""
client = ApiHookClient()
response = client.get_project()
assert 'project' in response
# We don't assert specific content as it depends on the environment's active project
client = ApiHookClient()
response = client.get_project()
assert 'project' in response
# We don't assert specific content as it depends on the environment's active project
def test_get_session_success(live_gui):
"""
"""
Test successful retrieval of session data.
"""
client = ApiHookClient()
response = client.get_session()
assert 'session' in response
assert 'entries' in response['session']
client = ApiHookClient()
response = client.get_session()
assert 'session' in response
assert 'entries' in response['session']
def test_post_gui_success(live_gui):
"""
"""
Test successful posting of GUI data.
"""
client = ApiHookClient()
gui_data = {'command': 'set_text', 'id': 'some_item', 'value': 'new_text'}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
client = ApiHookClient()
gui_data = {'command': 'set_text', 'id': 'some_item', 'value': 'new_text'}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
def test_get_performance_success(live_gui):
"""
"""
Test successful retrieval of performance metrics.
"""
client = ApiHookClient()
response = client.get_performance()
assert "performance" in response
client = ApiHookClient()
response = client.get_performance()
assert "performance" in response
def test_unsupported_method_error():
"""
"""
Test that calling an unsupported HTTP method raises a ValueError.
"""
client = ApiHookClient()
with pytest.raises(ValueError, match="Unsupported HTTP method"):
client._make_request('PUT', '/some_endpoint', data={'key': 'value'})
client = ApiHookClient()
with pytest.raises(ValueError, match="Unsupported HTTP method"):
client._make_request('PUT', '/some_endpoint', data={'key': 'value'})
def test_get_text_value():
"""
"""
Test retrieval of string representation using get_text_value.
"""
client = ApiHookClient()
with patch.object(client, 'get_value', return_value=123):
assert client.get_text_value("dummy_tag") == "123"
with patch.object(client, 'get_value', return_value=None):
assert client.get_text_value("dummy_tag") is None
client = ApiHookClient()
with patch.object(client, 'get_value', return_value=123):
assert client.get_text_value("dummy_tag") == "123"
with patch.object(client, 'get_value', return_value=None):
assert client.get_text_value("dummy_tag") is None
def test_get_node_status():
"""
"""
Test retrieval of DAG node status using get_node_status.
"""
client = ApiHookClient()
# When get_value returns a status directly
with patch.object(client, 'get_value', return_value="running"):
assert client.get_node_status("my_node") == "running"
# When get_value returns None and diagnostics provides a nodes dict
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'nodes': {'my_node': 'completed'}}):
assert client.get_node_status("my_node") == "completed"
# When get_value returns None and diagnostics provides a direct key
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'my_node': 'failed'}):
assert client.get_node_status("my_node") == "failed"
# When neither works
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={}):
assert client.get_node_status("my_node") is None
client = ApiHookClient()
# When get_value returns a status directly
with patch.object(client, 'get_value', return_value="running"):
assert client.get_node_status("my_node") == "running"
# When get_value returns None and diagnostics provides a nodes dict
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'nodes': {'my_node': 'completed'}}):
assert client.get_node_status("my_node") == "completed"
# When get_value returns None and diagnostics provides a direct key
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'my_node': 'failed'}):
assert client.get_node_status("my_node") == "failed"
# When neither works
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={}):
assert client.get_node_status("my_node") is None

View File

@@ -8,68 +8,64 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def test_api_client_has_extensions():
    client = ApiHookClient()
    # These should fail initially as they are not implemented
    assert hasattr(client, 'select_tab')
    assert hasattr(client, 'select_list_item')
def test_select_tab_integration(live_gui):
    client = ApiHookClient()
    # We'll need to make sure the tags exist in gui_legacy.py
    # For now, this is a placeholder for the integration test
    response = client.select_tab("operations_tabs", "tab_tool")
    assert response == {'status': 'queued'}
def test_select_list_item_integration(live_gui):
    client = ApiHookClient()
    # Assuming 'Default' discussion exists or we can just test that it queues
    response = client.select_list_item("disc_listbox", "Default")
    assert response == {'status': 'queued'}
def test_get_indicator_state_integration(live_gui):
    client = ApiHookClient()
    # thinking_indicator is usually hidden unless AI is running
    response = client.get_indicator_state("thinking_indicator")
    assert 'shown' in response
    assert response['tag'] == "thinking_indicator"
def test_app_processes_new_actions():
    import gui_legacy
    from unittest.mock import MagicMock, patch
    import dearpygui.dearpygui as dpg
    dpg.create_context()
    try:
        with patch('gui_legacy.load_config', return_value={}), \
             patch('gui_legacy.PerformanceMonitor'), \
             patch('gui_legacy.shell_runner'), \
             patch('gui_legacy.project_manager'), \
             patch.object(gui_legacy.App, '_load_active_project'):
            app = gui_legacy.App()
            with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
                 patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
                 patch('dearpygui.dearpygui.get_item_callback') as mock_get_cb:
                # Test select_tab
                app._pending_gui_tasks.append({
                    "action": "select_tab",
                    "tab_bar": "some_tab_bar",
                    "tab": "some_tab"
                })
                app._process_pending_gui_tasks()
                mock_set_value.assert_any_call("some_tab_bar", "some_tab")
                # Test select_list_item
                mock_cb = MagicMock()
                mock_get_cb.return_value = mock_cb
                app._pending_gui_tasks.append({
                    "action": "select_list_item",
                    "listbox": "some_listbox",
                    "item_value": "some_value"
                })
                app._process_pending_gui_tasks()
                mock_set_value.assert_any_call("some_listbox", "some_value")
                mock_cb.assert_called_with("some_listbox", "some_value")
    finally:
        dpg.destroy_context()

View File

@@ -3,24 +3,24 @@ import tree_sitter
from file_cache import ASTParser
def test_ast_parser_initialization():
"""Verify that ASTParser can be initialized with a language string."""
parser = ASTParser("python")
assert parser.language_name == "python"
"""Verify that ASTParser can be initialized with a language string."""
parser = ASTParser("python")
assert parser.language_name == "python"
def test_ast_parser_parse():
"""Verify that the parse method returns a tree_sitter.Tree."""
parser = ASTParser("python")
code = """def example_func():
"""Verify that the parse method returns a tree_sitter.Tree."""
parser = ASTParser("python")
code = """def example_func():
return 42"""
    tree = parser.parse(code)
    assert isinstance(tree, tree_sitter.Tree)
    # Basic check that it parsed something
    assert tree.root_node.type == "module"
def test_ast_parser_get_skeleton_python():
"""Verify that get_skeleton replaces function bodies with '...' while preserving docstrings."""
parser = ASTParser("python")
code = '''
"""Verify that get_skeleton replaces function bodies with '...' while preserving docstrings."""
parser = ASTParser("python")
code = '''
def complex_function(a, b):
"""
This is a docstring.
@@ -36,35 +36,32 @@ class MyClass:
print("doing something")
return None
'''
    skeleton = parser.get_skeleton(code)
    # Check that signatures are preserved
    assert "def complex_function(a, b):" in skeleton
    assert "class MyClass:" in skeleton
    assert "def method_without_docstring(self):" in skeleton
    # Check that docstring is preserved
    assert '"""' in skeleton
    assert "This is a docstring." in skeleton
    assert "It should be preserved." in skeleton
    # Check that bodies are replaced with '...'
    assert "..." in skeleton
    assert "result = a + b" not in skeleton
    assert "return result" not in skeleton
    assert 'print("doing something")' not in skeleton
def test_ast_parser_invalid_language():
"""Verify handling of unsupported or invalid languages."""
# This might raise an error or return a default, depending on implementation
# For now, we expect it to either fail gracefully or raise an exception we can catch
with pytest.raises(Exception):
ASTParser("not-a-language")
"""Verify handling of unsupported or invalid languages."""
# This might raise an error or return a default, depending on implementation
# For now, we expect it to either fail gracefully or raise an exception we can catch
with pytest.raises(Exception):
ASTParser("not-a-language")
def test_ast_parser_get_curated_view():
"""Verify that get_curated_view preserves function bodies with @core_logic or # [HOT]."""
parser = ASTParser("python")
code = '''
"""Verify that get_curated_view preserves function bodies with @core_logic or # [HOT]."""
parser = ASTParser("python")
code = '''
@core_logic
def core_func():
"""Core logic doc."""
@@ -86,20 +83,16 @@ class MyClass:
def core_method(self, x):
print("method preserved", x)
'''
    curated = parser.get_curated_view(code)
    # Check that core_func is preserved
    assert 'print("this should be preserved")' in curated
    assert 'return True' in curated
    # Check that hot_func is preserved
    assert '# [HOT]' in curated
    assert 'print("this should also be preserved")' in curated
    # Check that normal_func is stripped but docstring is preserved
    assert '"""Normal doc."""' in curated
    assert 'print("this should be stripped")' not in curated
    assert '...' in curated
    # Check that core_method is preserved
    assert 'print("method preserved", x)' in curated

View File

@@ -2,8 +2,8 @@ import pytest
from file_cache import ASTParser
def test_ast_parser_get_curated_view():
    parser = ASTParser("python")
    code = '''
@core_logic
def core_func():
"""Core logic doc."""
@@ -25,20 +25,16 @@ class MyClass:
def core_method(self):
print("method preserved")
'''
    curated = parser.get_curated_view(code)
    # Check that core_func is preserved
    assert 'print("this should be preserved")' in curated
    assert 'return True' in curated
    # Check that hot_func is preserved
    assert '# [HOT]' in curated
    assert 'print("this should also be preserved")' in curated
    # Check that normal_func is stripped but docstring is preserved
    assert '"""Normal doc."""' in curated
    assert 'print("this should be stripped")' not in curated
    assert '...' in curated
    # Check that core_method is preserved
    assert 'print("method preserved")' in curated

View File

@@ -3,45 +3,40 @@ import pytest
from events import AsyncEventQueue
def test_async_event_queue_put_get():
"""Verify that an event can be asynchronously put and retrieved from the queue."""
async def run_test():
queue = AsyncEventQueue()
event_name = "test_event"
payload = {"data": "hello"}
await queue.put(event_name, payload)
ret_name, ret_payload = await queue.get()
assert ret_name == event_name
assert ret_payload == payload
"""Verify that an event can be asynchronously put and retrieved from the queue."""
asyncio.run(run_test())
async def run_test():
queue = AsyncEventQueue()
event_name = "test_event"
payload = {"data": "hello"}
await queue.put(event_name, payload)
ret_name, ret_payload = await queue.get()
assert ret_name == event_name
assert ret_payload == payload
asyncio.run(run_test())
def test_async_event_queue_multiple():
"""Verify that multiple events can be asynchronously put and retrieved in order."""
async def run_test():
queue = AsyncEventQueue()
await queue.put("event1", 1)
await queue.put("event2", 2)
name1, val1 = await queue.get()
name2, val2 = await queue.get()
assert name1 == "event1"
assert val1 == 1
assert name2 == "event2"
assert val2 == 2
"""Verify that multiple events can be asynchronously put and retrieved in order."""
asyncio.run(run_test())
async def run_test():
queue = AsyncEventQueue()
await queue.put("event1", 1)
await queue.put("event2", 2)
name1, val1 = await queue.get()
name2, val2 = await queue.get()
assert name1 == "event1"
assert val1 == 1
assert name2 == "event2"
assert val2 == 2
asyncio.run(run_test())
def test_async_event_queue_none_payload():
"""Verify that an event with None payload works correctly."""
async def run_test():
queue = AsyncEventQueue()
await queue.put("no_payload")
name, payload = await queue.get()
assert name == "no_payload"
assert payload is None
"""Verify that an event with None payload works correctly."""
asyncio.run(run_test())
async def run_test():
queue = AsyncEventQueue()
await queue.put("no_payload")
name, payload = await queue.get()
assert name == "no_payload"
assert payload is None
asyncio.run(run_test())

View File

@@ -5,72 +5,60 @@ from log_registry import LogRegistry
@pytest.fixture
def registry_setup(tmp_path):
    registry_path = tmp_path / "log_registry.toml"
    logs_dir = tmp_path / "logs"
    logs_dir.mkdir()
    registry = LogRegistry(str(registry_path))
    return registry, logs_dir
def test_auto_whitelist_keywords(registry_setup):
    registry, logs_dir = registry_setup
    session_id = "test_kw"
    session_dir = logs_dir / session_id
    session_dir.mkdir()
    # Create comms.log with ERROR
    comms_log = session_dir / "comms.log"
    comms_log.write_text("Some message\nAN ERROR OCCURRED\nMore text")
    registry.register_session(session_id, str(session_dir), datetime.now())
    registry.update_auto_whitelist_status(session_id)
    assert registry.is_session_whitelisted(session_id)
    assert "ERROR" in registry.data[session_id]["metadata"]["reason"]
def test_auto_whitelist_message_count(registry_setup):
    registry, logs_dir = registry_setup
    session_id = "test_msg_count"
    session_dir = logs_dir / session_id
    session_dir.mkdir()
    # Create comms.log with > 10 lines
    comms_log = session_dir / "comms.log"
    comms_log.write_text("\n".join(["msg"] * 15))
    registry.register_session(session_id, str(session_dir), datetime.now())
    registry.update_auto_whitelist_status(session_id)
    assert registry.is_session_whitelisted(session_id)
    assert registry.data[session_id]["metadata"]["message_count"] == 15
def test_auto_whitelist_large_size(registry_setup):
    registry, logs_dir = registry_setup
    session_id = "test_large"
    session_dir = logs_dir / session_id
    session_dir.mkdir()
    # Create large file (> 50KB)
    large_file = session_dir / "large.log"
    large_file.write_text("x" * 60000)
    registry.register_session(session_id, str(session_dir), datetime.now())
    registry.update_auto_whitelist_status(session_id)
    assert registry.is_session_whitelisted(session_id)
    assert "Large session size" in registry.data[session_id]["metadata"]["reason"]
def test_no_auto_whitelist_insignificant(registry_setup):
    registry, logs_dir = registry_setup
    session_id = "test_insignificant"
    session_dir = logs_dir / session_id
    session_dir.mkdir()
    # Small file, few lines, no keywords
    comms_log = session_dir / "comms.log"
    comms_log.write_text("hello\nworld")
    registry.register_session(session_id, str(session_dir), datetime.now())
    registry.update_auto_whitelist_status(session_id)
    assert not registry.is_session_whitelisted(session_id)
    assert registry.data[session_id]["metadata"]["message_count"] == 2

View File

@@ -12,64 +12,55 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from scripts.cli_tool_bridge import main
class TestCliToolBridge(unittest.TestCase):
    def setUp(self):
        os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
        self.tool_call = {
            'tool_name': 'read_file',
            'tool_input': {'path': 'test.txt'}
        }
    @patch('sys.stdin', new_callable=io.StringIO)
    @patch('sys.stdout', new_callable=io.StringIO)
    @patch('api_hook_client.ApiHookClient.request_confirmation')
    def test_allow_decision(self, mock_request, mock_stdout, mock_stdin):
        # 1. Mock stdin with a JSON string tool call
        mock_stdin.write(json.dumps(self.tool_call))
        mock_stdin.seek(0)
        # 2. Mock ApiHookClient to return approved
        mock_request.return_value = {'approved': True}
        # Run main
        main()
        # 3. Capture stdout and assert allow
        output = json.loads(mock_stdout.getvalue().strip())
        self.assertEqual(output.get('decision'), 'allow')
    @patch('sys.stdin', new_callable=io.StringIO)
    @patch('sys.stdout', new_callable=io.StringIO)
    @patch('api_hook_client.ApiHookClient.request_confirmation')
    def test_deny_decision(self, mock_request, mock_stdout, mock_stdin):
        # Mock stdin
        mock_stdin.write(json.dumps(self.tool_call))
        mock_stdin.seek(0)
        # 4. Mock ApiHookClient to return denied
        mock_request.return_value = {'approved': False}
        main()
        # Assert deny
        output = json.loads(mock_stdout.getvalue().strip())
        self.assertEqual(output.get('decision'), 'deny')
    @patch('sys.stdin', new_callable=io.StringIO)
    @patch('sys.stdout', new_callable=io.StringIO)
    @patch('api_hook_client.ApiHookClient.request_confirmation')
    def test_unreachable_hook_server(self, mock_request, mock_stdout, mock_stdin):
        # Mock stdin
        mock_stdin.write(json.dumps(self.tool_call))
        mock_stdin.seek(0)
        # 5. Test case where hook server is unreachable (exception)
        mock_request.side_effect = Exception("Connection refused")
        main()
        # Assert deny on error
        output = json.loads(mock_stdout.getvalue().strip())
        self.assertEqual(output.get('decision'), 'deny')
if __name__ == '__main__':
    unittest.main()

View File

@@ -12,42 +12,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from scripts.cli_tool_bridge import main
class TestCliToolBridgeMapping(unittest.TestCase):
    def setUp(self):
        os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_mapping_from_api_format(self, mock_request, mock_stdout, mock_stdin):
"""
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_mapping_from_api_format(self, mock_request, mock_stdout, mock_stdin):
"""
Verify that bridge correctly maps 'id', 'name', 'input' (Gemini API format)
into tool_name and tool_input for the hook client.
"""
api_tool_call = {
'id': 'call123',
'name': 'read_file',
'input': {'path': 'test.txt'}
}
# 1. Mock stdin with the API format JSON
mock_stdin.write(json.dumps(api_tool_call))
mock_stdin.seek(0)
# 2. Mock ApiHookClient to return approved
mock_request.return_value = {'approved': True}
# Run main
main()
# 3. Verify that request_confirmation was called with mapped values
# If it's not mapped, it will likely be called with None or fail
mock_request.assert_called_once_with('read_file', {'path': 'test.txt'})
# 4. Capture stdout and assert allow
output_str = mock_stdout.getvalue().strip()
self.assertTrue(output_str, "Stdout should not be empty")
output = json.loads(output_str)
self.assertEqual(output.get('decision'), 'allow')
if __name__ == '__main__':
unittest.main()

View File

@@ -13,61 +13,55 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def simulate_conductor_phase_completion(client: ApiHookClient):
"""
"""
Simulates the Conductor agent's logic for phase completion using ApiHookClient.
"""
results = {
"verification_successful": False,
"verification_message": ""
}
try:
status = client.get_status()
if status.get('status') == 'ok':
results["verification_successful"] = True
results["verification_message"] = "Automated verification completed successfully."
else:
results["verification_successful"] = False
results["verification_message"] = f"Automated verification failed: {status}"
except Exception as e:
results["verification_successful"] = False
results["verification_message"] = f"Automated verification failed: {e}"
return results
def test_conductor_integrates_api_hook_client_for_verification(live_gui):
"""
"""
Verify that Conductor's simulated phase completion logic properly integrates
and uses the ApiHookClient for verification against the live GUI.
"""
client = ApiHookClient()
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is True
assert "successfully" in results["verification_message"]
def test_conductor_handles_api_hook_failure(live_gui):
"""
"""
Verify Conductor handles a simulated API hook verification failure.
We patch the client's get_status to simulate failure even with live GUI.
"""
client = ApiHookClient()
with patch.object(ApiHookClient, 'get_status') as mock_get_status:
mock_get_status.return_value = {'status': 'failed', 'error': 'Something went wrong'}
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is False
assert "failed" in results["verification_message"]
def test_conductor_handles_api_hook_connection_error():
"""
"""
Verify Conductor handles a simulated API hook connection error (server down).
"""
client = ApiHookClient(base_url="http://127.0.0.1:9998", max_retries=0)
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is False
# Check for expected error substrings from ApiHookClient
msg = results["verification_message"]
assert any(term in msg for term in ["Could not connect", "timed out", "Could not reach"])

View File

@@ -7,284 +7,234 @@ import ai_client
# which will be implemented in the next phase of TDD.
def test_conductor_engine_initialization():
"""
"""
Test that ConductorEngine can be initialized with a Track.
"""
track = Track(id="test_track", description="Test Track")
from multi_agent_conductor import ConductorEngine
engine = ConductorEngine(track=track)
assert engine.track == track
@pytest.mark.asyncio
async def test_conductor_engine_run_linear_executes_tickets_in_order(monkeypatch):
"""
"""
Test that run_linear iterates through executable tickets and calls the worker lifecycle.
"""
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker2", depends_on=["T1"])
track = Track(id="track1", description="Track 1", tickets=[ticket1, ticket2])
from multi_agent_conductor import ConductorEngine
engine = ConductorEngine(track=track)
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# We mock run_worker_lifecycle as it is expected to be in the same module
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved
def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete()
return "Success"
mock_lifecycle.side_effect = side_effect
await engine.run_linear()
# Track.get_executable_tickets() should be called repeatedly until all are done
# T1 should run first, then T2.
assert mock_lifecycle.call_count == 2
assert ticket1.status == "completed"
assert ticket2.status == "completed"
# Verify sequence: T1 before T2
calls = mock_lifecycle.call_args_list
assert calls[0][0][0].id == "T1"
assert calls[1][0][0].id == "T2"
@pytest.mark.asyncio
async def test_run_worker_lifecycle_calls_ai_client_send(monkeypatch):
"""
"""
Test that run_worker_lifecycle triggers the AI client and updates ticket status on success.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
mock_send.return_value = "Task complete. I have updated the file."
result = run_worker_lifecycle(ticket, context)
assert result == "Task complete. I have updated the file."
assert ticket.status == "completed"
mock_send.assert_called_once()
# Check if description was passed to send()
args, kwargs = mock_send.call_args
# user_message is passed as a keyword argument
assert ticket.description in kwargs["user_message"]
@pytest.mark.asyncio
async def test_run_worker_lifecycle_context_injection(monkeypatch):
"""
"""
Test that run_worker_lifecycle can take a context_files list and injects AST views into the prompt.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
context_files = ["primary.py", "secondary.py"]
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# We mock ASTParser which is expected to be imported in multi_agent_conductor
with patch("multi_agent_conductor.ASTParser") as mock_ast_parser_class, \
patch("builtins.open", new_callable=MagicMock) as mock_open:
# Setup open mock to return different content for different files
file_contents = {
"primary.py": "def primary(): pass",
"secondary.py": "def secondary(): pass"
}
def mock_open_side_effect(file, *args, **kwargs):
content = file_contents.get(file, "")
mock_file = MagicMock()
mock_file.read.return_value = content
mock_file.__enter__.return_value = mock_file
return mock_file
mock_open.side_effect = mock_open_side_effect
# Setup ASTParser mock
mock_ast_parser = mock_ast_parser_class.return_value
mock_ast_parser.get_curated_view.return_value = "CURATED VIEW"
mock_ast_parser.get_skeleton.return_value = "SKELETON VIEW"
mock_send.return_value = "Success"
run_worker_lifecycle(ticket, context, context_files=context_files)
# Verify ASTParser calls:
# First file (primary) should get curated view, others (secondary) get skeleton
mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass")
mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass")
# Verify user_message contains the views
_, kwargs = mock_send.call_args
user_message = kwargs["user_message"]
assert "CURATED VIEW" in user_message
assert "SKELETON VIEW" in user_message
assert "primary.py" in user_message
assert "secondary.py" in user_message
@pytest.mark.asyncio
async def test_run_worker_lifecycle_handles_blocked_response(monkeypatch):
"""
"""
Test that run_worker_lifecycle marks the ticket as blocked if the AI indicates it cannot proceed.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# Simulate a response indicating a block
mock_send.return_value = "I am BLOCKED because I don't have enough information."
run_worker_lifecycle(ticket, context)
assert ticket.status == "blocked"
assert "BLOCKED" in ticket.blocked_reason
@pytest.mark.asyncio
async def test_run_worker_lifecycle_step_mode_confirmation(monkeypatch):
"""
"""
Test that run_worker_lifecycle passes confirm_execution to ai_client.send when step_mode is True.
Verify that if confirm_execution is called (simulated by mocking ai_client.send to call its callback),
the flow works as expected.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
# We simulate ai_client.send by making it call the pre_tool_callback it received
def mock_send_side_effect(md_content, user_message, **kwargs):
callback = kwargs.get("pre_tool_callback")
if callback:
# Simulate calling it with some payload
callback('{"tool": "read_file", "args": {"path": "test.txt"}}')
return "Success"
mock_send.side_effect = mock_send_side_effect
mock_confirm.return_value = True
mock_event_queue = MagicMock()
run_worker_lifecycle(ticket, context, event_queue=mock_event_queue)
# Verify confirm_execution was called
mock_confirm.assert_called_once()
assert ticket.status == "completed"
@pytest.mark.asyncio
async def test_run_worker_lifecycle_step_mode_rejection(monkeypatch):
"""
"""
Verify that if confirm_execution returns False, the logic (in ai_client, which we simulate here)
would prevent execution. In run_worker_lifecycle, we just check if it's passed.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
mock_confirm.return_value = False
mock_send.return_value = "Task failed because tool execution was rejected."
run_worker_lifecycle(ticket, context)
# Verify it was passed to send
args, kwargs = mock_send.call_args
assert kwargs["pre_tool_callback"] is not None
# ai_client's handling of pre_tool_callback is exercised in its own tests;
# here we only verify that the callback is wired through.
@pytest.mark.asyncio
async def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch):
"""
"""
Test that parse_json_tickets correctly populates the track and run_linear executes them in dependency order.
"""
import json
from multi_agent_conductor import ConductorEngine
track = Track(id="dynamic_track", description="Dynamic Track")
engine = ConductorEngine(track=track)
tickets_json = json.dumps([
{
"id": "T1",
"description": "Initial task",
"status": "todo",
"assigned_to": "worker1",
"depends_on": []
},
{
"id": "T2",
"description": "Dependent task",
"status": "todo",
"assigned_to": "worker2",
"depends_on": ["T1"]
},
{
"id": "T3",
"description": "Another initial task",
"status": "todo",
"assigned_to": "worker3",
"depends_on": []
}
])
engine.parse_json_tickets(tickets_json)
assert len(engine.track.tickets) == 3
assert engine.track.tickets[0].id == "T1"
assert engine.track.tickets[1].id == "T2"
assert engine.track.tickets[2].id == "T3"
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# Mock run_worker_lifecycle to mark tickets as complete
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete()
return "Success"
mock_lifecycle.side_effect = side_effect
await engine.run_linear()
assert mock_lifecycle.call_count == 3
# Verify dependency order: T1 must be called before T2
calls = [call[0][0].id for call in mock_lifecycle.call_args_list]
t1_idx = calls.index("T1")
t2_idx = calls.index("T2")
assert t1_idx < t2_idx
# T3 can be anywhere relative to T1 and T2, but T1 < T2 is mandatory
assert "T3" in calls

View File

@@ -4,112 +4,106 @@ import json
import conductor_tech_lead
class TestConductorTechLead(unittest.TestCase):
@patch('ai_client.send')
@patch('ai_client.set_provider')
@patch('ai_client.reset_session')
def test_generate_tickets_success(self, mock_reset_session, mock_set_provider, mock_send):
# Setup mock response
mock_tickets = [
{
"id": "ticket_1",
"type": "Ticket",
"goal": "Test goal",
"target_file": "test.py",
"depends_on": [],
"context_requirements": []
}
]
mock_send.return_value = "```json\n" + json.dumps(mock_tickets) + "\n```"
track_brief = "Test track brief"
module_skeletons = "Test skeletons"
# Call the function
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
# Verify set_provider was called
mock_set_provider.assert_called_with('gemini', 'gemini-2.5-flash-lite')
mock_reset_session.assert_called_once()
# Verify send was called
mock_send.assert_called_once()
args, kwargs = mock_send.call_args
self.assertEqual(kwargs['md_content'], "")
self.assertIn(track_brief, kwargs['user_message'])
self.assertIn(module_skeletons, kwargs['user_message'])
# Verify tickets were parsed correctly
self.assertEqual(tickets, mock_tickets)
@patch('ai_client.send')
@patch('ai_client.set_provider')
@patch('ai_client.reset_session')
def test_generate_tickets_parse_error(self, mock_reset_session, mock_set_provider, mock_send):
# Setup mock invalid response
mock_send.return_value = "Invalid JSON"
# Call the function
tickets = conductor_tech_lead.generate_tickets("brief", "skeletons")
# Verify it returns an empty list on parse error
self.assertEqual(tickets, [])
class TestTopologicalSort(unittest.TestCase):
def test_topological_sort_empty(self):
tickets = []
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
self.assertEqual(sorted_tickets, [])
def test_topological_sort_linear(self):
tickets = [
{"id": "t2", "depends_on": ["t1"]},
{"id": "t1", "depends_on": []},
{"id": "t3", "depends_on": ["t2"]},
]
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
ids = [t["id"] for t in sorted_tickets]
self.assertEqual(ids, ["t1", "t2", "t3"])
def test_topological_sort_complex(self):
# t1
# | \
# t2 t3
# | /
# t4
tickets = [
{"id": "t4", "depends_on": ["t2", "t3"]},
{"id": "t3", "depends_on": ["t1"]},
{"id": "t2", "depends_on": ["t1"]},
{"id": "t1", "depends_on": []},
]
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
ids = [t["id"] for t in sorted_tickets]
# Possible valid orders: [t1, t2, t3, t4] or [t1, t3, t2, t4]
self.assertEqual(ids[0], "t1")
self.assertEqual(ids[-1], "t4")
self.assertSetEqual(set(ids[1:3]), {"t2", "t3"})
def test_topological_sort_cycle(self):
tickets = [
{"id": "t1", "depends_on": ["t2"]},
{"id": "t2", "depends_on": ["t1"]},
]
with self.assertRaises(ValueError) as cm:
conductor_tech_lead.topological_sort(tickets)
self.assertIn("Circular dependency detected", str(cm.exception))
def test_topological_sort_missing_dependency(self):
# A ticket may depend on something that is not in the list. In our context we only
# track dependencies within the same track, so a dependency that is missing from the
# set being sorted can never be satisfied.
tickets = [
{"id": "t1", "depends_on": ["missing"]},
]
# Expect a ValueError for a missing internal dependency rather than a silent hang.
with self.assertRaises(ValueError):
conductor_tech_lead.topological_sort(tickets)
if __name__ == '__main__':
unittest.main()

View File

@@ -3,82 +3,72 @@ from models import Ticket
from dag_engine import TrackDAG
def test_get_ready_tasks_linear():
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T2"])
dag = TrackDAG([t1, t2, t3])
ready = dag.get_ready_tasks()
assert len(ready) == 1
assert ready[0].id == "T2"
def test_get_ready_tasks_branching():
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2, t3])
ready = dag.get_ready_tasks()
assert len(ready) == 2
ready_ids = {t.id for t in ready}
assert ready_ids == {"T2", "T3"}
def test_has_cycle_no_cycle():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
assert not dag.has_cycle()
def test_has_cycle_direct_cycle():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
assert dag.has_cycle()
def test_has_cycle_indirect_cycle():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T3"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2, t3])
assert dag.has_cycle()
def test_has_cycle_complex_no_cycle():
# T1 -> T2, T1 -> T3, T2 -> T4, T3 -> T4
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T4"])
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T4"])
t4 = Ticket(id="T4", description="T4", status="todo", assigned_to="worker")
dag = TrackDAG([t1, t2, t3, t4])
assert not dag.has_cycle()
def test_get_ready_tasks_multiple_deps():
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="worker")
t2 = Ticket(id="T2", description="T2", status="completed", assigned_to="worker")
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T1", "T2"])
dag = TrackDAG([t1, t2, t3])
assert [t.id for t in dag.get_ready_tasks()] == ["T3"]
t2.status = "todo"
assert [t.id for t in dag.get_ready_tasks()] == ["T2"]
def test_topological_sort():
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T2"])
dag = TrackDAG([t1, t2, t3])
sort = dag.topological_sort()
assert sort == ["T1", "T2", "T3"]
def test_topological_sort_cycle():
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2"])
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
with pytest.raises(ValueError, match="Dependency cycle detected"):
dag.topological_sort()

View File

@@ -12,54 +12,51 @@ import ai_client
import project_manager
def test_credentials_error_mentions_deepseek(monkeypatch):
"""
"""
Verify that the error message shown when credentials.toml is missing
includes deepseek instructions.
"""
# Monkeypatch SLOP_CREDENTIALS to a non-existent file
monkeypatch.setenv("SLOP_CREDENTIALS", "non_existent_credentials_file.toml")
with pytest.raises(FileNotFoundError) as excinfo:
ai_client._load_credentials()
err_msg = str(excinfo.value)
assert "[deepseek]" in err_msg
assert "api_key" in err_msg
def test_default_project_includes_reasoning_role():
"""
"""
Verify that 'Reasoning' is included in the default discussion roles
to support DeepSeek-R1 reasoning traces.
"""
proj = project_manager.default_project("test")
roles = proj["discussion"]["roles"]
assert "Reasoning" in roles
def test_gui_providers_list():
"""
"""
Check if 'deepseek' is in the GUI's provider list.
"""
import gui_2
assert "deepseek" in gui_2.PROVIDERS
def test_deepseek_model_listing():
"""
"""
Verify that list_models for deepseek returns expected models.
"""
models = ai_client.list_models("deepseek")
assert "deepseek-chat" in models
assert "deepseek-reasoner" in models
def test_gui_provider_list_via_hooks(live_gui):
"""
"""
Verify 'deepseek' is present in the GUI provider list using API hooks.
"""
from api_hook_client import ApiHookClient
import time
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
# Attempt to set provider to deepseek to verify it's an allowed value
client.set_value('current_provider', 'deepseek')
time.sleep(0.5)
assert client.get_value('current_provider') == 'deepseek'

View File

@@ -3,137 +3,124 @@ from unittest.mock import patch, MagicMock
import ai_client
def test_deepseek_model_selection():
"""
"""
Verifies that ai_client.set_provider('deepseek', 'deepseek-chat') correctly updates the internal state.
"""
ai_client.set_provider("deepseek", "deepseek-chat")
assert ai_client._provider == "deepseek"
assert ai_client._model == "deepseek-chat"
def test_deepseek_completion_logic():
"""
"""
Verifies that ai_client.send() correctly calls the DeepSeek API and returns content.
"""
ai_client.set_provider("deepseek", "deepseek-chat")
with patch("requests.post") as mock_post:
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.json.return_value = {
"choices": [{
"message": {"role": "assistant", "content": "DeepSeek Response"},
"finish_reason": "stop"
}],
"usage": {"prompt_tokens": 10, "completion_tokens": 5}
}
mock_post.return_value = mock_response
result = ai_client.send(md_content="Context", user_message="Hello", base_dir=".")
assert result == "DeepSeek Response"
assert mock_post.called
def test_deepseek_reasoning_logic():
"""
"""
Verifies that reasoning_content is captured and wrapped in <thinking> tags.
"""
ai_client.set_provider("deepseek", "deepseek-reasoner")
with patch("requests.post") as mock_post:
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.json.return_value = {
"choices": [{
"message": {
"role": "assistant",
"content": "Final Answer",
"reasoning_content": "Chain of thought"
},
"finish_reason": "stop"
}],
"usage": {"prompt_tokens": 10, "completion_tokens": 20}
}
mock_post.return_value = mock_response
result = ai_client.send(md_content="Context", user_message="Reasoning test", base_dir=".")
assert "<thinking>\nChain of thought\n</thinking>" in result
assert "Final Answer" in result
def test_deepseek_tool_calling():
"""
"""
Verifies that DeepSeek provider correctly identifies and executes tool calls.
"""
ai_client.set_provider("deepseek", "deepseek-chat")
with patch("requests.post") as mock_post, \
patch("mcp_client.dispatch") as mock_dispatch:
# 1. Mock first response with a tool call
mock_resp1 = MagicMock()
mock_resp1.status_code = 200
mock_resp1.json.return_value = {
"choices": [{
"message": {
"role": "assistant",
"content": "Let me read that file.",
"tool_calls": [{
"id": "call_123",
"type": "function",
"function": {
"name": "read_file",
"arguments": '{"path": "test.txt"}'
}
}]
},
"finish_reason": "tool_calls"
}],
"usage": {"prompt_tokens": 50, "completion_tokens": 10}
}
# 2. Mock second response (final answer)
mock_resp2 = MagicMock()
mock_resp2.status_code = 200
mock_resp2.json.return_value = {
"choices": [{
"message": {
"role": "assistant",
"content": "File content is: Hello World"
},
"finish_reason": "stop"
}],
"usage": {"prompt_tokens": 100, "completion_tokens": 20}
}
mock_post.side_effect = [mock_resp1, mock_resp2]
mock_dispatch.return_value = "Hello World"
result = ai_client.send(md_content="Context", user_message="Read test.txt", base_dir=".")
assert "File content is: Hello World" in result
assert mock_dispatch.called
assert mock_dispatch.call_args[0][0] == "read_file"
assert mock_dispatch.call_args[0][1] == {"path": "test.txt"}
def test_deepseek_streaming():
"""
"""
Verifies that DeepSeek provider correctly aggregates streaming chunks.
"""
ai_client.set_provider("deepseek", "deepseek-chat")
with patch("requests.post") as mock_post:
# Mock a streaming response
mock_response = MagicMock()
mock_response.status_code = 200
# Simulate OpenAI-style server-sent events (SSE) for streaming
# Each line starts with 'data: ' and contains a JSON object
chunks = [
'data: {"choices": [{"delta": {"role": "assistant", "content": "Hello"}, "index": 0, "finish_reason": null}]}',
'data: {"choices": [{"delta": {"content": " World"}, "index": 0, "finish_reason": null}]}',
'data: {"choices": [{"delta": {}, "index": 0, "finish_reason": "stop"}]}',
'data: [DONE]'
]
mock_response.iter_lines.return_value = [c.encode('utf-8') for c in chunks]
mock_post.return_value = mock_response
result = ai_client.send(md_content="Context", user_message="Stream test", base_dir=".", stream=True)
assert result == "Hello World"

View File

@@ -3,121 +3,99 @@ from models import Ticket
from dag_engine import TrackDAG, ExecutionEngine
def test_execution_engine_basic_flow():
# Setup tickets with dependencies
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
t4 = Ticket(id="T4", description="Task 4", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
dag = TrackDAG([t1, t2, t3, t4])
engine = ExecutionEngine(dag)
# Tick 1: Only T1 should be ready
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T1"
# Complete T1
engine.update_task_status("T1", "completed")
# Tick 2: T2 and T3 should be ready
ready = engine.tick()
assert len(ready) == 2
ids = {t.id for t in ready}
assert ids == {"T2", "T3"}
# Complete T2
engine.update_task_status("T2", "completed")
# Tick 3: Only T3 should be ready (T4 depends on T2 AND T3)
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T3"
# Complete T3
engine.update_task_status("T3", "completed")
# Tick 4: T4 should be ready
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T4"
# Complete T4
engine.update_task_status("T4", "completed")
# Tick 5: Nothing ready
ready = engine.tick()
assert len(ready) == 0
def test_execution_engine_update_nonexistent_task():
dag = TrackDAG([])
engine = ExecutionEngine(dag)
# Should not raise; an unknown task ID is handled gracefully
engine.update_task_status("NONEXISTENT", "completed")
def test_execution_engine_status_persistence():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
dag = TrackDAG([t1])
engine = ExecutionEngine(dag)
engine.update_task_status("T1", "in_progress")
assert t1.status == "in_progress"
ready = engine.tick()
assert len(ready) == 0 # Only 'todo' tasks should be returned by tick() if they are ready
def test_execution_engine_auto_queue():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
engine = ExecutionEngine(dag, auto_queue=True)
# Tick 1: T1 is ready and should be automatically marked as 'in_progress'
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T1"
assert t1.status == "in_progress"
# Tick 2: T1 is in_progress, so T2 is NOT ready yet (T1 must be 'completed')
ready = engine.tick()
assert len(ready) == 0
assert t2.status == "todo"
# Complete T1
engine.update_task_status("T1", "completed")
# Tick 3: T2 is now ready and should be automatically marked as 'in_progress'
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T2"
assert t2.status == "in_progress"
def test_execution_engine_step_mode():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", step_mode=True)
dag = TrackDAG([t1])
engine = ExecutionEngine(dag, auto_queue=True)
# Tick 1: T1 is ready, but step_mode=True, so it should NOT be automatically marked as 'in_progress'
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T1"
assert t1.status == "todo"
# Manual approval
engine.approve_task("T1")
assert t1.status == "in_progress"
# Tick 2: T1 is already in_progress, should not be returned by tick() (it's not 'ready'/todo)
ready = engine.tick()
assert len(ready) == 0
def test_execution_engine_approve_task():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
dag = TrackDAG([t1])
engine = ExecutionEngine(dag, auto_queue=False)
# Should be able to approve even if auto_queue is False
engine.approve_task("T1")
assert t1.status == "in_progress"

View File

@@ -14,44 +14,40 @@ from simulation.sim_execution import ExecutionSimulation
@pytest.mark.integration
def test_context_sim_live(live_gui):
"""Run the Context & Chat simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ContextSimulation(client)
sim.setup("LiveContextSim")
sim.run()
sim.teardown()
"""Run the Context & Chat simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ContextSimulation(client)
sim.setup("LiveContextSim")
sim.run()
sim.teardown()
@pytest.mark.integration
def test_ai_settings_sim_live(live_gui):
"""Run the AI Settings simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = AISettingsSimulation(client)
sim.setup("LiveAISettingsSim")
sim.run()
sim.teardown()
"""Run the AI Settings simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = AISettingsSimulation(client)
sim.setup("LiveAISettingsSim")
sim.run()
sim.teardown()
@pytest.mark.integration
def test_tools_sim_live(live_gui):
"""Run the Tools & Search simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ToolsSimulation(client)
sim.setup("LiveToolsSim")
sim.run()
sim.teardown()
"""Run the Tools & Search simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ToolsSimulation(client)
sim.setup("LiveToolsSim")
sim.run()
sim.teardown()
@pytest.mark.integration
def test_execution_sim_live(live_gui):
"""Run the Execution & Modals simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ExecutionSimulation(client)
sim.setup("LiveExecutionSim")
sim.run()
sim.teardown()
"""Run the Execution & Modals simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ExecutionSimulation(client)
sim.setup("LiveExecutionSim")
sim.run()
sim.teardown()

View File

@@ -12,119 +12,105 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from gemini_cli_adapter import GeminiCliAdapter
class TestGeminiCliAdapter(unittest.TestCase):
def setUp(self):
self.adapter = GeminiCliAdapter(binary_path="gemini")
@patch('subprocess.Popen')
def test_send_starts_subprocess_with_correct_args(self, mock_popen):
"""
Verify that send(message) correctly starts the subprocess with
--output-format stream-json and the provided message via stdin using communicate.
"""
# Setup mock process with a minimal valid JSONL termination
process_mock = MagicMock()
stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
message = "Hello Gemini CLI"
self.adapter.send(message)
# Verify subprocess.Popen call
mock_popen.assert_called_once()
args, kwargs = mock_popen.call_args
cmd = args[0]
# Check mandatory CLI components
self.assertIn("gemini", cmd)
self.assertIn("--output-format", cmd)
self.assertIn("stream-json", cmd)
# Message should NOT appear in the command args; it is sent via stdin instead
self.assertNotIn(message, cmd)
# Verify message was sent via communicate
process_mock.communicate.assert_called_once_with(input=message)
# Check process configuration
self.assertEqual(kwargs.get('stdout'), subprocess.PIPE)
self.assertEqual(kwargs.get('stdin'), subprocess.PIPE)
self.assertEqual(kwargs.get('text'), True)
@patch('subprocess.Popen')
def test_send_parses_jsonl_output(self, mock_popen):
"""
Verify that it correctly parses multiple JSONL 'message' events
and returns the combined text.
"""
jsonl_output = [
json.dumps({"type": "message", "role": "model", "text": "The quick brown "}),
json.dumps({"type": "message", "role": "model", "text": "fox jumps."}),
json.dumps({"type": "result", "usage": {"prompt_tokens": 5, "candidates_tokens": 5}})
]
stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
result = self.adapter.send("test message")
self.assertEqual(result["text"], "The quick brown fox jumps.")
self.assertEqual(result["tool_calls"], [])
@patch('subprocess.Popen')
def test_send_handles_tool_use_events(self, mock_popen):
"""
Verify that it correctly handles 'tool_use' events in the stream
by continuing to read until the final 'result' event.
"""
jsonl_output = [
json.dumps({"type": "message", "role": "assistant", "text": "Calling tool..."}),
json.dumps({"type": "tool_use", "name": "read_file", "args": {"path": "test.txt"}}),
json.dumps({"type": "message", "role": "assistant", "text": "\nFile read successfully."}),
json.dumps({"type": "result", "usage": {}})
]
stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
result = self.adapter.send("read test.txt")
# Result should contain the combined text from all 'message' events
self.assertEqual(result["text"], "Calling tool...\nFile read successfully.")
self.assertEqual(len(result["tool_calls"]), 1)
self.assertEqual(result["tool_calls"][0]["name"], "read_file")
@patch('subprocess.Popen')
def test_send_captures_usage_metadata(self, mock_popen):
"""
Verify that usage data is extracted from the 'result' event.
"""
usage_data = {"total_tokens": 42}
jsonl_output = [
json.dumps({"type": "message", "text": "Finalizing"}),
json.dumps({"type": "result", "usage": usage_data})
]
stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
self.adapter.send("usage test")
# Verify the usage was captured in the adapter instance
self.assertEqual(self.adapter.last_usage, usage_data)
if __name__ == '__main__':
unittest.main()
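The stream handling these four tests pin down can be summarised by the following sketch (a minimal, hypothetical reconstruction from the mocked stdout above, not the actual GeminiCliAdapter code):
import json

def parse_stream_json(stdout_text):
    # Fold the JSONL transcript into the result shape the tests assert on.
    text_parts, tool_calls, usage = [], [], None
    for line in stdout_text.splitlines():
        if not line.strip():
            continue
        event = json.loads(line)
        etype = event.get("type")
        if etype == "message":
            text_parts.append(event.get("text", ""))
        elif etype == "tool_use":
            tool_calls.append({"name": event.get("name"), "args": event.get("args", {})})
        elif etype == "result":
            usage = event.get("usage")
    return {"text": "".join(text_parts), "tool_calls": tool_calls}, usage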

View File

@@ -9,168 +9,143 @@ import os
# Ensure the project root is in sys.path to resolve imports correctly
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if project_root not in sys.path:
sys.path.append(project_root)
# Import the class to be tested
from gemini_cli_adapter import GeminiCliAdapter
class TestGeminiCliAdapterParity(unittest.TestCase):
def setUp(self):
"""Set up a fresh adapter instance and reset session state for each test."""
# Patch session_logger to prevent file operations during tests
self.session_logger_patcher = patch('gemini_cli_adapter.session_logger')
self.mock_session_logger = self.session_logger_patcher.start()
self.adapter = GeminiCliAdapter(binary_path="gemini")
self.adapter.session_id = None
self.adapter.last_usage = None
self.adapter.last_latency = 0.0
def tearDown(self):
self.session_logger_patcher.stop()
@patch('subprocess.Popen')
def test_count_tokens_uses_estimation(self, mock_popen):
"""
Test that count_tokens uses character-based estimation.
"""
contents_to_count = ["This is the first line.", "This is the second line."]
expected_chars = len("\n".join(contents_to_count))
expected_tokens = expected_chars // 4
token_count = self.adapter.count_tokens(contents=contents_to_count)
self.assertEqual(token_count, expected_tokens)
# Verify that NO subprocess was started for counting
mock_popen.assert_not_called()
@patch('subprocess.Popen')
def test_send_with_safety_settings_no_flags_added(self, mock_popen):
"""
Test that the send method does NOT add --safety flags when safety_settings are provided,
as this functionality is no longer supported via CLI flags.
"""
process_mock = MagicMock()
mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0
mock_popen.return_value = process_mock
message_content = "User's prompt here."
safety_settings = [
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"},
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
]
self.adapter.send(message=message_content, safety_settings=safety_settings)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that no --safety flags were added to the command
self.assertNotIn("--safety", command)
# Verify that the message was passed correctly via stdin
process_mock.communicate.assert_called_once_with(input=message_content)
@patch('subprocess.Popen')
def test_send_without_safety_settings_no_flags(self, mock_popen):
"""
Test that when safety_settings is None or an empty list, no --safety flags are added.
"""
process_mock = MagicMock()
mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0
mock_popen.return_value = process_mock
message_content = "Another prompt."
self.adapter.send(message=message_content, safety_settings=None)
args_none, _ = mock_popen.call_args
self.assertNotIn("--safety", args_none[0])
mock_popen.reset_mock()
self.adapter.send(message=message_content, safety_settings=[])
args_empty, _ = mock_popen.call_args
self.assertNotIn("--safety", args_empty[0])
@patch('subprocess.Popen')
def test_send_with_system_instruction_prepended_to_stdin(self, mock_popen):
"""
Test that the send method prepends the system instruction to the prompt
sent via stdin, and does NOT add a --system flag to the command.
"""
process_mock = MagicMock()
mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0
mock_popen.return_value = process_mock
message_content = "User's prompt here."
system_instruction_text = "Some instruction"
expected_input = f"{system_instruction_text}\n\n{message_content}"
self.adapter.send(message=message_content, system_instruction=system_instruction_text)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that the system instruction was prepended to the input sent to communicate
process_mock.communicate.assert_called_once_with(input=expected_input)
# Verify that no --system flag was added to the command
self.assertNotIn("--system", command)
@patch('subprocess.Popen')
def test_send_with_model_parameter(self, mock_popen):
"""
Test that the send method correctly adds the -m <model> flag when a model is specified.
"""
process_mock = MagicMock()
mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0
mock_popen.return_value = process_mock
message_content = "User's prompt here."
model_name = "gemini-1.5-flash"
expected_command_part = f'-m "{model_name}"'
self.adapter.send(message=message_content, model=model_name)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that the -m <model> flag was added to the command
self.assertIn(expected_command_part, command)
# Verify that the message was passed correctly via stdin
process_mock.communicate.assert_called_once_with(input=message_content)
@patch('subprocess.Popen')
def test_send_kills_process_on_communicate_exception(self, mock_popen):
"""
Test that if subprocess.Popen().communicate() raises an exception,
GeminiCliAdapter.send() kills the process and re-raises the exception.
"""
mock_process = MagicMock()
mock_popen.return_value = mock_process
# Define an exception to simulate
simulated_exception = RuntimeError("Simulated communicate error")
mock_process.communicate.side_effect = simulated_exception
message_content = "User message"
# Assert that the exception is raised and process is killed
with self.assertRaises(RuntimeError) as cm:
self.adapter.send(message=message_content)
# Verify that the process's kill method was called
mock_process.kill.assert_called_once()
# Verify that the correct exception was re-raised
self.assertIs(cm.exception, simulated_exception)
if __name__ == '__main__':
unittest.main()
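Two of the behaviours asserted above reduce to one-liners; in sketch form (derived from the expected values in these tests, not the adapter's real implementation):
def estimate_tokens(contents):
    # Character-based heuristic: roughly 4 characters per token, no subprocess call.
    return len("\n".join(contents)) // 4

def build_stdin_payload(message, system_instruction=None):
    # The system instruction is prepended to the stdin prompt rather than passed as a --system flag.
    return f"{system_instruction}\n\n{message}" if system_instruction else message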

View File

@@ -7,66 +7,57 @@ import json
from api_hook_client import ApiHookClient
def test_gemini_cli_context_bleed_prevention(live_gui):
"""
"""
Test that the GeminiCliAdapter correctly filters out echoed 'user' messages
and only shows assistant content in the GUI history.
"""
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
client.set_value("auto_add_history", True)
# Create a specialized mock for context bleed
bleed_mock = os.path.abspath("tests/mock_context_bleed.py")
with open(bleed_mock, "w") as f:
f.write('''import sys, json
print(json.dumps({"type": "init", "session_id": "bleed-test"}), flush=True)
print(json.dumps({"type": "message", "role": "user", "content": "I am echoing you"}), flush=True)
print(json.dumps({"type": "message", "role": "assistant", "content": "Actual AI Response"}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)
''')
cli_cmd = f'"{sys.executable}" "{bleed_mock}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
client.set_value("ai_input", "Test context bleed")
client.click("btn_gen_send")
# Wait for completion
time.sleep(3)
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
# Verify: We expect exactly one AI entry, and it must NOT contain the echoed user message
ai_entries = [e for e in entries if e.get("role") == "AI"]
assert len(ai_entries) == 1
assert ai_entries[0].get("content") == "Actual AI Response"
assert "echoing you" not in ai_entries[0].get("content")
os.remove(bleed_mock)
def test_gemini_cli_parameter_resilience(live_gui):
"""
"""
Test that mcp_client correctly handles 'file_path' and 'dir_path' aliases
sent by the AI instead of 'path'.
"""
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop")
# Create a mock that uses dir_path for list_directory
alias_mock = os.path.abspath("tests/mock_alias_tool.py")
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
# Avoid backslashes in f-string expression part
if sys.platform == "win32":
bridge_path_str = bridge_path.replace("\\", "/")
else:
bridge_path_str = bridge_path
with open(alias_mock, "w") as f:
f.write(f'''import sys, json, os, subprocess
prompt = sys.stdin.read()
if '"role": "tool"' in prompt:
print(json.dumps({{"type": "message", "role": "assistant", "content": "Tool worked!"}}), flush=True)
@@ -88,83 +79,71 @@ else:
}}), flush=True)
print(json.dumps({{"type": "result", "stats": {{"total_tokens": 10}}}}), flush=True)
''')
cli_cmd = f'"{sys.executable}" "{alias_mock}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
client.set_value("ai_input", "Test parameter aliases")
client.click("btn_gen_send")
# Handle approval
timeout = 15
start_time = time.time()
approved = False
while time.time() - start_time < timeout:
for ev in client.get_events():
if ev.get("type") == "ask_received":
requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": ev.get("request_id"), "response": {"approved": True}})
approved = True
if approved: break
time.sleep(0.5)
assert approved, "Tool approval event never received"
# Verify tool result in history
time.sleep(2)
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
# Check for "Tool worked!" which implies the tool execution was successful
found = any("Tool worked!" in e.get("content", "") for e in entries)
assert found, "Tool result indicating success not found in history"
os.remove(alias_mock)
def test_gemini_cli_loop_termination(live_gui):
"""
"""
Test that multi-round tool calling correctly terminates and preserves
payload (session context) between rounds.
"""
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop")
# This uses the existing mock_gemini_cli.py which is already designed for 2 rounds
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
cli_cmd = f'"{sys.executable}" "{mock_script}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
client.set_value("ai_input", "Perform multi-round tool test")
client.click("btn_gen_send")
# Handle approvals (mock does one tool call)
timeout = 20
start_time = time.time()
approved = False
while time.time() - start_time < timeout:
for ev in client.get_events():
if ev.get("type") == "ask_received":
requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": ev.get("request_id"), "response": {"approved": True}})
approved = True
if approved: break
time.sleep(0.5)
# Wait for the second round and final answer
found_final = False
start_time = time.time()
while time.time() - start_time < 15:
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
for e in entries:
if "processed the tool results" in e.get("content", ""):
found_final = True
break
if found_final: break
time.sleep(1)
assert found_final, "Final message after multi-round tool loop not found"

View File

@@ -6,136 +6,116 @@ import requests
from api_hook_client import ApiHookClient
def test_gemini_cli_full_integration(live_gui):
"""
"""
Integration test for the Gemini CLI provider and tool bridge.
Handles 'ask_received' events from the bridge and any other approval requests.
"""
client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session and enable history
client.click("btn_reset")
client.set_value("auto_add_history", True)
# Switch to manual_slop project explicitly
client.select_list_item("proj_files", "manual_slop")
# 1. Setup paths and configure the GUI
# The real gemini CLI could be used here, but for CI/testing we always use the mock
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
cli_cmd = f'"{sys.executable}" "{mock_script}"'
print(f"[TEST] Setting current_provider to gemini_cli")
client.set_value("current_provider", "gemini_cli")
print(f"[TEST] Setting gcli_path to {cli_cmd}")
client.set_value("gcli_path", cli_cmd)
# Verify settings
assert client.get_value("current_provider") == "gemini_cli"
# Clear events
client.get_events()
# 2. Trigger a message in the GUI
print("[TEST] Sending user message...")
client.set_value("ai_input", "Please read test.txt")
client.click("btn_gen_send")
# 3. Monitor for approval events
print("[TEST] Waiting for approval events...")
timeout = 45
start_time = time.time()
approved_count = 0
while time.time() - start_time < timeout:
events = client.get_events()
if events:
for ev in events:
etype = ev.get("type")
eid = ev.get("request_id") or ev.get("action_id")
print(f"[TEST] Received event: {etype} (ID: {eid})")
if etype in ["ask_received", "glob_approval_required", "script_confirmation_required"]:
print(f"[TEST] Approving {etype} {eid}")
if etype == "script_confirmation_required":
resp = requests.post(f"http://127.0.0.1:8999/api/confirm/{eid}", json={"approved": True})
else:
resp = requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": eid, "response": {"approved": True}})
assert resp.status_code == 200
approved_count += 1
# Check if we got a final response in history
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
found_final = False
for entry in entries:
content = entry.get("content", "")
if "Hello from mock!" in content or "processed the tool results" in content:
print(f"[TEST] Success! Found final message in history.")
found_final = True
break
if found_final:
break
time.sleep(1.0)
assert approved_count > 0, "No approval events were processed"
assert found_final, "Final message from mock CLI was not found in the GUI history"
def test_gemini_cli_rejection_and_history(live_gui):
"""
"""
Integration test for the Gemini CLI provider: Rejection flow and history.
"""
client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session
client.click("btn_reset")
client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop")
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
cli_cmd = f'"{sys.executable}" "{mock_script}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
# 2. Trigger a message
print("[TEST] Sending user message (to be denied)...")
client.set_value("ai_input", "Deny me")
client.click("btn_gen_send")
# 3. Wait for event and reject
timeout = 20
start_time = time.time()
denied = False
while time.time() - start_time < timeout:
for ev in client.get_events():
etype = ev.get("type")
eid = ev.get("request_id")
print(f"[TEST] Received event: {etype}")
if etype == "ask_received":
print(f"[TEST] Denying request {eid}")
requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": eid, "response": {"approved": False}})
denied = True
break
if denied: break
time.sleep(0.5)
assert denied, "No ask_received event to deny"
# 4. Verify rejection in history
print("[TEST] Waiting for rejection in history...")
rejection_found = False
start_time = time.time()
while time.time() - start_time < 20:
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
for entry in entries:
if "Tool execution was denied" in entry.get("content", ""):
rejection_found = True
break
if rejection_found: break
time.sleep(1.0)
assert rejection_found, "Rejection message not found in history"
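Both integration tests repeat the same approve/deny polling loop; it could be factored into a helper along these lines (a sketch only; the endpoint path and event fields are taken from the requests above, while the helper itself is hypothetical):
import time
import requests

def respond_to_next_ask(client, approved=True, timeout=20, base_url="http://127.0.0.1:8999"):
    # Poll the hook server for an 'ask_received' event and answer it.
    deadline = time.time() + timeout
    while time.time() < deadline:
        for ev in client.get_events():
            if ev.get("type") == "ask_received":
                requests.post(f"{base_url}/api/ask/respond",
                              json={"request_id": ev.get("request_id"),
                                    "response": {"approved": approved}})
                return True
        time.sleep(0.5)
    return False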

View File

@@ -10,43 +10,38 @@ import ai_client
@pytest.fixture(autouse=True)
def setup_ai_client():
ai_client.reset_session()
ai_client.set_provider("gemini_cli", "gemini-2.5-flash")
ai_client.confirm_and_run_callback = lambda script, base_dir: "Mocked execution"
ai_client.comms_log_callback = lambda entry: None
ai_client.tool_log_callback = lambda script, result: None
yield
@patch('ai_client.GeminiCliAdapter')
@patch('ai_client._get_combined_system_prompt')
def test_send_invokes_adapter_send(mock_prompt, mock_adapter_class):
mock_prompt.return_value = "Mocked Prompt"
mock_instance = mock_adapter_class.return_value
mock_instance.send.return_value = {"text": "Done", "tool_calls": []}
mock_instance.last_usage = {"input_tokens": 10}
mock_instance.last_latency = 0.1
mock_instance.session_id = None
ai_client.send("context", "message", discussion_history="hist")
expected_payload = "[DISCUSSION HISTORY]\n\nhist\n\n---\n\nmessage"
assert mock_instance.send.called
args, kwargs = mock_instance.send.call_args
assert args[0] == expected_payload
assert kwargs['system_instruction'] == "Mocked Prompt\n\n<context>\ncontext\n</context>"
@patch('ai_client.GeminiCliAdapter')
def test_get_history_bleed_stats(mock_adapter_class):
mock_instance = mock_adapter_class.return_value
mock_instance.send.return_value = {"text": "txt", "tool_calls": []}
mock_instance.last_usage = {"input_tokens": 1500}
mock_instance.last_latency = 0.5
mock_instance.session_id = "sess"
# Initialize by sending a message
ai_client.send("context", "msg")
stats = ai_client.get_history_bleed_stats()
assert stats["provider"] == "gemini_cli"
assert stats["current"] == 1500

View File

@@ -10,41 +10,34 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from ai_client import get_gemini_cache_stats, reset_session
def test_get_gemini_cache_stats_with_mock_client():
"""
"""
Test that get_gemini_cache_stats correctly processes cache lists
from a mocked client instance.
"""
# Ensure a clean state before the test by resetting the session
reset_session()
# 1. Create a mock for the cache object that the client will return
mock_cache = MagicMock()
mock_cache.name = "cachedContents/test-cache"
mock_cache.display_name = "Test Cache"
mock_cache.model = "models/gemini-1.5-pro-001"
mock_cache.size_bytes = 1024
# 2. Create a mock for the client instance
mock_client_instance = MagicMock()
# Configure its `caches.list` method to return our mock cache
mock_client_instance.caches.list.return_value = [mock_cache]
# 3. Patch the Client constructor to return our mock instance
# This intercepts the `_ensure_gemini_client` call inside the function
with patch('google.genai.Client', return_value=mock_client_instance) as mock_client_constructor:
# 4. Call the function under test
stats = get_gemini_cache_stats()
# 5. Assert that the function behaved as expected
# It should have constructed the client
mock_client_constructor.assert_called_once()
# It should have called the `list` method on the `caches` attribute
mock_client_instance.caches.list.assert_called_once()
# The returned stats dictionary should be correct
assert "cache_count" in stats
assert "total_size_bytes" in stats
assert stats["cache_count"] == 1
assert stats["total_size_bytes"] == 1024

View File

@@ -6,43 +6,40 @@ from events import EventEmitter
@pytest.fixture
def app_instance():
"""
"""
Fixture to create an instance of the gui_2.App class for testing.
It mocks functions that would render a window or block execution.
"""
if not hasattr(ai_client, 'events') or ai_client.events is None:
ai_client.events = EventEmitter()
with (
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager'),
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init')
):
yield App
def test_app_subscribes_to_events(app_instance):
"""
"""
This test checks that the App's __init__ method subscribes the necessary
event handlers to the ai_client.events emitter.
This test will fail until the event subscription logic is added to gui_2.App.
"""
with patch.object(ai_client.events, 'on') as mock_on:
app = app_instance()
mock_on.assert_called()
calls = mock_on.call_args_list
event_names = [call.args[0] for call in calls]
assert "request_start" in event_names
assert "response_received" in event_names
assert "tool_execution" in event_names
for call in calls:
handler = call.args[1]
assert hasattr(handler, '__self__')
assert handler.__self__ is app
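For orientation, the emitter contract these assertions rely on is roughly the following (a minimal sketch, not the project's events.EventEmitter):
class MiniEmitter:
    def __init__(self):
        self._handlers = {}

    def on(self, name, handler):
        # Register a handler (here: a bound method of the App instance) for an event name.
        self._handlers.setdefault(name, []).append(handler)

    def emit(self, name, *args, **kwargs):
        for handler in self._handlers.get(name, []):
            handler(*args, **kwargs)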

View File

@@ -4,45 +4,43 @@ from gui_2 import App
@pytest.fixture
def app_instance():
with (
patch('gui_2.load_config', return_value={'gui': {'show_windows': {}}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager'),
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init')
):
yield App()
def test_gui2_hubs_exist_in_show_windows(app_instance):
"""
"""
Verifies that the new consolidated Hub windows are defined in the App's show_windows.
This ensures they will be available in the 'Windows' menu.
"""
expected_hubs = [
"Context Hub",
"AI Settings",
"Discussion Hub",
"Operations Hub",
"Files & Media",
"Theme",
]
for hub in expected_hubs:
assert hub in app_instance.show_windows, f"Expected hub window '{hub}' not found in show_windows"
def test_gui2_old_windows_removed_from_show_windows(app_instance):
"""
"""
Verifies that the old fragmented windows are removed from show_windows.
"""
old_windows = [
"Projects", "Files", "Screenshots",
"Provider", "System Prompts",
"Message", "Response", "Tool Calls", "Comms History"
]
for old_win in old_windows:
assert old_win not in app_instance.show_windows, f"Old window '{old_win}' should have been removed from show_windows"

View File

@@ -6,74 +6,65 @@ from events import EventEmitter
@pytest.fixture
def app_instance():
if not hasattr(ai_client, 'events') or ai_client.events is None:
ai_client.events = EventEmitter()
with (
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager'),
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init')
):
yield App()
def test_mcp_tool_call_is_dispatched(app_instance):
"""
"""
This test verifies that when the AI returns a tool call for an MCP function,
the ai_client correctly dispatches it to mcp_client.
This will fail until mcp_client is properly integrated.
"""
# 1. Define the mock tool call from the AI
mock_fc = MagicMock()
mock_fc.name = "read_file"
mock_fc.args = {"file_path": "test.txt"}
# 2. Construct the mock AI response (Gemini format)
mock_response_with_tool = MagicMock()
mock_part = MagicMock()
mock_part.text = ""
mock_part.function_call = mock_fc
mock_candidate = MagicMock()
mock_candidate.content.parts = [mock_part]
mock_candidate.finish_reason.name = "TOOL_CALLING"
mock_response_with_tool.candidates = [mock_candidate]
class DummyUsage:
prompt_token_count = 100
candidates_token_count = 10
cached_content_token_count = 0
mock_response_with_tool.usage_metadata = DummyUsage()
# 3. Create a mock for the final AI response after the tool call
mock_response_final = MagicMock()
mock_response_final.text = "Final answer"
mock_response_final.candidates = []
mock_response_final.usage_metadata = DummyUsage()
# 4. Patch the necessary components
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client, \
patch('mcp_client.dispatch', return_value="file content") as mock_dispatch:
mock_chat = mock_client.chats.create.return_value
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
ai_client.set_provider("gemini", "mock-model")
# 5. Call the send function
ai_client.send(
md_content="some context",
user_message="read the file",
base_dir=".",
file_items=[],
discussion_history=""
)
# 6. Assert that the MCP dispatch function was called
mock_dispatch.assert_called_once_with("read_file", {"file_path": "test.txt"})

View File

@@ -15,70 +15,62 @@ TEST_CALLBACK_FILE = Path("temp_callback_output.txt")
@pytest.fixture(scope="function", autouse=True)
def cleanup_callback_file():
"""Ensures the test callback file is cleaned up before and after each test."""
if TEST_CALLBACK_FILE.exists():
TEST_CALLBACK_FILE.unlink()
yield
if TEST_CALLBACK_FILE.exists():
TEST_CALLBACK_FILE.unlink()
"""Ensures the test callback file is cleaned up before and after each test."""
if TEST_CALLBACK_FILE.exists():
TEST_CALLBACK_FILE.unlink()
yield
if TEST_CALLBACK_FILE.exists():
TEST_CALLBACK_FILE.unlink()
def test_gui2_set_value_hook_works(live_gui):
"""
"""
Tests that the 'set_value' GUI hook is correctly implemented.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
test_value = f"New value set by test: {uuid.uuid4()}"
gui_data = {'action': 'set_value', 'item': 'ai_input', 'value': test_value}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
# Verify the value was actually set using the new get_value hook
time.sleep(0.5)
current_value = client.get_value('ai_input')
assert current_value == test_value
def test_gui2_click_hook_works(live_gui):
"""
"""
Tests that the 'click' GUI hook for the 'Reset' button is implemented.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
# First, set some state that 'Reset' would clear.
test_value = "This text should be cleared by the reset button."
client.set_value('ai_input', test_value)
time.sleep(0.5)
assert client.get_value('ai_input') == test_value
# Now, trigger the click
client.click('btn_reset')
time.sleep(0.5)
# Verify it was reset
assert client.get_value('ai_input') == ""
def test_gui2_custom_callback_hook_works(live_gui):
"""
"""
Tests that the 'custom_callback' GUI hook is correctly implemented.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
test_data = f"Callback executed: {uuid.uuid4()}"
gui_data = {
'action': 'custom_callback',
'callback': '_test_callback_func_write_to_file',
'args': [test_data]
}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
time.sleep(1) # Give gui_2.py time to process its task queue
# Assert that the file WAS created and contains the correct data
assert TEST_CALLBACK_FILE.exists(), "Custom callback was NOT executed, or file path is wrong!"
with open(TEST_CALLBACK_FILE, "r") as f:
content = f.read()
assert content == test_data, "Callback executed, but file content is incorrect."
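This test assumes gui_2 exposes a test-only callback that writes its argument to TEST_CALLBACK_FILE; reconstructed from the assertions above, it would look roughly like this (hypothetical sketch, not the actual gui_2 code):
def _test_callback_func_write_to_file(data):
    # Test-only hook target: persist the payload so the test can assert on it.
    with open("temp_callback_output.txt", "w") as f:
        f.write(data)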

View File

@@ -12,78 +12,66 @@ from api_hook_client import ApiHookClient
_shared_metrics = {}
def test_performance_benchmarking(live_gui):
"""
"""
Collects performance metrics for the current GUI script (parameterized as gui_legacy.py and gui_2.py).
"""
process, gui_script = live_gui
client = ApiHookClient()
# Wait for app to stabilize and render some frames
time.sleep(3.0)
# Collect metrics over 5 seconds
fps_values = []
cpu_values = []
frame_time_values = []
start_time = time.time()
while time.time() - start_time < 5:
try:
perf_data = client.get_performance()
metrics = perf_data.get('performance', {})
if metrics:
fps = metrics.get('fps', 0.0)
cpu = metrics.get('cpu_percent', 0.0)
ft = metrics.get('last_frame_time_ms', 0.0)
# In some CI environments without a display, metrics might be 0
# We only record positive ones to avoid skewing averages if hooks are failing
if fps > 0:
fps_values.append(fps)
cpu_values.append(cpu)
frame_time_values.append(ft)
time.sleep(0.1)
except Exception:
break
avg_fps = sum(fps_values) / len(fps_values) if fps_values else 0
avg_cpu = sum(cpu_values) / len(cpu_values) if cpu_values else 0
avg_ft = sum(frame_time_values) / len(frame_time_values) if frame_time_values else 0
_shared_metrics[gui_script] = {
"avg_fps": avg_fps,
"avg_cpu": avg_cpu,
"avg_ft": avg_ft
}
print(f"\n[Test] Results for {gui_script}: FPS={avg_fps:.2f}, CPU={avg_cpu:.2f}%, FT={avg_ft:.2f}ms")
# Absolute minimum requirements
if avg_fps > 0:
assert avg_fps >= 30, f"{gui_script} FPS {avg_fps:.2f} is below 30 FPS threshold"
assert avg_ft <= 33.3, f"{gui_script} Frame time {avg_ft:.2f}ms is above 33.3ms threshold"
def test_performance_parity():
"""
"""
Compare the metrics collected in the parameterized test_performance_benchmarking.
"""
if "gui_legacy.py" not in _shared_metrics or "gui_2.py" not in _shared_metrics:
if len(_shared_metrics) < 2:
pytest.skip("Metrics for both GUIs not yet collected.")
gui_m = _shared_metrics["gui_legacy.py"]
gui2_m = _shared_metrics["gui_2.py"]
# FPS parity check: assert within 15% for now (long-term target is 5%) and log the actual difference
fps_diff_pct = abs(gui_m["avg_fps"] - gui2_m["avg_fps"]) / gui_m["avg_fps"] if gui_m["avg_fps"] > 0 else 0
cpu_diff_pct = abs(gui_m["avg_cpu"] - gui2_m["avg_cpu"]) / gui_m["avg_cpu"] if gui_m["avg_cpu"] > 0 else 0
print(f"\n--- Performance Parity Results ---")
print(f"FPS Diff: {fps_diff_pct*100:.2f}%")
print(f"CPU Diff: {cpu_diff_pct*100:.2f}%")
# The long-term FPS requirement is 5%, but the assertion uses the looser 15% bound for now.
# CPU usage is much noisier, so it gets far more leeway (a 300% bound).
assert fps_diff_pct <= 0.15, f"FPS difference {fps_diff_pct*100:.2f}% exceeds 15% threshold"
assert cpu_diff_pct <= 3.0, f"CPU difference {cpu_diff_pct*100:.2f}% exceeds 300% threshold"
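# Worked example of the parity formula above (illustrative numbers only): if gui_legacy
# averages 60 FPS and gui_2 averages 54 FPS, the relative difference is
# abs(60 - 54) / 60 = 0.10, i.e. 10%, which passes the 15% gate.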
if "gui_legacy.py" not in _shared_metrics or "gui_2.py" not in _shared_metrics:
if len(_shared_metrics) < 2:
pytest.skip("Metrics for both GUIs not yet collected.")
gui_m = _shared_metrics["gui_legacy.py"]
gui2_m = _shared_metrics["gui_2.py"]
# FPS Parity Check (+/- 15% leeway for now, target is 5%)
# Actually I'll use 0.15 for assertion and log the actual.
fps_diff_pct = abs(gui_m["avg_fps"] - gui2_m["avg_fps"]) / gui_m["avg_fps"] if gui_m["avg_fps"] > 0 else 0
cpu_diff_pct = abs(gui_m["avg_cpu"] - gui2_m["avg_cpu"]) / gui_m["avg_cpu"] if gui_m["avg_cpu"] > 0 else 0
print(f"\n--- Performance Parity Results ---")
print(f"FPS Diff: {fps_diff_pct*100:.2f}%")
print(f"CPU Diff: {cpu_diff_pct*100:.2f}%")
# We follow the 5% requirement for FPS
# For CPU we might need more leeway
assert fps_diff_pct <= 0.15, f"FPS difference {fps_diff_pct*100:.2f}% exceeds 15% threshold"
assert cpu_diff_pct <= 3.0, f"CPU difference {cpu_diff_pct*100:.2f}% exceeds 300% threshold"

View File

@@ -6,75 +6,70 @@ from events import UserRequestEvent
@pytest.fixture
def mock_gui():
with (
patch('gui_2.load_config', return_value={
"ai": {"provider": "gemini", "model": "model-1"},
"projects": {"paths": [], "active": ""},
"gui": {"show_windows": {}}
}),
patch('gui_2.project_manager.load_project', return_value={}),
patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
patch('gui_2.project_manager.save_project'),
patch('gui_2.session_logger.open_session'),
patch('gui_2.App._init_ai_and_hooks'),
patch('gui_2.App._fetch_models')
):
gui = App()
return gui
def test_handle_generate_send_pushes_event(mock_gui):
# Mock _do_generate to return sample data
mock_gui._do_generate = MagicMock(return_value=(
"full_md", "path", [], "stable_md", "disc_text"
))
mock_gui.ui_ai_input = "test prompt"
mock_gui.ui_files_base_dir = "."
# Mock event_queue.put
mock_gui.event_queue.put = MagicMock()
# We need to mock asyncio.run_coroutine_threadsafe to immediately execute
with patch('asyncio.run_coroutine_threadsafe') as mock_run:
mock_gui._handle_generate_send()
# Verify run_coroutine_threadsafe was called
assert mock_run.called
# Verify the call to event_queue.put was correct
# This is a bit tricky since the first arg to run_coroutine_threadsafe
# is the coroutine returned by event_queue.put().
# Let's verify that the call to put occurred.
mock_gui.event_queue.put.assert_called_once()
args, kwargs = mock_gui.event_queue.put.call_args
assert args[0] == "user_request"
event = args[1]
assert isinstance(event, UserRequestEvent)
assert event.prompt == "test prompt"
assert event.stable_md == "stable_md"
assert event.disc_text == "disc_text"
assert event.base_dir == "."
def test_user_request_event_payload():
payload = UserRequestEvent(
prompt="hello",
stable_md="md",
file_items=[],
disc_text="disc",
base_dir="."
)
d = payload.to_dict()
assert d["prompt"] == "hello"
assert d["stable_md"] == "md"
assert d["file_items"] == []
assert d["disc_text"] == "disc"
assert d["base_dir"] == "."
@pytest.mark.asyncio
async def test_async_event_queue():
from events import AsyncEventQueue
q = AsyncEventQueue()
await q.put("test_event", {"data": 123})
name, payload = await q.get()
assert name == "test_event"
assert payload["data"] == 123

View File

@@ -13,53 +13,48 @@ from gui_legacy import App
@pytest.fixture
def app_instance():
dpg.create_context()
with patch('dearpygui.dearpygui.create_viewport'), \
patch('dearpygui.dearpygui.setup_dearpygui'), \
patch('dearpygui.dearpygui.show_viewport'), \
patch('dearpygui.dearpygui.start_dearpygui'), \
patch('gui_legacy.load_config', return_value={}), \
patch.object(App, '_rebuild_files_list'), \
patch.object(App, '_rebuild_shots_list'), \
patch.object(App, '_rebuild_disc_list'), \
patch.object(App, '_rebuild_disc_roles_list'), \
patch.object(App, '_rebuild_discussion_selector'), \
patch.object(App, '_refresh_project_widgets'):
app = App()
yield app
dpg.destroy_context()
def test_diagnostics_panel_initialization(app_instance):
assert "Diagnostics" in app_instance.window_info
assert app_instance.window_info["Diagnostics"] == "win_diagnostics"
assert "frame_time" in app_instance.perf_history
assert len(app_instance.perf_history["frame_time"]) == 100
assert "Diagnostics" in app_instance.window_info
assert app_instance.window_info["Diagnostics"] == "win_diagnostics"
assert "frame_time" in app_instance.perf_history
assert len(app_instance.perf_history["frame_time"]) == 100
def test_diagnostics_panel_updates(app_instance):
# Mock dependencies
mock_metrics = {
'last_frame_time_ms': 10.0,
'fps': 100.0,
'cpu_percent': 50.0,
'input_lag_ms': 5.0
}
app_instance.perf_monitor.get_metrics = MagicMock(return_value=mock_metrics)
with patch('dearpygui.dearpygui.is_item_shown', return_value=True), \
patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
patch('dearpygui.dearpygui.does_item_exist', return_value=True):
# We also need to mock ai_client stats
with patch('ai_client.get_history_bleed_stats', return_value={}):
app_instance._update_performance_diagnostics()
# Verify UI updates
mock_set_value.assert_any_call("perf_fps_text", "100.0")
mock_set_value.assert_any_call("perf_frame_text", "10.0ms")
mock_set_value.assert_any_call("perf_cpu_text", "50.0%")
mock_set_value.assert_any_call("perf_lag_text", "5.0ms")
# Verify history update
assert app_instance.perf_history["frame_time"][-1] == 10.0

View File

@@ -8,55 +8,47 @@ import ai_client
@pytest.fixture
def app_instance():
"""
"""
Fixture to create an instance of the App class for testing.
It creates a real DPG context but mocks functions that would
render a window or block execution.
"""
dpg.create_context()
with patch('dearpygui.dearpygui.create_viewport'), \
patch('dearpygui.dearpygui.setup_dearpygui'), \
patch('dearpygui.dearpygui.show_viewport'), \
patch('dearpygui.dearpygui.start_dearpygui'), \
patch('gui_legacy.load_config', return_value={}), \
patch('gui_legacy.PerformanceMonitor'), \
patch('gui_legacy.shell_runner'), \
patch('gui_legacy.project_manager'), \
patch.object(App, '_load_active_project'), \
patch.object(App, '_rebuild_files_list'), \
patch.object(App, '_rebuild_shots_list'), \
patch.object(App, '_rebuild_disc_list'), \
patch.object(App, '_rebuild_disc_roles_list'), \
patch.object(App, '_rebuild_discussion_selector'), \
patch.object(App, '_refresh_project_widgets'):
app = App()
yield app
dpg.destroy_context()
def test_gui_updates_on_event(app_instance):
# Patch dependencies for the test
with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
patch('dearpygui.dearpygui.configure_item'), \
patch('ai_client.get_history_bleed_stats') as mock_stats:
mock_stats.return_value = {"percentage": 50.0, "current": 500, "limit": 1000}
# We'll use patch.object to see if _refresh_api_metrics is called
with patch.object(app_instance, '_refresh_api_metrics', wraps=app_instance._refresh_api_metrics) as mock_refresh:
# Simulate event
ai_client.events.emit("response_received", payload={})
# Process tasks manually
app_instance._process_pending_gui_tasks()
# Verify that _refresh_api_metrics was called
mock_refresh.assert_called_once()
# Verify that dpg.set_value was called for the metrics widgets
calls = [call.args[0] for call in mock_set_value.call_args_list]
assert "token_budget_bar" in calls
assert "token_budget_label" in calls

View File

@@ -9,32 +9,27 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def test_idle_performance_requirements(live_gui):
"""
"""
Requirement: GUI must maintain stable performance on idle.
"""
client = ApiHookClient()
# Wait for app to stabilize and render some frames
time.sleep(2.0)
# Get multiple samples to be sure
samples = []
for _ in range(5):
perf_data = client.get_performance()
samples.append(perf_data)
time.sleep(0.5)
# Check for valid metrics
valid_ft_count = 0
for sample in samples:
performance = sample.get('performance', {})
frame_time = performance.get('last_frame_time_ms', 0.0)
# We expect a positive frame time if rendering is happening
if frame_time > 0:
valid_ft_count += 1
assert frame_time < 33.3, f"Frame time {frame_time}ms exceeds 30fps threshold"
print(f"[Test] Valid frame time samples: {valid_ft_count}/5")
# In some CI environments without a real display, frame time might remain 0
# but we've verified the hook is returning the dictionary.

View File

@@ -9,45 +9,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def test_comms_volume_stress_performance(live_gui):
"""
"""
Stress test: Inject many session entries and verify performance doesn't degrade.
"""
client = ApiHookClient()
# 1. Capture baseline
time.sleep(2.0) # Wait for stability
baseline_resp = client.get_performance()
baseline = baseline_resp.get('performance', {})
baseline_ft = baseline.get('last_frame_time_ms', 0.0)
# 2. Inject 50 "dummy" session entries
# Role must match DISC_ROLES in gui_legacy.py (User, AI, Vendor API, System)
large_session = []
for i in range(50):
large_session.append({
"role": "User",
"content": f"Stress test entry {i} " * 5,
"ts": time.time(),
"collapsed": False
})
client.post_session(large_session)
# Give it a moment to process UI updates
time.sleep(1.0)
# 3. Capture stress performance
stress_resp = client.get_performance()
stress = stress_resp.get('performance', {})
stress_ft = stress.get('last_frame_time_ms', 0.0)
print(f"Baseline FT: {baseline_ft:.2f}ms, Stress FT: {stress_ft:.2f}ms")
# If we got valid timing, assert it's within reason
if stress_ft > 0:
assert stress_ft < 33.3, f"Stress frame time {stress_ft:.2f}ms exceeds 30fps threshold"
# Ensure the session actually updated
session_data = client.get_session()
entries = session_data.get('session', {}).get('entries', [])
assert len(entries) >= 50, f"Expected at least 50 entries, got {len(entries)}"

View File

@@ -17,103 +17,88 @@ from gui_legacy import App
@pytest.fixture
def app_instance():
"""
"""
Fixture to create an instance of the App class for testing.
It creates a real DPG context but mocks functions that would
render a window or block execution.
"""
dpg.create_context()
# Patch only the functions that would show a window or block,
# and the App methods that rebuild UI on init.
with patch('dearpygui.dearpygui.create_viewport'), \
patch('dearpygui.dearpygui.setup_dearpygui'), \
patch('dearpygui.dearpygui.show_viewport'), \
patch('dearpygui.dearpygui.start_dearpygui'), \
patch('gui_legacy.load_config', return_value={}), \
patch.object(App, '_rebuild_files_list'), \
patch.object(App, '_rebuild_shots_list'), \
patch.object(App, '_rebuild_disc_list'), \
patch.object(App, '_rebuild_disc_roles_list'), \
patch.object(App, '_rebuild_discussion_selector'), \
patch.object(App, '_refresh_project_widgets'):
app = App()
yield app
dpg.destroy_context()
def test_telemetry_panel_updates_correctly(app_instance):
"""
"""
Tests that the _update_performance_diagnostics method correctly updates
DPG widgets based on the stats from ai_client.
"""
# 1. Set the provider to anthropic
app_instance.current_provider = "anthropic"
# 2. Define the mock stats
mock_stats = {
"provider": "anthropic",
"limit": 180000,
"current": 135000,
"percentage": 75.0,
}
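# Sanity check on the numbers above: 135,000 / 180,000 = 0.75, which is why the test
# expects the progress bar to be set to 0.75 and the label to read "135,000 / 180,000".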
# 3. Patch the dependencies
app_instance._last_bleed_update_time = 0 # Force update
with patch('ai_client.get_history_bleed_stats', return_value=mock_stats) as mock_get_stats, \
patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
patch('dearpygui.dearpygui.is_item_shown', return_value=False), \
patch('dearpygui.dearpygui.does_item_exist', return_value=True) as mock_does_item_exist:
# 4. Call the method under test
app_instance._refresh_api_metrics()
# 5. Assert the results
mock_get_stats.assert_called_once()
# Assert history bleed widgets were updated
mock_set_value.assert_any_call("token_budget_bar", 0.75)
mock_set_value.assert_any_call("token_budget_label", "135,000 / 180,000")
# Assert Gemini-specific widget was hidden
mock_configure_item.assert_any_call("gemini_cache_label", show=False)
def test_cache_data_display_updates_correctly(app_instance):
"""
"""
Tests that the _update_performance_diagnostics method correctly updates the
GUI with Gemini cache statistics when the provider is set to Gemini.
"""
# 1. Set the provider to Gemini
app_instance.current_provider = "gemini"
# 2. Define mock cache stats
mock_cache_stats = {
'cache_count': 5,
'total_size_bytes': 12345
}
# Expected formatted string
expected_text = "Gemini Caches: 5 (12.1 KB)"
# 3. Patch dependencies
app_instance._last_bleed_update_time = 0 # Force update
with patch('ai_client.get_gemini_cache_stats', return_value=mock_cache_stats) as mock_get_cache_stats, \
patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
patch('dearpygui.dearpygui.is_item_shown', return_value=False), \
patch('dearpygui.dearpygui.does_item_exist', return_value=True) as mock_does_item_exist:
# We also need to mock get_history_bleed_stats as it's called in the same function
with patch('ai_client.get_history_bleed_stats', return_value={}):
# 4. Call the method under test with payload
app_instance._refresh_api_metrics(payload={'cache_stats': mock_cache_stats})
# 5. Assert the results
# mock_get_cache_stats.assert_called_once() # No longer called synchronously
# Check that the UI item was shown and its value was set
mock_configure_item.assert_any_call("gemini_cache_label", show=True)
mock_set_value.assert_any_call("gemini_cache_label", expected_text)

View File

@@ -8,173 +8,163 @@ from pathlib import Path
from fastapi.testclient import TestClient
class TestHeadlessAPI(unittest.TestCase):
def setUp(self):
# We need an App instance to initialize the API, but we want to avoid GUI stuff
with patch('gui_2.session_logger.open_session'), \
patch('gui_2.ai_client.set_provider'), \
patch('gui_2.session_logger.close_session'):
self.app_instance = gui_2.App()
# Set a default API key for tests
self.test_api_key = "test-secret-key"
self.app_instance.config["headless"] = {"api_key": self.test_api_key}
self.headers = {"X-API-KEY": self.test_api_key}
# Clear any leftover state
self.app_instance._pending_actions = {}
self.app_instance._pending_dialog = None
self.api = self.app_instance.create_api()
self.client = TestClient(self.api)
def test_health_endpoint(self):
response = self.client.get("/health")
self.assertEqual(response.status_code, 200)
self.assertEqual(response.json(), {"status": "ok"})
def test_status_endpoint_unauthorized(self):
# Ensure a key is required
with patch.dict(self.app_instance.config, {"headless": {"api_key": "some-required-key"}}):
response = self.client.get("/status")
self.assertEqual(response.status_code, 403)
def test_status_endpoint_authorized(self):
# We'll use a test key
headers = {"X-API-KEY": "test-secret-key"}
with patch.dict(self.app_instance.config, {"headless": {"api_key": "test-secret-key"}}):
response = self.client.get("/status", headers=headers)
self.assertEqual(response.status_code, 200)
def test_generate_endpoint(self):
payload = {
"prompt": "Hello AI"
}
# Mock ai_client.send and get_comms_log
with patch('gui_2.ai_client.send') as mock_send, \
patch('gui_2.ai_client.get_comms_log') as mock_log:
mock_send.return_value = "Hello from Mock AI"
mock_log.return_value = [{
"kind": "response",
"payload": {
"usage": {"input_tokens": 10, "output_tokens": 5}
}
}]
response = self.client.post("/api/v1/generate", json=payload, headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertEqual(data["text"], "Hello from Mock AI")
self.assertIn("metadata", data)
self.assertEqual(data["usage"]["input_tokens"], 10)
def test_pending_actions_endpoint(self):
# Manually add a pending action
with patch('gui_2.uuid.uuid4', return_value="test-action-id"):
dialog = gui_2.ConfirmDialog("dir", ".")
self.app_instance._pending_actions[dialog._uid] = dialog
response = self.client.get("/api/v1/pending_actions", headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertEqual(len(data), 1)
self.assertEqual(data[0]["action_id"], "test-action-id")
def test_confirm_action_endpoint(self):
# Manually add a pending action
with patch('gui_2.uuid.uuid4', return_value="test-confirm-id"):
dialog = gui_2.ConfirmDialog("dir", ".")
self.app_instance._pending_actions[dialog._uid] = dialog
payload = {"approved": True}
response = self.client.post("/api/v1/confirm/test-confirm-id", json=payload, headers=self.headers)
self.assertEqual(response.status_code, 200)
self.assertTrue(dialog._done)
self.assertTrue(dialog._approved)
response = self.client.post("/api/v1/generate", json=payload, headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertEqual(data["text"], "Hello from Mock AI")
self.assertIn("metadata", data)
self.assertEqual(data["usage"]["input_tokens"], 10)
def test_list_sessions_endpoint(self):
# Ensure logs directory exists
Path("logs").mkdir(exist_ok=True)
# Create a dummy log
dummy_log = Path("logs/test_session_api.log")
dummy_log.write_text("dummy content")
try:
response = self.client.get("/api/v1/sessions", headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertIn("test_session_api.log", data)
finally:
if dummy_log.exists():
dummy_log.unlink()
def test_get_context_endpoint(self):
response = self.client.get("/api/v1/context", headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertIn("files", data)
self.assertIn("screenshots", data)
self.assertIn("files_base_dir", data)
response = self.client.get("/api/v1/pending_actions", headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertEqual(len(data), 1)
self.assertEqual(data[0]["action_id"], "test-action-id")
def test_confirm_action_endpoint(self):
# Manually add a pending action
with patch('gui_2.uuid.uuid4', return_value="test-confirm-id"):
dialog = gui_2.ConfirmDialog("dir", ".")
self.app_instance._pending_actions[dialog._uid] = dialog
payload = {"approved": True}
response = self.client.post("/api/v1/confirm/test-confirm-id", json=payload, headers=self.headers)
self.assertEqual(response.status_code, 200)
self.assertTrue(dialog._done)
self.assertTrue(dialog._approved)
def test_list_sessions_endpoint(self):
# Ensure logs directory exists
Path("logs").mkdir(exist_ok=True)
# Create a dummy log
dummy_log = Path("logs/test_session_api.log")
dummy_log.write_text("dummy content")
try:
response = self.client.get("/api/v1/sessions", headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertIn("test_session_api.log", data)
finally:
if dummy_log.exists():
dummy_log.unlink()
def test_get_context_endpoint(self):
response = self.client.get("/api/v1/context", headers=self.headers)
self.assertEqual(response.status_code, 200)
data = response.json()
self.assertIn("files", data)
self.assertIn("screenshots", data)
self.assertIn("files_base_dir", data)
def test_endpoint_no_api_key_configured(self):
# Test the security fix specifically
with patch.dict(self.app_instance.config, {"headless": {"api_key": ""}}):
response = self.client.get("/status", headers=self.headers)
self.assertEqual(response.status_code, 403)
self.assertEqual(response.json()["detail"], "API Key not configured on server")
class TestHeadlessStartup(unittest.TestCase):
@patch('gui_2.immapp.run')
@patch('gui_2.api_hooks.HookServer')
@patch('gui_2.save_config')
@patch('gui_2.ai_client.cleanup')
@patch('uvicorn.run') # Mock uvicorn.run to prevent hanging
def test_headless_flag_prevents_gui_run(self, mock_uvicorn_run, mock_cleanup, mock_save_config, mock_hook_server, mock_immapp_run):
# Setup mock argv with --headless
test_args = ["gui_2.py", "--headless"]
with patch.object(sys, 'argv', test_args):
with patch('gui_2.session_logger.close_session'), \
patch('gui_2.session_logger.open_session'):
app = gui_2.App()
# Mock _fetch_models to avoid network calls
app._fetch_models = MagicMock()
app.run()
# Expectation: immapp.run should NOT be called in headless mode
mock_immapp_run.assert_not_called()
# Expectation: uvicorn.run SHOULD be called
mock_uvicorn_run.assert_called_once()
@patch('gui_2.immapp.run')
def test_normal_startup_calls_gui_run(self, mock_immapp_run):
test_args = ["gui_2.py"]
with patch.object(sys, 'argv', test_args):
# In normal mode, it should still call immapp.run
with patch('gui_2.api_hooks.HookServer'), \
patch('gui_2.save_config'), \
patch('gui_2.ai_client.cleanup'), \
patch('gui_2.session_logger.close_session'), \
patch('gui_2.session_logger.open_session'):
app = gui_2.App()
app._fetch_models = MagicMock()
app.run()
mock_immapp_run.assert_called_once()
def test_fastapi_installed():
"""Verify that fastapi is installed."""
try:
importlib.import_module("fastapi")
except ImportError:
pytest.fail("fastapi is not installed")
"""Verify that fastapi is installed."""
try:
importlib.import_module("fastapi")
except ImportError:
pytest.fail("fastapi is not installed")
def test_uvicorn_installed():
"""Verify that uvicorn is installed."""
try:
importlib.import_module("uvicorn")
except ImportError:
pytest.fail("uvicorn is not installed")
"""Verify that uvicorn is installed."""
try:
importlib.import_module("uvicorn")
except ImportError:
pytest.fail("uvicorn is not installed")
if __name__ == "__main__":
unittest.main()

View File

@@ -7,137 +7,113 @@ import json
@pytest.mark.asyncio
async def test_headless_verification_full_run():
"""
"""
1. Initialize a ConductorEngine with a Track containing multiple dependent Tickets.
2. Simulate a full execution run using engine.run_linear().
3. Mock ai_client.send to simulate successful tool calls and final responses.
4. Specifically verify that 'Context Amnesia' is maintained.
"""
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker1", depends_on=["T1"])
track = Track(id="track_verify", description="Verification Track", tickets=[t1, t2])
from events import AsyncEventQueue
queue = AsyncEventQueue()
engine = ConductorEngine(track=track, event_queue=queue)
with patch("ai_client.send") as mock_send, \
patch("ai_client.reset_session") as mock_reset:
# We need mock_send to return something that doesn't contain "BLOCKED"
mock_send.return_value = "Task completed successfully."
await engine.run_linear()
# Verify both tickets are completed
assert t1.status == "completed"
assert t2.status == "completed"
# Verify that ai_client.send was called twice (once for each ticket)
assert mock_send.call_count == 2
# Verify Context Amnesia: reset_session should be called for each ticket
assert mock_reset.call_count == 2
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker1", depends_on=["T1"])
track = Track(id="track_verify", description="Verification Track", tickets=[t1, t2])
from events import AsyncEventQueue
queue = AsyncEventQueue()
engine = ConductorEngine(track=track, event_queue=queue)
with patch("ai_client.send") as mock_send, \
patch("ai_client.reset_session") as mock_reset:
# We need mock_send to return something that doesn't contain "BLOCKED"
mock_send.return_value = "Task completed successfully."
await engine.run_linear()
# Verify both tickets are completed
assert t1.status == "completed"
assert t2.status == "completed"
# Verify that ai_client.send was called twice (once for each ticket)
assert mock_send.call_count == 2
# Verify Context Amnesia: reset_session should be called for each ticket
assert mock_reset.call_count == 2
@pytest.mark.asyncio
async def test_headless_verification_error_and_qa_interceptor():
"""
"""
5. Simulate a shell error and verify that the Tier 4 QA interceptor is triggered
and its summary is injected into the worker's history for the next retry.
"""
t1 = Ticket(id="T1", description="Task with error", status="todo", assigned_to="worker1")
track = Track(id="track_error", description="Error Track", tickets=[t1])
from events import AsyncEventQueue
queue = AsyncEventQueue()
engine = ConductorEngine(track=track, event_queue=queue)
t1 = Ticket(id="T1", description="Task with error", status="todo", assigned_to="worker1")
track = Track(id="track_error", description="Error Track", tickets=[t1])
from events import AsyncEventQueue
queue = AsyncEventQueue()
engine = ConductorEngine(track=track, event_queue=queue)
# We need to simulate the tool loop inside ai_client._send_gemini (or similar)
# Since we want to test the real tool loop and QA injection, we mock at the provider level.
with patch("ai_client._provider", "gemini"), \
patch("ai_client._gemini_client") as mock_genai_client, \
patch("ai_client.confirm_and_run_callback") as mock_run, \
patch("ai_client.run_tier4_analysis") as mock_qa, \
patch("ai_client._ensure_gemini_client") as mock_ensure, \
patch("ai_client._gemini_tool_declaration", return_value=None):
# Ensure _gemini_client is restored by the mock ensure function
import ai_client
def restore_client():
ai_client._gemini_client = mock_genai_client
mock_ensure.side_effect = restore_client
ai_client._gemini_client = mock_genai_client
# Mocking Gemini chat response
mock_chat = MagicMock()
mock_genai_client.chats.create.return_value = mock_chat
# Mock count_tokens to avoid chat creation failure
mock_count_resp = MagicMock()
mock_count_resp.total_tokens = 100
mock_genai_client.models.count_tokens.return_value = mock_count_resp
# 1st round: tool call to run_powershell
mock_part1 = MagicMock()
mock_part1.text = "I will run a command."
mock_part1.function_call = MagicMock()
mock_part1.function_call.name = "run_powershell"
mock_part1.function_call.args = {"script": "dir"}
mock_resp1 = MagicMock()
mock_resp1.candidates = [MagicMock(content=MagicMock(parts=[mock_part1]), finish_reason=MagicMock(name="STOP"))]
mock_resp1.usage_metadata.prompt_token_count = 10
mock_resp1.usage_metadata.candidates_token_count = 5
# 2nd round: Final text after tool result
mock_part2 = MagicMock()
mock_part2.text = "The command failed but I understand why. Task done."
mock_part2.function_call = None
mock_resp2 = MagicMock()
mock_resp2.candidates = [MagicMock(content=MagicMock(parts=[mock_part2]), finish_reason=MagicMock(name="STOP"))]
mock_resp2.usage_metadata.prompt_token_count = 20
mock_resp2.usage_metadata.candidates_token_count = 10
mock_chat.send_message.side_effect = [mock_resp1, mock_resp2]
# Mock run_powershell behavior: it should call the qa_callback on error
def run_side_effect(script, base_dir, qa_callback):
if qa_callback:
analysis = qa_callback("Error: file not found")
return f"""STDERR: Error: file not found
QA ANALYSIS:
{analysis}"""
return "Error: file not found"
mock_run.side_effect = run_side_effect
mock_qa.return_value = "FIX: Check if path exists."
await engine.run_linear()
# Verify QA analysis was triggered
mock_qa.assert_called_once_with("Error: file not found")
# Verify the 2nd send_message call includes the QA ANALYSIS in its payload (f_resps)
# The first call is the user message, the second is the tool response.
assert mock_chat.send_message.call_count == 2
args, kwargs = mock_chat.send_message.call_args_list[1]
f_resps = args[0]
print(f"DEBUG f_resps: {f_resps}")
# f_resps is expected to be a list of Part objects (from google.genai.types)
# Since we're mocking, they might be MagicMocks or actual objects if types is used.
# In our case, ai_client.Part.from_function_response is used.
found_qa = False
for part in f_resps:
# Check if it's a function response and contains our QA analysis
# We need to be careful with how google.genai.types.Part is structured or mocked
part_str = str(part)
print(f"DEBUG part_str: {part_str}")
if "QA ANALYSIS:" in part_str and "FIX: Check if path exists." in part_str:
found_qa = True
assert found_qa, "QA Analysis was not injected into the next round"
return "Error: file not found"
mock_run.side_effect = run_side_effect
mock_qa.return_value = "FIX: Check if path exists."
await engine.run_linear()
# Verify QA analysis was triggered
mock_qa.assert_called_once_with("Error: file not found")
# Verify the 2nd send_message call includes the QA ANALYSIS in its payload (f_resps)
# The first call is the user message, the second is the tool response.
assert mock_chat.send_message.call_count == 2
args, kwargs = mock_chat.send_message.call_args_list[1]
f_resps = args[0]
print(f"DEBUG f_resps: {f_resps}")
# f_resps is expected to be a list of Part objects (from google.genai.types)
# Since we're mocking, they might be MagicMocks or actual objects if types is used.
# In our case, ai_client.Part.from_function_response is used.
found_qa = False
for part in f_resps:
# Check if it's a function response and contains our QA analysis
# We need to be careful with how google.genai.types.Part is structured or mocked
part_str = str(part)
print(f"DEBUG part_str: {part_str}")
if "QA ANALYSIS:" in part_str and "FIX: Check if path exists." in part_str:
found_qa = True
assert found_qa, "QA Analysis was not injected into the next round"

View File

@@ -18,199 +18,164 @@ import ai_client
# --- Tests for Aggregate Module ---
def test_aggregate_includes_segregated_history(tmp_path):
"""
"""
Tests if the aggregate function correctly includes history
when it's segregated into a separate file.
"""
proj_path = tmp_path / "manual_slop.toml"
hist_path = tmp_path / "manual_slop_history.toml"
# Setup segregated project configuration
proj_data = project_manager.default_project("test-aggregate")
proj_data["discussion"]["discussions"]["main"]["history"] = ["@2026-02-24T14:00:00\nUser:\nShow me history"]
# Save the project, which should segregate the history
project_manager.save_project(proj_data, proj_path)
# Load the project and aggregate its content
loaded_proj = project_manager.load_project(proj_path)
config = project_manager.flat_config(loaded_proj)
markdown, output_file, file_items = aggregate.run(config)
# Assert that the history is present in the aggregated markdown
assert "## Discussion History" in markdown
assert "Show me history" in markdown
# --- Tests for MCP Client and Blacklisting ---
def test_mcp_blacklist(tmp_path):
"""
"""
Tests that the MCP client correctly blacklists specified files
and prevents listing them.
"""
# Setup a file that should be blacklisted
hist_file = tmp_path / "my_project_history.toml"
hist_file.write_text("secret history", encoding="utf-8")
# Configure MCP client to allow access to the temporary directory
# but ensure the history file is implicitly or explicitly blacklisted.
mcp_client.configure([{"path": str(hist_file)}], extra_base_dirs=[str(tmp_path)])
# Attempt to read the blacklisted file - should result in an access denied message
result = mcp_client.read_file(str(hist_file))
assert "ACCESS DENIED" in result or "BLACKLISTED" in result
# Attempt to list the directory containing the blacklisted file
result = mcp_client.list_directory(str(tmp_path))
# The blacklisted file should not appear in the directory listing
assert "my_project_history.toml" not in result
def test_aggregate_blacklist(tmp_path):
"""
"""
Tests that aggregate's path resolution respects blacklisting,
ensuring history files are not included by default.
"""
# Setup a history file in the temporary directory
hist_file = tmp_path / "my_project_history.toml"
hist_file.write_text("secret history", encoding="utf-8")
# Attempt to resolve paths including the history file using a wildcard
paths = aggregate.resolve_paths(tmp_path, "*_history.toml")
assert hist_file not in paths, "History file should be blacklisted and not resolved"
# Resolve all paths and ensure the history file is still excluded
paths = aggregate.resolve_paths(tmp_path, "*")
assert hist_file not in paths, "History file should be excluded even with a general glob"
# --- Tests for History Migration and Separation ---
def test_migration_on_load(tmp_path):
"""
"""
Tests that project loading migrates discussion history from manual_slop.toml
to manual_slop_history.toml if it exists in the main config.
"""
# Define paths for the main project config and the history file
proj_path = tmp_path / "manual_slop.toml"
hist_path = tmp_path / "manual_slop_history.toml"
# Create a legacy project data structure with discussion history
legacy_data = project_manager.default_project("test-project")
legacy_data["discussion"]["discussions"]["main"]["history"] = ["Hello", "World"]
# Save this legacy data into manual_slop.toml
with open(proj_path, "wb") as f:
tomli_w.dump(legacy_data, f)
# Load the project - this action should trigger the migration
loaded_data = project_manager.load_project(proj_path)
# Assertions:
# 1. The loaded project should still expose the migrated history in memory.
assert "discussion" in loaded_data
assert loaded_data["discussion"]["discussions"]["main"]["history"] == ["Hello", "World"]
# 2. The history should no longer be present in the main manual_slop.toml on disk.
with open(proj_path, "rb") as f:
on_disk_main = tomllib.load(f)
assert "discussion" not in on_disk_main, "Discussion history should be removed from main config after migration"
# 3. The history file (manual_slop_history.toml) should now exist and contain the data.
assert hist_path.exists()
with open(hist_path, "rb") as f:
on_disk_hist = tomllib.load(f)
assert on_disk_hist["discussions"]["main"]["history"] == ["Hello", "World"]
def test_save_separation(tmp_path):
"""
"""
Tests that saving project data correctly separates discussion history
into manual_slop_history.toml.
"""
# Define paths for the main project config and the history file
proj_path = tmp_path / "manual_slop.toml"
hist_path = tmp_path / "manual_slop_history.toml"
# Create fresh project data, including discussion history
proj_data = project_manager.default_project("test-project")
proj_data["discussion"]["discussions"]["main"]["history"] = ["Saved", "Separately"]
# Save the project data
project_manager.save_project(proj_data, proj_path)
# Assertions:
# 1. Both the main project file and the history file should exist on disk.
assert proj_path.exists()
assert hist_path.exists()
# 2. The main project file should NOT contain the discussion history.
with open(proj_path, "rb") as f:
p_disk = tomllib.load(f)
assert "discussion" not in p_disk, "Discussion history should not be in main config file after save"
# 3. The history file should contain the discussion history.
with open(hist_path, "rb") as f:
h_disk = tomllib.load(f)
assert h_disk["discussions"]["main"]["history"] == ["Saved", "Separately"]
# --- Tests for History Persistence Across Turns ---
def test_history_persistence_across_turns(tmp_path):
"""
"""
Tests that discussion history is correctly persisted across multiple save/load cycles.
"""
proj_path = tmp_path / "manual_slop.toml"
hist_path = tmp_path / "manual_slop_history.toml"
# Step 1: Initialize a new project and save it.
proj = project_manager.default_project("test-persistence")
project_manager.save_project(proj, proj_path)
# Step 2: Add a first turn of discussion history.
proj = project_manager.load_project(proj_path)
entry1 = {"role": "User", "content": "Hello", "ts": "2026-02-24T13:00:00"}
proj["discussion"]["discussions"]["main"]["history"].append(project_manager.entry_to_str(entry1))
project_manager.save_project(proj, proj_path)
# Verify separation after the first save
with open(proj_path, "rb") as f:
p_disk = tomllib.load(f)
assert "discussion" not in p_disk
with open(hist_path, "rb") as f:
h_disk = tomllib.load(f)
assert h_disk["discussions"]["main"]["history"] == ["@2026-02-24T13:00:00\nUser:\nHello"]
# Step 3: Add a second turn of discussion history.
proj = project_manager.load_project(proj_path)
entry2 = {"role": "AI", "content": "Hi there!", "ts": "2026-02-24T13:01:00"}
proj["discussion"]["discussions"]["main"]["history"].append(project_manager.entry_to_str(entry2))
project_manager.save_project(proj, proj_path)
# Verify persistence
with open(hist_path, "rb") as f:
h_disk = tomllib.load(f)
assert len(h_disk["discussions"]["main"]["history"]) == 2
assert h_disk["discussions"]["main"]["history"][1] == "@2026-02-24T13:01:00\nAI:\nHi there!"
# Step 4: Reload the project from disk and check history
proj_final = project_manager.load_project(proj_path)
assert len(proj_final["discussion"]["discussions"]["main"]["history"]) == 2
# --- Tests for AI Client History Management ---
def test_get_history_bleed_stats_basic():
"""
"""
Tests basic retrieval of history bleed statistics from the AI client.
"""
# Reset the AI client's session state
ai_client.reset_session()
# Set a custom history truncation limit for testing purposes.
ai_client.set_history_trunc_limit(500)
# For this test, we're primarily checking the structure of the returned stats
# and the configured limit.
stats = ai_client.get_history_bleed_stats()
assert 'current' in stats, "Stats dictionary should contain 'current' token usage"
assert 'limit' in stats, "Stats dictionary should contain 'limit'"
assert stats['limit'] == 500, f"Expected limit of 500, but got {stats['limit']}"
assert isinstance(stats['current'], int) and stats['current'] >= 0

View File

@@ -12,40 +12,35 @@ from api_hook_client import ApiHookClient
import gui_legacy
def test_hooks_enabled_via_cli():
with patch.object(sys, 'argv', ['gui_legacy.py', '--enable-test-hooks']):
app = gui_legacy.App()
assert app.test_hooks_enabled is True
def test_hooks_disabled_by_default():
with patch.object(sys, 'argv', ['gui_legacy.py']):
if 'SLOP_TEST_HOOKS' in os.environ:
del os.environ['SLOP_TEST_HOOKS']
app = gui_legacy.App()
assert getattr(app, 'test_hooks_enabled', False) is False
def test_live_hook_server_responses(live_gui):
"""
"""
Verifies the live hook server (started via fixture) responds correctly to all major endpoints.
"""
client = ApiHookClient()
# Test /status
status = client.get_status()
assert status == {'status': 'ok'}
# Test /api/project
project = client.get_project()
assert 'project' in project
# Test /api/session
session = client.get_session()
assert 'session' in session
# Test /api/performance
perf = client.get_performance()
assert 'performance' in perf
# Test POST /api/gui
gui_data = {"action": "test_action", "value": 42}
resp = client.post_gui(gui_data)
assert resp == {'status': 'queued'}

View File

@@ -14,89 +14,80 @@ spec.loader.exec_module(gui_legacy)
from gui_legacy import App
def test_new_hubs_defined_in_window_info():
"""
"""
Verifies that the new consolidated Hub windows are defined in the App's window_info.
This ensures they will be available in the 'Windows' menu.
"""
# We don't need a full App instance with DPG context for this,
# as window_info is initialized in __init__ before DPG starts.
# But we mock load_config to avoid file access.
from unittest.mock import patch
with patch('gui_legacy.load_config', return_value={}):
app = App()
expected_hubs = {
"Context Hub": "win_context_hub",
"AI Settings Hub": "win_ai_settings_hub",
"Discussion Hub": "win_discussion_hub",
"Operations Hub": "win_operations_hub",
}
for label, tag in expected_hubs.items():
assert tag in app.window_info.values(), f"Expected window tag {tag} not found in window_info"
# Check if the label matches (or is present)
found = False
for l, t in app.window_info.items():
if t == tag:
found = True
assert l == label or label in l, f"Label mismatch for {tag}: expected {label}, found {l}"
assert found, f"Expected window label {label} not found in window_info"
def test_old_windows_removed_from_window_info(app_instance_simple):
"""
"""
Verifies that the old fragmented windows are removed from window_info.
"""
old_tags = [
"win_projects", "win_files", "win_screenshots",
"win_provider", "win_system_prompts",
"win_discussion", "win_message", "win_response",
"win_comms", "win_tool_log"
]
for tag in old_tags:
assert tag not in app_instance_simple.window_info.values(), f"Old window tag {tag} should have been removed from window_info"
@pytest.fixture
def app_instance_simple():
from unittest.mock import patch
from gui_legacy import App
with patch('gui_legacy.load_config', return_value={}):
app = App()
return app
def test_hub_windows_have_correct_flags(app_instance_simple):
"""
"""
Verifies that the new Hub windows have appropriate flags for a professional workspace.
(e.g., no_collapse should be True for main hubs).
"""
import dearpygui.dearpygui as dpg
dpg.create_context()
# We need to actually call the build methods to check the configuration
app_instance_simple._build_context_hub()
app_instance_simple._build_ai_settings_hub()
app_instance_simple._build_discussion_hub()
app_instance_simple._build_operations_hub()
hubs = ["win_context_hub", "win_ai_settings_hub", "win_discussion_hub", "win_operations_hub"]
for hub in hubs:
assert dpg.does_item_exist(hub)
# We can't easily check 'no_collapse' after creation without internal DPG calls
# but we can check if it's been configured if we mock dpg.window or check it manually
dpg.destroy_context()
def test_indicators_exist(app_instance_simple):
"""
"""
Verifies that the new thinking and live indicators exist in the UI.
"""
import dearpygui.dearpygui as dpg
dpg.create_context()
app_instance_simple._build_discussion_hub()
app_instance_simple._build_operations_hub()
assert dpg.does_item_exist("thinking_indicator")
assert dpg.does_item_exist("operations_live_indicator")
dpg.destroy_context()

View File

@@ -8,120 +8,110 @@ import ai_client
@pytest.fixture
def mock_app():
with (
patch('gui_2.load_config', return_value={
"ai": {"provider": "gemini", "model": "model-1", "temperature": 0.0, "max_tokens": 100, "history_trunc_limit": 1000},
"projects": {"paths": [], "active": ""},
"gui": {"show_windows": {}}
}),
patch('gui_2.project_manager.load_project', return_value={
"project": {"name": "test_proj"},
"discussion": {"active": "main", "discussions": {"main": {"history": []}}},
"files": {"paths": [], "base_dir": "."},
"screenshots": {"paths": [], "base_dir": "."},
"agent": {"tools": {}}
}),
patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
patch('gui_2.project_manager.save_project'),
patch('gui_2.session_logger.open_session'),
patch('gui_2.App._init_ai_and_hooks'),
patch('gui_2.App._fetch_models')
):
app = App()
yield app
# We don't have a clean way to stop the loop thread in gui_2.py App
# so we just let it daemon-exit.
@pytest.mark.timeout(10)
def test_user_request_integration_flow(mock_app):
"""
"""
Verifies that pushing a UserRequestEvent to the event_queue:
1. Triggers ai_client.send
2. Results in a 'response' event back to the queue
3. Eventually updates the UI state (ai_response, ai_status) after processing GUI tasks.
"""
app = mock_app
# Mock all ai_client methods called during _handle_request_event
mock_response = "This is a test AI response"
with (
patch('ai_client.send', return_value=mock_response) as mock_send,
patch('ai_client.set_custom_system_prompt'),
patch('ai_client.set_model_params'),
patch('ai_client.set_agent_tools')
):
# 1. Create and push a UserRequestEvent
event = UserRequestEvent(
prompt="Hello AI",
stable_md="Context",
file_items=[],
disc_text="History",
base_dir="."
)
# 2. Push event to the app's internal loop
asyncio.run_coroutine_threadsafe(
app.event_queue.put("user_request", event),
app._loop
)
# 3. Wait for ai_client.send to be called (polling background thread)
start_time = time.time()
while not mock_send.called and time.time() - start_time < 5:
time.sleep(0.1)
assert mock_send.called, "ai_client.send was not called within timeout"
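# The positional arguments mirror the UserRequestEvent fields constructed above:
# stable_md, prompt, base_dir, file_items, disc_text.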
mock_send.assert_called_once_with("Context", "Hello AI", ".", [], "History")
# 4. Wait for the response to propagate to _pending_gui_tasks and update UI
# We call _process_pending_gui_tasks manually to simulate a GUI frame update.
start_time = time.time()
success = False
while time.time() - start_time < 3:
app._process_pending_gui_tasks()
if app.ai_response == mock_response and app.ai_status == "done":
success = True
break
time.sleep(0.1)
assert success, f"UI state was not updated. ai_response: '{app.ai_response}', status: '{app.ai_status}'"
assert app.ai_response == mock_response
assert app.ai_status == "done"
@pytest.mark.timeout(10)
def test_user_request_error_handling(mock_app):
"""
"""
Verifies that if ai_client.send raises an exception, the UI is updated with the error state.
"""
app = mock_app
with (
patch('ai_client.send', side_effect=Exception("API Failure")) as mock_send,
patch('ai_client.set_custom_system_prompt'),
patch('ai_client.set_model_params'),
patch('ai_client.set_agent_tools')
):
event = UserRequestEvent(
prompt="Trigger Error",
stable_md="",
file_items=[],
disc_text="",
base_dir="."
)
asyncio.run_coroutine_threadsafe(
app.event_queue.put("user_request", event),
app._loop
)
# Poll for error state by processing GUI tasks
start_time = time.time()
success = False
while time.time() - start_time < 5:
app._process_pending_gui_tasks()
if app.ai_status == "error" and "ERROR: API Failure" in app.ai_response:
success = True
break
time.sleep(0.1)
assert success, f"Error state was not reflected in UI. status: {app.ai_status}, response: {app.ai_response}"

View File

@@ -10,80 +10,67 @@ from api_hook_client import ApiHookClient
@pytest.mark.integration
def test_full_live_workflow(live_gui):
"""
"""
Integration test that drives the GUI through a full workflow.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
client.post_session(session_entries=[])
time.sleep(2)
# 1. Reset
client.click("btn_reset")
time.sleep(1)
# 2. Project Setup
temp_project_path = os.path.abspath("tests/temp_project.toml")
if os.path.exists(temp_project_path):
os.remove(temp_project_path)
client.click("btn_project_new_automated", user_data=temp_project_path)
time.sleep(1) # Wait for project creation and switch
# Verify metadata update
proj = client.get_project()
test_git = os.path.abspath(".")
client.set_value("project_git_dir", test_git)
client.click("btn_project_save")
time.sleep(1)
proj = client.get_project()
# flat_config returns {"project": {...}, "output": ...}
# so proj is {"project": {"project": {"git_dir": ...}}}
assert proj['project']['project']['git_dir'] == test_git
# Enable auto-add so the response ends up in history
client.set_value("auto_add_history", True)
client.set_value("current_model", "gemini-2.5-flash-lite")
time.sleep(0.5)
# 3. Discussion Turn
client.set_value("ai_input", "Hello! This is an automated test. Just say 'Acknowledged'.")
client.click("btn_gen_send")
# Verify thinking indicator appears (might be brief)
thinking_seen = False
print("\nPolling for thinking indicator...")
for i in range(40):
state = client.get_indicator_state("thinking_indicator")
if state.get('shown'):
thinking_seen = True
print(f"Thinking indicator seen at poll {i}")
break
time.sleep(0.5)
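# thinking_seen is deliberately not asserted: the indicator can appear and vanish
# faster than the 0.5s polling interval, so missing it is not treated as a failure.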
# 4. Wait for response in session
success = False
print("Waiting for AI response in session...")
for i in range(120):
session = client.get_session()
entries = session.get('session', {}).get('entries', [])
if any(e.get('role') == 'AI' for e in entries):
success = True
print(f"AI response found at second {i}")
break
time.sleep(1)
assert success, "AI failed to respond within 120 seconds"
# 5. Switch Discussion
client.set_value("disc_new_name_input", "AutoDisc")
client.click("btn_disc_create")
time.sleep(1.0) # Wait for GUI to process creation
client.select_list_item("disc_listbox", "AutoDisc")
time.sleep(1.0) # Wait for GUI to switch
# Verify session is empty in new discussion
session = client.get_session()
assert len(session.get('session', {}).get('entries', [])) == 0

View File

@@ -9,20 +9,20 @@ from gui_2 import App
@pytest.fixture
def mock_config(tmp_path):
config_path = tmp_path / "config.toml"
config_path.write_text("""[projects]
paths = []
active = ""
[ai]
provider = "gemini"
model = "model"
""", encoding="utf-8")
return config_path
@pytest.fixture
def mock_project(tmp_path):
project_path = tmp_path / "project.toml"
project_path.write_text("""[project]
name = "test"
[discussion]
roles = ["User", "AI"]
@@ -30,74 +30,65 @@ active = "main"
[discussion.discussions.main]
history = []
""", encoding="utf-8")
return project_path
@pytest.fixture
def app_instance(mock_config, mock_project, monkeypatch):
monkeypatch.setattr("gui_2.CONFIG_PATH", mock_config)
with patch("project_manager.load_project") as mock_load, \
patch("session_logger.open_session"):
mock_load.return_value = {
"project": {"name": "test"},
"discussion": {"roles": ["User", "AI"], "active": "main", "discussions": {"main": {"history": []}}},
"files": {"paths": []},
"screenshots": {"paths": []}
}
# Mock the __init__ to do nothing, then set the fields we need manually
with patch.object(App, '__init__', lambda self: None):
app = App()
app.show_windows = {"Log Management": False}
app.ui_state = MagicMock()
app.ui_files_base_dir = "."
app.files = []
# __init__ was bypassed, but methods defined on the class are still bound to the
# instance, so we can call them directly.
return app
def test_log_management_init(app_instance):
app = app_instance
assert "Log Management" in app.show_windows
assert app.show_windows["Log Management"] is False
assert hasattr(app, "_render_log_management")
assert callable(app._render_log_management)
def test_render_log_management_logic(app_instance):
app = app_instance
app.show_windows["Log Management"] = True
# Mock LogRegistry
with patch("gui_2.LogRegistry") as MockRegistry, \
patch("gui_2.imgui.begin") as mock_begin, \
patch("gui_2.imgui.begin_table") as mock_begin_table, \
patch("gui_2.imgui.text") as mock_text, \
patch("gui_2.imgui.end_table") as mock_end_table, \
patch("gui_2.imgui.end") as mock_end, \
patch("gui_2.imgui.push_style_color"), \
patch("gui_2.imgui.pop_style_color"), \
patch("gui_2.imgui.table_setup_column"), \
patch("gui_2.imgui.table_headers_row"), \
patch("gui_2.imgui.table_next_row"), \
patch("gui_2.imgui.table_next_column"), \
patch("gui_2.imgui.button"):
mock_reg = MockRegistry.return_value
mock_reg.data = {
"session_1": {
"start_time": "2023-01-01",
"whitelisted": False,
"metadata": {"reason": "test", "size_kb": 10, "message_count": 5}
}
}
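# gui_2 apparently expects imgui.begin to return a two-flag tuple (window shown,
# still open), so both are mocked True here to force the window body to render.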
mock_begin.return_value = (True, True)
mock_begin_table.return_value = True
app._render_log_management()
mock_begin.assert_called_with("Log Management", app.show_windows["Log Management"])
mock_begin_table.assert_called()
mock_text.assert_any_call("session_1")

View File

@@ -8,48 +8,42 @@ from log_pruner import LogPruner
@pytest.fixture
def pruner_setup(tmp_path):
logs_dir = tmp_path / "logs"
logs_dir.mkdir()
registry_path = logs_dir / "log_registry.toml"
registry = LogRegistry(str(registry_path))
pruner = LogPruner(registry, str(logs_dir))
return pruner, registry, logs_dir
def test_prune_old_insignificant_logs(pruner_setup):
pruner, registry, logs_dir = pruner_setup
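# Pruning policy exercised below (thresholds assumed from the fixture data):
# only sessions that are old, smaller than ~2KB, and not whitelisted are deleted.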
# 1. Old and small (insignificant) -> should be pruned
session_id_old_small = "old_small"
dir_old_small = logs_dir / session_id_old_small
dir_old_small.mkdir()
(dir_old_small / "comms.log").write_text("small") # < 2KB
registry.register_session(session_id_old_small, str(dir_old_small), datetime.now() - timedelta(days=2))
# 2. Old and large (significant) -> should NOT be pruned
session_id_old_large = "old_large"
dir_old_large = logs_dir / session_id_old_large
dir_old_large.mkdir()
(dir_old_large / "comms.log").write_text("x" * 3000) # > 2KB
registry.register_session(session_id_old_large, str(dir_old_large), datetime.now() - timedelta(days=2))
# 3. Recent and small -> should NOT be pruned
session_id_recent_small = "recent_small"
dir_recent_small = logs_dir / session_id_recent_small
dir_recent_small.mkdir()
(dir_recent_small / "comms.log").write_text("small")
registry.register_session(session_id_recent_small, str(dir_recent_small), datetime.now() - timedelta(hours=2))
# 4. Old and whitelisted -> should NOT be pruned
session_id_old_whitelisted = "old_whitelisted"
dir_old_whitelisted = logs_dir / session_id_old_whitelisted
dir_old_whitelisted.mkdir()
(dir_old_whitelisted / "comms.log").write_text("small")
registry.register_session(session_id_old_whitelisted, str(dir_old_whitelisted), datetime.now() - timedelta(days=2))
registry.update_session_metadata(session_id_old_whitelisted, 0, 0, 0, True, "Manual")
pruner.prune()
assert not dir_old_small.exists()
assert dir_old_large.exists()
assert dir_recent_small.exists()
assert dir_old_whitelisted.exists()

View File

@@ -8,173 +8,149 @@ from log_registry import LogRegistry
class TestLogRegistry(unittest.TestCase):
def setUp(self):
"""Set up a temporary directory and registry file for each test."""
self.temp_dir = tempfile.TemporaryDirectory()
self.registry_path = os.path.join(self.temp_dir.name, "registry.toml")
# Ensure the file is created and empty initially for a clean state.
# LogRegistry is assumed to load from this file on instantiation.
with open(self.registry_path, 'w') as f:
f.write("# Initial empty registry\n")
# Instantiate LogRegistry. This will load from the empty file.
self.registry = LogRegistry(self.registry_path)
def tearDown(self):
"""Clean up the temporary directory and its contents after each test."""
self.temp_dir.cleanup()
def test_instantiation(self):
"""Test LogRegistry instantiation with a file path."""
self.assertIsInstance(self.registry, LogRegistry)
self.assertEqual(self.registry.registry_path, self.registry_path)
# Check if the file exists. LogRegistry is assumed to create it if not.
self.assertTrue(os.path.exists(self.registry_path))
# We will verify content in other tests that explicitly save and reload.
def test_register_session(self):
"""Test registering a new session."""
session_id = "session-123"
path = "/path/to/session/123"
start_time = datetime.utcnow()
self.registry.register_session(session_id, path, start_time)
# Verify session was added to internal data (assuming LogRegistry has a public 'data' attribute for testing)
self.assertIn(session_id, self.registry.data)
session_data = self.registry.data[session_id]
self.assertEqual(session_data['path'], path)
# Convert stored ISO string back to datetime for comparison
stored_start_time = datetime.fromisoformat(session_data['start_time'])
self.assertAlmostEqual(stored_start_time, start_time, delta=timedelta(seconds=1)) # Allow for minor time differences
self.assertFalse(session_data.get('whitelisted', False)) # Default to not whitelisted
self.assertIsNone(session_data.get('metadata'))
# Verify data was written to the TOML file by reloading
reloaded_registry = LogRegistry(self.registry_path)
self.assertIn(session_id, reloaded_registry.data)
reloaded_session_data = reloaded_registry.data[session_id]
reloaded_start_time = datetime.fromisoformat(reloaded_session_data['start_time'])
self.assertAlmostEqual(reloaded_start_time, start_time, delta=timedelta(seconds=1))
def test_update_session_metadata(self):
"""Test updating session metadata."""
session_id = "session-456"
path = "/path/to/session/456"
start_time = datetime.utcnow()
self.registry.register_session(session_id, path, start_time)
message_count = 100
errors = 5
size_kb = 1024
whitelisted = True
reason = "Automated process"
self.registry.update_session_metadata(session_id, message_count, errors, size_kb, whitelisted, reason)
# Verify metadata was updated in internal data
self.assertIn(session_id, self.registry.data)
session_data = self.registry.data[session_id]
self.assertIsNotNone(session_data.get('metadata'))
metadata = session_data['metadata']
self.assertEqual(metadata['message_count'], message_count)
self.assertEqual(metadata['errors'], errors)
self.assertEqual(metadata['size_kb'], size_kb)
self.assertEqual(metadata['whitelisted'], whitelisted)
self.assertEqual(metadata['reason'], reason)
# Also check if the whitelisted flag in the main session data is updated
self.assertTrue(session_data.get('whitelisted', False))
# Verify data was written to the TOML file by reloading
reloaded_registry = LogRegistry(self.registry_path)
self.assertIn(session_id, reloaded_registry.data)
reloaded_session_data = reloaded_registry.data[session_id]
self.assertTrue(reloaded_session_data.get('metadata', {}).get('whitelisted', False))
self.assertTrue(reloaded_session_data.get('whitelisted', False)) # Check main flag too
def test_is_session_whitelisted(self):
"""Test checking if a session is whitelisted."""
session_id_whitelisted = "session-789-whitelisted"
path_w = "/path/to/session/789"
start_time_w = datetime.utcnow()
self.registry.register_session(session_id_whitelisted, path_w, start_time_w)
self.registry.update_session_metadata(session_id_whitelisted, 10, 0, 100, True, "Manual whitelist")
session_id_not_whitelisted = "session-abc-not-whitelisted"
path_nw = "/path/to/session/abc"
start_time_nw = datetime.utcnow()
self.registry.register_session(session_id_not_whitelisted, path_nw, start_time_nw)
# Test explicitly whitelisted session
self.assertTrue(self.registry.is_session_whitelisted(session_id_whitelisted))
# Test session registered but not updated, should default to not whitelisted
self.assertFalse(self.registry.is_session_whitelisted(session_id_not_whitelisted))
# Test for a non-existent session, should be treated as not whitelisted
self.assertFalse(self.registry.is_session_whitelisted("non-existent-session"))
def test_get_old_non_whitelisted_sessions(self):
"""Test retrieving old, non-whitelisted sessions."""
now = datetime.utcnow()
# Define a cutoff time that is 7 days ago
cutoff_time = now - timedelta(days=7)
# Session 1: Old and not whitelisted
session_id_old_nw = "session-old-nw"
path_old_nw = "/path/to/session/old_nw"
start_time_old_nw = now - timedelta(days=10) # Older than cutoff
self.registry.register_session(session_id_old_nw, path_old_nw, start_time_old_nw)
# Session 2: Recent and not whitelisted
session_id_recent_nw = "session-recent-nw"
path_recent_nw = "/path/to/session/recent_nw"
start_time_recent_nw = now - timedelta(days=3) # Newer than cutoff
self.registry.register_session(session_id_recent_nw, path_recent_nw, start_time_recent_nw)
# Session 3: Old and whitelisted
session_id_old_w = "session-old-w"
path_old_w = "/path/to/session/old_w"
start_time_old_w = now - timedelta(days=15) # Older than cutoff
self.registry.register_session(session_id_old_w, path_old_w, start_time_old_w)
self.registry.update_session_metadata(session_id_old_w, 50, 0, 500, True, "Whitelisted")
# Session 4: Old, not whitelisted explicitly, but with metadata that doesn't set 'whitelisted' to True.
# The 'is_session_whitelisted' logic should correctly interpret this as not whitelisted.
session_id_old_nw_incomplete = "session-old-nw-incomplete"
path_old_nw_incomplete = "/path/to/session/old_nw_incomplete"
start_time_old_nw_incomplete = now - timedelta(days=20) # Older than cutoff
self.registry.register_session(session_id_old_nw_incomplete, path_old_nw_incomplete, start_time_old_nw_incomplete)
# Update with some metadata, but set 'whitelisted' to False explicitly
self.registry.update_session_metadata(session_id_old_nw_incomplete, 10, 0, 100, False, "Manual review needed")
# Get sessions older than cutoff_time and not whitelisted
old_sessions = self.registry.get_old_non_whitelisted_sessions(cutoff_time)
# Collect session IDs from the result
found_session_ids = {s['session_id'] for s in old_sessions}
# Expected: session_id_old_nw (old, not whitelisted) and session_id_old_nw_incomplete (old, explicitly not whitelisted)
self.assertIn(session_id_old_nw, found_session_ids)
self.assertIn(session_id_old_nw_incomplete, found_session_ids)
# Not expected: session_id_recent_nw (too recent), session_id_old_w (whitelisted)
self.assertNotIn(session_id_recent_nw, found_session_ids)
self.assertNotIn(session_id_old_w, found_session_ids)
# Ensure only the expected sessions are in the result
self.assertEqual(len(found_session_ids), 2)
# Test with a cutoff that includes all sessions, and ensure only non-whitelisted are returned
future_cutoff = now + timedelta(days=1) # All sessions are older than this
all_old_sessions = self.registry.get_old_non_whitelisted_sessions(future_cutoff)
all_found_session_ids = {s['session_id'] for s in all_old_sessions}
# Expected: session_id_old_nw, session_id_old_nw_incomplete, AND session_id_recent_nw
# Not expected: session_id_old_w (whitelisted)
self.assertEqual(len(all_found_session_ids), 3)
self.assertIn(session_id_old_nw, all_found_session_ids)
self.assertIn(session_id_old_nw_incomplete, all_found_session_ids)
self.assertIn(session_id_recent_nw, all_found_session_ids)
self.assertNotIn(session_id_old_w, all_found_session_ids)

View File

@@ -11,69 +11,56 @@ from log_pruner import LogPruner
@pytest.fixture
def e2e_setup(tmp_path, monkeypatch):
# Ensure closed before starting
session_logger.close_session()
monkeypatch.setattr(session_logger, "_comms_fh", None)
# Mock _LOG_DIR and _SCRIPTS_DIR in session_logger
original_log_dir = session_logger._LOG_DIR
session_logger._LOG_DIR = tmp_path / "logs"
monkeypatch.setattr(session_logger, "_LOG_DIR", tmp_path / "logs")
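# The module attribute is set both directly and via monkeypatch; either alone would
# likely suffice, and the cleanup below also restores the originals explicitly.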
session_logger._LOG_DIR.mkdir(parents=True, exist_ok=True)
original_scripts_dir = session_logger._SCRIPTS_DIR
session_logger._SCRIPTS_DIR = tmp_path / "scripts" / "generated"
monkeypatch.setattr(session_logger, "_SCRIPTS_DIR", tmp_path / "scripts" / "generated")
session_logger._SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
yield tmp_path
# Cleanup
session_logger.close_session()
session_logger._LOG_DIR = original_log_dir
session_logger._SCRIPTS_DIR = original_scripts_dir
def test_logging_e2e(e2e_setup):
tmp_path = e2e_setup
logs_dir = tmp_path / "logs"
# Step 1: Initialize (open_session)
session_logger.open_session(label="E2E_Test")
session_id = session_logger._session_id
session_dir = session_logger._session_dir
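# _session_id and _session_dir are private module attributes, read here only to
# locate the freshly created session on disk.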
# Step 2: Simulate logs (write 'ERROR' to 'comms.log')
# Use log_comms which writes to comms.log
session_logger.log_comms({"level": "ERROR", "message": "Something went wrong"})
# Step 3: Shutdown (close_session)
session_logger.close_session()
# Step 4: Verify 'log_registry.toml' has the session whitelisted due to 'ERROR'
registry_path = logs_dir / "log_registry.toml"
registry = LogRegistry(str(registry_path))
assert registry.is_session_whitelisted(session_id), "Current session should be whitelisted due to ERROR keyword"
# Step 5: Simulate an OLD insignificant session in the registry and directory
old_session_id = "20200101_120000_OLD"
old_session_dir = logs_dir / old_session_id
old_session_dir.mkdir()
(old_session_dir / "comms.log").write_text("nothing special") # < 2KB
old_start_time = datetime.now() - timedelta(days=2)
registry.register_session(old_session_id, str(old_session_dir), old_start_time)
# Step 6: Trigger 'LogPruner.prune()'
pruner = LogPruner(registry, str(logs_dir))
pruner.prune()
# Step 7: Verify the OLD session is deleted but the NEW (whitelisted) session is kept.
assert not old_session_dir.exists(), "Old insignificant session should have been pruned"
assert session_dir.exists(), "New whitelisted session should have been kept"
# Extra check: Whitelisted sessions should be kept even if old
# Manually backdate the current session
registry.data[session_id]['start_time'] = (datetime.now() - timedelta(days=2)).isoformat()
registry.save_registry()
pruner.prune()
assert session_dir.exists(), "Whitelisted session should be kept even if it is old and small"

View File

@@ -9,11 +9,10 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import mcp_client
def test_mcp_perf_tool_retrieval():
# Test that the MCP tool can call performance_monitor metrics
mock_metrics = {"fps": 60, "last_frame_time_ms": 16.6}
# Simulate tool call by patching the callback
with patch('mcp_client.perf_monitor_callback', return_value=mock_metrics):
result = mcp_client.get_ui_performance()
assert "60" in result
assert "16.6" in result

View File

@@ -4,63 +4,55 @@ from gui_2 import App
@pytest.fixture
def app_instance():
# We patch the dependencies of App.__init__ to avoid side effects
with (
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager') as mock_pm,
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init')
):
app = App()
# Ensure project and ui_files_base_dir are set for _refresh_from_project
app.project = {}
app.ui_files_base_dir = "."
# Return the app and the mock_pm for use in tests
yield app, mock_pm
def test_mma_dashboard_refresh(app_instance):
app, mock_pm = app_instance
# 1. Define mock tracks
mock_tracks = [
MagicMock(id="track_1", description="Track 1"),
MagicMock(id="track_2", description="Track 2")
]
# 2. Patch get_all_tracks to return our mock list
mock_pm.get_all_tracks.return_value = mock_tracks
# 3. Call _refresh_from_project
app._refresh_from_project()
# 4. Verify that app.tracks contains the mock tracks
assert hasattr(app, 'tracks'), "App instance should have a 'tracks' attribute"
assert app.tracks == mock_tracks
assert len(app.tracks) == 2
assert app.tracks[0].id == "track_1"
assert app.tracks[1].id == "track_2"
# Verify get_all_tracks was called with the correct base_dir
mock_pm.get_all_tracks.assert_called_with(app.ui_files_base_dir)
def test_mma_dashboard_initialization_refresh(app_instance):
"""
"""
The fixture mocks out _load_active_project, which would normally trigger
_refresh_from_project during initialization. This test calls it manually and
verifies it behaves as expected for that initialization scenario.
"""
app, mock_pm = app_instance
mock_tracks = [MagicMock(id="init_track", description="Initial Track")]
mock_pm.get_all_tracks.return_value = mock_tracks
# Simulate the refresh that would happen during a project load
app._refresh_from_project()
assert app.tracks == mock_tracks
assert app.tracks[0].id == "init_track"

View File

@@ -2,175 +2,159 @@ import pytest
from models import Ticket, Track, WorkerContext
def test_ticket_instantiation():
"""
"""
Verifies that a Ticket can be instantiated with its required fields:
id, description, status, assigned_to.
"""
ticket_id = "T1"
description = "Implement surgical code changes"
status = "todo"
assigned_to = "tier3-worker"
ticket = Ticket(
id=ticket_id,
description=description,
status=status,
assigned_to=assigned_to
)
assert ticket.id == ticket_id
assert ticket.description == description
assert ticket.status == status
assert ticket.assigned_to == assigned_to
assert ticket.depends_on == []
ticket_id = "T1"
description = "Implement surgical code changes"
status = "todo"
assigned_to = "tier3-worker"
ticket = Ticket(
id=ticket_id,
description=description,
status=status,
assigned_to=assigned_to
)
assert ticket.id == ticket_id
assert ticket.description == description
assert ticket.status == status
assert ticket.assigned_to == assigned_to
assert ticket.depends_on == []
def test_ticket_with_dependencies():
"""
"""
Verifies that a Ticket can store dependencies.
"""
ticket = Ticket(
id="T2",
description="Write code",
status="todo",
assigned_to="worker-1",
depends_on=["T1"]
)
assert ticket.depends_on == ["T1"]
def test_track_instantiation():
"""
"""
Verifies that a Track can be instantiated with its required fields:
id, description, and a list of Tickets.
"""
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="b")
track_id = "TRACK-1"
track_desc = "Implement MMA Models"
tickets = [ticket1, ticket2]
track = Track(
id=track_id,
description=track_desc,
tickets=tickets
)
assert track.id == track_id
assert track.description == track_desc
assert len(track.tickets) == 2
assert track.tickets[0].id == "T1"
assert track.tickets[1].id == "T2"
def test_track_can_handle_empty_tickets():
"""
"""
Verifies that a Track can be instantiated with an empty list of tickets.
"""
track = Track(id="TRACK-2", description="Empty Track", tickets=[])
assert track.tickets == []
track = Track(id="TRACK-2", description="Empty Track", tickets=[])
assert track.tickets == []
def test_worker_context_instantiation():
"""
"""
Verifies that a WorkerContext can be instantiated with ticket_id,
model_name, and messages.
"""
ticket_id = "T1"
model_name = "gemini-2.0-flash-lite"
messages = [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"}
]
context = WorkerContext(
ticket_id=ticket_id,
model_name=model_name,
messages=messages
)
assert context.ticket_id == ticket_id
assert context.model_name == model_name
assert context.messages == messages
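# Hedged sketch of the shapes these instantiation tests imply for models.Ticket,
# models.Track and models.WorkerContext. The real classes may be pydantic models
# or richer dataclasses; only the fields and defaults asserted above are shown.
from dataclasses import dataclass, field
from typing import Any


@dataclass
class _SketchTicket:
    id: str
    description: str
    status: str
    assigned_to: str
    depends_on: list[str] = field(default_factory=list)  # defaults to [] per test_ticket_instantiation


@dataclass
class _SketchTrack:
    id: str
    description: str
    tickets: list[_SketchTicket] = field(default_factory=list)


@dataclass
class _SketchWorkerContext:
    ticket_id: str
    model_name: str
    messages: list[dict[str, Any]] = field(default_factory=list)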
ticket_id = "T1"
model_name = "gemini-2.0-flash-lite"
messages = [
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there!"}
]
context = WorkerContext(
ticket_id=ticket_id,
model_name=model_name,
messages=messages
)
assert context.ticket_id == ticket_id
assert context.model_name == model_name
assert context.messages == messages
def test_ticket_mark_blocked():
"""
"""
Verifies that ticket.mark_blocked(reason) sets the status to 'blocked'.
Note: The reason field might need to be added to the Ticket class.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
ticket.mark_blocked("Waiting for API key")
assert ticket.status == "blocked"
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
ticket.mark_blocked("Waiting for API key")
assert ticket.status == "blocked"
def test_ticket_mark_complete():
"""
"""
Verifies that ticket.mark_complete() sets the status to 'completed'.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
ticket.mark_complete()
assert ticket.status == "completed"
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
ticket.mark_complete()
assert ticket.status == "completed"
def test_track_get_executable_tickets():
"""
"""
Verifies that track.get_executable_tickets() returns only 'todo' tickets
whose dependencies are all 'completed'.
"""
# T1: todo, no deps -> executable
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="a")
# T2: todo, deps [T1] -> not executable (T1 is todo)
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="a", depends_on=["T1"])
# T3: todo, deps [T4] -> not executable (T4 is blocked)
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="a", depends_on=["T4"])
# T4: blocked, no deps -> not executable (not 'todo')
t4 = Ticket(id="T4", description="T4", status="blocked", assigned_to="a")
# T5: completed, no deps -> not executable (not 'todo')
t5 = Ticket(id="T5", description="T5", status="completed", assigned_to="a")
# T6: todo, deps [T5] -> executable (T5 is completed)
t6 = Ticket(id="T6", description="T6", status="todo", assigned_to="a", depends_on=["T5"])
track = Track(id="TR1", description="Track 1", tickets=[t1, t2, t3, t4, t5, t6])
executable = track.get_executable_tickets()
executable_ids = [t.id for t in executable]
assert "T1" in executable_ids
assert "T6" in executable_ids
assert len(executable_ids) == 2
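# Hedged sketch (not the models.Track implementation) of the selection rule this
# test and the complex-chain test below both assert: a ticket is executable when
# it is still 'todo' and every id it depends_on belongs to a completed ticket.
def _sketch_get_executable_tickets(track):
    completed = {t.id for t in track.tickets if t.status == "completed"}
    return [
        t for t in track.tickets
        if t.status == "todo" and all(dep in completed for dep in t.depends_on)
    ]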
def test_track_get_executable_tickets_complex():
"""
"""
Verifies executable tickets with complex dependency chains.
Chain: T1 (comp) -> T2 (todo) -> T3 (todo)
T4 (comp) -> T3
T5 (todo) -> T3
"""
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="a")
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="a", depends_on=["T1"])
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="a", depends_on=["T2", "T4", "T5"])
t4 = Ticket(id="T4", description="T4", status="completed", assigned_to="a")
t5 = Ticket(id="T5", description="T5", status="todo", assigned_to="a")
track = Track(id="TR1", description="Track 1", tickets=[t1, t2, t3, t4, t5])
# At this point:
# T1 is completed
# T4 is completed
# T2 is todo, depends on T1 (completed) -> Executable
# T5 is todo, no deps -> Executable
# T3 is todo, depends on T2 (todo), T4 (completed), T5 (todo) -> Not executable
executable = track.get_executable_tickets()
executable_ids = sorted([t.id for t in executable])
assert executable_ids == ["T2", "T5"]
# Mark T2 complete
t2.mark_complete()
# T3 still depends on T5
executable = track.get_executable_tickets()
executable_ids = sorted([t.id for t in executable])
assert executable_ids == ["T5"]
# Mark T5 complete
t5.mark_complete()
# Now T3 should be executable
executable = track.get_executable_tickets()
executable_ids = sorted([t.id for t in executable])
assert executable_ids == ["T3"]
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="a")
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="a", depends_on=["T1"])
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="a", depends_on=["T2", "T4", "T5"])
t4 = Ticket(id="T4", description="T4", status="completed", assigned_to="a")
t5 = Ticket(id="T5", description="T5", status="todo", assigned_to="a")
track = Track(id="TR1", description="Track 1", tickets=[t1, t2, t3, t4, t5])
# At this point:
# T1 is completed
# T4 is completed
# T2 is todo, depends on T1 (completed) -> Executable
# T5 is todo, no deps -> Executable
# T3 is todo, depends on T2 (todo), T4 (completed), T5 (todo) -> Not executable
executable = track.get_executable_tickets()
executable_ids = sorted([t.id for t in executable])
assert executable_ids == ["T2", "T5"]
# Mark T2 complete
t2.mark_complete()
# T3 still depends on T5
executable = track.get_executable_tickets()
executable_ids = sorted([t.id for t in executable])
assert executable_ids == ["T5"]
# Mark T5 complete
t5.mark_complete()
# Now T3 should be executable
executable = track.get_executable_tickets()
executable_ids = sorted([t.id for t in executable])
assert executable_ids == ["T3"]

View File

@@ -7,141 +7,126 @@ from gui_2 import App
@pytest.fixture
def app_instance():
with (
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager'),
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init')
):
app = App()
        # Initialize the new MMA state variables if the App implementation has not added them yet
if not hasattr(app, 'ui_epic_input'): app.ui_epic_input = ""
if not hasattr(app, 'proposed_tracks'): app.proposed_tracks = []
if not hasattr(app, '_show_track_proposal_modal'): app._show_track_proposal_modal = False
yield app
def test_mma_ui_state_initialization(app_instance):
"""Verifies that the new MMA UI state variables are initialized correctly."""
assert hasattr(app_instance, 'ui_epic_input')
assert hasattr(app_instance, 'proposed_tracks')
assert hasattr(app_instance, '_show_track_proposal_modal')
assert hasattr(app_instance, 'mma_streams')
assert app_instance.ui_epic_input == ""
assert app_instance.proposed_tracks == []
assert app_instance._show_track_proposal_modal is False
assert app_instance.mma_streams == {}
"""Verifies that the new MMA UI state variables are initialized correctly."""
assert hasattr(app_instance, 'ui_epic_input')
assert hasattr(app_instance, 'proposed_tracks')
assert hasattr(app_instance, '_show_track_proposal_modal')
assert hasattr(app_instance, 'mma_streams')
assert app_instance.ui_epic_input == ""
assert app_instance.proposed_tracks == []
assert app_instance._show_track_proposal_modal is False
assert app_instance.mma_streams == {}
def test_process_pending_gui_tasks_show_track_proposal(app_instance):
"""Verifies that the 'show_track_proposal' action correctly updates the UI state."""
mock_tracks = [{"id": "track_1", "title": "Test Track"}]
task = {
"action": "show_track_proposal",
"payload": mock_tracks
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance.proposed_tracks == mock_tracks
assert app_instance._show_track_proposal_modal is True
"""Verifies that the 'show_track_proposal' action correctly updates the UI state."""
mock_tracks = [{"id": "track_1", "title": "Test Track"}]
task = {
"action": "show_track_proposal",
"payload": mock_tracks
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance.proposed_tracks == mock_tracks
assert app_instance._show_track_proposal_modal is True
def test_cb_plan_epic_launches_thread(app_instance):
"""Verifies that _cb_plan_epic launches a thread and eventually queues a task."""
app_instance.ui_epic_input = "Develop a new feature"
app_instance.active_project_path = "test_project.toml"
mock_tracks = [{"id": "track_1", "title": "Test Track"}]
with (
patch('orchestrator_pm.get_track_history_summary', return_value="History summary") as mock_get_history,
patch('orchestrator_pm.generate_tracks', return_value=mock_tracks) as mock_gen_tracks,
patch('aggregate.build_file_items', return_value=[]) as mock_build_files
):
# We need to mock project_manager.flat_config and project_manager.load_project
with (
patch('project_manager.load_project', return_value={}),
patch('project_manager.flat_config', return_value={})
):
app_instance._cb_plan_epic()
# Wait for the background thread to finish (it should be quick with mocks)
max_wait = 5
start_time = time.time()
while len(app_instance._pending_gui_tasks) < 2 and time.time() - start_time < max_wait:
time.sleep(0.1)
assert len(app_instance._pending_gui_tasks) == 2
task1 = app_instance._pending_gui_tasks[0]
assert task1['action'] == 'handle_ai_response'
assert task1['payload']['stream_id'] == 'Tier 1'
assert task1['payload']['text'] == json.dumps(mock_tracks, indent=2)
task2 = app_instance._pending_gui_tasks[1]
assert task2['action'] == 'show_track_proposal'
assert task2['payload'] == mock_tracks
mock_get_history.assert_called_once()
mock_gen_tracks.assert_called_once()
"""Verifies that _cb_plan_epic launches a thread and eventually queues a task."""
app_instance.ui_epic_input = "Develop a new feature"
app_instance.active_project_path = "test_project.toml"
mock_tracks = [{"id": "track_1", "title": "Test Track"}]
with (
patch('orchestrator_pm.get_track_history_summary', return_value="History summary") as mock_get_history,
patch('orchestrator_pm.generate_tracks', return_value=mock_tracks) as mock_gen_tracks,
patch('aggregate.build_file_items', return_value=[]) as mock_build_files
):
# We need to mock project_manager.flat_config and project_manager.load_project
with (
patch('project_manager.load_project', return_value={}),
patch('project_manager.flat_config', return_value={})
):
app_instance._cb_plan_epic()
# Wait for the background thread to finish (it should be quick with mocks)
max_wait = 5
start_time = time.time()
while len(app_instance._pending_gui_tasks) < 2 and time.time() - start_time < max_wait:
time.sleep(0.1)
assert len(app_instance._pending_gui_tasks) == 2
task1 = app_instance._pending_gui_tasks[0]
assert task1['action'] == 'handle_ai_response'
assert task1['payload']['stream_id'] == 'Tier 1'
assert task1['payload']['text'] == json.dumps(mock_tracks, indent=2)
task2 = app_instance._pending_gui_tasks[1]
assert task2['action'] == 'show_track_proposal'
assert task2['payload'] == mock_tracks
mock_get_history.assert_called_once()
mock_gen_tracks.assert_called_once()
def test_process_pending_gui_tasks_mma_spawn_approval(app_instance):
"""Verifies that the 'mma_spawn_approval' action correctly updates the UI state."""
task = {
"action": "mma_spawn_approval",
"ticket_id": "T1",
"role": "Tier 3 Worker",
"prompt": "Test Prompt",
"context_md": "Test Context",
"dialog_container": [None]
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance._pending_mma_spawn == task
assert app_instance._mma_spawn_prompt == "Test Prompt"
assert app_instance._mma_spawn_context == "Test Context"
assert app_instance._mma_spawn_open is True
assert app_instance._mma_spawn_edit_mode is False
assert task["dialog_container"][0] is not None
assert task["dialog_container"][0]._ticket_id == "T1"
"""Verifies that the 'mma_spawn_approval' action correctly updates the UI state."""
task = {
"action": "mma_spawn_approval",
"ticket_id": "T1",
"role": "Tier 3 Worker",
"prompt": "Test Prompt",
"context_md": "Test Context",
"dialog_container": [None]
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance._pending_mma_spawn == task
assert app_instance._mma_spawn_prompt == "Test Prompt"
assert app_instance._mma_spawn_context == "Test Context"
assert app_instance._mma_spawn_open is True
assert app_instance._mma_spawn_edit_mode is False
assert task["dialog_container"][0] is not None
assert task["dialog_container"][0]._ticket_id == "T1"
def test_handle_ai_response_with_stream_id(app_instance):
"""Verifies routing to mma_streams."""
task = {
"action": "handle_ai_response",
"payload": {
"text": "Tier 1 Strategy Content",
"stream_id": "Tier 1",
"status": "Thinking..."
}
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance.mma_streams.get("Tier 1") == "Tier 1 Strategy Content"
assert app_instance.ai_status == "Thinking..."
assert app_instance.ai_response == ""
"""Verifies routing to mma_streams."""
task = {
"action": "handle_ai_response",
"payload": {
"text": "Tier 1 Strategy Content",
"stream_id": "Tier 1",
"status": "Thinking..."
}
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance.mma_streams.get("Tier 1") == "Tier 1 Strategy Content"
assert app_instance.ai_status == "Thinking..."
assert app_instance.ai_response == ""
def test_handle_ai_response_fallback(app_instance):
"""Verifies fallback to ai_response when stream_id is missing."""
task = {
"action": "handle_ai_response",
"payload": {
"text": "Regular AI Response",
"status": "done"
}
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance.ai_response == "Regular AI Response"
assert app_instance.ai_status == "done"
assert len(app_instance.mma_streams) == 0
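# Hedged sketch of the dispatch behaviour the queue tests above rely on. The real
# App._process_pending_gui_tasks handles more actions; the attribute names here are
# taken from the assertions, not from gui_2 itself.
def _sketch_process_pending_gui_tasks(app):
    while app._pending_gui_tasks:
        task = app._pending_gui_tasks.pop(0)
        if task["action"] == "show_track_proposal":
            app.proposed_tracks = task["payload"]
            app._show_track_proposal_modal = True
        elif task["action"] == "handle_ai_response":
            payload = task["payload"]
            app.ai_status = payload.get("status", app.ai_status)
            stream_id = payload.get("stream_id")
            if stream_id:
                # Tiered MMA output goes to its own stream buffer...
                app.mma_streams[stream_id] = payload["text"]
            else:
                # ...while untagged responses fall back to the single ai_response field.
                app.ai_response = payload["text"]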
"""Verifies fallback to ai_response when stream_id is missing."""
task = {
"action": "handle_ai_response",
"payload": {
"text": "Regular AI Response",
"status": "done"
}
}
app_instance._pending_gui_tasks.append(task)
app_instance._process_pending_gui_tasks()
assert app_instance.ai_response == "Regular AI Response"
assert app_instance.ai_status == "done"
assert len(app_instance.mma_streams) == 0

View File

@@ -2,51 +2,51 @@ import pytest
from mma_prompts import PROMPTS
def test_tier1_epic_init_constraints():
prompt = PROMPTS["tier1_epic_init"]
assert "Godot ECS Flat List format" in prompt
assert "JSON array" in prompt
assert "Tracks" in prompt
assert "severity" in prompt
assert "IGNORE all source code" in prompt
prompt = PROMPTS["tier1_epic_init"]
assert "Godot ECS Flat List format" in prompt
assert "JSON array" in prompt
assert "Tracks" in prompt
assert "severity" in prompt
assert "IGNORE all source code" in prompt
def test_tier1_track_delegation_constraints():
prompt = PROMPTS["tier1_track_delegation"]
assert "Track Brief" in prompt
assert "AST Skeleton View" in prompt
assert "IGNORE unrelated module docs" in prompt
prompt = PROMPTS["tier1_track_delegation"]
assert "Track Brief" in prompt
assert "AST Skeleton View" in prompt
assert "IGNORE unrelated module docs" in prompt
def test_tier1_macro_merge_constraints():
prompt = PROMPTS["tier1_macro_merge"]
assert "Macro-Merge" in prompt
assert "Macro-Diff" in prompt
assert "IGNORE Tier 3 trial-and-error" in prompt
prompt = PROMPTS["tier1_macro_merge"]
assert "Macro-Merge" in prompt
assert "Macro-Diff" in prompt
assert "IGNORE Tier 3 trial-and-error" in prompt
def test_tier2_sprint_planning_constraints():
prompt = PROMPTS["tier2_sprint_planning"]
assert "Tickets" in prompt
assert "Godot ECS Flat List format" in prompt
assert "depends_on" in prompt
assert "DAG" in prompt
assert "Skeleton View" in prompt
assert "Curated Implementation View" in prompt
prompt = PROMPTS["tier2_sprint_planning"]
assert "Tickets" in prompt
assert "Godot ECS Flat List format" in prompt
assert "depends_on" in prompt
assert "DAG" in prompt
assert "Skeleton View" in prompt
assert "Curated Implementation View" in prompt
def test_tier2_code_review_constraints():
prompt = PROMPTS["tier2_code_review"]
assert "Code Review" in prompt
assert "IGNORE the Contributor's internal trial-and-error" in prompt
assert "Tier 4 (QA) logs" in prompt
prompt = PROMPTS["tier2_code_review"]
assert "Code Review" in prompt
assert "IGNORE the Contributor's internal trial-and-error" in prompt
assert "Tier 4 (QA) logs" in prompt
def test_tier2_track_finalization_constraints():
prompt = PROMPTS["tier2_track_finalization"]
assert "Track Finalization" in prompt
assert "Executive Summary" in prompt
assert "Macro-Diff" in prompt
assert "Dependency Delta" in prompt
prompt = PROMPTS["tier2_track_finalization"]
assert "Track Finalization" in prompt
assert "Executive Summary" in prompt
assert "Macro-Diff" in prompt
assert "Dependency Delta" in prompt
def test_tier2_contract_first_constraints():
prompt = PROMPTS["tier2_contract_first"]
assert "Stub Ticket" in prompt
assert "Consumer Ticket" in prompt
assert "Implementation Ticket" in prompt
assert "Interface-Driven Development" in prompt
assert "Godot ECS Flat List format" in prompt
prompt = PROMPTS["tier2_contract_first"]
assert "Stub Ticket" in prompt
assert "Consumer Ticket" in prompt
assert "Implementation Ticket" in prompt
assert "Interface-Driven Development" in prompt
assert "Godot ECS Flat List format" in prompt

View File

@@ -5,49 +5,43 @@ from gui_2 import App
@pytest.fixture
def app_instance():
with (
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager'),
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init')
):
app = App()
app.active_tickets = []
app._loop = MagicMock()
yield app
def test_cb_ticket_retry(app_instance):
ticket_id = "test_ticket_1"
app_instance.active_tickets = [{"id": ticket_id, "status": "failed"}]
with patch('asyncio.run_coroutine_threadsafe') as mock_run_safe:
app_instance._cb_ticket_retry(ticket_id)
# Verify status update
assert app_instance.active_tickets[0]['status'] == 'todo'
# Verify event pushed
mock_run_safe.assert_called_once()
# First arg is the coroutine (event_queue.put), second is self._loop
args, _ = mock_run_safe.call_args
assert args[1] == app_instance._loop
ticket_id = "test_ticket_1"
app_instance.active_tickets = [{"id": ticket_id, "status": "failed"}]
with patch('asyncio.run_coroutine_threadsafe') as mock_run_safe:
app_instance._cb_ticket_retry(ticket_id)
# Verify status update
assert app_instance.active_tickets[0]['status'] == 'todo'
# Verify event pushed
mock_run_safe.assert_called_once()
# First arg is the coroutine (event_queue.put), second is self._loop
args, _ = mock_run_safe.call_args
assert args[1] == app_instance._loop
def test_cb_ticket_skip(app_instance):
ticket_id = "test_ticket_1"
app_instance.active_tickets = [{"id": ticket_id, "status": "todo"}]
with patch('asyncio.run_coroutine_threadsafe') as mock_run_safe:
app_instance._cb_ticket_skip(ticket_id)
# Verify status update
assert app_instance.active_tickets[0]['status'] == 'skipped'
# Verify event pushed
mock_run_safe.assert_called_once()
args, _ = mock_run_safe.call_args
assert args[1] == app_instance._loop
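# Hedged sketch of the pattern both callbacks above assert: update the ticket's
# status on the GUI side, then push an event onto the conductor's asyncio loop via
# run_coroutine_threadsafe. The event_queue name and event payload are assumptions;
# the tests only verify that app._loop is the loop argument.
import asyncio


def _sketch_cb_ticket_retry(app, ticket_id):
    for ticket in app.active_tickets:
        if ticket["id"] == ticket_id:
            ticket["status"] = "todo"
    event = {"type": "ticket_retry", "ticket_id": ticket_id}  # assumed payload shape
    asyncio.run_coroutine_threadsafe(app.event_queue.put(event), app._loop)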
ticket_id = "test_ticket_1"
app_instance.active_tickets = [{"id": ticket_id, "status": "todo"}]
with patch('asyncio.run_coroutine_threadsafe') as mock_run_safe:
app_instance._cb_ticket_skip(ticket_id)
# Verify status update
assert app_instance.active_tickets[0]['status'] == 'skipped'
# Verify event pushed
mock_run_safe.assert_called_once()
args, _ = mock_run_safe.call_args
assert args[1] == app_instance._loop

View File

@@ -8,126 +8,104 @@ from models import Track, Ticket
@pytest.fixture
def mock_ai_client():
with patch("ai_client.send") as mock_send:
yield mock_send
with patch("ai_client.send") as mock_send:
yield mock_send
def test_generate_tracks(mock_ai_client):
# Tier 1 (PM) response mock
mock_ai_client.return_value = json.dumps([
{"id": "track_1", "title": "Infrastructure Setup", "description": "Setup basic project structure"},
{"id": "track_2", "title": "Feature implementation", "description": "Implement core feature"}
])
user_request = "Build a new app"
project_config = {}
file_items = []
tracks = orchestrator_pm.generate_tracks(user_request, project_config, file_items)
assert len(tracks) == 2
assert tracks[0]["id"] == "track_1"
assert tracks[1]["id"] == "track_2"
mock_ai_client.assert_called_once()
def test_generate_tickets(mock_ai_client):
# Tier 2 (Tech Lead) response mock
mock_ai_client.return_value = json.dumps([
{"id": "T-001", "description": "Define interfaces", "depends_on": []},
{"id": "T-002", "description": "Implement interfaces", "depends_on": ["T-001"]}
])
track_brief = "Implement a new feature."
module_skeletons = "class Feature: pass"
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
assert len(tickets) == 2
assert tickets[0]["id"] == "T-001"
assert tickets[1]["id"] == "T-002"
assert tickets[1]["depends_on"] == ["T-001"]
def test_topological_sort():
tickets = [
{"id": "T-002", "description": "Dep on 001", "depends_on": ["T-001"]},
{"id": "T-001", "description": "Base", "depends_on": []},
{"id": "T-003", "description": "Dep on 002", "depends_on": ["T-002"]}
]
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
assert sorted_tickets[0]["id"] == "T-001"
assert sorted_tickets[1]["id"] == "T-002"
assert sorted_tickets[2]["id"] == "T-003"
def test_topological_sort_circular():
tickets = [
{"id": "T-001", "depends_on": ["T-002"]},
{"id": "T-002", "depends_on": ["T-001"]}
]
with pytest.raises(ValueError, match="Circular dependency detected"):
conductor_tech_lead.topological_sort(tickets)
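# Hedged sketch of a Kahn-style sort that satisfies the two tests above:
# dependency-free tickets come out first, and an unresolvable cycle raises
# ValueError("Circular dependency detected"). conductor_tech_lead may differ in detail.
def _sketch_topological_sort(tickets):
    by_id = {t["id"]: t for t in tickets}
    indegree = {tid: len(t.get("depends_on", [])) for tid, t in by_id.items()}
    ready = sorted(tid for tid, deg in indegree.items() if deg == 0)
    ordered = []
    while ready:
        tid = ready.pop(0)
        ordered.append(by_id[tid])
        for other_id, other in by_id.items():
            if tid in other.get("depends_on", []):
                indegree[other_id] -= 1
                if indegree[other_id] == 0:
                    ready.append(other_id)
    if len(ordered) != len(tickets):
        raise ValueError("Circular dependency detected")
    return ordered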
def test_track_executable_tickets():
t1 = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
t2 = Ticket(id="T2", description="desc", status="todo", assigned_to="user", depends_on=["T1"])
track = Track(id="track_1", description="desc", tickets=[t1, t2])
executable = track.get_executable_tickets()
assert len(executable) == 1
assert executable[0].id == "T1"
# Complete T1
t1.status = "completed"
executable = track.get_executable_tickets()
assert len(executable) == 1
assert executable[0].id == "T2"
t1 = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
t2 = Ticket(id="T2", description="desc", status="todo", assigned_to="user", depends_on=["T1"])
track = Track(id="track_1", description="desc", tickets=[t1, t2])
executable = track.get_executable_tickets()
assert len(executable) == 1
assert executable[0].id == "T1"
# Complete T1
t1.status = "completed"
executable = track.get_executable_tickets()
assert len(executable) == 1
assert executable[0].id == "T2"
@pytest.mark.asyncio
async def test_conductor_engine_run_linear():
t1 = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
t2 = Ticket(id="T2", description="desc", status="todo", assigned_to="user", depends_on=["T1"])
t1 = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
t2 = Ticket(id="T2", description="desc", status="todo", assigned_to="user", depends_on=["T1"])
track = Track(id="track_1", description="desc", tickets=[t1, t2])
engine = multi_agent_conductor.ConductorEngine(track)
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_worker:
# Mock worker to complete tickets
track = Track(id="track_1", description="desc", tickets=[t1, t2])
engine = multi_agent_conductor.ConductorEngine(track)
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_worker:
# Mock worker to complete tickets
def complete_ticket(ticket, context, **kwargs):
ticket.status = "completed"
mock_worker.side_effect = complete_ticket
await engine.run_linear()
assert t1.status == "completed"
assert t2.status == "completed"
assert mock_worker.call_count == 2
def test_conductor_engine_parse_json_tickets():
track = Track(id="track_1", description="desc")
engine = multi_agent_conductor.ConductorEngine(track)
json_data = json.dumps([
{"id": "T1", "description": "desc 1", "depends_on": []},
{"id": "T2", "description": "desc 2", "depends_on": ["T1"]}
])
engine.parse_json_tickets(json_data)
assert len(track.tickets) == 2
assert track.tickets[0].id == "T1"
assert track.tickets[1].id == "T2"
assert track.tickets[1].depends_on == ["T1"]
track = Track(id="track_1", description="desc")
engine = multi_agent_conductor.ConductorEngine(track)
json_data = json.dumps([
{"id": "T1", "description": "desc 1", "depends_on": []},
{"id": "T2", "description": "desc 2", "depends_on": ["T1"]}
])
engine.parse_json_tickets(json_data)
assert len(track.tickets) == 2
assert track.tickets[0].id == "T1"
assert track.tickets[1].id == "T2"
assert track.tickets[1].depends_on == ["T1"]
def test_run_worker_lifecycle_blocked(mock_ai_client):
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
context = multi_agent_conductor.WorkerContext(ticket_id="T1", model_name="model", messages=[])
mock_ai_client.return_value = "BLOCKED because of missing info"
multi_agent_conductor.run_worker_lifecycle(ticket, context)
assert ticket.status == "blocked"
assert ticket.blocked_reason == "BLOCKED because of missing info"
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
context = multi_agent_conductor.WorkerContext(ticket_id="T1", model_name="model", messages=[])
mock_ai_client.return_value = "BLOCKED because of missing info"
multi_agent_conductor.run_worker_lifecycle(ticket, context)
assert ticket.status == "blocked"
assert ticket.blocked_reason == "BLOCKED because of missing info"

View File

@@ -6,75 +6,67 @@ import mma_prompts
class TestOrchestratorPM(unittest.TestCase):
@patch('summarize.build_summary_markdown')
@patch('ai_client.send')
def test_generate_tracks_success(self, mock_send, mock_summarize):
# Setup mocks
mock_summarize.return_value = "REPO_MAP_CONTENT"
mock_response_data = [
{
"id": "track_1",
"type": "Track",
"module": "test_module",
"persona": "Tech Lead",
"severity": "Medium",
"goal": "Test goal",
"acceptance_criteria": ["criteria 1"]
}
]
mock_send.return_value = json.dumps(mock_response_data)
user_request = "Implement unit tests"
project_config = {"files": {"paths": ["src"]}}
file_items = [{"path": "src/main.py", "content": "print('hello')"}]
# Execute
result = orchestrator_pm.generate_tracks(user_request, project_config, file_items)
# Verify summarize call
mock_summarize.assert_called_once_with(file_items)
# Verify ai_client.send call
expected_system_prompt = mma_prompts.PROMPTS['tier1_epic_init']
mock_send.assert_called_once()
args, kwargs = mock_send.call_args
self.assertEqual(kwargs['md_content'], "")
# Cannot check system_prompt via mock_send kwargs anymore as it's set globally
# But we can verify user_message was passed
self.assertIn(user_request, kwargs['user_message'])
self.assertIn("REPO_MAP_CONTENT", kwargs['user_message'])
# Verify result
self.assertEqual(result[0]['id'], mock_response_data[0]['id'])
@patch('summarize.build_summary_markdown')
@patch('ai_client.send')
def test_generate_tracks_markdown_wrapped(self, mock_send, mock_summarize):
mock_summarize.return_value = "REPO_MAP"
mock_response_data = [{"id": "track_1"}]
expected_result = [{"id": "track_1", "title": "Untitled Track"}]
# Wrapped in ```json ... ```
mock_send.return_value = f"Here is the plan:\n```json\n{json.dumps(mock_response_data)}\n```\nHope this helps."
result = orchestrator_pm.generate_tracks("req", {}, [])
self.assertEqual(result, expected_result)
# Wrapped in ``` ... ```
mock_send.return_value = f"```\n{json.dumps(mock_response_data)}\n```"
result = orchestrator_pm.generate_tracks("req", {}, [])
self.assertEqual(result, expected_result)
mock_response_data = [{"id": "track_1"}]
expected_result = [{"id": "track_1", "title": "Untitled Track"}]
# Wrapped in ```json ... ```
mock_send.return_value = f"Here is the plan:\n```json\n{json.dumps(mock_response_data)}\n```\nHope this helps."
result = orchestrator_pm.generate_tracks("req", {}, [])
self.assertEqual(result, expected_result)
# Wrapped in ``` ... ```
mock_send.return_value = f"```\n{json.dumps(mock_response_data)}\n```"
result = orchestrator_pm.generate_tracks("req", {}, [])
self.assertEqual(result, expected_result)
@patch('summarize.build_summary_markdown')
@patch('ai_client.send')
def test_generate_tracks_malformed_json(self, mock_send, mock_summarize):
mock_summarize.return_value = "REPO_MAP"
mock_send.return_value = "NOT A JSON"
        # Should return an empty list and print an error (print is mocked so the message can be asserted)
with patch('builtins.print') as mock_print:
result = orchestrator_pm.generate_tracks("req", {}, [])
self.assertEqual(result, [])
mock_print.assert_any_call("Error parsing Tier 1 response: Expecting value: line 1 column 1 (char 0)")
if __name__ == '__main__':
unittest.main()

View File

@@ -7,70 +7,63 @@ from pathlib import Path
import orchestrator_pm
class TestOrchestratorPMHistory(unittest.TestCase):
def setUp(self):
self.test_dir = Path("test_conductor")
self.test_dir.mkdir(exist_ok=True)
self.archive_dir = self.test_dir / "archive"
self.tracks_dir = self.test_dir / "tracks"
self.archive_dir.mkdir(exist_ok=True)
self.tracks_dir.mkdir(exist_ok=True)
def tearDown(self):
if self.test_dir.exists():
shutil.rmtree(self.test_dir)
def create_track(self, parent_dir, track_id, title, status, overview):
track_path = parent_dir / track_id
track_path.mkdir(exist_ok=True)
metadata = {"title": title, "status": status}
with open(track_path / "metadata.json", "w") as f:
json.dump(metadata, f)
spec_content = f"# Specification\n\n## Overview\n{overview}"
with open(track_path / "spec.md", "w") as f:
f.write(spec_content)
@patch('orchestrator_pm.CONDUCTOR_PATH', Path("test_conductor"))
def test_get_track_history_summary(self):
# Setup mock tracks
self.create_track(self.archive_dir, "track_001", "Initial Setup", "completed", "Setting up the project structure.")
self.create_track(self.tracks_dir, "track_002", "Feature A", "in_progress", "Implementing Feature A.")
summary = orchestrator_pm.get_track_history_summary()
self.assertIn("Initial Setup", summary)
self.assertIn("completed", summary)
self.assertIn("Setting up the project structure.", summary)
self.assertIn("Feature A", summary)
self.assertIn("in_progress", summary)
self.assertIn("Implementing Feature A.", summary)
@patch('orchestrator_pm.CONDUCTOR_PATH', Path("test_conductor"))
def test_get_track_history_summary_missing_files(self):
# Track with missing spec.md
track_path = self.tracks_dir / "track_003"
track_path.mkdir(exist_ok=True)
with open(track_path / "metadata.json", "w") as f:
json.dump({"title": "Missing Spec", "status": "pending"}, f)
summary = orchestrator_pm.get_track_history_summary()
self.assertIn("Missing Spec", summary)
self.assertIn("pending", summary)
self.assertIn("No overview available", summary)
@patch('orchestrator_pm.summarize.build_summary_markdown')
@patch('ai_client.send')
def test_generate_tracks_with_history(self, mock_send, mock_summarize):
mock_summarize.return_value = "REPO_MAP"
mock_send.return_value = "[]"
history_summary = "PAST_HISTORY_SUMMARY"
orchestrator_pm.generate_tracks("req", {}, [], history_summary=history_summary)
args, kwargs = mock_send.call_args
self.assertIn(history_summary, kwargs['user_message'])
self.assertIn("### TRACK HISTORY:", kwargs['user_message'])
if __name__ == '__main__':
unittest.main()

View File

@@ -9,21 +9,19 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from performance_monitor import PerformanceMonitor
def test_perf_monitor_basic_timing():
pm = PerformanceMonitor()
pm.start_frame()
time.sleep(0.02) # 20ms
pm.end_frame()
metrics = pm.get_metrics()
assert metrics['last_frame_time_ms'] >= 20.0
pm.stop()
def test_perf_monitor_component_timing():
pm = PerformanceMonitor()
pm.start_component("test_comp")
time.sleep(0.01)
pm.end_component("test_comp")
metrics = pm.get_metrics()
assert metrics['time_test_comp_ms'] >= 10.0
pm.stop()
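# Hedged sketch of the minimal surface these timing tests exercise; the real
# PerformanceMonitor presumably keeps rolling averages and a background reporter,
# but the assertions only need wall-clock frame and component timings in ms.
class _SketchPerformanceMonitor:
    def __init__(self):
        self._frame_start = None
        self._component_starts = {}
        self._metrics = {}

    def start_frame(self):
        self._frame_start = time.perf_counter()

    def end_frame(self):
        self._metrics["last_frame_time_ms"] = (time.perf_counter() - self._frame_start) * 1000.0

    def start_component(self, name):
        self._component_starts[name] = time.perf_counter()

    def end_component(self, name):
        elapsed = time.perf_counter() - self._component_starts.pop(name)
        self._metrics[f"time_{name}_ms"] = elapsed * 1000.0

    def get_metrics(self):
        return dict(self._metrics)

    def stop(self):
        pass  # the real monitor presumably shuts down a background thread here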

View File

@@ -5,58 +5,49 @@ from gui_2 import App
@pytest.fixture
def app_instance():
with (
patch('gui_2.load_config', return_value={'ai': {'provider': 'gemini', 'model': 'gemini-2.5-flash-lite'}, 'projects': {}}),
patch('gui_2.save_config'),
patch('gui_2.project_manager'),
patch('gui_2.session_logger'),
patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'),
patch.object(App, '_post_init'),
patch('ai_client.set_provider'),
patch('ai_client.reset_session')
):
app = App()
yield app
def test_redundant_calls_in_process_pending_gui_tasks(app_instance):
# Setup
app_instance._pending_gui_tasks = [
{'action': 'set_value', 'item': 'current_provider', 'value': 'anthropic'}
]
with patch('ai_client.set_provider') as mock_set_provider, \
patch('ai_client.reset_session') as mock_reset_session:
        # The App was constructed with ai_client already mocked, so re-patch it here
        # to capture the calls the current_provider property setter makes.
app_instance._process_pending_gui_tasks()
# current_provider setter calls:
# ai_client.reset_session()
# ai_client.set_provider(value, self.current_model)
# _process_pending_gui_tasks NO LONGER calls it redundantly:
# Total should be 1 call for each.
assert mock_set_provider.call_count == 1
assert mock_reset_session.call_count == 1
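# Hedged sketch of the property-setter contract this test locks in: switching the
# provider resets the ai_client session and applies the new provider exactly once,
# with _process_pending_gui_tasks no longer repeating those calls itself.
def _sketch_set_current_provider(app, value):
    app._current_provider = value  # assumed backing attribute
    ai_client.reset_session()
    ai_client.set_provider(value, app.current_model)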
def test_gcli_path_updates_adapter(app_instance):
# Setup
app_instance.current_provider = 'gemini_cli'
app_instance._pending_gui_tasks = [
{'action': 'set_value', 'item': 'gcli_path', 'value': '/new/path/to/gemini'}
]
    # Clear any existing adapter so processing the task has to (re)create it
ai_client._gemini_cli_adapter = None
app_instance._process_pending_gui_tasks()
assert ai_client._gemini_cli_adapter is not None
assert ai_client._gemini_cli_adapter.binary_path == '/new/path/to/gemini'

View File

@@ -6,89 +6,79 @@ from models import TrackState, Metadata, Ticket
from datetime import datetime
def test_get_all_tracks_empty(tmp_path):
# conductor/tracks directory doesn't exist
assert get_all_tracks(tmp_path) == []
def test_get_all_tracks_with_state(tmp_path):
tracks_dir = tmp_path / "conductor" / "tracks"
tracks_dir.mkdir(parents=True)
track_id = "test_track_1"
track_dir = tracks_dir / track_id
track_dir.mkdir()
# Create TrackState
metadata = Metadata(id=track_id, name="Test Track 1", status="in_progress",
created_at=datetime.now(), updated_at=datetime.now())
tasks = [
Ticket(id="task1", description="desc1", status="completed", assigned_to="user"),
Ticket(id="task2", description="desc2", status="todo", assigned_to="user")
]
state = TrackState(metadata=metadata, discussion=[], tasks=tasks)
save_track_state(track_id, state, tmp_path)
tracks = get_all_tracks(tmp_path)
assert len(tracks) == 1
track = tracks[0]
assert track["id"] == track_id
assert track["title"] == "Test Track 1"
assert track["status"] == "in_progress"
assert track["complete"] == 1
assert track["total"] == 2
assert track["progress"] == 0.5
tracks_dir = tmp_path / "conductor" / "tracks"
tracks_dir.mkdir(parents=True)
track_id = "test_track_1"
track_dir = tracks_dir / track_id
track_dir.mkdir()
# Create TrackState
metadata = Metadata(id=track_id, name="Test Track 1", status="in_progress",
created_at=datetime.now(), updated_at=datetime.now())
tasks = [
Ticket(id="task1", description="desc1", status="completed", assigned_to="user"),
Ticket(id="task2", description="desc2", status="todo", assigned_to="user")
]
state = TrackState(metadata=metadata, discussion=[], tasks=tasks)
save_track_state(track_id, state, tmp_path)
tracks = get_all_tracks(tmp_path)
assert len(tracks) == 1
track = tracks[0]
assert track["id"] == track_id
assert track["title"] == "Test Track 1"
assert track["status"] == "in_progress"
assert track["complete"] == 1
assert track["total"] == 2
assert track["progress"] == 0.5
def test_get_all_tracks_with_metadata_json(tmp_path):
tracks_dir = tmp_path / "conductor" / "tracks"
tracks_dir.mkdir(parents=True)
track_id = "test_track_2"
track_dir = tracks_dir / track_id
track_dir.mkdir()
metadata = {
"id": track_id,
"title": "Test Track 2",
"status": "planned"
}
with open(track_dir / "metadata.json", "w") as f:
json.dump(metadata, f)
# Create plan.md to test parsing
plan_content = """
tracks_dir = tmp_path / "conductor" / "tracks"
tracks_dir.mkdir(parents=True)
track_id = "test_track_2"
track_dir = tracks_dir / track_id
track_dir.mkdir()
metadata = {
"id": track_id,
"title": "Test Track 2",
"status": "planned"
}
with open(track_dir / "metadata.json", "w") as f:
json.dump(metadata, f)
# Create plan.md to test parsing
plan_content = """
# Plan
- [x] Task: Task 1
- [ ] Task: Task 2
- [~] Task: Task 3
"""
with open(track_dir / "plan.md", "w") as f:
f.write(plan_content)
tracks = get_all_tracks(tmp_path)
assert len(tracks) == 1
track = tracks[0]
assert track["id"] == track_id
assert track["title"] == "Test Track 2"
assert track["status"] == "planned"
assert track["complete"] == 1
assert track["total"] == 3
assert pytest.approx(track["progress"]) == 0.333333
with open(track_dir / "plan.md", "w") as f:
f.write(plan_content)
tracks = get_all_tracks(tmp_path)
assert len(tracks) == 1
track = tracks[0]
assert track["id"] == track_id
assert track["title"] == "Test Track 2"
assert track["status"] == "planned"
assert track["complete"] == 1
assert track["total"] == 3
assert pytest.approx(track["progress"]) == 0.333333
def test_get_all_tracks_malformed(tmp_path):
tracks_dir = tmp_path / "conductor" / "tracks"
tracks_dir.mkdir(parents=True)
track_id = "malformed_track"
track_dir = tracks_dir / track_id
track_dir.mkdir()
# Malformed metadata.json
with open(track_dir / "metadata.json", "w") as f:
f.write("{ invalid json }")
tracks = get_all_tracks(tmp_path)
assert len(tracks) == 1
track = tracks[0]
assert track["id"] == track_id
assert track["status"] == "unknown"
assert track["complete"] == 0
assert track["total"] == 0
tracks_dir = tmp_path / "conductor" / "tracks"
tracks_dir.mkdir(parents=True)
track_id = "malformed_track"
track_dir = tracks_dir / track_id
track_dir.mkdir()
# Malformed metadata.json
with open(track_dir / "metadata.json", "w") as f:
f.write("{ invalid json }")
tracks = get_all_tracks(tmp_path)
assert len(tracks) == 1
track = tracks[0]
assert track["id"] == track_id
assert track["status"] == "unknown"
assert track["complete"] == 0
assert track["total"] == 0

View File

@@ -9,56 +9,46 @@ import tomllib
@pytest.fixture
def temp_logs(tmp_path, monkeypatch):
# Ensure closed before starting
session_logger.close_session()
monkeypatch.setattr(session_logger, "_comms_fh", None)
# Mock _LOG_DIR in session_logger
original_log_dir = session_logger._LOG_DIR
session_logger._LOG_DIR = tmp_path / "logs"
monkeypatch.setattr(session_logger, "_LOG_DIR", tmp_path / "logs")
session_logger._LOG_DIR.mkdir(parents=True, exist_ok=True)
# Mock _SCRIPTS_DIR
original_scripts_dir = session_logger._SCRIPTS_DIR
session_logger._SCRIPTS_DIR = tmp_path / "scripts" / "generated"
monkeypatch.setattr(session_logger, "_SCRIPTS_DIR", tmp_path / "scripts" / "generated")
session_logger._SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
yield tmp_path / "logs"
# Cleanup: Close handles if open
session_logger.close_session()
session_logger._LOG_DIR = original_log_dir
session_logger._SCRIPTS_DIR = original_scripts_dir
def test_open_session_creates_subdir_and_registry(temp_logs):
label = "test-label"
# We can't easily mock datetime.datetime.now() because it's a built-in
# but we can check the resulting directory name pattern
session_logger.open_session(label=label)
# Check that a subdirectory was created
subdirs = list(temp_logs.iterdir())
# One is the log_registry.toml, one is the session dir
session_dirs = [d for d in subdirs if d.is_dir()]
assert len(session_dirs) == 1
session_dir = session_dirs[0]
assert session_dir.name.endswith(f"_{label}")
# Check for log files
assert (session_dir / "comms.log").exists()
assert (session_dir / "toolcalls.log").exists()
assert (session_dir / "apihooks.log").exists()
assert (session_dir / "clicalls.log").exists()
# Check registry
registry_path = temp_logs / "log_registry.toml"
assert registry_path.exists()
with open(registry_path, "rb") as f:
data = tomllib.load(f)
assert session_dir.name in data
assert data[session_dir.name]["path"] == str(session_dir)
label = "test-label"
# We can't easily mock datetime.datetime.now() because it's a built-in
# but we can check the resulting directory name pattern
session_logger.open_session(label=label)
# Check that a subdirectory was created
subdirs = list(temp_logs.iterdir())
# One is the log_registry.toml, one is the session dir
session_dirs = [d for d in subdirs if d.is_dir()]
assert len(session_dirs) == 1
session_dir = session_dirs[0]
assert session_dir.name.endswith(f"_{label}")
# Check for log files
assert (session_dir / "comms.log").exists()
assert (session_dir / "toolcalls.log").exists()
assert (session_dir / "apihooks.log").exists()
assert (session_dir / "clicalls.log").exists()
# Check registry
registry_path = temp_logs / "log_registry.toml"
assert registry_path.exists()
with open(registry_path, "rb") as f:
data = tomllib.load(f)
assert session_dir.name in data
assert data[session_dir.name]["path"] == str(session_dir)

View File

@@ -9,33 +9,28 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.sim_ai_settings import AISettingsSimulation
def test_ai_settings_simulation_run():
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
mock_client.get_value.side_effect = lambda key: {
"current_provider": "gemini",
"current_model": "gemini-2.5-flash-lite"
}.get(key)
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = AISettingsSimulation(mock_client)
        # Back get_value/set_value with a shared dict so set_value updates are
        # visible to later get_value calls during the run.
vals = {"current_provider": "gemini", "current_model": "gemini-2.5-flash-lite"}
def side_effect(key):
return vals.get(key)
def set_side_effect(key, val):
vals[key] = val
mock_client.get_value.side_effect = side_effect
mock_client.set_value.side_effect = set_side_effect
sim.run()
# Verify calls
mock_client.set_value.assert_any_call("current_model", "gemini-1.5-flash")
mock_client.set_value.assert_any_call("current_model", "gemini-2.5-flash-lite")
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
mock_client.get_value.side_effect = lambda key: {
"current_provider": "gemini",
"current_model": "gemini-2.5-flash-lite"
}.get(key)
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = AISettingsSimulation(mock_client)
# Replace the static side_effect with a dict-backed one so that set_value calls
# are reflected in later get_value reads.
vals = {"current_provider": "gemini", "current_model": "gemini-2.5-flash-lite"}
def side_effect(key):
return vals.get(key)
def set_side_effect(key, val):
vals[key] = val
mock_client.get_value.side_effect = side_effect
mock_client.set_value.side_effect = set_side_effect
sim.run()
# Verify calls
mock_client.set_value.assert_any_call("current_model", "gemini-1.5-flash")
mock_client.set_value.assert_any_call("current_model", "gemini-2.5-flash-lite")
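The dict-backed side-effect pattern used above is reusable; a generic sketch (the helper name is illustrative, not part of the project):

from unittest.mock import MagicMock

def make_stateful_client(initial: dict) -> MagicMock:
    """MagicMock whose get_value/set_value share one mutable dict of state."""
    state = dict(initial)
    client = MagicMock()
    client.get_value.side_effect = state.get
    client.set_value.side_effect = state.__setitem__
    return client

client = make_stateful_client({"current_model": "gemini-2.5-flash-lite"})
client.set_value("current_model", "gemini-1.5-flash")
assert client.get_value("current_model") == "gemini-1.5-flash"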

View File

@@ -9,26 +9,22 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.sim_base import BaseSimulation
def test_base_simulation_init():
with patch('simulation.sim_base.ApiHookClient') as mock_client_class:
mock_client = MagicMock()
mock_client_class.return_value = mock_client
sim = BaseSimulation()
assert sim.client == mock_client
assert sim.sim is not None
with patch('simulation.sim_base.ApiHookClient') as mock_client_class:
mock_client = MagicMock()
mock_client_class.return_value = mock_client
sim = BaseSimulation()
assert sim.client == mock_client
assert sim.sim is not None
def test_base_simulation_setup():
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = BaseSimulation(mock_client)
sim.setup("TestSim")
mock_client.wait_for_server.assert_called()
mock_client.click.assert_any_call("btn_reset")
mock_sim.setup_new_project.assert_called()
assert sim.project_path.endswith("temp_testsim.toml")
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = BaseSimulation(mock_client)
sim.setup("TestSim")
mock_client.wait_for_server.assert_called()
mock_client.click.assert_any_call("btn_reset")
mock_sim.setup_new_project.assert_called()
assert sim.project_path.endswith("temp_testsim.toml")

View File

@@ -9,42 +9,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.sim_context import ContextSimulation
def test_context_simulation_run():
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
# Mock project config
mock_project = {
'project': {
'files': {'paths': []}
}
}
mock_client.get_project.return_value = mock_project
mock_client.get_value.side_effect = lambda key: {
"ai_status": "md written: test.md",
"token_budget_pct": 0.05
}.get(key)
# Mock session entries
mock_session = {
'session': {
'entries': [
{'role': 'User', 'content': 'Hello'},
{'role': 'AI', 'content': 'Hi'}
]
}
}
mock_client.get_session.return_value = mock_session
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = ContextSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.create_discussion.assert_called()
mock_client.post_project.assert_called()
mock_client.click.assert_called_with("btn_md_only")
mock_sim.run_discussion_turn.assert_called()
mock_sim.truncate_history.assert_called_with(1)
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
# Mock project config
mock_project = {
'project': {
'files': {'paths': []}
}
}
mock_client.get_project.return_value = mock_project
mock_client.get_value.side_effect = lambda key: {
"ai_status": "md written: test.md",
"token_budget_pct": 0.05
}.get(key)
# Mock session entries
mock_session = {
'session': {
'entries': [
{'role': 'User', 'content': 'Hello'},
{'role': 'AI', 'content': 'Hi'}
]
}
}
mock_client.get_session.return_value = mock_session
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = ContextSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.create_discussion.assert_called()
mock_client.post_project.assert_called()
mock_client.click.assert_called_with("btn_md_only")
mock_sim.run_discussion_turn.assert_called()
mock_sim.truncate_history.assert_called_with(1)

View File

@@ -9,42 +9,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.sim_execution import ExecutionSimulation
def test_execution_simulation_run():
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
# Mock show_confirm_modal state
vals = {"show_confirm_modal": False}
def side_effect(key):
return vals.get(key)
def set_side_effect(key, val):
vals[key] = val
mock_client.get_value.side_effect = side_effect
mock_client.set_value.side_effect = set_side_effect
# Mock session entries with tool output
mock_session = {
'session': {
'entries': [
{'role': 'Tool', 'content': 'Simulation Test', 'tool_call_id': 'call_1'}
]
}
}
mock_client.get_session.return_value = mock_session
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
# Mock show_confirm_modal state
vals = {"show_confirm_modal": False}
# Mock script confirmation event
mock_client.wait_for_event.side_effect = [
{"type": "script_confirmation_required", "script": "dir"},
None # Second call returns None to end the loop
]
def side_effect(key):
return vals.get(key)
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = ExecutionSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.run_discussion_turn_async.assert_called()
mock_client.click.assert_called_with("btn_approve_script")
def set_side_effect(key, val):
vals[key] = val
mock_client.get_value.side_effect = side_effect
mock_client.set_value.side_effect = set_side_effect
# Mock session entries with tool output
mock_session = {
'session': {
'entries': [
{'role': 'Tool', 'content': 'Simulation Test', 'tool_call_id': 'call_1'}
]
}
}
mock_client.get_session.return_value = mock_session
# Mock script confirmation event
mock_client.wait_for_event.side_effect = [
{"type": "script_confirmation_required", "script": "dir"},
None # Second call returns None to end the loop
]
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = ExecutionSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.run_discussion_turn_async.assert_called()
mock_client.click.assert_called_with("btn_approve_script")
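The mocked wait_for_event sequence (one confirmation event, then None) implies an approval loop roughly like the sketch below. This is an assumption about ExecutionSimulation, not its actual code, and the wait_for_event signature is guessed.

def approve_pending_scripts(client) -> None:
    """Drain script-confirmation events and approve each pending script."""
    while True:
        event = client.wait_for_event()  # real signature/filtering is assumed
        if not event:
            break
        if event.get("type") == "script_confirmation_required":
            client.click("btn_approve_script")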

View File

@@ -9,28 +9,24 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.sim_tools import ToolsSimulation
def test_tools_simulation_run():
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
# Mock session entries with tool output
mock_session = {
'session': {
'entries': [
{'role': 'User', 'content': 'List files'},
{'role': 'Tool', 'content': 'aggregate.py, ai_client.py', 'tool_call_id': 'call_1'},
{'role': 'AI', 'content': 'The files are: aggregate.py, ai_client.py'}
]
}
}
mock_client.get_session.return_value = mock_session
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = ToolsSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.run_discussion_turn.assert_any_call("List the files in the current directory.")
mock_sim.run_discussion_turn.assert_any_call("Read the first 10 lines of aggregate.py.")
mock_client = MagicMock()
mock_client.wait_for_server.return_value = True
# Mock session entries with tool output
mock_session = {
'session': {
'entries': [
{'role': 'User', 'content': 'List files'},
{'role': 'Tool', 'content': 'aggregate.py, ai_client.py', 'tool_call_id': 'call_1'},
{'role': 'AI', 'content': 'The files are: aggregate.py, ai_client.py'}
]
}
}
mock_client.get_session.return_value = mock_session
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
mock_sim = MagicMock()
mock_sim_class.return_value = mock_sim
sim = ToolsSimulation(mock_client)
sim.run()
# Verify calls
mock_sim.run_discussion_turn.assert_any_call("List the files in the current directory.")
mock_sim.run_discussion_turn.assert_any_call("Read the first 10 lines of aggregate.py.")

View File

@@ -7,84 +7,75 @@ import asyncio
import concurrent.futures
class MockDialog:
def __init__(self, approved, final_payload=None):
self.approved = approved
self.final_payload = final_payload
def wait(self):
# Match the new return format: a dictionary
res = {'approved': self.approved, 'abort': False}
if self.final_payload:
res.update(self.final_payload)
return res
def __init__(self, approved, final_payload=None):
self.approved = approved
self.final_payload = final_payload
def wait(self):
# Match the new return format: a dictionary
res = {'approved': self.approved, 'abort': False}
if self.final_payload:
res.update(self.final_payload)
return res
@pytest.fixture
def mock_ai_client():
with patch("ai_client.send") as mock_send:
mock_send.return_value = "Task completed"
yield mock_send
with patch("ai_client.send") as mock_send:
mock_send.return_value = "Task completed"
yield mock_send
@pytest.mark.asyncio
async def test_confirm_spawn_pushed_to_queue():
event_queue = events.AsyncEventQueue()
ticket_id = "T1"
role = "Tier 3 Worker"
prompt = "Original Prompt"
context_md = "Original Context"
event_queue = events.AsyncEventQueue()
ticket_id = "T1"
role = "Tier 3 Worker"
prompt = "Original Prompt"
context_md = "Original Context"
# Start confirm_spawn in a thread since it blocks with time.sleep
# Start confirm_spawn in a thread since it blocks with time.sleep
def run_confirm():
return multi_agent_conductor.confirm_spawn(role, prompt, context_md, event_queue, ticket_id)
loop = asyncio.get_running_loop()
with concurrent.futures.ThreadPoolExecutor() as executor:
future = loop.run_in_executor(executor, run_confirm)
# Wait for the event to appear in the queue
event_name, payload = await event_queue.get()
assert event_name == "mma_spawn_approval"
assert payload["ticket_id"] == ticket_id
assert payload["role"] == role
assert payload["prompt"] == prompt
assert payload["context_md"] == context_md
assert "dialog_container" in payload
# Simulate GUI injecting a dialog
payload["dialog_container"][0] = MockDialog(True, {"prompt": "Modified Prompt", "context_md": "Modified Context"})
approved, final_prompt, final_context = await future
assert approved is True
assert final_prompt == "Modified Prompt"
assert final_context == "Modified Context"
def run_confirm():
return multi_agent_conductor.confirm_spawn(role, prompt, context_md, event_queue, ticket_id)
loop = asyncio.get_running_loop()
with concurrent.futures.ThreadPoolExecutor() as executor:
future = loop.run_in_executor(executor, run_confirm)
# Wait for the event to appear in the queue
event_name, payload = await event_queue.get()
assert event_name == "mma_spawn_approval"
assert payload["ticket_id"] == ticket_id
assert payload["role"] == role
assert payload["prompt"] == prompt
assert payload["context_md"] == context_md
assert "dialog_container" in payload
# Simulate GUI injecting a dialog
payload["dialog_container"][0] = MockDialog(True, {"prompt": "Modified Prompt", "context_md": "Modified Context"})
approved, final_prompt, final_context = await future
assert approved is True
assert final_prompt == "Modified Prompt"
assert final_context == "Modified Context"
@patch("multi_agent_conductor.confirm_spawn")
def test_run_worker_lifecycle_approved(mock_confirm, mock_ai_client):
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
context = WorkerContext(ticket_id="T1", model_name="model", messages=[])
event_queue = events.AsyncEventQueue()
mock_confirm.return_value = (True, "Modified Prompt", "Modified Context")
multi_agent_conductor.run_worker_lifecycle(ticket, context, event_queue=event_queue)
mock_confirm.assert_called_once()
# Check that ai_client.send was called with modified values
args, kwargs = mock_ai_client.call_args
assert kwargs["user_message"] == "Modified Prompt"
assert kwargs["md_content"] == "Modified Context"
assert ticket.status == "completed"
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
context = WorkerContext(ticket_id="T1", model_name="model", messages=[])
event_queue = events.AsyncEventQueue()
mock_confirm.return_value = (True, "Modified Prompt", "Modified Context")
multi_agent_conductor.run_worker_lifecycle(ticket, context, event_queue=event_queue)
mock_confirm.assert_called_once()
# Check that ai_client.send was called with modified values
args, kwargs = mock_ai_client.call_args
assert kwargs["user_message"] == "Modified Prompt"
assert kwargs["md_content"] == "Modified Context"
assert ticket.status == "completed"
@patch("multi_agent_conductor.confirm_spawn")
def test_run_worker_lifecycle_rejected(mock_confirm, mock_ai_client):
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
context = WorkerContext(ticket_id="T1", model_name="model", messages=[])
event_queue = events.AsyncEventQueue()
mock_confirm.return_value = (False, "Original Prompt", "Original Context")
result = multi_agent_conductor.run_worker_lifecycle(ticket, context, event_queue=event_queue)
mock_confirm.assert_called_once()
mock_ai_client.assert_not_called()
assert ticket.status == "blocked"
assert "Spawn rejected by user" in ticket.blocked_reason
assert "BLOCKED" in result
ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
context = WorkerContext(ticket_id="T1", model_name="model", messages=[])
event_queue = events.AsyncEventQueue()
mock_confirm.return_value = (False, "Original Prompt", "Original Context")
result = multi_agent_conductor.run_worker_lifecycle(ticket, context, event_queue=event_queue)
mock_confirm.assert_called_once()
mock_ai_client.assert_not_called()
assert ticket.status == "blocked"
assert "Spawn rejected by user" in ticket.blocked_reason
assert "BLOCKED" in result

View File

@@ -5,57 +5,50 @@ import pytest
from api_hook_client import ApiHookClient
def test_api_ask_client_method(live_gui):
"""
"""
Tests the request_confirmation method in ApiHookClient.
"""
client = ApiHookClient("http://127.0.0.1:8999")
# Drain existing events
client.get_events()
results = {"response": None, "error": None}
def make_blocking_request():
try:
# This call should block until we respond
results["response"] = client.request_confirmation(
tool_name="powershell",
args={"command": "echo hello"}
)
except Exception as e:
results["error"] = str(e)
client = ApiHookClient("http://127.0.0.1:8999")
# Drain existing events
client.get_events()
results = {"response": None, "error": None}
# Start the request in a background thread
t = threading.Thread(target=make_blocking_request)
t.start()
# Poll for the 'ask_received' event
request_id = None
start_time = time.time()
while time.time() - start_time < 5:
events = client.get_events()
for ev in events:
if ev.get("type") == "ask_received":
request_id = ev.get("request_id")
break
if request_id:
break
time.sleep(0.1)
assert request_id is not None, "Timed out waiting for 'ask_received' event"
# Respond
expected_response = {"approved": True}
resp = requests.post(
"http://127.0.0.1:8999/api/ask/respond",
json={
"request_id": request_id,
"response": expected_response
}
)
assert resp.status_code == 200
t.join(timeout=5)
assert not t.is_alive()
assert results["error"] is None
assert results["response"] == expected_response
def make_blocking_request():
try:
# This call should block until we respond
results["response"] = client.request_confirmation(
tool_name="powershell",
args={"command": "echo hello"}
)
except Exception as e:
results["error"] = str(e)
# Start the request in a background thread
t = threading.Thread(target=make_blocking_request)
t.start()
# Poll for the 'ask_received' event
request_id = None
start_time = time.time()
while time.time() - start_time < 5:
events = client.get_events()
for ev in events:
if ev.get("type") == "ask_received":
request_id = ev.get("request_id")
break
if request_id:
break
time.sleep(0.1)
assert request_id is not None, "Timed out waiting for 'ask_received' event"
# Respond
expected_response = {"approved": True}
resp = requests.post(
"http://127.0.0.1:8999/api/ask/respond",
json={
"request_id": request_id,
"response": expected_response
}
)
assert resp.status_code == 200
t.join(timeout=5)
assert not t.is_alive()
assert results["error"] is None
assert results["response"] == expected_response

View File

@@ -4,221 +4,176 @@ import subprocess
from shell_runner import run_powershell
def test_run_powershell_qa_callback_on_failure():
"""
"""
Test that qa_callback is called when a powershell command fails (non-zero exit code).
The result of the callback should be appended to the output.
"""
script = "Write-Error 'something went wrong'; exit 1"
base_dir = "."
# Mocking subprocess.run to simulate failure
mock_result = MagicMock()
mock_result.stdout = ""
mock_result.stderr = "something went wrong"
mock_result.returncode = 1
qa_callback = MagicMock(return_value="QA ANALYSIS: This looks like a syntax error.")
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
# We expect run_powershell to accept qa_callback
output = run_powershell(script, base_dir, qa_callback=qa_callback)
# Verify callback was called with stderr
qa_callback.assert_called_once_with("something went wrong")
# Verify output contains the callback result
assert "QA ANALYSIS: This looks like a syntax error." in output
assert "STDERR:\nsomething went wrong" in output
assert "EXIT CODE: 1" in output
script = "Write-Error 'something went wrong'; exit 1"
base_dir = "."
# Mocking subprocess.run to simulate failure
mock_result = MagicMock()
mock_result.stdout = ""
mock_result.stderr = "something went wrong"
mock_result.returncode = 1
qa_callback = MagicMock(return_value="QA ANALYSIS: This looks like a syntax error.")
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
# We expect run_powershell to accept qa_callback
output = run_powershell(script, base_dir, qa_callback=qa_callback)
# Verify callback was called with stderr
qa_callback.assert_called_once_with("something went wrong")
# Verify output contains the callback result
assert "QA ANALYSIS: This looks like a syntax error." in output
assert "STDERR:\nsomething went wrong" in output
assert "EXIT CODE: 1" in output
def test_run_powershell_qa_callback_on_stderr_only():
"""
"""
Test that qa_callback is called when a command has stderr even if exit code is 0.
"""
script = "Write-Error 'non-fatal error'"
base_dir = "."
mock_result = MagicMock()
mock_result.stdout = "Success"
mock_result.stderr = "non-fatal error"
mock_result.returncode = 0
qa_callback = MagicMock(return_value="QA ANALYSIS: Ignorable warning.")
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
output = run_powershell(script, base_dir, qa_callback=qa_callback)
qa_callback.assert_called_once_with("non-fatal error")
assert "QA ANALYSIS: Ignorable warning." in output
assert "STDOUT:\nSuccess" in output
script = "Write-Error 'non-fatal error'"
base_dir = "."
mock_result = MagicMock()
mock_result.stdout = "Success"
mock_result.stderr = "non-fatal error"
mock_result.returncode = 0
qa_callback = MagicMock(return_value="QA ANALYSIS: Ignorable warning.")
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
output = run_powershell(script, base_dir, qa_callback=qa_callback)
qa_callback.assert_called_once_with("non-fatal error")
assert "QA ANALYSIS: Ignorable warning." in output
assert "STDOUT:\nSuccess" in output
def test_run_powershell_no_qa_callback_on_success():
"""
"""
Test that qa_callback is NOT called when the command succeeds without stderr.
"""
script = "Write-Output 'All good'"
base_dir = "."
mock_result = MagicMock()
mock_result.stdout = "All good"
mock_result.stderr = ""
mock_result.returncode = 0
qa_callback = MagicMock()
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
output = run_powershell(script, base_dir, qa_callback=qa_callback)
qa_callback.assert_not_called()
assert "STDOUT:\nAll good" in output
assert "EXIT CODE: 0" in output
assert "QA ANALYSIS" not in output
script = "Write-Output 'All good'"
base_dir = "."
mock_result = MagicMock()
mock_result.stdout = "All good"
mock_result.stderr = ""
mock_result.returncode = 0
qa_callback = MagicMock()
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
output = run_powershell(script, base_dir, qa_callback=qa_callback)
qa_callback.assert_not_called()
assert "STDOUT:\nAll good" in output
assert "EXIT CODE: 0" in output
assert "QA ANALYSIS" not in output
def test_run_powershell_optional_qa_callback():
"""
"""
Test that run_powershell still works without providing a qa_callback.
"""
script = "Write-Error 'error'"
base_dir = "."
mock_result = MagicMock()
mock_result.stdout = ""
mock_result.stderr = "error"
mock_result.returncode = 1
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
# Should not raise TypeError even if qa_callback is not provided
output = run_powershell(script, base_dir)
assert "STDERR:\nerror" in output
assert "EXIT CODE: 1" in output
script = "Write-Error 'error'"
base_dir = "."
mock_result = MagicMock()
mock_result.stdout = ""
mock_result.stderr = "error"
mock_result.returncode = 1
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"):
# Should not raise TypeError even if qa_callback is not provided
output = run_powershell(script, base_dir)
assert "STDERR:\nerror" in output
assert "EXIT CODE: 1" in output
def test_end_to_end_tier4_integration():
"""
"""
Verifies that shell_runner.run_powershell correctly uses ai_client.run_tier4_analysis.
"""
import ai_client
script = "Invoke-Item non_existent_file"
base_dir = "."
stderr_content = "Invoke-Item : Cannot find path 'C:\\non_existent_file' because it does not exist."
mock_result = MagicMock()
mock_result.stdout = ""
mock_result.stderr = stderr_content
mock_result.returncode = 1
expected_analysis = "Path does not exist. Verify the file path and ensure the file is present before invoking."
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"), \
patch("ai_client.run_tier4_analysis", return_value=expected_analysis) as mock_analysis:
output = run_powershell(script, base_dir, qa_callback=ai_client.run_tier4_analysis)
mock_analysis.assert_called_once_with(stderr_content)
assert f"QA ANALYSIS:\n{expected_analysis}" in output
import ai_client
script = "Invoke-Item non_existent_file"
base_dir = "."
stderr_content = "Invoke-Item : Cannot find path 'C:\\non_existent_file' because it does not exist."
mock_result = MagicMock()
mock_result.stdout = ""
mock_result.stderr = stderr_content
mock_result.returncode = 1
expected_analysis = "Path does not exist. Verify the file path and ensure the file is present before invoking."
with patch("subprocess.run", return_value=mock_result), \
patch("shutil.which", return_value="powershell.exe"), \
patch("ai_client.run_tier4_analysis", return_value=expected_analysis) as mock_analysis:
output = run_powershell(script, base_dir, qa_callback=ai_client.run_tier4_analysis)
mock_analysis.assert_called_once_with(stderr_content)
assert f"QA ANALYSIS:\n{expected_analysis}" in output
def test_ai_client_passes_qa_callback():
"""
"""
Verifies that ai_client.send passes the qa_callback down to the provider function.
"""
import ai_client
# Mocking a provider function to avoid actual API calls
mock_send_gemini = MagicMock(return_value="AI Response")
qa_callback = MagicMock(return_value="QA Analysis")
# Force provider to gemini and mock its send function
with patch("ai_client._provider", "gemini"), \
patch("ai_client._send_gemini", mock_send_gemini):
ai_client.send(
md_content="Context",
user_message="Hello",
qa_callback=qa_callback
)
# Verify provider received the qa_callback
mock_send_gemini.assert_called_once()
args, kwargs = mock_send_gemini.call_args
# qa_callback is the 7th positional argument in _send_gemini
assert args[6] == qa_callback
import ai_client
# Mocking a provider function to avoid actual API calls
mock_send_gemini = MagicMock(return_value="AI Response")
qa_callback = MagicMock(return_value="QA Analysis")
# Force provider to gemini and mock its send function
with patch("ai_client._provider", "gemini"), \
patch("ai_client._send_gemini", mock_send_gemini):
ai_client.send(
md_content="Context",
user_message="Hello",
qa_callback=qa_callback
)
# Verify provider received the qa_callback
mock_send_gemini.assert_called_once()
args, kwargs = mock_send_gemini.call_args
# qa_callback is the 7th positional argument in _send_gemini
assert args[6] == qa_callback
def test_gemini_provider_passes_qa_callback_to_run_script():
"""
"""
Verifies that _send_gemini passes the qa_callback to _run_script.
"""
import ai_client
# Mock Gemini chat and client
mock_client = MagicMock()
mock_chat = MagicMock()
# Simulate a tool call response
mock_part = MagicMock()
mock_part.text = ""
mock_part.function_call = MagicMock()
mock_part.function_call.name = "run_powershell"
mock_part.function_call.args = {"script": "dir"}
mock_candidate = MagicMock()
mock_candidate.content.parts = [mock_part]
mock_candidate.finish_reason.name = "STOP"
mock_response = MagicMock()
mock_response.candidates = [mock_candidate]
mock_response.usage_metadata.prompt_token_count = 10
mock_response.usage_metadata.candidates_token_count = 5
# Second call returns a stop response to break the loop
mock_stop_part = MagicMock()
mock_stop_part.text = "Done"
mock_stop_part.function_call = None
mock_stop_candidate = MagicMock()
mock_stop_candidate.content.parts = [mock_stop_part]
mock_stop_candidate.finish_reason.name = "STOP"
mock_stop_response = MagicMock()
mock_stop_response.candidates = [mock_stop_candidate]
mock_stop_response.usage_metadata.prompt_token_count = 5
mock_stop_response.usage_metadata.candidates_token_count = 2
mock_chat.send_message.side_effect = [mock_response, mock_stop_response]
# Mock count_tokens to avoid chat creation failure
mock_count_resp = MagicMock()
mock_count_resp.total_tokens = 100
mock_client.models.count_tokens.return_value = mock_count_resp
qa_callback = MagicMock()
# Set global state for the test
with patch("ai_client._gemini_client", mock_client), \
patch("ai_client._gemini_chat", None), \
patch("ai_client._ensure_gemini_client"), \
patch("ai_client._run_script", return_value="output") as mock_run_script, \
patch("ai_client._get_gemini_history_list", return_value=[]):
# Ensure chats.create returns our mock_chat
mock_client.chats.create.return_value = mock_chat
ai_client._send_gemini(
md_content="Context",
user_message="Run dir",
base_dir=".",
qa_callback=qa_callback
)
# Verify _run_script received the qa_callback
mock_run_script.assert_called_once_with("dir", ".", qa_callback)
import ai_client
# Mock Gemini chat and client
mock_client = MagicMock()
mock_chat = MagicMock()
# Simulate a tool call response
mock_part = MagicMock()
mock_part.text = ""
mock_part.function_call = MagicMock()
mock_part.function_call.name = "run_powershell"
mock_part.function_call.args = {"script": "dir"}
mock_candidate = MagicMock()
mock_candidate.content.parts = [mock_part]
mock_candidate.finish_reason.name = "STOP"
mock_response = MagicMock()
mock_response.candidates = [mock_candidate]
mock_response.usage_metadata.prompt_token_count = 10
mock_response.usage_metadata.candidates_token_count = 5
# Second call returns a stop response to break the loop
mock_stop_part = MagicMock()
mock_stop_part.text = "Done"
mock_stop_part.function_call = None
mock_stop_candidate = MagicMock()
mock_stop_candidate.content.parts = [mock_stop_part]
mock_stop_candidate.finish_reason.name = "STOP"
mock_stop_response = MagicMock()
mock_stop_response.candidates = [mock_stop_candidate]
mock_stop_response.usage_metadata.prompt_token_count = 5
mock_stop_response.usage_metadata.candidates_token_count = 2
mock_chat.send_message.side_effect = [mock_response, mock_stop_response]
# Mock count_tokens to avoid chat creation failure
mock_count_resp = MagicMock()
mock_count_resp.total_tokens = 100
mock_client.models.count_tokens.return_value = mock_count_resp
qa_callback = MagicMock()
# Set global state for the test
with patch("ai_client._gemini_client", mock_client), \
patch("ai_client._gemini_chat", None), \
patch("ai_client._ensure_gemini_client"), \
patch("ai_client._run_script", return_value="output") as mock_run_script, \
patch("ai_client._get_gemini_history_list", return_value=[]):
# Ensure chats.create returns our mock_chat
mock_client.chats.create.return_value = mock_chat
ai_client._send_gemini(
md_content="Context",
user_message="Run dir",
base_dir=".",
qa_callback=qa_callback
)
# Verify _run_script received the qa_callback
mock_run_script.assert_called_once_with("dir", ".", qa_callback)
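Taken together, these tests pin down how run_powershell is expected to assemble its output and when the optional qa_callback fires; a hedged sketch of that contract, not the actual shell_runner implementation:

def format_result(stdout: str, stderr: str, returncode: int, qa_callback=None) -> str:
    """Combine captured output; append a QA analysis when the run failed or wrote stderr."""
    parts = [f"STDOUT:\n{stdout}", f"STDERR:\n{stderr}", f"EXIT CODE: {returncode}"]
    if qa_callback is not None and (returncode != 0 or stderr.strip()):
        parts.append(f"QA ANALYSIS:\n{qa_callback(stderr)}")
    return "\n\n".join(parts)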

View File

@@ -3,134 +3,113 @@ from pathlib import Path
from aggregate import build_tier1_context, build_tier2_context, build_tier3_context
def test_build_tier1_context_exists():
# This should fail if the function is not defined
file_items = [
{"path": Path("conductor/product.md"), "entry": "conductor/product.md", "content": "Product content", "error": False},
{"path": Path("other.py"), "entry": "other.py", "content": "Other content", "error": False}
]
history = ["User: hello", "AI: hi"]
result = build_tier1_context(file_items, Path("."), [], history)
assert "Product content" in result
# other.py should be summarized, not full content in a code block
assert "Other content" not in result or "Summarized" in result # Assuming summary format
# This should fail if the function is not defined
file_items = [
{"path": Path("conductor/product.md"), "entry": "conductor/product.md", "content": "Product content", "error": False},
{"path": Path("other.py"), "entry": "other.py", "content": "Other content", "error": False}
]
history = ["User: hello", "AI: hi"]
result = build_tier1_context(file_items, Path("."), [], history)
assert "Product content" in result
# other.py should be summarized, not full content in a code block
assert "Other content" not in result or "Summarized" in result # Assuming summary format
def test_build_tier2_context_exists():
file_items = [
{"path": Path("other.py"), "entry": "other.py", "content": "Other content", "error": False}
]
history = ["User: hello"]
result = build_tier2_context(file_items, Path("."), [], history)
assert "Other content" in result
file_items = [
{"path": Path("other.py"), "entry": "other.py", "content": "Other content", "error": False}
]
history = ["User: hello"]
result = build_tier2_context(file_items, Path("."), [], history)
assert "Other content" in result
def test_build_tier3_context_ast_skeleton(monkeypatch):
from unittest.mock import MagicMock
import aggregate
import file_cache
# Mock ASTParser
mock_parser_instance = MagicMock()
mock_parser_instance.get_skeleton.return_value = "def other():\n ..."
mock_parser_class = MagicMock(return_value=mock_parser_instance)
# Mock file_cache.ASTParser in aggregate module
monkeypatch.setattr("aggregate.ASTParser", mock_parser_class)
file_items = [
{"path": Path("other.py"), "entry": "other.py", "content": "def other():\n pass", "error": False}
]
history = []
# New behavior check: it should use ASTParser for .py files not in focus
result = build_tier3_context(file_items, Path("."), [], history, focus_files=[])
assert "def other():" in result
assert "..." in result
assert "Python" not in result # summarize.py output should not be there if AST skeleton is used
mock_parser_class.assert_called_once_with("python")
mock_parser_instance.get_skeleton.assert_called_once_with("def other():\n pass")
from unittest.mock import MagicMock
import aggregate
import file_cache
# Mock ASTParser
mock_parser_instance = MagicMock()
mock_parser_instance.get_skeleton.return_value = "def other():\n ..."
mock_parser_class = MagicMock(return_value=mock_parser_instance)
# Mock file_cache.ASTParser in aggregate module
monkeypatch.setattr("aggregate.ASTParser", mock_parser_class)
file_items = [
{"path": Path("other.py"), "entry": "other.py", "content": "def other():\n pass", "error": False}
]
history = []
# New behavior check: it should use ASTParser for .py files not in focus
result = build_tier3_context(file_items, Path("."), [], history, focus_files=[])
assert "def other():" in result
assert "..." in result
assert "Python" not in result # summarize.py output should not be there if AST skeleton is used
mock_parser_class.assert_called_once_with("python")
mock_parser_instance.get_skeleton.assert_called_once_with("def other():\n pass")
def test_build_tier3_context_exists():
file_items = [
{"path": Path("focus.py"), "entry": "focus.py", "content": "def focus():\n pass", "error": False},
{"path": Path("other.py"), "entry": "other.py", "content": "def other():\n pass", "error": False}
]
history = ["User: hello"]
result = build_tier3_context(file_items, Path("."), [], history, focus_files=["focus.py"])
assert "def focus():" in result
assert "pass" in result
# other.py is not in focus, so it should get an AST skeleton rather than its full source.
# For a body that is just "pass" the skeleton may look identical, so only assert
# that the file header and the AST-skeleton marker are present.
assert "other.py" in result
assert "AST Skeleton" in result
file_items = [
{"path": Path("focus.py"), "entry": "focus.py", "content": "def focus():\n pass", "error": False},
{"path": Path("other.py"), "entry": "other.py", "content": "def other():\n pass", "error": False}
]
history = ["User: hello"]
result = build_tier3_context(file_items, Path("."), [], history, focus_files=["focus.py"])
assert "def focus():" in result
assert "pass" in result
# other.py is not in focus, so it should get an AST skeleton rather than its full source.
# For a body that is just "pass" the skeleton may look identical, so only assert
# that the file header and the AST-skeleton marker are present.
assert "other.py" in result
assert "AST Skeleton" in result
def test_build_file_items_with_tiers(tmp_path):
from aggregate import build_file_items
# Create some dummy files
file1 = tmp_path / "file1.txt"
file1.write_text("content1")
file2 = tmp_path / "file2.txt"
file2.write_text("content2")
files_config = [
"file1.txt",
{"path": "file2.txt", "tier": 3}
]
items = build_file_items(tmp_path, files_config)
assert len(items) == 2
item1 = next(i for i in items if i["entry"] == "file1.txt")
assert item1["content"] == "content1"
assert "tier" in item1
assert item1["tier"] is None
item2 = next(i for i in items if i["entry"] == "file2.txt")
assert item2["content"] == "content2"
assert item2["tier"] == 3
from aggregate import build_file_items
# Create some dummy files
file1 = tmp_path / "file1.txt"
file1.write_text("content1")
file2 = tmp_path / "file2.txt"
file2.write_text("content2")
files_config = [
"file1.txt",
{"path": "file2.txt", "tier": 3}
]
items = build_file_items(tmp_path, files_config)
assert len(items) == 2
item1 = next(i for i in items if i["entry"] == "file1.txt")
assert item1["content"] == "content1"
assert "tier" in item1
assert item1["tier"] is None
item2 = next(i for i in items if i["entry"] == "file2.txt")
assert item2["content"] == "content2"
assert item2["tier"] == 3
def test_build_files_section_with_dicts(tmp_path):
from aggregate import build_files_section
file1 = tmp_path / "file1.txt"
file1.write_text("content1")
files_config = [
{"path": str(file1)}
]
result = build_files_section(tmp_path, files_config)
assert "content1" in result
assert "file1.txt" in result
from aggregate import build_files_section
file1 = tmp_path / "file1.txt"
file1.write_text("content1")
files_config = [
{"path": str(file1)}
]
result = build_files_section(tmp_path, files_config)
assert "content1" in result
assert "file1.txt" in result
def test_tiered_context_by_tier_field():
file_items = [
{"path": Path("tier1_file.txt"), "entry": "tier1_file.txt", "content": "Full Tier 1 Content\nLine 2", "tier": 1},
{"path": Path("tier3_file.txt"), "entry": "tier3_file.txt", "content": "Full Tier 3 Content\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10", "tier": 3},
{"path": Path("other.txt"), "entry": "other.txt", "content": "Other Content\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10", "tier": None}
]
# Test Tier 1 Context
result_t1 = build_tier1_context(file_items, Path("."), [], [])
assert "Full Tier 1 Content" in result_t1
assert "Line 2" in result_t1 # In full
# tier3_file.txt should be summarized
assert "tier3_file.txt" in result_t1
assert "preview:" in result_t1
assert "Line 9" not in result_t1 # Only first 8 lines in preview
# Test Tier 3 Context
result_t3 = build_tier3_context(file_items, Path("."), [], [], focus_files=[])
assert "Full Tier 3 Content" in result_t3
assert "Line 10" in result_t3 # In full
# tier1_file.txt should be summarized
assert "tier1_file.txt" in result_t3
assert "preview:" in result_t3
assert "Full Tier 1 Content" in result_t3 # It's short, so it's in preview
file_items = [
{"path": Path("tier1_file.txt"), "entry": "tier1_file.txt", "content": "Full Tier 1 Content\nLine 2", "tier": 1},
{"path": Path("tier3_file.txt"), "entry": "tier3_file.txt", "content": "Full Tier 3 Content\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10", "tier": 3},
{"path": Path("other.txt"), "entry": "other.txt", "content": "Other Content\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10", "tier": None}
]
# Test Tier 1 Context
result_t1 = build_tier1_context(file_items, Path("."), [], [])
assert "Full Tier 1 Content" in result_t1
assert "Line 2" in result_t1 # In full
# tier3_file.txt should be summarized
assert "tier3_file.txt" in result_t1
assert "preview:" in result_t1
assert "Line 9" not in result_t1 # Only first 8 lines in preview
# Test Tier 3 Context
result_t3 = build_tier3_context(file_items, Path("."), [], [], focus_files=[])
assert "Full Tier 3 Content" in result_t3
assert "Line 10" in result_t3 # In full
# tier1_file.txt should be summarized
assert "tier1_file.txt" in result_t3
assert "preview:" in result_t3
assert "Full Tier 1 Content" in result_t3 # It's short, so it's in preview

View File

@@ -8,8 +8,8 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import ai_client
def test_token_usage_tracking():
ai_client.reset_session()
# Mock an API response with token usage
usage = {"prompt_tokens": 100, "candidates_tokens": 50, "total_tokens": 150}
# This would test the internal accumulator in ai_client
pass
ai_client.reset_session()
# Mock an API response with token usage
usage = {"prompt_tokens": 100, "candidates_tokens": 50, "total_tokens": 150}
# This would test the internal accumulator in ai_client
pass

View File

@@ -9,73 +9,61 @@ from models import TrackState, Metadata, Ticket
from project_manager import save_track_state, load_track_state
def test_track_state_persistence(tmp_path):
"""
"""
Tests saving and loading a TrackState object to/from a TOML file.
1. Create a TrackState object with sample metadata, discussion, and tasks.
2. Call save_track_state('test_track', state, base_dir).
3. Verify that base_dir/conductor/tracks/test_track/state.toml exists.
4. Call load_track_state('test_track', base_dir) and verify it returns an identical TrackState object.
"""
base_dir = tmp_path
track_id = "test-track-999" # Metadata internal ID
track_folder_name = "test_track" # Folder name used in persistence
# 1. Create a TrackState object with sample data
metadata = Metadata(
id=track_id,
name="Test Track",
status="in_progress",
created_at=datetime(2023, 1, 1, 12, 0, 0),
updated_at=datetime(2023, 1, 2, 13, 0, 0)
)
discussion = [
{"role": "User", "content": "Hello", "ts": datetime(2023, 1, 1, 12, 0, 0)},
{"role": "AI", "content": "Hi there!", "ts": datetime(2023, 1, 1, 12, 0, 5)}
]
tasks = [
Ticket(id="task-1", description="First task", status="completed", assigned_to="worker-1"),
Ticket(id="task-2", description="Second task", status="todo", assigned_to="worker-2")
]
original_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks
)
# 2. Call save_track_state('test_track', state, base_dir)
save_track_state(track_folder_name, original_state, base_dir)
# 3. Verify that base_dir/conductor/tracks/test_track/state.toml exists
state_file_path = base_dir / "conductor" / "tracks" / track_folder_name / "state.toml"
assert state_file_path.exists(), f"State file should exist at {state_file_path}"
# 4. Call load_track_state('test_track', base_dir) and verify it returns an identical TrackState object
loaded_state = load_track_state(track_folder_name, base_dir)
assert loaded_state is not None, "load_track_state returned None"
# Verify equality
assert loaded_state.metadata.id == original_state.metadata.id
assert loaded_state.metadata.name == original_state.metadata.name
assert loaded_state.metadata.status == original_state.metadata.status
assert loaded_state.metadata.created_at == original_state.metadata.created_at
assert loaded_state.metadata.updated_at == original_state.metadata.updated_at
assert len(loaded_state.tasks) == len(original_state.tasks)
for i in range(len(original_state.tasks)):
assert loaded_state.tasks[i].id == original_state.tasks[i].id
assert loaded_state.tasks[i].description == original_state.tasks[i].description
assert loaded_state.tasks[i].status == original_state.tasks[i].status
assert loaded_state.tasks[i].assigned_to == original_state.tasks[i].assigned_to
assert len(loaded_state.discussion) == len(original_state.discussion)
for i in range(len(original_state.discussion)):
assert loaded_state.discussion[i]["role"] == original_state.discussion[i]["role"]
assert loaded_state.discussion[i]["content"] == original_state.discussion[i]["content"]
assert loaded_state.discussion[i]["ts"] == original_state.discussion[i]["ts"]
# Final check: deep equality of dataclasses
assert loaded_state == original_state
base_dir = tmp_path
track_id = "test-track-999" # Metadata internal ID
track_folder_name = "test_track" # Folder name used in persistence
# 1. Create a TrackState object with sample data
metadata = Metadata(
id=track_id,
name="Test Track",
status="in_progress",
created_at=datetime(2023, 1, 1, 12, 0, 0),
updated_at=datetime(2023, 1, 2, 13, 0, 0)
)
discussion = [
{"role": "User", "content": "Hello", "ts": datetime(2023, 1, 1, 12, 0, 0)},
{"role": "AI", "content": "Hi there!", "ts": datetime(2023, 1, 1, 12, 0, 5)}
]
tasks = [
Ticket(id="task-1", description="First task", status="completed", assigned_to="worker-1"),
Ticket(id="task-2", description="Second task", status="todo", assigned_to="worker-2")
]
original_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks
)
# 2. Call save_track_state('test_track', state, base_dir)
save_track_state(track_folder_name, original_state, base_dir)
# 3. Verify that base_dir/conductor/tracks/test_track/state.toml exists
state_file_path = base_dir / "conductor" / "tracks" / track_folder_name / "state.toml"
assert state_file_path.exists(), f"State file should exist at {state_file_path}"
# 4. Call load_track_state('test_track', base_dir) and verify it returns an identical TrackState object
loaded_state = load_track_state(track_folder_name, base_dir)
assert loaded_state is not None, "load_track_state returned None"
# Verify equality
assert loaded_state.metadata.id == original_state.metadata.id
assert loaded_state.metadata.name == original_state.metadata.name
assert loaded_state.metadata.status == original_state.metadata.status
assert loaded_state.metadata.created_at == original_state.metadata.created_at
assert loaded_state.metadata.updated_at == original_state.metadata.updated_at
assert len(loaded_state.tasks) == len(original_state.tasks)
for i in range(len(original_state.tasks)):
assert loaded_state.tasks[i].id == original_state.tasks[i].id
assert loaded_state.tasks[i].description == original_state.tasks[i].description
assert loaded_state.tasks[i].status == original_state.tasks[i].status
assert loaded_state.tasks[i].assigned_to == original_state.tasks[i].assigned_to
assert len(loaded_state.discussion) == len(original_state.discussion)
for i in range(len(original_state.discussion)):
assert loaded_state.discussion[i]["role"] == original_state.discussion[i]["role"]
assert loaded_state.discussion[i]["content"] == original_state.discussion[i]["content"]
assert loaded_state.discussion[i]["ts"] == original_state.discussion[i]["ts"]
# Final check: deep equality of dataclasses
assert loaded_state == original_state

View File

@@ -7,166 +7,154 @@ from models import Metadata, TrackState, Ticket
# --- Pytest Tests ---
def test_track_state_instantiation():
"""Test creating a TrackState object."""
now = datetime.now(timezone.utc)
metadata = Metadata(
id="track-123",
name="Initial Setup",
status="in_progress",
created_at=now - timedelta(days=1),
updated_at=now,
)
discussion = [
{"role": "user", "content": "Hello", "ts": now - timedelta(hours=1)},
{"role": "assistant", "content": "Hi there!", "ts": now - timedelta(hours=2)},
]
# Update Ticket instantiation to match models.py fields (description, assigned_to)
tasks = [
Ticket(id="task-a", description="Design UI", status="todo", assigned_to="dev1"),
Ticket(id="task-b", description="Implement Backend", status="todo", assigned_to="dev2"),
]
track_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks,
)
assert track_state.metadata.id == "track-123"
assert len(track_state.discussion) == 2
assert len(track_state.tasks) == 2
assert isinstance(track_state.tasks[0], Ticket)
assert track_state.tasks[0].description == "Design UI"
assert track_state.tasks[0].assigned_to == "dev1"
"""Test creating a TrackState object."""
now = datetime.now(timezone.utc)
metadata = Metadata(
id="track-123",
name="Initial Setup",
status="in_progress",
created_at=now - timedelta(days=1),
updated_at=now,
)
discussion = [
{"role": "user", "content": "Hello", "ts": now - timedelta(hours=1)},
{"role": "assistant", "content": "Hi there!", "ts": now - timedelta(hours=2)},
]
# Update Ticket instantiation to match models.py fields (description, assigned_to)
tasks = [
Ticket(id="task-a", description="Design UI", status="todo", assigned_to="dev1"),
Ticket(id="task-b", description="Implement Backend", status="todo", assigned_to="dev2"),
]
track_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks,
)
assert track_state.metadata.id == "track-123"
assert len(track_state.discussion) == 2
assert len(track_state.tasks) == 2
assert isinstance(track_state.tasks[0], Ticket)
assert track_state.tasks[0].description == "Design UI"
assert track_state.tasks[0].assigned_to == "dev1"
def test_track_state_to_dict():
"""Test the to_dict() method for serialization."""
now = datetime.now(timezone.utc)
metadata = Metadata(
id="track-456",
name="Refinement Phase",
status="completed",
created_at=now - timedelta(days=5),
updated_at=now - timedelta(days=2),
)
discussion = [
{"role": "user", "content": "Need changes", "ts": now - timedelta(hours=3)},
{"role": "assistant", "content": "Understood.", "ts": now - timedelta(hours=4)},
]
# Update Ticket instantiation
tasks = [
Ticket(id="task-c", description="Add feature X", status="in_progress", assigned_to="dev3"),
]
track_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks,
)
track_dict = track_state.to_dict()
assert track_dict["metadata"]["id"] == "track-456"
assert track_dict["metadata"]["created_at"] == metadata.created_at.isoformat()
assert track_dict["metadata"]["updated_at"] == metadata.updated_at.isoformat()
assert len(track_dict["discussion"]) == 2
assert track_dict["discussion"][0]["ts"] == discussion[0]["ts"].isoformat()
assert len(track_dict["tasks"]) == 1
# Use the Ticket's to_dict method for serialization
assert track_dict["tasks"][0]["id"] == "task-c"
assert track_dict["tasks"][0]["description"] == "Add feature X"
assert track_dict["tasks"][0]["assigned_to"] == "dev3"
"""Test the to_dict() method for serialization."""
now = datetime.now(timezone.utc)
metadata = Metadata(
id="track-456",
name="Refinement Phase",
status="completed",
created_at=now - timedelta(days=5),
updated_at=now - timedelta(days=2),
)
discussion = [
{"role": "user", "content": "Need changes", "ts": now - timedelta(hours=3)},
{"role": "assistant", "content": "Understood.", "ts": now - timedelta(hours=4)},
]
# Update Ticket instantiation
tasks = [
Ticket(id="task-c", description="Add feature X", status="in_progress", assigned_to="dev3"),
]
track_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks,
)
track_dict = track_state.to_dict()
assert track_dict["metadata"]["id"] == "track-456"
assert track_dict["metadata"]["created_at"] == metadata.created_at.isoformat()
assert track_dict["metadata"]["updated_at"] == metadata.updated_at.isoformat()
assert len(track_dict["discussion"]) == 2
assert track_dict["discussion"][0]["ts"] == discussion[0]["ts"].isoformat()
assert len(track_dict["tasks"]) == 1
# Use the Ticket's to_dict method for serialization
assert track_dict["tasks"][0]["id"] == "task-c"
assert track_dict["tasks"][0]["description"] == "Add feature X"
assert track_dict["tasks"][0]["assigned_to"] == "dev3"
def test_track_state_from_dict():
"""Test the from_dict() class method for deserialization."""
now = datetime.now(timezone.utc)
track_dict_data = {
"metadata": {
"id": "track-789",
"name": "Final Review",
"status": "pending",
"created_at": (now - timedelta(days=10)).isoformat(),
"updated_at": (now - timedelta(days=9)).isoformat(),
},
"discussion": [
{"role": "user", "content": "Review complete.", "ts": (now - timedelta(hours=5)).isoformat()},
],
"tasks": [
# Use fields from models.py Ticket definition for deserialization
{"id": "task-d", "description": "Deploy", "status": "completed", "assigned_to": "ops1"},
],
}
"""Test the from_dict() class method for deserialization."""
now = datetime.now(timezone.utc)
track_dict_data = {
"metadata": {
"id": "track-789",
"name": "Final Review",
"status": "pending",
"created_at": (now - timedelta(days=10)).isoformat(),
"updated_at": (now - timedelta(days=9)).isoformat(),
},
"discussion": [
{"role": "user", "content": "Review complete.", "ts": (now - timedelta(hours=5)).isoformat()},
],
"tasks": [
# Use fields from models.py Ticket definition for deserialization
{"id": "task-d", "description": "Deploy", "status": "completed", "assigned_to": "ops1"},
],
}
track_state = TrackState.from_dict(track_dict_data)
assert isinstance(track_state, TrackState)
assert track_state.metadata.id == "track-789"
assert isinstance(track_state.metadata.created_at, datetime)
assert track_state.metadata.created_at.isoformat() == track_dict_data["metadata"]["created_at"]
assert len(track_state.discussion) == 1
assert isinstance(track_state.discussion[0]["ts"], datetime)
assert track_state.discussion[0]["ts"].isoformat() == track_dict_data["discussion"][0]["ts"]
assert len(track_state.tasks) == 1
assert isinstance(track_state.tasks[0], Ticket)
assert track_state.tasks[0].id == "task-d"
assert track_state.tasks[0].description == "Deploy"
assert track_state.tasks[0].assigned_to == "ops1"
# Test case for empty lists and missing keys for robustness
track_state = TrackState.from_dict(track_dict_data)
assert isinstance(track_state, TrackState)
assert track_state.metadata.id == "track-789"
assert isinstance(track_state.metadata.created_at, datetime)
assert track_state.metadata.created_at.isoformat() == track_dict_data["metadata"]["created_at"]
assert len(track_state.discussion) == 1
assert isinstance(track_state.discussion[0]["ts"], datetime)
assert track_state.discussion[0]["ts"].isoformat() == track_dict_data["discussion"][0]["ts"]
assert len(track_state.tasks) == 1
assert isinstance(track_state.tasks[0], Ticket)
assert track_state.tasks[0].id == "task-d"
assert track_state.tasks[0].description == "Deploy"
assert track_state.tasks[0].assigned_to == "ops1"
# Test case for empty lists and missing keys for robustness
def test_track_state_from_dict_empty_and_missing():
"""Test from_dict with empty lists and missing optional keys."""
track_dict_data = {
"metadata": {
"id": "track-empty",
"name": "Empty State",
# created_at, updated_at, status are optional in from_dict logic
},
"discussion": [], # Empty discussion list
"tasks": [], # Empty tasks list
}
"""Test from_dict with empty lists and missing optional keys."""
track_dict_data = {
"metadata": {
"id": "track-empty",
"name": "Empty State",
# created_at, updated_at, status are optional in from_dict logic
},
"discussion": [], # Empty discussion list
"tasks": [], # Empty tasks list
}
track_state = TrackState.from_dict(track_dict_data)
assert isinstance(track_state, TrackState)
assert track_state.metadata.id == "track-empty"
assert track_state.metadata.name == "Empty State"
assert track_state.metadata.created_at is None
assert track_state.metadata.updated_at is None
assert track_state.metadata.status is None
assert len(track_state.discussion) == 0
assert len(track_state.tasks) == 0
# Test case for to_dict with None values or missing optional data
track_state = TrackState.from_dict(track_dict_data)
assert isinstance(track_state, TrackState)
assert track_state.metadata.id == "track-empty"
assert track_state.metadata.name == "Empty State"
assert track_state.metadata.created_at is None
assert track_state.metadata.updated_at is None
assert track_state.metadata.status is None
assert len(track_state.discussion) == 0
assert len(track_state.tasks) == 0
# Test case for to_dict with None values or missing optional data
def test_track_state_to_dict_with_none():
"""Test to_dict with None values in optional fields."""
now = datetime.now(timezone.utc)
metadata = Metadata(
id="track-none",
name="None Test",
status=None, # None status
created_at=now,
updated_at=None, # None updated_at
)
discussion = [
{"role": "system", "content": "Info", "ts": None}, # None timestamp
]
# Update Ticket instantiation
tasks = [
Ticket(id="task-none", description="Task None", status="pending", assigned_to="anon"),
]
track_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks,
)
track_dict = track_state.to_dict()
assert track_dict["metadata"]["status"] is None
# Check that isoformat was called on datetime object, not None
assert track_dict["metadata"]["created_at"] == now.isoformat()
assert track_dict["metadata"]["updated_at"] is None # This should be None as it's passed as None
assert track_dict["discussion"][0]["ts"] is None
assert track_dict["tasks"][0]["description"] == "Task None"
assert track_dict["tasks"][0]["assigned_to"] == "anon"
"""Test to_dict with None values in optional fields."""
now = datetime.now(timezone.utc)
metadata = Metadata(
id="track-none",
name="None Test",
status=None, # None status
created_at=now,
updated_at=None, # None updated_at
)
discussion = [
{"role": "system", "content": "Info", "ts": None}, # None timestamp
]
# Update Ticket instantiation
tasks = [
Ticket(id="task-none", description="Task None", status="pending", assigned_to="anon"),
]
track_state = TrackState(
metadata=metadata,
discussion=discussion,
tasks=tasks,
)
track_dict = track_state.to_dict()
assert track_dict["metadata"]["status"] is None
# Check that isoformat was called on datetime object, not None
assert track_dict["metadata"]["created_at"] == now.isoformat()
assert track_dict["metadata"]["updated_at"] is None # This should be None as it's passed as None
assert track_dict["discussion"][0]["ts"] is None
assert track_dict["tasks"][0]["description"] == "Task None"
assert track_dict["tasks"][0]["assigned_to"] == "anon"

View File

@@ -2,29 +2,24 @@ import tree_sitter_python as tspython
from tree_sitter import Language, Parser
def test_tree_sitter_python_setup():
"""
"""
Verifies that tree-sitter and tree-sitter-python are correctly installed
and can parse a simple Python function string.
"""
# Initialize the Python language and parser
PY_LANGUAGE = Language(tspython.language())
parser = Parser(PY_LANGUAGE)
# Simple Python code to parse
code = """def hello():
print('world')"""
# Parse the code
tree = parser.parse(bytes(code, "utf8"))
# Assert that the root node is a 'module'
assert tree.root_node.type == "module"
# Verify we can find a function definition
found_function = False
for child in tree.root_node.children:
if child.type == "function_definition":
found_function = True
break
assert found_function, "Should have found a function_definition node"

View File

@@ -8,15 +8,15 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.user_agent import UserSimAgent
def test_user_agent_instantiation():
agent = UserSimAgent(hook_client=None)
assert agent is not None
def test_perform_action_with_delay():
agent = UserSimAgent(hook_client=None)
called = False
def action():
nonlocal called
called = True
agent.perform_action_with_delay(action)
assert called is True

View File

@@ -9,39 +9,33 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from simulation.workflow_sim import WorkflowSimulator
def test_simulator_instantiation():
client = MagicMock()
sim = WorkflowSimulator(client)
assert sim is not None
def test_setup_new_project():
client = MagicMock()
sim = WorkflowSimulator(client)
# Mock responses for wait_for_server
client.wait_for_server.return_value = True
sim.setup_new_project("TestProject", "/tmp/test_git")
# Verify hook calls
client.click.assert_any_call("btn_project_new")
client.set_value.assert_any_call("project_git_dir", "/tmp/test_git")
client.click.assert_any_call("btn_project_save")
def test_discussion_switching():
client = MagicMock()
sim = WorkflowSimulator(client)
sim.create_discussion("NewDisc")
client.set_value.assert_called_with("disc_new_name_input", "NewDisc")
client.click.assert_called_with("btn_disc_create")
sim.switch_discussion("NewDisc")
client.select_list_item.assert_called_with("disc_listbox", "NewDisc")
def test_history_truncation():
client = MagicMock()
sim = WorkflowSimulator(client)
sim.truncate_history(3)
client.set_value.assert_called_with("disc_truncate_pairs", 3)
client.click.assert_called_with("btn_disc_truncate")

View File

@@ -8,89 +8,80 @@ import unittest
# Calculate project root
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if PROJECT_ROOT not in sys.path:
sys.path.insert(0, PROJECT_ROOT)
from api_hook_client import ApiHookClient
class TestMMAGUIRobust(unittest.TestCase):
@classmethod
def setUpClass(cls):
# 1. Launch gui_2.py with --enable-test-hooks
cls.gui_command = [sys.executable, "gui_2.py", "--enable-test-hooks"]
print(f"Launching GUI: {' '.join(cls.gui_command)}")
cls.gui_process = subprocess.Popen(
cls.gui_command,
cwd=PROJECT_ROOT,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
cls.client = ApiHookClient()
print("Waiting for GUI to start...")
if not cls.client.wait_for_server(timeout=10):
cls.gui_process.terminate()
raise RuntimeError("GUI failed to start or hook server not responsive.")
print("GUI started.")
@classmethod
def tearDownClass(cls):
if cls.gui_process:
cls.gui_process.terminate()
cls.gui_process.wait(timeout=5)
def test_mma_state_ingestion(self):
"""Verify that mma_state_update event correctly updates GUI state."""
track_data = {
"id": "robust_test_track",
"title": "Robust Verification Track",
"description": "Verifying internal state ingestion"
}
tickets_data = [
{"id": "T1", "target_file": "file1.py", "status": "todo"},
{"id": "T2", "target_file": "file2.py", "status": "running"},
{"id": "T3", "target_file": "file3.py", "status": "complete"},
]
payload = {
"status": "active",
"active_tier": "Tier 2",
"track": track_data,
"tickets": tickets_data
}
print("Pushing mma_state_update...")
self.client.push_event("mma_state_update", payload)
# Give GUI a moment to process the async task
time.sleep(1.0)
print("Querying mma_status...")
status = self.client.get_mma_status()
self.assertEqual(status["mma_status"], "active")
self.assertEqual(status["active_tier"], "Tier 2")
self.assertEqual(status["active_track"]["id"], "robust_test_track")
self.assertEqual(len(status["active_tickets"]), 3)
self.assertEqual(status["active_tickets"][2]["status"], "complete")
print("MMA state ingestion verified successfully.")
def test_mma_step_approval_trigger(self):
"""Verify that mma_step_approval event sets the pending approval flag."""
payload = {
"ticket_id": "T2",
"payload": "echo 'Robust Test'"
}
print("Pushing mma_step_approval...")
self.client.push_event("mma_step_approval", payload)
time.sleep(1.0)
print("Querying mma_status for pending approval...")
status = self.client.get_mma_status()
self.assertTrue(status["pending_approval"], "GUI did not register pending MMA approval")
print("MMA step approval trigger verified successfully.")
if __name__ == "__main__":
unittest.main()

View File

@@ -5,65 +5,58 @@ import os
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
if PROJECT_ROOT not in sys.path:
sys.path.insert(0, PROJECT_ROOT)
from api_hook_client import ApiHookClient
def diag_run():
print("Launching GUI for manual inspection + automated hooks...")
# Use a log file for GUI output
with open("gui_diag.log", "w") as log_file:
gui_process = subprocess.Popen(
[sys.executable, "gui_2.py", "--enable-test-hooks"],
cwd=PROJECT_ROOT,
stdout=log_file,
stderr=log_file,
text=True
)
client = ApiHookClient()
print("Waiting for GUI...")
if not client.wait_for_server(timeout=10):
print("GUI failed to start.")
gui_process.terminate()
return
# Pushing state
track_data = {"id": "diag_track", "title": "Diagnostic Track"}
tickets_data = [{"id": f"T{i}", "status": "todo"} for i in range(3)]
print("Pushing state update...")
client.push_event("mma_state_update", {
"status": "active",
"active_tier": "Tier 1",
"track": track_data,
"tickets": tickets_data
})
time.sleep(2)
print("Pushing approval request...")
client.push_event("mma_step_approval", {
"ticket_id": "T0",
"payload": "Get-ChildItem"
})
print("\nGUI is running. Check 'gui_diag.log' for output.")
print("I will now poll mma_status every 2 seconds. Ctrl+C to stop.")
try:
start_poll = time.time()
while time.time() - start_poll < 30:
try:
status = client.get_mma_status()
print(f"[{time.strftime('%H:%M:%S')}] Status: {status.get('mma_status')}, Pending Approval: {status.get('pending_approval')}")
except Exception as e:
print(f"[{time.strftime('%H:%M:%S')}] Error querying status: {e}")
time.sleep(2)
except KeyboardInterrupt:
print("Stopping...")
finally:
gui_process.terminate()
print("Launching GUI for manual inspection + automated hooks...")
# Use a log file for GUI output
with open("gui_diag.log", "w") as log_file:
gui_process = subprocess.Popen(
[sys.executable, "gui_2.py", "--enable-test-hooks"],
cwd=PROJECT_ROOT,
stdout=log_file,
stderr=log_file,
text=True
)
client = ApiHookClient()
print("Waiting for GUI...")
if not client.wait_for_server(timeout=10):
print("GUI failed to start.")
gui_process.terminate()
return
# Pushing state
track_data = {"id": "diag_track", "title": "Diagnostic Track"}
tickets_data = [{"id": f"T{i}", "status": "todo"} for i in range(3)]
print("Pushing state update...")
client.push_event("mma_state_update", {
"status": "active",
"active_tier": "Tier 1",
"track": track_data,
"tickets": tickets_data
})
time.sleep(2)
print("Pushing approval request...")
client.push_event("mma_step_approval", {
"ticket_id": "T0",
"payload": "Get-ChildItem"
})
print("\nGUI is running. Check 'gui_diag.log' for output.")
print("I will now poll mma_status every 2 seconds. Ctrl+C to stop.")
try:
start_poll = time.time()
while time.time() - start_poll < 30:
try:
status = client.get_mma_status()
print(f"[{time.strftime('%H:%M:%S')}] Status: {status.get('mma_status')}, Pending Approval: {status.get('pending_approval')}")
except Exception as e:
print(f"[{time.strftime('%H:%M:%S')}] Error querying status: {e}")
time.sleep(2)
except KeyboardInterrupt:
print("Stopping...")
finally:
gui_process.terminate()
if __name__ == "__main__":
diag_run()

View File

@@ -12,130 +12,116 @@ PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
# Ensure project root is in sys.path to import modules like api_hook_client
if PROJECT_ROOT not in sys.path:
sys.path.insert(0, PROJECT_ROOT)
print(f"Added '{PROJECT_ROOT}' to sys.path for imports.")
try:
from api_hook_client import ApiHookClient
except ImportError as e:
print(f"Error: Could not import ApiHookClient from '{API_HOOK_CLIENT_MODULE}'.")
print(f"Please ensure '{API_HOOK_CLIENT_MODULE}.py' is accessible and '{PROJECT_ROOT}' is correctly added to sys.path.")
print(f"Import error: {e}")
sys.exit(1)
print(f"Error: Could not import ApiHookClient from '{API_HOOK_CLIENT_MODULE}'.")
print(f"Please ensure '{API_HOOK_CLIENT_MODULE}.py' is accessible and '{PROJECT_ROOT}' is correctly added to sys.path.")
print(f"Import error: {e}")
sys.exit(1)
def run_visual_mma_verification():
print("Starting visual MMA verification test...")
# Change current directory to project root
original_dir = os.getcwd()
if original_dir != PROJECT_ROOT:
try:
os.chdir(PROJECT_ROOT)
print(f"Changed current directory to: {PROJECT_ROOT}")
except FileNotFoundError:
print(f"Error: Project root directory '{PROJECT_ROOT}' not found.")
return
# 1. Launch gui_2.py with --enable-test-hooks
gui_command = [sys.executable, GUI_SCRIPT, TEST_HOOKS_FLAG]
print(f"Launching GUI with command: {' '.join(gui_command)}")
try:
gui_process = subprocess.Popen(
gui_command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
cwd=PROJECT_ROOT
)
print(f"GUI process started with PID: {gui_process.pid}")
except FileNotFoundError:
print(f"Error: Could not find Python executable '{sys.executable}' or script '{GUI_SCRIPT}'.")
return
except Exception as e:
print(f"Error starting GUI process: {e}")
return
# Wait for GUI to start
print("Waiting for GUI to initialize and hook server to start (5 seconds)...")
time.sleep(5)
if gui_process.poll() is not None:
print(f"Error: GUI process exited prematurely with return code {gui_process.returncode}.")
return
# 2. Use ApiHookClient
try:
client = ApiHookClient()
print("ApiHookClient initialized successfully.")
except Exception as e:
print(f"Failed to initialize ApiHookClient. Error: {e}")
if gui_process:
gui_process.terminate()
return
# 3. Setup MMA data
track_data = {
"id": "visual_test_track",
"title": "Visual Verification Track",
"description": "A track to verify MMA UI components"
}
tickets_data = [
{"id": "TICKET-001", "target_file": "core.py", "status": "todo"},
{"id": "TICKET-002", "target_file": "utils.py", "status": "running"},
{"id": "TICKET-003", "target_file": "tests.py", "status": "complete"},
{"id": "TICKET-004", "target_file": "api.py", "status": "blocked"},
{"id": "TICKET-005", "target_file": "gui.py", "status": "paused"},
]
print("\nPushing MMA state update...")
try:
payload = {
"status": "running",
"active_tier": "Tier 3",
"track": track_data,
"tickets": tickets_data
}
client.push_event("mma_state_update", payload)
print(" - MMA state update pushed.")
except Exception as e:
print(f" - Warning: Failed to push mma_state_update: {e}")
time.sleep(3)
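# 4. Push the approval request to trigger the HITL (human-in-the-loop) modal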
print("Pushing 'mma_step_approval' event to trigger HITL modal...")
try:
approval_payload = {
"ticket_id": "TICKET-002",
"payload": "powershell -Command \"Write-Host 'Hello from Tier 3'\""
}
client.push_event("mma_step_approval", approval_payload)
print("mma_step_approval event pushed successfully.")
except Exception as e:
print(f"Error pushing mma_step_approval event: {e}")
# 5. Provide clear print statements for manual verification
print("\n--- Manual Verification Instructions ---")
print("Please visually inspect the running GUI application:")
print("1. MMA Dashboard: Ensure the 'MMA Dashboard' panel is visible and active.")
print("2. Ticket Queue: Verify the 'Ticket Queue' section displays all 5 tickets with correct statuses.")
print("3. Progress Bar: Check that the progress bar correctly reflects the completed/total tickets.")
print("4. Approval Modal: Confirm that an 'MMA Step Approval' modal has appeared.")
print("\n--------------------------------------")
print("The test script has finished its automated actions.")
print("The GUI application is still running. Press Enter to exit.")
try:
input()
except EOFError:
pass
print("\nStopping GUI process...")
if gui_process:
gui_process.terminate()
gui_process.wait(timeout=5)
print("Visual MMA verification test script finished.")
print("Starting visual MMA verification test...")
# Change current directory to project root
original_dir = os.getcwd()
if original_dir != PROJECT_ROOT:
try:
os.chdir(PROJECT_ROOT)
print(f"Changed current directory to: {PROJECT_ROOT}")
except FileNotFoundError:
print(f"Error: Project root directory '{PROJECT_ROOT}' not found.")
return
# 1. Launch gui_2.py with --enable-test-hooks
gui_command = [sys.executable, GUI_SCRIPT, TEST_HOOKS_FLAG]
print(f"Launching GUI with command: {' '.join(gui_command)}")
try:
gui_process = subprocess.Popen(
gui_command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
cwd=PROJECT_ROOT
)
print(f"GUI process started with PID: {gui_process.pid}")
except FileNotFoundError:
print(f"Error: Could not find Python executable '{sys.executable}' or script '{GUI_SCRIPT}'.")
return
except Exception as e:
print(f"Error starting GUI process: {e}")
return
# Wait for GUI to start
print("Waiting for GUI to initialize and hook server to start (5 seconds)...")
time.sleep(5)
if gui_process.poll() is not None:
print(f"Error: GUI process exited prematurely with return code {gui_process.returncode}.")
return
# 2. Use ApiHookClient
try:
client = ApiHookClient()
print("ApiHookClient initialized successfully.")
except Exception as e:
print(f"Failed to initialize ApiHookClient. Error: {e}")
if gui_process:
gui_process.terminate()
return
# 3. Setup MMA data
track_data = {
"id": "visual_test_track",
"title": "Visual Verification Track",
"description": "A track to verify MMA UI components"
}
tickets_data = [
{"id": "TICKET-001", "target_file": "core.py", "status": "todo"},
{"id": "TICKET-002", "target_file": "utils.py", "status": "running"},
{"id": "TICKET-003", "target_file": "tests.py", "status": "complete"},
{"id": "TICKET-004", "target_file": "api.py", "status": "blocked"},
{"id": "TICKET-005", "target_file": "gui.py", "status": "paused"},
]
print("\nPushing MMA state update...")
try:
payload = {
"status": "running",
"active_tier": "Tier 3",
"track": track_data,
"tickets": tickets_data
}
client.push_event("mma_state_update", payload)
print(" - MMA state update pushed.")
except Exception as e:
print(f" - Warning: Failed to push mma_state_update: {e}")
time.sleep(3)
print("Pushing 'mma_step_approval' event to trigger HITL modal...")
try:
approval_payload = {
"ticket_id": "TICKET-002",
"payload": "powershell -Command \"Write-Host 'Hello from Tier 3'\""
}
client.push_event("mma_step_approval", approval_payload)
print("mma_step_approval event pushed successfully.")
except Exception as e:
print(f"Error pushing mma_step_approval event: {e}")
# 5. Provide clear print statements for manual verification
print("\n--- Manual Verification Instructions ---")
print("Please visually inspect the running GUI application:")
print("1. MMA Dashboard: Ensure the 'MMA Dashboard' panel is visible and active.")
print("2. Ticket Queue: Verify the 'Ticket Queue' section displays all 5 tickets with correct statuses.")
print("3. Progress Bar: Check that the progress bar correctly reflects the completed/total tickets.")
print("4. Approval Modal: Confirm that an 'MMA Step Approval' modal has appeared.")
print("\n--------------------------------------")
print("The test script has finished its automated actions.")
print("The GUI application is still running. Press Enter to exit.")
try:
input()
except EOFError:
pass
print("\nStopping GUI process...")
if gui_process:
gui_process.terminate()
gui_process.wait(timeout=5)
print("Visual MMA verification test script finished.")
if __name__ == "__main__":
run_visual_mma_verification()

View File

@@ -11,7 +11,7 @@ from api_hook_client import ApiHookClient
@pytest.mark.integration
def test_mma_epic_lifecycle(live_gui):
"""
"""
Integration test for the full MMA Epic lifecycle.
1. Start App.
2. Trigger 'New Epic' request.
@@ -20,76 +20,61 @@ def test_mma_epic_lifecycle(live_gui):
5. Verify Tier 2 generates tickets.
6. Verify execution loop starts.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=15), "API hook server failed to start."
print("[Test] Initializing MMA Epic lifecycle test...")
# 0. Setup: Ensure we have a project and are in a clean state
client.click("btn_reset")
time.sleep(1)
# 1. Set Epic input
epic_text = "Improve the logging system to include timestamps in all tool calls."
print(f"[Test] Setting Epic input: {epic_text}")
client.set_value("mma_epic_input", epic_text)
# 2. Trigger 'New Epic' (Plan Epic)
print("[Test] Clicking 'Plan Epic (Tier 1)'...")
client.click("btn_mma_plan_epic")
# 3. Verify that Tier 1 generates tracks
print("[Test] Polling for Tier 1 tracks...")
tracks_generated = False
for i in range(120):
status = client.get_value("ai_status")
# Check if the proposal modal is shown or status changed
if status and "Epic tracks generated" in str(status):
tracks_generated = True
print(f"[Test] Tracks generated after {i}s")
break
time.sleep(1)
assert tracks_generated, "Tier 1 failed to generate tracks within 120 seconds."
# 4. Trigger 'Start Track' for the first track
print("[Test] Triggering 'Start Track' for track index 0...")
client.click("btn_mma_start_track", user_data={"index": 0})
# 5. Verify that Tier 2 generates tickets and starts execution
print("[Test] Polling for Tier 2 ticket generation and execution start...")
execution_started = False
for i in range(60):
mma_status = client.get_mma_status()
status_str = mma_status.get("mma_status", "idle")
active_tier = mma_status.get("active_tier", "")
if status_str == "running" or "Tier 3" in str(active_tier):
execution_started = True
print(f"[Test] Execution started (Status: {status_str}, Tier: {active_tier}) after {i}s")
break
current_ai_status = client.get_value("ai_status")
if i % 5 == 0:
print(f" ... still waiting. Current AI Status: {current_ai_status}")
time.sleep(1)
assert execution_started, "Tier 2 failed to generate tickets or execution failed to start within 60 seconds."
# 6. Final verification of MMA state
final_mma = client.get_mma_status()
print(f"[Test] Final MMA Status: {final_mma.get('mma_status')}")
print(f"[Test] Active Tier: {final_mma.get('active_tier')}")
print(f"[Test] Ticket Count: {len(final_mma.get('active_tickets', []))}")
assert final_mma.get("mma_status") in ["running", "done", "blocked"]
assert len(final_mma.get("active_tickets", [])) > 0
print("[Test] MMA Epic lifecycle verification successful!")
if __name__ == "__main__":
# If run directly, try to use pytest
import subprocess
# Using sys.executable to ensure we use the same environment
subprocess.run([sys.executable, "-m", "pytest", "-v", __file__])

View File

@@ -10,35 +10,30 @@ from api_hook_client import ApiHookClient
@pytest.mark.integration
def test_mma_epic_simulation(live_gui):
"""
"""
Integration test for MMA epic simulation.
Plans an epic via the MMA dashboard and verifies that at least one track is generated.
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
# Try selecting MMA Dashboard tab if applicable (using typical naming convention)
try:
client.select_tab('main_tab_bar', 'tab_mma')
except Exception:
pass
# Set model to mock to avoid real API calls and timeouts
try:
client.set_value('current_model', 'mock')
except Exception:
pass
client.set_value('mma_epic_input', 'Build a simple calculator')
client.click('btn_mma_plan_epic')
# Poll client.get_mma_status() every 1 second (up to 30 seconds)
success = False
for i in range(30):
status = client.get_mma_status()
if status and status.get('tracks') and len(status['tracks']) > 0:
success = True
break
time.sleep(1)
assert success, "Failed to generate at least one track."