checkpoint: massive refactor
This commit is contained in:
@@ -15,82 +15,76 @@ import ai_client
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_ai_client():
|
||||
"""Reset ai_client global state between every test to prevent state pollution."""
|
||||
ai_client.reset_session()
|
||||
# Default to a safe model
|
||||
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||
yield
|
||||
"""Reset ai_client global state between every test to prevent state pollution."""
|
||||
ai_client.reset_session()
|
||||
# Default to a safe model
|
||||
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||
yield
|
||||
|
||||
def kill_process_tree(pid):
|
||||
"""Robustly kills a process and all its children."""
|
||||
if pid is None:
|
||||
return
|
||||
try:
|
||||
print(f"[Fixture] Attempting to kill process tree for PID {pid}...")
|
||||
if os.name == 'nt':
|
||||
# /F is force, /T is tree (includes children)
|
||||
subprocess.run(["taskkill", "/F", "/T", "/PID", str(pid)],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False)
|
||||
else:
|
||||
# On Unix, kill the process group
|
||||
os.killpg(os.getpgid(pid), signal.SIGKILL)
|
||||
print(f"[Fixture] Process tree {pid} killed.")
|
||||
except Exception as e:
|
||||
print(f"[Fixture] Error killing process tree {pid}: {e}")
|
||||
"""Robustly kills a process and all its children."""
|
||||
if pid is None:
|
||||
return
|
||||
try:
|
||||
print(f"[Fixture] Attempting to kill process tree for PID {pid}...")
|
||||
if os.name == 'nt':
|
||||
# /F is force, /T is tree (includes children)
|
||||
subprocess.run(["taskkill", "/F", "/T", "/PID", str(pid)],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
check=False)
|
||||
else:
|
||||
# On Unix, kill the process group
|
||||
os.killpg(os.getpgid(pid), signal.SIGKILL)
|
||||
print(f"[Fixture] Process tree {pid} killed.")
|
||||
except Exception as e:
|
||||
print(f"[Fixture] Error killing process tree {pid}: {e}")
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def live_gui():
|
||||
"""
|
||||
"""
|
||||
Session-scoped fixture that starts gui_2.py with --enable-test-hooks.
|
||||
"""
|
||||
gui_script = "gui_2.py"
|
||||
print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks...")
|
||||
|
||||
os.makedirs("logs", exist_ok=True)
|
||||
log_file = open(f"logs/{gui_script.replace('.', '_')}_test.log", "w", encoding="utf-8")
|
||||
|
||||
process = subprocess.Popen(
|
||||
["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"],
|
||||
stdout=log_file,
|
||||
stderr=log_file,
|
||||
text=True,
|
||||
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0
|
||||
)
|
||||
|
||||
max_retries = 15 # Slightly more time for gui_2
|
||||
ready = False
|
||||
print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...")
|
||||
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < max_retries:
|
||||
try:
|
||||
response = requests.get("http://127.0.0.1:8999/status", timeout=0.5)
|
||||
if response.status_code == 200:
|
||||
ready = True
|
||||
print(f"[Fixture] GUI Hook Server for {gui_script} is ready after {round(time.time() - start_time, 2)}s.")
|
||||
break
|
||||
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
|
||||
if process.poll() is not None:
|
||||
print(f"[Fixture] {gui_script} process died unexpectedly during startup.")
|
||||
break
|
||||
time.sleep(0.5)
|
||||
|
||||
if not ready:
|
||||
print(f"[Fixture] TIMEOUT/FAILURE: Hook server for {gui_script} failed to respond.")
|
||||
kill_process_tree(process.pid)
|
||||
pytest.fail(f"Failed to start {gui_script} with test hooks.")
|
||||
|
||||
try:
|
||||
yield process, gui_script
|
||||
finally:
|
||||
print(f"\n[Fixture] Finally block triggered: Shutting down {gui_script}...")
|
||||
# Reset the GUI state before shutting down
|
||||
try:
|
||||
client = ApiHookClient()
|
||||
client.reset_session()
|
||||
time.sleep(0.5)
|
||||
except: pass
|
||||
kill_process_tree(process.pid)
|
||||
log_file.close()
|
||||
gui_script = "gui_2.py"
|
||||
print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks...")
|
||||
os.makedirs("logs", exist_ok=True)
|
||||
log_file = open(f"logs/{gui_script.replace('.', '_')}_test.log", "w", encoding="utf-8")
|
||||
process = subprocess.Popen(
|
||||
["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"],
|
||||
stdout=log_file,
|
||||
stderr=log_file,
|
||||
text=True,
|
||||
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0
|
||||
)
|
||||
max_retries = 15 # Slightly more time for gui_2
|
||||
ready = False
|
||||
print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...")
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < max_retries:
|
||||
try:
|
||||
response = requests.get("http://127.0.0.1:8999/status", timeout=0.5)
|
||||
if response.status_code == 200:
|
||||
ready = True
|
||||
print(f"[Fixture] GUI Hook Server for {gui_script} is ready after {round(time.time() - start_time, 2)}s.")
|
||||
break
|
||||
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
|
||||
if process.poll() is not None:
|
||||
print(f"[Fixture] {gui_script} process died unexpectedly during startup.")
|
||||
break
|
||||
time.sleep(0.5)
|
||||
if not ready:
|
||||
print(f"[Fixture] TIMEOUT/FAILURE: Hook server for {gui_script} failed to respond.")
|
||||
kill_process_tree(process.pid)
|
||||
pytest.fail(f"Failed to start {gui_script} with test hooks.")
|
||||
try:
|
||||
yield process, gui_script
|
||||
finally:
|
||||
print(f"\n[Fixture] Finally block triggered: Shutting down {gui_script}...")
|
||||
# Reset the GUI state before shutting down
|
||||
try:
|
||||
client = ApiHookClient()
|
||||
client.reset_session()
|
||||
time.sleep(0.5)
|
||||
except: pass
|
||||
kill_process_tree(process.pid)
|
||||
log_file.close()
|
||||
|
||||
21
tests/mock_alias_tool.py
Normal file
21
tests/mock_alias_tool.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import sys, json, os, subprocess
|
||||
prompt = sys.stdin.read()
|
||||
if '"role": "tool"' in prompt:
|
||||
print(json.dumps({"type": "message", "role": "assistant", "content": "Tool worked!"}), flush=True)
|
||||
print(json.dumps({"type": "result", "stats": {"total_tokens": 20}}), flush=True)
|
||||
else:
|
||||
# We must call the bridge to trigger the GUI approval!
|
||||
tool_call = {"name": "list_directory", "input": {"dir_path": "."}}
|
||||
bridge_cmd = [sys.executable, "C:/projects/manual_slop/scripts/cli_tool_bridge.py"]
|
||||
proc = subprocess.Popen(bridge_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
|
||||
stdout, _ = proc.communicate(input=json.dumps(tool_call))
|
||||
|
||||
# Even if bridge says allow, we emit the tool_use to the adapter
|
||||
print(json.dumps({"type": "message", "role": "assistant", "content": "I will list the directory."}), flush=True)
|
||||
print(json.dumps({
|
||||
"type": "tool_use",
|
||||
"name": "list_directory",
|
||||
"id": "alias_call",
|
||||
"args": {"dir_path": "."}
|
||||
}), flush=True)
|
||||
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)
|
||||
@@ -4,104 +4,92 @@ import subprocess
|
||||
import os
|
||||
|
||||
def main():
|
||||
# Debug log to stderr
|
||||
sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
|
||||
sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n")
|
||||
|
||||
# Read prompt from stdin
|
||||
try:
|
||||
# On Windows, stdin might be closed or behave weirdly if not handled
|
||||
prompt = sys.stdin.read()
|
||||
except EOFError:
|
||||
prompt = ""
|
||||
|
||||
sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n")
|
||||
sys.stderr.flush()
|
||||
|
||||
# Skip management commands
|
||||
if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]:
|
||||
return
|
||||
|
||||
# If the prompt contains tool results, provide final answer
|
||||
if '"role": "tool"' in prompt or '"tool_call_id"' in prompt:
|
||||
print(json.dumps({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": "I have processed the tool results. Everything looks good!"
|
||||
}), flush=True)
|
||||
print(json.dumps({
|
||||
"type": "result",
|
||||
"status": "success",
|
||||
"stats": {"total_tokens": 100, "input_tokens": 80, "output_tokens": 20},
|
||||
"session_id": "mock-session-final"
|
||||
}), flush=True)
|
||||
return
|
||||
|
||||
# Default flow: simulate a tool call
|
||||
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
|
||||
# Using format that bridge understands
|
||||
bridge_tool_call = {
|
||||
"name": "read_file",
|
||||
"input": {"path": "test.txt"}
|
||||
}
|
||||
|
||||
sys.stderr.write(f"DEBUG: Calling bridge at {bridge_path}\n")
|
||||
sys.stderr.flush()
|
||||
|
||||
try:
|
||||
# CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed
|
||||
process = subprocess.Popen(
|
||||
[sys.executable, bridge_path],
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
env=os.environ
|
||||
)
|
||||
stdout, stderr = process.communicate(input=json.dumps(bridge_tool_call))
|
||||
|
||||
sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n")
|
||||
sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n")
|
||||
|
||||
decision_data = json.loads(stdout.strip())
|
||||
decision = decision_data.get("decision")
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"DEBUG: Bridge failed: {e}\n")
|
||||
decision = "deny"
|
||||
|
||||
if decision == "allow":
|
||||
# Simulate REAL CLI field names for adapter normalization test
|
||||
print(json.dumps({
|
||||
"type": "tool_use",
|
||||
"tool_name": "read_file",
|
||||
"tool_id": "call_123",
|
||||
"parameters": {"path": "test.txt"}
|
||||
}), flush=True)
|
||||
|
||||
print(json.dumps({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": "I am reading the file now..."
|
||||
}), flush=True)
|
||||
|
||||
print(json.dumps({
|
||||
"type": "result",
|
||||
"status": "success",
|
||||
"stats": {"total_tokens": 50, "input_tokens": 40, "output_tokens": 10},
|
||||
"session_id": "mock-session-123"
|
||||
}), flush=True)
|
||||
else:
|
||||
print(json.dumps({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": f"Tool execution was denied. Decision: {decision}"
|
||||
}), flush=True)
|
||||
print(json.dumps({
|
||||
"type": "result",
|
||||
"status": "success",
|
||||
"stats": {"total_tokens": 10, "input_tokens": 10, "output_tokens": 0},
|
||||
"session_id": "mock-session-denied"
|
||||
}), flush=True)
|
||||
# Debug log to stderr
|
||||
sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
|
||||
sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n")
|
||||
# Read prompt from stdin
|
||||
try:
|
||||
# On Windows, stdin might be closed or behave weirdly if not handled
|
||||
prompt = sys.stdin.read()
|
||||
except EOFError:
|
||||
prompt = ""
|
||||
sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n")
|
||||
sys.stderr.flush()
|
||||
# Skip management commands
|
||||
if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]:
|
||||
return
|
||||
# If the prompt contains tool results, provide final answer
|
||||
if '"role": "tool"' in prompt or '"tool_call_id"' in prompt:
|
||||
print(json.dumps({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": "I have processed the tool results. Everything looks good!"
|
||||
}), flush=True)
|
||||
print(json.dumps({
|
||||
"type": "result",
|
||||
"status": "success",
|
||||
"stats": {"total_tokens": 100, "input_tokens": 80, "output_tokens": 20},
|
||||
"session_id": "mock-session-final"
|
||||
}), flush=True)
|
||||
return
|
||||
# Default flow: simulate a tool call
|
||||
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
|
||||
# Using format that bridge understands
|
||||
bridge_tool_call = {
|
||||
"name": "read_file",
|
||||
"input": {"path": "test.txt"}
|
||||
}
|
||||
sys.stderr.write(f"DEBUG: Calling bridge at {bridge_path}\n")
|
||||
sys.stderr.flush()
|
||||
try:
|
||||
# CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed
|
||||
process = subprocess.Popen(
|
||||
[sys.executable, bridge_path],
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
env=os.environ
|
||||
)
|
||||
stdout, stderr = process.communicate(input=json.dumps(bridge_tool_call))
|
||||
sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n")
|
||||
sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n")
|
||||
decision_data = json.loads(stdout.strip())
|
||||
decision = decision_data.get("decision")
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"DEBUG: Bridge failed: {e}\n")
|
||||
decision = "deny"
|
||||
if decision == "allow":
|
||||
# Simulate REAL CLI field names for adapter normalization test
|
||||
print(json.dumps({
|
||||
"type": "tool_use",
|
||||
"tool_name": "read_file",
|
||||
"tool_id": "call_123",
|
||||
"parameters": {"path": "test.txt"}
|
||||
}), flush=True)
|
||||
print(json.dumps({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": "I am reading the file now..."
|
||||
}), flush=True)
|
||||
print(json.dumps({
|
||||
"type": "result",
|
||||
"status": "success",
|
||||
"stats": {"total_tokens": 50, "input_tokens": 40, "output_tokens": 10},
|
||||
"session_id": "mock-session-123"
|
||||
}), flush=True)
|
||||
else:
|
||||
print(json.dumps({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": f"Tool execution was denied. Decision: {decision}"
|
||||
}), flush=True)
|
||||
print(json.dumps({
|
||||
"type": "result",
|
||||
"status": "success",
|
||||
"stats": {"total_tokens": 10, "input_tokens": 10, "output_tokens": 0},
|
||||
"session_id": "mock-session-denied"
|
||||
}), flush=True)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
||||
@@ -15,6 +15,8 @@ output_dir = "./md_gen"
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
[files.tier_assignments]
|
||||
|
||||
[screenshots]
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
@@ -10,5 +10,5 @@ auto_add = true
|
||||
|
||||
[discussions.main]
|
||||
git_commit = ""
|
||||
last_updated = "2026-02-27T18:56:53"
|
||||
last_updated = "2026-02-28T07:35:03"
|
||||
history = []
|
||||
|
||||
@@ -15,6 +15,8 @@ output_dir = "./md_gen"
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
[files.tier_assignments]
|
||||
|
||||
[screenshots]
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
@@ -6,10 +6,10 @@ roles = [
|
||||
"Reasoning",
|
||||
]
|
||||
history = []
|
||||
active = "TestDisc_1772236592"
|
||||
active = "TestDisc_1772282083"
|
||||
auto_add = true
|
||||
|
||||
[discussions.TestDisc_1772236592]
|
||||
[discussions.TestDisc_1772282083]
|
||||
git_commit = ""
|
||||
last_updated = "2026-02-27T18:56:46"
|
||||
last_updated = "2026-02-28T07:34:56"
|
||||
history = []
|
||||
|
||||
@@ -15,6 +15,8 @@ output_dir = "./md_gen"
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
[files.tier_assignments]
|
||||
|
||||
[screenshots]
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
@@ -10,5 +10,5 @@ auto_add = true
|
||||
|
||||
[discussions.main]
|
||||
git_commit = ""
|
||||
last_updated = "2026-02-27T18:57:53"
|
||||
last_updated = "2026-02-28T07:35:49"
|
||||
history = []
|
||||
|
||||
@@ -15,6 +15,8 @@ output_dir = "./md_gen"
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
[files.tier_assignments]
|
||||
|
||||
[screenshots]
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
@@ -10,5 +10,5 @@ auto_add = true
|
||||
|
||||
[discussions.main]
|
||||
git_commit = ""
|
||||
last_updated = "2026-02-27T18:57:10"
|
||||
last_updated = "2026-02-28T07:35:20"
|
||||
history = []
|
||||
|
||||
@@ -18,7 +18,5 @@ history = [
|
||||
|
||||
[discussions.AutoDisc]
|
||||
git_commit = ""
|
||||
last_updated = "2026-02-27T23:54:05"
|
||||
history = [
|
||||
"@2026-02-27T19:08:37\nSystem:\n[PERFORMANCE ALERT] Frame time high: 62.2ms. Please consider optimizing recent changes or reducing load.",
|
||||
]
|
||||
last_updated = "2026-02-28T07:34:41"
|
||||
history = []
|
||||
|
||||
@@ -8,5 +8,5 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
import ai_client
|
||||
|
||||
def test_agent_capabilities_listing():
|
||||
# Verify that the agent exposes its available tools correctly
|
||||
pass
|
||||
# Verify that the agent exposes its available tools correctly
|
||||
pass
|
||||
|
||||
@@ -9,14 +9,14 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from ai_client import set_agent_tools, _build_anthropic_tools
|
||||
|
||||
def test_set_agent_tools():
|
||||
# Correct usage: pass a dict
|
||||
agent_tools = {"read_file": True, "list_directory": False}
|
||||
set_agent_tools(agent_tools)
|
||||
# Correct usage: pass a dict
|
||||
agent_tools = {"read_file": True, "list_directory": False}
|
||||
set_agent_tools(agent_tools)
|
||||
|
||||
def test_build_anthropic_tools_conversion():
|
||||
# _build_anthropic_tools takes no arguments and uses the global _agent_tools
|
||||
# We set a tool to True and check if it appears in the output
|
||||
set_agent_tools({"read_file": True})
|
||||
anthropic_tools = _build_anthropic_tools()
|
||||
tool_names = [t["name"] for t in anthropic_tools]
|
||||
assert "read_file" in tool_names
|
||||
# _build_anthropic_tools takes no arguments and uses the global _agent_tools
|
||||
# We set a tool to True and check if it appears in the output
|
||||
set_agent_tools({"read_file": True})
|
||||
anthropic_tools = _build_anthropic_tools()
|
||||
tool_names = [t["name"] for t in anthropic_tools]
|
||||
assert "read_file" in tool_names
|
||||
|
||||
@@ -3,39 +3,33 @@ from unittest.mock import MagicMock, patch
|
||||
import ai_client
|
||||
|
||||
def test_ai_client_send_gemini_cli():
|
||||
"""
|
||||
"""
|
||||
Verifies that 'ai_client.send' correctly interacts with 'GeminiCliAdapter'
|
||||
when the 'gemini_cli' provider is specified.
|
||||
"""
|
||||
test_message = "Hello, this is a test prompt for the CLI adapter."
|
||||
test_response = "This is a dummy response from the Gemini CLI."
|
||||
|
||||
# Set provider to gemini_cli
|
||||
ai_client.set_provider("gemini_cli", "gemini-2.5-flash-lite")
|
||||
|
||||
# 1. Mock 'ai_client.GeminiCliAdapter' (which we will add)
|
||||
with patch('ai_client.GeminiCliAdapter') as MockAdapterClass:
|
||||
mock_adapter_instance = MockAdapterClass.return_value
|
||||
mock_adapter_instance.send.return_value = {"text": test_response, "tool_calls": []}
|
||||
mock_adapter_instance.last_usage = {"total_tokens": 100}
|
||||
mock_adapter_instance.last_latency = 0.5
|
||||
mock_adapter_instance.session_id = "test-session"
|
||||
|
||||
# Verify that 'events' are emitted correctly
|
||||
with patch.object(ai_client.events, 'emit') as mock_emit:
|
||||
response = ai_client.send(
|
||||
md_content="<context></context>",
|
||||
user_message=test_message,
|
||||
base_dir="."
|
||||
)
|
||||
|
||||
# Check that the adapter's send method was called.
|
||||
mock_adapter_instance.send.assert_called()
|
||||
|
||||
# Verify that the expected lifecycle events were emitted.
|
||||
emitted_event_names = [call.args[0] for call in mock_emit.call_args_list]
|
||||
assert 'request_start' in emitted_event_names
|
||||
assert 'response_received' in emitted_event_names
|
||||
|
||||
# Verify that the combined text returned by the adapter is returned by 'ai_client.send'.
|
||||
assert response == test_response
|
||||
test_message = "Hello, this is a test prompt for the CLI adapter."
|
||||
test_response = "This is a dummy response from the Gemini CLI."
|
||||
# Set provider to gemini_cli
|
||||
ai_client.set_provider("gemini_cli", "gemini-2.5-flash-lite")
|
||||
# 1. Mock 'ai_client.GeminiCliAdapter' (which we will add)
|
||||
with patch('ai_client.GeminiCliAdapter') as MockAdapterClass:
|
||||
mock_adapter_instance = MockAdapterClass.return_value
|
||||
mock_adapter_instance.send.return_value = {"text": test_response, "tool_calls": []}
|
||||
mock_adapter_instance.last_usage = {"total_tokens": 100}
|
||||
mock_adapter_instance.last_latency = 0.5
|
||||
mock_adapter_instance.session_id = "test-session"
|
||||
# Verify that 'events' are emitted correctly
|
||||
with patch.object(ai_client.events, 'emit') as mock_emit:
|
||||
response = ai_client.send(
|
||||
md_content="<context></context>",
|
||||
user_message=test_message,
|
||||
base_dir="."
|
||||
)
|
||||
# Check that the adapter's send method was called.
|
||||
mock_adapter_instance.send.assert_called()
|
||||
# Verify that the expected lifecycle events were emitted.
|
||||
emitted_event_names = [call.args[0] for call in mock_emit.call_args_list]
|
||||
assert 'request_start' in emitted_event_names
|
||||
assert 'response_received' in emitted_event_names
|
||||
# Verify that the combined text returned by the adapter is returned by 'ai_client.send'.
|
||||
assert response == test_response
|
||||
|
||||
@@ -3,15 +3,14 @@ from unittest.mock import patch, MagicMock
|
||||
import ai_client
|
||||
|
||||
def test_list_models_gemini_cli():
|
||||
"""
|
||||
"""
|
||||
Verifies that 'ai_client.list_models' correctly returns a list of models
|
||||
for the 'gemini_cli' provider.
|
||||
"""
|
||||
models = ai_client.list_models("gemini_cli")
|
||||
|
||||
assert "gemini-3.1-pro-preview" in models
|
||||
assert "gemini-3-flash-preview" in models
|
||||
assert "gemini-2.5-pro" in models
|
||||
assert "gemini-2.5-flash" in models
|
||||
assert "gemini-2.5-flash-lite" in models
|
||||
assert len(models) == 5
|
||||
models = ai_client.list_models("gemini_cli")
|
||||
assert "gemini-3.1-pro-preview" in models
|
||||
assert "gemini-3-flash-preview" in models
|
||||
assert "gemini-2.5-pro" in models
|
||||
assert "gemini-2.5-flash" in models
|
||||
assert "gemini-2.5-flash-lite" in models
|
||||
assert len(models) == 5
|
||||
|
||||
@@ -3,22 +3,22 @@ import textwrap
|
||||
from scripts.ai_style_formatter import format_code
|
||||
|
||||
def test_basic_indentation():
|
||||
source = textwrap.dedent("""\
|
||||
source = textwrap.dedent("""\
|
||||
def hello():
|
||||
print("world")
|
||||
if True:
|
||||
print("nested")
|
||||
""")
|
||||
expected = (
|
||||
"def hello():\n"
|
||||
" print(\"world\")\n"
|
||||
" if True:\n"
|
||||
" print(\"nested\")\n"
|
||||
)
|
||||
assert format_code(source) == expected
|
||||
expected = (
|
||||
"def hello():\n"
|
||||
" print(\"world\")\n"
|
||||
" if True:\n"
|
||||
" print(\"nested\")\n"
|
||||
)
|
||||
assert format_code(source) == expected
|
||||
|
||||
def test_top_level_blank_lines():
|
||||
source = textwrap.dedent("""\
|
||||
source = textwrap.dedent("""\
|
||||
def a():
|
||||
pass
|
||||
|
||||
@@ -26,31 +26,31 @@ def test_top_level_blank_lines():
|
||||
def b():
|
||||
pass
|
||||
""")
|
||||
expected = (
|
||||
"def a():\n"
|
||||
" pass\n"
|
||||
"\n"
|
||||
"def b():\n"
|
||||
" pass\n"
|
||||
)
|
||||
assert format_code(source) == expected
|
||||
expected = (
|
||||
"def a():\n"
|
||||
" pass\n"
|
||||
"\n"
|
||||
"def b():\n"
|
||||
" pass\n"
|
||||
)
|
||||
assert format_code(source) == expected
|
||||
|
||||
def test_inner_blank_lines():
|
||||
source = textwrap.dedent("""\
|
||||
source = textwrap.dedent("""\
|
||||
def a():
|
||||
print("start")
|
||||
|
||||
print("end")
|
||||
""")
|
||||
expected = (
|
||||
"def a():\n"
|
||||
" print(\"start\")\n"
|
||||
" print(\"end\")\n"
|
||||
)
|
||||
assert format_code(source) == expected
|
||||
expected = (
|
||||
"def a():\n"
|
||||
" print(\"start\")\n"
|
||||
" print(\"end\")\n"
|
||||
)
|
||||
assert format_code(source) == expected
|
||||
|
||||
def test_multiline_string_safety():
|
||||
source = textwrap.dedent("""\
|
||||
source = textwrap.dedent("""\
|
||||
def a():
|
||||
'''
|
||||
This is a multiline
|
||||
@@ -60,21 +60,20 @@ def test_multiline_string_safety():
|
||||
'''
|
||||
pass
|
||||
""")
|
||||
# Note: the indentation of the ''' itself becomes 1 space.
|
||||
# The content inside remains exactly as in source.
|
||||
# textwrap.dedent will remove the common leading whitespace from the source.
|
||||
# The source's ''' is at 4 spaces. Content is at 4 spaces.
|
||||
# After dedent:
|
||||
# def a():
|
||||
# '''
|
||||
# This is a...
|
||||
|
||||
result = format_code(source)
|
||||
assert " This is a multiline" in result
|
||||
assert result.startswith("def a():\n '''")
|
||||
# Note: the indentation of the ''' itself becomes 1 space.
|
||||
# The content inside remains exactly as in source.
|
||||
# textwrap.dedent will remove the common leading whitespace from the source.
|
||||
# The source's ''' is at 4 spaces. Content is at 4 spaces.
|
||||
# After dedent:
|
||||
# def a():
|
||||
# '''
|
||||
# This is a...
|
||||
result = format_code(source)
|
||||
assert " This is a multiline" in result
|
||||
assert result.startswith("def a():\n '''")
|
||||
|
||||
def test_continuation_indentation():
|
||||
source = textwrap.dedent("""\
|
||||
source = textwrap.dedent("""\
|
||||
def long_func(
|
||||
a,
|
||||
b
|
||||
@@ -84,20 +83,20 @@ def test_continuation_indentation():
|
||||
b
|
||||
)
|
||||
""")
|
||||
expected = (
|
||||
"def long_func(\n"
|
||||
" a,\n"
|
||||
" b\n"
|
||||
"):\n"
|
||||
" return (\n"
|
||||
" a +\n"
|
||||
" b\n"
|
||||
" )\n"
|
||||
)
|
||||
assert format_code(source) == expected
|
||||
expected = (
|
||||
"def long_func(\n"
|
||||
" a,\n"
|
||||
" b\n"
|
||||
"):\n"
|
||||
" return (\n"
|
||||
" a +\n"
|
||||
" b\n"
|
||||
" )\n"
|
||||
)
|
||||
assert format_code(source) == expected
|
||||
|
||||
def test_multiple_top_level_definitions():
|
||||
source = textwrap.dedent("""\
|
||||
source = textwrap.dedent("""\
|
||||
class MyClass:
|
||||
def __init__(self):
|
||||
self.x = 1
|
||||
@@ -109,14 +108,14 @@ def test_multiple_top_level_definitions():
|
||||
def top_level():
|
||||
pass
|
||||
""")
|
||||
expected = (
|
||||
"class MyClass:\n"
|
||||
" def __init__(self):\n"
|
||||
" self.x = 1\n"
|
||||
" def method(self):\n"
|
||||
" pass\n"
|
||||
"\n"
|
||||
"def top_level():\n"
|
||||
" pass\n"
|
||||
)
|
||||
assert format_code(source) == expected
|
||||
expected = (
|
||||
"class MyClass:\n"
|
||||
" def __init__(self):\n"
|
||||
" self.x = 1\n"
|
||||
" def method(self):\n"
|
||||
" pass\n"
|
||||
"\n"
|
||||
"def top_level():\n"
|
||||
" pass\n"
|
||||
)
|
||||
assert format_code(source) == expected
|
||||
|
||||
@@ -3,127 +3,104 @@ from unittest.mock import MagicMock, patch
|
||||
import ai_client
|
||||
|
||||
class MockUsage:
|
||||
def __init__(self):
|
||||
self.prompt_token_count = 10
|
||||
self.candidates_token_count = 5
|
||||
self.total_token_count = 15
|
||||
self.cached_content_token_count = 0
|
||||
def __init__(self):
|
||||
self.prompt_token_count = 10
|
||||
self.candidates_token_count = 5
|
||||
self.total_token_count = 15
|
||||
self.cached_content_token_count = 0
|
||||
|
||||
class MockPart:
|
||||
def __init__(self, text, function_call):
|
||||
self.text = text
|
||||
self.function_call = function_call
|
||||
def __init__(self, text, function_call):
|
||||
self.text = text
|
||||
self.function_call = function_call
|
||||
|
||||
class MockContent:
|
||||
def __init__(self, parts):
|
||||
self.parts = parts
|
||||
def __init__(self, parts):
|
||||
self.parts = parts
|
||||
|
||||
class MockCandidate:
|
||||
def __init__(self, parts):
|
||||
self.content = MockContent(parts)
|
||||
self.finish_reason = MagicMock()
|
||||
self.finish_reason.name = "STOP"
|
||||
def __init__(self, parts):
|
||||
self.content = MockContent(parts)
|
||||
self.finish_reason = MagicMock()
|
||||
self.finish_reason.name = "STOP"
|
||||
|
||||
def test_ai_client_event_emitter_exists():
|
||||
# This should fail initially because 'events' won't exist on ai_client
|
||||
assert hasattr(ai_client, 'events')
|
||||
# This should fail initially because 'events' won't exist on ai_client
|
||||
assert hasattr(ai_client, 'events')
|
||||
|
||||
def test_event_emission():
|
||||
callback = MagicMock()
|
||||
ai_client.events.on("test_event", callback)
|
||||
ai_client.events.emit("test_event", payload={"data": 123})
|
||||
callback.assert_called_once_with(payload={"data": 123})
|
||||
callback = MagicMock()
|
||||
ai_client.events.on("test_event", callback)
|
||||
ai_client.events.emit("test_event", payload={"data": 123})
|
||||
callback.assert_called_once_with(payload={"data": 123})
|
||||
|
||||
def test_send_emits_events():
|
||||
with patch("ai_client._send_gemini") as mock_send_gemini, \
|
||||
patch("ai_client._send_anthropic") as mock_send_anthropic:
|
||||
|
||||
mock_send_gemini.return_value = "gemini response"
|
||||
|
||||
start_callback = MagicMock()
|
||||
response_callback = MagicMock()
|
||||
|
||||
ai_client.events.on("request_start", start_callback)
|
||||
ai_client.events.on("response_received", response_callback)
|
||||
|
||||
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||
ai_client.send("context", "message")
|
||||
|
||||
# We mocked _send_gemini so it doesn't emit events inside.
|
||||
# But wait, ai_client.send itself emits request_start and response_received?
|
||||
# Actually, ai_client.send delegates to _send_gemini.
|
||||
# Let's mock _gemini_client instead to let _send_gemini run and emit events.
|
||||
pass
|
||||
with patch("ai_client._send_gemini") as mock_send_gemini, \
|
||||
patch("ai_client._send_anthropic") as mock_send_anthropic:
|
||||
mock_send_gemini.return_value = "gemini response"
|
||||
start_callback = MagicMock()
|
||||
response_callback = MagicMock()
|
||||
ai_client.events.on("request_start", start_callback)
|
||||
ai_client.events.on("response_received", response_callback)
|
||||
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||
ai_client.send("context", "message")
|
||||
# We mocked _send_gemini so it doesn't emit events inside.
|
||||
# But wait, ai_client.send itself emits request_start and response_received?
|
||||
# Actually, ai_client.send delegates to _send_gemini.
|
||||
# Let's mock _gemini_client instead to let _send_gemini run and emit events.
|
||||
pass
|
||||
|
||||
def test_send_emits_events_proper():
|
||||
with patch("ai_client._ensure_gemini_client"), \
|
||||
patch("ai_client._gemini_client") as mock_client:
|
||||
|
||||
mock_chat = MagicMock()
|
||||
mock_client.chats.create.return_value = mock_chat
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])]
|
||||
mock_response.usage_metadata = MockUsage()
|
||||
mock_chat.send_message.return_value = mock_response
|
||||
|
||||
start_callback = MagicMock()
|
||||
response_callback = MagicMock()
|
||||
|
||||
ai_client.events.on("request_start", start_callback)
|
||||
ai_client.events.on("response_received", response_callback)
|
||||
|
||||
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||
ai_client.send("context", "message")
|
||||
|
||||
assert start_callback.called
|
||||
assert response_callback.called
|
||||
|
||||
args, kwargs = start_callback.call_args
|
||||
assert kwargs['payload']['provider'] == 'gemini'
|
||||
with patch("ai_client._ensure_gemini_client"), \
|
||||
patch("ai_client._gemini_client") as mock_client:
|
||||
mock_chat = MagicMock()
|
||||
mock_client.chats.create.return_value = mock_chat
|
||||
mock_response = MagicMock()
|
||||
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])]
|
||||
mock_response.usage_metadata = MockUsage()
|
||||
mock_chat.send_message.return_value = mock_response
|
||||
start_callback = MagicMock()
|
||||
response_callback = MagicMock()
|
||||
ai_client.events.on("request_start", start_callback)
|
||||
ai_client.events.on("response_received", response_callback)
|
||||
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||
ai_client.send("context", "message")
|
||||
assert start_callback.called
|
||||
assert response_callback.called
|
||||
args, kwargs = start_callback.call_args
|
||||
assert kwargs['payload']['provider'] == 'gemini'
|
||||
|
||||
def test_send_emits_tool_events():
|
||||
import mcp_client
|
||||
with patch("ai_client._ensure_gemini_client"), \
|
||||
patch("ai_client._gemini_client") as mock_client, \
|
||||
patch("mcp_client.dispatch") as mock_dispatch:
|
||||
|
||||
mock_chat = MagicMock()
|
||||
mock_client.chats.create.return_value = mock_chat
|
||||
|
||||
# 1. Setup mock response with a tool call
|
||||
mock_fc = MagicMock()
|
||||
mock_fc.name = "read_file"
|
||||
mock_fc.args = {"path": "test.txt"}
|
||||
|
||||
mock_response_with_tool = MagicMock()
|
||||
mock_response_with_tool.candidates = [MockCandidate([MockPart("tool call text", mock_fc)])]
|
||||
mock_response_with_tool.usage_metadata = MockUsage()
|
||||
|
||||
# 2. Setup second mock response (final answer)
|
||||
mock_response_final = MagicMock()
|
||||
mock_response_final.candidates = [MockCandidate([MockPart("final answer", None)])]
|
||||
mock_response_final.usage_metadata = MockUsage()
|
||||
|
||||
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
|
||||
mock_dispatch.return_value = "file content"
|
||||
|
||||
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||
|
||||
tool_callback = MagicMock()
|
||||
ai_client.events.on("tool_execution", tool_callback)
|
||||
|
||||
ai_client.send("context", "message")
|
||||
|
||||
# Should be called twice: once for 'started', once for 'completed'
|
||||
assert tool_callback.call_count == 2
|
||||
|
||||
# Check 'started' call
|
||||
args, kwargs = tool_callback.call_args_list[0]
|
||||
assert kwargs['payload']['status'] == 'started'
|
||||
assert kwargs['payload']['tool'] == 'read_file'
|
||||
|
||||
# Check 'completed' call
|
||||
args, kwargs = tool_callback.call_args_list[1]
|
||||
assert kwargs['payload']['status'] == 'completed'
|
||||
assert kwargs['payload']['result'] == 'file content'
|
||||
import mcp_client
|
||||
with patch("ai_client._ensure_gemini_client"), \
|
||||
patch("ai_client._gemini_client") as mock_client, \
|
||||
patch("mcp_client.dispatch") as mock_dispatch:
|
||||
mock_chat = MagicMock()
|
||||
mock_client.chats.create.return_value = mock_chat
|
||||
# 1. Setup mock response with a tool call
|
||||
mock_fc = MagicMock()
|
||||
mock_fc.name = "read_file"
|
||||
mock_fc.args = {"path": "test.txt"}
|
||||
mock_response_with_tool = MagicMock()
|
||||
mock_response_with_tool.candidates = [MockCandidate([MockPart("tool call text", mock_fc)])]
|
||||
mock_response_with_tool.usage_metadata = MockUsage()
|
||||
# 2. Setup second mock response (final answer)
|
||||
mock_response_final = MagicMock()
|
||||
mock_response_final.candidates = [MockCandidate([MockPart("final answer", None)])]
|
||||
mock_response_final.usage_metadata = MockUsage()
|
||||
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
|
||||
mock_dispatch.return_value = "file content"
|
||||
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||
tool_callback = MagicMock()
|
||||
ai_client.events.on("tool_execution", tool_callback)
|
||||
ai_client.send("context", "message")
|
||||
# Should be called twice: once for 'started', once for 'completed'
|
||||
assert tool_callback.call_count == 2
|
||||
# Check 'started' call
|
||||
args, kwargs = tool_callback.call_args_list[0]
|
||||
assert kwargs['payload']['status'] == 'started'
|
||||
assert kwargs['payload']['tool'] == 'read_file'
|
||||
# Check 'completed' call
|
||||
args, kwargs = tool_callback.call_args_list[1]
|
||||
assert kwargs['payload']['status'] == 'completed'
|
||||
assert kwargs['payload']['result'] == 'file content'
|
||||
|
||||
@@ -13,88 +13,84 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from api_hook_client import ApiHookClient
|
||||
|
||||
def test_get_status_success(live_gui):
|
||||
"""
|
||||
"""
|
||||
Test that get_status successfully retrieves the server status
|
||||
when the live GUI is running.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
status = client.get_status()
|
||||
assert status == {'status': 'ok'}
|
||||
client = ApiHookClient()
|
||||
status = client.get_status()
|
||||
assert status == {'status': 'ok'}
|
||||
|
||||
def test_get_project_success(live_gui):
|
||||
"""
|
||||
"""
|
||||
Test successful retrieval of project data from the live GUI.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
response = client.get_project()
|
||||
assert 'project' in response
|
||||
# We don't assert specific content as it depends on the environment's active project
|
||||
client = ApiHookClient()
|
||||
response = client.get_project()
|
||||
assert 'project' in response
|
||||
# We don't assert specific content as it depends on the environment's active project
|
||||
|
||||
def test_get_session_success(live_gui):
|
||||
"""
|
||||
"""
|
||||
Test successful retrieval of session data.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
response = client.get_session()
|
||||
assert 'session' in response
|
||||
assert 'entries' in response['session']
|
||||
client = ApiHookClient()
|
||||
response = client.get_session()
|
||||
assert 'session' in response
|
||||
assert 'entries' in response['session']
|
||||
|
||||
def test_post_gui_success(live_gui):
|
||||
"""
|
||||
"""
|
||||
Test successful posting of GUI data.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
gui_data = {'command': 'set_text', 'id': 'some_item', 'value': 'new_text'}
|
||||
response = client.post_gui(gui_data)
|
||||
assert response == {'status': 'queued'}
|
||||
client = ApiHookClient()
|
||||
gui_data = {'command': 'set_text', 'id': 'some_item', 'value': 'new_text'}
|
||||
response = client.post_gui(gui_data)
|
||||
assert response == {'status': 'queued'}
|
||||
|
||||
def test_get_performance_success(live_gui):
|
||||
"""
|
||||
"""
|
||||
Test successful retrieval of performance metrics.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
response = client.get_performance()
|
||||
assert "performance" in response
|
||||
client = ApiHookClient()
|
||||
response = client.get_performance()
|
||||
assert "performance" in response
|
||||
|
||||
def test_unsupported_method_error():
|
||||
"""
|
||||
"""
|
||||
Test that calling an unsupported HTTP method raises a ValueError.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
with pytest.raises(ValueError, match="Unsupported HTTP method"):
|
||||
client._make_request('PUT', '/some_endpoint', data={'key': 'value'})
|
||||
client = ApiHookClient()
|
||||
with pytest.raises(ValueError, match="Unsupported HTTP method"):
|
||||
client._make_request('PUT', '/some_endpoint', data={'key': 'value'})
|
||||
|
||||
def test_get_text_value():
|
||||
"""
|
||||
"""
|
||||
Test retrieval of string representation using get_text_value.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
with patch.object(client, 'get_value', return_value=123):
|
||||
assert client.get_text_value("dummy_tag") == "123"
|
||||
|
||||
with patch.object(client, 'get_value', return_value=None):
|
||||
assert client.get_text_value("dummy_tag") is None
|
||||
client = ApiHookClient()
|
||||
with patch.object(client, 'get_value', return_value=123):
|
||||
assert client.get_text_value("dummy_tag") == "123"
|
||||
with patch.object(client, 'get_value', return_value=None):
|
||||
assert client.get_text_value("dummy_tag") is None
|
||||
|
||||
def test_get_node_status():
|
||||
"""
|
||||
"""
|
||||
Test retrieval of DAG node status using get_node_status.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
# When get_value returns a status directly
|
||||
with patch.object(client, 'get_value', return_value="running"):
|
||||
assert client.get_node_status("my_node") == "running"
|
||||
|
||||
# When get_value returns None and diagnostics provides a nodes dict
|
||||
with patch.object(client, 'get_value', return_value=None):
|
||||
with patch.object(client, '_make_request', return_value={'nodes': {'my_node': 'completed'}}):
|
||||
assert client.get_node_status("my_node") == "completed"
|
||||
|
||||
# When get_value returns None and diagnostics provides a direct key
|
||||
with patch.object(client, 'get_value', return_value=None):
|
||||
with patch.object(client, '_make_request', return_value={'my_node': 'failed'}):
|
||||
assert client.get_node_status("my_node") == "failed"
|
||||
|
||||
# When neither works
|
||||
with patch.object(client, 'get_value', return_value=None):
|
||||
with patch.object(client, '_make_request', return_value={}):
|
||||
assert client.get_node_status("my_node") is None
|
||||
client = ApiHookClient()
|
||||
# When get_value returns a status directly
|
||||
with patch.object(client, 'get_value', return_value="running"):
|
||||
assert client.get_node_status("my_node") == "running"
|
||||
# When get_value returns None and diagnostics provides a nodes dict
|
||||
with patch.object(client, 'get_value', return_value=None):
|
||||
with patch.object(client, '_make_request', return_value={'nodes': {'my_node': 'completed'}}):
|
||||
assert client.get_node_status("my_node") == "completed"
|
||||
# When get_value returns None and diagnostics provides a direct key
|
||||
with patch.object(client, 'get_value', return_value=None):
|
||||
with patch.object(client, '_make_request', return_value={'my_node': 'failed'}):
|
||||
assert client.get_node_status("my_node") == "failed"
|
||||
# When neither works
|
||||
with patch.object(client, 'get_value', return_value=None):
|
||||
with patch.object(client, '_make_request', return_value={}):
|
||||
assert client.get_node_status("my_node") is None
|
||||
|
||||
@@ -8,68 +8,64 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from api_hook_client import ApiHookClient
|
||||
|
||||
def test_api_client_has_extensions():
|
||||
client = ApiHookClient()
|
||||
# These should fail initially as they are not implemented
|
||||
assert hasattr(client, 'select_tab')
|
||||
assert hasattr(client, 'select_list_item')
|
||||
client = ApiHookClient()
|
||||
# These should fail initially as they are not implemented
|
||||
assert hasattr(client, 'select_tab')
|
||||
assert hasattr(client, 'select_list_item')
|
||||
|
||||
def test_select_tab_integration(live_gui):
|
||||
client = ApiHookClient()
|
||||
# We'll need to make sure the tags exist in gui_legacy.py
|
||||
# For now, this is a placeholder for the integration test
|
||||
response = client.select_tab("operations_tabs", "tab_tool")
|
||||
assert response == {'status': 'queued'}
|
||||
client = ApiHookClient()
|
||||
# We'll need to make sure the tags exist in gui_legacy.py
|
||||
# For now, this is a placeholder for the integration test
|
||||
response = client.select_tab("operations_tabs", "tab_tool")
|
||||
assert response == {'status': 'queued'}
|
||||
|
||||
def test_select_list_item_integration(live_gui):
|
||||
client = ApiHookClient()
|
||||
# Assuming 'Default' discussion exists or we can just test that it queues
|
||||
response = client.select_list_item("disc_listbox", "Default")
|
||||
assert response == {'status': 'queued'}
|
||||
client = ApiHookClient()
|
||||
# Assuming 'Default' discussion exists or we can just test that it queues
|
||||
response = client.select_list_item("disc_listbox", "Default")
|
||||
assert response == {'status': 'queued'}
|
||||
|
||||
def test_get_indicator_state_integration(live_gui):
|
||||
client = ApiHookClient()
|
||||
# thinking_indicator is usually hidden unless AI is running
|
||||
response = client.get_indicator_state("thinking_indicator")
|
||||
assert 'shown' in response
|
||||
assert response['tag'] == "thinking_indicator"
|
||||
client = ApiHookClient()
|
||||
# thinking_indicator is usually hidden unless AI is running
|
||||
response = client.get_indicator_state("thinking_indicator")
|
||||
assert 'shown' in response
|
||||
assert response['tag'] == "thinking_indicator"
|
||||
|
||||
def test_app_processes_new_actions():
|
||||
import gui_legacy
|
||||
from unittest.mock import MagicMock, patch
|
||||
import dearpygui.dearpygui as dpg
|
||||
|
||||
dpg.create_context()
|
||||
try:
|
||||
with patch('gui_legacy.load_config', return_value={}), \
|
||||
patch('gui_legacy.PerformanceMonitor'), \
|
||||
patch('gui_legacy.shell_runner'), \
|
||||
patch('gui_legacy.project_manager'), \
|
||||
patch.object(gui_legacy.App, '_load_active_project'):
|
||||
app = gui_legacy.App()
|
||||
|
||||
with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
|
||||
patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
|
||||
patch('dearpygui.dearpygui.get_item_callback') as mock_get_cb:
|
||||
|
||||
# Test select_tab
|
||||
app._pending_gui_tasks.append({
|
||||
"action": "select_tab",
|
||||
"tab_bar": "some_tab_bar",
|
||||
"tab": "some_tab"
|
||||
})
|
||||
app._process_pending_gui_tasks()
|
||||
mock_set_value.assert_any_call("some_tab_bar", "some_tab")
|
||||
|
||||
# Test select_list_item
|
||||
mock_cb = MagicMock()
|
||||
mock_get_cb.return_value = mock_cb
|
||||
app._pending_gui_tasks.append({
|
||||
"action": "select_list_item",
|
||||
"listbox": "some_listbox",
|
||||
"item_value": "some_value"
|
||||
})
|
||||
app._process_pending_gui_tasks()
|
||||
mock_set_value.assert_any_call("some_listbox", "some_value")
|
||||
mock_cb.assert_called_with("some_listbox", "some_value")
|
||||
finally:
|
||||
dpg.destroy_context()
|
||||
import gui_legacy
|
||||
from unittest.mock import MagicMock, patch
|
||||
import dearpygui.dearpygui as dpg
|
||||
dpg.create_context()
|
||||
try:
|
||||
with patch('gui_legacy.load_config', return_value={}), \
|
||||
patch('gui_legacy.PerformanceMonitor'), \
|
||||
patch('gui_legacy.shell_runner'), \
|
||||
patch('gui_legacy.project_manager'), \
|
||||
patch.object(gui_legacy.App, '_load_active_project'):
|
||||
app = gui_legacy.App()
|
||||
with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
|
||||
patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
|
||||
patch('dearpygui.dearpygui.get_item_callback') as mock_get_cb:
|
||||
# Test select_tab
|
||||
app._pending_gui_tasks.append({
|
||||
"action": "select_tab",
|
||||
"tab_bar": "some_tab_bar",
|
||||
"tab": "some_tab"
|
||||
})
|
||||
app._process_pending_gui_tasks()
|
||||
mock_set_value.assert_any_call("some_tab_bar", "some_tab")
|
||||
# Test select_list_item
|
||||
mock_cb = MagicMock()
|
||||
mock_get_cb.return_value = mock_cb
|
||||
app._pending_gui_tasks.append({
|
||||
"action": "select_list_item",
|
||||
"listbox": "some_listbox",
|
||||
"item_value": "some_value"
|
||||
})
|
||||
app._process_pending_gui_tasks()
|
||||
mock_set_value.assert_any_call("some_listbox", "some_value")
|
||||
mock_cb.assert_called_with("some_listbox", "some_value")
|
||||
finally:
|
||||
dpg.destroy_context()
|
||||
|
||||
@@ -3,24 +3,24 @@ import tree_sitter
|
||||
from file_cache import ASTParser
|
||||
|
||||
def test_ast_parser_initialization():
|
||||
"""Verify that ASTParser can be initialized with a language string."""
|
||||
parser = ASTParser("python")
|
||||
assert parser.language_name == "python"
|
||||
"""Verify that ASTParser can be initialized with a language string."""
|
||||
parser = ASTParser("python")
|
||||
assert parser.language_name == "python"
|
||||
|
||||
def test_ast_parser_parse():
|
||||
"""Verify that the parse method returns a tree_sitter.Tree."""
|
||||
parser = ASTParser("python")
|
||||
code = """def example_func():
|
||||
"""Verify that the parse method returns a tree_sitter.Tree."""
|
||||
parser = ASTParser("python")
|
||||
code = """def example_func():
|
||||
return 42"""
|
||||
tree = parser.parse(code)
|
||||
assert isinstance(tree, tree_sitter.Tree)
|
||||
# Basic check that it parsed something
|
||||
assert tree.root_node.type == "module"
|
||||
tree = parser.parse(code)
|
||||
assert isinstance(tree, tree_sitter.Tree)
|
||||
# Basic check that it parsed something
|
||||
assert tree.root_node.type == "module"
|
||||
|
||||
def test_ast_parser_get_skeleton_python():
|
||||
"""Verify that get_skeleton replaces function bodies with '...' while preserving docstrings."""
|
||||
parser = ASTParser("python")
|
||||
code = '''
|
||||
"""Verify that get_skeleton replaces function bodies with '...' while preserving docstrings."""
|
||||
parser = ASTParser("python")
|
||||
code = '''
|
||||
def complex_function(a, b):
|
||||
"""
|
||||
This is a docstring.
|
||||
@@ -36,35 +36,32 @@ class MyClass:
|
||||
print("doing something")
|
||||
return None
|
||||
'''
|
||||
skeleton = parser.get_skeleton(code)
|
||||
|
||||
# Check that signatures are preserved
|
||||
assert "def complex_function(a, b):" in skeleton
|
||||
assert "class MyClass:" in skeleton
|
||||
assert "def method_without_docstring(self):" in skeleton
|
||||
|
||||
# Check that docstring is preserved
|
||||
assert '"""' in skeleton
|
||||
assert "This is a docstring." in skeleton
|
||||
assert "It should be preserved." in skeleton
|
||||
|
||||
# Check that bodies are replaced with '...'
|
||||
assert "..." in skeleton
|
||||
assert "result = a + b" not in skeleton
|
||||
assert "return result" not in skeleton
|
||||
assert 'print("doing something")' not in skeleton
|
||||
skeleton = parser.get_skeleton(code)
|
||||
# Check that signatures are preserved
|
||||
assert "def complex_function(a, b):" in skeleton
|
||||
assert "class MyClass:" in skeleton
|
||||
assert "def method_without_docstring(self):" in skeleton
|
||||
# Check that docstring is preserved
|
||||
assert '"""' in skeleton
|
||||
assert "This is a docstring." in skeleton
|
||||
assert "It should be preserved." in skeleton
|
||||
# Check that bodies are replaced with '...'
|
||||
assert "..." in skeleton
|
||||
assert "result = a + b" not in skeleton
|
||||
assert "return result" not in skeleton
|
||||
assert 'print("doing something")' not in skeleton
|
||||
|
||||
def test_ast_parser_invalid_language():
|
||||
"""Verify handling of unsupported or invalid languages."""
|
||||
# This might raise an error or return a default, depending on implementation
|
||||
# For now, we expect it to either fail gracefully or raise an exception we can catch
|
||||
with pytest.raises(Exception):
|
||||
ASTParser("not-a-language")
|
||||
"""Verify handling of unsupported or invalid languages."""
|
||||
# This might raise an error or return a default, depending on implementation
|
||||
# For now, we expect it to either fail gracefully or raise an exception we can catch
|
||||
with pytest.raises(Exception):
|
||||
ASTParser("not-a-language")
|
||||
|
||||
def test_ast_parser_get_curated_view():
|
||||
"""Verify that get_curated_view preserves function bodies with @core_logic or # [HOT]."""
|
||||
parser = ASTParser("python")
|
||||
code = '''
|
||||
"""Verify that get_curated_view preserves function bodies with @core_logic or # [HOT]."""
|
||||
parser = ASTParser("python")
|
||||
code = '''
|
||||
@core_logic
|
||||
def core_func():
|
||||
"""Core logic doc."""
|
||||
@@ -86,20 +83,16 @@ class MyClass:
|
||||
def core_method(self, x):
|
||||
print("method preserved", x)
|
||||
'''
|
||||
curated = parser.get_curated_view(code)
|
||||
|
||||
# Check that core_func is preserved
|
||||
assert 'print("this should be preserved")' in curated
|
||||
assert 'return True' in curated
|
||||
|
||||
# Check that hot_func is preserved
|
||||
assert '# [HOT]' in curated
|
||||
assert 'print("this should also be preserved")' in curated
|
||||
|
||||
# Check that normal_func is stripped but docstring is preserved
|
||||
assert '"""Normal doc."""' in curated
|
||||
assert 'print("this should be stripped")' not in curated
|
||||
assert '...' in curated
|
||||
|
||||
# Check that core_method is preserved
|
||||
assert 'print("method preserved", x)' in curated
|
||||
curated = parser.get_curated_view(code)
|
||||
# Check that core_func is preserved
|
||||
assert 'print("this should be preserved")' in curated
|
||||
assert 'return True' in curated
|
||||
# Check that hot_func is preserved
|
||||
assert '# [HOT]' in curated
|
||||
assert 'print("this should also be preserved")' in curated
|
||||
# Check that normal_func is stripped but docstring is preserved
|
||||
assert '"""Normal doc."""' in curated
|
||||
assert 'print("this should be stripped")' not in curated
|
||||
assert '...' in curated
|
||||
# Check that core_method is preserved
|
||||
assert 'print("method preserved", x)' in curated
|
||||
|
||||
@@ -2,8 +2,8 @@ import pytest
|
||||
from file_cache import ASTParser
|
||||
|
||||
def test_ast_parser_get_curated_view():
|
||||
parser = ASTParser("python")
|
||||
code = '''
|
||||
parser = ASTParser("python")
|
||||
code = '''
|
||||
@core_logic
|
||||
def core_func():
|
||||
"""Core logic doc."""
|
||||
@@ -25,20 +25,16 @@ class MyClass:
|
||||
def core_method(self):
|
||||
print("method preserved")
|
||||
'''
|
||||
curated = parser.get_curated_view(code)
|
||||
|
||||
# Check that core_func is preserved
|
||||
assert 'print("this should be preserved")' in curated
|
||||
assert 'return True' in curated
|
||||
|
||||
# Check that hot_func is preserved
|
||||
assert '# [HOT]' in curated
|
||||
assert 'print("this should also be preserved")' in curated
|
||||
|
||||
# Check that normal_func is stripped but docstring is preserved
|
||||
assert '"""Normal doc."""' in curated
|
||||
assert 'print("this should be stripped")' not in curated
|
||||
assert '...' in curated
|
||||
|
||||
# Check that core_method is preserved
|
||||
assert 'print("method preserved")' in curated
|
||||
curated = parser.get_curated_view(code)
|
||||
# Check that core_func is preserved
|
||||
assert 'print("this should be preserved")' in curated
|
||||
assert 'return True' in curated
|
||||
# Check that hot_func is preserved
|
||||
assert '# [HOT]' in curated
|
||||
assert 'print("this should also be preserved")' in curated
|
||||
# Check that normal_func is stripped but docstring is preserved
|
||||
assert '"""Normal doc."""' in curated
|
||||
assert 'print("this should be stripped")' not in curated
|
||||
assert '...' in curated
|
||||
# Check that core_method is preserved
|
||||
assert 'print("method preserved")' in curated
|
||||
|
||||
@@ -3,45 +3,40 @@ import pytest
|
||||
from events import AsyncEventQueue
|
||||
|
||||
def test_async_event_queue_put_get():
|
||||
"""Verify that an event can be asynchronously put and retrieved from the queue."""
|
||||
async def run_test():
|
||||
queue = AsyncEventQueue()
|
||||
event_name = "test_event"
|
||||
payload = {"data": "hello"}
|
||||
|
||||
await queue.put(event_name, payload)
|
||||
ret_name, ret_payload = await queue.get()
|
||||
|
||||
assert ret_name == event_name
|
||||
assert ret_payload == payload
|
||||
"""Verify that an event can be asynchronously put and retrieved from the queue."""
|
||||
|
||||
asyncio.run(run_test())
|
||||
async def run_test():
|
||||
queue = AsyncEventQueue()
|
||||
event_name = "test_event"
|
||||
payload = {"data": "hello"}
|
||||
await queue.put(event_name, payload)
|
||||
ret_name, ret_payload = await queue.get()
|
||||
assert ret_name == event_name
|
||||
assert ret_payload == payload
|
||||
asyncio.run(run_test())
|
||||
|
||||
def test_async_event_queue_multiple():
|
||||
"""Verify that multiple events can be asynchronously put and retrieved in order."""
|
||||
async def run_test():
|
||||
queue = AsyncEventQueue()
|
||||
|
||||
await queue.put("event1", 1)
|
||||
await queue.put("event2", 2)
|
||||
|
||||
name1, val1 = await queue.get()
|
||||
name2, val2 = await queue.get()
|
||||
|
||||
assert name1 == "event1"
|
||||
assert val1 == 1
|
||||
assert name2 == "event2"
|
||||
assert val2 == 2
|
||||
"""Verify that multiple events can be asynchronously put and retrieved in order."""
|
||||
|
||||
asyncio.run(run_test())
|
||||
async def run_test():
|
||||
queue = AsyncEventQueue()
|
||||
await queue.put("event1", 1)
|
||||
await queue.put("event2", 2)
|
||||
name1, val1 = await queue.get()
|
||||
name2, val2 = await queue.get()
|
||||
assert name1 == "event1"
|
||||
assert val1 == 1
|
||||
assert name2 == "event2"
|
||||
assert val2 == 2
|
||||
asyncio.run(run_test())
|
||||
|
||||
def test_async_event_queue_none_payload():
|
||||
"""Verify that an event with None payload works correctly."""
|
||||
async def run_test():
|
||||
queue = AsyncEventQueue()
|
||||
await queue.put("no_payload")
|
||||
name, payload = await queue.get()
|
||||
assert name == "no_payload"
|
||||
assert payload is None
|
||||
"""Verify that an event with None payload works correctly."""
|
||||
|
||||
asyncio.run(run_test())
|
||||
async def run_test():
|
||||
queue = AsyncEventQueue()
|
||||
await queue.put("no_payload")
|
||||
name, payload = await queue.get()
|
||||
assert name == "no_payload"
|
||||
assert payload is None
|
||||
asyncio.run(run_test())
|
||||
|
||||
@@ -5,72 +5,60 @@ from log_registry import LogRegistry
|
||||
|
||||
@pytest.fixture
|
||||
def registry_setup(tmp_path):
|
||||
registry_path = tmp_path / "log_registry.toml"
|
||||
logs_dir = tmp_path / "logs"
|
||||
logs_dir.mkdir()
|
||||
registry = LogRegistry(str(registry_path))
|
||||
return registry, logs_dir
|
||||
registry_path = tmp_path / "log_registry.toml"
|
||||
logs_dir = tmp_path / "logs"
|
||||
logs_dir.mkdir()
|
||||
registry = LogRegistry(str(registry_path))
|
||||
return registry, logs_dir
|
||||
|
||||
def test_auto_whitelist_keywords(registry_setup):
|
||||
registry, logs_dir = registry_setup
|
||||
session_id = "test_kw"
|
||||
session_dir = logs_dir / session_id
|
||||
session_dir.mkdir()
|
||||
|
||||
# Create comms.log with ERROR
|
||||
comms_log = session_dir / "comms.log"
|
||||
comms_log.write_text("Some message\nAN ERROR OCCURRED\nMore text")
|
||||
|
||||
registry.register_session(session_id, str(session_dir), datetime.now())
|
||||
registry.update_auto_whitelist_status(session_id)
|
||||
|
||||
assert registry.is_session_whitelisted(session_id)
|
||||
assert "ERROR" in registry.data[session_id]["metadata"]["reason"]
|
||||
registry, logs_dir = registry_setup
|
||||
session_id = "test_kw"
|
||||
session_dir = logs_dir / session_id
|
||||
session_dir.mkdir()
|
||||
# Create comms.log with ERROR
|
||||
comms_log = session_dir / "comms.log"
|
||||
comms_log.write_text("Some message\nAN ERROR OCCURRED\nMore text")
|
||||
registry.register_session(session_id, str(session_dir), datetime.now())
|
||||
registry.update_auto_whitelist_status(session_id)
|
||||
assert registry.is_session_whitelisted(session_id)
|
||||
assert "ERROR" in registry.data[session_id]["metadata"]["reason"]
|
||||
|
||||
def test_auto_whitelist_message_count(registry_setup):
|
||||
registry, logs_dir = registry_setup
|
||||
session_id = "test_msg_count"
|
||||
session_dir = logs_dir / session_id
|
||||
session_dir.mkdir()
|
||||
|
||||
# Create comms.log with > 10 lines
|
||||
comms_log = session_dir / "comms.log"
|
||||
comms_log.write_text("\n".join(["msg"] * 15))
|
||||
|
||||
registry.register_session(session_id, str(session_dir), datetime.now())
|
||||
registry.update_auto_whitelist_status(session_id)
|
||||
|
||||
assert registry.is_session_whitelisted(session_id)
|
||||
assert registry.data[session_id]["metadata"]["message_count"] == 15
|
||||
registry, logs_dir = registry_setup
|
||||
session_id = "test_msg_count"
|
||||
session_dir = logs_dir / session_id
|
||||
session_dir.mkdir()
|
||||
# Create comms.log with > 10 lines
|
||||
comms_log = session_dir / "comms.log"
|
||||
comms_log.write_text("\n".join(["msg"] * 15))
|
||||
registry.register_session(session_id, str(session_dir), datetime.now())
|
||||
registry.update_auto_whitelist_status(session_id)
|
||||
assert registry.is_session_whitelisted(session_id)
|
||||
assert registry.data[session_id]["metadata"]["message_count"] == 15
|
||||
|
||||
def test_auto_whitelist_large_size(registry_setup):
|
||||
registry, logs_dir = registry_setup
|
||||
session_id = "test_large"
|
||||
session_dir = logs_dir / session_id
|
||||
session_dir.mkdir()
|
||||
|
||||
# Create large file (> 50KB)
|
||||
large_file = session_dir / "large.log"
|
||||
large_file.write_text("x" * 60000)
|
||||
|
||||
registry.register_session(session_id, str(session_dir), datetime.now())
|
||||
registry.update_auto_whitelist_status(session_id)
|
||||
|
||||
assert registry.is_session_whitelisted(session_id)
|
||||
assert "Large session size" in registry.data[session_id]["metadata"]["reason"]
|
||||
registry, logs_dir = registry_setup
|
||||
session_id = "test_large"
|
||||
session_dir = logs_dir / session_id
|
||||
session_dir.mkdir()
|
||||
# Create large file (> 50KB)
|
||||
large_file = session_dir / "large.log"
|
||||
large_file.write_text("x" * 60000)
|
||||
registry.register_session(session_id, str(session_dir), datetime.now())
|
||||
registry.update_auto_whitelist_status(session_id)
|
||||
assert registry.is_session_whitelisted(session_id)
|
||||
assert "Large session size" in registry.data[session_id]["metadata"]["reason"]
|
||||
|
||||
def test_no_auto_whitelist_insignificant(registry_setup):
|
||||
registry, logs_dir = registry_setup
|
||||
session_id = "test_insignificant"
|
||||
session_dir = logs_dir / session_id
|
||||
session_dir.mkdir()
|
||||
|
||||
# Small file, few lines, no keywords
|
||||
comms_log = session_dir / "comms.log"
|
||||
comms_log.write_text("hello\nworld")
|
||||
|
||||
registry.register_session(session_id, str(session_dir), datetime.now())
|
||||
registry.update_auto_whitelist_status(session_id)
|
||||
|
||||
assert not registry.is_session_whitelisted(session_id)
|
||||
assert registry.data[session_id]["metadata"]["message_count"] == 2
|
||||
registry, logs_dir = registry_setup
|
||||
session_id = "test_insignificant"
|
||||
session_dir = logs_dir / session_id
|
||||
session_dir.mkdir()
|
||||
# Small file, few lines, no keywords
|
||||
comms_log = session_dir / "comms.log"
|
||||
comms_log.write_text("hello\nworld")
|
||||
registry.register_session(session_id, str(session_dir), datetime.now())
|
||||
registry.update_auto_whitelist_status(session_id)
|
||||
assert not registry.is_session_whitelisted(session_id)
|
||||
assert registry.data[session_id]["metadata"]["message_count"] == 2
|
||||
|
||||
@@ -12,64 +12,55 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from scripts.cli_tool_bridge import main
|
||||
|
||||
class TestCliToolBridge(unittest.TestCase):
|
||||
def setUp(self):
|
||||
os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
|
||||
self.tool_call = {
|
||||
'tool_name': 'read_file',
|
||||
'tool_input': {'path': 'test.txt'}
|
||||
}
|
||||
def setUp(self):
|
||||
os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
|
||||
self.tool_call = {
|
||||
'tool_name': 'read_file',
|
||||
'tool_input': {'path': 'test.txt'}
|
||||
}
|
||||
|
||||
@patch('sys.stdin', new_callable=io.StringIO)
|
||||
@patch('sys.stdout', new_callable=io.StringIO)
|
||||
@patch('api_hook_client.ApiHookClient.request_confirmation')
|
||||
def test_allow_decision(self, mock_request, mock_stdout, mock_stdin):
|
||||
# 1. Mock stdin with a JSON string tool call
|
||||
mock_stdin.write(json.dumps(self.tool_call))
|
||||
mock_stdin.seek(0)
|
||||
@patch('sys.stdin', new_callable=io.StringIO)
|
||||
@patch('sys.stdout', new_callable=io.StringIO)
|
||||
@patch('api_hook_client.ApiHookClient.request_confirmation')
|
||||
def test_allow_decision(self, mock_request, mock_stdout, mock_stdin):
|
||||
# 1. Mock stdin with a JSON string tool call
|
||||
mock_stdin.write(json.dumps(self.tool_call))
|
||||
mock_stdin.seek(0)
|
||||
# 2. Mock ApiHookClient to return approved
|
||||
mock_request.return_value = {'approved': True}
|
||||
# Run main
|
||||
main()
|
||||
# 3. Capture stdout and assert allow
|
||||
output = json.loads(mock_stdout.getvalue().strip())
|
||||
self.assertEqual(output.get('decision'), 'allow')
|
||||
|
||||
# 2. Mock ApiHookClient to return approved
|
||||
mock_request.return_value = {'approved': True}
|
||||
@patch('sys.stdin', new_callable=io.StringIO)
|
||||
@patch('sys.stdout', new_callable=io.StringIO)
|
||||
@patch('api_hook_client.ApiHookClient.request_confirmation')
|
||||
def test_deny_decision(self, mock_request, mock_stdout, mock_stdin):
|
||||
# Mock stdin
|
||||
mock_stdin.write(json.dumps(self.tool_call))
|
||||
mock_stdin.seek(0)
|
||||
# 4. Mock ApiHookClient to return denied
|
||||
mock_request.return_value = {'approved': False}
|
||||
main()
|
||||
# Assert deny
|
||||
output = json.loads(mock_stdout.getvalue().strip())
|
||||
self.assertEqual(output.get('decision'), 'deny')
|
||||
|
||||
# Run main
|
||||
main()
|
||||
|
||||
# 3. Capture stdout and assert allow
|
||||
output = json.loads(mock_stdout.getvalue().strip())
|
||||
self.assertEqual(output.get('decision'), 'allow')
|
||||
|
||||
@patch('sys.stdin', new_callable=io.StringIO)
|
||||
@patch('sys.stdout', new_callable=io.StringIO)
|
||||
@patch('api_hook_client.ApiHookClient.request_confirmation')
|
||||
def test_deny_decision(self, mock_request, mock_stdout, mock_stdin):
|
||||
# Mock stdin
|
||||
mock_stdin.write(json.dumps(self.tool_call))
|
||||
mock_stdin.seek(0)
|
||||
|
||||
# 4. Mock ApiHookClient to return denied
|
||||
mock_request.return_value = {'approved': False}
|
||||
|
||||
main()
|
||||
|
||||
# Assert deny
|
||||
output = json.loads(mock_stdout.getvalue().strip())
|
||||
self.assertEqual(output.get('decision'), 'deny')
|
||||
|
||||
@patch('sys.stdin', new_callable=io.StringIO)
|
||||
@patch('sys.stdout', new_callable=io.StringIO)
|
||||
@patch('api_hook_client.ApiHookClient.request_confirmation')
|
||||
def test_unreachable_hook_server(self, mock_request, mock_stdout, mock_stdin):
|
||||
# Mock stdin
|
||||
mock_stdin.write(json.dumps(self.tool_call))
|
||||
mock_stdin.seek(0)
|
||||
|
||||
# 5. Test case where hook server is unreachable (exception)
|
||||
mock_request.side_effect = Exception("Connection refused")
|
||||
|
||||
main()
|
||||
|
||||
# Assert deny on error
|
||||
output = json.loads(mock_stdout.getvalue().strip())
|
||||
self.assertEqual(output.get('decision'), 'deny')
|
||||
@patch('sys.stdin', new_callable=io.StringIO)
|
||||
@patch('sys.stdout', new_callable=io.StringIO)
|
||||
@patch('api_hook_client.ApiHookClient.request_confirmation')
|
||||
def test_unreachable_hook_server(self, mock_request, mock_stdout, mock_stdin):
|
||||
# Mock stdin
|
||||
mock_stdin.write(json.dumps(self.tool_call))
|
||||
mock_stdin.seek(0)
|
||||
# 5. Test case where hook server is unreachable (exception)
|
||||
mock_request.side_effect = Exception("Connection refused")
|
||||
main()
|
||||
# Assert deny on error
|
||||
output = json.loads(mock_stdout.getvalue().strip())
|
||||
self.assertEqual(output.get('decision'), 'deny')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
unittest.main()
|
||||
|
||||
@@ -12,42 +12,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from scripts.cli_tool_bridge import main
|
||||
|
||||
class TestCliToolBridgeMapping(unittest.TestCase):
|
||||
def setUp(self):
|
||||
os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
|
||||
def setUp(self):
|
||||
os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
|
||||
|
||||
@patch('sys.stdin', new_callable=io.StringIO)
|
||||
@patch('sys.stdout', new_callable=io.StringIO)
|
||||
@patch('api_hook_client.ApiHookClient.request_confirmation')
|
||||
def test_mapping_from_api_format(self, mock_request, mock_stdout, mock_stdin):
|
||||
"""
|
||||
@patch('sys.stdin', new_callable=io.StringIO)
|
||||
@patch('sys.stdout', new_callable=io.StringIO)
|
||||
@patch('api_hook_client.ApiHookClient.request_confirmation')
|
||||
def test_mapping_from_api_format(self, mock_request, mock_stdout, mock_stdin):
|
||||
"""
|
||||
Verify that bridge correctly maps 'id', 'name', 'input' (Gemini API format)
|
||||
into tool_name and tool_input for the hook client.
|
||||
"""
|
||||
api_tool_call = {
|
||||
'id': 'call123',
|
||||
'name': 'read_file',
|
||||
'input': {'path': 'test.txt'}
|
||||
}
|
||||
|
||||
# 1. Mock stdin with the API format JSON
|
||||
mock_stdin.write(json.dumps(api_tool_call))
|
||||
mock_stdin.seek(0)
|
||||
|
||||
# 2. Mock ApiHookClient to return approved
|
||||
mock_request.return_value = {'approved': True}
|
||||
|
||||
# Run main
|
||||
main()
|
||||
|
||||
# 3. Verify that request_confirmation was called with mapped values
|
||||
# If it's not mapped, it will likely be called with None or fail
|
||||
mock_request.assert_called_once_with('read_file', {'path': 'test.txt'})
|
||||
|
||||
# 4. Capture stdout and assert allow
|
||||
output_str = mock_stdout.getvalue().strip()
|
||||
self.assertTrue(output_str, "Stdout should not be empty")
|
||||
output = json.loads(output_str)
|
||||
self.assertEqual(output.get('decision'), 'allow')
|
||||
api_tool_call = {
|
||||
'id': 'call123',
|
||||
'name': 'read_file',
|
||||
'input': {'path': 'test.txt'}
|
||||
}
|
||||
# 1. Mock stdin with the API format JSON
|
||||
mock_stdin.write(json.dumps(api_tool_call))
|
||||
mock_stdin.seek(0)
|
||||
# 2. Mock ApiHookClient to return approved
|
||||
mock_request.return_value = {'approved': True}
|
||||
# Run main
|
||||
main()
|
||||
# 3. Verify that request_confirmation was called with mapped values
|
||||
# If it's not mapped, it will likely be called with None or fail
|
||||
mock_request.assert_called_once_with('read_file', {'path': 'test.txt'})
|
||||
# 4. Capture stdout and assert allow
|
||||
output_str = mock_stdout.getvalue().strip()
|
||||
self.assertTrue(output_str, "Stdout should not be empty")
|
||||
output = json.loads(output_str)
|
||||
self.assertEqual(output.get('decision'), 'allow')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
unittest.main()
|
||||
|
||||
@@ -13,61 +13,55 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from api_hook_client import ApiHookClient
|
||||
|
||||
def simulate_conductor_phase_completion(client: ApiHookClient):
|
||||
"""
|
||||
"""
|
||||
Simulates the Conductor agent's logic for phase completion using ApiHookClient.
|
||||
"""
|
||||
results = {
|
||||
"verification_successful": False,
|
||||
"verification_message": ""
|
||||
}
|
||||
|
||||
try:
|
||||
status = client.get_status()
|
||||
if status.get('status') == 'ok':
|
||||
results["verification_successful"] = True
|
||||
results["verification_message"] = "Automated verification completed successfully."
|
||||
else:
|
||||
results["verification_successful"] = False
|
||||
results["verification_message"] = f"Automated verification failed: {status}"
|
||||
except Exception as e:
|
||||
results["verification_successful"] = False
|
||||
results["verification_message"] = f"Automated verification failed: {e}"
|
||||
|
||||
return results
|
||||
results = {
|
||||
"verification_successful": False,
|
||||
"verification_message": ""
|
||||
}
|
||||
try:
|
||||
status = client.get_status()
|
||||
if status.get('status') == 'ok':
|
||||
results["verification_successful"] = True
|
||||
results["verification_message"] = "Automated verification completed successfully."
|
||||
else:
|
||||
results["verification_successful"] = False
|
||||
results["verification_message"] = f"Automated verification failed: {status}"
|
||||
except Exception as e:
|
||||
results["verification_successful"] = False
|
||||
results["verification_message"] = f"Automated verification failed: {e}"
|
||||
return results
|
||||
|
||||
def test_conductor_integrates_api_hook_client_for_verification(live_gui):
|
||||
"""
|
||||
"""
|
||||
Verify that Conductor's simulated phase completion logic properly integrates
|
||||
and uses the ApiHookClient for verification against the live GUI.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
results = simulate_conductor_phase_completion(client)
|
||||
|
||||
assert results["verification_successful"] is True
|
||||
assert "successfully" in results["verification_message"]
|
||||
client = ApiHookClient()
|
||||
results = simulate_conductor_phase_completion(client)
|
||||
assert results["verification_successful"] is True
|
||||
assert "successfully" in results["verification_message"]
|
||||
|
||||
def test_conductor_handles_api_hook_failure(live_gui):
|
||||
"""
|
||||
"""
|
||||
Verify Conductor handles a simulated API hook verification failure.
|
||||
We patch the client's get_status to simulate failure even with live GUI.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
|
||||
with patch.object(ApiHookClient, 'get_status') as mock_get_status:
|
||||
mock_get_status.return_value = {'status': 'failed', 'error': 'Something went wrong'}
|
||||
results = simulate_conductor_phase_completion(client)
|
||||
|
||||
assert results["verification_successful"] is False
|
||||
assert "failed" in results["verification_message"]
|
||||
client = ApiHookClient()
|
||||
with patch.object(ApiHookClient, 'get_status') as mock_get_status:
|
||||
mock_get_status.return_value = {'status': 'failed', 'error': 'Something went wrong'}
|
||||
results = simulate_conductor_phase_completion(client)
|
||||
assert results["verification_successful"] is False
|
||||
assert "failed" in results["verification_message"]
|
||||
|
||||
def test_conductor_handles_api_hook_connection_error():
|
||||
"""
|
||||
"""
|
||||
Verify Conductor handles a simulated API hook connection error (server down).
|
||||
"""
|
||||
client = ApiHookClient(base_url="http://127.0.0.1:9998", max_retries=0)
|
||||
results = simulate_conductor_phase_completion(client)
|
||||
|
||||
assert results["verification_successful"] is False
|
||||
# Check for expected error substrings from ApiHookClient
|
||||
msg = results["verification_message"]
|
||||
assert any(term in msg for term in ["Could not connect", "timed out", "Could not reach"])
|
||||
client = ApiHookClient(base_url="http://127.0.0.1:9998", max_retries=0)
|
||||
results = simulate_conductor_phase_completion(client)
|
||||
assert results["verification_successful"] is False
|
||||
# Check for expected error substrings from ApiHookClient
|
||||
msg = results["verification_message"]
|
||||
assert any(term in msg for term in ["Could not connect", "timed out", "Could not reach"])
|
||||
|
||||
@@ -7,284 +7,234 @@ import ai_client
|
||||
# which will be implemented in the next phase of TDD.
|
||||
|
||||
def test_conductor_engine_initialization():
|
||||
"""
|
||||
"""
|
||||
Test that ConductorEngine can be initialized with a Track.
|
||||
"""
|
||||
track = Track(id="test_track", description="Test Track")
|
||||
from multi_agent_conductor import ConductorEngine
|
||||
engine = ConductorEngine(track=track)
|
||||
assert engine.track == track
|
||||
track = Track(id="test_track", description="Test Track")
|
||||
from multi_agent_conductor import ConductorEngine
|
||||
engine = ConductorEngine(track=track)
|
||||
assert engine.track == track
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_conductor_engine_run_linear_executes_tickets_in_order(monkeypatch):
|
||||
"""
|
||||
"""
|
||||
Test that run_linear iterates through executable tickets and calls the worker lifecycle.
|
||||
"""
|
||||
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker2", depends_on=["T1"])
|
||||
track = Track(id="track1", description="Track 1", tickets=[ticket1, ticket2])
|
||||
|
||||
from multi_agent_conductor import ConductorEngine
|
||||
engine = ConductorEngine(track=track)
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
# We mock run_worker_lifecycle as it is expected to be in the same module
|
||||
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
|
||||
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved
|
||||
def side_effect(ticket, context, *args, **kwargs):
|
||||
ticket.mark_complete()
|
||||
return "Success"
|
||||
mock_lifecycle.side_effect = side_effect
|
||||
|
||||
await engine.run_linear()
|
||||
|
||||
# Track.get_executable_tickets() should be called repeatedly until all are done
|
||||
# T1 should run first, then T2.
|
||||
assert mock_lifecycle.call_count == 2
|
||||
assert ticket1.status == "completed"
|
||||
assert ticket2.status == "completed"
|
||||
|
||||
# Verify sequence: T1 before T2
|
||||
calls = mock_lifecycle.call_args_list
|
||||
assert calls[0][0][0].id == "T1"
|
||||
assert calls[1][0][0].id == "T2"
|
||||
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker2", depends_on=["T1"])
|
||||
track = Track(id="track1", description="Track 1", tickets=[ticket1, ticket2])
|
||||
from multi_agent_conductor import ConductorEngine
|
||||
engine = ConductorEngine(track=track)
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
# We mock run_worker_lifecycle as it is expected to be in the same module
|
||||
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
|
||||
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved
|
||||
|
||||
def side_effect(ticket, context, *args, **kwargs):
|
||||
ticket.mark_complete()
|
||||
return "Success"
|
||||
mock_lifecycle.side_effect = side_effect
|
||||
await engine.run_linear()
|
||||
# Track.get_executable_tickets() should be called repeatedly until all are done
|
||||
# T1 should run first, then T2.
|
||||
assert mock_lifecycle.call_count == 2
|
||||
assert ticket1.status == "completed"
|
||||
assert ticket2.status == "completed"
|
||||
# Verify sequence: T1 before T2
|
||||
calls = mock_lifecycle.call_args_list
|
||||
assert calls[0][0][0].id == "T1"
|
||||
assert calls[1][0][0].id == "T2"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_run_worker_lifecycle_calls_ai_client_send(monkeypatch):
|
||||
"""
|
||||
"""
|
||||
Test that run_worker_lifecycle triggers the AI client and updates ticket status on success.
|
||||
"""
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
mock_send.return_value = "Task complete. I have updated the file."
|
||||
|
||||
result = run_worker_lifecycle(ticket, context)
|
||||
|
||||
assert result == "Task complete. I have updated the file."
|
||||
assert ticket.status == "completed"
|
||||
mock_send.assert_called_once()
|
||||
|
||||
# Check if description was passed to send()
|
||||
args, kwargs = mock_send.call_args
|
||||
# user_message is passed as a keyword argument
|
||||
assert ticket.description in kwargs["user_message"]
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
mock_send.return_value = "Task complete. I have updated the file."
|
||||
result = run_worker_lifecycle(ticket, context)
|
||||
assert result == "Task complete. I have updated the file."
|
||||
assert ticket.status == "completed"
|
||||
mock_send.assert_called_once()
|
||||
# Check if description was passed to send()
|
||||
args, kwargs = mock_send.call_args
|
||||
# user_message is passed as a keyword argument
|
||||
assert ticket.description in kwargs["user_message"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_run_worker_lifecycle_context_injection(monkeypatch):
|
||||
"""
|
||||
"""
|
||||
Test that run_worker_lifecycle can take a context_files list and injects AST views into the prompt.
|
||||
"""
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
context_files = ["primary.py", "secondary.py"]
|
||||
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
# We mock ASTParser which is expected to be imported in multi_agent_conductor
|
||||
with patch("multi_agent_conductor.ASTParser") as mock_ast_parser_class, \
|
||||
patch("builtins.open", new_callable=MagicMock) as mock_open:
|
||||
|
||||
# Setup open mock to return different content for different files
|
||||
file_contents = {
|
||||
"primary.py": "def primary(): pass",
|
||||
"secondary.py": "def secondary(): pass"
|
||||
}
|
||||
|
||||
def mock_open_side_effect(file, *args, **kwargs):
|
||||
content = file_contents.get(file, "")
|
||||
mock_file = MagicMock()
|
||||
mock_file.read.return_value = content
|
||||
mock_file.__enter__.return_value = mock_file
|
||||
return mock_file
|
||||
|
||||
mock_open.side_effect = mock_open_side_effect
|
||||
|
||||
# Setup ASTParser mock
|
||||
mock_ast_parser = mock_ast_parser_class.return_value
|
||||
mock_ast_parser.get_curated_view.return_value = "CURATED VIEW"
|
||||
mock_ast_parser.get_skeleton.return_value = "SKELETON VIEW"
|
||||
|
||||
mock_send.return_value = "Success"
|
||||
|
||||
run_worker_lifecycle(ticket, context, context_files=context_files)
|
||||
|
||||
# Verify ASTParser calls:
|
||||
# First file (primary) should get curated view, others (secondary) get skeleton
|
||||
mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass")
|
||||
mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass")
|
||||
|
||||
# Verify user_message contains the views
|
||||
_, kwargs = mock_send.call_args
|
||||
user_message = kwargs["user_message"]
|
||||
assert "CURATED VIEW" in user_message
|
||||
assert "SKELETON VIEW" in user_message
|
||||
assert "primary.py" in user_message
|
||||
assert "secondary.py" in user_message
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
context_files = ["primary.py", "secondary.py"]
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
# We mock ASTParser which is expected to be imported in multi_agent_conductor
|
||||
with patch("multi_agent_conductor.ASTParser") as mock_ast_parser_class, \
|
||||
patch("builtins.open", new_callable=MagicMock) as mock_open:
|
||||
# Setup open mock to return different content for different files
|
||||
file_contents = {
|
||||
"primary.py": "def primary(): pass",
|
||||
"secondary.py": "def secondary(): pass"
|
||||
}
|
||||
|
||||
def mock_open_side_effect(file, *args, **kwargs):
|
||||
content = file_contents.get(file, "")
|
||||
mock_file = MagicMock()
|
||||
mock_file.read.return_value = content
|
||||
mock_file.__enter__.return_value = mock_file
|
||||
return mock_file
|
||||
mock_open.side_effect = mock_open_side_effect
|
||||
# Setup ASTParser mock
|
||||
mock_ast_parser = mock_ast_parser_class.return_value
|
||||
mock_ast_parser.get_curated_view.return_value = "CURATED VIEW"
|
||||
mock_ast_parser.get_skeleton.return_value = "SKELETON VIEW"
|
||||
mock_send.return_value = "Success"
|
||||
run_worker_lifecycle(ticket, context, context_files=context_files)
|
||||
# Verify ASTParser calls:
|
||||
# First file (primary) should get curated view, others (secondary) get skeleton
|
||||
mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass")
|
||||
mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass")
|
||||
# Verify user_message contains the views
|
||||
_, kwargs = mock_send.call_args
|
||||
user_message = kwargs["user_message"]
|
||||
assert "CURATED VIEW" in user_message
|
||||
assert "SKELETON VIEW" in user_message
|
||||
assert "primary.py" in user_message
|
||||
assert "secondary.py" in user_message
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_run_worker_lifecycle_handles_blocked_response(monkeypatch):
|
||||
"""
|
||||
"""
|
||||
Test that run_worker_lifecycle marks the ticket as blocked if the AI indicates it cannot proceed.
|
||||
"""
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
# Simulate a response indicating a block
|
||||
mock_send.return_value = "I am BLOCKED because I don't have enough information."
|
||||
|
||||
run_worker_lifecycle(ticket, context)
|
||||
|
||||
assert ticket.status == "blocked"
|
||||
assert "BLOCKED" in ticket.blocked_reason
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
# Simulate a response indicating a block
|
||||
mock_send.return_value = "I am BLOCKED because I don't have enough information."
|
||||
run_worker_lifecycle(ticket, context)
|
||||
assert ticket.status == "blocked"
|
||||
assert "BLOCKED" in ticket.blocked_reason
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_run_worker_lifecycle_step_mode_confirmation(monkeypatch):
|
||||
"""
|
||||
"""
|
||||
Test that run_worker_lifecycle passes confirm_execution to ai_client.send when step_mode is True.
|
||||
Verify that if confirm_execution is called (simulated by mocking ai_client.send to call its callback),
|
||||
the flow works as expected.
|
||||
"""
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
|
||||
|
||||
# We simulate ai_client.send by making it call the pre_tool_callback it received
|
||||
def mock_send_side_effect(md_content, user_message, **kwargs):
|
||||
callback = kwargs.get("pre_tool_callback")
|
||||
if callback:
|
||||
# Simulate calling it with some payload
|
||||
callback('{"tool": "read_file", "args": {"path": "test.txt"}}')
|
||||
return "Success"
|
||||
|
||||
mock_send.side_effect = mock_send_side_effect
|
||||
mock_confirm.return_value = True
|
||||
|
||||
mock_event_queue = MagicMock()
|
||||
run_worker_lifecycle(ticket, context, event_queue=mock_event_queue)
|
||||
|
||||
# Verify confirm_execution was called
|
||||
mock_confirm.assert_called_once()
|
||||
assert ticket.status == "completed"
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
|
||||
# We simulate ai_client.send by making it call the pre_tool_callback it received
|
||||
|
||||
def mock_send_side_effect(md_content, user_message, **kwargs):
|
||||
callback = kwargs.get("pre_tool_callback")
|
||||
if callback:
|
||||
# Simulate calling it with some payload
|
||||
callback('{"tool": "read_file", "args": {"path": "test.txt"}}')
|
||||
return "Success"
|
||||
mock_send.side_effect = mock_send_side_effect
|
||||
mock_confirm.return_value = True
|
||||
mock_event_queue = MagicMock()
|
||||
run_worker_lifecycle(ticket, context, event_queue=mock_event_queue)
|
||||
# Verify confirm_execution was called
|
||||
mock_confirm.assert_called_once()
|
||||
assert ticket.status == "completed"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_run_worker_lifecycle_step_mode_rejection(monkeypatch):
|
||||
"""
|
||||
"""
|
||||
Verify that if confirm_execution returns False, the logic (in ai_client, which we simulate here)
|
||||
would prevent execution. In run_worker_lifecycle, we just check if it's passed.
|
||||
"""
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
|
||||
|
||||
mock_confirm.return_value = False
|
||||
mock_send.return_value = "Task failed because tool execution was rejected."
|
||||
|
||||
run_worker_lifecycle(ticket, context)
|
||||
|
||||
# Verify it was passed to send
|
||||
args, kwargs = mock_send.call_args
|
||||
assert kwargs["pre_tool_callback"] is not None
|
||||
|
||||
# Since we've already tested ai_client's implementation of pre_tool_callback (mentally or via other tests),
|
||||
# here we just verify the wiring.
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
|
||||
mock_confirm.return_value = False
|
||||
mock_send.return_value = "Task failed because tool execution was rejected."
|
||||
run_worker_lifecycle(ticket, context)
|
||||
# Verify it was passed to send
|
||||
args, kwargs = mock_send.call_args
|
||||
assert kwargs["pre_tool_callback"] is not None
|
||||
# Since we've already tested ai_client's implementation of pre_tool_callback (mentally or via other tests),
|
||||
# here we just verify the wiring.
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch):
|
||||
"""
|
||||
"""
|
||||
Test that parse_json_tickets correctly populates the track and run_linear executes them in dependency order.
|
||||
"""
|
||||
import json
|
||||
from multi_agent_conductor import ConductorEngine
|
||||
|
||||
track = Track(id="dynamic_track", description="Dynamic Track")
|
||||
engine = ConductorEngine(track=track)
|
||||
|
||||
tickets_json = json.dumps([
|
||||
{
|
||||
"id": "T1",
|
||||
"description": "Initial task",
|
||||
"status": "todo",
|
||||
"assigned_to": "worker1",
|
||||
"depends_on": []
|
||||
},
|
||||
{
|
||||
"id": "T2",
|
||||
"description": "Dependent task",
|
||||
"status": "todo",
|
||||
"assigned_to": "worker2",
|
||||
"depends_on": ["T1"]
|
||||
},
|
||||
{
|
||||
"id": "T3",
|
||||
"description": "Another initial task",
|
||||
"status": "todo",
|
||||
"assigned_to": "worker3",
|
||||
"depends_on": []
|
||||
}
|
||||
])
|
||||
|
||||
engine.parse_json_tickets(tickets_json)
|
||||
|
||||
assert len(engine.track.tickets) == 3
|
||||
assert engine.track.tickets[0].id == "T1"
|
||||
assert engine.track.tickets[1].id == "T2"
|
||||
assert engine.track.tickets[2].id == "T3"
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
# Mock run_worker_lifecycle to mark tickets as complete
|
||||
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
|
||||
def side_effect(ticket, context, *args, **kwargs):
|
||||
ticket.mark_complete()
|
||||
return "Success"
|
||||
mock_lifecycle.side_effect = side_effect
|
||||
|
||||
await engine.run_linear()
|
||||
|
||||
assert mock_lifecycle.call_count == 3
|
||||
|
||||
# Verify dependency order: T1 must be called before T2
|
||||
calls = [call[0][0].id for call in mock_lifecycle.call_args_list]
|
||||
|
||||
t1_idx = calls.index("T1")
|
||||
t2_idx = calls.index("T2")
|
||||
assert t1_idx < t2_idx
|
||||
|
||||
# T3 can be anywhere relative to T1 and T2, but T1 < T2 is mandatory
|
||||
assert "T3" in calls
|
||||
import json
|
||||
from multi_agent_conductor import ConductorEngine
|
||||
track = Track(id="dynamic_track", description="Dynamic Track")
|
||||
engine = ConductorEngine(track=track)
|
||||
tickets_json = json.dumps([
|
||||
{
|
||||
"id": "T1",
|
||||
"description": "Initial task",
|
||||
"status": "todo",
|
||||
"assigned_to": "worker1",
|
||||
"depends_on": []
|
||||
},
|
||||
{
|
||||
"id": "T2",
|
||||
"description": "Dependent task",
|
||||
"status": "todo",
|
||||
"assigned_to": "worker2",
|
||||
"depends_on": ["T1"]
|
||||
},
|
||||
{
|
||||
"id": "T3",
|
||||
"description": "Another initial task",
|
||||
"status": "todo",
|
||||
"assigned_to": "worker3",
|
||||
"depends_on": []
|
||||
}
|
||||
])
|
||||
engine.parse_json_tickets(tickets_json)
|
||||
assert len(engine.track.tickets) == 3
|
||||
assert engine.track.tickets[0].id == "T1"
|
||||
assert engine.track.tickets[1].id == "T2"
|
||||
assert engine.track.tickets[2].id == "T3"
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
# Mock run_worker_lifecycle to mark tickets as complete
|
||||
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
|
||||
def side_effect(ticket, context, *args, **kwargs):
|
||||
ticket.mark_complete()
|
||||
return "Success"
|
||||
mock_lifecycle.side_effect = side_effect
|
||||
await engine.run_linear()
|
||||
assert mock_lifecycle.call_count == 3
|
||||
# Verify dependency order: T1 must be called before T2
|
||||
calls = [call[0][0].id for call in mock_lifecycle.call_args_list]
|
||||
t1_idx = calls.index("T1")
|
||||
t2_idx = calls.index("T2")
|
||||
assert t1_idx < t2_idx
|
||||
# T3 can be anywhere relative to T1 and T2, but T1 < T2 is mandatory
|
||||
assert "T3" in calls
|
||||
|
||||
@@ -4,112 +4,106 @@ import json
|
||||
import conductor_tech_lead
|
||||
|
||||
class TestConductorTechLead(unittest.TestCase):
|
||||
@patch('ai_client.send')
|
||||
@patch('ai_client.set_provider')
|
||||
@patch('ai_client.reset_session')
|
||||
def test_generate_tickets_success(self, mock_reset_session, mock_set_provider, mock_send):
|
||||
# Setup mock response
|
||||
mock_tickets = [
|
||||
{
|
||||
"id": "ticket_1",
|
||||
"type": "Ticket",
|
||||
"goal": "Test goal",
|
||||
"target_file": "test.py",
|
||||
"depends_on": [],
|
||||
"context_requirements": []
|
||||
}
|
||||
]
|
||||
mock_send.return_value = "```json\n" + json.dumps(mock_tickets) + "\n```"
|
||||
|
||||
track_brief = "Test track brief"
|
||||
module_skeletons = "Test skeletons"
|
||||
# Call the function
|
||||
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
|
||||
@patch('ai_client.send')
|
||||
@patch('ai_client.set_provider')
|
||||
@patch('ai_client.reset_session')
|
||||
def test_generate_tickets_success(self, mock_reset_session, mock_set_provider, mock_send):
|
||||
# Setup mock response
|
||||
mock_tickets = [
|
||||
{
|
||||
"id": "ticket_1",
|
||||
"type": "Ticket",
|
||||
"goal": "Test goal",
|
||||
"target_file": "test.py",
|
||||
"depends_on": [],
|
||||
"context_requirements": []
|
||||
}
|
||||
]
|
||||
mock_send.return_value = "```json\n" + json.dumps(mock_tickets) + "\n```"
|
||||
track_brief = "Test track brief"
|
||||
module_skeletons = "Test skeletons"
|
||||
# Call the function
|
||||
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
|
||||
# Verify set_provider was called
|
||||
mock_set_provider.assert_called_with('gemini', 'gemini-2.5-flash-lite')
|
||||
mock_reset_session.assert_called_once()
|
||||
# Verify send was called
|
||||
mock_send.assert_called_once()
|
||||
args, kwargs = mock_send.call_args
|
||||
self.assertEqual(kwargs['md_content'], "")
|
||||
self.assertIn(track_brief, kwargs['user_message'])
|
||||
self.assertIn(module_skeletons, kwargs['user_message'])
|
||||
# Verify tickets were parsed correctly
|
||||
self.assertEqual(tickets, mock_tickets)
|
||||
|
||||
# Verify set_provider was called
|
||||
mock_set_provider.assert_called_with('gemini', 'gemini-2.5-flash-lite')
|
||||
mock_reset_session.assert_called_once()
|
||||
|
||||
# Verify send was called
|
||||
mock_send.assert_called_once()
|
||||
args, kwargs = mock_send.call_args
|
||||
self.assertEqual(kwargs['md_content'], "")
|
||||
self.assertIn(track_brief, kwargs['user_message'])
|
||||
self.assertIn(module_skeletons, kwargs['user_message'])
|
||||
|
||||
# Verify tickets were parsed correctly
|
||||
self.assertEqual(tickets, mock_tickets)
|
||||
|
||||
@patch('ai_client.send')
|
||||
@patch('ai_client.set_provider')
|
||||
@patch('ai_client.reset_session')
|
||||
def test_generate_tickets_parse_error(self, mock_reset_session, mock_set_provider, mock_send):
|
||||
# Setup mock invalid response
|
||||
mock_send.return_value = "Invalid JSON"
|
||||
|
||||
# Call the function
|
||||
tickets = conductor_tech_lead.generate_tickets("brief", "skeletons")
|
||||
|
||||
# Verify it returns an empty list on parse error
|
||||
self.assertEqual(tickets, [])
|
||||
@patch('ai_client.send')
|
||||
@patch('ai_client.set_provider')
|
||||
@patch('ai_client.reset_session')
|
||||
def test_generate_tickets_parse_error(self, mock_reset_session, mock_set_provider, mock_send):
|
||||
# Setup mock invalid response
|
||||
mock_send.return_value = "Invalid JSON"
|
||||
# Call the function
|
||||
tickets = conductor_tech_lead.generate_tickets("brief", "skeletons")
|
||||
# Verify it returns an empty list on parse error
|
||||
self.assertEqual(tickets, [])
|
||||
|
||||
class TestTopologicalSort(unittest.TestCase):
|
||||
def test_topological_sort_empty(self):
|
||||
tickets = []
|
||||
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
|
||||
self.assertEqual(sorted_tickets, [])
|
||||
def test_topological_sort_empty(self):
|
||||
tickets = []
|
||||
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
|
||||
self.assertEqual(sorted_tickets, [])
|
||||
|
||||
def test_topological_sort_linear(self):
|
||||
tickets = [
|
||||
{"id": "t2", "depends_on": ["t1"]},
|
||||
{"id": "t1", "depends_on": []},
|
||||
{"id": "t3", "depends_on": ["t2"]},
|
||||
]
|
||||
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
|
||||
ids = [t["id"] for t in sorted_tickets]
|
||||
self.assertEqual(ids, ["t1", "t2", "t3"])
|
||||
def test_topological_sort_linear(self):
|
||||
tickets = [
|
||||
{"id": "t2", "depends_on": ["t1"]},
|
||||
{"id": "t1", "depends_on": []},
|
||||
{"id": "t3", "depends_on": ["t2"]},
|
||||
]
|
||||
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
|
||||
ids = [t["id"] for t in sorted_tickets]
|
||||
self.assertEqual(ids, ["t1", "t2", "t3"])
|
||||
|
||||
def test_topological_sort_complex(self):
|
||||
# t1
|
||||
# | \
|
||||
# t2 t3
|
||||
# | /
|
||||
# t4
|
||||
tickets = [
|
||||
{"id": "t4", "depends_on": ["t2", "t3"]},
|
||||
{"id": "t3", "depends_on": ["t1"]},
|
||||
{"id": "t2", "depends_on": ["t1"]},
|
||||
{"id": "t1", "depends_on": []},
|
||||
]
|
||||
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
|
||||
ids = [t["id"] for t in sorted_tickets]
|
||||
# Possible valid orders: [t1, t2, t3, t4] or [t1, t3, t2, t4]
|
||||
self.assertEqual(ids[0], "t1")
|
||||
self.assertEqual(ids[-1], "t4")
|
||||
self.assertSetEqual(set(ids[1:3]), {"t2", "t3"})
|
||||
def test_topological_sort_complex(self):
|
||||
# t1
|
||||
# | \
|
||||
# t2 t3
|
||||
# | /
|
||||
# t4
|
||||
tickets = [
|
||||
{"id": "t4", "depends_on": ["t2", "t3"]},
|
||||
{"id": "t3", "depends_on": ["t1"]},
|
||||
{"id": "t2", "depends_on": ["t1"]},
|
||||
{"id": "t1", "depends_on": []},
|
||||
]
|
||||
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
|
||||
ids = [t["id"] for t in sorted_tickets]
|
||||
# Possible valid orders: [t1, t2, t3, t4] or [t1, t3, t2, t4]
|
||||
self.assertEqual(ids[0], "t1")
|
||||
self.assertEqual(ids[-1], "t4")
|
||||
self.assertSetEqual(set(ids[1:3]), {"t2", "t3"})
|
||||
|
||||
def test_topological_sort_cycle(self):
|
||||
tickets = [
|
||||
{"id": "t1", "depends_on": ["t2"]},
|
||||
{"id": "t2", "depends_on": ["t1"]},
|
||||
]
|
||||
with self.assertRaises(ValueError) as cm:
|
||||
conductor_tech_lead.topological_sort(tickets)
|
||||
self.assertIn("Circular dependency detected", str(cm.exception))
|
||||
def test_topological_sort_cycle(self):
|
||||
tickets = [
|
||||
{"id": "t1", "depends_on": ["t2"]},
|
||||
{"id": "t2", "depends_on": ["t1"]},
|
||||
]
|
||||
with self.assertRaises(ValueError) as cm:
|
||||
conductor_tech_lead.topological_sort(tickets)
|
||||
self.assertIn("Circular dependency detected", str(cm.exception))
|
||||
|
||||
def test_topological_sort_missing_dependency(self):
|
||||
# If a ticket depends on something not in the list, we should probably handle it or let it fail.
|
||||
# Usually in our context, we only care about dependencies within the same track.
|
||||
tickets = [
|
||||
{"id": "t1", "depends_on": ["missing"]},
|
||||
]
|
||||
# For now, let's assume it should raise an error if a dependency is missing within the set we are sorting,
|
||||
# OR it should just treat it as "ready" if it's external?
|
||||
# Actually, let's just test that it doesn't crash if it's not a cycle.
|
||||
# But if 'missing' is not in tickets, it will never be satisfied.
|
||||
# Let's say it raises ValueError for missing internal dependencies.
|
||||
with self.assertRaises(ValueError):
|
||||
conductor_tech_lead.topological_sort(tickets)
|
||||
def test_topological_sort_missing_dependency(self):
|
||||
# If a ticket depends on something not in the list, we should probably handle it or let it fail.
|
||||
# Usually in our context, we only care about dependencies within the same track.
|
||||
tickets = [
|
||||
{"id": "t1", "depends_on": ["missing"]},
|
||||
]
|
||||
# For now, let's assume it should raise an error if a dependency is missing within the set we are sorting,
|
||||
# OR it should just treat it as "ready" if it's external?
|
||||
# Actually, let's just test that it doesn't crash if it's not a cycle.
|
||||
# But if 'missing' is not in tickets, it will never be satisfied.
|
||||
# Let's say it raises ValueError for missing internal dependencies.
|
||||
with self.assertRaises(ValueError):
|
||||
conductor_tech_lead.topological_sort(tickets)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
unittest.main()
|
||||
|
||||
@@ -3,82 +3,72 @@ from models import Ticket
|
||||
from dag_engine import TrackDAG
|
||||
|
||||
def test_get_ready_tasks_linear():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
ready = dag.get_ready_tasks()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T2"
|
||||
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
ready = dag.get_ready_tasks()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T2"
|
||||
|
||||
def test_get_ready_tasks_branching():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
ready = dag.get_ready_tasks()
|
||||
assert len(ready) == 2
|
||||
ready_ids = {t.id for t in ready}
|
||||
assert ready_ids == {"T2", "T3"}
|
||||
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
ready = dag.get_ready_tasks()
|
||||
assert len(ready) == 2
|
||||
ready_ids = {t.id for t in ready}
|
||||
assert ready_ids == {"T2", "T3"}
|
||||
|
||||
def test_has_cycle_no_cycle():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
|
||||
dag = TrackDAG([t1, t2])
|
||||
assert not dag.has_cycle()
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
dag = TrackDAG([t1, t2])
|
||||
assert not dag.has_cycle()
|
||||
|
||||
def test_has_cycle_direct_cycle():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
|
||||
dag = TrackDAG([t1, t2])
|
||||
assert dag.has_cycle()
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
dag = TrackDAG([t1, t2])
|
||||
assert dag.has_cycle()
|
||||
|
||||
def test_has_cycle_indirect_cycle():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T3"])
|
||||
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
assert dag.has_cycle()
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T3"])
|
||||
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
assert dag.has_cycle()
|
||||
|
||||
def test_has_cycle_complex_no_cycle():
|
||||
# T1 -> T2, T1 -> T3, T2 -> T4, T3 -> T4
|
||||
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T4"])
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T4"])
|
||||
t4 = Ticket(id="T4", description="T4", status="todo", assigned_to="worker")
|
||||
|
||||
dag = TrackDAG([t1, t2, t3, t4])
|
||||
assert not dag.has_cycle()
|
||||
# T1 -> T2, T1 -> T3, T2 -> T4, T3 -> T4
|
||||
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T4"])
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T4"])
|
||||
t4 = Ticket(id="T4", description="T4", status="todo", assigned_to="worker")
|
||||
dag = TrackDAG([t1, t2, t3, t4])
|
||||
assert not dag.has_cycle()
|
||||
|
||||
def test_get_ready_tasks_multiple_deps():
|
||||
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="T2", status="completed", assigned_to="worker")
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T1", "T2"])
|
||||
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
assert [t.id for t in dag.get_ready_tasks()] == ["T3"]
|
||||
|
||||
t2.status = "todo"
|
||||
assert [t.id for t in dag.get_ready_tasks()] == ["T2"]
|
||||
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="T2", status="completed", assigned_to="worker")
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T1", "T2"])
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
assert [t.id for t in dag.get_ready_tasks()] == ["T3"]
|
||||
t2.status = "todo"
|
||||
assert [t.id for t in dag.get_ready_tasks()] == ["T2"]
|
||||
|
||||
def test_topological_sort():
|
||||
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
sort = dag.topological_sort()
|
||||
assert sort == ["T1", "T2", "T3"]
|
||||
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
sort = dag.topological_sort()
|
||||
assert sort == ["T1", "T2", "T3"]
|
||||
|
||||
def test_topological_sort_cycle():
|
||||
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
|
||||
dag = TrackDAG([t1, t2])
|
||||
with pytest.raises(ValueError, match="Dependency cycle detected"):
|
||||
dag.topological_sort()
|
||||
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
dag = TrackDAG([t1, t2])
|
||||
with pytest.raises(ValueError, match="Dependency cycle detected"):
|
||||
dag.topological_sort()
|
||||
|
||||
@@ -12,54 +12,51 @@ import ai_client
|
||||
import project_manager
|
||||
|
||||
def test_credentials_error_mentions_deepseek(monkeypatch):
|
||||
"""
|
||||
"""
|
||||
Verify that the error message shown when credentials.toml is missing
|
||||
includes deepseek instructions.
|
||||
"""
|
||||
# Monkeypatch SLOP_CREDENTIALS to a non-existent file
|
||||
monkeypatch.setenv("SLOP_CREDENTIALS", "non_existent_credentials_file.toml")
|
||||
|
||||
with pytest.raises(FileNotFoundError) as excinfo:
|
||||
ai_client._load_credentials()
|
||||
|
||||
err_msg = str(excinfo.value)
|
||||
assert "[deepseek]" in err_msg
|
||||
assert "api_key" in err_msg
|
||||
# Monkeypatch SLOP_CREDENTIALS to a non-existent file
|
||||
monkeypatch.setenv("SLOP_CREDENTIALS", "non_existent_credentials_file.toml")
|
||||
with pytest.raises(FileNotFoundError) as excinfo:
|
||||
ai_client._load_credentials()
|
||||
err_msg = str(excinfo.value)
|
||||
assert "[deepseek]" in err_msg
|
||||
assert "api_key" in err_msg
|
||||
|
||||
def test_default_project_includes_reasoning_role():
|
||||
"""
|
||||
"""
|
||||
Verify that 'Reasoning' is included in the default discussion roles
|
||||
to support DeepSeek-R1 reasoning traces.
|
||||
"""
|
||||
proj = project_manager.default_project("test")
|
||||
roles = proj["discussion"]["roles"]
|
||||
assert "Reasoning" in roles
|
||||
proj = project_manager.default_project("test")
|
||||
roles = proj["discussion"]["roles"]
|
||||
assert "Reasoning" in roles
|
||||
|
||||
def test_gui_providers_list():
|
||||
"""
|
||||
"""
|
||||
Check if 'deepseek' is in the GUI's provider list.
|
||||
"""
|
||||
import gui_2
|
||||
assert "deepseek" in gui_2.PROVIDERS
|
||||
import gui_2
|
||||
assert "deepseek" in gui_2.PROVIDERS
|
||||
|
||||
def test_deepseek_model_listing():
|
||||
"""
|
||||
"""
|
||||
Verify that list_models for deepseek returns expected models.
|
||||
"""
|
||||
models = ai_client.list_models("deepseek")
|
||||
assert "deepseek-chat" in models
|
||||
assert "deepseek-reasoner" in models
|
||||
models = ai_client.list_models("deepseek")
|
||||
assert "deepseek-chat" in models
|
||||
assert "deepseek-reasoner" in models
|
||||
|
||||
def test_gui_provider_list_via_hooks(live_gui):
|
||||
"""
|
||||
"""
|
||||
Verify 'deepseek' is present in the GUI provider list using API hooks.
|
||||
"""
|
||||
from api_hook_client import ApiHookClient
|
||||
import time
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=10)
|
||||
|
||||
# Attempt to set provider to deepseek to verify it's an allowed value
|
||||
client.set_value('current_provider', 'deepseek')
|
||||
time.sleep(0.5)
|
||||
assert client.get_value('current_provider') == 'deepseek'
|
||||
from api_hook_client import ApiHookClient
|
||||
import time
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=10)
|
||||
# Attempt to set provider to deepseek to verify it's an allowed value
|
||||
client.set_value('current_provider', 'deepseek')
|
||||
time.sleep(0.5)
|
||||
assert client.get_value('current_provider') == 'deepseek'
|
||||
|
||||
@@ -3,137 +3,124 @@ from unittest.mock import patch, MagicMock
|
||||
import ai_client
|
||||
|
||||
def test_deepseek_model_selection():
|
||||
"""
|
||||
"""
|
||||
Verifies that ai_client.set_provider('deepseek', 'deepseek-chat') correctly updates the internal state.
|
||||
"""
|
||||
ai_client.set_provider("deepseek", "deepseek-chat")
|
||||
assert ai_client._provider == "deepseek"
|
||||
assert ai_client._model == "deepseek-chat"
|
||||
ai_client.set_provider("deepseek", "deepseek-chat")
|
||||
assert ai_client._provider == "deepseek"
|
||||
assert ai_client._model == "deepseek-chat"
|
||||
|
||||
def test_deepseek_completion_logic():
|
||||
"""
|
||||
"""
|
||||
Verifies that ai_client.send() correctly calls the DeepSeek API and returns content.
|
||||
"""
|
||||
ai_client.set_provider("deepseek", "deepseek-chat")
|
||||
|
||||
with patch("requests.post") as mock_post:
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = {
|
||||
"choices": [{
|
||||
"message": {"role": "assistant", "content": "DeepSeek Response"},
|
||||
"finish_reason": "stop"
|
||||
}],
|
||||
"usage": {"prompt_tokens": 10, "completion_tokens": 5}
|
||||
}
|
||||
mock_post.return_value = mock_response
|
||||
|
||||
result = ai_client.send(md_content="Context", user_message="Hello", base_dir=".")
|
||||
assert result == "DeepSeek Response"
|
||||
assert mock_post.called
|
||||
ai_client.set_provider("deepseek", "deepseek-chat")
|
||||
with patch("requests.post") as mock_post:
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = {
|
||||
"choices": [{
|
||||
"message": {"role": "assistant", "content": "DeepSeek Response"},
|
||||
"finish_reason": "stop"
|
||||
}],
|
||||
"usage": {"prompt_tokens": 10, "completion_tokens": 5}
|
||||
}
|
||||
mock_post.return_value = mock_response
|
||||
result = ai_client.send(md_content="Context", user_message="Hello", base_dir=".")
|
||||
assert result == "DeepSeek Response"
|
||||
assert mock_post.called
|
||||
|
||||
def test_deepseek_reasoning_logic():
|
||||
"""
|
||||
"""
|
||||
Verifies that reasoning_content is captured and wrapped in <thinking> tags.
|
||||
"""
|
||||
ai_client.set_provider("deepseek", "deepseek-reasoner")
|
||||
|
||||
with patch("requests.post") as mock_post:
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = {
|
||||
"choices": [{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "Final Answer",
|
||||
"reasoning_content": "Chain of thought"
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}],
|
||||
"usage": {"prompt_tokens": 10, "completion_tokens": 20}
|
||||
}
|
||||
mock_post.return_value = mock_response
|
||||
|
||||
result = ai_client.send(md_content="Context", user_message="Reasoning test", base_dir=".")
|
||||
assert "<thinking>\nChain of thought\n</thinking>" in result
|
||||
assert "Final Answer" in result
|
||||
ai_client.set_provider("deepseek", "deepseek-reasoner")
|
||||
with patch("requests.post") as mock_post:
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.json.return_value = {
|
||||
"choices": [{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "Final Answer",
|
||||
"reasoning_content": "Chain of thought"
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}],
|
||||
"usage": {"prompt_tokens": 10, "completion_tokens": 20}
|
||||
}
|
||||
mock_post.return_value = mock_response
|
||||
result = ai_client.send(md_content="Context", user_message="Reasoning test", base_dir=".")
|
||||
assert "<thinking>\nChain of thought\n</thinking>" in result
|
||||
assert "Final Answer" in result
|
||||
|
||||
def test_deepseek_tool_calling():
|
||||
"""
|
||||
"""
|
||||
Verifies that DeepSeek provider correctly identifies and executes tool calls.
|
||||
"""
|
||||
ai_client.set_provider("deepseek", "deepseek-chat")
|
||||
|
||||
with patch("requests.post") as mock_post, \
|
||||
patch("mcp_client.dispatch") as mock_dispatch:
|
||||
|
||||
# 1. Mock first response with a tool call
|
||||
mock_resp1 = MagicMock()
|
||||
mock_resp1.status_code = 200
|
||||
mock_resp1.json.return_value = {
|
||||
"choices": [{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "Let me read that file.",
|
||||
"tool_calls": [{
|
||||
"id": "call_123",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "read_file",
|
||||
"arguments": '{"path": "test.txt"}'
|
||||
}
|
||||
}]
|
||||
},
|
||||
"finish_reason": "tool_calls"
|
||||
}],
|
||||
"usage": {"prompt_tokens": 50, "completion_tokens": 10}
|
||||
ai_client.set_provider("deepseek", "deepseek-chat")
|
||||
with patch("requests.post") as mock_post, \
|
||||
patch("mcp_client.dispatch") as mock_dispatch:
|
||||
# 1. Mock first response with a tool call
|
||||
mock_resp1 = MagicMock()
|
||||
mock_resp1.status_code = 200
|
||||
mock_resp1.json.return_value = {
|
||||
"choices": [{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "Let me read that file.",
|
||||
"tool_calls": [{
|
||||
"id": "call_123",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "read_file",
|
||||
"arguments": '{"path": "test.txt"}'
|
||||
}
|
||||
|
||||
# 2. Mock second response (final answer)
|
||||
mock_resp2 = MagicMock()
|
||||
mock_resp2.status_code = 200
|
||||
mock_resp2.json.return_value = {
|
||||
"choices": [{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "File content is: Hello World"
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}],
|
||||
"usage": {"prompt_tokens": 100, "completion_tokens": 20}
|
||||
}
|
||||
|
||||
mock_post.side_effect = [mock_resp1, mock_resp2]
|
||||
mock_dispatch.return_value = "Hello World"
|
||||
|
||||
result = ai_client.send(md_content="Context", user_message="Read test.txt", base_dir=".")
|
||||
|
||||
assert "File content is: Hello World" in result
|
||||
assert mock_dispatch.called
|
||||
assert mock_dispatch.call_args[0][0] == "read_file"
|
||||
assert mock_dispatch.call_args[0][1] == {"path": "test.txt"}
|
||||
}]
|
||||
},
|
||||
"finish_reason": "tool_calls"
|
||||
}],
|
||||
"usage": {"prompt_tokens": 50, "completion_tokens": 10}
|
||||
}
|
||||
# 2. Mock second response (final answer)
|
||||
mock_resp2 = MagicMock()
|
||||
mock_resp2.status_code = 200
|
||||
mock_resp2.json.return_value = {
|
||||
"choices": [{
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "File content is: Hello World"
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}],
|
||||
"usage": {"prompt_tokens": 100, "completion_tokens": 20}
|
||||
}
|
||||
mock_post.side_effect = [mock_resp1, mock_resp2]
|
||||
mock_dispatch.return_value = "Hello World"
|
||||
result = ai_client.send(md_content="Context", user_message="Read test.txt", base_dir=".")
|
||||
assert "File content is: Hello World" in result
|
||||
assert mock_dispatch.called
|
||||
assert mock_dispatch.call_args[0][0] == "read_file"
|
||||
assert mock_dispatch.call_args[0][1] == {"path": "test.txt"}
|
||||
|
||||
def test_deepseek_streaming():
|
||||
"""
|
||||
"""
|
||||
Verifies that DeepSeek provider correctly aggregates streaming chunks.
|
||||
"""
|
||||
ai_client.set_provider("deepseek", "deepseek-chat")
|
||||
|
||||
with patch("requests.post") as mock_post:
|
||||
# Mock a streaming response
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
|
||||
# Simulate OpenAI-style server-sent events (SSE) for streaming
|
||||
# Each line starts with 'data: ' and contains a JSON object
|
||||
chunks = [
|
||||
'data: {"choices": [{"delta": {"role": "assistant", "content": "Hello"}, "index": 0, "finish_reason": null}]}',
|
||||
'data: {"choices": [{"delta": {"content": " World"}, "index": 0, "finish_reason": null}]}',
|
||||
'data: {"choices": [{"delta": {}, "index": 0, "finish_reason": "stop"}]}',
|
||||
'data: [DONE]'
|
||||
]
|
||||
mock_response.iter_lines.return_value = [c.encode('utf-8') for c in chunks]
|
||||
mock_post.return_value = mock_response
|
||||
|
||||
result = ai_client.send(md_content="Context", user_message="Stream test", base_dir=".", stream=True)
|
||||
assert result == "Hello World"
|
||||
ai_client.set_provider("deepseek", "deepseek-chat")
|
||||
with patch("requests.post") as mock_post:
|
||||
# Mock a streaming response
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
# Simulate OpenAI-style server-sent events (SSE) for streaming
|
||||
# Each line starts with 'data: ' and contains a JSON object
|
||||
chunks = [
|
||||
'data: {"choices": [{"delta": {"role": "assistant", "content": "Hello"}, "index": 0, "finish_reason": null}]}',
|
||||
'data: {"choices": [{"delta": {"content": " World"}, "index": 0, "finish_reason": null}]}',
|
||||
'data: {"choices": [{"delta": {}, "index": 0, "finish_reason": "stop"}]}',
|
||||
'data: [DONE]'
|
||||
]
|
||||
mock_response.iter_lines.return_value = [c.encode('utf-8') for c in chunks]
|
||||
mock_post.return_value = mock_response
|
||||
result = ai_client.send(md_content="Context", user_message="Stream test", base_dir=".", stream=True)
|
||||
assert result == "Hello World"
|
||||
|
||||
@@ -3,121 +3,99 @@ from models import Ticket
|
||||
from dag_engine import TrackDAG, ExecutionEngine
|
||||
|
||||
def test_execution_engine_basic_flow():
|
||||
# Setup tickets with dependencies
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t4 = Ticket(id="T4", description="Task 4", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
|
||||
|
||||
dag = TrackDAG([t1, t2, t3, t4])
|
||||
engine = ExecutionEngine(dag)
|
||||
|
||||
# Tick 1: Only T1 should be ready
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T1"
|
||||
|
||||
# Complete T1
|
||||
engine.update_task_status("T1", "completed")
|
||||
|
||||
# Tick 2: T2 and T3 should be ready
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 2
|
||||
ids = {t.id for t in ready}
|
||||
assert ids == {"T2", "T3"}
|
||||
|
||||
# Complete T2
|
||||
engine.update_task_status("T2", "completed")
|
||||
|
||||
# Tick 3: Only T3 should be ready (T4 depends on T2 AND T3)
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T3"
|
||||
|
||||
# Complete T3
|
||||
engine.update_task_status("T3", "completed")
|
||||
|
||||
# Tick 4: T4 should be ready
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T4"
|
||||
|
||||
# Complete T4
|
||||
engine.update_task_status("T4", "completed")
|
||||
|
||||
# Tick 5: Nothing ready
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 0
|
||||
# Setup tickets with dependencies
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t4 = Ticket(id="T4", description="Task 4", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
|
||||
dag = TrackDAG([t1, t2, t3, t4])
|
||||
engine = ExecutionEngine(dag)
|
||||
# Tick 1: Only T1 should be ready
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T1"
|
||||
# Complete T1
|
||||
engine.update_task_status("T1", "completed")
|
||||
# Tick 2: T2 and T3 should be ready
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 2
|
||||
ids = {t.id for t in ready}
|
||||
assert ids == {"T2", "T3"}
|
||||
# Complete T2
|
||||
engine.update_task_status("T2", "completed")
|
||||
# Tick 3: Only T3 should be ready (T4 depends on T2 AND T3)
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T3"
|
||||
# Complete T3
|
||||
engine.update_task_status("T3", "completed")
|
||||
# Tick 4: T4 should be ready
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T4"
|
||||
# Complete T4
|
||||
engine.update_task_status("T4", "completed")
|
||||
# Tick 5: Nothing ready
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 0
|
||||
|
||||
def test_execution_engine_update_nonexistent_task():
|
||||
dag = TrackDAG([])
|
||||
engine = ExecutionEngine(dag)
|
||||
# Should not raise error, or handle gracefully
|
||||
engine.update_task_status("NONEXISTENT", "completed")
|
||||
dag = TrackDAG([])
|
||||
engine = ExecutionEngine(dag)
|
||||
# Should not raise error, or handle gracefully
|
||||
engine.update_task_status("NONEXISTENT", "completed")
|
||||
|
||||
def test_execution_engine_status_persistence():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
|
||||
dag = TrackDAG([t1])
|
||||
engine = ExecutionEngine(dag)
|
||||
|
||||
engine.update_task_status("T1", "in_progress")
|
||||
assert t1.status == "in_progress"
|
||||
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 0 # Only 'todo' tasks should be returned by tick() if they are ready
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
|
||||
dag = TrackDAG([t1])
|
||||
engine = ExecutionEngine(dag)
|
||||
engine.update_task_status("T1", "in_progress")
|
||||
assert t1.status == "in_progress"
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 0 # Only 'todo' tasks should be returned by tick() if they are ready
|
||||
|
||||
def test_execution_engine_auto_queue():
    """With auto_queue=True, tick() marks ready tasks 'in_progress' automatically,
    and dependents only become ready once their dependencies are 'completed'."""
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
    t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])

    dag = TrackDAG([t1, t2])
    engine = ExecutionEngine(dag, auto_queue=True)

    # Tick 1: T1 is ready and should be automatically marked as 'in_progress'
    ready = engine.tick()
    assert len(ready) == 1
    assert ready[0].id == "T1"
    assert t1.status == "in_progress"

    # Tick 2: T1 is in_progress, so T2 is NOT ready yet (T1 must be 'completed')
    ready = engine.tick()
    assert len(ready) == 0
    assert t2.status == "todo"

    # Complete T1
    engine.update_task_status("T1", "completed")

    # Tick 3: T2 is now ready and should be automatically marked as 'in_progress'
    ready = engine.tick()
    assert len(ready) == 1
    assert ready[0].id == "T2"
    assert t2.status == "in_progress"
||||
|
||||
def test_execution_engine_step_mode():
    """A step_mode=True ticket is reported as ready but is never auto-queued;
    it only transitions to 'in_progress' after an explicit approve_task()."""
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", step_mode=True)

    dag = TrackDAG([t1])
    engine = ExecutionEngine(dag, auto_queue=True)

    # Tick 1: T1 is ready, but step_mode=True, so it should NOT be automatically marked as 'in_progress'
    ready = engine.tick()
    assert len(ready) == 1
    assert ready[0].id == "T1"
    assert t1.status == "todo"

    # Manual approval
    engine.approve_task("T1")
    assert t1.status == "in_progress"

    # Tick 2: T1 is already in_progress, should not be returned by tick() (it's not 'ready'/todo)
    ready = engine.tick()
    assert len(ready) == 0
||||
|
||||
def test_execution_engine_approve_task():
    """approve_task() must work even when the engine was built with auto_queue=False."""
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
    dag = TrackDAG([t1])
    engine = ExecutionEngine(dag, auto_queue=False)

    # Should be able to approve even if auto_queue is False
    engine.approve_task("T1")
    assert t1.status == "in_progress"
||||
|
||||
@@ -14,44 +14,40 @@ from simulation.sim_execution import ExecutionSimulation
|
||||
|
||||
@pytest.mark.integration
def test_context_sim_live(live_gui):
    """Run the Context & Chat simulation against a live GUI."""
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)

    sim = ContextSimulation(client)
    sim.setup("LiveContextSim")
    sim.run()
    sim.teardown()
||||
|
||||
@pytest.mark.integration
def test_ai_settings_sim_live(live_gui):
    """Run the AI Settings simulation against a live GUI."""
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)

    sim = AISettingsSimulation(client)
    sim.setup("LiveAISettingsSim")
    sim.run()
    sim.teardown()
||||
|
||||
@pytest.mark.integration
def test_tools_sim_live(live_gui):
    """Run the Tools & Search simulation against a live GUI."""
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)

    sim = ToolsSimulation(client)
    sim.setup("LiveToolsSim")
    sim.run()
    sim.teardown()
||||
|
||||
@pytest.mark.integration
def test_execution_sim_live(live_gui):
    """Run the Execution & Modals simulation against a live GUI."""
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)

    sim = ExecutionSimulation(client)
    sim.setup("LiveExecutionSim")
    sim.run()
    sim.teardown()
||||
|
||||
@@ -12,119 +12,105 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from gemini_cli_adapter import GeminiCliAdapter
|
||||
|
||||
class TestGeminiCliAdapter(unittest.TestCase):
    """Unit tests for GeminiCliAdapter.send() with subprocess.Popen mocked out."""

    def setUp(self):
        # Fresh adapter per test; no real binary is ever executed (Popen is patched).
        self.adapter = GeminiCliAdapter(binary_path="gemini")

    @staticmethod
    def _make_process_mock(stdout_content):
        """Build a MagicMock standing in for a finished CLI process whose
        stdout is `stdout_content` and whose exit code is 0."""
        process_mock = MagicMock()
        process_mock.communicate.return_value = (stdout_content, "")
        process_mock.poll.return_value = 0
        process_mock.wait.return_value = 0
        return process_mock

    @patch('subprocess.Popen')
    def test_send_starts_subprocess_with_correct_args(self, mock_popen):
        """
        Verify that send(message) correctly starts the subprocess with
        --output-format stream-json and the provided message via stdin using communicate.
        """
        # Setup mock process with a minimal valid JSONL termination
        stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
        process_mock = self._make_process_mock(stdout_content)
        mock_popen.return_value = process_mock

        message = "Hello Gemini CLI"
        self.adapter.send(message)

        # Verify subprocess.Popen call
        mock_popen.assert_called_once()
        args, kwargs = mock_popen.call_args
        cmd = args[0]
        # Check mandatory CLI components
        self.assertIn("gemini", cmd)
        self.assertIn("--output-format", cmd)
        self.assertIn("stream-json", cmd)
        # Message should NOT be in cmd now
        self.assertNotIn(message, cmd)
        # Verify message was sent via communicate
        process_mock.communicate.assert_called_once_with(input=message)
        # Check process configuration
        self.assertEqual(kwargs.get('stdout'), subprocess.PIPE)
        self.assertEqual(kwargs.get('stdin'), subprocess.PIPE)
        self.assertEqual(kwargs.get('text'), True)

    @patch('subprocess.Popen')
    def test_send_parses_jsonl_output(self, mock_popen):
        """
        Verify that it correctly parses multiple JSONL 'message' events
        and returns the combined text.
        """
        jsonl_output = [
            json.dumps({"type": "message", "role": "model", "text": "The quick brown "}),
            json.dumps({"type": "message", "role": "model", "text": "fox jumps."}),
            json.dumps({"type": "result", "usage": {"prompt_tokens": 5, "candidates_tokens": 5}})
        ]
        stdout_content = "\n".join(jsonl_output) + "\n"
        mock_popen.return_value = self._make_process_mock(stdout_content)

        result = self.adapter.send("test message")
        self.assertEqual(result["text"], "The quick brown fox jumps.")
        self.assertEqual(result["tool_calls"], [])

    @patch('subprocess.Popen')
    def test_send_handles_tool_use_events(self, mock_popen):
        """
        Verify that it correctly handles 'tool_use' events in the stream
        by continuing to read until the final 'result' event.
        """
        jsonl_output = [
            json.dumps({"type": "message", "role": "assistant", "text": "Calling tool..."}),
            json.dumps({"type": "tool_use", "name": "read_file", "args": {"path": "test.txt"}}),
            json.dumps({"type": "message", "role": "assistant", "text": "\nFile read successfully."}),
            json.dumps({"type": "result", "usage": {}})
        ]
        stdout_content = "\n".join(jsonl_output) + "\n"
        mock_popen.return_value = self._make_process_mock(stdout_content)

        result = self.adapter.send("read test.txt")

        # Result should contain the combined text from all 'message' events
        self.assertEqual(result["text"], "Calling tool...\nFile read successfully.")
        self.assertEqual(len(result["tool_calls"]), 1)
        self.assertEqual(result["tool_calls"][0]["name"], "read_file")

    @patch('subprocess.Popen')
    def test_send_captures_usage_metadata(self, mock_popen):
        """
        Verify that usage data is extracted from the 'result' event.
        """
        usage_data = {"total_tokens": 42}
        jsonl_output = [
            json.dumps({"type": "message", "text": "Finalizing"}),
            json.dumps({"type": "result", "usage": usage_data})
        ]
        stdout_content = "\n".join(jsonl_output) + "\n"
        mock_popen.return_value = self._make_process_mock(stdout_content)

        self.adapter.send("usage test")

        # Verify the usage was captured in the adapter instance
        self.assertEqual(self.adapter.last_usage, usage_data)
||||
|
||||
if __name__ == '__main__':
    unittest.main()
|
||||
|
||||
@@ -9,168 +9,143 @@ import os
|
||||
# Ensure the project root is in sys.path to resolve imports correctly
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

# Import the class to be tested
from gemini_cli_adapter import GeminiCliAdapter
|
||||
|
||||
class TestGeminiCliAdapterParity(unittest.TestCase):
    """Parity tests for GeminiCliAdapter: CLI-flag handling, token estimation,
    and error-path cleanup, with Popen and the session logger mocked out."""

    def setUp(self):
        """Set up a fresh adapter instance and reset session state for each test."""
        # Patch session_logger to prevent file operations during tests
        self.session_logger_patcher = patch('gemini_cli_adapter.session_logger')
        self.mock_session_logger = self.session_logger_patcher.start()

        self.adapter = GeminiCliAdapter(binary_path="gemini")
        self.adapter.session_id = None
        self.adapter.last_usage = None
        self.adapter.last_latency = 0.0

    def tearDown(self):
        self.session_logger_patcher.stop()

    @staticmethod
    def _make_process_mock():
        """Build a MagicMock for a successfully finished CLI process that emits
        a minimal terminating 'result' event on stdout."""
        process_mock = MagicMock()
        mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
        process_mock.communicate.return_value = (mock_stdout_content, "")
        process_mock.returncode = 0
        return process_mock

    @patch('subprocess.Popen')
    def test_count_tokens_uses_estimation(self, mock_popen):
        """
        Test that count_tokens uses character-based estimation.
        """
        contents_to_count = ["This is the first line.", "This is the second line."]
        expected_chars = len("\n".join(contents_to_count))
        expected_tokens = expected_chars // 4

        token_count = self.adapter.count_tokens(contents=contents_to_count)
        self.assertEqual(token_count, expected_tokens)

        # Verify that NO subprocess was started for counting
        mock_popen.assert_not_called()

    @patch('subprocess.Popen')
    def test_send_with_safety_settings_no_flags_added(self, mock_popen):
        """
        Test that the send method does NOT add --safety flags when safety_settings are provided,
        as this functionality is no longer supported via CLI flags.
        """
        process_mock = self._make_process_mock()
        mock_popen.return_value = process_mock

        message_content = "User's prompt here."
        safety_settings = [
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"},
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
        ]

        self.adapter.send(message=message_content, safety_settings=safety_settings)

        args, kwargs = mock_popen.call_args
        command = args[0]
        # Verify that no --safety flags were added to the command
        self.assertNotIn("--safety", command)
        # Verify that the message was passed correctly via stdin
        process_mock.communicate.assert_called_once_with(input=message_content)

    @patch('subprocess.Popen')
    def test_send_without_safety_settings_no_flags(self, mock_popen):
        """
        Test that when safety_settings is None or an empty list, no --safety flags are added.
        """
        mock_popen.return_value = self._make_process_mock()

        message_content = "Another prompt."

        self.adapter.send(message=message_content, safety_settings=None)
        args_none, _ = mock_popen.call_args
        self.assertNotIn("--safety", args_none[0])

        mock_popen.reset_mock()

        self.adapter.send(message=message_content, safety_settings=[])
        args_empty, _ = mock_popen.call_args
        self.assertNotIn("--safety", args_empty[0])

    @patch('subprocess.Popen')
    def test_send_with_system_instruction_prepended_to_stdin(self, mock_popen):
        """
        Test that the send method prepends the system instruction to the prompt
        sent via stdin, and does NOT add a --system flag to the command.
        """
        process_mock = self._make_process_mock()
        mock_popen.return_value = process_mock

        message_content = "User's prompt here."
        system_instruction_text = "Some instruction"
        expected_input = f"{system_instruction_text}\n\n{message_content}"

        self.adapter.send(message=message_content, system_instruction=system_instruction_text)

        args, kwargs = mock_popen.call_args
        command = args[0]
        # Verify that the system instruction was prepended to the input sent to communicate
        process_mock.communicate.assert_called_once_with(input=expected_input)
        # Verify that no --system flag was added to the command
        self.assertNotIn("--system", command)

    @patch('subprocess.Popen')
    def test_send_with_model_parameter(self, mock_popen):
        """
        Test that the send method correctly adds the -m <model> flag when a model is specified.
        """
        process_mock = self._make_process_mock()
        mock_popen.return_value = process_mock

        message_content = "User's prompt here."
        model_name = "gemini-1.5-flash"
        expected_command_part = f'-m "{model_name}"'

        self.adapter.send(message=message_content, model=model_name)

        args, kwargs = mock_popen.call_args
        command = args[0]
        # Verify that the -m <model> flag was added to the command
        self.assertIn(expected_command_part, command)
        # Verify that the message was passed correctly via stdin
        process_mock.communicate.assert_called_once_with(input=message_content)

    @patch('subprocess.Popen')
    def test_send_kills_process_on_communicate_exception(self, mock_popen):
        """
        Test that if subprocess.Popen().communicate() raises an exception,
        GeminiCliAdapter.send() kills the process and re-raises the exception.
        """
        mock_process = MagicMock()
        mock_popen.return_value = mock_process

        # Define an exception to simulate
        simulated_exception = RuntimeError("Simulated communicate error")
        mock_process.communicate.side_effect = simulated_exception

        message_content = "User message"

        # Assert that the exception is raised and process is killed
        with self.assertRaises(RuntimeError) as cm:
            self.adapter.send(message=message_content)

        # Verify that the process's kill method was called
        mock_process.kill.assert_called_once()
        # Verify that the correct exception was re-raised
        self.assertIs(cm.exception, simulated_exception)
|
||||
|
||||
if __name__ == '__main__':
    unittest.main()
|
||||
|
||||
@@ -7,66 +7,57 @@ import json
|
||||
from api_hook_client import ApiHookClient
|
||||
|
||||
def test_gemini_cli_context_bleed_prevention(live_gui):
    """
    Test that the GeminiCliAdapter correctly filters out echoed 'user' messages
    and only shows assistant content in the GUI history.
    """
    client = ApiHookClient("http://127.0.0.1:8999")
    client.click("btn_reset")
    client.set_value("auto_add_history", True)

    # Create a specialized mock for context bleed
    bleed_mock = os.path.abspath("tests/mock_context_bleed.py")
    try:
        with open(bleed_mock, "w") as f:
            f.write('''import sys, json
print(json.dumps({"type": "init", "session_id": "bleed-test"}), flush=True)
print(json.dumps({"type": "message", "role": "user", "content": "I am echoing you"}), flush=True)
print(json.dumps({"type": "message", "role": "assistant", "content": "Actual AI Response"}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)
''')

        cli_cmd = f'"{sys.executable}" "{bleed_mock}"'
        client.set_value("current_provider", "gemini_cli")
        client.set_value("gcli_path", cli_cmd)

        client.set_value("ai_input", "Test context bleed")
        client.click("btn_gen_send")

        # Wait for completion
        time.sleep(3)

        session = client.get_session()
        entries = session.get("session", {}).get("entries", [])

        # Verify: We expect exactly one AI entry, and it must NOT contain the echoed user message
        ai_entries = [e for e in entries if e.get("role") == "AI"]
        assert len(ai_entries) == 1
        assert ai_entries[0].get("content") == "Actual AI Response"
        assert "echoing you" not in ai_entries[0].get("content")
    finally:
        # Always clean up the temporary mock script, even on assertion failure
        os.remove(bleed_mock)
||||
|
||||
def test_gemini_cli_parameter_resilience(live_gui):
|
||||
"""
|
||||
"""
|
||||
Test that mcp_client correctly handles 'file_path' and 'dir_path' aliases
|
||||
sent by the AI instead of 'path'.
|
||||
"""
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
|
||||
# Create a mock that uses dir_path for list_directory
|
||||
alias_mock = os.path.abspath("tests/mock_alias_tool.py")
|
||||
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
|
||||
# Avoid backslashes in f-string expression part
|
||||
if sys.platform == "win32":
|
||||
bridge_path_str = bridge_path.replace("\\", "/")
|
||||
else:
|
||||
bridge_path_str = bridge_path
|
||||
|
||||
with open(alias_tool_content := "tests/mock_alias_tool.py", "w") as f:
|
||||
f.write(f'''import sys, json, os, subprocess
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
# Create a mock that uses dir_path for list_directory
|
||||
alias_mock = os.path.abspath("tests/mock_alias_tool.py")
|
||||
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
|
||||
# Avoid backslashes in f-string expression part
|
||||
if sys.platform == "win32":
|
||||
bridge_path_str = bridge_path.replace("\\", "/")
|
||||
else:
|
||||
bridge_path_str = bridge_path
|
||||
with open(alias_tool_content := "tests/mock_alias_tool.py", "w") as f:
|
||||
f.write(f'''import sys, json, os, subprocess
|
||||
prompt = sys.stdin.read()
|
||||
if '"role": "tool"' in prompt:
|
||||
print(json.dumps({{"type": "message", "role": "assistant", "content": "Tool worked!"}}), flush=True)
|
||||
@@ -88,83 +79,71 @@ else:
|
||||
}}), flush=True)
|
||||
print(json.dumps({{"type": "result", "stats": {{"total_tokens": 10}}}}), flush=True)
|
||||
''')
|
||||
|
||||
cli_cmd = f'"{sys.executable}" "{alias_mock}"'
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
|
||||
client.set_value("ai_input", "Test parameter aliases")
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# Handle approval
|
||||
timeout = 15
|
||||
start_time = time.time()
|
||||
approved = False
|
||||
while time.time() - start_time < timeout:
|
||||
for ev in client.get_events():
|
||||
if ev.get("type") == "ask_received":
|
||||
requests.post("http://127.0.0.1:8999/api/ask/respond",
|
||||
json={"request_id": ev.get("request_id"), "response": {"approved": True}})
|
||||
approved = True
|
||||
if approved: break
|
||||
time.sleep(0.5)
|
||||
|
||||
assert approved, "Tool approval event never received"
|
||||
|
||||
# Verify tool result in history
|
||||
time.sleep(2)
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
|
||||
# Check for "Tool worked!" which implies the tool execution was successful
|
||||
found = any("Tool worked!" in e.get("content", "") for e in entries)
|
||||
assert found, "Tool result indicating success not found in history"
|
||||
|
||||
os.remove(alias_mock)
|
||||
cli_cmd = f'"{sys.executable}" "{alias_mock}"'
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
client.set_value("ai_input", "Test parameter aliases")
|
||||
client.click("btn_gen_send")
|
||||
# Handle approval
|
||||
timeout = 15
|
||||
start_time = time.time()
|
||||
approved = False
|
||||
while time.time() - start_time < timeout:
|
||||
for ev in client.get_events():
|
||||
if ev.get("type") == "ask_received":
|
||||
requests.post("http://127.0.0.1:8999/api/ask/respond",
|
||||
json={"request_id": ev.get("request_id"), "response": {"approved": True}})
|
||||
approved = True
|
||||
if approved: break
|
||||
time.sleep(0.5)
|
||||
assert approved, "Tool approval event never received"
|
||||
# Verify tool result in history
|
||||
time.sleep(2)
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
# Check for "Tool worked!" which implies the tool execution was successful
|
||||
found = any("Tool worked!" in e.get("content", "") for e in entries)
|
||||
assert found, "Tool result indicating success not found in history"
|
||||
os.remove(alias_mock)
|
||||
|
||||
def test_gemini_cli_loop_termination(live_gui):
|
||||
"""
|
||||
"""
|
||||
Test that multi-round tool calling correctly terminates and preserves
|
||||
payload (session context) between rounds.
|
||||
"""
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
|
||||
# This uses the existing mock_gemini_cli.py which is already designed for 2 rounds
|
||||
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
|
||||
cli_cmd = f'"{sys.executable}" "{mock_script}"'
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
|
||||
client.set_value("ai_input", "Perform multi-round tool test")
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# Handle approvals (mock does one tool call)
|
||||
timeout = 20
|
||||
start_time = time.time()
|
||||
approved = False
|
||||
while time.time() - start_time < timeout:
|
||||
for ev in client.get_events():
|
||||
if ev.get("type") == "ask_received":
|
||||
requests.post("http://127.0.0.1:8999/api/ask/respond",
|
||||
json={"request_id": ev.get("request_id"), "response": {"approved": True}})
|
||||
approved = True
|
||||
if approved: break
|
||||
time.sleep(0.5)
|
||||
|
||||
# Wait for the second round and final answer
|
||||
found_final = False
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < 15:
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
for e in entries:
|
||||
if "processed the tool results" in e.get("content", ""):
|
||||
found_final = True
|
||||
break
|
||||
if found_final: break
|
||||
time.sleep(1)
|
||||
|
||||
assert found_final, "Final message after multi-round tool loop not found"
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
# This uses the existing mock_gemini_cli.py which is already designed for 2 rounds
|
||||
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
|
||||
cli_cmd = f'"{sys.executable}" "{mock_script}"'
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
client.set_value("ai_input", "Perform multi-round tool test")
|
||||
client.click("btn_gen_send")
|
||||
# Handle approvals (mock does one tool call)
|
||||
timeout = 20
|
||||
start_time = time.time()
|
||||
approved = False
|
||||
while time.time() - start_time < timeout:
|
||||
for ev in client.get_events():
|
||||
if ev.get("type") == "ask_received":
|
||||
requests.post("http://127.0.0.1:8999/api/ask/respond",
|
||||
json={"request_id": ev.get("request_id"), "response": {"approved": True}})
|
||||
approved = True
|
||||
if approved: break
|
||||
time.sleep(0.5)
|
||||
# Wait for the second round and final answer
|
||||
found_final = False
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < 15:
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
for e in entries:
|
||||
if "processed the tool results" in e.get("content", ""):
|
||||
found_final = True
|
||||
break
|
||||
if found_final: break
|
||||
time.sleep(1)
|
||||
assert found_final, "Final message after multi-round tool loop not found"
|
||||
|
||||
@@ -6,136 +6,116 @@ import requests
|
||||
from api_hook_client import ApiHookClient
|
||||
|
||||
def test_gemini_cli_full_integration(live_gui):
|
||||
"""
|
||||
"""
|
||||
Integration test for the Gemini CLI provider and tool bridge.
|
||||
Handles 'ask_received' events from the bridge and any other approval requests.
|
||||
"""
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
|
||||
# 0. Reset session and enable history
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
# Switch to manual_slop project explicitly
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
|
||||
# 1. Setup paths and configure the GUI
|
||||
# Use the real gemini CLI if available, otherwise use mock
|
||||
# For CI/testing we prefer mock
|
||||
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
|
||||
cli_cmd = f'"{sys.executable}" "{mock_script}"'
|
||||
|
||||
print(f"[TEST] Setting current_provider to gemini_cli")
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
print(f"[TEST] Setting gcli_path to {cli_cmd}")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
|
||||
# Verify settings
|
||||
assert client.get_value("current_provider") == "gemini_cli"
|
||||
|
||||
# Clear events
|
||||
client.get_events()
|
||||
|
||||
# 2. Trigger a message in the GUI
|
||||
print("[TEST] Sending user message...")
|
||||
client.set_value("ai_input", "Please read test.txt")
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# 3. Monitor for approval events
|
||||
print("[TEST] Waiting for approval events...")
|
||||
timeout = 45
|
||||
start_time = time.time()
|
||||
approved_count = 0
|
||||
|
||||
while time.time() - start_time < timeout:
|
||||
events = client.get_events()
|
||||
if events:
|
||||
for ev in events:
|
||||
etype = ev.get("type")
|
||||
eid = ev.get("request_id") or ev.get("action_id")
|
||||
print(f"[TEST] Received event: {etype} (ID: {eid})")
|
||||
|
||||
if etype in ["ask_received", "glob_approval_required", "script_confirmation_required"]:
|
||||
print(f"[TEST] Approving {etype} {eid}")
|
||||
if etype == "script_confirmation_required":
|
||||
resp = requests.post(f"http://127.0.0.1:8999/api/confirm/{eid}", json={"approved": True})
|
||||
else:
|
||||
resp = requests.post("http://127.0.0.1:8999/api/ask/respond",
|
||||
json={"request_id": eid, "response": {"approved": True}})
|
||||
assert resp.status_code == 200
|
||||
approved_count += 1
|
||||
|
||||
# Check if we got a final response in history
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
found_final = False
|
||||
for entry in entries:
|
||||
content = entry.get("content", "")
|
||||
if "Hello from mock!" in content or "processed the tool results" in content:
|
||||
print(f"[TEST] Success! Found final message in history.")
|
||||
found_final = True
|
||||
break
|
||||
|
||||
if found_final:
|
||||
break
|
||||
|
||||
time.sleep(1.0)
|
||||
|
||||
assert approved_count > 0, "No approval events were processed"
|
||||
assert found_final, "Final message from mock CLI was not found in the GUI history"
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
# 0. Reset session and enable history
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
# Switch to manual_slop project explicitly
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
# 1. Setup paths and configure the GUI
|
||||
# Use the real gemini CLI if available, otherwise use mock
|
||||
# For CI/testing we prefer mock
|
||||
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
|
||||
cli_cmd = f'"{sys.executable}" "{mock_script}"'
|
||||
print(f"[TEST] Setting current_provider to gemini_cli")
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
print(f"[TEST] Setting gcli_path to {cli_cmd}")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
# Verify settings
|
||||
assert client.get_value("current_provider") == "gemini_cli"
|
||||
# Clear events
|
||||
client.get_events()
|
||||
# 2. Trigger a message in the GUI
|
||||
print("[TEST] Sending user message...")
|
||||
client.set_value("ai_input", "Please read test.txt")
|
||||
client.click("btn_gen_send")
|
||||
# 3. Monitor for approval events
|
||||
print("[TEST] Waiting for approval events...")
|
||||
timeout = 45
|
||||
start_time = time.time()
|
||||
approved_count = 0
|
||||
while time.time() - start_time < timeout:
|
||||
events = client.get_events()
|
||||
if events:
|
||||
for ev in events:
|
||||
etype = ev.get("type")
|
||||
eid = ev.get("request_id") or ev.get("action_id")
|
||||
print(f"[TEST] Received event: {etype} (ID: {eid})")
|
||||
if etype in ["ask_received", "glob_approval_required", "script_confirmation_required"]:
|
||||
print(f"[TEST] Approving {etype} {eid}")
|
||||
if etype == "script_confirmation_required":
|
||||
resp = requests.post(f"http://127.0.0.1:8999/api/confirm/{eid}", json={"approved": True})
|
||||
else:
|
||||
resp = requests.post("http://127.0.0.1:8999/api/ask/respond",
|
||||
json={"request_id": eid, "response": {"approved": True}})
|
||||
assert resp.status_code == 200
|
||||
approved_count += 1
|
||||
# Check if we got a final response in history
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
found_final = False
|
||||
for entry in entries:
|
||||
content = entry.get("content", "")
|
||||
if "Hello from mock!" in content or "processed the tool results" in content:
|
||||
print(f"[TEST] Success! Found final message in history.")
|
||||
found_final = True
|
||||
break
|
||||
if found_final:
|
||||
break
|
||||
time.sleep(1.0)
|
||||
assert approved_count > 0, "No approval events were processed"
|
||||
assert found_final, "Final message from mock CLI was not found in the GUI history"
|
||||
|
||||
def test_gemini_cli_rejection_and_history(live_gui):
|
||||
"""
|
||||
"""
|
||||
Integration test for the Gemini CLI provider: Rejection flow and history.
|
||||
"""
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
|
||||
# 0. Reset session
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
|
||||
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
|
||||
cli_cmd = f'"{sys.executable}" "{mock_script}"'
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
|
||||
# 2. Trigger a message
|
||||
print("[TEST] Sending user message (to be denied)...")
|
||||
client.set_value("ai_input", "Deny me")
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# 3. Wait for event and reject
|
||||
timeout = 20
|
||||
start_time = time.time()
|
||||
denied = False
|
||||
while time.time() - start_time < timeout:
|
||||
for ev in client.get_events():
|
||||
etype = ev.get("type")
|
||||
eid = ev.get("request_id")
|
||||
print(f"[TEST] Received event: {etype}")
|
||||
if etype == "ask_received":
|
||||
print(f"[TEST] Denying request {eid}")
|
||||
requests.post("http://127.0.0.1:8999/api/ask/respond",
|
||||
json={"request_id": eid, "response": {"approved": False}})
|
||||
denied = True
|
||||
break
|
||||
if denied: break
|
||||
time.sleep(0.5)
|
||||
|
||||
assert denied, "No ask_received event to deny"
|
||||
|
||||
# 4. Verify rejection in history
|
||||
print("[TEST] Waiting for rejection in history...")
|
||||
rejection_found = False
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < 20:
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
for entry in entries:
|
||||
if "Tool execution was denied" in entry.get("content", ""):
|
||||
rejection_found = True
|
||||
break
|
||||
if rejection_found: break
|
||||
time.sleep(1.0)
|
||||
|
||||
assert rejection_found, "Rejection message not found in history"
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
# 0. Reset session
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
|
||||
cli_cmd = f'"{sys.executable}" "{mock_script}"'
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
# 2. Trigger a message
|
||||
print("[TEST] Sending user message (to be denied)...")
|
||||
client.set_value("ai_input", "Deny me")
|
||||
client.click("btn_gen_send")
|
||||
# 3. Wait for event and reject
|
||||
timeout = 20
|
||||
start_time = time.time()
|
||||
denied = False
|
||||
while time.time() - start_time < timeout:
|
||||
for ev in client.get_events():
|
||||
etype = ev.get("type")
|
||||
eid = ev.get("request_id")
|
||||
print(f"[TEST] Received event: {etype}")
|
||||
if etype == "ask_received":
|
||||
print(f"[TEST] Denying request {eid}")
|
||||
requests.post("http://127.0.0.1:8999/api/ask/respond",
|
||||
json={"request_id": eid, "response": {"approved": False}})
|
||||
denied = True
|
||||
break
|
||||
if denied: break
|
||||
time.sleep(0.5)
|
||||
assert denied, "No ask_received event to deny"
|
||||
# 4. Verify rejection in history
|
||||
print("[TEST] Waiting for rejection in history...")
|
||||
rejection_found = False
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < 20:
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
for entry in entries:
|
||||
if "Tool execution was denied" in entry.get("content", ""):
|
||||
rejection_found = True
|
||||
break
|
||||
if rejection_found: break
|
||||
time.sleep(1.0)
|
||||
assert rejection_found, "Rejection message not found in history"
|
||||
|
||||
@@ -10,43 +10,38 @@ import ai_client
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup_ai_client():
|
||||
ai_client.reset_session()
|
||||
ai_client.set_provider("gemini_cli", "gemini-2.5-flash")
|
||||
ai_client.confirm_and_run_callback = lambda script, base_dir: "Mocked execution"
|
||||
ai_client.comms_log_callback = lambda entry: None
|
||||
ai_client.tool_log_callback = lambda script, result: None
|
||||
yield
|
||||
ai_client.reset_session()
|
||||
ai_client.set_provider("gemini_cli", "gemini-2.5-flash")
|
||||
ai_client.confirm_and_run_callback = lambda script, base_dir: "Mocked execution"
|
||||
ai_client.comms_log_callback = lambda entry: None
|
||||
ai_client.tool_log_callback = lambda script, result: None
|
||||
yield
|
||||
|
||||
@patch('ai_client.GeminiCliAdapter')
|
||||
@patch('ai_client._get_combined_system_prompt')
|
||||
def test_send_invokes_adapter_send(mock_prompt, mock_adapter_class):
|
||||
mock_prompt.return_value = "Mocked Prompt"
|
||||
mock_instance = mock_adapter_class.return_value
|
||||
mock_instance.send.return_value = {"text": "Done", "tool_calls": []}
|
||||
mock_instance.last_usage = {"input_tokens": 10}
|
||||
mock_instance.last_latency = 0.1
|
||||
mock_instance.session_id = None
|
||||
|
||||
ai_client.send("context", "message", discussion_history="hist")
|
||||
|
||||
expected_payload = "[DISCUSSION HISTORY]\n\nhist\n\n---\n\nmessage"
|
||||
assert mock_instance.send.called
|
||||
args, kwargs = mock_instance.send.call_args
|
||||
assert args[0] == expected_payload
|
||||
assert kwargs['system_instruction'] == "Mocked Prompt\n\n<context>\ncontext\n</context>"
|
||||
mock_prompt.return_value = "Mocked Prompt"
|
||||
mock_instance = mock_adapter_class.return_value
|
||||
mock_instance.send.return_value = {"text": "Done", "tool_calls": []}
|
||||
mock_instance.last_usage = {"input_tokens": 10}
|
||||
mock_instance.last_latency = 0.1
|
||||
mock_instance.session_id = None
|
||||
ai_client.send("context", "message", discussion_history="hist")
|
||||
expected_payload = "[DISCUSSION HISTORY]\n\nhist\n\n---\n\nmessage"
|
||||
assert mock_instance.send.called
|
||||
args, kwargs = mock_instance.send.call_args
|
||||
assert args[0] == expected_payload
|
||||
assert kwargs['system_instruction'] == "Mocked Prompt\n\n<context>\ncontext\n</context>"
|
||||
|
||||
@patch('ai_client.GeminiCliAdapter')
|
||||
def test_get_history_bleed_stats(mock_adapter_class):
|
||||
mock_instance = mock_adapter_class.return_value
|
||||
mock_instance.send.return_value = {"text": "txt", "tool_calls": []}
|
||||
mock_instance.last_usage = {"input_tokens": 1500}
|
||||
mock_instance.last_latency = 0.5
|
||||
mock_instance.session_id = "sess"
|
||||
|
||||
# Initialize by sending a message
|
||||
ai_client.send("context", "msg")
|
||||
|
||||
stats = ai_client.get_history_bleed_stats()
|
||||
|
||||
assert stats["provider"] == "gemini_cli"
|
||||
assert stats["current"] == 1500
|
||||
mock_instance = mock_adapter_class.return_value
|
||||
mock_instance.send.return_value = {"text": "txt", "tool_calls": []}
|
||||
mock_instance.last_usage = {"input_tokens": 1500}
|
||||
mock_instance.last_latency = 0.5
|
||||
mock_instance.session_id = "sess"
|
||||
# Initialize by sending a message
|
||||
ai_client.send("context", "msg")
|
||||
stats = ai_client.get_history_bleed_stats()
|
||||
assert stats["provider"] == "gemini_cli"
|
||||
assert stats["current"] == 1500
|
||||
|
||||
@@ -10,41 +10,34 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from ai_client import get_gemini_cache_stats, reset_session
|
||||
|
||||
def test_get_gemini_cache_stats_with_mock_client():
|
||||
"""
|
||||
"""
|
||||
Test that get_gemini_cache_stats correctly processes cache lists
|
||||
from a mocked client instance.
|
||||
"""
|
||||
# Ensure a clean state before the test by resetting the session
|
||||
reset_session()
|
||||
|
||||
# 1. Create a mock for the cache object that the client will return
|
||||
mock_cache = MagicMock()
|
||||
mock_cache.name = "cachedContents/test-cache"
|
||||
mock_cache.display_name = "Test Cache"
|
||||
mock_cache.model = "models/gemini-1.5-pro-001"
|
||||
mock_cache.size_bytes = 1024
|
||||
|
||||
# 2. Create a mock for the client instance
|
||||
mock_client_instance = MagicMock()
|
||||
# Configure its `caches.list` method to return our mock cache
|
||||
mock_client_instance.caches.list.return_value = [mock_cache]
|
||||
|
||||
# 3. Patch the Client constructor to return our mock instance
|
||||
# This intercepts the `_ensure_gemini_client` call inside the function
|
||||
with patch('google.genai.Client', return_value=mock_client_instance) as mock_client_constructor:
|
||||
|
||||
# 4. Call the function under test
|
||||
stats = get_gemini_cache_stats()
|
||||
|
||||
# 5. Assert that the function behaved as expected
|
||||
|
||||
# It should have constructed the client
|
||||
mock_client_constructor.assert_called_once()
|
||||
# It should have called the `list` method on the `caches` attribute
|
||||
mock_client_instance.caches.list.assert_called_once()
|
||||
|
||||
# The returned stats dictionary should be correct
|
||||
assert "cache_count" in stats
|
||||
assert "total_size_bytes" in stats
|
||||
assert stats["cache_count"] == 1
|
||||
assert stats["total_size_bytes"] == 1024
|
||||
# Ensure a clean state before the test by resetting the session
|
||||
reset_session()
|
||||
# 1. Create a mock for the cache object that the client will return
|
||||
mock_cache = MagicMock()
|
||||
mock_cache.name = "cachedContents/test-cache"
|
||||
mock_cache.display_name = "Test Cache"
|
||||
mock_cache.model = "models/gemini-1.5-pro-001"
|
||||
mock_cache.size_bytes = 1024
|
||||
# 2. Create a mock for the client instance
|
||||
mock_client_instance = MagicMock()
|
||||
# Configure its `caches.list` method to return our mock cache
|
||||
mock_client_instance.caches.list.return_value = [mock_cache]
|
||||
# 3. Patch the Client constructor to return our mock instance
|
||||
# This intercepts the `_ensure_gemini_client` call inside the function
|
||||
with patch('google.genai.Client', return_value=mock_client_instance) as mock_client_constructor:
|
||||
# 4. Call the function under test
|
||||
stats = get_gemini_cache_stats()
|
||||
# 5. Assert that the function behaved as expected
|
||||
# It should have constructed the client
|
||||
mock_client_constructor.assert_called_once()
|
||||
# It should have called the `list` method on the `caches` attribute
|
||||
mock_client_instance.caches.list.assert_called_once()
|
||||
# The returned stats dictionary should be correct
|
||||
assert "cache_count" in stats
|
||||
assert "total_size_bytes" in stats
|
||||
assert stats["cache_count"] == 1
|
||||
assert stats["total_size_bytes"] == 1024
|
||||
|
||||
@@ -6,43 +6,40 @@ from events import EventEmitter
|
||||
|
||||
@pytest.fixture
|
||||
def app_instance():
|
||||
"""
|
||||
"""
|
||||
Fixture to create an instance of the gui_2.App class for testing.
|
||||
It mocks functions that would render a window or block execution.
|
||||
"""
|
||||
if not hasattr(ai_client, 'events') or ai_client.events is None:
|
||||
ai_client.events = EventEmitter()
|
||||
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
|
||||
patch('gui_2.save_config'),
|
||||
patch('gui_2.project_manager'),
|
||||
patch('gui_2.session_logger'),
|
||||
patch('gui_2.immapp.run'),
|
||||
patch.object(App, '_load_active_project'),
|
||||
patch.object(App, '_fetch_models'),
|
||||
patch.object(App, '_load_fonts'),
|
||||
patch.object(App, '_post_init')
|
||||
):
|
||||
yield App
|
||||
if not hasattr(ai_client, 'events') or ai_client.events is None:
|
||||
ai_client.events = EventEmitter()
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
|
||||
patch('gui_2.save_config'),
|
||||
patch('gui_2.project_manager'),
|
||||
patch('gui_2.session_logger'),
|
||||
patch('gui_2.immapp.run'),
|
||||
patch.object(App, '_load_active_project'),
|
||||
patch.object(App, '_fetch_models'),
|
||||
patch.object(App, '_load_fonts'),
|
||||
patch.object(App, '_post_init')
|
||||
):
|
||||
yield App
|
||||
|
||||
def test_app_subscribes_to_events(app_instance):
|
||||
"""
|
||||
"""
|
||||
This test checks that the App's __init__ method subscribes the necessary
|
||||
event handlers to the ai_client.events emitter.
|
||||
This test will fail until the event subscription logic is added to gui_2.App.
|
||||
"""
|
||||
with patch.object(ai_client.events, 'on') as mock_on:
|
||||
app = app_instance()
|
||||
mock_on.assert_called()
|
||||
|
||||
calls = mock_on.call_args_list
|
||||
event_names = [call.args[0] for call in calls]
|
||||
assert "request_start" in event_names
|
||||
assert "response_received" in event_names
|
||||
assert "tool_execution" in event_names
|
||||
|
||||
for call in calls:
|
||||
handler = call.args[1]
|
||||
assert hasattr(handler, '__self__')
|
||||
assert handler.__self__ is app
|
||||
with patch.object(ai_client.events, 'on') as mock_on:
|
||||
app = app_instance()
|
||||
mock_on.assert_called()
|
||||
calls = mock_on.call_args_list
|
||||
event_names = [call.args[0] for call in calls]
|
||||
assert "request_start" in event_names
|
||||
assert "response_received" in event_names
|
||||
assert "tool_execution" in event_names
|
||||
for call in calls:
|
||||
handler = call.args[1]
|
||||
assert hasattr(handler, '__self__')
|
||||
assert handler.__self__ is app
|
||||
|
||||
@@ -4,45 +4,43 @@ from gui_2 import App
|
||||
|
||||
@pytest.fixture
|
||||
def app_instance():
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'gui': {'show_windows': {}}}),
|
||||
patch('gui_2.save_config'),
|
||||
patch('gui_2.project_manager'),
|
||||
patch('gui_2.session_logger'),
|
||||
patch('gui_2.immapp.run'),
|
||||
patch.object(App, '_load_active_project'),
|
||||
patch.object(App, '_fetch_models'),
|
||||
patch.object(App, '_load_fonts'),
|
||||
patch.object(App, '_post_init')
|
||||
):
|
||||
yield App()
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'gui': {'show_windows': {}}}),
|
||||
patch('gui_2.save_config'),
|
||||
patch('gui_2.project_manager'),
|
||||
patch('gui_2.session_logger'),
|
||||
patch('gui_2.immapp.run'),
|
||||
patch.object(App, '_load_active_project'),
|
||||
patch.object(App, '_fetch_models'),
|
||||
patch.object(App, '_load_fonts'),
|
||||
patch.object(App, '_post_init')
|
||||
):
|
||||
yield App()
|
||||
|
||||
def test_gui2_hubs_exist_in_show_windows(app_instance):
|
||||
"""
|
||||
"""
|
||||
Verifies that the new consolidated Hub windows are defined in the App's show_windows.
|
||||
This ensures they will be available in the 'Windows' menu.
|
||||
"""
|
||||
expected_hubs = [
|
||||
"Context Hub",
|
||||
"AI Settings",
|
||||
"Discussion Hub",
|
||||
"Operations Hub",
|
||||
"Files & Media",
|
||||
"Theme",
|
||||
]
|
||||
|
||||
for hub in expected_hubs:
|
||||
assert hub in app_instance.show_windows, f"Expected hub window '{hub}' not found in show_windows"
|
||||
expected_hubs = [
|
||||
"Context Hub",
|
||||
"AI Settings",
|
||||
"Discussion Hub",
|
||||
"Operations Hub",
|
||||
"Files & Media",
|
||||
"Theme",
|
||||
]
|
||||
for hub in expected_hubs:
|
||||
assert hub in app_instance.show_windows, f"Expected hub window '{hub}' not found in show_windows"
|
||||
|
||||
def test_gui2_old_windows_removed_from_show_windows(app_instance):
|
||||
"""
|
||||
"""
|
||||
Verifies that the old fragmented windows are removed from show_windows.
|
||||
"""
|
||||
old_windows = [
|
||||
"Projects", "Files", "Screenshots",
|
||||
"Provider", "System Prompts",
|
||||
"Message", "Response", "Tool Calls", "Comms History"
|
||||
]
|
||||
|
||||
for old_win in old_windows:
|
||||
assert old_win not in app_instance.show_windows, f"Old window '{old_win}' should have been removed from show_windows"
|
||||
old_windows = [
|
||||
"Projects", "Files", "Screenshots",
|
||||
"Provider", "System Prompts",
|
||||
"Message", "Response", "Tool Calls", "Comms History"
|
||||
]
|
||||
for old_win in old_windows:
|
||||
assert old_win not in app_instance.show_windows, f"Old window '{old_win}' should have been removed from show_windows"
|
||||
|
||||
@@ -6,74 +6,65 @@ from events import EventEmitter
|
||||
|
||||
@pytest.fixture
|
||||
def app_instance():
|
||||
if not hasattr(ai_client, 'events') or ai_client.events is None:
|
||||
ai_client.events = EventEmitter()
|
||||
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
|
||||
patch('gui_2.save_config'),
|
||||
patch('gui_2.project_manager'),
|
||||
patch('gui_2.session_logger'),
|
||||
patch('gui_2.immapp.run'),
|
||||
patch.object(App, '_load_active_project'),
|
||||
patch.object(App, '_fetch_models'),
|
||||
patch.object(App, '_load_fonts'),
|
||||
patch.object(App, '_post_init')
|
||||
):
|
||||
yield App()
|
||||
if not hasattr(ai_client, 'events') or ai_client.events is None:
|
||||
ai_client.events = EventEmitter()
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
|
||||
patch('gui_2.save_config'),
|
||||
patch('gui_2.project_manager'),
|
||||
patch('gui_2.session_logger'),
|
||||
patch('gui_2.immapp.run'),
|
||||
patch.object(App, '_load_active_project'),
|
||||
patch.object(App, '_fetch_models'),
|
||||
patch.object(App, '_load_fonts'),
|
||||
patch.object(App, '_post_init')
|
||||
):
|
||||
yield App()
|
||||
|
||||
def test_mcp_tool_call_is_dispatched(app_instance):
    """
    This test verifies that when the AI returns a tool call for an MCP function,
    the ai_client correctly dispatches it to mcp_client.
    This will fail until mcp_client is properly integrated.
    """
    # 1. Define the mock tool call from the AI
    mock_fc = MagicMock()
    mock_fc.name = "read_file"
    mock_fc.args = {"file_path": "test.txt"}

    # 2. Construct the mock AI response (Gemini format)
    mock_response_with_tool = MagicMock()
    mock_part = MagicMock()
    mock_part.text = ""
    mock_part.function_call = mock_fc
    mock_candidate = MagicMock()
    mock_candidate.content.parts = [mock_part]
    mock_candidate.finish_reason.name = "TOOL_CALLING"
    mock_response_with_tool.candidates = [mock_candidate]

    # Plain attribute holder so token accounting sees real ints, not MagicMocks.
    class DummyUsage:
        prompt_token_count = 100
        candidates_token_count = 10
        cached_content_token_count = 0

    mock_response_with_tool.usage_metadata = DummyUsage()

    # 3. Create a mock for the final AI response after the tool call
    mock_response_final = MagicMock()
    mock_response_final.text = "Final answer"
    mock_response_final.candidates = []
    mock_response_final.usage_metadata = DummyUsage()

    # 4. Patch the necessary components
    with patch("ai_client._ensure_gemini_client"), \
         patch("ai_client._gemini_client") as mock_client, \
         patch('mcp_client.dispatch', return_value="file content") as mock_dispatch:

        mock_chat = mock_client.chats.create.return_value
        # First send returns the tool call, second returns the final answer.
        mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]

        ai_client.set_provider("gemini", "mock-model")

        # 5. Call the send function
        ai_client.send(
            md_content="some context",
            user_message="read the file",
            base_dir=".",
            file_items=[],
            discussion_history=""
        )

        # 6. Assert that the MCP dispatch function was called
        mock_dispatch.assert_called_once_with("read_file", {"file_path": "test.txt"})
# File the GUI-side test callback writes to; shared by the cleanup fixture
# and the custom-callback test below.
TEST_CALLBACK_FILE = Path("temp_callback_output.txt")
@pytest.fixture(scope="function", autouse=True)
def cleanup_callback_file():
    """Ensures the test callback file is cleaned up before and after each test."""
    # missing_ok=True avoids the exists()/unlink() race and an extra stat call.
    TEST_CALLBACK_FILE.unlink(missing_ok=True)
    yield
    TEST_CALLBACK_FILE.unlink(missing_ok=True)
def test_gui2_set_value_hook_works(live_gui):
    """
    Tests that the 'set_value' GUI hook is correctly implemented.
    """
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)
    # Unique value so a stale previous run can't produce a false pass.
    test_value = f"New value set by test: {uuid.uuid4()}"
    gui_data = {'action': 'set_value', 'item': 'ai_input', 'value': test_value}

    response = client.post_gui(gui_data)
    assert response == {'status': 'queued'}

    # Verify the value was actually set using the new get_value hook
    time.sleep(0.5)
    current_value = client.get_value('ai_input')
    assert current_value == test_value
def test_gui2_click_hook_works(live_gui):
    """
    Tests that the 'click' GUI hook for the 'Reset' button is implemented.
    """
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)

    # First, set some state that 'Reset' would clear.
    test_value = "This text should be cleared by the reset button."
    client.set_value('ai_input', test_value)
    time.sleep(0.5)
    assert client.get_value('ai_input') == test_value

    # Now, trigger the click
    client.click('btn_reset')
    time.sleep(0.5)

    # Verify it was reset
    assert client.get_value('ai_input') == ""
def test_gui2_custom_callback_hook_works(live_gui):
    """
    Tests that the 'custom_callback' GUI hook is correctly implemented.
    """
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)
    test_data = f"Callback executed: {uuid.uuid4()}"

    gui_data = {
        'action': 'custom_callback',
        'callback': '_test_callback_func_write_to_file',
        'args': [test_data]
    }
    response = client.post_gui(gui_data)
    assert response == {'status': 'queued'}

    time.sleep(1)  # Give gui_2.py time to process its task queue

    # Assert that the file WAS created and contains the correct data
    assert TEST_CALLBACK_FILE.exists(), "Custom callback was NOT executed, or file path is wrong!"
    with open(TEST_CALLBACK_FILE, "r") as f:
        content = f.read()
    assert content == test_data, "Callback executed, but file content is incorrect."
from api_hook_client import ApiHookClient
_shared_metrics = {}
|
||||
|
||||
def test_performance_benchmarking(live_gui):
    """
    Collects performance metrics for the current GUI script (parameterized as gui.py and gui_2.py).
    """
    process, gui_script = live_gui
    client = ApiHookClient()

    # Wait for app to stabilize and render some frames
    time.sleep(3.0)

    # Collect metrics over 5 seconds
    fps_values = []
    cpu_values = []
    frame_time_values = []

    start_time = time.time()
    while time.time() - start_time < 5:
        try:
            perf_data = client.get_performance()
            metrics = perf_data.get('performance', {})
            if metrics:
                fps = metrics.get('fps', 0.0)
                cpu = metrics.get('cpu_percent', 0.0)
                ft = metrics.get('last_frame_time_ms', 0.0)

                # In some CI environments without a display, metrics might be 0
                # We only record positive ones to avoid skewing averages if hooks are failing
                if fps > 0:
                    fps_values.append(fps)
                    cpu_values.append(cpu)
                    frame_time_values.append(ft)
            time.sleep(0.1)
        except Exception:
            # Hook endpoint went away (app died) — stop sampling, keep what we have.
            break

    avg_fps = sum(fps_values) / len(fps_values) if fps_values else 0
    avg_cpu = sum(cpu_values) / len(cpu_values) if cpu_values else 0
    avg_ft = sum(frame_time_values) / len(frame_time_values) if frame_time_values else 0

    _shared_metrics[gui_script] = {
        "avg_fps": avg_fps,
        "avg_cpu": avg_cpu,
        "avg_ft": avg_ft
    }

    print(f"\n[Test] Results for {gui_script}: FPS={avg_fps:.2f}, CPU={avg_cpu:.2f}%, FT={avg_ft:.2f}ms")

    # Absolute minimum requirements (only when we actually got render metrics)
    if avg_fps > 0:
        assert avg_fps >= 30, f"{gui_script} FPS {avg_fps:.2f} is below 30 FPS threshold"
        assert avg_ft <= 33.3, f"{gui_script} Frame time {avg_ft:.2f}ms is above 33.3ms threshold"
def test_performance_parity():
    """
    Compare the metrics collected in the parameterized test_performance_benchmarking.
    """
    # Skip whenever either GUI's metrics are missing. (Previously this only
    # skipped when fewer than two entries existed, which could KeyError below
    # if two other entries were present.)
    if "gui_legacy.py" not in _shared_metrics or "gui_2.py" not in _shared_metrics:
        pytest.skip("Metrics for both GUIs not yet collected.")

    gui_m = _shared_metrics["gui_legacy.py"]
    gui2_m = _shared_metrics["gui_2.py"]

    # FPS Parity Check (+/- 15% leeway for now, target is 5%)
    fps_diff_pct = abs(gui_m["avg_fps"] - gui2_m["avg_fps"]) / gui_m["avg_fps"] if gui_m["avg_fps"] > 0 else 0
    cpu_diff_pct = abs(gui_m["avg_cpu"] - gui2_m["avg_cpu"]) / gui_m["avg_cpu"] if gui_m["avg_cpu"] > 0 else 0

    print(f"\n--- Performance Parity Results ---")
    print(f"FPS Diff: {fps_diff_pct*100:.2f}%")
    print(f"CPU Diff: {cpu_diff_pct*100:.2f}%")

    # We follow the 5% requirement for FPS (15% asserted for now)
    # For CPU we might need more leeway
    assert fps_diff_pct <= 0.15, f"FPS difference {fps_diff_pct*100:.2f}% exceeds 15% threshold"
    assert cpu_diff_pct <= 3.0, f"CPU difference {cpu_diff_pct*100:.2f}% exceeds 300% threshold"
from events import UserRequestEvent
@pytest.fixture
def mock_gui():
    """Builds a gui_2.App with config/project/session collaborators patched out,
    so construction touches no disk, network, or real window."""
    with (
        patch('gui_2.load_config', return_value={
            "ai": {"provider": "gemini", "model": "model-1"},
            "projects": {"paths": [], "active": ""},
            "gui": {"show_windows": {}}
        }),
        patch('gui_2.project_manager.load_project', return_value={}),
        patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
        patch('gui_2.project_manager.save_project'),
        patch('gui_2.session_logger.open_session'),
        patch('gui_2.App._init_ai_and_hooks'),
        patch('gui_2.App._fetch_models')
    ):
        gui = App()
    return gui
def test_handle_generate_send_pushes_event(mock_gui):
    """_handle_generate_send should push a UserRequestEvent onto the event queue."""
    # Mock _do_generate to return sample data
    mock_gui._do_generate = MagicMock(return_value=(
        "full_md", "path", [], "stable_md", "disc_text"
    ))
    mock_gui.ui_ai_input = "test prompt"
    mock_gui.ui_files_base_dir = "."

    # Mock event_queue.put
    mock_gui.event_queue.put = MagicMock()

    # We need to mock asyncio.run_coroutine_threadsafe to immediately execute
    with patch('asyncio.run_coroutine_threadsafe') as mock_run:
        mock_gui._handle_generate_send()

    # Verify run_coroutine_threadsafe was called
    assert mock_run.called

    # Verify the call to event_queue.put was correct
    # This is a bit tricky since the first arg to run_coroutine_threadsafe
    # is the coroutine returned by event_queue.put().
    # Let's verify that the call to put occurred.
    mock_gui.event_queue.put.assert_called_once()
    args, kwargs = mock_gui.event_queue.put.call_args
    assert args[0] == "user_request"
    event = args[1]
    assert isinstance(event, UserRequestEvent)
    assert event.prompt == "test prompt"
    assert event.stable_md == "stable_md"
    assert event.disc_text == "disc_text"
    assert event.base_dir == "."
def test_user_request_event_payload():
    """UserRequestEvent.to_dict should round-trip every constructor field."""
    payload = UserRequestEvent(
        prompt="hello",
        stable_md="md",
        file_items=[],
        disc_text="disc",
        base_dir="."
    )

    d = payload.to_dict()
    assert d["prompt"] == "hello"
    assert d["stable_md"] == "md"
    assert d["file_items"] == []
    assert d["disc_text"] == "disc"
    assert d["base_dir"] == "."
@pytest.mark.asyncio
async def test_async_event_queue():
    """A put (name, payload) pair should come back unchanged from get()."""
    from events import AsyncEventQueue
    q = AsyncEventQueue()
    await q.put("test_event", {"data": 123})
    name, payload = await q.get()
    assert name == "test_event"
    assert payload["data"] == 123
from gui_legacy import App
@pytest.fixture
def app_instance():
    """Creates a real DPG context but mocks anything that would show a
    window, block execution, or rebuild UI during App construction."""
    dpg.create_context()
    with patch('dearpygui.dearpygui.create_viewport'), \
         patch('dearpygui.dearpygui.setup_dearpygui'), \
         patch('dearpygui.dearpygui.show_viewport'), \
         patch('dearpygui.dearpygui.start_dearpygui'), \
         patch('gui_legacy.load_config', return_value={}), \
         patch.object(App, '_rebuild_files_list'), \
         patch.object(App, '_rebuild_shots_list'), \
         patch.object(App, '_rebuild_disc_list'), \
         patch.object(App, '_rebuild_disc_roles_list'), \
         patch.object(App, '_rebuild_discussion_selector'), \
         patch.object(App, '_refresh_project_widgets'):

        app = App()
        yield app

    dpg.destroy_context()
def test_diagnostics_panel_initialization(app_instance):
    """The App should register the Diagnostics window and pre-size perf history."""
    assert "Diagnostics" in app_instance.window_info
    assert app_instance.window_info["Diagnostics"] == "win_diagnostics"
    assert "frame_time" in app_instance.perf_history
    assert len(app_instance.perf_history["frame_time"]) == 100
def test_diagnostics_panel_updates(app_instance):
    """_update_performance_diagnostics should push metrics into the DPG widgets."""
    # Mock dependencies
    mock_metrics = {
        'last_frame_time_ms': 10.0,
        'fps': 100.0,
        'cpu_percent': 50.0,
        'input_lag_ms': 5.0
    }
    app_instance.perf_monitor.get_metrics = MagicMock(return_value=mock_metrics)

    with patch('dearpygui.dearpygui.is_item_shown', return_value=True), \
         patch('dearpygui.dearpygui.set_value') as mock_set_value, \
         patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
         patch('dearpygui.dearpygui.does_item_exist', return_value=True):

        # We also need to mock ai_client stats
        with patch('ai_client.get_history_bleed_stats', return_value={}):
            app_instance._update_performance_diagnostics()

        # Verify UI updates
        mock_set_value.assert_any_call("perf_fps_text", "100.0")
        mock_set_value.assert_any_call("perf_frame_text", "10.0ms")
        mock_set_value.assert_any_call("perf_cpu_text", "50.0%")
        mock_set_value.assert_any_call("perf_lag_text", "5.0ms")

    # Verify history update
    assert app_instance.perf_history["frame_time"][-1] == 10.0
import ai_client
@pytest.fixture
def app_instance():
    """
    Fixture to create an instance of the App class for testing.
    It creates a real DPG context but mocks functions that would
    render a window or block execution.
    """
    dpg.create_context()

    with patch('dearpygui.dearpygui.create_viewport'), \
         patch('dearpygui.dearpygui.setup_dearpygui'), \
         patch('dearpygui.dearpygui.show_viewport'), \
         patch('dearpygui.dearpygui.start_dearpygui'), \
         patch('gui_legacy.load_config', return_value={}), \
         patch('gui_legacy.PerformanceMonitor'), \
         patch('gui_legacy.shell_runner'), \
         patch('gui_legacy.project_manager'), \
         patch.object(App, '_load_active_project'), \
         patch.object(App, '_rebuild_files_list'), \
         patch.object(App, '_rebuild_shots_list'), \
         patch.object(App, '_rebuild_disc_list'), \
         patch.object(App, '_rebuild_disc_roles_list'), \
         patch.object(App, '_rebuild_discussion_selector'), \
         patch.object(App, '_refresh_project_widgets'):

        app = App()
        yield app

    dpg.destroy_context()
def test_gui_updates_on_event(app_instance):
    """An ai_client 'response_received' event should trigger a metrics refresh."""
    # Patch dependencies for the test
    with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
         patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
         patch('dearpygui.dearpygui.configure_item'), \
         patch('ai_client.get_history_bleed_stats') as mock_stats:

        mock_stats.return_value = {"percentage": 50.0, "current": 500, "limit": 1000}

        # We'll use patch.object to see if _refresh_api_metrics is called
        with patch.object(app_instance, '_refresh_api_metrics', wraps=app_instance._refresh_api_metrics) as mock_refresh:
            # Simulate event
            ai_client.events.emit("response_received", payload={})

            # Process tasks manually
            app_instance._process_pending_gui_tasks()

            # Verify that _refresh_api_metrics was called
            mock_refresh.assert_called_once()

        # Verify that dpg.set_value was called for the metrics widgets
        calls = [call.args[0] for call in mock_set_value.call_args_list]
        assert "token_budget_bar" in calls
        assert "token_budget_label" in calls
# Make the project root importable when tests run from the tests/ directory.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
|
||||
|
||||
def test_idle_performance_requirements(live_gui):
    """
    Requirement: GUI must maintain stable performance on idle.
    """
    client = ApiHookClient()

    # Wait for app to stabilize and render some frames
    time.sleep(2.0)

    # Get multiple samples to be sure
    samples = []
    for _ in range(5):
        perf_data = client.get_performance()
        samples.append(perf_data)
        time.sleep(0.5)

    # Check for valid metrics
    valid_ft_count = 0
    for sample in samples:
        performance = sample.get('performance', {})
        frame_time = performance.get('last_frame_time_ms', 0.0)

        # We expect a positive frame time if rendering is happening
        if frame_time > 0:
            valid_ft_count += 1
            assert frame_time < 33.3, f"Frame time {frame_time}ms exceeds 30fps threshold"

    print(f"[Test] Valid frame time samples: {valid_ft_count}/5")
    # In some CI environments without a real display, frame time might remain 0
    # but we've verified the hook is returning the dictionary.
# Make the project root importable when tests run from the tests/ directory.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def test_comms_volume_stress_performance(live_gui):
    """
    Stress test: Inject many session entries and verify performance doesn't degrade.
    """
    client = ApiHookClient()

    # 1. Capture baseline
    time.sleep(2.0)  # Wait for stability
    baseline_resp = client.get_performance()
    baseline = baseline_resp.get('performance', {})
    baseline_ft = baseline.get('last_frame_time_ms', 0.0)

    # 2. Inject 50 "dummy" session entries
    # Role must match DISC_ROLES in gui_legacy.py (User, AI, Vendor API, System)
    large_session = []
    for i in range(50):
        large_session.append({
            "role": "User",
            "content": f"Stress test entry {i} " * 5,
            "ts": time.time(),
            "collapsed": False
        })

    client.post_session(large_session)

    # Give it a moment to process UI updates
    time.sleep(1.0)

    # 3. Capture stress performance
    stress_resp = client.get_performance()
    stress = stress_resp.get('performance', {})
    stress_ft = stress.get('last_frame_time_ms', 0.0)

    print(f"Baseline FT: {baseline_ft:.2f}ms, Stress FT: {stress_ft:.2f}ms")

    # If we got valid timing, assert it's within reason
    if stress_ft > 0:
        assert stress_ft < 33.3, f"Stress frame time {stress_ft:.2f}ms exceeds 30fps threshold"

    # Ensure the session actually updated
    session_data = client.get_session()
    entries = session_data.get('session', {}).get('entries', [])
    assert len(entries) >= 50, f"Expected at least 50 entries, got {len(entries)}"
@@ -17,103 +17,88 @@ from gui_legacy import App
|
||||
|
||||
@pytest.fixture
|
||||
def app_instance():
|
||||
"""
|
||||
"""
|
||||
Fixture to create an instance of the App class for testing.
|
||||
It creates a real DPG context but mocks functions that would
|
||||
render a window or block execution.
|
||||
"""
|
||||
dpg.create_context()
|
||||
|
||||
# Patch only the functions that would show a window or block,
|
||||
# and the App methods that rebuild UI on init.
|
||||
with patch('dearpygui.dearpygui.create_viewport'), \
|
||||
patch('dearpygui.dearpygui.setup_dearpygui'), \
|
||||
patch('dearpygui.dearpygui.show_viewport'), \
|
||||
patch('dearpygui.dearpygui.start_dearpygui'), \
|
||||
patch('gui_legacy.load_config', return_value={}), \
|
||||
patch.object(App, '_rebuild_files_list'), \
|
||||
patch.object(App, '_rebuild_shots_list'), \
|
||||
patch.object(App, '_rebuild_disc_list'), \
|
||||
patch.object(App, '_rebuild_disc_roles_list'), \
|
||||
patch.object(App, '_rebuild_discussion_selector'), \
|
||||
patch.object(App, '_refresh_project_widgets'):
|
||||
|
||||
app = App()
|
||||
yield app
|
||||
|
||||
dpg.destroy_context()
|
||||
dpg.create_context()
|
||||
# Patch only the functions that would show a window or block,
|
||||
# and the App methods that rebuild UI on init.
|
||||
with patch('dearpygui.dearpygui.create_viewport'), \
|
||||
patch('dearpygui.dearpygui.setup_dearpygui'), \
|
||||
patch('dearpygui.dearpygui.show_viewport'), \
|
||||
patch('dearpygui.dearpygui.start_dearpygui'), \
|
||||
patch('gui_legacy.load_config', return_value={}), \
|
||||
patch.object(App, '_rebuild_files_list'), \
|
||||
patch.object(App, '_rebuild_shots_list'), \
|
||||
patch.object(App, '_rebuild_disc_list'), \
|
||||
patch.object(App, '_rebuild_disc_roles_list'), \
|
||||
patch.object(App, '_rebuild_discussion_selector'), \
|
||||
patch.object(App, '_refresh_project_widgets'):
|
||||
app = App()
|
||||
yield app
|
||||
dpg.destroy_context()
|
||||
|
||||
def test_telemetry_panel_updates_correctly(app_instance):
|
||||
"""
|
||||
"""
|
||||
Tests that the _update_performance_diagnostics method correctly updates
|
||||
DPG widgets based on the stats from ai_client.
|
||||
"""
|
||||
# 1. Set the provider to anthropic
|
||||
app_instance.current_provider = "anthropic"
|
||||
|
||||
# 2. Define the mock stats
|
||||
mock_stats = {
|
||||
"provider": "anthropic",
|
||||
"limit": 180000,
|
||||
"current": 135000,
|
||||
"percentage": 75.0,
|
||||
}
|
||||
|
||||
# 3. Patch the dependencies
|
||||
app_instance._last_bleed_update_time = 0 # Force update
|
||||
with patch('ai_client.get_history_bleed_stats', return_value=mock_stats) as mock_get_stats, \
|
||||
patch('dearpygui.dearpygui.set_value') as mock_set_value, \
|
||||
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
|
||||
patch('dearpygui.dearpygui.is_item_shown', return_value=False), \
|
||||
patch('dearpygui.dearpygui.does_item_exist', return_value=True) as mock_does_item_exist:
|
||||
|
||||
# 4. Call the method under test
|
||||
app_instance._refresh_api_metrics()
|
||||
|
||||
# 5. Assert the results
|
||||
mock_get_stats.assert_called_once()
|
||||
|
||||
# Assert history bleed widgets were updated
|
||||
mock_set_value.assert_any_call("token_budget_bar", 0.75)
|
||||
mock_set_value.assert_any_call("token_budget_label", "135,000 / 180,000")
|
||||
|
||||
# Assert Gemini-specific widget was hidden
|
||||
mock_configure_item.assert_any_call("gemini_cache_label", show=False)
|
||||
# 1. Set the provider to anthropic
|
||||
app_instance.current_provider = "anthropic"
|
||||
# 2. Define the mock stats
|
||||
mock_stats = {
|
||||
"provider": "anthropic",
|
||||
"limit": 180000,
|
||||
"current": 135000,
|
||||
"percentage": 75.0,
|
||||
}
|
||||
# 3. Patch the dependencies
|
||||
app_instance._last_bleed_update_time = 0 # Force update
|
||||
with patch('ai_client.get_history_bleed_stats', return_value=mock_stats) as mock_get_stats, \
|
||||
patch('dearpygui.dearpygui.set_value') as mock_set_value, \
|
||||
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
|
||||
patch('dearpygui.dearpygui.is_item_shown', return_value=False), \
|
||||
patch('dearpygui.dearpygui.does_item_exist', return_value=True) as mock_does_item_exist:
|
||||
# 4. Call the method under test
|
||||
app_instance._refresh_api_metrics()
|
||||
# 5. Assert the results
|
||||
mock_get_stats.assert_called_once()
|
||||
# Assert history bleed widgets were updated
|
||||
mock_set_value.assert_any_call("token_budget_bar", 0.75)
|
||||
mock_set_value.assert_any_call("token_budget_label", "135,000 / 180,000")
|
||||
# Assert Gemini-specific widget was hidden
|
||||
mock_configure_item.assert_any_call("gemini_cache_label", show=False)
|
||||
|
||||
def test_cache_data_display_updates_correctly(app_instance):
|
||||
"""
|
||||
"""
|
||||
Tests that the _update_performance_diagnostics method correctly updates the
|
||||
GUI with Gemini cache statistics when the provider is set to Gemini.
|
||||
"""
|
||||
# 1. Set the provider to Gemini
|
||||
app_instance.current_provider = "gemini"
|
||||
|
||||
# 2. Define mock cache stats
|
||||
mock_cache_stats = {
|
||||
'cache_count': 5,
|
||||
'total_size_bytes': 12345
|
||||
}
|
||||
# Expected formatted string
|
||||
expected_text = "Gemini Caches: 5 (12.1 KB)"
|
||||
|
||||
# 3. Patch dependencies
|
||||
app_instance._last_bleed_update_time = 0 # Force update
|
||||
with patch('ai_client.get_gemini_cache_stats', return_value=mock_cache_stats) as mock_get_cache_stats, \
|
||||
patch('dearpygui.dearpygui.set_value') as mock_set_value, \
|
||||
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
|
||||
patch('dearpygui.dearpygui.is_item_shown', return_value=False), \
|
||||
patch('dearpygui.dearpygui.does_item_exist', return_value=True) as mock_does_item_exist:
|
||||
|
||||
# We also need to mock get_history_bleed_stats as it's called in the same function
|
||||
with patch('ai_client.get_history_bleed_stats', return_value={}):
|
||||
|
||||
# 4. Call the method under test with payload
|
||||
app_instance._refresh_api_metrics(payload={'cache_stats': mock_cache_stats})
|
||||
|
||||
# 5. Assert the results
|
||||
# mock_get_cache_stats.assert_called_once() # No longer called synchronously
|
||||
|
||||
# Check that the UI item was shown and its value was set
|
||||
mock_configure_item.assert_any_call("gemini_cache_label", show=True)
|
||||
mock_set_value.assert_any_call("gemini_cache_label", expected_text)
|
||||
# 1. Set the provider to Gemini
|
||||
app_instance.current_provider = "gemini"
|
||||
# 2. Define mock cache stats
|
||||
mock_cache_stats = {
|
||||
'cache_count': 5,
|
||||
'total_size_bytes': 12345
|
||||
}
|
||||
# Expected formatted string
|
||||
expected_text = "Gemini Caches: 5 (12.1 KB)"
|
||||
# 3. Patch dependencies
|
||||
app_instance._last_bleed_update_time = 0 # Force update
|
||||
with patch('ai_client.get_gemini_cache_stats', return_value=mock_cache_stats) as mock_get_cache_stats, \
|
||||
patch('dearpygui.dearpygui.set_value') as mock_set_value, \
|
||||
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
|
||||
patch('dearpygui.dearpygui.is_item_shown', return_value=False), \
|
||||
patch('dearpygui.dearpygui.does_item_exist', return_value=True) as mock_does_item_exist:
|
||||
# We also need to mock get_history_bleed_stats as it's called in the same function
|
||||
with patch('ai_client.get_history_bleed_stats', return_value={}):
|
||||
# 4. Call the method under test with payload
|
||||
app_instance._refresh_api_metrics(payload={'cache_stats': mock_cache_stats})
|
||||
# 5. Assert the results
|
||||
# mock_get_cache_stats.assert_called_once() # No longer called synchronously
|
||||
# Check that the UI item was shown and its value was set
|
||||
mock_configure_item.assert_any_call("gemini_cache_label", show=True)
|
||||
mock_set_value.assert_any_call("gemini_cache_label", expected_text)
|
||||
|
||||
|
||||
@@ -8,173 +8,163 @@ from pathlib import Path
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
class TestHeadlessAPI(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# We need an App instance to initialize the API, but we want to avoid GUI stuff
|
||||
with patch('gui_2.session_logger.open_session'), \
|
||||
patch('gui_2.ai_client.set_provider'), \
|
||||
patch('gui_2.session_logger.close_session'):
|
||||
self.app_instance = gui_2.App()
|
||||
# Set a default API key for tests
|
||||
self.test_api_key = "test-secret-key"
|
||||
self.app_instance.config["headless"] = {"api_key": self.test_api_key}
|
||||
self.headers = {"X-API-KEY": self.test_api_key}
|
||||
def setUp(self):
|
||||
# We need an App instance to initialize the API, but we want to avoid GUI stuff
|
||||
with patch('gui_2.session_logger.open_session'), \
|
||||
patch('gui_2.ai_client.set_provider'), \
|
||||
patch('gui_2.session_logger.close_session'):
|
||||
self.app_instance = gui_2.App()
|
||||
# Set a default API key for tests
|
||||
self.test_api_key = "test-secret-key"
|
||||
self.app_instance.config["headless"] = {"api_key": self.test_api_key}
|
||||
self.headers = {"X-API-KEY": self.test_api_key}
|
||||
# Clear any leftover state
|
||||
self.app_instance._pending_actions = {}
|
||||
self.app_instance._pending_dialog = None
|
||||
self.api = self.app_instance.create_api()
|
||||
self.client = TestClient(self.api)
|
||||
|
||||
# Clear any leftover state
|
||||
self.app_instance._pending_actions = {}
|
||||
self.app_instance._pending_dialog = None
|
||||
def test_health_endpoint(self):
|
||||
response = self.client.get("/health")
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(response.json(), {"status": "ok"})
|
||||
|
||||
self.api = self.app_instance.create_api()
|
||||
self.client = TestClient(self.api)
|
||||
def test_status_endpoint_unauthorized(self):
|
||||
# Ensure a key is required
|
||||
with patch.dict(self.app_instance.config, {"headless": {"api_key": "some-required-key"}}):
|
||||
response = self.client.get("/status")
|
||||
self.assertEqual(response.status_code, 403)
|
||||
|
||||
def test_health_endpoint(self):
|
||||
response = self.client.get("/health")
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(response.json(), {"status": "ok"})
|
||||
def test_status_endpoint_authorized(self):
|
||||
# We'll use a test key
|
||||
headers = {"X-API-KEY": "test-secret-key"}
|
||||
with patch.dict(self.app_instance.config, {"headless": {"api_key": "test-secret-key"}}):
|
||||
response = self.client.get("/status", headers=headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
def test_status_endpoint_unauthorized(self):
|
||||
# Ensure a key is required
|
||||
with patch.dict(self.app_instance.config, {"headless": {"api_key": "some-required-key"}}):
|
||||
response = self.client.get("/status")
|
||||
self.assertEqual(response.status_code, 403)
|
||||
def test_generate_endpoint(self):
|
||||
payload = {
|
||||
"prompt": "Hello AI"
|
||||
}
|
||||
# Mock ai_client.send and get_comms_log
|
||||
with patch('gui_2.ai_client.send') as mock_send, \
|
||||
patch('gui_2.ai_client.get_comms_log') as mock_log:
|
||||
mock_send.return_value = "Hello from Mock AI"
|
||||
mock_log.return_value = [{
|
||||
"kind": "response",
|
||||
"payload": {
|
||||
"usage": {"input_tokens": 10, "output_tokens": 5}
|
||||
}
|
||||
}]
|
||||
response = self.client.post("/api/v1/generate", json=payload, headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
data = response.json()
|
||||
self.assertEqual(data["text"], "Hello from Mock AI")
|
||||
self.assertIn("metadata", data)
|
||||
self.assertEqual(data["usage"]["input_tokens"], 10)
|
||||
|
||||
def test_status_endpoint_authorized(self):
|
||||
# We'll use a test key
|
||||
headers = {"X-API-KEY": "test-secret-key"}
|
||||
with patch.dict(self.app_instance.config, {"headless": {"api_key": "test-secret-key"}}):
|
||||
response = self.client.get("/status", headers=headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
def test_pending_actions_endpoint(self):
|
||||
# Manually add a pending action
|
||||
with patch('gui_2.uuid.uuid4', return_value="test-action-id"):
|
||||
dialog = gui_2.ConfirmDialog("dir", ".")
|
||||
self.app_instance._pending_actions[dialog._uid] = dialog
|
||||
response = self.client.get("/api/v1/pending_actions", headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
data = response.json()
|
||||
self.assertEqual(len(data), 1)
|
||||
self.assertEqual(data[0]["action_id"], "test-action-id")
|
||||
|
||||
def test_generate_endpoint(self):
|
||||
payload = {
|
||||
"prompt": "Hello AI"
|
||||
}
|
||||
# Mock ai_client.send and get_comms_log
|
||||
with patch('gui_2.ai_client.send') as mock_send, \
|
||||
patch('gui_2.ai_client.get_comms_log') as mock_log:
|
||||
mock_send.return_value = "Hello from Mock AI"
|
||||
mock_log.return_value = [{
|
||||
"kind": "response",
|
||||
"payload": {
|
||||
"usage": {"input_tokens": 10, "output_tokens": 5}
|
||||
}
|
||||
}]
|
||||
def test_confirm_action_endpoint(self):
|
||||
# Manually add a pending action
|
||||
with patch('gui_2.uuid.uuid4', return_value="test-confirm-id"):
|
||||
dialog = gui_2.ConfirmDialog("dir", ".")
|
||||
self.app_instance._pending_actions[dialog._uid] = dialog
|
||||
payload = {"approved": True}
|
||||
response = self.client.post("/api/v1/confirm/test-confirm-id", json=payload, headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertTrue(dialog._done)
|
||||
self.assertTrue(dialog._approved)
|
||||
|
||||
response = self.client.post("/api/v1/generate", json=payload, headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
data = response.json()
|
||||
self.assertEqual(data["text"], "Hello from Mock AI")
|
||||
self.assertIn("metadata", data)
|
||||
self.assertEqual(data["usage"]["input_tokens"], 10)
|
||||
def test_list_sessions_endpoint(self):
|
||||
# Ensure logs directory exists
|
||||
Path("logs").mkdir(exist_ok=True)
|
||||
# Create a dummy log
|
||||
dummy_log = Path("logs/test_session_api.log")
|
||||
dummy_log.write_text("dummy content")
|
||||
try:
|
||||
response = self.client.get("/api/v1/sessions", headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
data = response.json()
|
||||
self.assertIn("test_session_api.log", data)
|
||||
finally:
|
||||
if dummy_log.exists():
|
||||
dummy_log.unlink()
|
||||
|
||||
def test_pending_actions_endpoint(self):
|
||||
# Manually add a pending action
|
||||
with patch('gui_2.uuid.uuid4', return_value="test-action-id"):
|
||||
dialog = gui_2.ConfirmDialog("dir", ".")
|
||||
self.app_instance._pending_actions[dialog._uid] = dialog
|
||||
def test_get_context_endpoint(self):
|
||||
response = self.client.get("/api/v1/context", headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
data = response.json()
|
||||
self.assertIn("files", data)
|
||||
self.assertIn("screenshots", data)
|
||||
self.assertIn("files_base_dir", data)
|
||||
|
||||
response = self.client.get("/api/v1/pending_actions", headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
data = response.json()
|
||||
self.assertEqual(len(data), 1)
|
||||
self.assertEqual(data[0]["action_id"], "test-action-id")
|
||||
|
||||
def test_confirm_action_endpoint(self):
|
||||
# Manually add a pending action
|
||||
with patch('gui_2.uuid.uuid4', return_value="test-confirm-id"):
|
||||
dialog = gui_2.ConfirmDialog("dir", ".")
|
||||
self.app_instance._pending_actions[dialog._uid] = dialog
|
||||
|
||||
payload = {"approved": True}
|
||||
response = self.client.post("/api/v1/confirm/test-confirm-id", json=payload, headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertTrue(dialog._done)
|
||||
self.assertTrue(dialog._approved)
|
||||
|
||||
def test_list_sessions_endpoint(self):
|
||||
# Ensure logs directory exists
|
||||
Path("logs").mkdir(exist_ok=True)
|
||||
# Create a dummy log
|
||||
dummy_log = Path("logs/test_session_api.log")
|
||||
dummy_log.write_text("dummy content")
|
||||
|
||||
try:
|
||||
response = self.client.get("/api/v1/sessions", headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
data = response.json()
|
||||
self.assertIn("test_session_api.log", data)
|
||||
finally:
|
||||
if dummy_log.exists():
|
||||
dummy_log.unlink()
|
||||
|
||||
def test_get_context_endpoint(self):
|
||||
response = self.client.get("/api/v1/context", headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
data = response.json()
|
||||
self.assertIn("files", data)
|
||||
self.assertIn("screenshots", data)
|
||||
self.assertIn("files_base_dir", data)
|
||||
|
||||
def test_endpoint_no_api_key_configured(self):
|
||||
# Test the security fix specifically
|
||||
with patch.dict(self.app_instance.config, {"headless": {"api_key": ""}}):
|
||||
response = self.client.get("/status", headers=self.headers)
|
||||
self.assertEqual(response.status_code, 403)
|
||||
self.assertEqual(response.json()["detail"], "API Key not configured on server")
|
||||
def test_endpoint_no_api_key_configured(self):
|
||||
# Test the security fix specifically
|
||||
with patch.dict(self.app_instance.config, {"headless": {"api_key": ""}}):
|
||||
response = self.client.get("/status", headers=self.headers)
|
||||
self.assertEqual(response.status_code, 403)
|
||||
self.assertEqual(response.json()["detail"], "API Key not configured on server")
|
||||
|
||||
class TestHeadlessStartup(unittest.TestCase):
|
||||
|
||||
@patch('gui_2.immapp.run')
|
||||
@patch('gui_2.api_hooks.HookServer')
|
||||
@patch('gui_2.save_config')
|
||||
@patch('gui_2.ai_client.cleanup')
|
||||
@patch('uvicorn.run') # Mock uvicorn.run to prevent hanging
|
||||
def test_headless_flag_prevents_gui_run(self, mock_uvicorn_run, mock_cleanup, mock_save_config, mock_hook_server, mock_immapp_run):
|
||||
# Setup mock argv with --headless
|
||||
test_args = ["gui_2.py", "--headless"]
|
||||
@patch('gui_2.immapp.run')
|
||||
@patch('gui_2.api_hooks.HookServer')
|
||||
@patch('gui_2.save_config')
|
||||
@patch('gui_2.ai_client.cleanup')
|
||||
@patch('uvicorn.run') # Mock uvicorn.run to prevent hanging
|
||||
def test_headless_flag_prevents_gui_run(self, mock_uvicorn_run, mock_cleanup, mock_save_config, mock_hook_server, mock_immapp_run):
|
||||
# Setup mock argv with --headless
|
||||
test_args = ["gui_2.py", "--headless"]
|
||||
with patch.object(sys, 'argv', test_args):
|
||||
with patch('gui_2.session_logger.close_session'), \
|
||||
patch('gui_2.session_logger.open_session'):
|
||||
app = gui_2.App()
|
||||
# Mock _fetch_models to avoid network calls
|
||||
app._fetch_models = MagicMock()
|
||||
app.run()
|
||||
# Expectation: immapp.run should NOT be called in headless mode
|
||||
mock_immapp_run.assert_not_called()
|
||||
# Expectation: uvicorn.run SHOULD be called
|
||||
mock_uvicorn_run.assert_called_once()
|
||||
|
||||
with patch.object(sys, 'argv', test_args):
|
||||
with patch('gui_2.session_logger.close_session'), \
|
||||
patch('gui_2.session_logger.open_session'):
|
||||
app = gui_2.App()
|
||||
|
||||
# Mock _fetch_models to avoid network calls
|
||||
app._fetch_models = MagicMock()
|
||||
|
||||
app.run()
|
||||
|
||||
# Expectation: immapp.run should NOT be called in headless mode
|
||||
mock_immapp_run.assert_not_called()
|
||||
# Expectation: uvicorn.run SHOULD be called
|
||||
mock_uvicorn_run.assert_called_once()
|
||||
|
||||
@patch('gui_2.immapp.run')
|
||||
def test_normal_startup_calls_gui_run(self, mock_immapp_run):
|
||||
test_args = ["gui_2.py"]
|
||||
with patch.object(sys, 'argv', test_args):
|
||||
# In normal mode, it should still call immapp.run
|
||||
with patch('gui_2.api_hooks.HookServer'), \
|
||||
patch('gui_2.save_config'), \
|
||||
patch('gui_2.ai_client.cleanup'), \
|
||||
patch('gui_2.session_logger.close_session'), \
|
||||
patch('gui_2.session_logger.open_session'):
|
||||
app = gui_2.App()
|
||||
app._fetch_models = MagicMock()
|
||||
app.run()
|
||||
mock_immapp_run.assert_called_once()
|
||||
@patch('gui_2.immapp.run')
|
||||
def test_normal_startup_calls_gui_run(self, mock_immapp_run):
|
||||
test_args = ["gui_2.py"]
|
||||
with patch.object(sys, 'argv', test_args):
|
||||
# In normal mode, it should still call immapp.run
|
||||
with patch('gui_2.api_hooks.HookServer'), \
|
||||
patch('gui_2.save_config'), \
|
||||
patch('gui_2.ai_client.cleanup'), \
|
||||
patch('gui_2.session_logger.close_session'), \
|
||||
patch('gui_2.session_logger.open_session'):
|
||||
app = gui_2.App()
|
||||
app._fetch_models = MagicMock()
|
||||
app.run()
|
||||
mock_immapp_run.assert_called_once()
|
||||
|
||||
def test_fastapi_installed():
|
||||
"""Verify that fastapi is installed."""
|
||||
try:
|
||||
importlib.import_module("fastapi")
|
||||
except ImportError:
|
||||
pytest.fail("fastapi is not installed")
|
||||
"""Verify that fastapi is installed."""
|
||||
try:
|
||||
importlib.import_module("fastapi")
|
||||
except ImportError:
|
||||
pytest.fail("fastapi is not installed")
|
||||
|
||||
def test_uvicorn_installed():
|
||||
"""Verify that uvicorn is installed."""
|
||||
try:
|
||||
importlib.import_module("uvicorn")
|
||||
except ImportError:
|
||||
pytest.fail("uvicorn is not installed")
|
||||
"""Verify that uvicorn is installed."""
|
||||
try:
|
||||
importlib.import_module("uvicorn")
|
||||
except ImportError:
|
||||
pytest.fail("uvicorn is not installed")
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
unittest.main()
|
||||
|
||||
@@ -7,137 +7,113 @@ import json
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_headless_verification_full_run():
|
||||
"""
|
||||
"""
|
||||
1. Initialize a ConductorEngine with a Track containing multiple dependent Tickets.
|
||||
2. Simulate a full execution run using engine.run_linear().
|
||||
3. Mock ai_client.send to simulate successful tool calls and final responses.
|
||||
4. Specifically verify that 'Context Amnesia' is maintained.
|
||||
"""
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker1", depends_on=["T1"])
|
||||
track = Track(id="track_verify", description="Verification Track", tickets=[t1, t2])
|
||||
|
||||
from events import AsyncEventQueue
|
||||
queue = AsyncEventQueue()
|
||||
engine = ConductorEngine(track=track, event_queue=queue)
|
||||
|
||||
with patch("ai_client.send") as mock_send, \
|
||||
patch("ai_client.reset_session") as mock_reset:
|
||||
|
||||
# We need mock_send to return something that doesn't contain "BLOCKED"
|
||||
mock_send.return_value = "Task completed successfully."
|
||||
|
||||
await engine.run_linear()
|
||||
|
||||
# Verify both tickets are completed
|
||||
assert t1.status == "completed"
|
||||
assert t2.status == "completed"
|
||||
|
||||
# Verify that ai_client.send was called twice (once for each ticket)
|
||||
assert mock_send.call_count == 2
|
||||
|
||||
# Verify Context Amnesia: reset_session should be called for each ticket
|
||||
assert mock_reset.call_count == 2
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker1", depends_on=["T1"])
|
||||
track = Track(id="track_verify", description="Verification Track", tickets=[t1, t2])
|
||||
from events import AsyncEventQueue
|
||||
queue = AsyncEventQueue()
|
||||
engine = ConductorEngine(track=track, event_queue=queue)
|
||||
with patch("ai_client.send") as mock_send, \
|
||||
patch("ai_client.reset_session") as mock_reset:
|
||||
# We need mock_send to return something that doesn't contain "BLOCKED"
|
||||
mock_send.return_value = "Task completed successfully."
|
||||
await engine.run_linear()
|
||||
# Verify both tickets are completed
|
||||
assert t1.status == "completed"
|
||||
assert t2.status == "completed"
|
||||
# Verify that ai_client.send was called twice (once for each ticket)
|
||||
assert mock_send.call_count == 2
|
||||
# Verify Context Amnesia: reset_session should be called for each ticket
|
||||
assert mock_reset.call_count == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_headless_verification_error_and_qa_interceptor():
|
||||
"""
|
||||
"""
|
||||
5. Simulate a shell error and verify that the Tier 4 QA interceptor is triggered
|
||||
and its summary is injected into the worker's history for the next retry.
|
||||
"""
|
||||
t1 = Ticket(id="T1", description="Task with error", status="todo", assigned_to="worker1")
|
||||
track = Track(id="track_error", description="Error Track", tickets=[t1])
|
||||
|
||||
from events import AsyncEventQueue
|
||||
queue = AsyncEventQueue()
|
||||
engine = ConductorEngine(track=track, event_queue=queue)
|
||||
t1 = Ticket(id="T1", description="Task with error", status="todo", assigned_to="worker1")
|
||||
track = Track(id="track_error", description="Error Track", tickets=[t1])
|
||||
from events import AsyncEventQueue
|
||||
queue = AsyncEventQueue()
|
||||
engine = ConductorEngine(track=track, event_queue=queue)
|
||||
# We need to simulate the tool loop inside ai_client._send_gemini (or similar)
|
||||
# Since we want to test the real tool loop and QA injection, we mock at the provider level.
|
||||
with patch("ai_client._provider", "gemini"), \
|
||||
patch("ai_client._gemini_client") as mock_genai_client, \
|
||||
patch("ai_client.confirm_and_run_callback") as mock_run, \
|
||||
patch("ai_client.run_tier4_analysis") as mock_qa, \
|
||||
patch("ai_client._ensure_gemini_client") as mock_ensure, \
|
||||
patch("ai_client._gemini_tool_declaration", return_value=None):
|
||||
# Ensure _gemini_client is restored by the mock ensure function
|
||||
import ai_client
|
||||
|
||||
# We need to simulate the tool loop inside ai_client._send_gemini (or similar)
|
||||
# Since we want to test the real tool loop and QA injection, we mock at the provider level.
|
||||
|
||||
with patch("ai_client._provider", "gemini"), \
|
||||
patch("ai_client._gemini_client") as mock_genai_client, \
|
||||
patch("ai_client.confirm_and_run_callback") as mock_run, \
|
||||
patch("ai_client.run_tier4_analysis") as mock_qa, \
|
||||
patch("ai_client._ensure_gemini_client") as mock_ensure, \
|
||||
patch("ai_client._gemini_tool_declaration", return_value=None):
|
||||
|
||||
# Ensure _gemini_client is restored by the mock ensure function
|
||||
import ai_client
|
||||
def restore_client():
|
||||
ai_client._gemini_client = mock_genai_client
|
||||
mock_ensure.side_effect = restore_client
|
||||
ai_client._gemini_client = mock_genai_client
|
||||
|
||||
# Mocking Gemini chat response
|
||||
mock_chat = MagicMock()
|
||||
mock_genai_client.chats.create.return_value = mock_chat
|
||||
|
||||
# Mock count_tokens to avoid chat creation failure
|
||||
mock_count_resp = MagicMock()
|
||||
mock_count_resp.total_tokens = 100
|
||||
mock_genai_client.models.count_tokens.return_value = mock_count_resp
|
||||
def restore_client():
|
||||
ai_client._gemini_client = mock_genai_client
|
||||
mock_ensure.side_effect = restore_client
|
||||
ai_client._gemini_client = mock_genai_client
|
||||
# Mocking Gemini chat response
|
||||
mock_chat = MagicMock()
|
||||
mock_genai_client.chats.create.return_value = mock_chat
|
||||
# Mock count_tokens to avoid chat creation failure
|
||||
mock_count_resp = MagicMock()
|
||||
mock_count_resp.total_tokens = 100
|
||||
mock_genai_client.models.count_tokens.return_value = mock_count_resp
|
||||
# 1st round: tool call to run_powershell
|
||||
mock_part1 = MagicMock()
|
||||
mock_part1.text = "I will run a command."
|
||||
mock_part1.function_call = MagicMock()
|
||||
mock_part1.function_call.name = "run_powershell"
|
||||
mock_part1.function_call.args = {"script": "dir"}
|
||||
mock_resp1 = MagicMock()
|
||||
mock_resp1.candidates = [MagicMock(content=MagicMock(parts=[mock_part1]), finish_reason=MagicMock(name="STOP"))]
|
||||
mock_resp1.usage_metadata.prompt_token_count = 10
|
||||
mock_resp1.usage_metadata.candidates_token_count = 5
|
||||
# 2nd round: Final text after tool result
|
||||
mock_part2 = MagicMock()
|
||||
mock_part2.text = "The command failed but I understand why. Task done."
|
||||
mock_part2.function_call = None
|
||||
mock_resp2 = MagicMock()
|
||||
mock_resp2.candidates = [MagicMock(content=MagicMock(parts=[mock_part2]), finish_reason=MagicMock(name="STOP"))]
|
||||
mock_resp2.usage_metadata.prompt_token_count = 20
|
||||
mock_resp2.usage_metadata.candidates_token_count = 10
|
||||
mock_chat.send_message.side_effect = [mock_resp1, mock_resp2]
|
||||
# Mock run_powershell behavior: it should call the qa_callback on error
|
||||
|
||||
# 1st round: tool call to run_powershell
|
||||
mock_part1 = MagicMock()
|
||||
mock_part1.text = "I will run a command."
|
||||
mock_part1.function_call = MagicMock()
|
||||
mock_part1.function_call.name = "run_powershell"
|
||||
mock_part1.function_call.args = {"script": "dir"}
|
||||
|
||||
mock_resp1 = MagicMock()
|
||||
mock_resp1.candidates = [MagicMock(content=MagicMock(parts=[mock_part1]), finish_reason=MagicMock(name="STOP"))]
|
||||
mock_resp1.usage_metadata.prompt_token_count = 10
|
||||
mock_resp1.usage_metadata.candidates_token_count = 5
|
||||
|
||||
# 2nd round: Final text after tool result
|
||||
mock_part2 = MagicMock()
|
||||
mock_part2.text = "The command failed but I understand why. Task done."
|
||||
mock_part2.function_call = None
|
||||
|
||||
mock_resp2 = MagicMock()
|
||||
mock_resp2.candidates = [MagicMock(content=MagicMock(parts=[mock_part2]), finish_reason=MagicMock(name="STOP"))]
|
||||
mock_resp2.usage_metadata.prompt_token_count = 20
|
||||
mock_resp2.usage_metadata.candidates_token_count = 10
|
||||
|
||||
mock_chat.send_message.side_effect = [mock_resp1, mock_resp2]
|
||||
|
||||
# Mock run_powershell behavior: it should call the qa_callback on error
|
||||
def run_side_effect(script, base_dir, qa_callback):
|
||||
if qa_callback:
|
||||
analysis = qa_callback("Error: file not found")
|
||||
return f"""STDERR: Error: file not found
|
||||
def run_side_effect(script, base_dir, qa_callback):
|
||||
if qa_callback:
|
||||
analysis = qa_callback("Error: file not found")
|
||||
return f"""STDERR: Error: file not found
|
||||
|
||||
QA ANALYSIS:
|
||||
{analysis}"""
|
||||
return "Error: file not found"
|
||||
|
||||
mock_run.side_effect = run_side_effect
|
||||
mock_qa.return_value = "FIX: Check if path exists."
|
||||
|
||||
await engine.run_linear()
|
||||
|
||||
# Verify QA analysis was triggered
|
||||
mock_qa.assert_called_once_with("Error: file not found")
|
||||
|
||||
# Verify the 2nd send_message call includes the QA ANALYSIS in its payload (f_resps)
|
||||
# The first call is the user message, the second is the tool response.
|
||||
assert mock_chat.send_message.call_count == 2
|
||||
args, kwargs = mock_chat.send_message.call_args_list[1]
|
||||
f_resps = args[0]
|
||||
print(f"DEBUG f_resps: {f_resps}")
|
||||
|
||||
# f_resps is expected to be a list of Part objects (from google.genai.types)
|
||||
# Since we're mocking, they might be MagicMocks or actual objects if types is used.
|
||||
# In our case, ai_client.Part.from_function_response is used.
|
||||
|
||||
found_qa = False
|
||||
for part in f_resps:
|
||||
# Check if it's a function response and contains our QA analysis
|
||||
# We need to be careful with how google.genai.types.Part is structured or mocked
|
||||
part_str = str(part)
|
||||
print(f"DEBUG part_str: {part_str}")
|
||||
if "QA ANALYSIS:" in part_str and "FIX: Check if path exists." in part_str:
|
||||
found_qa = True
|
||||
|
||||
assert found_qa, "QA Analysis was not injected into the next round"
|
||||
return "Error: file not found"
|
||||
mock_run.side_effect = run_side_effect
|
||||
mock_qa.return_value = "FIX: Check if path exists."
|
||||
await engine.run_linear()
|
||||
# Verify QA analysis was triggered
|
||||
mock_qa.assert_called_once_with("Error: file not found")
|
||||
# Verify the 2nd send_message call includes the QA ANALYSIS in its payload (f_resps)
|
||||
# The first call is the user message, the second is the tool response.
|
||||
assert mock_chat.send_message.call_count == 2
|
||||
args, kwargs = mock_chat.send_message.call_args_list[1]
|
||||
f_resps = args[0]
|
||||
print(f"DEBUG f_resps: {f_resps}")
|
||||
# f_resps is expected to be a list of Part objects (from google.genai.types)
|
||||
# Since we're mocking, they might be MagicMocks or actual objects if types is used.
|
||||
# In our case, ai_client.Part.from_function_response is used.
|
||||
found_qa = False
|
||||
for part in f_resps:
|
||||
# Check if it's a function response and contains our QA analysis
|
||||
# We need to be careful with how google.genai.types.Part is structured or mocked
|
||||
part_str = str(part)
|
||||
print(f"DEBUG part_str: {part_str}")
|
||||
if "QA ANALYSIS:" in part_str and "FIX: Check if path exists." in part_str:
|
||||
found_qa = True
|
||||
assert found_qa, "QA Analysis was not injected into the next round"
|
||||
|
||||
@@ -18,199 +18,164 @@ import ai_client
|
||||
# --- Tests for Aggregate Module ---
|
||||
|
||||
def test_aggregate_includes_segregated_history(tmp_path):
|
||||
"""
|
||||
"""
|
||||
Tests if the aggregate function correctly includes history
|
||||
when it's segregated into a separate file.
|
||||
"""
|
||||
proj_path = tmp_path / "manual_slop.toml"
|
||||
hist_path = tmp_path / "manual_slop_history.toml"
|
||||
|
||||
# Setup segregated project configuration
|
||||
proj_data = project_manager.default_project("test-aggregate")
|
||||
proj_data["discussion"]["discussions"]["main"]["history"] = ["@2026-02-24T14:00:00\nUser:\nShow me history"]
|
||||
|
||||
# Save the project, which should segregate the history
|
||||
project_manager.save_project(proj_data, proj_path)
|
||||
|
||||
# Load the project and aggregate its content
|
||||
loaded_proj = project_manager.load_project(proj_path)
|
||||
config = project_manager.flat_config(loaded_proj)
|
||||
|
||||
markdown, output_file, file_items = aggregate.run(config)
|
||||
|
||||
# Assert that the history is present in the aggregated markdown
|
||||
assert "## Discussion History" in markdown
|
||||
assert "Show me history" in markdown
|
||||
|
||||
# --- Tests for MCP Client and Blacklisting ---
|
||||
proj_path = tmp_path / "manual_slop.toml"
|
||||
hist_path = tmp_path / "manual_slop_history.toml"
|
||||
# Setup segregated project configuration
|
||||
proj_data = project_manager.default_project("test-aggregate")
|
||||
proj_data["discussion"]["discussions"]["main"]["history"] = ["@2026-02-24T14:00:00\nUser:\nShow me history"]
|
||||
# Save the project, which should segregate the history
|
||||
project_manager.save_project(proj_data, proj_path)
|
||||
# Load the project and aggregate its content
|
||||
loaded_proj = project_manager.load_project(proj_path)
|
||||
config = project_manager.flat_config(loaded_proj)
|
||||
markdown, output_file, file_items = aggregate.run(config)
|
||||
# Assert that the history is present in the aggregated markdown
|
||||
assert "## Discussion History" in markdown
|
||||
assert "Show me history" in markdown
|
||||
# --- Tests for MCP Client and Blacklisting ---
|
||||
|
||||
def test_mcp_blacklist(tmp_path):
|
||||
"""
|
||||
"""
|
||||
Tests that the MCP client correctly blacklists specified files
|
||||
and prevents listing them.
|
||||
"""
|
||||
# Setup a file that should be blacklisted
|
||||
hist_file = tmp_path / "my_project_history.toml"
|
||||
hist_file.write_text("secret history", encoding="utf-8")
|
||||
|
||||
# Configure MCP client to allow access to the temporary directory
|
||||
# but ensure the history file is implicitly or explicitly blacklisted.
|
||||
mcp_client.configure([{"path": str(hist_file)}], extra_base_dirs=[str(tmp_path)])
|
||||
|
||||
# Attempt to read the blacklisted file - should result in an access denied message
|
||||
result = mcp_client.read_file(str(hist_file))
|
||||
assert "ACCESS DENIED" in result or "BLACKLISTED" in result
|
||||
|
||||
# Attempt to list the directory containing the blacklisted file
|
||||
result = mcp_client.list_directory(str(tmp_path))
|
||||
# The blacklisted file should not appear in the directory listing
|
||||
assert "my_project_history.toml" not in result
|
||||
# Setup a file that should be blacklisted
|
||||
hist_file = tmp_path / "my_project_history.toml"
|
||||
hist_file.write_text("secret history", encoding="utf-8")
|
||||
# Configure MCP client to allow access to the temporary directory
|
||||
# but ensure the history file is implicitly or explicitly blacklisted.
|
||||
mcp_client.configure([{"path": str(hist_file)}], extra_base_dirs=[str(tmp_path)])
|
||||
# Attempt to read the blacklisted file - should result in an access denied message
|
||||
result = mcp_client.read_file(str(hist_file))
|
||||
assert "ACCESS DENIED" in result or "BLACKLISTED" in result
|
||||
# Attempt to list the directory containing the blacklisted file
|
||||
result = mcp_client.list_directory(str(tmp_path))
|
||||
# The blacklisted file should not appear in the directory listing
|
||||
assert "my_project_history.toml" not in result
|
||||
|
||||
def test_aggregate_blacklist(tmp_path):
|
||||
"""
|
||||
"""
|
||||
Tests that aggregate's path resolution respects blacklisting,
|
||||
ensuring history files are not included by default.
|
||||
"""
|
||||
# Setup a history file in the temporary directory
|
||||
hist_file = tmp_path / "my_project_history.toml"
|
||||
hist_file.write_text("secret history", encoding="utf-8")
|
||||
|
||||
# Attempt to resolve paths including the history file using a wildcard
|
||||
paths = aggregate.resolve_paths(tmp_path, "*_history.toml")
|
||||
assert hist_file not in paths, "History file should be blacklisted and not resolved"
|
||||
|
||||
# Resolve all paths and ensure the history file is still excluded
|
||||
paths = aggregate.resolve_paths(tmp_path, "*")
|
||||
assert hist_file not in paths, "History file should be excluded even with a general glob"
|
||||
|
||||
# --- Tests for History Migration and Separation ---
|
||||
# Setup a history file in the temporary directory
|
||||
hist_file = tmp_path / "my_project_history.toml"
|
||||
hist_file.write_text("secret history", encoding="utf-8")
|
||||
# Attempt to resolve paths including the history file using a wildcard
|
||||
paths = aggregate.resolve_paths(tmp_path, "*_history.toml")
|
||||
assert hist_file not in paths, "History file should be blacklisted and not resolved"
|
||||
# Resolve all paths and ensure the history file is still excluded
|
||||
paths = aggregate.resolve_paths(tmp_path, "*")
|
||||
assert hist_file not in paths, "History file should be excluded even with a general glob"
|
||||
# --- Tests for History Migration and Separation ---
|
||||
|
||||
def test_migration_on_load(tmp_path):
|
||||
"""
|
||||
"""
|
||||
Tests that project loading migrates discussion history from manual_slop.toml
|
||||
to manual_slop_history.toml if it exists in the main config.
|
||||
"""
|
||||
# Define paths for the main project config and the history file
|
||||
proj_path = tmp_path / "manual_slop.toml"
|
||||
hist_path = tmp_path / "manual_slop_history.toml"
|
||||
|
||||
# Create a legacy project data structure with discussion history
|
||||
legacy_data = project_manager.default_project("test-project")
|
||||
legacy_data["discussion"]["discussions"]["main"]["history"] = ["Hello", "World"]
|
||||
|
||||
# Save this legacy data into manual_slop.toml
|
||||
with open(proj_path, "wb") as f:
|
||||
tomli_w.dump(legacy_data, f)
|
||||
|
||||
# Load the project - this action should trigger the migration
|
||||
loaded_data = project_manager.load_project(proj_path)
|
||||
|
||||
# Assertions:
|
||||
assert "discussion" in loaded_data
|
||||
assert loaded_data["discussion"]["discussions"]["main"]["history"] == ["Hello", "World"]
|
||||
|
||||
# 2. The history should no longer be present in the main manual_slop.toml on disk.
|
||||
with open(proj_path, "rb") as f:
|
||||
on_disk_main = tomllib.load(f)
|
||||
assert "discussion" not in on_disk_main, "Discussion history should be removed from main config after migration"
|
||||
|
||||
# 3. The history file (manual_slop_history.toml) should now exist and contain the data.
|
||||
assert hist_path.exists()
|
||||
with open(hist_path, "rb") as f:
|
||||
on_disk_hist = tomllib.load(f)
|
||||
assert on_disk_hist["discussions"]["main"]["history"] == ["Hello", "World"]
|
||||
# Define paths for the main project config and the history file
|
||||
proj_path = tmp_path / "manual_slop.toml"
|
||||
hist_path = tmp_path / "manual_slop_history.toml"
|
||||
# Create a legacy project data structure with discussion history
|
||||
legacy_data = project_manager.default_project("test-project")
|
||||
legacy_data["discussion"]["discussions"]["main"]["history"] = ["Hello", "World"]
|
||||
# Save this legacy data into manual_slop.toml
|
||||
with open(proj_path, "wb") as f:
|
||||
tomli_w.dump(legacy_data, f)
|
||||
# Load the project - this action should trigger the migration
|
||||
loaded_data = project_manager.load_project(proj_path)
|
||||
# Assertions:
|
||||
assert "discussion" in loaded_data
|
||||
assert loaded_data["discussion"]["discussions"]["main"]["history"] == ["Hello", "World"]
|
||||
# 2. The history should no longer be present in the main manual_slop.toml on disk.
|
||||
with open(proj_path, "rb") as f:
|
||||
on_disk_main = tomllib.load(f)
|
||||
assert "discussion" not in on_disk_main, "Discussion history should be removed from main config after migration"
|
||||
# 3. The history file (manual_slop_history.toml) should now exist and contain the data.
|
||||
assert hist_path.exists()
|
||||
with open(hist_path, "rb") as f:
|
||||
on_disk_hist = tomllib.load(f)
|
||||
assert on_disk_hist["discussions"]["main"]["history"] == ["Hello", "World"]
|
||||
|
||||
def test_save_separation(tmp_path):
|
||||
"""
|
||||
"""
|
||||
Tests that saving project data correctly separates discussion history
|
||||
into manual_slop_history.toml.
|
||||
"""
|
||||
# Define paths for the main project config and the history file
|
||||
proj_path = tmp_path / "manual_slop.toml"
|
||||
hist_path = tmp_path / "manual_slop_history.toml"
|
||||
|
||||
# Create fresh project data, including discussion history
|
||||
proj_data = project_manager.default_project("test-project")
|
||||
proj_data["discussion"]["discussions"]["main"]["history"] = ["Saved", "Separately"]
|
||||
|
||||
# Save the project data
|
||||
project_manager.save_project(proj_data, proj_path)
|
||||
|
||||
# Assertions:
|
||||
assert proj_path.exists()
|
||||
assert hist_path.exists()
|
||||
|
||||
# 2. The main project file should NOT contain the discussion history.
|
||||
with open(proj_path, "rb") as f:
|
||||
p_disk = tomllib.load(f)
|
||||
assert "discussion" not in p_disk, "Discussion history should not be in main config file after save"
|
||||
|
||||
# 3. The history file should contain the discussion history.
|
||||
with open(hist_path, "rb") as f:
|
||||
h_disk = tomllib.load(f)
|
||||
assert h_disk["discussions"]["main"]["history"] == ["Saved", "Separately"]
|
||||
|
||||
# --- Tests for History Persistence Across Turns ---
|
||||
# Define paths for the main project config and the history file
|
||||
proj_path = tmp_path / "manual_slop.toml"
|
||||
hist_path = tmp_path / "manual_slop_history.toml"
|
||||
# Create fresh project data, including discussion history
|
||||
proj_data = project_manager.default_project("test-project")
|
||||
proj_data["discussion"]["discussions"]["main"]["history"] = ["Saved", "Separately"]
|
||||
# Save the project data
|
||||
project_manager.save_project(proj_data, proj_path)
|
||||
# Assertions:
|
||||
assert proj_path.exists()
|
||||
assert hist_path.exists()
|
||||
# 2. The main project file should NOT contain the discussion history.
|
||||
with open(proj_path, "rb") as f:
|
||||
p_disk = tomllib.load(f)
|
||||
assert "discussion" not in p_disk, "Discussion history should not be in main config file after save"
|
||||
# 3. The history file should contain the discussion history.
|
||||
with open(hist_path, "rb") as f:
|
||||
h_disk = tomllib.load(f)
|
||||
assert h_disk["discussions"]["main"]["history"] == ["Saved", "Separately"]
|
||||
# --- Tests for History Persistence Across Turns ---
|
||||
|
||||
def test_history_persistence_across_turns(tmp_path):
|
||||
"""
|
||||
"""
|
||||
Tests that discussion history is correctly persisted across multiple save/load cycles.
|
||||
"""
|
||||
proj_path = tmp_path / "manual_slop.toml"
|
||||
hist_path = tmp_path / "manual_slop_history.toml"
|
||||
|
||||
# Step 1: Initialize a new project and save it.
|
||||
proj = project_manager.default_project("test-persistence")
|
||||
project_manager.save_project(proj, proj_path)
|
||||
|
||||
# Step 2: Add a first turn of discussion history.
|
||||
proj = project_manager.load_project(proj_path)
|
||||
entry1 = {"role": "User", "content": "Hello", "ts": "2026-02-24T13:00:00"}
|
||||
proj["discussion"]["discussions"]["main"]["history"].append(project_manager.entry_to_str(entry1))
|
||||
project_manager.save_project(proj, proj_path)
|
||||
|
||||
# Verify separation after the first save
|
||||
with open(proj_path, "rb") as f:
|
||||
p_disk = tomllib.load(f)
|
||||
assert "discussion" not in p_disk
|
||||
|
||||
with open(hist_path, "rb") as f:
|
||||
h_disk = tomllib.load(f)
|
||||
assert h_disk["discussions"]["main"]["history"] == ["@2026-02-24T13:00:00\nUser:\nHello"]
|
||||
|
||||
# Step 3: Add a second turn of discussion history.
|
||||
proj = project_manager.load_project(proj_path)
|
||||
entry2 = {"role": "AI", "content": "Hi there!", "ts": "2026-02-24T13:01:00"}
|
||||
proj["discussion"]["discussions"]["main"]["history"].append(project_manager.entry_to_str(entry2))
|
||||
project_manager.save_project(proj, proj_path)
|
||||
|
||||
# Verify persistence
|
||||
with open(hist_path, "rb") as f:
|
||||
h_disk = tomllib.load(f)
|
||||
assert len(h_disk["discussions"]["main"]["history"]) == 2
|
||||
assert h_disk["discussions"]["main"]["history"][1] == "@2026-02-24T13:01:00\nAI:\nHi there!"
|
||||
|
||||
# Step 4: Reload the project from disk and check history
|
||||
proj_final = project_manager.load_project(proj_path)
|
||||
assert len(proj_final["discussion"]["discussions"]["main"]["history"]) == 2
|
||||
|
||||
# --- Tests for AI Client History Management ---
|
||||
proj_path = tmp_path / "manual_slop.toml"
|
||||
hist_path = tmp_path / "manual_slop_history.toml"
|
||||
# Step 1: Initialize a new project and save it.
|
||||
proj = project_manager.default_project("test-persistence")
|
||||
project_manager.save_project(proj, proj_path)
|
||||
# Step 2: Add a first turn of discussion history.
|
||||
proj = project_manager.load_project(proj_path)
|
||||
entry1 = {"role": "User", "content": "Hello", "ts": "2026-02-24T13:00:00"}
|
||||
proj["discussion"]["discussions"]["main"]["history"].append(project_manager.entry_to_str(entry1))
|
||||
project_manager.save_project(proj, proj_path)
|
||||
# Verify separation after the first save
|
||||
with open(proj_path, "rb") as f:
|
||||
p_disk = tomllib.load(f)
|
||||
assert "discussion" not in p_disk
|
||||
with open(hist_path, "rb") as f:
|
||||
h_disk = tomllib.load(f)
|
||||
assert h_disk["discussions"]["main"]["history"] == ["@2026-02-24T13:00:00\nUser:\nHello"]
|
||||
# Step 3: Add a second turn of discussion history.
|
||||
proj = project_manager.load_project(proj_path)
|
||||
entry2 = {"role": "AI", "content": "Hi there!", "ts": "2026-02-24T13:01:00"}
|
||||
proj["discussion"]["discussions"]["main"]["history"].append(project_manager.entry_to_str(entry2))
|
||||
project_manager.save_project(proj, proj_path)
|
||||
# Verify persistence
|
||||
with open(hist_path, "rb") as f:
|
||||
h_disk = tomllib.load(f)
|
||||
assert len(h_disk["discussions"]["main"]["history"]) == 2
|
||||
assert h_disk["discussions"]["main"]["history"][1] == "@2026-02-24T13:01:00\nAI:\nHi there!"
|
||||
# Step 4: Reload the project from disk and check history
|
||||
proj_final = project_manager.load_project(proj_path)
|
||||
assert len(proj_final["discussion"]["discussions"]["main"]["history"]) == 2
|
||||
# --- Tests for AI Client History Management ---
|
||||
|
||||
def test_get_history_bleed_stats_basic():
|
||||
"""
|
||||
"""
|
||||
Tests basic retrieval of history bleed statistics from the AI client.
|
||||
"""
|
||||
# Reset the AI client's session state
|
||||
ai_client.reset_session()
|
||||
|
||||
# Set a custom history truncation limit for testing purposes.
|
||||
ai_client.set_history_trunc_limit(500)
|
||||
|
||||
# For this test, we're primarily checking the structure of the returned stats
|
||||
# and the configured limit.
|
||||
stats = ai_client.get_history_bleed_stats()
|
||||
|
||||
assert 'current' in stats, "Stats dictionary should contain 'current' token usage"
|
||||
assert 'limit' in stats, "Stats dictionary should contain 'limit'"
|
||||
assert stats['limit'] == 500, f"Expected limit of 500, but got {stats['limit']}"
|
||||
assert isinstance(stats['current'], int) and stats['current'] >= 0
|
||||
# Reset the AI client's session state
|
||||
ai_client.reset_session()
|
||||
# Set a custom history truncation limit for testing purposes.
|
||||
ai_client.set_history_trunc_limit(500)
|
||||
# For this test, we're primarily checking the structure of the returned stats
|
||||
# and the configured limit.
|
||||
stats = ai_client.get_history_bleed_stats()
|
||||
assert 'current' in stats, "Stats dictionary should contain 'current' token usage"
|
||||
assert 'limit' in stats, "Stats dictionary should contain 'limit'"
|
||||
assert stats['limit'] == 500, f"Expected limit of 500, but got {stats['limit']}"
|
||||
assert isinstance(stats['current'], int) and stats['current'] >= 0
|
||||
|
||||
@@ -12,40 +12,35 @@ from api_hook_client import ApiHookClient
|
||||
import gui_legacy
|
||||
|
||||
def test_hooks_enabled_via_cli():
|
||||
with patch.object(sys, 'argv', ['gui_legacy.py', '--enable-test-hooks']):
|
||||
app = gui_legacy.App()
|
||||
assert app.test_hooks_enabled is True
|
||||
with patch.object(sys, 'argv', ['gui_legacy.py', '--enable-test-hooks']):
|
||||
app = gui_legacy.App()
|
||||
assert app.test_hooks_enabled is True
|
||||
|
||||
def test_hooks_disabled_by_default():
|
||||
with patch.object(sys, 'argv', ['gui_legacy.py']):
|
||||
if 'SLOP_TEST_HOOKS' in os.environ:
|
||||
del os.environ['SLOP_TEST_HOOKS']
|
||||
app = gui_legacy.App()
|
||||
assert getattr(app, 'test_hooks_enabled', False) is False
|
||||
with patch.object(sys, 'argv', ['gui_legacy.py']):
|
||||
if 'SLOP_TEST_HOOKS' in os.environ:
|
||||
del os.environ['SLOP_TEST_HOOKS']
|
||||
app = gui_legacy.App()
|
||||
assert getattr(app, 'test_hooks_enabled', False) is False
|
||||
|
||||
def test_live_hook_server_responses(live_gui):
|
||||
"""
|
||||
"""
|
||||
Verifies the live hook server (started via fixture) responds correctly to all major endpoints.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
|
||||
# Test /status
|
||||
status = client.get_status()
|
||||
assert status == {'status': 'ok'}
|
||||
|
||||
# Test /api/project
|
||||
project = client.get_project()
|
||||
assert 'project' in project
|
||||
|
||||
# Test /api/session
|
||||
session = client.get_session()
|
||||
assert 'session' in session
|
||||
|
||||
# Test /api/performance
|
||||
perf = client.get_performance()
|
||||
assert 'performance' in perf
|
||||
|
||||
# Test POST /api/gui
|
||||
gui_data = {"action": "test_action", "value": 42}
|
||||
resp = client.post_gui(gui_data)
|
||||
assert resp == {'status': 'queued'}
|
||||
client = ApiHookClient()
|
||||
# Test /status
|
||||
status = client.get_status()
|
||||
assert status == {'status': 'ok'}
|
||||
# Test /api/project
|
||||
project = client.get_project()
|
||||
assert 'project' in project
|
||||
# Test /api/session
|
||||
session = client.get_session()
|
||||
assert 'session' in session
|
||||
# Test /api/performance
|
||||
perf = client.get_performance()
|
||||
assert 'performance' in perf
|
||||
# Test POST /api/gui
|
||||
gui_data = {"action": "test_action", "value": 42}
|
||||
resp = client.post_gui(gui_data)
|
||||
assert resp == {'status': 'queued'}
|
||||
|
||||
@@ -14,89 +14,80 @@ spec.loader.exec_module(gui_legacy)
|
||||
from gui_legacy import App
|
||||
|
||||
def test_new_hubs_defined_in_window_info():
|
||||
"""
|
||||
"""
|
||||
Verifies that the new consolidated Hub windows are defined in the App's window_info.
|
||||
This ensures they will be available in the 'Windows' menu.
|
||||
"""
|
||||
# We don't need a full App instance with DPG context for this,
|
||||
# as window_info is initialized in __init__ before DPG starts.
|
||||
# But we mock load_config to avoid file access.
|
||||
from unittest.mock import patch
|
||||
with patch('gui_legacy.load_config', return_value={}):
|
||||
app = App()
|
||||
|
||||
expected_hubs = {
|
||||
"Context Hub": "win_context_hub",
|
||||
"AI Settings Hub": "win_ai_settings_hub",
|
||||
"Discussion Hub": "win_discussion_hub",
|
||||
"Operations Hub": "win_operations_hub",
|
||||
}
|
||||
|
||||
for label, tag in expected_hubs.items():
|
||||
assert tag in app.window_info.values(), f"Expected window tag {tag} not found in window_info"
|
||||
# Check if the label matches (or is present)
|
||||
found = False
|
||||
for l, t in app.window_info.items():
|
||||
if t == tag:
|
||||
found = True
|
||||
assert l == label or label in l, f"Label mismatch for {tag}: expected {label}, found {l}"
|
||||
assert found, f"Expected window label {label} not found in window_info"
|
||||
# We don't need a full App instance with DPG context for this,
|
||||
# as window_info is initialized in __init__ before DPG starts.
|
||||
# But we mock load_config to avoid file access.
|
||||
from unittest.mock import patch
|
||||
with patch('gui_legacy.load_config', return_value={}):
|
||||
app = App()
|
||||
expected_hubs = {
|
||||
"Context Hub": "win_context_hub",
|
||||
"AI Settings Hub": "win_ai_settings_hub",
|
||||
"Discussion Hub": "win_discussion_hub",
|
||||
"Operations Hub": "win_operations_hub",
|
||||
}
|
||||
for label, tag in expected_hubs.items():
|
||||
assert tag in app.window_info.values(), f"Expected window tag {tag} not found in window_info"
|
||||
# Check if the label matches (or is present)
|
||||
found = False
|
||||
for l, t in app.window_info.items():
|
||||
if t == tag:
|
||||
found = True
|
||||
assert l == label or label in l, f"Label mismatch for {tag}: expected {label}, found {l}"
|
||||
assert found, f"Expected window label {label} not found in window_info"
|
||||
|
||||
def test_old_windows_removed_from_window_info(app_instance_simple):
|
||||
"""
|
||||
"""
|
||||
Verifies that the old fragmented windows are removed from window_info.
|
||||
"""
|
||||
old_tags = [
|
||||
"win_projects", "win_files", "win_screenshots",
|
||||
"win_provider", "win_system_prompts",
|
||||
"win_discussion", "win_message", "win_response",
|
||||
"win_comms", "win_tool_log"
|
||||
]
|
||||
|
||||
for tag in old_tags:
|
||||
assert tag not in app_instance_simple.window_info.values(), f"Old window tag {tag} should have been removed from window_info"
|
||||
old_tags = [
|
||||
"win_projects", "win_files", "win_screenshots",
|
||||
"win_provider", "win_system_prompts",
|
||||
"win_discussion", "win_message", "win_response",
|
||||
"win_comms", "win_tool_log"
|
||||
]
|
||||
for tag in old_tags:
|
||||
assert tag not in app_instance_simple.window_info.values(), f"Old window tag {tag} should have been removed from window_info"
|
||||
|
||||
@pytest.fixture
|
||||
def app_instance_simple():
|
||||
from unittest.mock import patch
|
||||
from gui_legacy import App
|
||||
with patch('gui_legacy.load_config', return_value={}):
|
||||
app = App()
|
||||
return app
|
||||
from unittest.mock import patch
|
||||
from gui_legacy import App
|
||||
with patch('gui_legacy.load_config', return_value={}):
|
||||
app = App()
|
||||
return app
|
||||
|
||||
def test_hub_windows_have_correct_flags(app_instance_simple):
|
||||
"""
|
||||
"""
|
||||
Verifies that the new Hub windows have appropriate flags for a professional workspace.
|
||||
(e.g., no_collapse should be True for main hubs).
|
||||
"""
|
||||
import dearpygui.dearpygui as dpg
|
||||
dpg.create_context()
|
||||
|
||||
# We need to actually call the build methods to check the configuration
|
||||
app_instance_simple._build_context_hub()
|
||||
app_instance_simple._build_ai_settings_hub()
|
||||
app_instance_simple._build_discussion_hub()
|
||||
app_instance_simple._build_operations_hub()
|
||||
|
||||
hubs = ["win_context_hub", "win_ai_settings_hub", "win_discussion_hub", "win_operations_hub"]
|
||||
for hub in hubs:
|
||||
assert dpg.does_item_exist(hub)
|
||||
# We can't easily check 'no_collapse' after creation without internal DPG calls
|
||||
# but we can check if it's been configured if we mock dpg.window or check it manually
|
||||
|
||||
dpg.destroy_context()
|
||||
import dearpygui.dearpygui as dpg
|
||||
dpg.create_context()
|
||||
# We need to actually call the build methods to check the configuration
|
||||
app_instance_simple._build_context_hub()
|
||||
app_instance_simple._build_ai_settings_hub()
|
||||
app_instance_simple._build_discussion_hub()
|
||||
app_instance_simple._build_operations_hub()
|
||||
hubs = ["win_context_hub", "win_ai_settings_hub", "win_discussion_hub", "win_operations_hub"]
|
||||
for hub in hubs:
|
||||
assert dpg.does_item_exist(hub)
|
||||
# We can't easily check 'no_collapse' after creation without internal DPG calls
|
||||
# but we can check if it's been configured if we mock dpg.window or check it manually
|
||||
dpg.destroy_context()
|
||||
|
||||
def test_indicators_exist(app_instance_simple):
|
||||
"""
|
||||
"""
|
||||
Verifies that the new thinking and live indicators exist in the UI.
|
||||
"""
|
||||
import dearpygui.dearpygui as dpg
|
||||
dpg.create_context()
|
||||
|
||||
app_instance_simple._build_discussion_hub()
|
||||
app_instance_simple._build_operations_hub()
|
||||
|
||||
assert dpg.does_item_exist("thinking_indicator")
|
||||
assert dpg.does_item_exist("operations_live_indicator")
|
||||
|
||||
dpg.destroy_context()
|
||||
import dearpygui.dearpygui as dpg
|
||||
dpg.create_context()
|
||||
app_instance_simple._build_discussion_hub()
|
||||
app_instance_simple._build_operations_hub()
|
||||
assert dpg.does_item_exist("thinking_indicator")
|
||||
assert dpg.does_item_exist("operations_live_indicator")
|
||||
dpg.destroy_context()
|
||||
|
||||
@@ -8,120 +8,110 @@ import ai_client
|
||||
|
||||
@pytest.fixture
|
||||
def mock_app():
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={
|
||||
"ai": {"provider": "gemini", "model": "model-1", "temperature": 0.0, "max_tokens": 100, "history_trunc_limit": 1000},
|
||||
"projects": {"paths": [], "active": ""},
|
||||
"gui": {"show_windows": {}}
|
||||
}),
|
||||
patch('gui_2.project_manager.load_project', return_value={
|
||||
"project": {"name": "test_proj"},
|
||||
"discussion": {"active": "main", "discussions": {"main": {"history": []}}},
|
||||
"files": {"paths": [], "base_dir": "."},
|
||||
"screenshots": {"paths": [], "base_dir": "."},
|
||||
"agent": {"tools": {}}
|
||||
}),
|
||||
patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
|
||||
patch('gui_2.project_manager.save_project'),
|
||||
patch('gui_2.session_logger.open_session'),
|
||||
patch('gui_2.App._init_ai_and_hooks'),
|
||||
patch('gui_2.App._fetch_models')
|
||||
):
|
||||
app = App()
|
||||
yield app
|
||||
# We don't have a clean way to stop the loop thread in gui_2.py App
|
||||
# so we just let it daemon-exit.
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={
|
||||
"ai": {"provider": "gemini", "model": "model-1", "temperature": 0.0, "max_tokens": 100, "history_trunc_limit": 1000},
|
||||
"projects": {"paths": [], "active": ""},
|
||||
"gui": {"show_windows": {}}
|
||||
}),
|
||||
patch('gui_2.project_manager.load_project', return_value={
|
||||
"project": {"name": "test_proj"},
|
||||
"discussion": {"active": "main", "discussions": {"main": {"history": []}}},
|
||||
"files": {"paths": [], "base_dir": "."},
|
||||
"screenshots": {"paths": [], "base_dir": "."},
|
||||
"agent": {"tools": {}}
|
||||
}),
|
||||
patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
|
||||
patch('gui_2.project_manager.save_project'),
|
||||
patch('gui_2.session_logger.open_session'),
|
||||
patch('gui_2.App._init_ai_and_hooks'),
|
||||
patch('gui_2.App._fetch_models')
|
||||
):
|
||||
app = App()
|
||||
yield app
|
||||
# We don't have a clean way to stop the loop thread in gui_2.py App
|
||||
# so we just let it daemon-exit.
|
||||
|
||||
@pytest.mark.timeout(10)
|
||||
def test_user_request_integration_flow(mock_app):
|
||||
"""
|
||||
"""
|
||||
Verifies that pushing a UserRequestEvent to the event_queue:
|
||||
1. Triggers ai_client.send
|
||||
2. Results in a 'response' event back to the queue
|
||||
3. Eventually updates the UI state (ai_response, ai_status) after processing GUI tasks.
|
||||
"""
|
||||
app = mock_app
|
||||
|
||||
# Mock all ai_client methods called during _handle_request_event
|
||||
mock_response = "This is a test AI response"
|
||||
with (
|
||||
patch('ai_client.send', return_value=mock_response) as mock_send,
|
||||
patch('ai_client.set_custom_system_prompt'),
|
||||
patch('ai_client.set_model_params'),
|
||||
patch('ai_client.set_agent_tools')
|
||||
):
|
||||
# 1. Create and push a UserRequestEvent
|
||||
event = UserRequestEvent(
|
||||
prompt="Hello AI",
|
||||
stable_md="Context",
|
||||
file_items=[],
|
||||
disc_text="History",
|
||||
base_dir="."
|
||||
)
|
||||
|
||||
# 2. Push event to the app's internal loop
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
app.event_queue.put("user_request", event),
|
||||
app._loop
|
||||
)
|
||||
|
||||
# 3. Wait for ai_client.send to be called (polling background thread)
|
||||
start_time = time.time()
|
||||
while not mock_send.called and time.time() - start_time < 5:
|
||||
time.sleep(0.1)
|
||||
|
||||
assert mock_send.called, "ai_client.send was not called within timeout"
|
||||
mock_send.assert_called_once_with("Context", "Hello AI", ".", [], "History")
|
||||
|
||||
# 4. Wait for the response to propagate to _pending_gui_tasks and update UI
|
||||
# We call _process_pending_gui_tasks manually to simulate a GUI frame update.
|
||||
start_time = time.time()
|
||||
success = False
|
||||
while time.time() - start_time < 3:
|
||||
app._process_pending_gui_tasks()
|
||||
if app.ai_response == mock_response and app.ai_status == "done":
|
||||
success = True
|
||||
break
|
||||
time.sleep(0.1)
|
||||
|
||||
assert success, f"UI state was not updated. ai_response: '{app.ai_response}', status: '{app.ai_status}'"
|
||||
assert app.ai_response == mock_response
|
||||
assert app.ai_status == "done"
|
||||
app = mock_app
|
||||
# Mock all ai_client methods called during _handle_request_event
|
||||
mock_response = "This is a test AI response"
|
||||
with (
|
||||
patch('ai_client.send', return_value=mock_response) as mock_send,
|
||||
patch('ai_client.set_custom_system_prompt'),
|
||||
patch('ai_client.set_model_params'),
|
||||
patch('ai_client.set_agent_tools')
|
||||
):
|
||||
# 1. Create and push a UserRequestEvent
|
||||
event = UserRequestEvent(
|
||||
prompt="Hello AI",
|
||||
stable_md="Context",
|
||||
file_items=[],
|
||||
disc_text="History",
|
||||
base_dir="."
|
||||
)
|
||||
# 2. Push event to the app's internal loop
|
||||
asyncio.run_coroutine_threadsafe(
|
||||
app.event_queue.put("user_request", event),
|
||||
app._loop
|
||||
)
|
||||
# 3. Wait for ai_client.send to be called (polling background thread)
|
||||
start_time = time.time()
|
||||
while not mock_send.called and time.time() - start_time < 5:
|
||||
time.sleep(0.1)
|
||||
assert mock_send.called, "ai_client.send was not called within timeout"
|
||||
mock_send.assert_called_once_with("Context", "Hello AI", ".", [], "History")
|
||||
# 4. Wait for the response to propagate to _pending_gui_tasks and update UI
|
||||
# We call _process_pending_gui_tasks manually to simulate a GUI frame update.
|
||||
start_time = time.time()
|
||||
success = False
|
||||
while time.time() - start_time < 3:
|
||||
app._process_pending_gui_tasks()
|
||||
if app.ai_response == mock_response and app.ai_status == "done":
|
||||
success = True
|
||||
break
|
||||
time.sleep(0.1)
|
||||
assert success, f"UI state was not updated. ai_response: '{app.ai_response}', status: '{app.ai_status}'"
|
||||
assert app.ai_response == mock_response
|
||||
assert app.ai_status == "done"
|
||||
|
||||
@pytest.mark.timeout(10)
def test_user_request_error_handling(mock_app):
    """Verify that an exception from ai_client.send surfaces as an error in the UI.

    Pushes a UserRequestEvent into the app's background event loop with a
    mocked ai_client.send that raises, then repeatedly calls
    _process_pending_gui_tasks (simulating GUI frames) until the app reports
    status "error" with the failure message in ai_response.
    """
    app = mock_app

    # Mock all ai_client methods touched during _handle_request_event.
    with (
        patch('ai_client.send', side_effect=Exception("API Failure")) as mock_send,
        patch('ai_client.set_custom_system_prompt'),
        patch('ai_client.set_model_params'),
        patch('ai_client.set_agent_tools')
    ):
        event = UserRequestEvent(
            prompt="Trigger Error",
            stable_md="",
            file_items=[],
            disc_text="",
            base_dir="."
        )

        # Hand the event to the app's internal loop from this (test) thread.
        asyncio.run_coroutine_threadsafe(
            app.event_queue.put("user_request", event),
            app._loop
        )

        # Poll for error state by processing GUI tasks
        start_time = time.time()
        success = False
        while time.time() - start_time < 5:
            app._process_pending_gui_tasks()
            if app.ai_status == "error" and "ERROR: API Failure" in app.ai_response:
                success = True
                break
            time.sleep(0.1)

        assert success, f"Error state was not reflected in UI. status: {app.ai_status}, response: {app.ai_response}"
|
||||
|
||||
@@ -10,80 +10,67 @@ from api_hook_client import ApiHookClient
|
||||
|
||||
@pytest.mark.integration
def test_full_live_workflow(live_gui):
    """Integration test that drives the GUI through a full workflow.

    Exercises: reset, automated project creation + metadata save, a live AI
    discussion turn (with thinking-indicator and response polling), and
    switching to a freshly created discussion.
    """
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)
    client.post_session(session_entries=[])
    time.sleep(2)

    # 1. Reset
    client.click("btn_reset")
    time.sleep(1)

    # 2. Project Setup
    temp_project_path = os.path.abspath("tests/temp_project.toml")
    if os.path.exists(temp_project_path):
        os.remove(temp_project_path)

    client.click("btn_project_new_automated", user_data=temp_project_path)
    time.sleep(1)  # Wait for project creation and switch

    # Verify metadata update
    proj = client.get_project()

    test_git = os.path.abspath(".")
    client.set_value("project_git_dir", test_git)
    client.click("btn_project_save")
    time.sleep(1)

    proj = client.get_project()
    # flat_config returns {"project": {...}, "output": ...}
    # so proj is {"project": {"project": {"git_dir": ...}}}
    assert proj['project']['project']['git_dir'] == test_git

    # Enable auto-add so the response ends up in history
    client.set_value("auto_add_history", True)
    client.set_value("current_model", "gemini-2.5-flash-lite")
    time.sleep(0.5)

    # 3. Discussion Turn
    client.set_value("ai_input", "Hello! This is an automated test. Just say 'Acknowledged'.")
    client.click("btn_gen_send")

    # Verify thinking indicator appears (might be brief)
    thinking_seen = False
    print("\nPolling for thinking indicator...")
    for i in range(40):
        state = client.get_indicator_state("thinking_indicator")
        if state.get('shown'):
            thinking_seen = True
            print(f"Thinking indicator seen at poll {i}")
            break
        time.sleep(0.5)

    # 4. Wait for response in session
    success = False
    print("Waiting for AI response in session...")
    for i in range(120):
        session = client.get_session()
        entries = session.get('session', {}).get('entries', [])
        if any(e.get('role') == 'AI' for e in entries):
            success = True
            print(f"AI response found at second {i}")
            break
        time.sleep(1)

    assert success, "AI failed to respond within 120 seconds"

    # 5. Switch Discussion
    client.set_value("disc_new_name_input", "AutoDisc")
    client.click("btn_disc_create")
    time.sleep(1.0)  # Wait for GUI to process creation

    client.select_list_item("disc_listbox", "AutoDisc")
    time.sleep(1.0)  # Wait for GUI to switch

    # Verify session is empty in new discussion
    session = client.get_session()
    assert len(session.get('session', {}).get('entries', [])) == 0
|
||||
|
||||
@@ -9,20 +9,20 @@ from gui_2 import App
|
||||
|
||||
@pytest.fixture
def mock_config(tmp_path):
    """Write a minimal app config TOML into tmp_path and return its path."""
    config_path = tmp_path / "config.toml"
    config_path.write_text("""[projects]
paths = []
active = ""
[ai]
provider = "gemini"
model = "model"
""", encoding="utf-8")
    return config_path
|
||||
|
||||
@pytest.fixture
def mock_project(tmp_path):
    """Write a minimal project TOML (single empty 'main' discussion) and return its path."""
    project_path = tmp_path / "project.toml"
    # NOTE(review): the 'active = "main"' line was reconstructed from the
    # surrounding context of this fixture; confirm against the original file.
    project_path.write_text("""[project]
name = "test"
[discussion]
roles = ["User", "AI"]
active = "main"
[discussion.discussions.main]
history = []
""", encoding="utf-8")
    return project_path
|
||||
|
||||
@pytest.fixture
def app_instance(mock_config, mock_project, monkeypatch):
    """Build a minimally-initialized App with project loading and session logging mocked.

    App.__init__ is replaced with a no-op, so only the attributes assigned
    below exist on the returned instance.
    """
    monkeypatch.setattr("gui_2.CONFIG_PATH", mock_config)

    with patch("project_manager.load_project") as mock_load, \
         patch("session_logger.open_session"):

        mock_load.return_value = {
            "project": {"name": "test"},
            "discussion": {"roles": ["User", "AI"], "active": "main", "discussions": {"main": {"history": []}}},
            "files": {"paths": []},
            "screenshots": {"paths": []}
        }

        # Mock the __init__ to do nothing, then set the fields we need manually
        with patch.object(App, '__init__', lambda self: None):
            app = App()
            app.show_windows = {"Log Management": False}
            app.ui_state = MagicMock()
            app.ui_files_base_dir = "."
            app.files = []

            # Since we bypassed __init__, methods are still bound normally
            # and can be called directly on the instance.
            return app
|
||||
|
||||
def test_log_management_init(app_instance):
    """The Log Management window is registered (hidden) and its renderer exists."""
    app = app_instance
    assert "Log Management" in app.show_windows
    assert app.show_windows["Log Management"] is False
    assert hasattr(app, "_render_log_management")
    assert callable(app._render_log_management)
|
||||
|
||||
def test_render_log_management_logic(app_instance):
    """_render_log_management opens the window, builds the table, and lists each session."""
    app = app_instance
    app.show_windows["Log Management"] = True

    # Mock LogRegistry plus every imgui call the renderer makes.
    with patch("gui_2.LogRegistry") as MockRegistry, \
         patch("gui_2.imgui.begin") as mock_begin, \
         patch("gui_2.imgui.begin_table") as mock_begin_table, \
         patch("gui_2.imgui.text") as mock_text, \
         patch("gui_2.imgui.end_table"), \
         patch("gui_2.imgui.end"), \
         patch("gui_2.imgui.push_style_color"), \
         patch("gui_2.imgui.pop_style_color"), \
         patch("gui_2.imgui.table_setup_column"), \
         patch("gui_2.imgui.table_headers_row"), \
         patch("gui_2.imgui.table_next_row"), \
         patch("gui_2.imgui.table_next_column"), \
         patch("gui_2.imgui.button"):

        mock_reg = MockRegistry.return_value
        mock_reg.data = {
            "session_1": {
                "start_time": "2023-01-01",
                "whitelisted": False,
                "metadata": {"reason": "test", "size_kb": 10, "message_count": 5}
            }
        }

        # Window is open and not collapsed; table creation succeeds.
        mock_begin.return_value = (True, True)
        mock_begin_table.return_value = True

        app._render_log_management()

        mock_begin.assert_called_with("Log Management", app.show_windows["Log Management"])
        mock_begin_table.assert_called()
        mock_text.assert_any_call("session_1")
|
||||
|
||||
@@ -8,48 +8,42 @@ from log_pruner import LogPruner
|
||||
|
||||
@pytest.fixture
def pruner_setup(tmp_path):
    """Create a LogPruner over a fresh logs directory; return (pruner, registry, logs_dir)."""
    logs_dir = tmp_path / "logs"
    logs_dir.mkdir()
    registry_path = logs_dir / "log_registry.toml"
    registry = LogRegistry(str(registry_path))
    pruner = LogPruner(registry, str(logs_dir))
    return pruner, registry, logs_dir
|
||||
|
||||
def test_prune_old_insignificant_logs(pruner_setup):
    """Only sessions that are old AND small AND non-whitelisted get pruned."""
    pruner, registry, logs_dir = pruner_setup

    # 1. Old and small (insignificant) -> should be pruned
    session_id_old_small = "old_small"
    dir_old_small = logs_dir / session_id_old_small
    dir_old_small.mkdir()
    (dir_old_small / "comms.log").write_text("small")  # < 2KB
    registry.register_session(session_id_old_small, str(dir_old_small), datetime.now() - timedelta(days=2))

    # 2. Old and large (significant) -> should NOT be pruned
    session_id_old_large = "old_large"
    dir_old_large = logs_dir / session_id_old_large
    dir_old_large.mkdir()
    (dir_old_large / "comms.log").write_text("x" * 3000)  # > 2KB
    registry.register_session(session_id_old_large, str(dir_old_large), datetime.now() - timedelta(days=2))

    # 3. Recent and small -> should NOT be pruned
    session_id_recent_small = "recent_small"
    dir_recent_small = logs_dir / session_id_recent_small
    dir_recent_small.mkdir()
    (dir_recent_small / "comms.log").write_text("small")
    registry.register_session(session_id_recent_small, str(dir_recent_small), datetime.now() - timedelta(hours=2))

    # 4. Old and whitelisted -> should NOT be pruned
    session_id_old_whitelisted = "old_whitelisted"
    dir_old_whitelisted = logs_dir / session_id_old_whitelisted
    dir_old_whitelisted.mkdir()
    (dir_old_whitelisted / "comms.log").write_text("small")
    registry.register_session(session_id_old_whitelisted, str(dir_old_whitelisted), datetime.now() - timedelta(days=2))
    registry.update_session_metadata(session_id_old_whitelisted, 0, 0, 0, True, "Manual")

    pruner.prune()

    assert not dir_old_small.exists()
    assert dir_old_large.exists()
    assert dir_recent_small.exists()
    assert dir_old_whitelisted.exists()
|
||||
|
||||
@@ -8,173 +8,149 @@ from log_registry import LogRegistry
|
||||
|
||||
class TestLogRegistry(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
"""Set up a temporary directory and registry file for each test."""
|
||||
self.temp_dir = tempfile.TemporaryDirectory()
|
||||
self.registry_path = os.path.join(self.temp_dir.name, "registry.toml")
|
||||
|
||||
# Ensure the file is created and empty initially for a clean state.
|
||||
# LogRegistry is assumed to load from this file on instantiation.
|
||||
with open(self.registry_path, 'w') as f:
|
||||
f.write("# Initial empty registry\n")
|
||||
|
||||
# Instantiate LogRegistry. This will load from the empty file.
|
||||
self.registry = LogRegistry(self.registry_path)
|
||||
def setUp(self):
|
||||
"""Set up a temporary directory and registry file for each test."""
|
||||
self.temp_dir = tempfile.TemporaryDirectory()
|
||||
self.registry_path = os.path.join(self.temp_dir.name, "registry.toml")
|
||||
# Ensure the file is created and empty initially for a clean state.
|
||||
# LogRegistry is assumed to load from this file on instantiation.
|
||||
with open(self.registry_path, 'w') as f:
|
||||
f.write("# Initial empty registry\n")
|
||||
# Instantiate LogRegistry. This will load from the empty file.
|
||||
self.registry = LogRegistry(self.registry_path)
|
||||
|
||||
def tearDown(self):
|
||||
"""Clean up the temporary directory and its contents after each test."""
|
||||
self.temp_dir.cleanup()
|
||||
def tearDown(self):
|
||||
"""Clean up the temporary directory and its contents after each test."""
|
||||
self.temp_dir.cleanup()
|
||||
|
||||
def test_instantiation(self):
|
||||
"""Test LogRegistry instantiation with a file path."""
|
||||
self.assertIsInstance(self.registry, LogRegistry)
|
||||
self.assertEqual(self.registry.registry_path, self.registry_path)
|
||||
# Check if the file exists. LogRegistry is assumed to create it if not.
|
||||
self.assertTrue(os.path.exists(self.registry_path))
|
||||
# We will verify content in other tests that explicitly save and reload.
|
||||
def test_instantiation(self):
|
||||
"""Test LogRegistry instantiation with a file path."""
|
||||
self.assertIsInstance(self.registry, LogRegistry)
|
||||
self.assertEqual(self.registry.registry_path, self.registry_path)
|
||||
# Check if the file exists. LogRegistry is assumed to create it if not.
|
||||
self.assertTrue(os.path.exists(self.registry_path))
|
||||
# We will verify content in other tests that explicitly save and reload.
|
||||
|
||||
def test_register_session(self):
|
||||
"""Test registering a new session."""
|
||||
session_id = "session-123"
|
||||
path = "/path/to/session/123"
|
||||
start_time = datetime.utcnow()
|
||||
def test_register_session(self):
|
||||
"""Test registering a new session."""
|
||||
session_id = "session-123"
|
||||
path = "/path/to/session/123"
|
||||
start_time = datetime.utcnow()
|
||||
self.registry.register_session(session_id, path, start_time)
|
||||
# Verify session was added to internal data (assuming LogRegistry has a public 'data' attribute for testing)
|
||||
self.assertIn(session_id, self.registry.data)
|
||||
session_data = self.registry.data[session_id]
|
||||
self.assertEqual(session_data['path'], path)
|
||||
# Convert stored ISO string back to datetime for comparison
|
||||
stored_start_time = datetime.fromisoformat(session_data['start_time'])
|
||||
self.assertAlmostEqual(stored_start_time, start_time, delta=timedelta(seconds=1)) # Allow for minor time differences
|
||||
self.assertFalse(session_data.get('whitelisted', False)) # Default to not whitelisted
|
||||
self.assertIsNone(session_data.get('metadata'))
|
||||
# Verify data was written to the TOML file by reloading
|
||||
reloaded_registry = LogRegistry(self.registry_path)
|
||||
self.assertIn(session_id, reloaded_registry.data)
|
||||
reloaded_session_data = reloaded_registry.data[session_id]
|
||||
reloaded_start_time = datetime.fromisoformat(reloaded_session_data['start_time'])
|
||||
self.assertAlmostEqual(reloaded_start_time, start_time, delta=timedelta(seconds=1))
|
||||
|
||||
self.registry.register_session(session_id, path, start_time)
|
||||
def test_update_session_metadata(self):
|
||||
"""Test updating session metadata."""
|
||||
session_id = "session-456"
|
||||
path = "/path/to/session/456"
|
||||
start_time = datetime.utcnow()
|
||||
self.registry.register_session(session_id, path, start_time)
|
||||
message_count = 100
|
||||
errors = 5
|
||||
size_kb = 1024
|
||||
whitelisted = True
|
||||
reason = "Automated process"
|
||||
self.registry.update_session_metadata(session_id, message_count, errors, size_kb, whitelisted, reason)
|
||||
# Verify metadata was updated in internal data
|
||||
self.assertIn(session_id, self.registry.data)
|
||||
session_data = self.registry.data[session_id]
|
||||
self.assertIsNotNone(session_data.get('metadata'))
|
||||
metadata = session_data['metadata']
|
||||
self.assertEqual(metadata['message_count'], message_count)
|
||||
self.assertEqual(metadata['errors'], errors)
|
||||
self.assertEqual(metadata['size_kb'], size_kb)
|
||||
self.assertEqual(metadata['whitelisted'], whitelisted)
|
||||
self.assertEqual(metadata['reason'], reason)
|
||||
# Also check if the whitelisted flag in the main session data is updated
|
||||
self.assertTrue(session_data.get('whitelisted', False))
|
||||
# Verify data was written to the TOML file by reloading
|
||||
reloaded_registry = LogRegistry(self.registry_path)
|
||||
self.assertIn(session_id, reloaded_registry.data)
|
||||
reloaded_session_data = reloaded_registry.data[session_id]
|
||||
self.assertTrue(reloaded_session_data.get('metadata', {}).get('whitelisted', False))
|
||||
self.assertTrue(reloaded_session_data.get('whitelisted', False)) # Check main flag too
|
||||
|
||||
# Verify session was added to internal data (assuming LogRegistry has a public 'data' attribute for testing)
|
||||
self.assertIn(session_id, self.registry.data)
|
||||
session_data = self.registry.data[session_id]
|
||||
self.assertEqual(session_data['path'], path)
|
||||
# Convert stored ISO string back to datetime for comparison
|
||||
stored_start_time = datetime.fromisoformat(session_data['start_time'])
|
||||
self.assertAlmostEqual(stored_start_time, start_time, delta=timedelta(seconds=1)) # Allow for minor time differences
|
||||
self.assertFalse(session_data.get('whitelisted', False)) # Default to not whitelisted
|
||||
self.assertIsNone(session_data.get('metadata'))
|
||||
def test_is_session_whitelisted(self):
|
||||
"""Test checking if a session is whitelisted."""
|
||||
session_id_whitelisted = "session-789-whitelisted"
|
||||
path_w = "/path/to/session/789"
|
||||
start_time_w = datetime.utcnow()
|
||||
self.registry.register_session(session_id_whitelisted, path_w, start_time_w)
|
||||
self.registry.update_session_metadata(session_id_whitelisted, 10, 0, 100, True, "Manual whitelist")
|
||||
session_id_not_whitelisted = "session-abc-not-whitelisted"
|
||||
path_nw = "/path/to/session/abc"
|
||||
start_time_nw = datetime.utcnow()
|
||||
self.registry.register_session(session_id_not_whitelisted, path_nw, start_time_nw)
|
||||
# Test explicitly whitelisted session
|
||||
self.assertTrue(self.registry.is_session_whitelisted(session_id_whitelisted))
|
||||
# Test session registered but not updated, should default to not whitelisted
|
||||
self.assertFalse(self.registry.is_session_whitelisted(session_id_not_whitelisted))
|
||||
# Test for a non-existent session, should be treated as not whitelisted
|
||||
self.assertFalse(self.registry.is_session_whitelisted("non-existent-session"))
|
||||
|
||||
# Verify data was written to the TOML file by reloading
|
||||
reloaded_registry = LogRegistry(self.registry_path)
|
||||
self.assertIn(session_id, reloaded_registry.data)
|
||||
reloaded_session_data = reloaded_registry.data[session_id]
|
||||
reloaded_start_time = datetime.fromisoformat(reloaded_session_data['start_time'])
|
||||
self.assertAlmostEqual(reloaded_start_time, start_time, delta=timedelta(seconds=1))
|
||||
|
||||
|
||||
def test_update_session_metadata(self):
|
||||
"""Test updating session metadata."""
|
||||
session_id = "session-456"
|
||||
path = "/path/to/session/456"
|
||||
start_time = datetime.utcnow()
|
||||
self.registry.register_session(session_id, path, start_time)
|
||||
|
||||
message_count = 100
|
||||
errors = 5
|
||||
size_kb = 1024
|
||||
whitelisted = True
|
||||
reason = "Automated process"
|
||||
|
||||
self.registry.update_session_metadata(session_id, message_count, errors, size_kb, whitelisted, reason)
|
||||
|
||||
# Verify metadata was updated in internal data
|
||||
self.assertIn(session_id, self.registry.data)
|
||||
session_data = self.registry.data[session_id]
|
||||
self.assertIsNotNone(session_data.get('metadata'))
|
||||
metadata = session_data['metadata']
|
||||
self.assertEqual(metadata['message_count'], message_count)
|
||||
self.assertEqual(metadata['errors'], errors)
|
||||
self.assertEqual(metadata['size_kb'], size_kb)
|
||||
self.assertEqual(metadata['whitelisted'], whitelisted)
|
||||
self.assertEqual(metadata['reason'], reason)
|
||||
# Also check if the whitelisted flag in the main session data is updated
|
||||
self.assertTrue(session_data.get('whitelisted', False))
|
||||
|
||||
# Verify data was written to the TOML file by reloading
|
||||
reloaded_registry = LogRegistry(self.registry_path)
|
||||
self.assertIn(session_id, reloaded_registry.data)
|
||||
reloaded_session_data = reloaded_registry.data[session_id]
|
||||
self.assertTrue(reloaded_session_data.get('metadata', {}).get('whitelisted', False))
|
||||
self.assertTrue(reloaded_session_data.get('whitelisted', False)) # Check main flag too
|
||||
|
||||
def test_is_session_whitelisted(self):
|
||||
"""Test checking if a session is whitelisted."""
|
||||
session_id_whitelisted = "session-789-whitelisted"
|
||||
path_w = "/path/to/session/789"
|
||||
start_time_w = datetime.utcnow()
|
||||
self.registry.register_session(session_id_whitelisted, path_w, start_time_w)
|
||||
self.registry.update_session_metadata(session_id_whitelisted, 10, 0, 100, True, "Manual whitelist")
|
||||
|
||||
session_id_not_whitelisted = "session-abc-not-whitelisted"
|
||||
path_nw = "/path/to/session/abc"
|
||||
start_time_nw = datetime.utcnow()
|
||||
self.registry.register_session(session_id_not_whitelisted, path_nw, start_time_nw)
|
||||
|
||||
# Test explicitly whitelisted session
|
||||
self.assertTrue(self.registry.is_session_whitelisted(session_id_whitelisted))
|
||||
# Test session registered but not updated, should default to not whitelisted
|
||||
self.assertFalse(self.registry.is_session_whitelisted(session_id_not_whitelisted))
|
||||
|
||||
# Test for a non-existent session, should be treated as not whitelisted
|
||||
self.assertFalse(self.registry.is_session_whitelisted("non-existent-session"))
|
||||
|
||||
def test_get_old_non_whitelisted_sessions(self):
|
||||
"""Test retrieving old, non-whitelisted sessions."""
|
||||
now = datetime.utcnow()
|
||||
# Define a cutoff time that is 7 days ago
|
||||
cutoff_time = now - timedelta(days=7)
|
||||
|
||||
# Session 1: Old and not whitelisted
|
||||
session_id_old_nw = "session-old-nw"
|
||||
path_old_nw = "/path/to/session/old_nw"
|
||||
start_time_old_nw = now - timedelta(days=10) # Older than cutoff
|
||||
self.registry.register_session(session_id_old_nw, path_old_nw, start_time_old_nw)
|
||||
|
||||
# Session 2: Recent and not whitelisted
|
||||
session_id_recent_nw = "session-recent-nw"
|
||||
path_recent_nw = "/path/to/session/recent_nw"
|
||||
start_time_recent_nw = now - timedelta(days=3) # Newer than cutoff
|
||||
self.registry.register_session(session_id_recent_nw, path_recent_nw, start_time_recent_nw)
|
||||
|
||||
# Session 3: Old and whitelisted
|
||||
session_id_old_w = "session-old-w"
|
||||
path_old_w = "/path/to/session/old_w"
|
||||
start_time_old_w = now - timedelta(days=15) # Older than cutoff
|
||||
self.registry.register_session(session_id_old_w, path_old_w, start_time_old_w)
|
||||
self.registry.update_session_metadata(session_id_old_w, 50, 0, 500, True, "Whitelisted")
|
||||
|
||||
# Session 4: Old, not whitelisted explicitly, but with metadata that doesn't set 'whitelisted' to True.
|
||||
# The 'is_session_whitelisted' logic should correctly interpret this as not whitelisted.
|
||||
session_id_old_nw_incomplete = "session-old-nw-incomplete"
|
||||
path_old_nw_incomplete = "/path/to/session/old_nw_incomplete"
|
||||
start_time_old_nw_incomplete = now - timedelta(days=20) # Older than cutoff
|
||||
self.registry.register_session(session_id_old_nw_incomplete, path_old_nw_incomplete, start_time_old_nw_incomplete)
|
||||
# Update with some metadata, but set 'whitelisted' to False explicitly
|
||||
self.registry.update_session_metadata(session_id_old_nw_incomplete, 10, 0, 100, False, "Manual review needed")
|
||||
|
||||
# Get sessions older than cutoff_time and not whitelisted
|
||||
old_sessions = self.registry.get_old_non_whitelisted_sessions(cutoff_time)
|
||||
|
||||
# Collect session IDs from the result
|
||||
found_session_ids = {s['session_id'] for s in old_sessions}
|
||||
|
||||
# Expected: session_id_old_nw (old, not whitelisted) and session_id_old_nw_incomplete (old, explicitly not whitelisted)
|
||||
self.assertIn(session_id_old_nw, found_session_ids)
|
||||
self.assertIn(session_id_old_nw_incomplete, found_session_ids)
|
||||
|
||||
# Not expected: session_id_recent_nw (too recent), session_id_old_w (whitelisted)
|
||||
self.assertNotIn(session_id_recent_nw, found_session_ids)
|
||||
self.assertNotIn(session_id_old_w, found_session_ids)
|
||||
|
||||
# Ensure only the expected sessions are in the result
|
||||
self.assertEqual(len(found_session_ids), 2)
|
||||
|
||||
# Test with a cutoff that includes all sessions, and ensure only non-whitelisted are returned
|
||||
future_cutoff = now + timedelta(days=1) # All sessions are older than this
|
||||
all_old_sessions = self.registry.get_old_non_whitelisted_sessions(future_cutoff)
|
||||
all_found_session_ids = {s['session_id'] for s in all_old_sessions}
|
||||
|
||||
# Expected: session_id_old_nw, session_id_old_nw_incomplete, AND session_id_recent_nw
|
||||
# Not expected: session_id_old_w (whitelisted)
|
||||
self.assertEqual(len(all_found_session_ids), 3)
|
||||
self.assertIn(session_id_old_nw, all_found_session_ids)
|
||||
self.assertIn(session_id_old_nw_incomplete, all_found_session_ids)
|
||||
self.assertIn(session_id_recent_nw, all_found_session_ids)
|
||||
self.assertNotIn(session_id_old_w, all_found_session_ids)
|
||||
def test_get_old_non_whitelisted_sessions(self):
|
||||
"""Test retrieving old, non-whitelisted sessions."""
|
||||
now = datetime.utcnow()
|
||||
# Define a cutoff time that is 7 days ago
|
||||
cutoff_time = now - timedelta(days=7)
|
||||
# Session 1: Old and not whitelisted
|
||||
session_id_old_nw = "session-old-nw"
|
||||
path_old_nw = "/path/to/session/old_nw"
|
||||
start_time_old_nw = now - timedelta(days=10) # Older than cutoff
|
||||
self.registry.register_session(session_id_old_nw, path_old_nw, start_time_old_nw)
|
||||
# Session 2: Recent and not whitelisted
|
||||
session_id_recent_nw = "session-recent-nw"
|
||||
path_recent_nw = "/path/to/session/recent_nw"
|
||||
start_time_recent_nw = now - timedelta(days=3) # Newer than cutoff
|
||||
self.registry.register_session(session_id_recent_nw, path_recent_nw, start_time_recent_nw)
|
||||
# Session 3: Old and whitelisted
|
||||
session_id_old_w = "session-old-w"
|
||||
path_old_w = "/path/to/session/old_w"
|
||||
start_time_old_w = now - timedelta(days=15) # Older than cutoff
|
||||
self.registry.register_session(session_id_old_w, path_old_w, start_time_old_w)
|
||||
self.registry.update_session_metadata(session_id_old_w, 50, 0, 500, True, "Whitelisted")
|
||||
# Session 4: Old, not whitelisted explicitly, but with metadata that doesn't set 'whitelisted' to True.
|
||||
# The 'is_session_whitelisted' logic should correctly interpret this as not whitelisted.
|
||||
session_id_old_nw_incomplete = "session-old-nw-incomplete"
|
||||
path_old_nw_incomplete = "/path/to/session/old_nw_incomplete"
|
||||
start_time_old_nw_incomplete = now - timedelta(days=20) # Older than cutoff
|
||||
self.registry.register_session(session_id_old_nw_incomplete, path_old_nw_incomplete, start_time_old_nw_incomplete)
|
||||
# Update with some metadata, but set 'whitelisted' to False explicitly
|
||||
self.registry.update_session_metadata(session_id_old_nw_incomplete, 10, 0, 100, False, "Manual review needed")
|
||||
# Get sessions older than cutoff_time and not whitelisted
|
||||
old_sessions = self.registry.get_old_non_whitelisted_sessions(cutoff_time)
|
||||
# Collect session IDs from the result
|
||||
found_session_ids = {s['session_id'] for s in old_sessions}
|
||||
# Expected: session_id_old_nw (old, not whitelisted) and session_id_old_nw_incomplete (old, explicitly not whitelisted)
|
||||
self.assertIn(session_id_old_nw, found_session_ids)
|
||||
self.assertIn(session_id_old_nw_incomplete, found_session_ids)
|
||||
# Not expected: session_id_recent_nw (too recent), session_id_old_w (whitelisted)
|
||||
self.assertNotIn(session_id_recent_nw, found_session_ids)
|
||||
self.assertNotIn(session_id_old_w, found_session_ids)
|
||||
# Ensure only the expected sessions are in the result
|
||||
self.assertEqual(len(found_session_ids), 2)
|
||||
# Test with a cutoff that includes all sessions, and ensure only non-whitelisted are returned
|
||||
future_cutoff = now + timedelta(days=1) # All sessions are older than this
|
||||
all_old_sessions = self.registry.get_old_non_whitelisted_sessions(future_cutoff)
|
||||
all_found_session_ids = {s['session_id'] for s in all_old_sessions}
|
||||
# Expected: session_id_old_nw, session_id_old_nw_incomplete, AND session_id_recent_nw
|
||||
# Not expected: session_id_old_w (whitelisted)
|
||||
self.assertEqual(len(all_found_session_ids), 3)
|
||||
self.assertIn(session_id_old_nw, all_found_session_ids)
|
||||
self.assertIn(session_id_old_nw_incomplete, all_found_session_ids)
|
||||
self.assertIn(session_id_recent_nw, all_found_session_ids)
|
||||
self.assertNotIn(session_id_old_w, all_found_session_ids)
|
||||
|
||||
@@ -11,69 +11,56 @@ from log_pruner import LogPruner
|
||||
|
||||
@pytest.fixture
|
||||
def e2e_setup(tmp_path, monkeypatch):
|
||||
# Ensure closed before starting
|
||||
session_logger.close_session()
|
||||
monkeypatch.setattr(session_logger, "_comms_fh", None)
|
||||
|
||||
# Mock _LOG_DIR and _SCRIPTS_DIR in session_logger
|
||||
original_log_dir = session_logger._LOG_DIR
|
||||
session_logger._LOG_DIR = tmp_path / "logs"
|
||||
monkeypatch.setattr(session_logger, "_LOG_DIR", tmp_path / "logs")
|
||||
session_logger._LOG_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
original_scripts_dir = session_logger._SCRIPTS_DIR
|
||||
session_logger._SCRIPTS_DIR = tmp_path / "scripts" / "generated"
|
||||
monkeypatch.setattr(session_logger, "_SCRIPTS_DIR", tmp_path / "scripts" / "generated")
|
||||
session_logger._SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
yield tmp_path
|
||||
|
||||
# Cleanup
|
||||
session_logger.close_session()
|
||||
session_logger._LOG_DIR = original_log_dir
|
||||
session_logger._SCRIPTS_DIR = original_scripts_dir
|
||||
# Ensure closed before starting
|
||||
session_logger.close_session()
|
||||
monkeypatch.setattr(session_logger, "_comms_fh", None)
|
||||
# Mock _LOG_DIR and _SCRIPTS_DIR in session_logger
|
||||
original_log_dir = session_logger._LOG_DIR
|
||||
session_logger._LOG_DIR = tmp_path / "logs"
|
||||
monkeypatch.setattr(session_logger, "_LOG_DIR", tmp_path / "logs")
|
||||
session_logger._LOG_DIR.mkdir(parents=True, exist_ok=True)
|
||||
original_scripts_dir = session_logger._SCRIPTS_DIR
|
||||
session_logger._SCRIPTS_DIR = tmp_path / "scripts" / "generated"
|
||||
monkeypatch.setattr(session_logger, "_SCRIPTS_DIR", tmp_path / "scripts" / "generated")
|
||||
session_logger._SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
yield tmp_path
|
||||
# Cleanup
|
||||
session_logger.close_session()
|
||||
session_logger._LOG_DIR = original_log_dir
|
||||
session_logger._SCRIPTS_DIR = original_scripts_dir
|
||||
|
||||
def test_logging_e2e(e2e_setup):
|
||||
tmp_path = e2e_setup
|
||||
logs_dir = tmp_path / "logs"
|
||||
|
||||
# Step 1: Initialize (open_session)
|
||||
session_logger.open_session(label="E2E_Test")
|
||||
session_id = session_logger._session_id
|
||||
session_dir = session_logger._session_dir
|
||||
|
||||
# Step 2: Simulate logs (write 'ERROR' to 'comms.log')
|
||||
# Use log_comms which writes to comms.log
|
||||
session_logger.log_comms({"level": "ERROR", "message": "Something went wrong"})
|
||||
|
||||
# Step 3: Shutdown (close_session)
|
||||
session_logger.close_session()
|
||||
|
||||
# Step 4: Verify 'log_registry.toml' has the session whitelisted due to 'ERROR'
|
||||
registry_path = logs_dir / "log_registry.toml"
|
||||
registry = LogRegistry(str(registry_path))
|
||||
assert registry.is_session_whitelisted(session_id), "Current session should be whitelisted due to ERROR keyword"
|
||||
|
||||
# Step 5: Simulate an OLD insignificant session in the registry and directory
|
||||
old_session_id = "20200101_120000_OLD"
|
||||
old_session_dir = logs_dir / old_session_id
|
||||
old_session_dir.mkdir()
|
||||
(old_session_dir / "comms.log").write_text("nothing special") # < 2KB
|
||||
|
||||
old_start_time = datetime.now() - timedelta(days=2)
|
||||
registry.register_session(old_session_id, str(old_session_dir), old_start_time)
|
||||
|
||||
# Step 6: Trigger 'LogPruner.prune()'
|
||||
pruner = LogPruner(registry, str(logs_dir))
|
||||
pruner.prune()
|
||||
|
||||
# Step 7: Verify the OLD session is deleted but the NEW (whitelisted) session is kept.
|
||||
assert not old_session_dir.exists(), "Old insignificant session should have been pruned"
|
||||
assert session_dir.exists(), "New whitelisted session should have been kept"
|
||||
|
||||
# Extra check: Whitelisted sessions should be kept even if old
|
||||
# Manually backdate the current session
|
||||
registry.data[session_id]['start_time'] = (datetime.now() - timedelta(days=2)).isoformat()
|
||||
registry.save_registry()
|
||||
pruner.prune()
|
||||
assert session_dir.exists(), "Whitelisted session should be kept even if it is old and small"
|
||||
tmp_path = e2e_setup
|
||||
logs_dir = tmp_path / "logs"
|
||||
# Step 1: Initialize (open_session)
|
||||
session_logger.open_session(label="E2E_Test")
|
||||
session_id = session_logger._session_id
|
||||
session_dir = session_logger._session_dir
|
||||
# Step 2: Simulate logs (write 'ERROR' to 'comms.log')
|
||||
# Use log_comms which writes to comms.log
|
||||
session_logger.log_comms({"level": "ERROR", "message": "Something went wrong"})
|
||||
# Step 3: Shutdown (close_session)
|
||||
session_logger.close_session()
|
||||
# Step 4: Verify 'log_registry.toml' has the session whitelisted due to 'ERROR'
|
||||
registry_path = logs_dir / "log_registry.toml"
|
||||
registry = LogRegistry(str(registry_path))
|
||||
assert registry.is_session_whitelisted(session_id), "Current session should be whitelisted due to ERROR keyword"
|
||||
# Step 5: Simulate an OLD insignificant session in the registry and directory
|
||||
old_session_id = "20200101_120000_OLD"
|
||||
old_session_dir = logs_dir / old_session_id
|
||||
old_session_dir.mkdir()
|
||||
(old_session_dir / "comms.log").write_text("nothing special") # < 2KB
|
||||
old_start_time = datetime.now() - timedelta(days=2)
|
||||
registry.register_session(old_session_id, str(old_session_dir), old_start_time)
|
||||
# Step 6: Trigger 'LogPruner.prune()'
|
||||
pruner = LogPruner(registry, str(logs_dir))
|
||||
pruner.prune()
|
||||
# Step 7: Verify the OLD session is deleted but the NEW (whitelisted) session is kept.
|
||||
assert not old_session_dir.exists(), "Old insignificant session should have been pruned"
|
||||
assert session_dir.exists(), "New whitelisted session should have been kept"
|
||||
# Extra check: Whitelisted sessions should be kept even if old
|
||||
# Manually backdate the current session
|
||||
registry.data[session_id]['start_time'] = (datetime.now() - timedelta(days=2)).isoformat()
|
||||
registry.save_registry()
|
||||
pruner.prune()
|
||||
assert session_dir.exists(), "Whitelisted session should be kept even if it is old and small"
|
||||
|
||||
@@ -9,11 +9,10 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
import mcp_client
|
||||
|
||||
def test_mcp_perf_tool_retrieval():
|
||||
# Test that the MCP tool can call performance_monitor metrics
|
||||
mock_metrics = {"fps": 60, "last_frame_time_ms": 16.6}
|
||||
|
||||
# Simulate tool call by patching the callback
|
||||
with patch('mcp_client.perf_monitor_callback', return_value=mock_metrics):
|
||||
result = mcp_client.get_ui_performance()
|
||||
assert "60" in result
|
||||
assert "16.6" in result
|
||||
# Test that the MCP tool can call performance_monitor metrics
|
||||
mock_metrics = {"fps": 60, "last_frame_time_ms": 16.6}
|
||||
# Simulate tool call by patching the callback
|
||||
with patch('mcp_client.perf_monitor_callback', return_value=mock_metrics):
|
||||
result = mcp_client.get_ui_performance()
|
||||
assert "60" in result
|
||||
assert "16.6" in result
|
||||
|
||||
@@ -4,63 +4,55 @@ from gui_2 import App
|
||||
|
||||
@pytest.fixture
|
||||
def app_instance():
|
||||
# We patch the dependencies of App.__init__ to avoid side effects
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
|
||||
patch('gui_2.save_config'),
|
||||
patch('gui_2.project_manager') as mock_pm,
|
||||
patch('gui_2.session_logger'),
|
||||
patch('gui_2.immapp.run'),
|
||||
patch.object(App, '_load_active_project'),
|
||||
patch.object(App, '_fetch_models'),
|
||||
patch.object(App, '_load_fonts'),
|
||||
patch.object(App, '_post_init')
|
||||
):
|
||||
app = App()
|
||||
# Ensure project and ui_files_base_dir are set for _refresh_from_project
|
||||
app.project = {}
|
||||
app.ui_files_base_dir = "."
|
||||
# Return the app and the mock_pm for use in tests
|
||||
yield app, mock_pm
|
||||
# We patch the dependencies of App.__init__ to avoid side effects
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
|
||||
patch('gui_2.save_config'),
|
||||
patch('gui_2.project_manager') as mock_pm,
|
||||
patch('gui_2.session_logger'),
|
||||
patch('gui_2.immapp.run'),
|
||||
patch.object(App, '_load_active_project'),
|
||||
patch.object(App, '_fetch_models'),
|
||||
patch.object(App, '_load_fonts'),
|
||||
patch.object(App, '_post_init')
|
||||
):
|
||||
app = App()
|
||||
# Ensure project and ui_files_base_dir are set for _refresh_from_project
|
||||
app.project = {}
|
||||
app.ui_files_base_dir = "."
|
||||
# Return the app and the mock_pm for use in tests
|
||||
yield app, mock_pm
|
||||
|
||||
def test_mma_dashboard_refresh(app_instance):
|
||||
app, mock_pm = app_instance
|
||||
|
||||
# 1. Define mock tracks
|
||||
mock_tracks = [
|
||||
MagicMock(id="track_1", description="Track 1"),
|
||||
MagicMock(id="track_2", description="Track 2")
|
||||
]
|
||||
|
||||
# 2. Patch get_all_tracks to return our mock list
|
||||
mock_pm.get_all_tracks.return_value = mock_tracks
|
||||
|
||||
# 3. Call _refresh_from_project
|
||||
app._refresh_from_project()
|
||||
|
||||
# 4. Verify that app.tracks contains the mock tracks
|
||||
assert hasattr(app, 'tracks'), "App instance should have a 'tracks' attribute"
|
||||
assert app.tracks == mock_tracks
|
||||
assert len(app.tracks) == 2
|
||||
assert app.tracks[0].id == "track_1"
|
||||
assert app.tracks[1].id == "track_2"
|
||||
|
||||
# Verify get_all_tracks was called with the correct base_dir
|
||||
mock_pm.get_all_tracks.assert_called_with(app.ui_files_base_dir)
|
||||
app, mock_pm = app_instance
|
||||
# 1. Define mock tracks
|
||||
mock_tracks = [
|
||||
MagicMock(id="track_1", description="Track 1"),
|
||||
MagicMock(id="track_2", description="Track 2")
|
||||
]
|
||||
# 2. Patch get_all_tracks to return our mock list
|
||||
mock_pm.get_all_tracks.return_value = mock_tracks
|
||||
# 3. Call _refresh_from_project
|
||||
app._refresh_from_project()
|
||||
# 4. Verify that app.tracks contains the mock tracks
|
||||
assert hasattr(app, 'tracks'), "App instance should have a 'tracks' attribute"
|
||||
assert app.tracks == mock_tracks
|
||||
assert len(app.tracks) == 2
|
||||
assert app.tracks[0].id == "track_1"
|
||||
assert app.tracks[1].id == "track_2"
|
||||
# Verify get_all_tracks was called with the correct base_dir
|
||||
mock_pm.get_all_tracks.assert_called_with(app.ui_files_base_dir)
|
||||
|
||||
def test_mma_dashboard_initialization_refresh(app_instance):
|
||||
"""
|
||||
"""
|
||||
Checks that _refresh_from_project is called during initialization if
|
||||
_load_active_project is NOT mocked to skip it (but here it IS mocked in fixture).
|
||||
This test verifies that calling it manually works as expected for initialization scenarios.
|
||||
"""
|
||||
app, mock_pm = app_instance
|
||||
|
||||
mock_tracks = [MagicMock(id="init_track", description="Initial Track")]
|
||||
mock_pm.get_all_tracks.return_value = mock_tracks
|
||||
|
||||
# Simulate the refresh that would happen during a project load
|
||||
app._refresh_from_project()
|
||||
|
||||
assert app.tracks == mock_tracks
|
||||
assert app.tracks[0].id == "init_track"
|
||||
app, mock_pm = app_instance
|
||||
mock_tracks = [MagicMock(id="init_track", description="Initial Track")]
|
||||
mock_pm.get_all_tracks.return_value = mock_tracks
|
||||
# Simulate the refresh that would happen during a project load
|
||||
app._refresh_from_project()
|
||||
assert app.tracks == mock_tracks
|
||||
assert app.tracks[0].id == "init_track"
|
||||
|
||||
@@ -2,175 +2,159 @@ import pytest
|
||||
from models import Ticket, Track, WorkerContext
|
||||
|
||||
def test_ticket_instantiation():
|
||||
"""
|
||||
"""
|
||||
Verifies that a Ticket can be instantiated with its required fields:
|
||||
id, description, status, assigned_to.
|
||||
"""
|
||||
ticket_id = "T1"
|
||||
description = "Implement surgical code changes"
|
||||
status = "todo"
|
||||
assigned_to = "tier3-worker"
|
||||
|
||||
ticket = Ticket(
|
||||
id=ticket_id,
|
||||
description=description,
|
||||
status=status,
|
||||
assigned_to=assigned_to
|
||||
)
|
||||
|
||||
assert ticket.id == ticket_id
|
||||
assert ticket.description == description
|
||||
assert ticket.status == status
|
||||
assert ticket.assigned_to == assigned_to
|
||||
assert ticket.depends_on == []
|
||||
ticket_id = "T1"
|
||||
description = "Implement surgical code changes"
|
||||
status = "todo"
|
||||
assigned_to = "tier3-worker"
|
||||
ticket = Ticket(
|
||||
id=ticket_id,
|
||||
description=description,
|
||||
status=status,
|
||||
assigned_to=assigned_to
|
||||
)
|
||||
assert ticket.id == ticket_id
|
||||
assert ticket.description == description
|
||||
assert ticket.status == status
|
||||
assert ticket.assigned_to == assigned_to
|
||||
assert ticket.depends_on == []
|
||||
|
||||
def test_ticket_with_dependencies():
|
||||
"""
|
||||
"""
|
||||
Verifies that a Ticket can store dependencies.
|
||||
"""
|
||||
ticket = Ticket(
|
||||
id="T2",
|
||||
description="Write code",
|
||||
status="todo",
|
||||
assigned_to="worker-1",
|
||||
depends_on=["T1"]
|
||||
)
|
||||
assert ticket.depends_on == ["T1"]
|
||||
ticket = Ticket(
|
||||
id="T2",
|
||||
description="Write code",
|
||||
status="todo",
|
||||
assigned_to="worker-1",
|
||||
depends_on=["T1"]
|
||||
)
|
||||
assert ticket.depends_on == ["T1"]
|
||||
|
||||
def test_track_instantiation():
|
||||
"""
|
||||
"""
|
||||
Verifies that a Track can be instantiated with its required fields:
|
||||
id, description, and a list of Tickets.
|
||||
"""
|
||||
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
|
||||
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="b")
|
||||
|
||||
track_id = "TRACK-1"
|
||||
track_desc = "Implement MMA Models"
|
||||
tickets = [ticket1, ticket2]
|
||||
|
||||
track = Track(
|
||||
id=track_id,
|
||||
description=track_desc,
|
||||
tickets=tickets
|
||||
)
|
||||
|
||||
assert track.id == track_id
|
||||
assert track.description == track_desc
|
||||
assert len(track.tickets) == 2
|
||||
assert track.tickets[0].id == "T1"
|
||||
assert track.tickets[1].id == "T2"
|
||||
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
|
||||
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="b")
|
||||
track_id = "TRACK-1"
|
||||
track_desc = "Implement MMA Models"
|
||||
tickets = [ticket1, ticket2]
|
||||
track = Track(
|
||||
id=track_id,
|
||||
description=track_desc,
|
||||
tickets=tickets
|
||||
)
|
||||
assert track.id == track_id
|
||||
assert track.description == track_desc
|
||||
assert len(track.tickets) == 2
|
||||
assert track.tickets[0].id == "T1"
|
||||
assert track.tickets[1].id == "T2"
|
||||
|
||||
def test_track_can_handle_empty_tickets():
|
||||
"""
|
||||
"""
|
||||
Verifies that a Track can be instantiated with an empty list of tickets.
|
||||
"""
|
||||
track = Track(id="TRACK-2", description="Empty Track", tickets=[])
|
||||
assert track.tickets == []
|
||||
track = Track(id="TRACK-2", description="Empty Track", tickets=[])
|
||||
assert track.tickets == []
|
||||
|
||||
def test_worker_context_instantiation():
|
||||
"""
|
||||
"""
|
||||
Verifies that a WorkerContext can be instantiated with ticket_id,
|
||||
model_name, and messages.
|
||||
"""
|
||||
ticket_id = "T1"
|
||||
model_name = "gemini-2.0-flash-lite"
|
||||
messages = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"}
|
||||
]
|
||||
|
||||
context = WorkerContext(
|
||||
ticket_id=ticket_id,
|
||||
model_name=model_name,
|
||||
messages=messages
|
||||
)
|
||||
|
||||
assert context.ticket_id == ticket_id
|
||||
assert context.model_name == model_name
|
||||
assert context.messages == messages
|
||||
ticket_id = "T1"
|
||||
model_name = "gemini-2.0-flash-lite"
|
||||
messages = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"}
|
||||
]
|
||||
context = WorkerContext(
|
||||
ticket_id=ticket_id,
|
||||
model_name=model_name,
|
||||
messages=messages
|
||||
)
|
||||
assert context.ticket_id == ticket_id
|
||||
assert context.model_name == model_name
|
||||
assert context.messages == messages
|
||||
|
||||
def test_ticket_mark_blocked():
|
||||
"""
|
||||
"""
|
||||
Verifies that ticket.mark_blocked(reason) sets the status to 'blocked'.
|
||||
Note: The reason field might need to be added to the Ticket class.
|
||||
"""
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
|
||||
ticket.mark_blocked("Waiting for API key")
|
||||
assert ticket.status == "blocked"
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
|
||||
ticket.mark_blocked("Waiting for API key")
|
||||
assert ticket.status == "blocked"
|
||||
|
||||
def test_ticket_mark_complete():
|
||||
"""
|
||||
"""
|
||||
Verifies that ticket.mark_complete() sets the status to 'completed'.
|
||||
"""
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
|
||||
ticket.mark_complete()
|
||||
assert ticket.status == "completed"
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="a")
|
||||
ticket.mark_complete()
|
||||
assert ticket.status == "completed"
|
||||
|
||||
def test_track_get_executable_tickets():
|
||||
"""
|
||||
"""
|
||||
Verifies that track.get_executable_tickets() returns only 'todo' tickets
|
||||
whose dependencies are all 'completed'.
|
||||
"""
|
||||
# T1: todo, no deps -> executable
|
||||
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="a")
|
||||
# T2: todo, deps [T1] -> not executable (T1 is todo)
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="a", depends_on=["T1"])
|
||||
# T3: todo, deps [T4] -> not executable (T4 is blocked)
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="a", depends_on=["T4"])
|
||||
# T4: blocked, no deps -> not executable (not 'todo')
|
||||
t4 = Ticket(id="T4", description="T4", status="blocked", assigned_to="a")
|
||||
# T5: completed, no deps -> not executable (not 'todo')
|
||||
t5 = Ticket(id="T5", description="T5", status="completed", assigned_to="a")
|
||||
# T6: todo, deps [T5] -> executable (T5 is completed)
|
||||
t6 = Ticket(id="T6", description="T6", status="todo", assigned_to="a", depends_on=["T5"])
|
||||
|
||||
track = Track(id="TR1", description="Track 1", tickets=[t1, t2, t3, t4, t5, t6])
|
||||
|
||||
executable = track.get_executable_tickets()
|
||||
executable_ids = [t.id for t in executable]
|
||||
|
||||
assert "T1" in executable_ids
|
||||
assert "T6" in executable_ids
|
||||
assert len(executable_ids) == 2
|
||||
# T1: todo, no deps -> executable
|
||||
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="a")
|
||||
# T2: todo, deps [T1] -> not executable (T1 is todo)
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="a", depends_on=["T1"])
|
||||
# T3: todo, deps [T4] -> not executable (T4 is blocked)
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="a", depends_on=["T4"])
|
||||
# T4: blocked, no deps -> not executable (not 'todo')
|
||||
t4 = Ticket(id="T4", description="T4", status="blocked", assigned_to="a")
|
||||
# T5: completed, no deps -> not executable (not 'todo')
|
||||
t5 = Ticket(id="T5", description="T5", status="completed", assigned_to="a")
|
||||
# T6: todo, deps [T5] -> executable (T5 is completed)
|
||||
t6 = Ticket(id="T6", description="T6", status="todo", assigned_to="a", depends_on=["T5"])
|
||||
track = Track(id="TR1", description="Track 1", tickets=[t1, t2, t3, t4, t5, t6])
|
||||
executable = track.get_executable_tickets()
|
||||
executable_ids = [t.id for t in executable]
|
||||
assert "T1" in executable_ids
|
||||
assert "T6" in executable_ids
|
||||
assert len(executable_ids) == 2
|
||||
|
||||
def test_track_get_executable_tickets_complex():
|
||||
"""
|
||||
"""
|
||||
Verifies executable tickets with complex dependency chains.
|
||||
Chain: T1 (comp) -> T2 (todo) -> T3 (todo)
|
||||
T4 (comp) -> T3
|
||||
T5 (todo) -> T3
|
||||
"""
|
||||
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="a")
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="a", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="a", depends_on=["T2", "T4", "T5"])
|
||||
t4 = Ticket(id="T4", description="T4", status="completed", assigned_to="a")
|
||||
t5 = Ticket(id="T5", description="T5", status="todo", assigned_to="a")
|
||||
|
||||
track = Track(id="TR1", description="Track 1", tickets=[t1, t2, t3, t4, t5])
|
||||
|
||||
# At this point:
|
||||
# T1 is completed
|
||||
# T4 is completed
|
||||
# T2 is todo, depends on T1 (completed) -> Executable
|
||||
# T5 is todo, no deps -> Executable
|
||||
# T3 is todo, depends on T2 (todo), T4 (completed), T5 (todo) -> Not executable
|
||||
|
||||
executable = track.get_executable_tickets()
|
||||
executable_ids = sorted([t.id for t in executable])
|
||||
|
||||
assert executable_ids == ["T2", "T5"]
|
||||
|
||||
# Mark T2 complete
|
||||
t2.mark_complete()
|
||||
# T3 still depends on T5
|
||||
executable = track.get_executable_tickets()
|
||||
executable_ids = sorted([t.id for t in executable])
|
||||
assert executable_ids == ["T5"]
|
||||
|
||||
# Mark T5 complete
|
||||
t5.mark_complete()
|
||||
# Now T3 should be executable
|
||||
executable = track.get_executable_tickets()
|
||||
executable_ids = sorted([t.id for t in executable])
|
||||
assert executable_ids == ["T3"]
|
||||
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="a")
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="a", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="a", depends_on=["T2", "T4", "T5"])
|
||||
t4 = Ticket(id="T4", description="T4", status="completed", assigned_to="a")
|
||||
t5 = Ticket(id="T5", description="T5", status="todo", assigned_to="a")
|
||||
track = Track(id="TR1", description="Track 1", tickets=[t1, t2, t3, t4, t5])
|
||||
# At this point:
|
||||
# T1 is completed
|
||||
# T4 is completed
|
||||
# T2 is todo, depends on T1 (completed) -> Executable
|
||||
# T5 is todo, no deps -> Executable
|
||||
# T3 is todo, depends on T2 (todo), T4 (completed), T5 (todo) -> Not executable
|
||||
executable = track.get_executable_tickets()
|
||||
executable_ids = sorted([t.id for t in executable])
|
||||
assert executable_ids == ["T2", "T5"]
|
||||
# Mark T2 complete
|
||||
t2.mark_complete()
|
||||
# T3 still depends on T5
|
||||
executable = track.get_executable_tickets()
|
||||
executable_ids = sorted([t.id for t in executable])
|
||||
assert executable_ids == ["T5"]
|
||||
# Mark T5 complete
|
||||
t5.mark_complete()
|
||||
# Now T3 should be executable
|
||||
executable = track.get_executable_tickets()
|
||||
executable_ids = sorted([t.id for t in executable])
|
||||
assert executable_ids == ["T3"]
|
||||
|
||||
@@ -7,141 +7,126 @@ from gui_2 import App
|
||||
|
||||
@pytest.fixture
|
||||
def app_instance():
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
|
||||
patch('gui_2.save_config'),
|
||||
patch('gui_2.project_manager'),
|
||||
patch('gui_2.session_logger'),
|
||||
patch('gui_2.immapp.run'),
|
||||
patch.object(App, '_load_active_project'),
|
||||
patch.object(App, '_fetch_models'),
|
||||
patch.object(App, '_load_fonts'),
|
||||
patch.object(App, '_post_init')
|
||||
):
|
||||
app = App()
|
||||
# Initialize the new state variables if they aren't there yet (they won't be until we implement them)
|
||||
if not hasattr(app, 'ui_epic_input'): app.ui_epic_input = ""
|
||||
if not hasattr(app, 'proposed_tracks'): app.proposed_tracks = []
|
||||
if not hasattr(app, '_show_track_proposal_modal'): app._show_track_proposal_modal = False
|
||||
yield app
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
|
||||
patch('gui_2.save_config'),
|
||||
patch('gui_2.project_manager'),
|
||||
patch('gui_2.session_logger'),
|
||||
patch('gui_2.immapp.run'),
|
||||
patch.object(App, '_load_active_project'),
|
||||
patch.object(App, '_fetch_models'),
|
||||
patch.object(App, '_load_fonts'),
|
||||
patch.object(App, '_post_init')
|
||||
):
|
||||
app = App()
|
||||
# Initialize the new state variables if they aren't there yet (they won't be until we implement them)
|
||||
if not hasattr(app, 'ui_epic_input'): app.ui_epic_input = ""
|
||||
if not hasattr(app, 'proposed_tracks'): app.proposed_tracks = []
|
||||
if not hasattr(app, '_show_track_proposal_modal'): app._show_track_proposal_modal = False
|
||||
yield app
|
||||
|
||||
def test_mma_ui_state_initialization(app_instance):
|
||||
"""Verifies that the new MMA UI state variables are initialized correctly."""
|
||||
assert hasattr(app_instance, 'ui_epic_input')
|
||||
assert hasattr(app_instance, 'proposed_tracks')
|
||||
assert hasattr(app_instance, '_show_track_proposal_modal')
|
||||
assert hasattr(app_instance, 'mma_streams')
|
||||
assert app_instance.ui_epic_input == ""
|
||||
assert app_instance.proposed_tracks == []
|
||||
assert app_instance._show_track_proposal_modal is False
|
||||
assert app_instance.mma_streams == {}
|
||||
"""Verifies that the new MMA UI state variables are initialized correctly."""
|
||||
assert hasattr(app_instance, 'ui_epic_input')
|
||||
assert hasattr(app_instance, 'proposed_tracks')
|
||||
assert hasattr(app_instance, '_show_track_proposal_modal')
|
||||
assert hasattr(app_instance, 'mma_streams')
|
||||
assert app_instance.ui_epic_input == ""
|
||||
assert app_instance.proposed_tracks == []
|
||||
assert app_instance._show_track_proposal_modal is False
|
||||
assert app_instance.mma_streams == {}
|
||||
|
||||
def test_process_pending_gui_tasks_show_track_proposal(app_instance):
|
||||
"""Verifies that the 'show_track_proposal' action correctly updates the UI state."""
|
||||
mock_tracks = [{"id": "track_1", "title": "Test Track"}]
|
||||
task = {
|
||||
"action": "show_track_proposal",
|
||||
"payload": mock_tracks
|
||||
}
|
||||
app_instance._pending_gui_tasks.append(task)
|
||||
|
||||
app_instance._process_pending_gui_tasks()
|
||||
|
||||
assert app_instance.proposed_tracks == mock_tracks
|
||||
assert app_instance._show_track_proposal_modal is True
|
||||
"""Verifies that the 'show_track_proposal' action correctly updates the UI state."""
|
||||
mock_tracks = [{"id": "track_1", "title": "Test Track"}]
|
||||
task = {
|
||||
"action": "show_track_proposal",
|
||||
"payload": mock_tracks
|
||||
}
|
||||
app_instance._pending_gui_tasks.append(task)
|
||||
app_instance._process_pending_gui_tasks()
|
||||
assert app_instance.proposed_tracks == mock_tracks
|
||||
assert app_instance._show_track_proposal_modal is True
|
||||
|
||||
def test_cb_plan_epic_launches_thread(app_instance):
    """Verifies that _cb_plan_epic launches a thread and eventually queues a task."""
    app_instance.ui_epic_input = "Develop a new feature"
    app_instance.active_project_path = "test_project.toml"

    mock_tracks = [{"id": "track_1", "title": "Test Track"}]

    with (
        patch('orchestrator_pm.get_track_history_summary', return_value="History summary") as mock_get_history,
        patch('orchestrator_pm.generate_tracks', return_value=mock_tracks) as mock_gen_tracks,
        patch('aggregate.build_file_items', return_value=[]) as mock_build_files
    ):
        # We need to mock project_manager.flat_config and project_manager.load_project
        with (
            patch('project_manager.load_project', return_value={}),
            patch('project_manager.flat_config', return_value={})
        ):
            app_instance._cb_plan_epic()

        # Wait for the background thread to finish (it should be quick with mocks)
        max_wait = 5
        start_time = time.time()
        while len(app_instance._pending_gui_tasks) < 2 and time.time() - start_time < max_wait:
            time.sleep(0.1)

        assert len(app_instance._pending_gui_tasks) == 2

        # First task streams the raw Tier 1 JSON back to the UI.
        task1 = app_instance._pending_gui_tasks[0]
        assert task1['action'] == 'handle_ai_response'
        assert task1['payload']['stream_id'] == 'Tier 1'
        assert task1['payload']['text'] == json.dumps(mock_tracks, indent=2)

        # Second task asks the UI to show the proposal modal.
        task2 = app_instance._pending_gui_tasks[1]
        assert task2['action'] == 'show_track_proposal'
        assert task2['payload'] == mock_tracks

        mock_get_history.assert_called_once()
        mock_gen_tracks.assert_called_once()
||||
|
||||
def test_process_pending_gui_tasks_mma_spawn_approval(app_instance):
    """Verifies that the 'mma_spawn_approval' action correctly updates the UI state."""
    task = {
        "action": "mma_spawn_approval",
        "ticket_id": "T1",
        "role": "Tier 3 Worker",
        "prompt": "Test Prompt",
        "context_md": "Test Context",
        "dialog_container": [None],
    }
    app_instance._pending_gui_tasks.append(task)

    app_instance._process_pending_gui_tasks()

    assert app_instance._pending_mma_spawn == task
    assert app_instance._mma_spawn_prompt == "Test Prompt"
    assert app_instance._mma_spawn_context == "Test Context"
    assert app_instance._mma_spawn_open is True
    assert app_instance._mma_spawn_edit_mode is False
    # The processor fills the caller-supplied container with a dialog handle.
    assert task["dialog_container"][0] is not None
    assert task["dialog_container"][0]._ticket_id == "T1"
||||
|
||||
def test_handle_ai_response_with_stream_id(app_instance):
    """Verifies routing to mma_streams."""
    task = {
        "action": "handle_ai_response",
        "payload": {
            "text": "Tier 1 Strategy Content",
            "stream_id": "Tier 1",
            "status": "Thinking...",
        },
    }
    app_instance._pending_gui_tasks.append(task)

    app_instance._process_pending_gui_tasks()

    # With a stream_id present, the text goes to the named stream,
    # not to the generic ai_response buffer.
    assert app_instance.mma_streams.get("Tier 1") == "Tier 1 Strategy Content"
    assert app_instance.ai_status == "Thinking..."
    assert app_instance.ai_response == ""
||||
|
||||
def test_handle_ai_response_fallback(app_instance):
    """Verifies fallback to ai_response when stream_id is missing."""
    task = {
        "action": "handle_ai_response",
        "payload": {
            "text": "Regular AI Response",
            "status": "done",
        },
    }
    app_instance._pending_gui_tasks.append(task)

    app_instance._process_pending_gui_tasks()

    # No stream_id: text lands in the generic buffer, streams untouched.
    assert app_instance.ai_response == "Regular AI Response"
    assert app_instance.ai_status == "done"
    assert len(app_instance.mma_streams) == 0
||||
|
||||
@@ -2,51 +2,51 @@ import pytest
|
||||
from mma_prompts import PROMPTS
|
||||
|
||||
def test_tier1_epic_init_constraints():
    """The Tier 1 epic-init prompt must state its output format and scope rules."""
    prompt = PROMPTS["tier1_epic_init"]
    assert "Godot ECS Flat List format" in prompt
    assert "JSON array" in prompt
    assert "Tracks" in prompt
    assert "severity" in prompt
    assert "IGNORE all source code" in prompt
||||
|
||||
def test_tier1_track_delegation_constraints():
    """The Tier 1 track-delegation prompt must reference its required inputs."""
    prompt = PROMPTS["tier1_track_delegation"]
    assert "Track Brief" in prompt
    assert "AST Skeleton View" in prompt
    assert "IGNORE unrelated module docs" in prompt
||||
|
||||
def test_tier1_macro_merge_constraints():
    """The Tier 1 macro-merge prompt must name its artifacts and exclusions."""
    prompt = PROMPTS["tier1_macro_merge"]
    assert "Macro-Merge" in prompt
    assert "Macro-Diff" in prompt
    assert "IGNORE Tier 3 trial-and-error" in prompt
||||
|
||||
def test_tier2_sprint_planning_constraints():
    """The Tier 2 sprint-planning prompt must cover tickets, deps, and views."""
    prompt = PROMPTS["tier2_sprint_planning"]
    assert "Tickets" in prompt
    assert "Godot ECS Flat List format" in prompt
    assert "depends_on" in prompt
    assert "DAG" in prompt
    assert "Skeleton View" in prompt
    assert "Curated Implementation View" in prompt
||||
|
||||
def test_tier2_code_review_constraints():
    """The Tier 2 code-review prompt must define review scope and exclusions."""
    prompt = PROMPTS["tier2_code_review"]
    assert "Code Review" in prompt
    assert "IGNORE the Contributor's internal trial-and-error" in prompt
    assert "Tier 4 (QA) logs" in prompt
||||
|
||||
def test_tier2_track_finalization_constraints():
    """The Tier 2 track-finalization prompt must list its report sections."""
    prompt = PROMPTS["tier2_track_finalization"]
    assert "Track Finalization" in prompt
    assert "Executive Summary" in prompt
    assert "Macro-Diff" in prompt
    assert "Dependency Delta" in prompt
||||
|
||||
def test_tier2_contract_first_constraints():
    """The Tier 2 contract-first prompt must name all three ticket kinds."""
    prompt = PROMPTS["tier2_contract_first"]
    assert "Stub Ticket" in prompt
    assert "Consumer Ticket" in prompt
    assert "Implementation Ticket" in prompt
    assert "Interface-Driven Development" in prompt
    assert "Godot ECS Flat List format" in prompt
||||
|
||||
@@ -5,49 +5,43 @@ from gui_2 import App
|
||||
|
||||
@pytest.fixture
def app_instance():
    """Builds an App with all I/O-heavy collaborators patched out.

    Patches config load/save, project/session managers, the immapp run loop,
    and the App lifecycle hooks so the instance can be constructed without
    touching disk, network, or a GUI backend.
    """
    with (
        patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
        patch('gui_2.save_config'),
        patch('gui_2.project_manager'),
        patch('gui_2.session_logger'),
        patch('gui_2.immapp.run'),
        patch.object(App, '_load_active_project'),
        patch.object(App, '_fetch_models'),
        patch.object(App, '_load_fonts'),
        patch.object(App, '_post_init')
    ):
        app = App()
        app.active_tickets = []
        app._loop = MagicMock()  # stand-in event loop for run_coroutine_threadsafe
        yield app
||||
|
||||
def test_cb_ticket_retry(app_instance):
    """Retrying a failed ticket resets it to 'todo' and pushes an event."""
    ticket_id = "test_ticket_1"
    app_instance.active_tickets = [{"id": ticket_id, "status": "failed"}]

    with patch('asyncio.run_coroutine_threadsafe') as mock_run_safe:
        app_instance._cb_ticket_retry(ticket_id)

        # Verify status update
        assert app_instance.active_tickets[0]['status'] == 'todo'

        # Verify event pushed
        mock_run_safe.assert_called_once()
        # First arg is the coroutine (event_queue.put), second is self._loop
        args, _ = mock_run_safe.call_args
        assert args[1] == app_instance._loop
||||
|
||||
def test_cb_ticket_skip(app_instance):
    """Skipping a ticket marks it 'skipped' and pushes an event to the loop."""
    ticket_id = "test_ticket_1"
    app_instance.active_tickets = [{"id": ticket_id, "status": "todo"}]

    with patch('asyncio.run_coroutine_threadsafe') as mock_run_safe:
        app_instance._cb_ticket_skip(ticket_id)

        # Verify status update
        assert app_instance.active_tickets[0]['status'] == 'skipped'

        # Verify event pushed
        mock_run_safe.assert_called_once()
        args, _ = mock_run_safe.call_args
        assert args[1] == app_instance._loop
||||
|
||||
@@ -8,126 +8,104 @@ from models import Track, Ticket
|
||||
|
||||
@pytest.fixture
def mock_ai_client():
    """Yields a patched ai_client.send so tests can script AI responses."""
    with patch("ai_client.send") as mock_send:
        yield mock_send
||||
|
||||
def test_generate_tracks(mock_ai_client):
    """generate_tracks parses the Tier 1 JSON array into track dicts."""
    # Tier 1 (PM) response mock
    mock_ai_client.return_value = json.dumps([
        {"id": "track_1", "title": "Infrastructure Setup", "description": "Setup basic project structure"},
        {"id": "track_2", "title": "Feature implementation", "description": "Implement core feature"}
    ])

    user_request = "Build a new app"
    project_config = {}
    file_items = []

    tracks = orchestrator_pm.generate_tracks(user_request, project_config, file_items)

    assert len(tracks) == 2
    assert tracks[0]["id"] == "track_1"
    assert tracks[1]["id"] == "track_2"
    mock_ai_client.assert_called_once()
||||
|
||||
def test_generate_tickets(mock_ai_client):
    """generate_tickets parses the Tier 2 JSON array, keeping depends_on links."""
    # Tier 2 (Tech Lead) response mock
    mock_ai_client.return_value = json.dumps([
        {"id": "T-001", "description": "Define interfaces", "depends_on": []},
        {"id": "T-002", "description": "Implement interfaces", "depends_on": ["T-001"]}
    ])

    track_brief = "Implement a new feature."
    module_skeletons = "class Feature: pass"

    tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)

    assert len(tickets) == 2
    assert tickets[0]["id"] == "T-001"
    assert tickets[1]["id"] == "T-002"
    assert tickets[1]["depends_on"] == ["T-001"]
||||
|
||||
def test_topological_sort():
    """topological_sort orders tickets so dependencies come first."""
    tickets = [
        {"id": "T-002", "description": "Dep on 001", "depends_on": ["T-001"]},
        {"id": "T-001", "description": "Base", "depends_on": []},
        {"id": "T-003", "description": "Dep on 002", "depends_on": ["T-002"]}
    ]

    sorted_tickets = conductor_tech_lead.topological_sort(tickets)

    assert sorted_tickets[0]["id"] == "T-001"
    assert sorted_tickets[1]["id"] == "T-002"
    assert sorted_tickets[2]["id"] == "T-003"
||||
|
||||
def test_topological_sort_circular():
    """A dependency cycle must raise ValueError rather than loop or drop tickets."""
    tickets = [
        {"id": "T-001", "depends_on": ["T-002"]},
        {"id": "T-002", "depends_on": ["T-001"]}
    ]

    with pytest.raises(ValueError, match="Circular dependency detected"):
        conductor_tech_lead.topological_sort(tickets)
||||
|
||||
def test_track_executable_tickets():
    """get_executable_tickets returns only tickets whose dependencies are done."""
    t1 = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
    t2 = Ticket(id="T2", description="desc", status="todo", assigned_to="user", depends_on=["T1"])

    track = Track(id="track_1", description="desc", tickets=[t1, t2])

    # T2 is blocked behind T1, so only T1 is runnable initially.
    executable = track.get_executable_tickets()
    assert len(executable) == 1
    assert executable[0].id == "T1"

    # Complete T1
    t1.status = "completed"
    executable = track.get_executable_tickets()
    assert len(executable) == 1
    assert executable[0].id == "T2"
||||
|
||||
@pytest.mark.asyncio
async def test_conductor_engine_run_linear():
    """run_linear drives each ticket through the worker lifecycle in order."""
    t1 = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
    t2 = Ticket(id="T2", description="desc", status="todo", assigned_to="user", depends_on=["T1"])
    track = Track(id="track_1", description="desc", tickets=[t1, t2])
    engine = multi_agent_conductor.ConductorEngine(track)

    with patch("multi_agent_conductor.run_worker_lifecycle") as mock_worker:
        # Mock worker to complete tickets
        def complete_ticket(ticket, context, **kwargs):
            ticket.status = "completed"

        mock_worker.side_effect = complete_ticket

        await engine.run_linear()

        assert t1.status == "completed"
        assert t2.status == "completed"
        assert mock_worker.call_count == 2
||||
|
||||
def test_conductor_engine_parse_json_tickets():
    """parse_json_tickets populates the track's tickets from a JSON array."""
    track = Track(id="track_1", description="desc")
    engine = multi_agent_conductor.ConductorEngine(track)

    json_data = json.dumps([
        {"id": "T1", "description": "desc 1", "depends_on": []},
        {"id": "T2", "description": "desc 2", "depends_on": ["T1"]}
    ])

    engine.parse_json_tickets(json_data)

    assert len(track.tickets) == 2
    assert track.tickets[0].id == "T1"
    assert track.tickets[1].id == "T2"
    assert track.tickets[1].depends_on == ["T1"]
||||
|
||||
def test_run_worker_lifecycle_blocked(mock_ai_client):
    """A BLOCKED AI reply marks the ticket blocked and records the reason."""
    ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
    context = multi_agent_conductor.WorkerContext(ticket_id="T1", model_name="model", messages=[])

    mock_ai_client.return_value = "BLOCKED because of missing info"

    multi_agent_conductor.run_worker_lifecycle(ticket, context)

    assert ticket.status == "blocked"
    assert ticket.blocked_reason == "BLOCKED because of missing info"
||||
|
||||
@@ -6,75 +6,67 @@ import mma_prompts
|
||||
|
||||
class TestOrchestratorPM(unittest.TestCase):
    """Unit tests for orchestrator_pm.generate_tracks response handling."""

    @patch('summarize.build_summary_markdown')
    @patch('ai_client.send')
    def test_generate_tracks_success(self, mock_send, mock_summarize):
        """Happy path: valid JSON array from the model becomes track dicts."""
        # Setup mocks
        mock_summarize.return_value = "REPO_MAP_CONTENT"

        mock_response_data = [
            {
                "id": "track_1",
                "type": "Track",
                "module": "test_module",
                "persona": "Tech Lead",
                "severity": "Medium",
                "goal": "Test goal",
                "acceptance_criteria": ["criteria 1"]
            }
        ]
        mock_send.return_value = json.dumps(mock_response_data)

        user_request = "Implement unit tests"
        project_config = {"files": {"paths": ["src"]}}
        file_items = [{"path": "src/main.py", "content": "print('hello')"}]

        # Execute
        result = orchestrator_pm.generate_tracks(user_request, project_config, file_items)

        # Verify summarize call
        mock_summarize.assert_called_once_with(file_items)

        # Verify ai_client.send call
        expected_system_prompt = mma_prompts.PROMPTS['tier1_epic_init']
        mock_send.assert_called_once()
        args, kwargs = mock_send.call_args
        self.assertEqual(kwargs['md_content'], "")
        # Cannot check system_prompt via mock_send kwargs anymore as it's set globally
        # But we can verify user_message was passed
        self.assertIn(user_request, kwargs['user_message'])
        self.assertIn("REPO_MAP_CONTENT", kwargs['user_message'])

        # Verify result
        self.assertEqual(result[0]['id'], mock_response_data[0]['id'])

    @patch('summarize.build_summary_markdown')
    @patch('ai_client.send')
    def test_generate_tracks_markdown_wrapped(self, mock_send, mock_summarize):
        """JSON wrapped in markdown code fences is still extracted and parsed."""
        mock_summarize.return_value = "REPO_MAP"
        mock_response_data = [{"id": "track_1"}]
        expected_result = [{"id": "track_1", "title": "Untitled Track"}]

        # Wrapped in ```json ... ```
        mock_send.return_value = f"Here is the plan:\n```json\n{json.dumps(mock_response_data)}\n```\nHope this helps."
        result = orchestrator_pm.generate_tracks("req", {}, [])
        self.assertEqual(result, expected_result)

        # Wrapped in ``` ... ```
        mock_send.return_value = f"```\n{json.dumps(mock_response_data)}\n```"
        result = orchestrator_pm.generate_tracks("req", {}, [])
        self.assertEqual(result, expected_result)

    @patch('summarize.build_summary_markdown')
    @patch('ai_client.send')
    def test_generate_tracks_malformed_json(self, mock_send, mock_summarize):
        """Unparseable model output yields [] and reports the parse error."""
        mock_summarize.return_value = "REPO_MAP"
        mock_send.return_value = "NOT A JSON"

        # Should return empty list and print error (we can mock print if we want to be thorough)
        with patch('builtins.print') as mock_print:
            result = orchestrator_pm.generate_tracks("req", {}, [])
            self.assertEqual(result, [])
            mock_print.assert_any_call("Error parsing Tier 1 response: Expecting value: line 1 column 1 (char 0)")
|
||||
# Allow running this test module directly (pytest discovers it either way).
if __name__ == '__main__':
    unittest.main()
||||
|
||||
@@ -7,70 +7,63 @@ from pathlib import Path
|
||||
import orchestrator_pm
|
||||
|
||||
class TestOrchestratorPMHistory(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.test_dir = Path("test_conductor")
|
||||
self.test_dir.mkdir(exist_ok=True)
|
||||
self.archive_dir = self.test_dir / "archive"
|
||||
self.tracks_dir = self.test_dir / "tracks"
|
||||
self.archive_dir.mkdir(exist_ok=True)
|
||||
self.tracks_dir.mkdir(exist_ok=True)
|
||||
def setUp(self):
|
||||
self.test_dir = Path("test_conductor")
|
||||
self.test_dir.mkdir(exist_ok=True)
|
||||
self.archive_dir = self.test_dir / "archive"
|
||||
self.tracks_dir = self.test_dir / "tracks"
|
||||
self.archive_dir.mkdir(exist_ok=True)
|
||||
self.tracks_dir.mkdir(exist_ok=True)
|
||||
|
||||
def tearDown(self):
|
||||
if self.test_dir.exists():
|
||||
shutil.rmtree(self.test_dir)
|
||||
def tearDown(self):
|
||||
if self.test_dir.exists():
|
||||
shutil.rmtree(self.test_dir)
|
||||
|
||||
def create_track(self, parent_dir, track_id, title, status, overview):
|
||||
track_path = parent_dir / track_id
|
||||
track_path.mkdir(exist_ok=True)
|
||||
|
||||
metadata = {"title": title, "status": status}
|
||||
with open(track_path / "metadata.json", "w") as f:
|
||||
json.dump(metadata, f)
|
||||
|
||||
spec_content = f"# Specification\n\n## Overview\n{overview}"
|
||||
with open(track_path / "spec.md", "w") as f:
|
||||
f.write(spec_content)
|
||||
def create_track(self, parent_dir, track_id, title, status, overview):
|
||||
track_path = parent_dir / track_id
|
||||
track_path.mkdir(exist_ok=True)
|
||||
metadata = {"title": title, "status": status}
|
||||
with open(track_path / "metadata.json", "w") as f:
|
||||
json.dump(metadata, f)
|
||||
spec_content = f"# Specification\n\n## Overview\n{overview}"
|
||||
with open(track_path / "spec.md", "w") as f:
|
||||
f.write(spec_content)
|
||||
|
||||
@patch('orchestrator_pm.CONDUCTOR_PATH', Path("test_conductor"))
|
||||
def test_get_track_history_summary(self):
|
||||
# Setup mock tracks
|
||||
self.create_track(self.archive_dir, "track_001", "Initial Setup", "completed", "Setting up the project structure.")
|
||||
self.create_track(self.tracks_dir, "track_002", "Feature A", "in_progress", "Implementing Feature A.")
|
||||
|
||||
summary = orchestrator_pm.get_track_history_summary()
|
||||
|
||||
self.assertIn("Initial Setup", summary)
|
||||
self.assertIn("completed", summary)
|
||||
self.assertIn("Setting up the project structure.", summary)
|
||||
self.assertIn("Feature A", summary)
|
||||
self.assertIn("in_progress", summary)
|
||||
self.assertIn("Implementing Feature A.", summary)
|
||||
@patch('orchestrator_pm.CONDUCTOR_PATH', Path("test_conductor"))
|
||||
def test_get_track_history_summary(self):
|
||||
# Setup mock tracks
|
||||
self.create_track(self.archive_dir, "track_001", "Initial Setup", "completed", "Setting up the project structure.")
|
||||
self.create_track(self.tracks_dir, "track_002", "Feature A", "in_progress", "Implementing Feature A.")
|
||||
summary = orchestrator_pm.get_track_history_summary()
|
||||
self.assertIn("Initial Setup", summary)
|
||||
self.assertIn("completed", summary)
|
||||
self.assertIn("Setting up the project structure.", summary)
|
||||
self.assertIn("Feature A", summary)
|
||||
self.assertIn("in_progress", summary)
|
||||
self.assertIn("Implementing Feature A.", summary)
|
||||
|
||||
@patch('orchestrator_pm.CONDUCTOR_PATH', Path("test_conductor"))
|
||||
def test_get_track_history_summary_missing_files(self):
|
||||
# Track with missing spec.md
|
||||
track_path = self.tracks_dir / "track_003"
|
||||
track_path.mkdir(exist_ok=True)
|
||||
with open(track_path / "metadata.json", "w") as f:
|
||||
json.dump({"title": "Missing Spec", "status": "pending"}, f)
|
||||
|
||||
summary = orchestrator_pm.get_track_history_summary()
|
||||
self.assertIn("Missing Spec", summary)
|
||||
self.assertIn("pending", summary)
|
||||
self.assertIn("No overview available", summary)
|
||||
@patch('orchestrator_pm.CONDUCTOR_PATH', Path("test_conductor"))
|
||||
def test_get_track_history_summary_missing_files(self):
|
||||
# Track with missing spec.md
|
||||
track_path = self.tracks_dir / "track_003"
|
||||
track_path.mkdir(exist_ok=True)
|
||||
with open(track_path / "metadata.json", "w") as f:
|
||||
json.dump({"title": "Missing Spec", "status": "pending"}, f)
|
||||
summary = orchestrator_pm.get_track_history_summary()
|
||||
self.assertIn("Missing Spec", summary)
|
||||
self.assertIn("pending", summary)
|
||||
self.assertIn("No overview available", summary)
|
||||
|
||||
@patch('orchestrator_pm.summarize.build_summary_markdown')
|
||||
@patch('ai_client.send')
|
||||
def test_generate_tracks_with_history(self, mock_send, mock_summarize):
|
||||
mock_summarize.return_value = "REPO_MAP"
|
||||
mock_send.return_value = "[]"
|
||||
|
||||
history_summary = "PAST_HISTORY_SUMMARY"
|
||||
orchestrator_pm.generate_tracks("req", {}, [], history_summary=history_summary)
|
||||
|
||||
args, kwargs = mock_send.call_args
|
||||
self.assertIn(history_summary, kwargs['user_message'])
|
||||
self.assertIn("### TRACK HISTORY:", kwargs['user_message'])
|
||||
@patch('orchestrator_pm.summarize.build_summary_markdown')
|
||||
@patch('ai_client.send')
|
||||
def test_generate_tracks_with_history(self, mock_send, mock_summarize):
|
||||
mock_summarize.return_value = "REPO_MAP"
|
||||
mock_send.return_value = "[]"
|
||||
history_summary = "PAST_HISTORY_SUMMARY"
|
||||
orchestrator_pm.generate_tracks("req", {}, [], history_summary=history_summary)
|
||||
args, kwargs = mock_send.call_args
|
||||
self.assertIn(history_summary, kwargs['user_message'])
|
||||
self.assertIn("### TRACK HISTORY:", kwargs['user_message'])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
unittest.main()
|
||||
|
||||
@@ -9,21 +9,19 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from performance_monitor import PerformanceMonitor
|
||||
|
||||
def test_perf_monitor_basic_timing():
|
||||
pm = PerformanceMonitor()
|
||||
pm.start_frame()
|
||||
time.sleep(0.02) # 20ms
|
||||
pm.end_frame()
|
||||
|
||||
metrics = pm.get_metrics()
|
||||
assert metrics['last_frame_time_ms'] >= 20.0
|
||||
pm.stop()
|
||||
pm = PerformanceMonitor()
|
||||
pm.start_frame()
|
||||
time.sleep(0.02) # 20ms
|
||||
pm.end_frame()
|
||||
metrics = pm.get_metrics()
|
||||
assert metrics['last_frame_time_ms'] >= 20.0
|
||||
pm.stop()
|
||||
|
||||
def test_perf_monitor_component_timing():
|
||||
pm = PerformanceMonitor()
|
||||
pm.start_component("test_comp")
|
||||
time.sleep(0.01)
|
||||
pm.end_component("test_comp")
|
||||
|
||||
metrics = pm.get_metrics()
|
||||
assert metrics['time_test_comp_ms'] >= 10.0
|
||||
pm.stop()
|
||||
pm = PerformanceMonitor()
|
||||
pm.start_component("test_comp")
|
||||
time.sleep(0.01)
|
||||
pm.end_component("test_comp")
|
||||
metrics = pm.get_metrics()
|
||||
assert metrics['time_test_comp_ms'] >= 10.0
|
||||
pm.stop()
|
||||
|
||||
@@ -5,58 +5,49 @@ from gui_2 import App
|
||||
|
||||
@pytest.fixture
|
||||
def app_instance():
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'ai': {'provider': 'gemini', 'model': 'gemini-2.5-flash-lite'}, 'projects': {}}),
|
||||
patch('gui_2.save_config'),
|
||||
patch('gui_2.project_manager'),
|
||||
patch('gui_2.session_logger'),
|
||||
patch('gui_2.immapp.run'),
|
||||
patch.object(App, '_load_active_project'),
|
||||
patch.object(App, '_fetch_models'),
|
||||
patch.object(App, '_load_fonts'),
|
||||
patch.object(App, '_post_init'),
|
||||
patch('ai_client.set_provider'),
|
||||
patch('ai_client.reset_session')
|
||||
):
|
||||
app = App()
|
||||
yield app
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'ai': {'provider': 'gemini', 'model': 'gemini-2.5-flash-lite'}, 'projects': {}}),
|
||||
patch('gui_2.save_config'),
|
||||
patch('gui_2.project_manager'),
|
||||
patch('gui_2.session_logger'),
|
||||
patch('gui_2.immapp.run'),
|
||||
patch.object(App, '_load_active_project'),
|
||||
patch.object(App, '_fetch_models'),
|
||||
patch.object(App, '_load_fonts'),
|
||||
patch.object(App, '_post_init'),
|
||||
patch('ai_client.set_provider'),
|
||||
patch('ai_client.reset_session')
|
||||
):
|
||||
app = App()
|
||||
yield app
|
||||
|
||||
def test_redundant_calls_in_process_pending_gui_tasks(app_instance):
|
||||
# Setup
|
||||
app_instance._pending_gui_tasks = [
|
||||
{'action': 'set_value', 'item': 'current_provider', 'value': 'anthropic'}
|
||||
]
|
||||
|
||||
with patch('ai_client.set_provider') as mock_set_provider, \
|
||||
patch('ai_client.reset_session') as mock_reset_session:
|
||||
|
||||
# We need to make sure the property setter's internal calls are also tracked or mocked.
|
||||
# However, the App instance was created with mocked ai_client.
|
||||
# Let's re-patch it specifically for this test.
|
||||
|
||||
app_instance._process_pending_gui_tasks()
|
||||
|
||||
# current_provider setter calls:
|
||||
# ai_client.reset_session()
|
||||
# ai_client.set_provider(value, self.current_model)
|
||||
|
||||
# _process_pending_gui_tasks NO LONGER calls it redundantly:
|
||||
|
||||
# Total should be 1 call for each.
|
||||
assert mock_set_provider.call_count == 1
|
||||
assert mock_reset_session.call_count == 1
|
||||
# Setup
|
||||
app_instance._pending_gui_tasks = [
|
||||
{'action': 'set_value', 'item': 'current_provider', 'value': 'anthropic'}
|
||||
]
|
||||
with patch('ai_client.set_provider') as mock_set_provider, \
|
||||
patch('ai_client.reset_session') as mock_reset_session:
|
||||
# We need to make sure the property setter's internal calls are also tracked or mocked.
|
||||
# However, the App instance was created with mocked ai_client.
|
||||
# Let's re-patch it specifically for this test.
|
||||
app_instance._process_pending_gui_tasks()
|
||||
# current_provider setter calls:
|
||||
# ai_client.reset_session()
|
||||
# ai_client.set_provider(value, self.current_model)
|
||||
# _process_pending_gui_tasks NO LONGER calls it redundantly:
|
||||
# Total should be 1 call for each.
|
||||
assert mock_set_provider.call_count == 1
|
||||
assert mock_reset_session.call_count == 1
|
||||
|
||||
def test_gcli_path_updates_adapter(app_instance):
|
||||
# Setup
|
||||
app_instance.current_provider = 'gemini_cli'
|
||||
app_instance._pending_gui_tasks = [
|
||||
{'action': 'set_value', 'item': 'gcli_path', 'value': '/new/path/to/gemini'}
|
||||
]
|
||||
|
||||
# Initialize adapter if it doesn't exist (it shouldn't in mock env)
|
||||
ai_client._gemini_cli_adapter = None
|
||||
|
||||
app_instance._process_pending_gui_tasks()
|
||||
|
||||
assert ai_client._gemini_cli_adapter is not None
|
||||
assert ai_client._gemini_cli_adapter.binary_path == '/new/path/to/gemini'
|
||||
# Setup
|
||||
app_instance.current_provider = 'gemini_cli'
|
||||
app_instance._pending_gui_tasks = [
|
||||
{'action': 'set_value', 'item': 'gcli_path', 'value': '/new/path/to/gemini'}
|
||||
]
|
||||
# Initialize adapter if it doesn't exist (it shouldn't in mock env)
|
||||
ai_client._gemini_cli_adapter = None
|
||||
app_instance._process_pending_gui_tasks()
|
||||
assert ai_client._gemini_cli_adapter is not None
|
||||
assert ai_client._gemini_cli_adapter.binary_path == '/new/path/to/gemini'
|
||||
|
||||
@@ -6,89 +6,79 @@ from models import TrackState, Metadata, Ticket
|
||||
from datetime import datetime
|
||||
|
||||
def test_get_all_tracks_empty(tmp_path):
|
||||
# conductor/tracks directory doesn't exist
|
||||
assert get_all_tracks(tmp_path) == []
|
||||
# conductor/tracks directory doesn't exist
|
||||
assert get_all_tracks(tmp_path) == []
|
||||
|
||||
def test_get_all_tracks_with_state(tmp_path):
|
||||
tracks_dir = tmp_path / "conductor" / "tracks"
|
||||
tracks_dir.mkdir(parents=True)
|
||||
|
||||
track_id = "test_track_1"
|
||||
track_dir = tracks_dir / track_id
|
||||
track_dir.mkdir()
|
||||
|
||||
# Create TrackState
|
||||
metadata = Metadata(id=track_id, name="Test Track 1", status="in_progress",
|
||||
created_at=datetime.now(), updated_at=datetime.now())
|
||||
tasks = [
|
||||
Ticket(id="task1", description="desc1", status="completed", assigned_to="user"),
|
||||
Ticket(id="task2", description="desc2", status="todo", assigned_to="user")
|
||||
]
|
||||
state = TrackState(metadata=metadata, discussion=[], tasks=tasks)
|
||||
save_track_state(track_id, state, tmp_path)
|
||||
|
||||
tracks = get_all_tracks(tmp_path)
|
||||
assert len(tracks) == 1
|
||||
track = tracks[0]
|
||||
assert track["id"] == track_id
|
||||
assert track["title"] == "Test Track 1"
|
||||
assert track["status"] == "in_progress"
|
||||
assert track["complete"] == 1
|
||||
assert track["total"] == 2
|
||||
assert track["progress"] == 0.5
|
||||
tracks_dir = tmp_path / "conductor" / "tracks"
|
||||
tracks_dir.mkdir(parents=True)
|
||||
track_id = "test_track_1"
|
||||
track_dir = tracks_dir / track_id
|
||||
track_dir.mkdir()
|
||||
# Create TrackState
|
||||
metadata = Metadata(id=track_id, name="Test Track 1", status="in_progress",
|
||||
created_at=datetime.now(), updated_at=datetime.now())
|
||||
tasks = [
|
||||
Ticket(id="task1", description="desc1", status="completed", assigned_to="user"),
|
||||
Ticket(id="task2", description="desc2", status="todo", assigned_to="user")
|
||||
]
|
||||
state = TrackState(metadata=metadata, discussion=[], tasks=tasks)
|
||||
save_track_state(track_id, state, tmp_path)
|
||||
tracks = get_all_tracks(tmp_path)
|
||||
assert len(tracks) == 1
|
||||
track = tracks[0]
|
||||
assert track["id"] == track_id
|
||||
assert track["title"] == "Test Track 1"
|
||||
assert track["status"] == "in_progress"
|
||||
assert track["complete"] == 1
|
||||
assert track["total"] == 2
|
||||
assert track["progress"] == 0.5
|
||||
|
||||
def test_get_all_tracks_with_metadata_json(tmp_path):
|
||||
tracks_dir = tmp_path / "conductor" / "tracks"
|
||||
tracks_dir.mkdir(parents=True)
|
||||
|
||||
track_id = "test_track_2"
|
||||
track_dir = tracks_dir / track_id
|
||||
track_dir.mkdir()
|
||||
|
||||
metadata = {
|
||||
"id": track_id,
|
||||
"title": "Test Track 2",
|
||||
"status": "planned"
|
||||
}
|
||||
with open(track_dir / "metadata.json", "w") as f:
|
||||
json.dump(metadata, f)
|
||||
|
||||
# Create plan.md to test parsing
|
||||
plan_content = """
|
||||
tracks_dir = tmp_path / "conductor" / "tracks"
|
||||
tracks_dir.mkdir(parents=True)
|
||||
track_id = "test_track_2"
|
||||
track_dir = tracks_dir / track_id
|
||||
track_dir.mkdir()
|
||||
metadata = {
|
||||
"id": track_id,
|
||||
"title": "Test Track 2",
|
||||
"status": "planned"
|
||||
}
|
||||
with open(track_dir / "metadata.json", "w") as f:
|
||||
json.dump(metadata, f)
|
||||
# Create plan.md to test parsing
|
||||
plan_content = """
|
||||
# Plan
|
||||
- [x] Task: Task 1
|
||||
- [ ] Task: Task 2
|
||||
- [~] Task: Task 3
|
||||
"""
|
||||
with open(track_dir / "plan.md", "w") as f:
|
||||
f.write(plan_content)
|
||||
|
||||
tracks = get_all_tracks(tmp_path)
|
||||
assert len(tracks) == 1
|
||||
track = tracks[0]
|
||||
assert track["id"] == track_id
|
||||
assert track["title"] == "Test Track 2"
|
||||
assert track["status"] == "planned"
|
||||
assert track["complete"] == 1
|
||||
assert track["total"] == 3
|
||||
assert pytest.approx(track["progress"]) == 0.333333
|
||||
with open(track_dir / "plan.md", "w") as f:
|
||||
f.write(plan_content)
|
||||
tracks = get_all_tracks(tmp_path)
|
||||
assert len(tracks) == 1
|
||||
track = tracks[0]
|
||||
assert track["id"] == track_id
|
||||
assert track["title"] == "Test Track 2"
|
||||
assert track["status"] == "planned"
|
||||
assert track["complete"] == 1
|
||||
assert track["total"] == 3
|
||||
assert pytest.approx(track["progress"]) == 0.333333
|
||||
|
||||
def test_get_all_tracks_malformed(tmp_path):
|
||||
tracks_dir = tmp_path / "conductor" / "tracks"
|
||||
tracks_dir.mkdir(parents=True)
|
||||
|
||||
track_id = "malformed_track"
|
||||
track_dir = tracks_dir / track_id
|
||||
track_dir.mkdir()
|
||||
|
||||
# Malformed metadata.json
|
||||
with open(track_dir / "metadata.json", "w") as f:
|
||||
f.write("{ invalid json }")
|
||||
|
||||
tracks = get_all_tracks(tmp_path)
|
||||
assert len(tracks) == 1
|
||||
track = tracks[0]
|
||||
assert track["id"] == track_id
|
||||
assert track["status"] == "unknown"
|
||||
assert track["complete"] == 0
|
||||
assert track["total"] == 0
|
||||
tracks_dir = tmp_path / "conductor" / "tracks"
|
||||
tracks_dir.mkdir(parents=True)
|
||||
track_id = "malformed_track"
|
||||
track_dir = tracks_dir / track_id
|
||||
track_dir.mkdir()
|
||||
# Malformed metadata.json
|
||||
with open(track_dir / "metadata.json", "w") as f:
|
||||
f.write("{ invalid json }")
|
||||
tracks = get_all_tracks(tmp_path)
|
||||
assert len(tracks) == 1
|
||||
track = tracks[0]
|
||||
assert track["id"] == track_id
|
||||
assert track["status"] == "unknown"
|
||||
assert track["complete"] == 0
|
||||
assert track["total"] == 0
|
||||
|
||||
@@ -9,56 +9,46 @@ import tomllib
|
||||
|
||||
@pytest.fixture
|
||||
def temp_logs(tmp_path, monkeypatch):
|
||||
# Ensure closed before starting
|
||||
session_logger.close_session()
|
||||
monkeypatch.setattr(session_logger, "_comms_fh", None)
|
||||
|
||||
# Mock _LOG_DIR in session_logger
|
||||
original_log_dir = session_logger._LOG_DIR
|
||||
session_logger._LOG_DIR = tmp_path / "logs"
|
||||
monkeypatch.setattr(session_logger, "_LOG_DIR", tmp_path / "logs")
|
||||
session_logger._LOG_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Mock _SCRIPTS_DIR
|
||||
original_scripts_dir = session_logger._SCRIPTS_DIR
|
||||
session_logger._SCRIPTS_DIR = tmp_path / "scripts" / "generated"
|
||||
monkeypatch.setattr(session_logger, "_SCRIPTS_DIR", tmp_path / "scripts" / "generated")
|
||||
session_logger._SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
yield tmp_path / "logs"
|
||||
|
||||
# Cleanup: Close handles if open
|
||||
session_logger.close_session()
|
||||
session_logger._LOG_DIR = original_log_dir
|
||||
session_logger._SCRIPTS_DIR = original_scripts_dir
|
||||
# Ensure closed before starting
|
||||
session_logger.close_session()
|
||||
monkeypatch.setattr(session_logger, "_comms_fh", None)
|
||||
# Mock _LOG_DIR in session_logger
|
||||
original_log_dir = session_logger._LOG_DIR
|
||||
session_logger._LOG_DIR = tmp_path / "logs"
|
||||
monkeypatch.setattr(session_logger, "_LOG_DIR", tmp_path / "logs")
|
||||
session_logger._LOG_DIR.mkdir(parents=True, exist_ok=True)
|
||||
# Mock _SCRIPTS_DIR
|
||||
original_scripts_dir = session_logger._SCRIPTS_DIR
|
||||
session_logger._SCRIPTS_DIR = tmp_path / "scripts" / "generated"
|
||||
monkeypatch.setattr(session_logger, "_SCRIPTS_DIR", tmp_path / "scripts" / "generated")
|
||||
session_logger._SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
yield tmp_path / "logs"
|
||||
# Cleanup: Close handles if open
|
||||
session_logger.close_session()
|
||||
session_logger._LOG_DIR = original_log_dir
|
||||
session_logger._SCRIPTS_DIR = original_scripts_dir
|
||||
|
||||
def test_open_session_creates_subdir_and_registry(temp_logs):
|
||||
label = "test-label"
|
||||
# We can't easily mock datetime.datetime.now() because it's a built-in
|
||||
# but we can check the resulting directory name pattern
|
||||
|
||||
session_logger.open_session(label=label)
|
||||
|
||||
# Check that a subdirectory was created
|
||||
subdirs = list(temp_logs.iterdir())
|
||||
# One is the log_registry.toml, one is the session dir
|
||||
session_dirs = [d for d in subdirs if d.is_dir()]
|
||||
assert len(session_dirs) == 1
|
||||
session_dir = session_dirs[0]
|
||||
|
||||
assert session_dir.name.endswith(f"_{label}")
|
||||
|
||||
# Check for log files
|
||||
assert (session_dir / "comms.log").exists()
|
||||
assert (session_dir / "toolcalls.log").exists()
|
||||
assert (session_dir / "apihooks.log").exists()
|
||||
assert (session_dir / "clicalls.log").exists()
|
||||
|
||||
# Check registry
|
||||
registry_path = temp_logs / "log_registry.toml"
|
||||
assert registry_path.exists()
|
||||
|
||||
with open(registry_path, "rb") as f:
|
||||
data = tomllib.load(f)
|
||||
assert session_dir.name in data
|
||||
assert data[session_dir.name]["path"] == str(session_dir)
|
||||
label = "test-label"
|
||||
# We can't easily mock datetime.datetime.now() because it's a built-in
|
||||
# but we can check the resulting directory name pattern
|
||||
session_logger.open_session(label=label)
|
||||
# Check that a subdirectory was created
|
||||
subdirs = list(temp_logs.iterdir())
|
||||
# One is the log_registry.toml, one is the session dir
|
||||
session_dirs = [d for d in subdirs if d.is_dir()]
|
||||
assert len(session_dirs) == 1
|
||||
session_dir = session_dirs[0]
|
||||
assert session_dir.name.endswith(f"_{label}")
|
||||
# Check for log files
|
||||
assert (session_dir / "comms.log").exists()
|
||||
assert (session_dir / "toolcalls.log").exists()
|
||||
assert (session_dir / "apihooks.log").exists()
|
||||
assert (session_dir / "clicalls.log").exists()
|
||||
# Check registry
|
||||
registry_path = temp_logs / "log_registry.toml"
|
||||
assert registry_path.exists()
|
||||
with open(registry_path, "rb") as f:
|
||||
data = tomllib.load(f)
|
||||
assert session_dir.name in data
|
||||
assert data[session_dir.name]["path"] == str(session_dir)
|
||||
|
||||
@@ -9,33 +9,28 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from simulation.sim_ai_settings import AISettingsSimulation
|
||||
|
||||
def test_ai_settings_simulation_run():
|
||||
mock_client = MagicMock()
|
||||
mock_client.wait_for_server.return_value = True
|
||||
|
||||
mock_client.get_value.side_effect = lambda key: {
|
||||
"current_provider": "gemini",
|
||||
"current_model": "gemini-2.5-flash-lite"
|
||||
}.get(key)
|
||||
|
||||
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
|
||||
mock_sim = MagicMock()
|
||||
mock_sim_class.return_value = mock_sim
|
||||
|
||||
sim = AISettingsSimulation(mock_client)
|
||||
|
||||
# Override the side effect after initial setup if needed or just let it return the same for simplicity
|
||||
# Actually, let's use a side effect that updates
|
||||
vals = {"current_provider": "gemini", "current_model": "gemini-2.5-flash-lite"}
|
||||
def side_effect(key):
|
||||
return vals.get(key)
|
||||
def set_side_effect(key, val):
|
||||
vals[key] = val
|
||||
|
||||
mock_client.get_value.side_effect = side_effect
|
||||
mock_client.set_value.side_effect = set_side_effect
|
||||
|
||||
sim.run()
|
||||
|
||||
# Verify calls
|
||||
mock_client.set_value.assert_any_call("current_model", "gemini-1.5-flash")
|
||||
mock_client.set_value.assert_any_call("current_model", "gemini-2.5-flash-lite")
|
||||
mock_client = MagicMock()
|
||||
mock_client.wait_for_server.return_value = True
|
||||
mock_client.get_value.side_effect = lambda key: {
|
||||
"current_provider": "gemini",
|
||||
"current_model": "gemini-2.5-flash-lite"
|
||||
}.get(key)
|
||||
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
|
||||
mock_sim = MagicMock()
|
||||
mock_sim_class.return_value = mock_sim
|
||||
sim = AISettingsSimulation(mock_client)
|
||||
# Override the side effect after initial setup if needed or just let it return the same for simplicity
|
||||
# Actually, let's use a side effect that updates
|
||||
vals = {"current_provider": "gemini", "current_model": "gemini-2.5-flash-lite"}
|
||||
|
||||
def side_effect(key):
|
||||
return vals.get(key)
|
||||
|
||||
def set_side_effect(key, val):
|
||||
vals[key] = val
|
||||
mock_client.get_value.side_effect = side_effect
|
||||
mock_client.set_value.side_effect = set_side_effect
|
||||
sim.run()
|
||||
# Verify calls
|
||||
mock_client.set_value.assert_any_call("current_model", "gemini-1.5-flash")
|
||||
mock_client.set_value.assert_any_call("current_model", "gemini-2.5-flash-lite")
|
||||
|
||||
@@ -9,26 +9,22 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from simulation.sim_base import BaseSimulation
|
||||
|
||||
def test_base_simulation_init():
|
||||
with patch('simulation.sim_base.ApiHookClient') as mock_client_class:
|
||||
mock_client = MagicMock()
|
||||
mock_client_class.return_value = mock_client
|
||||
|
||||
sim = BaseSimulation()
|
||||
assert sim.client == mock_client
|
||||
assert sim.sim is not None
|
||||
with patch('simulation.sim_base.ApiHookClient') as mock_client_class:
|
||||
mock_client = MagicMock()
|
||||
mock_client_class.return_value = mock_client
|
||||
sim = BaseSimulation()
|
||||
assert sim.client == mock_client
|
||||
assert sim.sim is not None
|
||||
|
||||
def test_base_simulation_setup():
|
||||
mock_client = MagicMock()
|
||||
mock_client.wait_for_server.return_value = True
|
||||
|
||||
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
|
||||
mock_sim = MagicMock()
|
||||
mock_sim_class.return_value = mock_sim
|
||||
|
||||
sim = BaseSimulation(mock_client)
|
||||
sim.setup("TestSim")
|
||||
|
||||
mock_client.wait_for_server.assert_called()
|
||||
mock_client.click.assert_any_call("btn_reset")
|
||||
mock_sim.setup_new_project.assert_called()
|
||||
assert sim.project_path.endswith("temp_testsim.toml")
|
||||
mock_client = MagicMock()
|
||||
mock_client.wait_for_server.return_value = True
|
||||
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
|
||||
mock_sim = MagicMock()
|
||||
mock_sim_class.return_value = mock_sim
|
||||
sim = BaseSimulation(mock_client)
|
||||
sim.setup("TestSim")
|
||||
mock_client.wait_for_server.assert_called()
|
||||
mock_client.click.assert_any_call("btn_reset")
|
||||
mock_sim.setup_new_project.assert_called()
|
||||
assert sim.project_path.endswith("temp_testsim.toml")
|
||||
|
||||
@@ -9,42 +9,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from simulation.sim_context import ContextSimulation
|
||||
|
||||
def test_context_simulation_run():
|
||||
mock_client = MagicMock()
|
||||
mock_client.wait_for_server.return_value = True
|
||||
|
||||
# Mock project config
|
||||
mock_project = {
|
||||
'project': {
|
||||
'files': {'paths': []}
|
||||
}
|
||||
}
|
||||
mock_client.get_project.return_value = mock_project
|
||||
mock_client.get_value.side_effect = lambda key: {
|
||||
"ai_status": "md written: test.md",
|
||||
"token_budget_pct": 0.05
|
||||
}.get(key)
|
||||
|
||||
# Mock session entries
|
||||
mock_session = {
|
||||
'session': {
|
||||
'entries': [
|
||||
{'role': 'User', 'content': 'Hello'},
|
||||
{'role': 'AI', 'content': 'Hi'}
|
||||
]
|
||||
}
|
||||
}
|
||||
mock_client.get_session.return_value = mock_session
|
||||
|
||||
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
|
||||
mock_sim = MagicMock()
|
||||
mock_sim_class.return_value = mock_sim
|
||||
|
||||
sim = ContextSimulation(mock_client)
|
||||
sim.run()
|
||||
|
||||
# Verify calls
|
||||
mock_sim.create_discussion.assert_called()
|
||||
mock_client.post_project.assert_called()
|
||||
mock_client.click.assert_called_with("btn_md_only")
|
||||
mock_sim.run_discussion_turn.assert_called()
|
||||
mock_sim.truncate_history.assert_called_with(1)
|
||||
mock_client = MagicMock()
|
||||
mock_client.wait_for_server.return_value = True
|
||||
# Mock project config
|
||||
mock_project = {
|
||||
'project': {
|
||||
'files': {'paths': []}
|
||||
}
|
||||
}
|
||||
mock_client.get_project.return_value = mock_project
|
||||
mock_client.get_value.side_effect = lambda key: {
|
||||
"ai_status": "md written: test.md",
|
||||
"token_budget_pct": 0.05
|
||||
}.get(key)
|
||||
# Mock session entries
|
||||
mock_session = {
|
||||
'session': {
|
||||
'entries': [
|
||||
{'role': 'User', 'content': 'Hello'},
|
||||
{'role': 'AI', 'content': 'Hi'}
|
||||
]
|
||||
}
|
||||
}
|
||||
mock_client.get_session.return_value = mock_session
|
||||
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
|
||||
mock_sim = MagicMock()
|
||||
mock_sim_class.return_value = mock_sim
|
||||
sim = ContextSimulation(mock_client)
|
||||
sim.run()
|
||||
# Verify calls
|
||||
mock_sim.create_discussion.assert_called()
|
||||
mock_client.post_project.assert_called()
|
||||
mock_client.click.assert_called_with("btn_md_only")
|
||||
mock_sim.run_discussion_turn.assert_called()
|
||||
mock_sim.truncate_history.assert_called_with(1)
|
||||
|
||||
@@ -9,42 +9,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from simulation.sim_execution import ExecutionSimulation
|
||||
|
||||
def test_execution_simulation_run():
|
||||
mock_client = MagicMock()
|
||||
mock_client.wait_for_server.return_value = True
|
||||
|
||||
# Mock show_confirm_modal state
|
||||
vals = {"show_confirm_modal": False}
|
||||
def side_effect(key):
|
||||
return vals.get(key)
|
||||
def set_side_effect(key, val):
|
||||
vals[key] = val
|
||||
|
||||
mock_client.get_value.side_effect = side_effect
|
||||
mock_client.set_value.side_effect = set_side_effect
|
||||
|
||||
# Mock session entries with tool output
|
||||
mock_session = {
|
||||
'session': {
|
||||
'entries': [
|
||||
{'role': 'Tool', 'content': 'Simulation Test', 'tool_call_id': 'call_1'}
|
||||
]
|
||||
}
|
||||
}
|
||||
mock_client.get_session.return_value = mock_session
|
||||
mock_client = MagicMock()
|
||||
mock_client.wait_for_server.return_value = True
|
||||
# Mock show_confirm_modal state
|
||||
vals = {"show_confirm_modal": False}
|
||||
|
||||
# Mock script confirmation event
|
||||
mock_client.wait_for_event.side_effect = [
|
||||
{"type": "script_confirmation_required", "script": "dir"},
|
||||
None # Second call returns None to end the loop
|
||||
]
|
||||
def side_effect(key):
|
||||
return vals.get(key)
|
||||
|
||||
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
|
||||
mock_sim = MagicMock()
|
||||
mock_sim_class.return_value = mock_sim
|
||||
|
||||
sim = ExecutionSimulation(mock_client)
|
||||
sim.run()
|
||||
|
||||
# Verify calls
|
||||
mock_sim.run_discussion_turn_async.assert_called()
|
||||
mock_client.click.assert_called_with("btn_approve_script")
|
||||
def set_side_effect(key, val):
|
||||
vals[key] = val
|
||||
mock_client.get_value.side_effect = side_effect
|
||||
mock_client.set_value.side_effect = set_side_effect
|
||||
# Mock session entries with tool output
|
||||
mock_session = {
|
||||
'session': {
|
||||
'entries': [
|
||||
{'role': 'Tool', 'content': 'Simulation Test', 'tool_call_id': 'call_1'}
|
||||
]
|
||||
}
|
||||
}
|
||||
mock_client.get_session.return_value = mock_session
|
||||
# Mock script confirmation event
|
||||
mock_client.wait_for_event.side_effect = [
|
||||
{"type": "script_confirmation_required", "script": "dir"},
|
||||
None # Second call returns None to end the loop
|
||||
]
|
||||
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
|
||||
mock_sim = MagicMock()
|
||||
mock_sim_class.return_value = mock_sim
|
||||
sim = ExecutionSimulation(mock_client)
|
||||
sim.run()
|
||||
# Verify calls
|
||||
mock_sim.run_discussion_turn_async.assert_called()
|
||||
mock_client.click.assert_called_with("btn_approve_script")
|
||||
|
||||
@@ -9,28 +9,24 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from simulation.sim_tools import ToolsSimulation
|
||||
|
||||
def test_tools_simulation_run():
|
||||
mock_client = MagicMock()
|
||||
mock_client.wait_for_server.return_value = True
|
||||
|
||||
# Mock session entries with tool output
|
||||
mock_session = {
|
||||
'session': {
|
||||
'entries': [
|
||||
{'role': 'User', 'content': 'List files'},
|
||||
{'role': 'Tool', 'content': 'aggregate.py, ai_client.py', 'tool_call_id': 'call_1'},
|
||||
{'role': 'AI', 'content': 'The files are: aggregate.py, ai_client.py'}
|
||||
]
|
||||
}
|
||||
}
|
||||
mock_client.get_session.return_value = mock_session
|
||||
|
||||
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
|
||||
mock_sim = MagicMock()
|
||||
mock_sim_class.return_value = mock_sim
|
||||
|
||||
sim = ToolsSimulation(mock_client)
|
||||
sim.run()
|
||||
|
||||
# Verify calls
|
||||
mock_sim.run_discussion_turn.assert_any_call("List the files in the current directory.")
|
||||
mock_sim.run_discussion_turn.assert_any_call("Read the first 10 lines of aggregate.py.")
|
||||
mock_client = MagicMock()
|
||||
mock_client.wait_for_server.return_value = True
|
||||
# Mock session entries with tool output
|
||||
mock_session = {
|
||||
'session': {
|
||||
'entries': [
|
||||
{'role': 'User', 'content': 'List files'},
|
||||
{'role': 'Tool', 'content': 'aggregate.py, ai_client.py', 'tool_call_id': 'call_1'},
|
||||
{'role': 'AI', 'content': 'The files are: aggregate.py, ai_client.py'}
|
||||
]
|
||||
}
|
||||
}
|
||||
mock_client.get_session.return_value = mock_session
|
||||
with patch('simulation.sim_base.WorkflowSimulator') as mock_sim_class:
|
||||
mock_sim = MagicMock()
|
||||
mock_sim_class.return_value = mock_sim
|
||||
sim = ToolsSimulation(mock_client)
|
||||
sim.run()
|
||||
# Verify calls
|
||||
mock_sim.run_discussion_turn.assert_any_call("List the files in the current directory.")
|
||||
mock_sim.run_discussion_turn.assert_any_call("Read the first 10 lines of aggregate.py.")
|
||||
|
||||
@@ -7,84 +7,75 @@ import asyncio
|
||||
import concurrent.futures
|
||||
|
||||
class MockDialog:
    """Stand-in for the GUI approval dialog used by confirm_spawn tests.

    `wait()` mimics the real dialog's blocking call and returns the new
    dictionary result format instead of a bare boolean.
    """

    def __init__(self, approved, final_payload=None):
        # approved: bool the dialog will report; final_payload: optional
        # extra keys (e.g. edited prompt/context) merged into the result.
        self.approved = approved
        self.final_payload = final_payload

    def wait(self):
        """Return the dialog result as a dict (new return format)."""
        res = {'approved': self.approved, 'abort': False}
        if self.final_payload:
            res.update(self.final_payload)
        return res
||||
|
||||
@pytest.fixture
def mock_ai_client():
    """Patch ai_client.send with a MagicMock returning a canned response."""
    with patch("ai_client.send") as mock_send:
        mock_send.return_value = "Task completed"
        yield mock_send
|
||||
|
||||
@pytest.mark.asyncio
async def test_confirm_spawn_pushed_to_queue():
    """confirm_spawn must publish an approval event and honor the dialog result."""
    event_queue = events.AsyncEventQueue()
    ticket_id = "T1"
    role = "Tier 3 Worker"
    prompt = "Original Prompt"
    context_md = "Original Context"

    # Start confirm_spawn in a thread since it blocks with time.sleep
    def run_confirm():
        return multi_agent_conductor.confirm_spawn(role, prompt, context_md, event_queue, ticket_id)

    loop = asyncio.get_running_loop()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future = loop.run_in_executor(executor, run_confirm)

        # Wait for the event to appear in the queue
        event_name, payload = await event_queue.get()
        assert event_name == "mma_spawn_approval"
        assert payload["ticket_id"] == ticket_id
        assert payload["role"] == role
        assert payload["prompt"] == prompt
        assert payload["context_md"] == context_md
        assert "dialog_container" in payload

        # Simulate GUI injecting a dialog
        payload["dialog_container"][0] = MockDialog(True, {"prompt": "Modified Prompt", "context_md": "Modified Context"})

        approved, final_prompt, final_context = await future
        assert approved is True
        assert final_prompt == "Modified Prompt"
        assert final_context == "Modified Context"
||||
|
||||
@patch("multi_agent_conductor.confirm_spawn")
def test_run_worker_lifecycle_approved(mock_confirm, mock_ai_client):
    """Approved spawn: the (possibly edited) prompt/context reach ai_client.send."""
    ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
    context = WorkerContext(ticket_id="T1", model_name="model", messages=[])
    event_queue = events.AsyncEventQueue()

    mock_confirm.return_value = (True, "Modified Prompt", "Modified Context")

    multi_agent_conductor.run_worker_lifecycle(ticket, context, event_queue=event_queue)

    mock_confirm.assert_called_once()
    # Check that ai_client.send was called with modified values
    args, kwargs = mock_ai_client.call_args
    assert kwargs["user_message"] == "Modified Prompt"
    assert kwargs["md_content"] == "Modified Context"
    assert ticket.status == "completed"
|
||||
|
||||
@patch("multi_agent_conductor.confirm_spawn")
def test_run_worker_lifecycle_rejected(mock_confirm, mock_ai_client):
    """Rejected spawn: worker is blocked and the AI provider is never called."""
    ticket = Ticket(id="T1", description="desc", status="todo", assigned_to="user")
    context = WorkerContext(ticket_id="T1", model_name="model", messages=[])
    event_queue = events.AsyncEventQueue()

    mock_confirm.return_value = (False, "Original Prompt", "Original Context")

    result = multi_agent_conductor.run_worker_lifecycle(ticket, context, event_queue=event_queue)

    mock_confirm.assert_called_once()
    mock_ai_client.assert_not_called()
    assert ticket.status == "blocked"
    assert "Spawn rejected by user" in ticket.blocked_reason
    assert "BLOCKED" in result
|
||||
|
||||
@@ -5,57 +5,50 @@ import pytest
|
||||
from api_hook_client import ApiHookClient
|
||||
|
||||
def test_api_ask_client_method(live_gui):
    """
    Tests the request_confirmation method in ApiHookClient.

    Issues a blocking confirmation request on a background thread, waits for
    the server to surface an 'ask_received' event, answers it over HTTP, and
    verifies the blocked call returns the injected response.
    """
    client = ApiHookClient("http://127.0.0.1:8999")

    # Drain existing events
    client.get_events()

    results = {"response": None, "error": None}

    def make_blocking_request():
        try:
            # This call should block until we respond
            results["response"] = client.request_confirmation(
                tool_name="powershell",
                args={"command": "echo hello"}
            )
        except Exception as e:
            results["error"] = str(e)

    # Start the request in a background thread
    t = threading.Thread(target=make_blocking_request)
    t.start()

    # Poll for the 'ask_received' event
    request_id = None
    start_time = time.time()
    while time.time() - start_time < 5:
        events = client.get_events()
        for ev in events:
            if ev.get("type") == "ask_received":
                request_id = ev.get("request_id")
                break
        if request_id:
            break
        time.sleep(0.1)

    assert request_id is not None, "Timed out waiting for 'ask_received' event"

    # Respond
    expected_response = {"approved": True}
    resp = requests.post(
        "http://127.0.0.1:8999/api/ask/respond",
        json={
            "request_id": request_id,
            "response": expected_response
        }
    )
    assert resp.status_code == 200

    t.join(timeout=5)
    assert not t.is_alive()
    assert results["error"] is None
    assert results["response"] == expected_response
|
||||
|
||||
@@ -4,221 +4,176 @@ import subprocess
|
||||
from shell_runner import run_powershell
|
||||
|
||||
def test_run_powershell_qa_callback_on_failure():
    """
    Test that qa_callback is called when a powershell command fails (non-zero exit code).
    The result of the callback should be appended to the output.
    """
    script = "Write-Error 'something went wrong'; exit 1"
    base_dir = "."

    # Mocking subprocess.run to simulate failure
    mock_result = MagicMock()
    mock_result.stdout = ""
    mock_result.stderr = "something went wrong"
    mock_result.returncode = 1

    qa_callback = MagicMock(return_value="QA ANALYSIS: This looks like a syntax error.")

    with patch("subprocess.run", return_value=mock_result), \
         patch("shutil.which", return_value="powershell.exe"):
        # We expect run_powershell to accept qa_callback
        output = run_powershell(script, base_dir, qa_callback=qa_callback)

        # Verify callback was called with stderr
        qa_callback.assert_called_once_with("something went wrong")

        # Verify output contains the callback result
        assert "QA ANALYSIS: This looks like a syntax error." in output
        assert "STDERR:\nsomething went wrong" in output
        assert "EXIT CODE: 1" in output
|
||||
|
||||
def test_run_powershell_qa_callback_on_stderr_only():
    """
    Test that qa_callback is called when a command has stderr even if exit code is 0.
    """
    script = "Write-Error 'non-fatal error'"
    base_dir = "."

    mock_result = MagicMock()
    mock_result.stdout = "Success"
    mock_result.stderr = "non-fatal error"
    mock_result.returncode = 0

    qa_callback = MagicMock(return_value="QA ANALYSIS: Ignorable warning.")

    with patch("subprocess.run", return_value=mock_result), \
         patch("shutil.which", return_value="powershell.exe"):
        output = run_powershell(script, base_dir, qa_callback=qa_callback)

        qa_callback.assert_called_once_with("non-fatal error")
        assert "QA ANALYSIS: Ignorable warning." in output
        assert "STDOUT:\nSuccess" in output
|
||||
|
||||
def test_run_powershell_no_qa_callback_on_success():
    """
    Test that qa_callback is NOT called when the command succeeds without stderr.
    """
    script = "Write-Output 'All good'"
    base_dir = "."

    mock_result = MagicMock()
    mock_result.stdout = "All good"
    mock_result.stderr = ""
    mock_result.returncode = 0

    qa_callback = MagicMock()

    with patch("subprocess.run", return_value=mock_result), \
         patch("shutil.which", return_value="powershell.exe"):
        output = run_powershell(script, base_dir, qa_callback=qa_callback)

        qa_callback.assert_not_called()
        assert "STDOUT:\nAll good" in output
        assert "EXIT CODE: 0" in output
        assert "QA ANALYSIS" not in output
|
||||
|
||||
def test_run_powershell_optional_qa_callback():
    """
    Test that run_powershell still works without providing a qa_callback.
    """
    script = "Write-Error 'error'"
    base_dir = "."

    mock_result = MagicMock()
    mock_result.stdout = ""
    mock_result.stderr = "error"
    mock_result.returncode = 1

    with patch("subprocess.run", return_value=mock_result), \
         patch("shutil.which", return_value="powershell.exe"):
        # Should not raise TypeError even if qa_callback is not provided
        output = run_powershell(script, base_dir)

        assert "STDERR:\nerror" in output
        assert "EXIT CODE: 1" in output
|
||||
|
||||
def test_end_to_end_tier4_integration():
    """
    Verifies that shell_runner.run_powershell correctly uses ai_client.run_tier4_analysis.
    """
    import ai_client

    script = "Invoke-Item non_existent_file"
    base_dir = "."
    stderr_content = "Invoke-Item : Cannot find path 'C:\\non_existent_file' because it does not exist."

    mock_result = MagicMock()
    mock_result.stdout = ""
    mock_result.stderr = stderr_content
    mock_result.returncode = 1

    expected_analysis = "Path does not exist. Verify the file path and ensure the file is present before invoking."

    with patch("subprocess.run", return_value=mock_result), \
         patch("shutil.which", return_value="powershell.exe"), \
         patch("ai_client.run_tier4_analysis", return_value=expected_analysis) as mock_analysis:
        output = run_powershell(script, base_dir, qa_callback=ai_client.run_tier4_analysis)

        mock_analysis.assert_called_once_with(stderr_content)
        assert f"QA ANALYSIS:\n{expected_analysis}" in output
|
||||
|
||||
def test_ai_client_passes_qa_callback():
    """
    Verifies that ai_client.send passes the qa_callback down to the provider function.
    """
    import ai_client

    # Mocking a provider function to avoid actual API calls
    mock_send_gemini = MagicMock(return_value="AI Response")
    qa_callback = MagicMock(return_value="QA Analysis")

    # Force provider to gemini and mock its send function
    with patch("ai_client._provider", "gemini"), \
         patch("ai_client._send_gemini", mock_send_gemini):
        ai_client.send(
            md_content="Context",
            user_message="Hello",
            qa_callback=qa_callback
        )

    # Verify provider received the qa_callback
    mock_send_gemini.assert_called_once()
    args, kwargs = mock_send_gemini.call_args
    # qa_callback is the 7th positional argument in _send_gemini
    assert args[6] == qa_callback
|
||||
|
||||
def test_gemini_provider_passes_qa_callback_to_run_script():
    """
    Verifies that _send_gemini passes the qa_callback to _run_script.
    """
    import ai_client

    # Mock Gemini chat and client
    mock_client = MagicMock()
    mock_chat = MagicMock()

    # Simulate a tool call response
    mock_part = MagicMock()
    mock_part.text = ""
    mock_part.function_call = MagicMock()
    mock_part.function_call.name = "run_powershell"
    mock_part.function_call.args = {"script": "dir"}

    mock_candidate = MagicMock()
    mock_candidate.content.parts = [mock_part]
    mock_candidate.finish_reason.name = "STOP"

    mock_response = MagicMock()
    mock_response.candidates = [mock_candidate]
    mock_response.usage_metadata.prompt_token_count = 10
    mock_response.usage_metadata.candidates_token_count = 5

    # Second call returns a stop response to break the loop
    mock_stop_part = MagicMock()
    mock_stop_part.text = "Done"
    mock_stop_part.function_call = None

    mock_stop_candidate = MagicMock()
    mock_stop_candidate.content.parts = [mock_stop_part]
    mock_stop_candidate.finish_reason.name = "STOP"

    mock_stop_response = MagicMock()
    mock_stop_response.candidates = [mock_stop_candidate]
    mock_stop_response.usage_metadata.prompt_token_count = 5
    mock_stop_response.usage_metadata.candidates_token_count = 2

    mock_chat.send_message.side_effect = [mock_response, mock_stop_response]

    # Mock count_tokens to avoid chat creation failure
    mock_count_resp = MagicMock()
    mock_count_resp.total_tokens = 100
    mock_client.models.count_tokens.return_value = mock_count_resp

    qa_callback = MagicMock()

    # Set global state for the test
    with patch("ai_client._gemini_client", mock_client), \
         patch("ai_client._gemini_chat", None), \
         patch("ai_client._ensure_gemini_client"), \
         patch("ai_client._run_script", return_value="output") as mock_run_script, \
         patch("ai_client._get_gemini_history_list", return_value=[]):
        # Ensure chats.create returns our mock_chat
        mock_client.chats.create.return_value = mock_chat

        ai_client._send_gemini(
            md_content="Context",
            user_message="Run dir",
            base_dir=".",
            qa_callback=qa_callback
        )

    # Verify _run_script received the qa_callback
    mock_run_script.assert_called_once_with("dir", ".", qa_callback)
|
||||
|
||||
@@ -3,134 +3,113 @@ from pathlib import Path
|
||||
from aggregate import build_tier1_context, build_tier2_context, build_tier3_context
|
||||
|
||||
def test_build_tier1_context_exists():
    """Tier 1 context keeps priority docs in full and summarizes the rest."""
    # This should fail if the function is not defined
    file_items = [
        {"path": Path("conductor/product.md"), "entry": "conductor/product.md", "content": "Product content", "error": False},
        {"path": Path("other.py"), "entry": "other.py", "content": "Other content", "error": False}
    ]
    history = ["User: hello", "AI: hi"]

    result = build_tier1_context(file_items, Path("."), [], history)

    assert "Product content" in result
    # other.py should be summarized, not full content in a code block
    assert "Other content" not in result or "Summarized" in result  # Assuming summary format
|
||||
|
||||
def test_build_tier2_context_exists():
    """Tier 2 context includes full file content."""
    file_items = [
        {"path": Path("other.py"), "entry": "other.py", "content": "Other content", "error": False}
    ]
    history = ["User: hello"]
    result = build_tier2_context(file_items, Path("."), [], history)
    assert "Other content" in result
|
||||
|
||||
def test_build_tier3_context_ast_skeleton(monkeypatch):
    """Tier 3 context skeletonizes non-focus .py files via ASTParser."""
    from unittest.mock import MagicMock
    import aggregate
    import file_cache

    # Mock ASTParser
    mock_parser_instance = MagicMock()
    mock_parser_instance.get_skeleton.return_value = "def other():\n ..."
    mock_parser_class = MagicMock(return_value=mock_parser_instance)

    # Mock file_cache.ASTParser in aggregate module
    monkeypatch.setattr("aggregate.ASTParser", mock_parser_class)

    file_items = [
        {"path": Path("other.py"), "entry": "other.py", "content": "def other():\n pass", "error": False}
    ]
    history = []

    # New behavior check: it should use ASTParser for .py files not in focus
    result = build_tier3_context(file_items, Path("."), [], history, focus_files=[])

    assert "def other():" in result
    assert "..." in result
    assert "Python" not in result  # summarize.py output should not be there if AST skeleton is used
    mock_parser_class.assert_called_once_with("python")
    mock_parser_instance.get_skeleton.assert_called_once_with("def other():\n pass")
|
||||
|
||||
def test_build_tier3_context_exists():
    """Tier 3 context keeps focus files in full and skeletonizes the others."""
    file_items = [
        {"path": Path("focus.py"), "entry": "focus.py", "content": "def focus():\n pass", "error": False},
        {"path": Path("other.py"), "entry": "other.py", "content": "def other():\n pass", "error": False}
    ]
    history = ["User: hello"]
    result = build_tier3_context(file_items, Path("."), [], history, focus_files=["focus.py"])

    assert "def focus():" in result
    assert "pass" in result
    # other.py should have skeletonized content, not full "pass" (if get_skeleton works)
    # However, for a simple "pass", the skeleton might be the same or similar.
    # Let's check for the header
    assert "other.py" in result
    assert "AST Skeleton" in result
|
||||
|
||||
def test_build_file_items_with_tiers(tmp_path):
    """build_file_items accepts plain strings and dicts with a 'tier' field."""
    from aggregate import build_file_items

    # Create some dummy files
    file1 = tmp_path / "file1.txt"
    file1.write_text("content1")
    file2 = tmp_path / "file2.txt"
    file2.write_text("content2")

    files_config = [
        "file1.txt",
        {"path": "file2.txt", "tier": 3}
    ]

    items = build_file_items(tmp_path, files_config)

    assert len(items) == 2

    item1 = next(i for i in items if i["entry"] == "file1.txt")
    assert item1["content"] == "content1"
    assert "tier" in item1
    assert item1["tier"] is None

    item2 = next(i for i in items if i["entry"] == "file2.txt")
    assert item2["content"] == "content2"
    assert item2["tier"] == 3
|
||||
|
||||
def test_build_files_section_with_dicts(tmp_path):
|
||||
from aggregate import build_files_section
|
||||
|
||||
file1 = tmp_path / "file1.txt"
|
||||
file1.write_text("content1")
|
||||
|
||||
files_config = [
|
||||
{"path": str(file1)}
|
||||
]
|
||||
|
||||
result = build_files_section(tmp_path, files_config)
|
||||
assert "content1" in result
|
||||
assert "file1.txt" in result
|
||||
from aggregate import build_files_section
|
||||
file1 = tmp_path / "file1.txt"
|
||||
file1.write_text("content1")
|
||||
files_config = [
|
||||
{"path": str(file1)}
|
||||
]
|
||||
result = build_files_section(tmp_path, files_config)
|
||||
assert "content1" in result
|
||||
assert "file1.txt" in result
|
||||
|
||||
def test_tiered_context_by_tier_field():
|
||||
file_items = [
|
||||
{"path": Path("tier1_file.txt"), "entry": "tier1_file.txt", "content": "Full Tier 1 Content\nLine 2", "tier": 1},
|
||||
{"path": Path("tier3_file.txt"), "entry": "tier3_file.txt", "content": "Full Tier 3 Content\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10", "tier": 3},
|
||||
{"path": Path("other.txt"), "entry": "other.txt", "content": "Other Content\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10", "tier": None}
|
||||
]
|
||||
|
||||
# Test Tier 1 Context
|
||||
result_t1 = build_tier1_context(file_items, Path("."), [], [])
|
||||
assert "Full Tier 1 Content" in result_t1
|
||||
assert "Line 2" in result_t1 # In full
|
||||
|
||||
# tier3_file.txt should be summarized
|
||||
assert "tier3_file.txt" in result_t1
|
||||
assert "preview:" in result_t1
|
||||
assert "Line 9" not in result_t1 # Only first 8 lines in preview
|
||||
|
||||
# Test Tier 3 Context
|
||||
result_t3 = build_tier3_context(file_items, Path("."), [], [], focus_files=[])
|
||||
assert "Full Tier 3 Content" in result_t3
|
||||
assert "Line 10" in result_t3 # In full
|
||||
|
||||
# tier1_file.txt should be summarized
|
||||
assert "tier1_file.txt" in result_t3
|
||||
assert "preview:" in result_t3
|
||||
assert "Full Tier 1 Content" in result_t3 # It's short, so it's in preview
|
||||
file_items = [
|
||||
{"path": Path("tier1_file.txt"), "entry": "tier1_file.txt", "content": "Full Tier 1 Content\nLine 2", "tier": 1},
|
||||
{"path": Path("tier3_file.txt"), "entry": "tier3_file.txt", "content": "Full Tier 3 Content\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10", "tier": 3},
|
||||
{"path": Path("other.txt"), "entry": "other.txt", "content": "Other Content\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10", "tier": None}
|
||||
]
|
||||
# Test Tier 1 Context
|
||||
result_t1 = build_tier1_context(file_items, Path("."), [], [])
|
||||
assert "Full Tier 1 Content" in result_t1
|
||||
assert "Line 2" in result_t1 # In full
|
||||
# tier3_file.txt should be summarized
|
||||
assert "tier3_file.txt" in result_t1
|
||||
assert "preview:" in result_t1
|
||||
assert "Line 9" not in result_t1 # Only first 8 lines in preview
|
||||
# Test Tier 3 Context
|
||||
result_t3 = build_tier3_context(file_items, Path("."), [], [], focus_files=[])
|
||||
assert "Full Tier 3 Content" in result_t3
|
||||
assert "Line 10" in result_t3 # In full
|
||||
# tier1_file.txt should be summarized
|
||||
assert "tier1_file.txt" in result_t3
|
||||
assert "preview:" in result_t3
|
||||
assert "Full Tier 1 Content" in result_t3 # It's short, so it's in preview
|
||||
|
||||
@@ -8,8 +8,8 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
import ai_client
|
||||
|
||||
def test_token_usage_tracking():
|
||||
ai_client.reset_session()
|
||||
# Mock an API response with token usage
|
||||
usage = {"prompt_tokens": 100, "candidates_tokens": 50, "total_tokens": 150}
|
||||
# This would test the internal accumulator in ai_client
|
||||
pass
|
||||
ai_client.reset_session()
|
||||
# Mock an API response with token usage
|
||||
usage = {"prompt_tokens": 100, "candidates_tokens": 50, "total_tokens": 150}
|
||||
# This would test the internal accumulator in ai_client
|
||||
pass
|
||||
|
||||
@@ -9,73 +9,61 @@ from models import TrackState, Metadata, Ticket
|
||||
from project_manager import save_track_state, load_track_state
|
||||
|
||||
def test_track_state_persistence(tmp_path):
|
||||
"""
|
||||
"""
|
||||
Tests saving and loading a TrackState object to/from a TOML file.
|
||||
1. Create a TrackState object with sample metadata, discussion, and tasks.
|
||||
2. Call save_track_state('test_track', state, base_dir).
|
||||
3. Verify that base_dir/conductor/tracks/test_track/state.toml exists.
|
||||
4. Call load_track_state('test_track', base_dir) and verify it returns an identical TrackState object.
|
||||
"""
|
||||
base_dir = tmp_path
|
||||
track_id = "test-track-999" # Metadata internal ID
|
||||
track_folder_name = "test_track" # Folder name used in persistence
|
||||
|
||||
# 1. Create a TrackState object with sample data
|
||||
metadata = Metadata(
|
||||
id=track_id,
|
||||
name="Test Track",
|
||||
status="in_progress",
|
||||
created_at=datetime(2023, 1, 1, 12, 0, 0),
|
||||
updated_at=datetime(2023, 1, 2, 13, 0, 0)
|
||||
)
|
||||
|
||||
discussion = [
|
||||
{"role": "User", "content": "Hello", "ts": datetime(2023, 1, 1, 12, 0, 0)},
|
||||
{"role": "AI", "content": "Hi there!", "ts": datetime(2023, 1, 1, 12, 0, 5)}
|
||||
]
|
||||
|
||||
tasks = [
|
||||
Ticket(id="task-1", description="First task", status="completed", assigned_to="worker-1"),
|
||||
Ticket(id="task-2", description="Second task", status="todo", assigned_to="worker-2")
|
||||
]
|
||||
|
||||
original_state = TrackState(
|
||||
metadata=metadata,
|
||||
discussion=discussion,
|
||||
tasks=tasks
|
||||
)
|
||||
|
||||
# 2. Call save_track_state('test_track', state, base_dir)
|
||||
save_track_state(track_folder_name, original_state, base_dir)
|
||||
|
||||
# 3. Verify that base_dir/conductor/tracks/test_track/state.toml exists
|
||||
state_file_path = base_dir / "conductor" / "tracks" / track_folder_name / "state.toml"
|
||||
assert state_file_path.exists(), f"State file should exist at {state_file_path}"
|
||||
|
||||
# 4. Call load_track_state('test_track', base_dir) and verify it returns an identical TrackState object
|
||||
loaded_state = load_track_state(track_folder_name, base_dir)
|
||||
|
||||
assert loaded_state is not None, "load_track_state returned None"
|
||||
|
||||
# Verify equality
|
||||
assert loaded_state.metadata.id == original_state.metadata.id
|
||||
assert loaded_state.metadata.name == original_state.metadata.name
|
||||
assert loaded_state.metadata.status == original_state.metadata.status
|
||||
assert loaded_state.metadata.created_at == original_state.metadata.created_at
|
||||
assert loaded_state.metadata.updated_at == original_state.metadata.updated_at
|
||||
|
||||
assert len(loaded_state.tasks) == len(original_state.tasks)
|
||||
for i in range(len(original_state.tasks)):
|
||||
assert loaded_state.tasks[i].id == original_state.tasks[i].id
|
||||
assert loaded_state.tasks[i].description == original_state.tasks[i].description
|
||||
assert loaded_state.tasks[i].status == original_state.tasks[i].status
|
||||
assert loaded_state.tasks[i].assigned_to == original_state.tasks[i].assigned_to
|
||||
|
||||
assert len(loaded_state.discussion) == len(original_state.discussion)
|
||||
for i in range(len(original_state.discussion)):
|
||||
assert loaded_state.discussion[i]["role"] == original_state.discussion[i]["role"]
|
||||
assert loaded_state.discussion[i]["content"] == original_state.discussion[i]["content"]
|
||||
assert loaded_state.discussion[i]["ts"] == original_state.discussion[i]["ts"]
|
||||
|
||||
# Final check: deep equality of dataclasses
|
||||
assert loaded_state == original_state
|
||||
base_dir = tmp_path
|
||||
track_id = "test-track-999" # Metadata internal ID
|
||||
track_folder_name = "test_track" # Folder name used in persistence
|
||||
# 1. Create a TrackState object with sample data
|
||||
metadata = Metadata(
|
||||
id=track_id,
|
||||
name="Test Track",
|
||||
status="in_progress",
|
||||
created_at=datetime(2023, 1, 1, 12, 0, 0),
|
||||
updated_at=datetime(2023, 1, 2, 13, 0, 0)
|
||||
)
|
||||
discussion = [
|
||||
{"role": "User", "content": "Hello", "ts": datetime(2023, 1, 1, 12, 0, 0)},
|
||||
{"role": "AI", "content": "Hi there!", "ts": datetime(2023, 1, 1, 12, 0, 5)}
|
||||
]
|
||||
tasks = [
|
||||
Ticket(id="task-1", description="First task", status="completed", assigned_to="worker-1"),
|
||||
Ticket(id="task-2", description="Second task", status="todo", assigned_to="worker-2")
|
||||
]
|
||||
original_state = TrackState(
|
||||
metadata=metadata,
|
||||
discussion=discussion,
|
||||
tasks=tasks
|
||||
)
|
||||
# 2. Call save_track_state('test_track', state, base_dir)
|
||||
save_track_state(track_folder_name, original_state, base_dir)
|
||||
# 3. Verify that base_dir/conductor/tracks/test_track/state.toml exists
|
||||
state_file_path = base_dir / "conductor" / "tracks" / track_folder_name / "state.toml"
|
||||
assert state_file_path.exists(), f"State file should exist at {state_file_path}"
|
||||
# 4. Call load_track_state('test_track', base_dir) and verify it returns an identical TrackState object
|
||||
loaded_state = load_track_state(track_folder_name, base_dir)
|
||||
assert loaded_state is not None, "load_track_state returned None"
|
||||
# Verify equality
|
||||
assert loaded_state.metadata.id == original_state.metadata.id
|
||||
assert loaded_state.metadata.name == original_state.metadata.name
|
||||
assert loaded_state.metadata.status == original_state.metadata.status
|
||||
assert loaded_state.metadata.created_at == original_state.metadata.created_at
|
||||
assert loaded_state.metadata.updated_at == original_state.metadata.updated_at
|
||||
assert len(loaded_state.tasks) == len(original_state.tasks)
|
||||
for i in range(len(original_state.tasks)):
|
||||
assert loaded_state.tasks[i].id == original_state.tasks[i].id
|
||||
assert loaded_state.tasks[i].description == original_state.tasks[i].description
|
||||
assert loaded_state.tasks[i].status == original_state.tasks[i].status
|
||||
assert loaded_state.tasks[i].assigned_to == original_state.tasks[i].assigned_to
|
||||
assert len(loaded_state.discussion) == len(original_state.discussion)
|
||||
for i in range(len(original_state.discussion)):
|
||||
assert loaded_state.discussion[i]["role"] == original_state.discussion[i]["role"]
|
||||
assert loaded_state.discussion[i]["content"] == original_state.discussion[i]["content"]
|
||||
assert loaded_state.discussion[i]["ts"] == original_state.discussion[i]["ts"]
|
||||
# Final check: deep equality of dataclasses
|
||||
assert loaded_state == original_state
|
||||
|
||||
@@ -7,166 +7,154 @@ from models import Metadata, TrackState, Ticket
|
||||
# --- Pytest Tests ---
|
||||
|
||||
def test_track_state_instantiation():
|
||||
"""Test creating a TrackState object."""
|
||||
now = datetime.now(timezone.utc)
|
||||
metadata = Metadata(
|
||||
id="track-123",
|
||||
name="Initial Setup",
|
||||
status="in_progress",
|
||||
created_at=now - timedelta(days=1),
|
||||
updated_at=now,
|
||||
)
|
||||
discussion = [
|
||||
{"role": "user", "content": "Hello", "ts": now - timedelta(hours=1)},
|
||||
{"role": "assistant", "content": "Hi there!", "ts": now - timedelta(hours=2)},
|
||||
]
|
||||
# Update Ticket instantiation to match models.py fields (description, assigned_to)
|
||||
tasks = [
|
||||
Ticket(id="task-a", description="Design UI", status="todo", assigned_to="dev1"),
|
||||
Ticket(id="task-b", description="Implement Backend", status="todo", assigned_to="dev2"),
|
||||
]
|
||||
|
||||
track_state = TrackState(
|
||||
metadata=metadata,
|
||||
discussion=discussion,
|
||||
tasks=tasks,
|
||||
)
|
||||
|
||||
assert track_state.metadata.id == "track-123"
|
||||
assert len(track_state.discussion) == 2
|
||||
assert len(track_state.tasks) == 2
|
||||
assert isinstance(track_state.tasks[0], Ticket)
|
||||
assert track_state.tasks[0].description == "Design UI"
|
||||
assert track_state.tasks[0].assigned_to == "dev1"
|
||||
"""Test creating a TrackState object."""
|
||||
now = datetime.now(timezone.utc)
|
||||
metadata = Metadata(
|
||||
id="track-123",
|
||||
name="Initial Setup",
|
||||
status="in_progress",
|
||||
created_at=now - timedelta(days=1),
|
||||
updated_at=now,
|
||||
)
|
||||
discussion = [
|
||||
{"role": "user", "content": "Hello", "ts": now - timedelta(hours=1)},
|
||||
{"role": "assistant", "content": "Hi there!", "ts": now - timedelta(hours=2)},
|
||||
]
|
||||
# Update Ticket instantiation to match models.py fields (description, assigned_to)
|
||||
tasks = [
|
||||
Ticket(id="task-a", description="Design UI", status="todo", assigned_to="dev1"),
|
||||
Ticket(id="task-b", description="Implement Backend", status="todo", assigned_to="dev2"),
|
||||
]
|
||||
track_state = TrackState(
|
||||
metadata=metadata,
|
||||
discussion=discussion,
|
||||
tasks=tasks,
|
||||
)
|
||||
assert track_state.metadata.id == "track-123"
|
||||
assert len(track_state.discussion) == 2
|
||||
assert len(track_state.tasks) == 2
|
||||
assert isinstance(track_state.tasks[0], Ticket)
|
||||
assert track_state.tasks[0].description == "Design UI"
|
||||
assert track_state.tasks[0].assigned_to == "dev1"
|
||||
|
||||
def test_track_state_to_dict():
|
||||
"""Test the to_dict() method for serialization."""
|
||||
now = datetime.now(timezone.utc)
|
||||
metadata = Metadata(
|
||||
id="track-456",
|
||||
name="Refinement Phase",
|
||||
status="completed",
|
||||
created_at=now - timedelta(days=5),
|
||||
updated_at=now - timedelta(days=2),
|
||||
)
|
||||
discussion = [
|
||||
{"role": "user", "content": "Need changes", "ts": now - timedelta(hours=3)},
|
||||
{"role": "assistant", "content": "Understood.", "ts": now - timedelta(hours=4)},
|
||||
]
|
||||
# Update Ticket instantiation
|
||||
tasks = [
|
||||
Ticket(id="task-c", description="Add feature X", status="in_progress", assigned_to="dev3"),
|
||||
]
|
||||
|
||||
track_state = TrackState(
|
||||
metadata=metadata,
|
||||
discussion=discussion,
|
||||
tasks=tasks,
|
||||
)
|
||||
|
||||
track_dict = track_state.to_dict()
|
||||
|
||||
assert track_dict["metadata"]["id"] == "track-456"
|
||||
assert track_dict["metadata"]["created_at"] == metadata.created_at.isoformat()
|
||||
assert track_dict["metadata"]["updated_at"] == metadata.updated_at.isoformat()
|
||||
assert len(track_dict["discussion"]) == 2
|
||||
assert track_dict["discussion"][0]["ts"] == discussion[0]["ts"].isoformat()
|
||||
assert len(track_dict["tasks"]) == 1
|
||||
# Use the Ticket's to_dict method for serialization
|
||||
assert track_dict["tasks"][0]["id"] == "task-c"
|
||||
assert track_dict["tasks"][0]["description"] == "Add feature X"
|
||||
assert track_dict["tasks"][0]["assigned_to"] == "dev3"
|
||||
"""Test the to_dict() method for serialization."""
|
||||
now = datetime.now(timezone.utc)
|
||||
metadata = Metadata(
|
||||
id="track-456",
|
||||
name="Refinement Phase",
|
||||
status="completed",
|
||||
created_at=now - timedelta(days=5),
|
||||
updated_at=now - timedelta(days=2),
|
||||
)
|
||||
discussion = [
|
||||
{"role": "user", "content": "Need changes", "ts": now - timedelta(hours=3)},
|
||||
{"role": "assistant", "content": "Understood.", "ts": now - timedelta(hours=4)},
|
||||
]
|
||||
# Update Ticket instantiation
|
||||
tasks = [
|
||||
Ticket(id="task-c", description="Add feature X", status="in_progress", assigned_to="dev3"),
|
||||
]
|
||||
track_state = TrackState(
|
||||
metadata=metadata,
|
||||
discussion=discussion,
|
||||
tasks=tasks,
|
||||
)
|
||||
track_dict = track_state.to_dict()
|
||||
assert track_dict["metadata"]["id"] == "track-456"
|
||||
assert track_dict["metadata"]["created_at"] == metadata.created_at.isoformat()
|
||||
assert track_dict["metadata"]["updated_at"] == metadata.updated_at.isoformat()
|
||||
assert len(track_dict["discussion"]) == 2
|
||||
assert track_dict["discussion"][0]["ts"] == discussion[0]["ts"].isoformat()
|
||||
assert len(track_dict["tasks"]) == 1
|
||||
# Use the Ticket's to_dict method for serialization
|
||||
assert track_dict["tasks"][0]["id"] == "task-c"
|
||||
assert track_dict["tasks"][0]["description"] == "Add feature X"
|
||||
assert track_dict["tasks"][0]["assigned_to"] == "dev3"
|
||||
|
||||
def test_track_state_from_dict():
|
||||
"""Test the from_dict() class method for deserialization."""
|
||||
now = datetime.now(timezone.utc)
|
||||
track_dict_data = {
|
||||
"metadata": {
|
||||
"id": "track-789",
|
||||
"name": "Final Review",
|
||||
"status": "pending",
|
||||
"created_at": (now - timedelta(days=10)).isoformat(),
|
||||
"updated_at": (now - timedelta(days=9)).isoformat(),
|
||||
},
|
||||
"discussion": [
|
||||
{"role": "user", "content": "Review complete.", "ts": (now - timedelta(hours=5)).isoformat()},
|
||||
],
|
||||
"tasks": [
|
||||
# Use fields from models.py Ticket definition for deserialization
|
||||
{"id": "task-d", "description": "Deploy", "status": "completed", "assigned_to": "ops1"},
|
||||
],
|
||||
}
|
||||
"""Test the from_dict() class method for deserialization."""
|
||||
now = datetime.now(timezone.utc)
|
||||
track_dict_data = {
|
||||
"metadata": {
|
||||
"id": "track-789",
|
||||
"name": "Final Review",
|
||||
"status": "pending",
|
||||
"created_at": (now - timedelta(days=10)).isoformat(),
|
||||
"updated_at": (now - timedelta(days=9)).isoformat(),
|
||||
},
|
||||
"discussion": [
|
||||
{"role": "user", "content": "Review complete.", "ts": (now - timedelta(hours=5)).isoformat()},
|
||||
],
|
||||
"tasks": [
|
||||
# Use fields from models.py Ticket definition for deserialization
|
||||
{"id": "task-d", "description": "Deploy", "status": "completed", "assigned_to": "ops1"},
|
||||
],
|
||||
}
|
||||
track_state = TrackState.from_dict(track_dict_data)
|
||||
assert isinstance(track_state, TrackState)
|
||||
assert track_state.metadata.id == "track-789"
|
||||
assert isinstance(track_state.metadata.created_at, datetime)
|
||||
assert track_state.metadata.created_at.isoformat() == track_dict_data["metadata"]["created_at"]
|
||||
assert len(track_state.discussion) == 1
|
||||
assert isinstance(track_state.discussion[0]["ts"], datetime)
|
||||
assert track_state.discussion[0]["ts"].isoformat() == track_dict_data["discussion"][0]["ts"]
|
||||
assert len(track_state.tasks) == 1
|
||||
assert isinstance(track_state.tasks[0], Ticket)
|
||||
assert track_state.tasks[0].id == "task-d"
|
||||
assert track_state.tasks[0].description == "Deploy"
|
||||
assert track_state.tasks[0].assigned_to == "ops1"
|
||||
# Test case for empty lists and missing keys for robustness
|
||||
|
||||
track_state = TrackState.from_dict(track_dict_data)
|
||||
|
||||
assert isinstance(track_state, TrackState)
|
||||
assert track_state.metadata.id == "track-789"
|
||||
assert isinstance(track_state.metadata.created_at, datetime)
|
||||
assert track_state.metadata.created_at.isoformat() == track_dict_data["metadata"]["created_at"]
|
||||
assert len(track_state.discussion) == 1
|
||||
assert isinstance(track_state.discussion[0]["ts"], datetime)
|
||||
assert track_state.discussion[0]["ts"].isoformat() == track_dict_data["discussion"][0]["ts"]
|
||||
assert len(track_state.tasks) == 1
|
||||
assert isinstance(track_state.tasks[0], Ticket)
|
||||
assert track_state.tasks[0].id == "task-d"
|
||||
assert track_state.tasks[0].description == "Deploy"
|
||||
assert track_state.tasks[0].assigned_to == "ops1"
|
||||
|
||||
# Test case for empty lists and missing keys for robustness
|
||||
def test_track_state_from_dict_empty_and_missing():
|
||||
"""Test from_dict with empty lists and missing optional keys."""
|
||||
track_dict_data = {
|
||||
"metadata": {
|
||||
"id": "track-empty",
|
||||
"name": "Empty State",
|
||||
# created_at, updated_at, status are optional in from_dict logic
|
||||
},
|
||||
"discussion": [], # Empty discussion list
|
||||
"tasks": [], # Empty tasks list
|
||||
}
|
||||
"""Test from_dict with empty lists and missing optional keys."""
|
||||
track_dict_data = {
|
||||
"metadata": {
|
||||
"id": "track-empty",
|
||||
"name": "Empty State",
|
||||
# created_at, updated_at, status are optional in from_dict logic
|
||||
},
|
||||
"discussion": [], # Empty discussion list
|
||||
"tasks": [], # Empty tasks list
|
||||
}
|
||||
track_state = TrackState.from_dict(track_dict_data)
|
||||
assert isinstance(track_state, TrackState)
|
||||
assert track_state.metadata.id == "track-empty"
|
||||
assert track_state.metadata.name == "Empty State"
|
||||
assert track_state.metadata.created_at is None
|
||||
assert track_state.metadata.updated_at is None
|
||||
assert track_state.metadata.status is None
|
||||
assert len(track_state.discussion) == 0
|
||||
assert len(track_state.tasks) == 0
|
||||
# Test case for to_dict with None values or missing optional data
|
||||
|
||||
track_state = TrackState.from_dict(track_dict_data)
|
||||
|
||||
assert isinstance(track_state, TrackState)
|
||||
assert track_state.metadata.id == "track-empty"
|
||||
assert track_state.metadata.name == "Empty State"
|
||||
assert track_state.metadata.created_at is None
|
||||
assert track_state.metadata.updated_at is None
|
||||
assert track_state.metadata.status is None
|
||||
assert len(track_state.discussion) == 0
|
||||
assert len(track_state.tasks) == 0
|
||||
|
||||
# Test case for to_dict with None values or missing optional data
|
||||
def test_track_state_to_dict_with_none():
|
||||
"""Test to_dict with None values in optional fields."""
|
||||
now = datetime.now(timezone.utc)
|
||||
metadata = Metadata(
|
||||
id="track-none",
|
||||
name="None Test",
|
||||
status=None, # None status
|
||||
created_at=now,
|
||||
updated_at=None, # None updated_at
|
||||
)
|
||||
discussion = [
|
||||
{"role": "system", "content": "Info", "ts": None}, # None timestamp
|
||||
]
|
||||
# Update Ticket instantiation
|
||||
tasks = [
|
||||
Ticket(id="task-none", description="Task None", status="pending", assigned_to="anon"),
|
||||
]
|
||||
|
||||
track_state = TrackState(
|
||||
metadata=metadata,
|
||||
discussion=discussion,
|
||||
tasks=tasks,
|
||||
)
|
||||
|
||||
track_dict = track_state.to_dict()
|
||||
|
||||
assert track_dict["metadata"]["status"] is None
|
||||
# Check that isoformat was called on datetime object, not None
|
||||
assert track_dict["metadata"]["created_at"] == now.isoformat()
|
||||
assert track_dict["metadata"]["updated_at"] is None # This should be None as it's passed as None
|
||||
assert track_dict["discussion"][0]["ts"] is None
|
||||
assert track_dict["tasks"][0]["description"] == "Task None"
|
||||
assert track_dict["tasks"][0]["assigned_to"] == "anon"
|
||||
"""Test to_dict with None values in optional fields."""
|
||||
now = datetime.now(timezone.utc)
|
||||
metadata = Metadata(
|
||||
id="track-none",
|
||||
name="None Test",
|
||||
status=None, # None status
|
||||
created_at=now,
|
||||
updated_at=None, # None updated_at
|
||||
)
|
||||
discussion = [
|
||||
{"role": "system", "content": "Info", "ts": None}, # None timestamp
|
||||
]
|
||||
# Update Ticket instantiation
|
||||
tasks = [
|
||||
Ticket(id="task-none", description="Task None", status="pending", assigned_to="anon"),
|
||||
]
|
||||
track_state = TrackState(
|
||||
metadata=metadata,
|
||||
discussion=discussion,
|
||||
tasks=tasks,
|
||||
)
|
||||
track_dict = track_state.to_dict()
|
||||
assert track_dict["metadata"]["status"] is None
|
||||
# Check that isoformat was called on datetime object, not None
|
||||
assert track_dict["metadata"]["created_at"] == now.isoformat()
|
||||
assert track_dict["metadata"]["updated_at"] is None # This should be None as it's passed as None
|
||||
assert track_dict["discussion"][0]["ts"] is None
|
||||
assert track_dict["tasks"][0]["description"] == "Task None"
|
||||
assert track_dict["tasks"][0]["assigned_to"] == "anon"
|
||||
|
||||
@@ -2,29 +2,24 @@ import tree_sitter_python as tspython
|
||||
from tree_sitter import Language, Parser
|
||||
|
||||
def test_tree_sitter_python_setup():
|
||||
"""
|
||||
"""
|
||||
Verifies that tree-sitter and tree-sitter-python are correctly installed
|
||||
and can parse a simple Python function string.
|
||||
"""
|
||||
# Initialize the Python language and parser
|
||||
PY_LANGUAGE = Language(tspython.language())
|
||||
parser = Parser(PY_LANGUAGE)
|
||||
|
||||
# Simple Python code to parse
|
||||
code = """def hello():
|
||||
# Initialize the Python language and parser
|
||||
PY_LANGUAGE = Language(tspython.language())
|
||||
parser = Parser(PY_LANGUAGE)
|
||||
# Simple Python code to parse
|
||||
code = """def hello():
|
||||
print('world')"""
|
||||
|
||||
# Parse the code
|
||||
tree = parser.parse(bytes(code, "utf8"))
|
||||
|
||||
# Assert that the root node is a 'module'
|
||||
assert tree.root_node.type == "module"
|
||||
|
||||
# Verify we can find a function definition
|
||||
found_function = False
|
||||
for child in tree.root_node.children:
|
||||
if child.type == "function_definition":
|
||||
found_function = True
|
||||
break
|
||||
|
||||
assert found_function, "Should have found a function_definition node"
|
||||
# Parse the code
|
||||
tree = parser.parse(bytes(code, "utf8"))
|
||||
# Assert that the root node is a 'module'
|
||||
assert tree.root_node.type == "module"
|
||||
# Verify we can find a function definition
|
||||
found_function = False
|
||||
for child in tree.root_node.children:
|
||||
if child.type == "function_definition":
|
||||
found_function = True
|
||||
break
|
||||
assert found_function, "Should have found a function_definition node"
|
||||
|
||||
@@ -8,15 +8,15 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from simulation.user_agent import UserSimAgent
|
||||
|
||||
def test_user_agent_instantiation():
|
||||
agent = UserSimAgent(hook_client=None)
|
||||
assert agent is not None
|
||||
agent = UserSimAgent(hook_client=None)
|
||||
assert agent is not None
|
||||
|
||||
def test_perform_action_with_delay():
|
||||
agent = UserSimAgent(hook_client=None)
|
||||
called = False
|
||||
def action():
|
||||
nonlocal called
|
||||
called = True
|
||||
|
||||
agent.perform_action_with_delay(action)
|
||||
assert called is True
|
||||
agent = UserSimAgent(hook_client=None)
|
||||
called = False
|
||||
|
||||
def action():
|
||||
nonlocal called
|
||||
called = True
|
||||
agent.perform_action_with_delay(action)
|
||||
assert called is True
|
||||
|
||||
@@ -9,39 +9,33 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
from simulation.workflow_sim import WorkflowSimulator
|
||||
|
||||
def test_simulator_instantiation():
|
||||
client = MagicMock()
|
||||
sim = WorkflowSimulator(client)
|
||||
assert sim is not None
|
||||
client = MagicMock()
|
||||
sim = WorkflowSimulator(client)
|
||||
assert sim is not None
|
||||
|
||||
def test_setup_new_project():
|
||||
client = MagicMock()
|
||||
sim = WorkflowSimulator(client)
|
||||
|
||||
# Mock responses for wait_for_server
|
||||
client.wait_for_server.return_value = True
|
||||
|
||||
sim.setup_new_project("TestProject", "/tmp/test_git")
|
||||
|
||||
# Verify hook calls
|
||||
client.click.assert_any_call("btn_project_new")
|
||||
client.set_value.assert_any_call("project_git_dir", "/tmp/test_git")
|
||||
client.click.assert_any_call("btn_project_save")
|
||||
client = MagicMock()
|
||||
sim = WorkflowSimulator(client)
|
||||
# Mock responses for wait_for_server
|
||||
client.wait_for_server.return_value = True
|
||||
sim.setup_new_project("TestProject", "/tmp/test_git")
|
||||
# Verify hook calls
|
||||
client.click.assert_any_call("btn_project_new")
|
||||
client.set_value.assert_any_call("project_git_dir", "/tmp/test_git")
|
||||
client.click.assert_any_call("btn_project_save")
|
||||
|
||||
def test_discussion_switching():
|
||||
client = MagicMock()
|
||||
sim = WorkflowSimulator(client)
|
||||
|
||||
sim.create_discussion("NewDisc")
|
||||
client.set_value.assert_called_with("disc_new_name_input", "NewDisc")
|
||||
client.click.assert_called_with("btn_disc_create")
|
||||
|
||||
sim.switch_discussion("NewDisc")
|
||||
client.select_list_item.assert_called_with("disc_listbox", "NewDisc")
|
||||
client = MagicMock()
|
||||
sim = WorkflowSimulator(client)
|
||||
sim.create_discussion("NewDisc")
|
||||
client.set_value.assert_called_with("disc_new_name_input", "NewDisc")
|
||||
client.click.assert_called_with("btn_disc_create")
|
||||
sim.switch_discussion("NewDisc")
|
||||
client.select_list_item.assert_called_with("disc_listbox", "NewDisc")
|
||||
|
||||
def test_history_truncation():
|
||||
client = MagicMock()
|
||||
sim = WorkflowSimulator(client)
|
||||
|
||||
sim.truncate_history(3)
|
||||
client.set_value.assert_called_with("disc_truncate_pairs", 3)
|
||||
client.click.assert_called_with("btn_disc_truncate")
|
||||
client = MagicMock()
|
||||
sim = WorkflowSimulator(client)
|
||||
sim.truncate_history(3)
|
||||
client.set_value.assert_called_with("disc_truncate_pairs", 3)
|
||||
client.click.assert_called_with("btn_disc_truncate")
|
||||
|
||||
@@ -8,89 +8,80 @@ import unittest
|
||||
# Calculate project root
|
||||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, PROJECT_ROOT)
|
||||
sys.path.insert(0, PROJECT_ROOT)
|
||||
|
||||
from api_hook_client import ApiHookClient
|
||||
|
||||
class TestMMAGUIRobust(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
# 1. Launch gui_2.py with --enable-test-hooks
|
||||
cls.gui_command = [sys.executable, "gui_2.py", "--enable-test-hooks"]
|
||||
print(f"Launching GUI: {' '.join(cls.gui_command)}")
|
||||
cls.gui_process = subprocess.Popen(
|
||||
cls.gui_command,
|
||||
cwd=PROJECT_ROOT,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True
|
||||
)
|
||||
cls.client = ApiHookClient()
|
||||
print("Waiting for GUI to start...")
|
||||
if not cls.client.wait_for_server(timeout=10):
|
||||
cls.gui_process.terminate()
|
||||
raise RuntimeError("GUI failed to start or hook server not responsive.")
|
||||
print("GUI started.")
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
# 1. Launch gui_2.py with --enable-test-hooks
|
||||
cls.gui_command = [sys.executable, "gui_2.py", "--enable-test-hooks"]
|
||||
print(f"Launching GUI: {' '.join(cls.gui_command)}")
|
||||
cls.gui_process = subprocess.Popen(
|
||||
cls.gui_command,
|
||||
cwd=PROJECT_ROOT,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True
|
||||
)
|
||||
cls.client = ApiHookClient()
|
||||
print("Waiting for GUI to start...")
|
||||
if not cls.client.wait_for_server(timeout=10):
|
||||
cls.gui_process.terminate()
|
||||
raise RuntimeError("GUI failed to start or hook server not responsive.")
|
||||
print("GUI started.")
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
if cls.gui_process:
|
||||
cls.gui_process.terminate()
|
||||
cls.gui_process.wait(timeout=5)
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
if cls.gui_process:
|
||||
cls.gui_process.terminate()
|
||||
cls.gui_process.wait(timeout=5)
|
||||
|
||||
def test_mma_state_ingestion(self):
|
||||
"""Verify that mma_state_update event correctly updates GUI state."""
|
||||
track_data = {
|
||||
"id": "robust_test_track",
|
||||
"title": "Robust Verification Track",
|
||||
"description": "Verifying internal state ingestion"
|
||||
}
|
||||
tickets_data = [
|
||||
{"id": "T1", "target_file": "file1.py", "status": "todo"},
|
||||
{"id": "T2", "target_file": "file2.py", "status": "running"},
|
||||
{"id": "T3", "target_file": "file3.py", "status": "complete"},
|
||||
]
|
||||
|
||||
payload = {
|
||||
"status": "active",
|
||||
"active_tier": "Tier 2",
|
||||
"track": track_data,
|
||||
"tickets": tickets_data
|
||||
}
|
||||
|
||||
print("Pushing mma_state_update...")
|
||||
self.client.push_event("mma_state_update", payload)
|
||||
|
||||
# Give GUI a moment to process the async task
|
||||
time.sleep(1.0)
|
||||
|
||||
print("Querying mma_status...")
|
||||
status = self.client.get_mma_status()
|
||||
|
||||
self.assertEqual(status["mma_status"], "active")
|
||||
self.assertEqual(status["active_tier"], "Tier 2")
|
||||
self.assertEqual(status["active_track"]["id"], "robust_test_track")
|
||||
self.assertEqual(len(status["active_tickets"]), 3)
|
||||
self.assertEqual(status["active_tickets"][2]["status"], "complete")
|
||||
print("MMA state ingestion verified successfully.")
|
||||
def test_mma_state_ingestion(self):
|
||||
"""Verify that mma_state_update event correctly updates GUI state."""
|
||||
track_data = {
|
||||
"id": "robust_test_track",
|
||||
"title": "Robust Verification Track",
|
||||
"description": "Verifying internal state ingestion"
|
||||
}
|
||||
tickets_data = [
|
||||
{"id": "T1", "target_file": "file1.py", "status": "todo"},
|
||||
{"id": "T2", "target_file": "file2.py", "status": "running"},
|
||||
{"id": "T3", "target_file": "file3.py", "status": "complete"},
|
||||
]
|
||||
payload = {
|
||||
"status": "active",
|
||||
"active_tier": "Tier 2",
|
||||
"track": track_data,
|
||||
"tickets": tickets_data
|
||||
}
|
||||
print("Pushing mma_state_update...")
|
||||
self.client.push_event("mma_state_update", payload)
|
||||
# Give GUI a moment to process the async task
|
||||
time.sleep(1.0)
|
||||
print("Querying mma_status...")
|
||||
status = self.client.get_mma_status()
|
||||
self.assertEqual(status["mma_status"], "active")
|
||||
self.assertEqual(status["active_tier"], "Tier 2")
|
||||
self.assertEqual(status["active_track"]["id"], "robust_test_track")
|
||||
self.assertEqual(len(status["active_tickets"]), 3)
|
||||
self.assertEqual(status["active_tickets"][2]["status"], "complete")
|
||||
print("MMA state ingestion verified successfully.")
|
||||
|
||||
def test_mma_step_approval_trigger(self):
|
||||
"""Verify that mma_step_approval event sets the pending approval flag."""
|
||||
payload = {
|
||||
"ticket_id": "T2",
|
||||
"payload": "echo 'Robust Test'"
|
||||
}
|
||||
|
||||
print("Pushing mma_step_approval...")
|
||||
self.client.push_event("mma_step_approval", payload)
|
||||
|
||||
time.sleep(1.0)
|
||||
|
||||
print("Querying mma_status for pending approval...")
|
||||
status = self.client.get_mma_status()
|
||||
|
||||
self.assertTrue(status["pending_approval"], "GUI did not register pending MMA approval")
|
||||
print("MMA step approval trigger verified successfully.")
|
||||
def test_mma_step_approval_trigger(self):
|
||||
"""Verify that mma_step_approval event sets the pending approval flag."""
|
||||
payload = {
|
||||
"ticket_id": "T2",
|
||||
"payload": "echo 'Robust Test'"
|
||||
}
|
||||
print("Pushing mma_step_approval...")
|
||||
self.client.push_event("mma_step_approval", payload)
|
||||
time.sleep(1.0)
|
||||
print("Querying mma_status for pending approval...")
|
||||
status = self.client.get_mma_status()
|
||||
self.assertTrue(status["pending_approval"], "GUI did not register pending MMA approval")
|
||||
print("MMA step approval trigger verified successfully.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
unittest.main()
|
||||
|
||||
@@ -5,65 +5,58 @@ import os
|
||||
|
||||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
if PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, PROJECT_ROOT)
|
||||
sys.path.insert(0, PROJECT_ROOT)
|
||||
|
||||
from api_hook_client import ApiHookClient
|
||||
|
||||
def diag_run():
|
||||
print("Launching GUI for manual inspection + automated hooks...")
|
||||
# Use a log file for GUI output
|
||||
with open("gui_diag.log", "w") as log_file:
|
||||
gui_process = subprocess.Popen(
|
||||
[sys.executable, "gui_2.py", "--enable-test-hooks"],
|
||||
cwd=PROJECT_ROOT,
|
||||
stdout=log_file,
|
||||
stderr=log_file,
|
||||
text=True
|
||||
)
|
||||
|
||||
client = ApiHookClient()
|
||||
print("Waiting for GUI...")
|
||||
if not client.wait_for_server(timeout=10):
|
||||
print("GUI failed to start.")
|
||||
gui_process.terminate()
|
||||
return
|
||||
|
||||
# Pushing state
|
||||
track_data = {"id": "diag_track", "title": "Diagnostic Track"}
|
||||
tickets_data = [{"id": f"T{i}", "status": "todo"} for i in range(3)]
|
||||
|
||||
print("Pushing state update...")
|
||||
client.push_event("mma_state_update", {
|
||||
"status": "active",
|
||||
"active_tier": "Tier 1",
|
||||
"track": track_data,
|
||||
"tickets": tickets_data
|
||||
})
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
print("Pushing approval request...")
|
||||
client.push_event("mma_step_approval", {
|
||||
"ticket_id": "T0",
|
||||
"payload": "Get-ChildItem"
|
||||
})
|
||||
|
||||
print("\nGUI is running. Check 'gui_diag.log' for output.")
|
||||
print("I will now poll mma_status every 2 seconds. Ctrl+C to stop.")
|
||||
|
||||
try:
|
||||
start_poll = time.time()
|
||||
while time.time() - start_poll < 30:
|
||||
try:
|
||||
status = client.get_mma_status()
|
||||
print(f"[{time.strftime('%H:%M:%S')}] Status: {status.get('mma_status')}, Pending Approval: {status.get('pending_approval')}")
|
||||
except Exception as e:
|
||||
print(f"[{time.strftime('%H:%M:%S')}] Error querying status: {e}")
|
||||
time.sleep(2)
|
||||
except KeyboardInterrupt:
|
||||
print("Stopping...")
|
||||
finally:
|
||||
gui_process.terminate()
|
||||
print("Launching GUI for manual inspection + automated hooks...")
|
||||
# Use a log file for GUI output
|
||||
with open("gui_diag.log", "w") as log_file:
|
||||
gui_process = subprocess.Popen(
|
||||
[sys.executable, "gui_2.py", "--enable-test-hooks"],
|
||||
cwd=PROJECT_ROOT,
|
||||
stdout=log_file,
|
||||
stderr=log_file,
|
||||
text=True
|
||||
)
|
||||
client = ApiHookClient()
|
||||
print("Waiting for GUI...")
|
||||
if not client.wait_for_server(timeout=10):
|
||||
print("GUI failed to start.")
|
||||
gui_process.terminate()
|
||||
return
|
||||
# Pushing state
|
||||
track_data = {"id": "diag_track", "title": "Diagnostic Track"}
|
||||
tickets_data = [{"id": f"T{i}", "status": "todo"} for i in range(3)]
|
||||
print("Pushing state update...")
|
||||
client.push_event("mma_state_update", {
|
||||
"status": "active",
|
||||
"active_tier": "Tier 1",
|
||||
"track": track_data,
|
||||
"tickets": tickets_data
|
||||
})
|
||||
time.sleep(2)
|
||||
print("Pushing approval request...")
|
||||
client.push_event("mma_step_approval", {
|
||||
"ticket_id": "T0",
|
||||
"payload": "Get-ChildItem"
|
||||
})
|
||||
print("\nGUI is running. Check 'gui_diag.log' for output.")
|
||||
print("I will now poll mma_status every 2 seconds. Ctrl+C to stop.")
|
||||
try:
|
||||
start_poll = time.time()
|
||||
while time.time() - start_poll < 30:
|
||||
try:
|
||||
status = client.get_mma_status()
|
||||
print(f"[{time.strftime('%H:%M:%S')}] Status: {status.get('mma_status')}, Pending Approval: {status.get('pending_approval')}")
|
||||
except Exception as e:
|
||||
print(f"[{time.strftime('%H:%M:%S')}] Error querying status: {e}")
|
||||
time.sleep(2)
|
||||
except KeyboardInterrupt:
|
||||
print("Stopping...")
|
||||
finally:
|
||||
gui_process.terminate()
|
||||
|
||||
if __name__ == "__main__":
|
||||
diag_run()
|
||||
diag_run()
|
||||
|
||||
@@ -12,130 +12,116 @@ PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||
|
||||
# Ensure project root is in sys.path to import modules like api_hook_client
|
||||
if PROJECT_ROOT not in sys.path:
|
||||
sys.path.insert(0, PROJECT_ROOT)
|
||||
print(f"Added '{PROJECT_ROOT}' to sys.path for imports.")
|
||||
sys.path.insert(0, PROJECT_ROOT)
|
||||
print(f"Added '{PROJECT_ROOT}' to sys.path for imports.")
|
||||
|
||||
try:
|
||||
from api_hook_client import ApiHookClient
|
||||
from api_hook_client import ApiHookClient
|
||||
except ImportError as e:
|
||||
print(f"Error: Could not import ApiHookClient from '{API_HOOK_CLIENT_MODULE}'.")
|
||||
print(f"Please ensure '{API_HOOK_CLIENT_MODULE}.py' is accessible and '{PROJECT_ROOT}' is correctly added to sys.path.")
|
||||
print(f"Import error: {e}")
|
||||
sys.exit(1)
|
||||
print(f"Error: Could not import ApiHookClient from '{API_HOOK_CLIENT_MODULE}'.")
|
||||
print(f"Please ensure '{API_HOOK_CLIENT_MODULE}.py' is accessible and '{PROJECT_ROOT}' is correctly added to sys.path.")
|
||||
print(f"Import error: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
def run_visual_mma_verification():
|
||||
print("Starting visual MMA verification test...")
|
||||
|
||||
# Change current directory to project root
|
||||
original_dir = os.getcwd()
|
||||
if original_dir != PROJECT_ROOT:
|
||||
try:
|
||||
os.chdir(PROJECT_ROOT)
|
||||
print(f"Changed current directory to: {PROJECT_ROOT}")
|
||||
except FileNotFoundError:
|
||||
print(f"Error: Project root directory '{PROJECT_ROOT}' not found.")
|
||||
return
|
||||
|
||||
# 1. Launch gui_2.py with --enable-test-hooks
|
||||
gui_command = [sys.executable, GUI_SCRIPT, TEST_HOOKS_FLAG]
|
||||
print(f"Launching GUI with command: {' '.join(gui_command)}")
|
||||
|
||||
try:
|
||||
gui_process = subprocess.Popen(
|
||||
gui_command,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
cwd=PROJECT_ROOT
|
||||
)
|
||||
print(f"GUI process started with PID: {gui_process.pid}")
|
||||
except FileNotFoundError:
|
||||
print(f"Error: Could not find Python executable '{sys.executable}' or script '{GUI_SCRIPT}'.")
|
||||
return
|
||||
except Exception as e:
|
||||
print(f"Error starting GUI process: {e}")
|
||||
return
|
||||
|
||||
# Wait for GUI to start
|
||||
print("Waiting for GUI to initialize and hook server to start (5 seconds)...")
|
||||
time.sleep(5)
|
||||
|
||||
if gui_process.poll() is not None:
|
||||
print(f"Error: GUI process exited prematurely with return code {gui_process.returncode}.")
|
||||
return
|
||||
|
||||
# 2. Use ApiHookClient
|
||||
try:
|
||||
client = ApiHookClient()
|
||||
print("ApiHookClient initialized successfully.")
|
||||
except Exception as e:
|
||||
print(f"Failed to initialize ApiHookClient. Error: {e}")
|
||||
if gui_process:
|
||||
gui_process.terminate()
|
||||
return
|
||||
|
||||
# 3. Setup MMA data
|
||||
track_data = {
|
||||
"id": "visual_test_track",
|
||||
"title": "Visual Verification Track",
|
||||
"description": "A track to verify MMA UI components"
|
||||
}
|
||||
tickets_data = [
|
||||
{"id": "TICKET-001", "target_file": "core.py", "status": "todo"},
|
||||
{"id": "TICKET-002", "target_file": "utils.py", "status": "running"},
|
||||
{"id": "TICKET-003", "target_file": "tests.py", "status": "complete"},
|
||||
{"id": "TICKET-004", "target_file": "api.py", "status": "blocked"},
|
||||
{"id": "TICKET-005", "target_file": "gui.py", "status": "paused"},
|
||||
]
|
||||
|
||||
print("\nPushing MMA state update...")
|
||||
try:
|
||||
payload = {
|
||||
"status": "running",
|
||||
"active_tier": "Tier 3",
|
||||
"track": track_data,
|
||||
"tickets": tickets_data
|
||||
}
|
||||
client.push_event("mma_state_update", payload)
|
||||
print(" - MMA state update pushed.")
|
||||
except Exception as e:
|
||||
print(f" - Warning: Failed to push mma_state_update: {e}")
|
||||
|
||||
time.sleep(3)
|
||||
|
||||
print("Pushing 'mma_step_approval' event to trigger HITL modal...")
|
||||
try:
|
||||
approval_payload = {
|
||||
"ticket_id": "TICKET-002",
|
||||
"payload": "powershell -Command \"Write-Host 'Hello from Tier 3'\""
|
||||
}
|
||||
client.push_event("mma_step_approval", approval_payload)
|
||||
print("mma_step_approval event pushed successfully.")
|
||||
except Exception as e:
|
||||
print(f"Error pushing mma_step_approval event: {e}")
|
||||
|
||||
# 5. Provide clear print statements for manual verification
|
||||
print("\n--- Manual Verification Instructions ---")
|
||||
print("Please visually inspect the running GUI application:")
|
||||
print("1. MMA Dashboard: Ensure the 'MMA Dashboard' panel is visible and active.")
|
||||
print("2. Ticket Queue: Verify the 'Ticket Queue' section displays all 5 tickets with correct statuses.")
|
||||
print("3. Progress Bar: Check that the progress bar correctly reflects the completed/total tickets.")
|
||||
print("4. Approval Modal: Confirm that an 'MMA Step Approval' modal has appeared.")
|
||||
print("\n--------------------------------------")
|
||||
print("The test script has finished its automated actions.")
|
||||
print("The GUI application is still running. Press Enter to exit.")
|
||||
|
||||
try:
|
||||
input()
|
||||
except EOFError:
|
||||
pass
|
||||
|
||||
print("\nStopping GUI process...")
|
||||
if gui_process:
|
||||
gui_process.terminate()
|
||||
gui_process.wait(timeout=5)
|
||||
|
||||
print("Visual MMA verification test script finished.")
|
||||
print("Starting visual MMA verification test...")
|
||||
# Change current directory to project root
|
||||
original_dir = os.getcwd()
|
||||
if original_dir != PROJECT_ROOT:
|
||||
try:
|
||||
os.chdir(PROJECT_ROOT)
|
||||
print(f"Changed current directory to: {PROJECT_ROOT}")
|
||||
except FileNotFoundError:
|
||||
print(f"Error: Project root directory '{PROJECT_ROOT}' not found.")
|
||||
return
|
||||
# 1. Launch gui_2.py with --enable-test-hooks
|
||||
gui_command = [sys.executable, GUI_SCRIPT, TEST_HOOKS_FLAG]
|
||||
print(f"Launching GUI with command: {' '.join(gui_command)}")
|
||||
try:
|
||||
gui_process = subprocess.Popen(
|
||||
gui_command,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
cwd=PROJECT_ROOT
|
||||
)
|
||||
print(f"GUI process started with PID: {gui_process.pid}")
|
||||
except FileNotFoundError:
|
||||
print(f"Error: Could not find Python executable '{sys.executable}' or script '{GUI_SCRIPT}'.")
|
||||
return
|
||||
except Exception as e:
|
||||
print(f"Error starting GUI process: {e}")
|
||||
return
|
||||
# Wait for GUI to start
|
||||
print("Waiting for GUI to initialize and hook server to start (5 seconds)...")
|
||||
time.sleep(5)
|
||||
if gui_process.poll() is not None:
|
||||
print(f"Error: GUI process exited prematurely with return code {gui_process.returncode}.")
|
||||
return
|
||||
# 2. Use ApiHookClient
|
||||
try:
|
||||
client = ApiHookClient()
|
||||
print("ApiHookClient initialized successfully.")
|
||||
except Exception as e:
|
||||
print(f"Failed to initialize ApiHookClient. Error: {e}")
|
||||
if gui_process:
|
||||
gui_process.terminate()
|
||||
return
|
||||
# 3. Setup MMA data
|
||||
track_data = {
|
||||
"id": "visual_test_track",
|
||||
"title": "Visual Verification Track",
|
||||
"description": "A track to verify MMA UI components"
|
||||
}
|
||||
tickets_data = [
|
||||
{"id": "TICKET-001", "target_file": "core.py", "status": "todo"},
|
||||
{"id": "TICKET-002", "target_file": "utils.py", "status": "running"},
|
||||
{"id": "TICKET-003", "target_file": "tests.py", "status": "complete"},
|
||||
{"id": "TICKET-004", "target_file": "api.py", "status": "blocked"},
|
||||
{"id": "TICKET-005", "target_file": "gui.py", "status": "paused"},
|
||||
]
|
||||
print("\nPushing MMA state update...")
|
||||
try:
|
||||
payload = {
|
||||
"status": "running",
|
||||
"active_tier": "Tier 3",
|
||||
"track": track_data,
|
||||
"tickets": tickets_data
|
||||
}
|
||||
client.push_event("mma_state_update", payload)
|
||||
print(" - MMA state update pushed.")
|
||||
except Exception as e:
|
||||
print(f" - Warning: Failed to push mma_state_update: {e}")
|
||||
time.sleep(3)
|
||||
print("Pushing 'mma_step_approval' event to trigger HITL modal...")
|
||||
try:
|
||||
approval_payload = {
|
||||
"ticket_id": "TICKET-002",
|
||||
"payload": "powershell -Command \"Write-Host 'Hello from Tier 3'\""
|
||||
}
|
||||
client.push_event("mma_step_approval", approval_payload)
|
||||
print("mma_step_approval event pushed successfully.")
|
||||
except Exception as e:
|
||||
print(f"Error pushing mma_step_approval event: {e}")
|
||||
# 5. Provide clear print statements for manual verification
|
||||
print("\n--- Manual Verification Instructions ---")
|
||||
print("Please visually inspect the running GUI application:")
|
||||
print("1. MMA Dashboard: Ensure the 'MMA Dashboard' panel is visible and active.")
|
||||
print("2. Ticket Queue: Verify the 'Ticket Queue' section displays all 5 tickets with correct statuses.")
|
||||
print("3. Progress Bar: Check that the progress bar correctly reflects the completed/total tickets.")
|
||||
print("4. Approval Modal: Confirm that an 'MMA Step Approval' modal has appeared.")
|
||||
print("\n--------------------------------------")
|
||||
print("The test script has finished its automated actions.")
|
||||
print("The GUI application is still running. Press Enter to exit.")
|
||||
try:
|
||||
input()
|
||||
except EOFError:
|
||||
pass
|
||||
print("\nStopping GUI process...")
|
||||
if gui_process:
|
||||
gui_process.terminate()
|
||||
gui_process.wait(timeout=5)
|
||||
print("Visual MMA verification test script finished.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_visual_mma_verification()
|
||||
run_visual_mma_verification()
|
||||
|
||||
@@ -11,7 +11,7 @@ from api_hook_client import ApiHookClient
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_mma_epic_lifecycle(live_gui):
|
||||
"""
|
||||
"""
|
||||
Integration test for the full MMA Epic lifecycle.
|
||||
1. Start App.
|
||||
2. Trigger 'New Epic' request.
|
||||
@@ -20,76 +20,61 @@ def test_mma_epic_lifecycle(live_gui):
|
||||
5. Verify Tier 2 generates tickets.
|
||||
6. Verify execution loop starts.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=15), "API hook server failed to start."
|
||||
|
||||
print("[Test] Initializing MMA Epic lifecycle test...")
|
||||
|
||||
# 0. Setup: Ensure we have a project and are in a clean state
|
||||
client.click("btn_reset")
|
||||
time.sleep(1)
|
||||
|
||||
# 1. Set Epic input
|
||||
epic_text = "Improve the logging system to include timestamps in all tool calls."
|
||||
print(f"[Test] Setting Epic input: {epic_text}")
|
||||
client.set_value("mma_epic_input", epic_text)
|
||||
|
||||
# 2. Trigger 'New Epic' (Plan Epic)
|
||||
print("[Test] Clicking 'Plan Epic (Tier 1)'...")
|
||||
client.click("btn_mma_plan_epic")
|
||||
|
||||
# 3. Verify that Tier 1 generates tracks
|
||||
print("[Test] Polling for Tier 1 tracks...")
|
||||
tracks_generated = False
|
||||
for i in range(120):
|
||||
status = client.get_value("ai_status")
|
||||
# Check if the proposal modal is shown or status changed
|
||||
if status and "Epic tracks generated" in str(status):
|
||||
tracks_generated = True
|
||||
print(f"[Test] Tracks generated after {i}s")
|
||||
break
|
||||
time.sleep(1)
|
||||
|
||||
assert tracks_generated, "Tier 1 failed to generate tracks within 60 seconds."
|
||||
|
||||
# 4. Trigger 'Start Track' for the first track
|
||||
print("[Test] Triggering 'Start Track' for track index 0...")
|
||||
client.click("btn_mma_start_track", user_data={"index": 0})
|
||||
|
||||
# 5. Verify that Tier 2 generates tickets and starts execution
|
||||
print("[Test] Polling for Tier 2 ticket generation and execution start...")
|
||||
execution_started = False
|
||||
for i in range(60):
|
||||
mma_status = client.get_mma_status()
|
||||
status_str = mma_status.get("mma_status", "idle")
|
||||
active_tier = mma_status.get("active_tier", "")
|
||||
|
||||
if status_str == "running" or "Tier 3" in str(active_tier):
|
||||
execution_started = True
|
||||
print(f"[Test] Execution started (Status: {status_str}, Tier: {active_tier}) after {i}s")
|
||||
break
|
||||
|
||||
current_ai_status = client.get_value("ai_status")
|
||||
if i % 5 == 0:
|
||||
print(f" ... still waiting. Current AI Status: {current_ai_status}")
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
assert execution_started, "Tier 2 failed to generate tickets or execution failed to start within 60 seconds."
|
||||
|
||||
# 6. Final verification of MMA state
|
||||
final_mma = client.get_mma_status()
|
||||
print(f"[Test] Final MMA Status: {final_mma.get('mma_status')}")
|
||||
print(f"[Test] Active Tier: {final_mma.get('active_tier')}")
|
||||
print(f"[Test] Ticket Count: {len(final_mma.get('active_tickets', []))}")
|
||||
|
||||
assert final_mma.get("mma_status") in ["running", "done", "blocked"]
|
||||
assert len(final_mma.get("active_tickets", [])) > 0
|
||||
|
||||
print("[Test] MMA Epic lifecycle verification successful!")
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=15), "API hook server failed to start."
|
||||
print("[Test] Initializing MMA Epic lifecycle test...")
|
||||
# 0. Setup: Ensure we have a project and are in a clean state
|
||||
client.click("btn_reset")
|
||||
time.sleep(1)
|
||||
# 1. Set Epic input
|
||||
epic_text = "Improve the logging system to include timestamps in all tool calls."
|
||||
print(f"[Test] Setting Epic input: {epic_text}")
|
||||
client.set_value("mma_epic_input", epic_text)
|
||||
# 2. Trigger 'New Epic' (Plan Epic)
|
||||
print("[Test] Clicking 'Plan Epic (Tier 1)'...")
|
||||
client.click("btn_mma_plan_epic")
|
||||
# 3. Verify that Tier 1 generates tracks
|
||||
print("[Test] Polling for Tier 1 tracks...")
|
||||
tracks_generated = False
|
||||
for i in range(120):
|
||||
status = client.get_value("ai_status")
|
||||
# Check if the proposal modal is shown or status changed
|
||||
if status and "Epic tracks generated" in str(status):
|
||||
tracks_generated = True
|
||||
print(f"[Test] Tracks generated after {i}s")
|
||||
break
|
||||
time.sleep(1)
|
||||
assert tracks_generated, "Tier 1 failed to generate tracks within 60 seconds."
|
||||
# 4. Trigger 'Start Track' for the first track
|
||||
print("[Test] Triggering 'Start Track' for track index 0...")
|
||||
client.click("btn_mma_start_track", user_data={"index": 0})
|
||||
# 5. Verify that Tier 2 generates tickets and starts execution
|
||||
print("[Test] Polling for Tier 2 ticket generation and execution start...")
|
||||
execution_started = False
|
||||
for i in range(60):
|
||||
mma_status = client.get_mma_status()
|
||||
status_str = mma_status.get("mma_status", "idle")
|
||||
active_tier = mma_status.get("active_tier", "")
|
||||
if status_str == "running" or "Tier 3" in str(active_tier):
|
||||
execution_started = True
|
||||
print(f"[Test] Execution started (Status: {status_str}, Tier: {active_tier}) after {i}s")
|
||||
break
|
||||
current_ai_status = client.get_value("ai_status")
|
||||
if i % 5 == 0:
|
||||
print(f" ... still waiting. Current AI Status: {current_ai_status}")
|
||||
time.sleep(1)
|
||||
assert execution_started, "Tier 2 failed to generate tickets or execution failed to start within 60 seconds."
|
||||
# 6. Final verification of MMA state
|
||||
final_mma = client.get_mma_status()
|
||||
print(f"[Test] Final MMA Status: {final_mma.get('mma_status')}")
|
||||
print(f"[Test] Active Tier: {final_mma.get('active_tier')}")
|
||||
print(f"[Test] Ticket Count: {len(final_mma.get('active_tickets', []))}")
|
||||
assert final_mma.get("mma_status") in ["running", "done", "blocked"]
|
||||
assert len(final_mma.get("active_tickets", [])) > 0
|
||||
print("[Test] MMA Epic lifecycle verification successful!")
|
||||
|
||||
if __name__ == "__main__":
|
||||
# If run directly, try to use pytest
|
||||
import subprocess
|
||||
# Using sys.executable to ensure we use the same environment
|
||||
subprocess.run([sys.executable, "-m", "pytest", "-v", __file__])
|
||||
# If run directly, try to use pytest
|
||||
import subprocess
|
||||
# Using sys.executable to ensure we use the same environment
|
||||
subprocess.run([sys.executable, "-m", "pytest", "-v", __file__])
|
||||
|
||||
@@ -10,35 +10,30 @@ from api_hook_client import ApiHookClient
|
||||
|
||||
@pytest.mark.integration
|
||||
def test_mma_epic_simulation(live_gui):
|
||||
"""
|
||||
"""
|
||||
Integration test for MMA epic simulation.
|
||||
Red Phase: asserts False.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=10)
|
||||
|
||||
# Try selecting MMA Dashboard tab if applicable (using typical naming convention)
|
||||
try:
|
||||
client.select_tab('main_tab_bar', 'tab_mma')
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Set model to mock to avoid real API calls and timeouts
|
||||
try:
|
||||
client.set_value('current_model', 'mock')
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
client.set_value('mma_epic_input', 'Build a simple calculator')
|
||||
client.click('btn_mma_plan_epic')
|
||||
|
||||
# Poll client.get_mma_status() every 1 second (up to 30 seconds)
|
||||
success = False
|
||||
for i in range(30):
|
||||
status = client.get_mma_status()
|
||||
if status and status.get('tracks') and len(status['tracks']) > 0:
|
||||
success = True
|
||||
break
|
||||
time.sleep(1)
|
||||
|
||||
assert success, "Failed to generate at least one track."
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=10)
|
||||
# Try selecting MMA Dashboard tab if applicable (using typical naming convention)
|
||||
try:
|
||||
client.select_tab('main_tab_bar', 'tab_mma')
|
||||
except Exception:
|
||||
pass
|
||||
# Set model to mock to avoid real API calls and timeouts
|
||||
try:
|
||||
client.set_value('current_model', 'mock')
|
||||
except Exception:
|
||||
pass
|
||||
client.set_value('mma_epic_input', 'Build a simple calculator')
|
||||
client.click('btn_mma_plan_epic')
|
||||
# Poll client.get_mma_status() every 1 second (up to 30 seconds)
|
||||
success = False
|
||||
for i in range(30):
|
||||
status = client.get_mma_status()
|
||||
if status and status.get('tracks') and len(status['tracks']) > 0:
|
||||
success = True
|
||||
break
|
||||
time.sleep(1)
|
||||
assert success, "Failed to generate at least one track."
|
||||
|
||||
Reference in New Issue
Block a user