checkpoint: massive refactor

This commit is contained in:
2026-02-28 09:06:45 -05:00
parent f2512c30e9
commit d36632c21a
149 changed files with 16255 additions and 17722 deletions

View File

@@ -15,6 +15,7 @@ import tomllib
import re
import glob
from pathlib import Path, PureWindowsPath
from typing import Any
import summarize
import project_manager
from file_cache import ASTParser
@@ -39,7 +40,6 @@ def is_absolute_with_drive(entry: str) -> bool:
def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
has_drive = is_absolute_with_drive(entry)
is_wildcard = "*" in entry
matches = []
if is_wildcard:
root = Path(entry) if has_drive else base_dir / entry
@@ -47,7 +47,6 @@ def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
else:
p = Path(entry) if has_drive else (base_dir / entry).resolve()
matches = [p]
# Blacklist filter
filtered = []
for p in matches:
@@ -55,7 +54,6 @@ def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
if name == "history.toml" or name.endswith("_history.toml"):
continue
filtered.append(p)
return sorted(filtered)
def build_discussion_section(history: list[str]) -> str:
@@ -64,14 +62,13 @@ def build_discussion_section(history: list[str]) -> str:
sections.append(f"### Discussion Excerpt {i}\n\n{paste.strip()}")
return "\n\n---\n\n".join(sections)
def build_files_section(base_dir: Path, files: list[str | dict]) -> str:
def build_files_section(base_dir: Path, files: list[str | dict[str, Any]]) -> str:
sections = []
for entry_raw in files:
if isinstance(entry_raw, dict):
entry = entry_raw.get("path")
else:
entry = entry_raw
paths = resolve_paths(base_dir, entry)
if not paths:
sections.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```")
@@ -104,8 +101,7 @@ def build_screenshots_section(base_dir: Path, screenshots: list[str]) -> str:
sections.append(f"### `{original}`\n\n![{path.name}]({path.as_posix()})")
return "\n\n---\n\n".join(sections)
def build_file_items(base_dir: Path, files: list[str | dict]) -> list[dict]:
def build_file_items(base_dir: Path, files: list[str | dict[str, Any]]) -> list[dict[str, Any]]:
"""
Return a list of dicts describing each file, for use by ai_client when it
wants to upload individual files rather than inline everything as markdown.
@@ -126,7 +122,6 @@ def build_file_items(base_dir: Path, files: list[str | dict]) -> list[dict]:
else:
entry = entry_raw
tier = None
paths = resolve_paths(base_dir, entry)
if not paths:
items.append({"path": None, "entry": entry, "content": f"ERROR: no files matched: {entry}", "error": True, "mtime": 0.0, "tier": tier})
@@ -147,7 +142,7 @@ def build_file_items(base_dir: Path, files: list[str | dict]) -> list[dict]:
items.append({"path": path, "entry": entry, "content": content, "error": error, "mtime": mtime, "tier": tier})
return items
def build_summary_section(base_dir: Path, files: list[str | dict]) -> str:
def build_summary_section(base_dir: Path, files: list[str | dict[str, Any]]) -> str:
"""
Build a compact summary section using summarize.py — one short block per file.
Used as the initial <context> block instead of full file contents.
@@ -155,7 +150,7 @@ def build_summary_section(base_dir: Path, files: list[str | dict]) -> str:
items = build_file_items(base_dir, files)
return summarize.build_summary_markdown(items)
def _build_files_section_from_items(file_items: list[dict]) -> str:
def _build_files_section_from_items(file_items: list[dict[str, Any]]) -> str:
"""Build the files markdown section from pre-read file items (avoids double I/O)."""
sections = []
for item in file_items:
@@ -171,8 +166,7 @@ def _build_files_section_from_items(file_items: list[dict]) -> str:
sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
return "\n\n---\n\n".join(sections)
def build_markdown_from_items(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
def build_markdown_from_items(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
"""Build markdown from pre-read file items instead of re-reading from disk."""
parts = []
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
@@ -188,35 +182,29 @@ def build_markdown_from_items(file_items: list[dict], screenshot_base_dir: Path,
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def build_markdown_no_history(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
def build_markdown_no_history(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
"""Build markdown with only files + screenshots (no history). Used for stable caching."""
return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history=[], summary_only=summary_only)
def build_discussion_text(history: list[str]) -> str:
"""Build just the discussion history section text. Returns empty string if no history."""
if not history:
return ""
return "## Discussion History\n\n" + build_discussion_section(history)
def build_tier1_context(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
def build_tier1_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
"""
Tier 1 Context: Strategic/Orchestration.
Full content for core conductor files and files with tier=1, summaries for others.
"""
core_files = {"product.md", "tech-stack.md", "workflow.md", "tracks.md"}
parts = []
# Files section
if file_items:
sections = []
for item in file_items:
path = item.get("path")
name = path.name if path else ""
if name in core_files or item.get("tier") == 1:
# Include in full
sections.append("### `" + (item.get("entry") or str(path)) + "`\n\n" +
@@ -225,47 +213,38 @@ def build_tier1_context(file_items: list[dict], screenshot_base_dir: Path, scree
# Summarize
sections.append("### `" + (item.get("entry") or str(path)) + "`\n\n" +
summarize.summarise_file(path, item.get("content", "")))
parts.append("## Files (Tier 1 - Mixed)\n\n" + "\n\n---\n\n".join(sections))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def build_tier2_context(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
def build_tier2_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
"""
Tier 2 Context: Architectural/Tech Lead.
Full content for all files (standard behavior).
"""
return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history, summary_only=False)
def build_tier3_context(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], focus_files: list[str]) -> str:
def build_tier3_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], focus_files: list[str]) -> str:
"""
Tier 3 Context: Execution/Worker.
Full content for focus_files and files with tier=3, summaries/skeletons for others.
"""
parts = []
if file_items:
sections = []
for item in file_items:
path = item.get("path")
entry = item.get("entry", "")
path_str = str(path) if path else ""
# Check if this file is in focus_files (by name or path)
is_focus = False
for focus in focus_files:
if focus == entry or (path and focus == path.name) or focus in path_str:
is_focus = True
break
if is_focus or item.get("tier") == 3:
sections.append("### `" + (entry or path_str) + "`\n\n" +
f"```{path.suffix.lstrip('.') if path and path.suffix else 'text'}\n{item.get('content', '')}\n```")
@@ -281,19 +260,14 @@ def build_tier3_context(file_items: list[dict], screenshot_base_dir: Path, scree
sections.append(f"### `{entry or path_str}`\n\n" + summarize.summarise_file(path, content))
else:
sections.append(f"### `{entry or path_str}`\n\n" + summarize.summarise_file(path, content))
parts.append("## Files (Tier 3 - Focused)\n\n" + "\n\n---\n\n".join(sections))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def build_markdown(base_dir: Path, files: list[str | dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
def build_markdown(base_dir: Path, files: list[str | dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
parts = []
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
if files:
@@ -308,7 +282,7 @@ def build_markdown(base_dir: Path, files: list[str | dict], screenshot_base_dir:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def run(config: dict) -> tuple[str, Path, list[dict]]:
def run(config: dict[str, Any]) -> tuple[str, Path, list[dict[str, Any]]]:
namespace = config.get("project", {}).get("name")
if not namespace:
namespace = config.get("output", {}).get("namespace", "project")
@@ -318,7 +292,6 @@ def run(config: dict) -> tuple[str, Path, list[dict]]:
screenshot_base_dir = Path(config.get("screenshots", {}).get("base_dir", "."))
screenshots = config.get("screenshots", {}).get("paths", [])
history = config.get("discussion", {}).get("history", [])
output_dir.mkdir(parents=True, exist_ok=True)
increment = find_next_increment(output_dir, namespace)
output_file = output_dir / f"{namespace}_{increment:03d}.md"
@@ -330,26 +303,22 @@ def run(config: dict) -> tuple[str, Path, list[dict]]:
output_file.write_text(markdown, encoding="utf-8")
return markdown, output_file, file_items
def main():
# Load global config to find active project
def main() -> None:
# Load global config to find active project
config_path = Path("config.toml")
if not config_path.exists():
print("config.toml not found.")
return
with open(config_path, "rb") as f:
global_cfg = tomllib.load(f)
active_path = global_cfg.get("projects", {}).get("active")
if not active_path:
print("No active project found in config.toml.")
return
# Use project_manager to load project (handles history segregation)
proj = project_manager.load_project(active_path)
# Use flat_config to make it compatible with aggregate.run()
config = project_manager.flat_config(proj)
markdown, output_file, _ = run(config)
print(f"Written: {output_file}")

File diff suppressed because it is too large Load Diff

View File

@@ -24,11 +24,9 @@ class ApiHookClient:
def _make_request(self, method, endpoint, data=None, timeout=None):
url = f"{self.base_url}{endpoint}"
headers = {'Content-Type': 'application/json'}
last_exception = None
# Increase default request timeout for local server
req_timeout = timeout if timeout is not None else 2.0
for attempt in range(self.max_retries + 1):
try:
if method == 'GET':
@@ -37,7 +35,6 @@ class ApiHookClient:
response = requests.post(url, json=data, headers=headers, timeout=req_timeout)
else:
raise ValueError(f"Unsupported HTTP method: {method}")
response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
return response.json()
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
@@ -54,7 +51,6 @@ class ApiHookClient:
raise requests.exceptions.HTTPError(f"HTTP error {e.response.status_code} for {endpoint}: {e.response.text}") from e
except json.JSONDecodeError as e:
raise ValueError(f"Failed to decode JSON from response for {endpoint}: {response.text}") from e
if last_exception:
raise last_exception
@@ -133,7 +129,6 @@ class ApiHookClient:
return v
except Exception:
pass
try:
# Try GET fallback
res = self._make_request('GET', f'/api/gui/value/{item}')
@@ -143,7 +138,6 @@ class ApiHookClient:
return v
except Exception:
pass
try:
# Fallback for thinking/live/prior which are in diagnostics
diag = self._make_request('GET', '/api/gui/diagnostics')

View File

@@ -7,12 +7,14 @@ import session_logger
class HookServerInstance(ThreadingHTTPServer):
"""Custom HTTPServer that carries a reference to the main App instance."""
def __init__(self, server_address, RequestHandlerClass, app):
super().__init__(server_address, RequestHandlerClass)
self.app = app
class HookHandler(BaseHTTPRequestHandler):
"""Handles incoming HTTP requests for the API hooks."""
def do_GET(self):
app = self.server.app
session_logger.log_api_hook("GET", self.path, "")
@@ -61,7 +63,6 @@ class HookHandler(BaseHTTPRequestHandler):
data = json.loads(body.decode('utf-8'))
field_tag = data.get("field")
print(f"[DEBUG] Hook Server: get_value for {field_tag}")
event = threading.Event()
result = {"value": None}
@@ -76,13 +77,11 @@ class HookHandler(BaseHTTPRequestHandler):
print(f"[DEBUG] Hook Server: {field_tag} NOT in settable_fields")
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": get_val
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
@@ -104,13 +103,11 @@ class HookHandler(BaseHTTPRequestHandler):
result["value"] = getattr(app, attr, None)
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": get_val
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
@@ -133,13 +130,11 @@ class HookHandler(BaseHTTPRequestHandler):
result["pending_approval"] = app._pending_mma_approval is not None
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": get_mma
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
@@ -162,13 +157,11 @@ class HookHandler(BaseHTTPRequestHandler):
result["prior"] = getattr(app, "is_viewing_prior_session", False)
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": check_all
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
@@ -188,7 +181,6 @@ class HookHandler(BaseHTTPRequestHandler):
body = self.rfile.read(content_length)
body_str = body.decode('utf-8') if body else ""
session_logger.log_api_hook("POST", self.path, body_str)
try:
data = json.loads(body_str) if body_str else {}
if self.path == '/api/project':
@@ -209,7 +201,6 @@ class HookHandler(BaseHTTPRequestHandler):
elif self.path == '/api/gui':
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append(data)
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
@@ -218,14 +209,11 @@ class HookHandler(BaseHTTPRequestHandler):
elif self.path == '/api/ask':
request_id = str(uuid.uuid4())
event = threading.Event()
if not hasattr(app, '_pending_asks'):
app._pending_asks = {}
if not hasattr(app, '_ask_responses'):
app._ask_responses = {}
app._pending_asks[request_id] = event
# Emit event for test/client discovery
with app._api_event_queue_lock:
app._api_event_queue.append({
@@ -233,20 +221,17 @@ class HookHandler(BaseHTTPRequestHandler):
"request_id": request_id,
"data": data
})
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"type": "ask",
"request_id": request_id,
"data": data
})
if event.wait(timeout=60.0):
response_data = app._ask_responses.get(request_id)
# Clean up response after reading
if request_id in app._ask_responses:
del app._ask_responses[request_id]
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
@@ -257,26 +242,21 @@ class HookHandler(BaseHTTPRequestHandler):
self.send_response(504)
self.end_headers()
self.wfile.write(json.dumps({'error': 'timeout'}).encode('utf-8'))
elif self.path == '/api/ask/respond':
request_id = data.get('request_id')
response_data = data.get('response')
if request_id and hasattr(app, '_pending_asks') and request_id in app._pending_asks:
app._ask_responses[request_id] = response_data
event = app._pending_asks[request_id]
event.set()
# Clean up pending ask entry
del app._pending_asks[request_id]
# Queue GUI task to clear the dialog
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "clear_ask",
"request_id": request_id
})
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
@@ -306,29 +286,24 @@ class HookServer:
def start(self):
if self.thread and self.thread.is_alive():
return
is_gemini_cli = getattr(self.app, 'current_provider', '') == 'gemini_cli'
if not getattr(self.app, 'test_hooks_enabled', False) and not is_gemini_cli:
return
# Ensure the app has the task queue and lock initialized
if not hasattr(self.app, '_pending_gui_tasks'):
self.app._pending_gui_tasks = []
if not hasattr(self.app, '_pending_gui_tasks_lock'):
self.app._pending_gui_tasks_lock = threading.Lock()
# Initialize ask-related dictionaries
if not hasattr(self.app, '_pending_asks'):
self.app._pending_asks = {}
if not hasattr(self.app, '_ask_responses'):
self.app._ask_responses = {}
# Event queue for test script subscriptions
if not hasattr(self.app, '_api_event_queue'):
self.app._api_event_queue = []
if not hasattr(self.app, '_api_event_queue_lock'):
self.app._api_event_queue_lock = threading.Lock()
self.server = HookServerInstance(('127.0.0.1', self.port), HookHandler, self.app)
self.thread = threading.Thread(target=self.server.serve_forever, daemon=True)
self.thread.start()

View File

@@ -17,9 +17,8 @@ def run_diag(role, prompt):
return str(e)
if __name__ == "__main__":
# Test 1: Simple read
# Test 1: Simple read
print("TEST 1: read_file")
run_diag("tier3-worker", "Read the file 'pyproject.toml' and tell me the version of the project. ONLY the version string.")
print("\nTEST 2: run_shell_command")
run_diag("tier3-worker", "Use run_shell_command to execute 'echo HELLO_SUBAGENT' and return the output. ONLY the output.")

View File

@@ -22,7 +22,6 @@ def test_subagent_script_qa_live():
"""Verify that the QA role works and returns a compressed fix."""
prompt = "Traceback (most recent call last): File 'test.py', line 1, in <module> 1/0 ZeroDivisionError: division by zero"
result = run_ps_script("QA", prompt)
assert result.returncode == 0
# Expected output should mention the fix for division by zero
assert "zero" in result.stdout.lower()
@@ -33,7 +32,6 @@ def test_subagent_script_worker_live():
"""Verify that the Worker role works and returns code."""
prompt = "Write a python function that returns 'hello world'"
result = run_ps_script("Worker", prompt)
assert result.returncode == 0
assert "def" in result.stdout.lower()
assert "hello" in result.stdout.lower()
@@ -42,7 +40,6 @@ def test_subagent_script_utility_live():
"""Verify that the Utility role works."""
prompt = "Tell me 'True' if 1+1=2, otherwise 'False'"
result = run_ps_script("Utility", prompt)
assert result.returncode == 0
assert "true" in result.stdout.lower()
@@ -51,7 +48,6 @@ def test_subagent_isolation_live():
# This prompt asks the sub-agent about a 'secret' mentioned only here, not in its prompt.
prompt = "What is the secret code I just told you? If I didn't tell you, say 'UNKNOWN'."
result = run_ps_script("Utility", prompt)
assert result.returncode == 0
# A stateless agent should not know any previous context.
assert "unknown" in result.stdout.lower()

View File

@@ -8,7 +8,6 @@ def test_parser_role_choices():
parser = create_parser()
valid_roles = ['tier1', 'tier2', 'tier3', 'tier4']
test_prompt = "Analyze the codebase for bottlenecks."
for role in valid_roles:
args = parser.parse_args(['--role', role, test_prompt])
assert args.role == role
@@ -57,37 +56,28 @@ def test_execute_agent():
role = "tier3-worker"
prompt = "Write a unit test."
docs = ["file1.py", "docs/spec.md"]
expected_model = "gemini-2.5-flash-lite"
mock_stdout = "Mocked AI Response"
with patch("subprocess.run") as mock_run:
mock_process = MagicMock()
mock_process.stdout = mock_stdout
mock_process.returncode = 0
mock_run.return_value = mock_process
result = execute_agent(role, prompt, docs)
mock_run.assert_called_once()
args, kwargs = mock_run.call_args
cmd_list = args[0]
assert cmd_list[0] == "powershell.exe"
assert "-Command" in cmd_list
ps_cmd = cmd_list[cmd_list.index("-Command") + 1]
assert "gemini" in ps_cmd
assert f"--model {expected_model}" in ps_cmd
# Verify input contains the prompt and system directive
input_text = kwargs.get("input")
assert "STRICT SYSTEM DIRECTIVE" in input_text
assert "TASK: Write a unit test." in input_text
assert kwargs.get("capture_output") is True
assert kwargs.get("text") is True
assert result == mock_stdout
def test_get_dependencies(tmp_path):
@@ -102,8 +92,8 @@ def test_get_dependencies(tmp_path):
dependencies = get_dependencies(str(filepath))
assert dependencies == ['os', 'sys', 'file_cache', 'mcp_client']
import re
def test_execute_agent_logging(tmp_path):
log_file = tmp_path / "mma_delegation.log"
# mma_exec now uses logs/agents/ for individual logs and logs/mma_delegation.log for master
@@ -130,7 +120,6 @@ def test_execute_agent_tier3_injection(tmp_path):
dep_content = "def do_work():\n pass\n\ndef other_func():\n print('hello')\n"
dep_file = tmp_path / "dependency.py"
dep_file.write_text(dep_content)
# We need to ensure generate_skeleton is mockable or working
old_cwd = os.getcwd()
os.chdir(tmp_path)

View File

@@ -15,24 +15,19 @@ def log_message(msg):
timestamp = "2026-02-25"
print(f"[{timestamp}] {msg}")
'''
skeleton = generate_skeleton(sample_code)
# Check that signatures are preserved
assert "class Calculator:" in skeleton
assert "def add(self, a: int, b: int) -> int:" in skeleton
assert "def log_message(msg):" in skeleton
# Check that docstrings are preserved
assert '"""Performs basic math operations."""' in skeleton
assert '"""Adds two numbers."""' in skeleton
# Check that implementation details are removed
assert "result = a + b" not in skeleton
assert "return result" not in skeleton
assert "timestamp =" not in skeleton
assert "print(" not in skeleton
# Check that bodies are replaced with ellipsis
assert "..." in skeleton

View File

@@ -9,5 +9,5 @@ This file tracks all major tracks for the project. Each track has its own detail
---
- [ ] **Track: AI-Optimized Python Style Refactor**
- [~] **Track: AI-Optimized Python Style Refactor**
*Link: [./tracks/python_style_refactor_20260227/](./tracks/python_style_refactor_20260227/)*

View File

@@ -6,14 +6,18 @@
- [x] Task: Conductor - User Manual Verification 'Phase 1: Pilot and Tooling' (Protocol in workflow.md) [checkpoint: Phase1]
## Phase 2: Core Refactor - Indentation and Newlines
- [~] Task: Conductor - Refactor Primary Engine Modules (`ai_client.py`, `aggregate.py`, `mcp_client.py`, `shell_runner.py`).
- [ ] Task: Conductor - Refactor Project & Session Management Modules (`project_manager.py`, `session_logger.py`).
- [ ] Task: Conductor - Refactor UI Modules (`gui_2.py`, `gui_legacy.py`, `theme.py`, `theme_2.py`).
- [ ] Task: Conductor - Refactor Remaining Utility and Support Modules (`events.py`, `file_cache.py`, `models.py`, `mma_prompts.py`).
- [ ] Task: Conductor - User Manual Verification 'Phase 2: Indentation and Newline Refactor' (Protocol in workflow.md)
- [x] Task: Conductor - Refactor Primary Engine Modules (`ai_client.py`, `aggregate.py`, `mcp_client.py`, `shell_runner.py`). [db65162]
- [x] Task: Conductor - Refactor Project & Session Management Modules (`project_manager.py`, `session_logger.py`). [db65162]
- [x] Task: Conductor - Refactor UI Modules (`gui_2.py`, `gui_legacy.py`, `theme.py`, `theme_2.py`). [db65162]
- [x] Task: Conductor - Refactor Remaining Utility and Support Modules (`events.py`, `file_cache.py`, `models.py`, `mma_prompts.py`). [db65162]
- [x] Task: Conductor - User Manual Verification 'Phase 2: Indentation and Newline Refactor' (Protocol in workflow.md) [checkpoint: Phase2]
## Phase 3: AI-Optimized Metadata and Final Cleanup
- [ ] Task: Conductor - Implement Strict Type Hinting and Compact Imports across the Entire Codebase.
- [~] Task: Conductor - Implement Strict Type Hinting across the Entire Codebase.
- [x] Engine Core (`ai_client.py`, `mcp_client.py`, `aggregate.py`, `shell_runner.py`)
- [x] Develop/Integrate Surgical AST Tools in `mcp_client.py` and `tools.json`.
- [x] Management Modules (project_manager.py, session_logger.py) [19c28a1]
- [~] UI Modules (`gui_2.py`, `gui_legacy.py`)
- [ ] Task: Conductor - Update `conductor/code_styleguides/python.md` with the new AI-optimized standard.
- [ ] Task: Conductor - User Manual Verification 'Phase 3: Metadata and Final Documentation' (Protocol in workflow.md)

View File

@@ -8,7 +8,6 @@ Refactor the Python codebase to a "Single-Space, Ultra-Compact" style specifical
- **Newlines (Ultra-Compact):**
- Maximum **one (1)** blank line between top-level definitions (classes, functions).
- **Zero (0)** blank lines inside function or method bodies.
- **Imports (Compact):** Consolidate imports into compact blocks to reduce vertical space.
- **Typing (Strict):** Ensure all function and method signatures include strict type hints for `Args` and `Returns`.
- **Scope:**
- Target: All `.py` files in the project root and subdirectories.
@@ -19,14 +18,22 @@ Refactor the Python codebase to a "Single-Space, Ultra-Compact" style specifical
- **AST Compatibility:** The style must not interfere with existing AST tools (`ast`, `tree-sitter`) used for interface extraction and code outlines.
- **Token Efficiency:** The primary goal is to reduce the total token count of the codebase.
## 4. Acceptance Criteria
- [ ] Codebase indentation is uniformly 1 space.
- [ ] No `.py` file contains consecutive blank lines.
- [ ] No `.py` file contains blank lines within function or method bodies.
- [ ] All functions/methods have complete type hints.
- [ ] Application remains functional and passes existing tests.
## 4. Current Status (Progress Checkpoint)
- **Phase 1: Completed.** Tooling developed (`scripts/ai_style_formatter.py`) and verified.
- **Phase 2: Completed.** Global codebase refactor for indentation and ultra-compact newlines (including 1-line gap before definitions) applied to all Python files.
- **Phase 3: In Progress.**
- **Surgical Tooling:** New tools added to `mcp_client.py` and `.gemini/tools.json`: `get_file_slice`, `set_file_slice`, `py_update_definition`, `py_get_signature`, `py_set_signature`, `py_get_class_summary`, `py_get_var_declaration`, `py_set_var_declaration`.
- **Core Typing:** `ai_client.py`, `mcp_client.py`, `aggregate.py`, `shell_runner.py` fully updated with strict type hints.
- **Remaining:** `project_manager.py`, `session_logger.py`, `gui_2.py`, `gui_legacy.py` need strict typing.
## 5. Out of Scope
## 5. Acceptance Criteria
- [x] Codebase indentation is uniformly 1 space.
- [x] No `.py` file contains consecutive blank lines.
- [x] No `.py` file contains blank lines within function or method bodies.
- [~] All functions/methods have complete type hints (Core Engine complete, UI/Manager pending).
- [x] Application remains functional and passes existing tests.
## 6. Out of Scope
- Architectural changes or logic refactoring.
- Modification of non-Python files (e.g., `.md`, `.toml`, `.ps1`).
- Breaking PEP 8 compliance where it's not strictly necessary for token reduction (though indentation and blank lines are explicitly targeted).
- Import compaction (discarded per user request).

View File

@@ -5,7 +5,7 @@
- [x] Task: Implement helper methods in `ApiHookClient` for querying specific DearPyGui item states (e.g., `get_text_value`, `get_node_status`). 2a30e62
## Phase 2: Epic & Track Verification
- [ ] Task: Write the simulation routine to trigger a new Epic and verify the Track Browser updates correctly.
- [~] Task: Write the simulation routine to trigger a new Epic and verify the Track Browser updates correctly.
- [ ] Task: Verify that selecting a newly generated track successfully loads its initial (empty) state into the DAG visualizer.
## Phase 3: DAG & Spawn Interception Verification

View File

@@ -11,27 +11,22 @@ def generate_tickets(track_brief: str, module_skeletons: str) -> list[dict]:
# 1. Set Tier 2 Model (Tech Lead - Flash)
ai_client.set_provider('gemini', 'gemini-2.5-flash-lite')
ai_client.reset_session()
# 2. Construct Prompt
system_prompt = mma_prompts.PROMPTS.get("tier2_sprint_planning")
user_message = (
f"### TRACK BRIEF:\n{track_brief}\n\n"
f"### MODULE SKELETONS:\n{module_skeletons}\n\n"
"Please generate the implementation tickets for this track."
)
# Set custom system prompt for this call
old_system_prompt = ai_client._custom_system_prompt
ai_client.set_custom_system_prompt(system_prompt)
try:
# 3. Call Tier 2 Model
response = ai_client.send(
md_content="",
user_message=user_message
)
# 4. Parse JSON Output
# Extract JSON array from markdown code blocks if present
json_match = response.strip()
@@ -39,13 +34,11 @@ def generate_tickets(track_brief: str, module_skeletons: str) -> list[dict]:
json_match = json_match.split("```json")[1].split("```")[0].strip()
elif "```" in json_match:
json_match = json_match.split("```")[1].split("```")[0].strip()
# If it's still not valid JSON, try to find a [ ... ] block
if not (json_match.startswith('[') and json_match.endswith(']')):
match = re.search(r'\[\s*\{.*\}\s*\]', json_match, re.DOTALL)
if match:
json_match = match.group(0)
tickets = json.loads(json_match)
return tickets
except Exception as e:
@@ -68,20 +61,18 @@ def topological_sort(tickets: list[dict]) -> list[dict]:
ticket_objs = []
for t_data in tickets:
ticket_objs.append(Ticket.from_dict(t_data))
# 2. Use TrackDAG for validation and sorting
dag = TrackDAG(ticket_objs)
try:
sorted_ids = dag.topological_sort()
except ValueError as e:
raise ValueError(f"DAG Validation Error: {e}")
# 3. Return sorted dictionaries
ticket_map = {t['id']: t for t in tickets}
return [ticket_map[tid] for tid in sorted_ids]
if __name__ == "__main__":
# Quick test if run directly
# Quick test if run directly
test_brief = "Implement a new feature."
test_skeletons = "class NewFeature: pass"
tickets = generate_tickets(test_brief, test_skeletons)

View File

@@ -22,7 +22,7 @@ paths = [
"C:\\projects\\manual_slop\\tests\\temp_livetoolssim.toml",
"C:\\projects\\manual_slop\\tests\\temp_liveexecutionsim.toml",
]
active = "C:\\projects\\manual_slop\\tests\\temp_project.toml"
active = "C:\\projects\\manual_slop\\tests\\temp_liveexecutionsim.toml"
[gui.show_windows]
"Context Hub" = true

View File

@@ -6,6 +6,7 @@ class TrackDAG:
Manages a Directed Acyclic Graph of implementation tickets.
Provides methods for dependency resolution, cycle detection, and topological sorting.
"""
def __init__(self, tickets: List[Ticket]):
"""
Initializes the TrackDAG with a list of Ticket objects.
@@ -50,19 +51,15 @@ class TrackDAG:
return True
if ticket_id in visited:
return False
visited.add(ticket_id)
rec_stack.add(ticket_id)
ticket = self.ticket_map.get(ticket_id)
if ticket:
for neighbor in ticket.depends_on:
if is_cyclic(neighbor):
return True
rec_stack.remove(ticket_id)
return False
for ticket in self.tickets:
if ticket.id not in visited:
if is_cyclic(ticket.id):
@@ -79,7 +76,6 @@ class TrackDAG:
"""
if self.has_cycle():
raise ValueError("Dependency cycle detected")
visited = set()
stack = []
@@ -93,10 +89,8 @@ class TrackDAG:
for dep_id in ticket.depends_on:
visit(dep_id)
stack.append(ticket_id)
for ticket in self.tickets:
visit(ticket.id)
return stack
class ExecutionEngine:
@@ -104,6 +98,7 @@ class ExecutionEngine:
A state machine that governs the progression of tasks within a TrackDAG.
Handles automatic queueing and manual task approval.
"""
def __init__(self, dag: TrackDAG, auto_queue: bool = False):
"""
Initializes the ExecutionEngine.
@@ -122,12 +117,10 @@ class ExecutionEngine:
A list of ready Ticket objects.
"""
ready = self.dag.get_ready_tasks()
if self.auto_queue:
for ticket in ready:
if not ticket.step_mode:
ticket.status = "in_progress"
return ready
def approve_task(self, task_id: str):
@@ -145,7 +138,6 @@ class ExecutionEngine:
if not dep or dep.status != "completed":
all_done = False
break
if all_done:
ticket.status = "in_progress"

View File

@@ -49,7 +49,6 @@ class ASTParser:
if body and body.type == "block":
preserve = has_core_logic_decorator(node) or has_hot_comment(node)
print(f"Function {code[node.start_byte:node.start_byte+20].strip()}, preserve={preserve}")
if not preserve:
indent = " " * body.start_point.column
first_stmt = None
@@ -57,7 +56,6 @@ class ASTParser:
if child.type != "comment":
first_stmt = child
break
if first_stmt and is_docstring(first_stmt):
start_byte = first_stmt.end_byte
end_byte = body.end_byte
@@ -67,10 +65,8 @@ class ASTParser:
start_byte = body.start_byte
end_byte = body.end_byte
edits.append((start_byte, end_byte, "..."))
for child in node.children:
walk(child)
walk(tree.root_node)
edits.sort(key=lambda x: x[0], reverse=True)
code_bytes = bytearray(code, "utf8")

View File

@@ -8,6 +8,7 @@ class EventEmitter:
"""
Simple event emitter for decoupled communication between modules.
"""
def __init__(self):
"""Initializes the EventEmitter with an empty listener map."""
self._listeners: Dict[str, List[Callable]] = {}
@@ -41,6 +42,7 @@ class AsyncEventQueue:
"""
Asynchronous event queue for decoupled communication using asyncio.Queue.
"""
def __init__(self):
"""Initializes the AsyncEventQueue with an internal asyncio.Queue."""
self._queue: asyncio.Queue = asyncio.Queue()
@@ -68,6 +70,7 @@ class UserRequestEvent:
"""
Payload for a user request event.
"""
def __init__(self, prompt: str, stable_md: str, file_items: List[Any], disc_text: str, base_dir: str):
self.prompt = prompt
self.stable_md = stable_md

View File

@@ -10,12 +10,12 @@ from typing import Optional
import tree_sitter
import tree_sitter_python
class ASTParser:
"""
Parser for extracting AST-based views of source code.
Currently supports Python.
"""
def __init__(self, language: str):
if language != "python":
raise ValueError(f"Language '{language}' not supported yet.")
@@ -51,7 +51,6 @@ class ASTParser:
if child.type != "comment":
first_stmt = child
break
if first_stmt and is_docstring(first_stmt):
start_byte = first_stmt.end_byte
end_byte = body.end_byte
@@ -61,18 +60,14 @@ class ASTParser:
start_byte = body.start_byte
end_byte = body.end_byte
edits.append((start_byte, end_byte, "..."))
for child in node.children:
walk(child)
walk(tree.root_node)
# Apply edits in reverse to maintain byte offsets
edits.sort(key=lambda x: x[0], reverse=True)
code_bytes = bytearray(code, "utf8")
for start, end, replacement in edits:
code_bytes[start:end] = bytes(replacement, "utf8")
return code_bytes.decode("utf8")
def get_curated_view(self, code: str) -> str:
@@ -120,7 +115,6 @@ class ASTParser:
if body and body.type == "block":
# Check if we should preserve it
preserve = has_core_logic_decorator(node) or has_hot_comment(node)
if not preserve:
indent = " " * body.start_point.column
first_stmt = None
@@ -128,7 +122,6 @@ class ASTParser:
if child.type != "comment":
first_stmt = child
break
if first_stmt and is_docstring(first_stmt):
start_byte = first_stmt.end_byte
end_byte = body.end_byte
@@ -138,36 +131,27 @@ class ASTParser:
start_byte = body.start_byte
end_byte = body.end_byte
edits.append((start_byte, end_byte, "..."))
for child in node.children:
walk(child)
walk(tree.root_node)
# Apply edits in reverse to maintain byte offsets
edits.sort(key=lambda x: x[0], reverse=True)
code_bytes = bytearray(code, "utf8")
for start, end, replacement in edits:
code_bytes[start:end] = bytes(replacement, "utf8")
return code_bytes.decode("utf8")
def reset_client():
pass
def content_block_type(path: Path) -> str:
return "unsupported"
def get_file_id(path: Path) -> Optional[str]:
return None
def evict(path: Path):
pass
def list_cached() -> list[dict]:
return []

View File

@@ -31,38 +31,27 @@ class GeminiCliAdapter:
Uses --prompt flag with a placeholder and sends the content via stdin.
"""
start_time = time.time()
command_parts = [self.binary_path]
if model:
command_parts.extend(['-m', f'"{model}"'])
# Use an empty string placeholder.
command_parts.extend(['--prompt', '""'])
if self.session_id:
command_parts.extend(['--resume', self.session_id])
command_parts.extend(['--output-format', 'stream-json'])
command = " ".join(command_parts)
# Construct the prompt text by prepending system_instruction if available
prompt_text = message
if system_instruction:
prompt_text = f"{system_instruction}\n\n{message}"
accumulated_text = ""
tool_calls = []
env = os.environ.copy()
env["GEMINI_CLI_HOOK_CONTEXT"] = "manual_slop"
process = None
stdout_content = ""
stderr_content = ""
stdin_content = prompt_text
try:
process = subprocess.Popen(
command,
@@ -73,12 +62,9 @@ class GeminiCliAdapter:
shell=True,
env=env
)
stdout_output, stderr_output = process.communicate(input=prompt_text)
stdout_content = stdout_output
stderr_content = stderr_output
for line in stdout_content.splitlines():
line = line.strip()
if not line:
@@ -86,7 +72,6 @@ class GeminiCliAdapter:
try:
data = json.loads(line)
msg_type = data.get("type")
if msg_type == "init":
if "session_id" in data:
self.session_id = data.get("session_id")
@@ -115,7 +100,6 @@ class GeminiCliAdapter:
tool_calls.append(tc)
except json.JSONDecodeError:
continue
except Exception as e:
if process:
process.kill()
@@ -132,7 +116,6 @@ class GeminiCliAdapter:
latency=current_latency
)
self.last_latency = current_latency
return {
"text": accumulated_text,
"tool_calls": tool_calls,

497
gui_2.py

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -9,6 +9,7 @@ class LogPruner:
Ensures that only whitelisted or significant sessions (based on size/content)
are preserved long-term.
"""
def __init__(self, log_registry: LogRegistry, logs_dir: str):
"""
Initializes the LogPruner.
@@ -31,22 +32,17 @@ class LogPruner:
"""
now = datetime.now()
cutoff_time = now - timedelta(hours=24)
# Ensure the base logs directory exists.
if not os.path.isdir(self.logs_dir):
return
# Get sessions that are old and not whitelisted from the registry
old_sessions_to_check = self.log_registry.get_old_non_whitelisted_sessions(cutoff_time)
# Prune sessions if their size is less than 2048 bytes
for session_info in old_sessions_to_check:
session_id = session_info['session_id']
session_path = session_info['path']
if not session_path or not os.path.isdir(session_path):
continue
# Calculate total size of files in the directory
total_size = 0
try:
@@ -55,7 +51,6 @@ class LogPruner:
total_size += entry.stat().st_size
except OSError:
continue
# Prune if the total size is less than 2KB (2048 bytes)
if total_size < 2048: # 2KB
try:

View File

@@ -8,6 +8,7 @@ class LogRegistry:
Manages a persistent registry of session logs using a TOML file.
Tracks session paths, start times, whitelisting status, and metadata.
"""
def __init__(self, registry_path):
"""
Initializes the LogRegistry with a path to the registry file.
@@ -75,7 +76,6 @@ class LogRegistry:
else:
session_data_copy[k] = v
data_to_save[session_id] = session_data_copy
with open(self.registry_path, 'wb') as f:
tomli_w.dump(data_to_save, f)
except Exception as e:
@@ -92,13 +92,11 @@ class LogRegistry:
"""
if session_id in self.data:
print(f"Warning: Session ID '{session_id}' already exists. Overwriting.")
# Store start_time internally as a string to satisfy tests
if isinstance(start_time, datetime):
start_time_str = start_time.isoformat()
else:
start_time_str = start_time
self.data[session_id] = {
'path': path,
'start_time': start_time_str,
@@ -122,11 +120,9 @@ class LogRegistry:
if session_id not in self.data:
print(f"Error: Session ID '{session_id}' not found for metadata update.")
return
# Ensure metadata exists
if self.data[session_id].get('metadata') is None:
self.data[session_id]['metadata'] = {}
# Update fields
self.data[session_id]['metadata']['message_count'] = message_count
self.data[session_id]['metadata']['errors'] = errors
@@ -134,11 +130,9 @@ class LogRegistry:
self.data[session_id]['metadata']['whitelisted'] = whitelisted
self.data[session_id]['metadata']['reason'] = reason
# self.data[session_id]['metadata']['timestamp'] = datetime.utcnow() # Optionally add a timestamp
# Also update the top-level whitelisted flag if provided
if whitelisted is not None:
self.data[session_id]['whitelisted'] = whitelisted
self.save_registry() # Save after update
def is_session_whitelisted(self, session_id):
@@ -154,7 +148,6 @@ class LogRegistry:
session_data = self.data.get(session_id)
if session_data is None:
return False # Non-existent sessions are not whitelisted
# Check the top-level 'whitelisted' flag. If it's not set or False, it's not whitelisted.
return session_data.get('whitelisted', False)
@@ -169,23 +162,19 @@ class LogRegistry:
"""
if session_id not in self.data:
return
session_data = self.data[session_id]
session_path = session_data.get('path')
if not session_path or not os.path.isdir(session_path):
return
total_size_bytes = 0
message_count = 0
found_keywords = []
keywords_to_check = ['ERROR', 'WARNING', 'EXCEPTION']
try:
for entry in os.scandir(session_path):
if entry.is_file():
size = entry.stat().st_size
total_size_bytes += size
# Analyze comms.log for messages and keywords
if entry.name == "comms.log":
try:
@@ -199,11 +188,9 @@ class LogRegistry:
pass
except Exception:
pass
size_kb = total_size_bytes / 1024
whitelisted = False
reason = ""
if found_keywords:
whitelisted = True
reason = f"Found keywords: {', '.join(found_keywords)}"
@@ -213,7 +200,6 @@ class LogRegistry:
elif size_kb > 50:
whitelisted = True
reason = f"Large session size: {size_kb:.1f} KB"
self.update_session_metadata(
session_id,
message_count=message_count,
@@ -245,9 +231,7 @@ class LogRegistry:
start_time = None
else:
start_time = start_time_raw
is_whitelisted = session_data.get('whitelisted', False)
if start_time is not None and start_time < cutoff_datetime and not is_whitelisted:
old_sessions.append({
'session_id': session_id,

File diff suppressed because it is too large Load Diff

View File

@@ -68,12 +68,10 @@ class Track:
"""
# Map ticket IDs to their current status for efficient lookup
status_map = {t.id: t.status for t in self.tickets}
executable = []
for ticket in self.tickets:
if ticket.status != "todo":
continue
# Check if all dependencies are completed
all_deps_completed = True
for dep_id in ticket.depends_on:
@@ -81,10 +79,8 @@ class Track:
if status_map.get(dep_id) != "completed":
all_deps_completed = False
break
if all_deps_completed:
executable.append(ticket)
return executable
@dataclass

View File

@@ -16,6 +16,7 @@ class ConductorEngine:
"""
Orchestrates the execution of tickets within a track.
"""
def __init__(self, track: Track, event_queue: Optional[events.AsyncEventQueue] = None, auto_queue: bool = False):
self.track = track
self.event_queue = event_queue
@@ -31,7 +32,6 @@ class ConductorEngine:
async def _push_state(self, status: str = "running", active_tier: str = None):
if not self.event_queue:
return
payload = {
"status": status,
"active_tier": active_tier,
@@ -54,7 +54,6 @@ class ConductorEngine:
if not isinstance(data, list):
print("Error: JSON input must be a list of ticket definitions.")
return
for ticket_data in data:
# Construct Ticket object, using defaults for optional fields
ticket = Ticket(
@@ -66,11 +65,9 @@ class ConductorEngine:
step_mode=ticket_data.get("step_mode", False)
)
self.track.tickets.append(ticket)
# Rebuild DAG and Engine after parsing new tickets
self.dag = TrackDAG(self.track.tickets)
self.engine = ExecutionEngine(self.dag, auto_queue=self.engine.auto_queue)
except json.JSONDecodeError as e:
print(f"Error parsing JSON tickets: {e}")
except KeyError as e:
@@ -83,11 +80,9 @@ class ConductorEngine:
md_content: The full markdown context (history + files) for AI workers.
"""
await self._push_state(status="running", active_tier="Tier 2 (Tech Lead)")
while True:
# 1. Identify ready tasks
ready_tasks = self.engine.tick()
# 2. Check for completion or blockage
if not ready_tasks:
all_done = all(t.status == "completed" for t in self.track.tickets)
@@ -100,11 +95,9 @@ class ConductorEngine:
# Wait for async tasks to complete
await asyncio.sleep(1)
continue
print("No more executable tickets. Track is blocked or finished.")
await self._push_state(status="blocked", active_tier=None)
break
# 3. Process ready tasks
loop = asyncio.get_event_loop()
for ticket in ready_tasks:
@@ -114,13 +107,11 @@ class ConductorEngine:
ticket.status = "in_progress"
print(f"Executing ticket {ticket.id}: {ticket.description}")
await self._push_state(active_tier=f"Tier 3 (Worker): {ticket.id}")
context = WorkerContext(
ticket_id=ticket.id,
model_name="gemini-2.5-flash-lite",
messages=[]
)
# Offload the blocking lifecycle call to a thread to avoid blocking the async event loop.
# We pass the md_content so the worker has full context.
context_files = ticket.context_requirements if ticket.context_requirements else None
@@ -135,7 +126,6 @@ class ConductorEngine:
md_content
)
await self._push_state(active_tier="Tier 2 (Tech Lead)")
elif ticket.status == "todo" and (ticket.step_mode or not self.engine.auto_queue):
# Task is ready but needs approval
print(f"Ticket {ticket.id} is ready and awaiting approval.")
@@ -151,14 +141,12 @@ def confirm_execution(payload: str, event_queue: events.AsyncEventQueue, ticket_
# We use a list container so the GUI can inject the actual Dialog object back to us
# since the dialog is created in the GUI thread.
dialog_container = [None]
task = {
"action": "mma_step_approval",
"ticket_id": ticket_id,
"payload": payload,
"dialog_container": dialog_container
}
# Push to queue
try:
loop = asyncio.get_event_loop()
@@ -169,16 +157,13 @@ def confirm_execution(payload: str, event_queue: events.AsyncEventQueue, ticket_
except Exception:
# Fallback if no loop
event_queue._queue.put_nowait(("mma_step_approval", task))
# Wait for the GUI to create the dialog and for the user to respond
start = time.time()
while dialog_container[0] is None and time.time() - start < 60:
time.sleep(0.1)
if dialog_container[0]:
approved, final_payload = dialog_container[0].wait()
return approved
return False
def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.AsyncEventQueue, ticket_id: str) -> Tuple[bool, str, str]:
@@ -186,9 +171,7 @@ def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.A
Pushes a spawn approval request to the GUI and waits for response.
Returns (approved, modified_prompt, modified_context)
"""
dialog_container = [None]
task = {
"action": "mma_spawn_approval",
"ticket_id": ticket_id,
@@ -197,7 +180,6 @@ def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.A
"context_md": context_md,
"dialog_container": dialog_container
}
# Push to queue
try:
loop = asyncio.get_event_loop()
@@ -208,15 +190,12 @@ def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.A
except Exception:
# Fallback if no loop
event_queue._queue.put_nowait(("mma_spawn_approval", task))
# Wait for the GUI to create the dialog and for the user to respond
start = time.time()
while dialog_container[0] is None and time.time() - start < 60:
time.sleep(0.1)
if dialog_container[0]:
res = dialog_container[0].wait()
if isinstance(res, dict):
approved = res.get("approved", False)
abort = res.get("abort", False)
@@ -232,10 +211,8 @@ def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.A
modified_prompt = final_payload.get("prompt", prompt)
modified_context = final_payload.get("context_md", context_md)
return approved, modified_prompt, modified_context
return False, prompt, context_md
def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files: List[str] = None, event_queue: events.AsyncEventQueue = None, engine: Optional['ConductorEngine'] = None, md_content: str = ""):
"""
Simulates the lifecycle of a single agent working on a ticket.
@@ -250,7 +227,6 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
"""
# Enforce Context Amnesia: each ticket starts with a clean slate.
ai_client.reset_session()
context_injection = ""
if context_files:
parser = ASTParser(language="python")
@@ -267,7 +243,6 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
context_injection += f"\nFile: {file_path}\n{view}\n"
except Exception as e:
context_injection += f"\nError reading {file_path}: {e}\n"
# Build a prompt for the worker
user_message = (
f"You are assigned to Ticket {ticket.id}.\n"
@@ -275,12 +250,10 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
)
if context_injection:
user_message += f"\nContext Files:\n{context_injection}\n"
user_message += (
"Please complete this task. If you are blocked and cannot proceed, "
"start your response with 'BLOCKED' and explain why."
)
# HITL Clutch: call confirm_spawn if event_queue is provided
if event_queue:
approved, modified_prompt, modified_context = confirm_spawn(
@@ -293,16 +266,14 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
if not approved:
ticket.mark_blocked("Spawn rejected by user.")
return "BLOCKED: Spawn rejected by user."
user_message = modified_prompt
md_content = modified_context
# HITL Clutch: pass the queue and ticket_id to confirm_execution
def clutch_callback(payload: str) -> bool:
if not event_queue:
return True
return confirm_execution(payload, event_queue, ticket.id)
response = ai_client.send(
md_content=md_content,
user_message=user_message,
@@ -310,16 +281,13 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
pre_tool_callback=clutch_callback if ticket.step_mode else None,
qa_callback=ai_client.run_tier4_analysis
)
# Update usage in engine if provided
if engine:
stats = {} # ai_client.get_token_stats() is not available
engine.tier_usage["Tier 3"]["input"] += stats.get("prompt_tokens", 0)
engine.tier_usage["Tier 3"]["output"] += stats.get("candidates_tokens", 0)
if "BLOCKED" in response.upper():
ticket.mark_blocked(response)
else:
ticket.mark_complete()
return response

View File

@@ -13,27 +13,21 @@ def get_track_history_summary() -> str:
Scans conductor/archive/ and conductor/tracks/ to build a summary of past work.
"""
summary_parts = []
archive_path = CONDUCTOR_PATH / "archive"
tracks_path = CONDUCTOR_PATH / "tracks"
paths_to_scan = []
if archive_path.exists():
paths_to_scan.extend(list(archive_path.iterdir()))
if tracks_path.exists():
paths_to_scan.extend(list(tracks_path.iterdir()))
for track_dir in paths_to_scan:
if not track_dir.is_dir():
continue
metadata_file = track_dir / "metadata.json"
spec_file = track_dir / "spec.md"
title = track_dir.name
status = "unknown"
overview = "No overview available."
if metadata_file.exists():
try:
with open(metadata_file, "r", encoding="utf-8") as f:
@@ -42,7 +36,6 @@ def get_track_history_summary() -> str:
status = meta.get("status", status)
except Exception:
pass
if spec_file.exists():
try:
with open(spec_file, "r", encoding="utf-8") as f:
@@ -55,12 +48,9 @@ def get_track_history_summary() -> str:
overview = content[:200] + "..."
except Exception:
pass
summary_parts.append(f"Track: {title}\nStatus: {status}\nOverview: {overview}\n---")
if not summary_parts:
return "No previous tracks found."
return "\n".join(summary_parts)
def generate_tracks(user_request: str, project_config: dict, file_items: list[dict], history_summary: str = None) -> list[dict]:
@@ -70,26 +60,19 @@ def generate_tracks(user_request: str, project_config: dict, file_items: list[di
"""
# 1. Build Repository Map (Summary View)
repo_map = summarize.build_summary_markdown(file_items)
# 2. Construct Prompt
system_prompt = mma_prompts.PROMPTS.get("tier1_epic_init")
user_message_parts = [
f"### USER REQUEST:\n{user_request}\n",
f"### REPOSITORY MAP:\n{repo_map}\n"
]
if history_summary:
user_message_parts.append(f"### TRACK HISTORY:\n{history_summary}\n")
user_message_parts.append("Please generate the implementation tracks for this request.")
user_message = "\n".join(user_message_parts)
# Set custom system prompt for this call
old_system_prompt = ai_client._custom_system_prompt
ai_client.set_custom_system_prompt(system_prompt)
try:
# 3. Call Tier 1 Model (Strategic - Pro)
# Note: We use gemini-1.5-pro or similar high-reasoning model for Tier 1
@@ -97,7 +80,6 @@ def generate_tracks(user_request: str, project_config: dict, file_items: list[di
md_content="", # We pass everything in user_message for clarity
user_message=user_message
)
# 4. Parse JSON Output
try:
# The prompt asks for a JSON array. We need to extract it if the AI added markdown blocks.
@@ -106,7 +88,6 @@ def generate_tracks(user_request: str, project_config: dict, file_items: list[di
json_match = json_match.split("```json")[1].split("```")[0].strip()
elif "```" in json_match:
json_match = json_match.split("```")[1].split("```")[0].strip()
tracks = json.loads(json_match)
# Ensure each track has a 'title' for the GUI
for t in tracks:
@@ -122,12 +103,11 @@ def generate_tracks(user_request: str, project_config: dict, file_items: list[di
ai_client.set_custom_system_prompt(old_system_prompt)
if __name__ == "__main__":
# Quick CLI test
# Quick CLI test
import project_manager
proj = project_manager.load_project("manual_slop.toml")
flat = project_manager.flat_config(proj)
file_items = aggregate.build_file_items(Path("."), flat.get("files", {}).get("paths", []))
print("Testing Tier 1 Track Generation...")
history = get_track_history_summary()
tracks = generate_tracks("Implement a basic unit test for the ai_client.py module.", flat, file_items, history_summary=history)

View File

@@ -11,7 +11,6 @@ class CodeOutliner:
tree = ast.parse(code)
except SyntaxError as e:
return f"ERROR parsing code: {e}"
output = []
def get_docstring(node):
@@ -30,26 +29,21 @@ class CodeOutliner:
output.append(f"{' ' * (indent + 1)}\"\"\"{doc}\"\"\"")
for item in node.body:
walk(item, indent + 1)
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
start_line = node.lineno
end_line = getattr(node, "end_lineno", start_line)
prefix = "[Async Func]" if isinstance(node, ast.AsyncFunctionDef) else "[Func]"
# Check if it's a method
# We can check the indent or the parent, but in AST walk we know if we are inside a ClassDef
# Let's use a simpler heuristic for the outline: if indent > 0, it's likely a method.
if indent > 0:
prefix = "[Method]"
output.append(f"{' ' * indent}{prefix} {node.name} (Lines {start_line}-{end_line})")
doc = get_docstring(node)
if doc:
output.append(f"{' ' * (indent + 1)}\"\"\"{doc}\"\"\"")
for node in tree.body:
walk(node)
return "\n".join(output)
def get_outline(path: Path, code: str) -> str:

View File

@@ -12,11 +12,9 @@ class PerformanceMonitor:
self._process = psutil.Process()
self._cpu_usage = 0.0
self._cpu_lock = threading.Lock()
# Input lag tracking
self._last_input_time = None
self._input_lag_ms = 0.0
# Alerts
self.alert_callback = None
self.thresholds = {
@@ -26,11 +24,9 @@ class PerformanceMonitor:
}
self._last_alert_time = 0
self._alert_cooldown = 30 # seconds
# Detailed profiling
self._component_timings = {}
self._comp_start = {}
# Start CPU usage monitoring thread
self._stop_event = threading.Event()
self._cpu_thread = threading.Thread(target=self._monitor_cpu, daemon=True)
@@ -47,7 +43,6 @@ class PerformanceMonitor:
self._cpu_usage = usage
except Exception:
pass
# Sleep in small increments to stay responsive to stop_event
for _ in range(10):
if self._stop_event.is_set():
@@ -71,18 +66,14 @@ class PerformanceMonitor:
def end_frame(self):
if self._start_time is None:
return
end_time = time.time()
self._last_frame_time = (end_time - self._start_time) * 1000.0
self._frame_count += 1
# Calculate input lag if an input occurred during this frame
if self._last_input_time is not None:
self._input_lag_ms = (end_time - self._last_input_time) * 1000.0
self._last_input_time = None
self._check_alerts()
elapsed_since_fps = end_time - self._fps_last_time
if elapsed_since_fps >= 1.0:
self._fps = self._frame_count / elapsed_since_fps
@@ -92,11 +83,9 @@ class PerformanceMonitor:
def _check_alerts(self):
if not self.alert_callback:
return
now = time.time()
if now - self._last_alert_time < self._alert_cooldown:
return
metrics = self.get_metrics()
alerts = []
if metrics['last_frame_time_ms'] > self.thresholds['frame_time_ms']:
@@ -105,7 +94,6 @@ class PerformanceMonitor:
alerts.append(f"CPU usage high: {metrics['cpu_percent']:.1f}%")
if metrics['input_lag_ms'] > self.thresholds['input_lag_ms']:
alerts.append(f"Input lag high: {metrics['input_lag_ms']:.1f}ms")
if alerts:
self._last_alert_time = now
self.alert_callback("; ".join(alerts))
@@ -113,7 +101,6 @@ class PerformanceMonitor:
def get_metrics(self):
with self._cpu_lock:
cpu_usage = self._cpu_usage
metrics = {
'last_frame_time_ms': self._last_frame_time,
'fps': self._fps,
@@ -122,11 +109,9 @@ class PerformanceMonitor:
}
# Oops, fixed the input lag logic in previous turn, let's keep it consistent
metrics['input_lag_ms'] = self._input_lag_ms
# Add detailed timings
for name, elapsed in self._component_timings.items():
metrics[f'time_{name}_ms'] = elapsed
return metrics
def stop(self):

View File

@@ -3,7 +3,7 @@
Note(Gemini):
Handles loading/saving of project .toml configurations.
Also handles serializing the discussion history into the TOML format using a special
@timestamp prefix to preserve the exact sequence of events.
@timestamp prefix to preserve the exact sequence of events.
"""
import subprocess
import datetime
@@ -11,25 +11,20 @@ import tomllib
import tomli_w
import re
import json
from typing import Any, Optional, TYPE_CHECKING, Union
from pathlib import Path
TS_FMT = "%Y-%m-%dT%H:%M:%S"
if TYPE_CHECKING:
from models import TrackState
TS_FMT: str = "%Y-%m-%dT%H:%M:%S"
def now_ts() -> str:
return datetime.datetime.now().strftime(TS_FMT)
def parse_ts(s: str):
def parse_ts(s: str) -> Optional[datetime.datetime]:
try:
return datetime.datetime.strptime(s, TS_FMT)
except Exception:
return None
# ── entry serialisation ──────────────────────────────────────────────────────
def entry_to_str(entry: dict) -> str:
def entry_to_str(entry: dict[str, Any]) -> str:
"""Serialise a disc entry dict -> stored string."""
ts = entry.get("ts", "")
role = entry.get("role", "User")
@@ -37,9 +32,7 @@ def entry_to_str(entry: dict) -> str:
if ts:
return f"@{ts}\n{role}:\n{content}"
return f"{role}:\n{content}"
def str_to_entry(raw: str, roles: list[str]) -> dict:
def str_to_entry(raw: str, roles: list[str]) -> dict[str, Any]:
"""Parse a stored string back to a disc entry dict."""
ts = ""
rest = raw
@@ -63,10 +56,7 @@ def str_to_entry(raw: str, roles: list[str]) -> dict:
matched_role = next((r for r in known if r.lower() == raw_role.lower()), raw_role)
content = parts[1].strip() if len(parts) > 1 else ""
return {"role": matched_role, "content": content, "collapsed": False, "ts": ts}
# ── git helpers ──────────────────────────────────────────────────────────────
def get_git_commit(git_dir: str) -> str:
try:
r = subprocess.run(
@@ -76,8 +66,6 @@ def get_git_commit(git_dir: str) -> str:
return r.stdout.strip() if r.returncode == 0 else ""
except Exception:
return ""
def get_git_log(git_dir: str, n: int = 5) -> str:
try:
r = subprocess.run(
@@ -87,15 +75,10 @@ def get_git_log(git_dir: str, n: int = 5) -> str:
return r.stdout.strip() if r.returncode == 0 else ""
except Exception:
return ""
# ── default structures ───────────────────────────────────────────────────────
def default_discussion() -> dict:
def default_discussion() -> dict[str, Any]:
return {"git_commit": "", "last_updated": now_ts(), "history": []}
def default_project(name: str = "unnamed") -> dict:
def default_project(name: str = "unnamed") -> dict[str, Any]:
return {
"project": {"name": name, "git_dir": "", "system_prompt": "", "main_context": ""},
"output": {"output_dir": "./md_gen"},
@@ -125,91 +108,63 @@ def default_project(name: str = "unnamed") -> dict:
"tracks": []
}
}
# ── load / save ──────────────────────────────────────────────────────────────
def get_history_path(project_path: str | Path) -> Path:
def get_history_path(project_path: Union[str, Path]) -> Path:
"""Return the Path to the sibling history TOML file for a given project."""
p = Path(project_path)
return p.parent / f"{p.stem}_history.toml"
def load_project(path: str | Path) -> dict:
def load_project(path: Union[str, Path]) -> dict[str, Any]:
"""
Load a project TOML file.
Automatically migrates legacy 'discussion' keys to a sibling history file.
"""
with open(path, "rb") as f:
proj = tomllib.load(f)
# Automatic Migration: move legacy 'discussion' to sibling file
hist_path = get_history_path(path)
if "discussion" in proj:
disc = proj.pop("discussion")
# Save to history file if it doesn't exist yet (or overwrite to migrate)
with open(hist_path, "wb") as f:
tomli_w.dump(disc, f)
# Save the stripped project file
save_project(proj, path)
# Restore for the returned dict so GUI works as before
proj["discussion"] = disc
else:
# Load from sibling if it exists
if hist_path.exists():
proj["discussion"] = load_history(path)
return proj
def load_history(project_path: str | Path) -> dict:
def load_history(project_path: Union[str, Path]) -> dict[str, Any]:
"""Load the segregated discussion history from its dedicated TOML file."""
hist_path = get_history_path(project_path)
if hist_path.exists():
with open(hist_path, "rb") as f:
return tomllib.load(f)
return {}
def clean_nones(data):
def clean_nones(data: Any) -> Any:
"""Recursively remove None values from a dictionary/list."""
if isinstance(data, dict):
return {k: clean_nones(v) for k, v in data.items() if v is not None}
elif isinstance(data, list):
return [clean_nones(v) for v in data if v is not None]
return data
def save_project(proj: dict, path: str | Path, disc_data: dict | None = None):
def save_project(proj: dict[str, Any], path: Union[str, Path], disc_data: Optional[dict[str, Any]] = None) -> None:
"""
Save the project TOML.
If 'discussion' is present in proj, it is moved to the sibling history file.
"""
# Clean None values as TOML doesn't support them
proj = clean_nones(proj)
# Ensure 'discussion' is NOT in the main project dict
if "discussion" in proj:
# If disc_data wasn't provided, use the one from proj
if disc_data is None:
disc_data = proj["discussion"]
# Remove it so it doesn't get saved to the main file
proj = dict(proj) # shallow copy to avoid mutating caller's dict
proj = dict(proj)
del proj["discussion"]
with open(path, "wb") as f:
tomli_w.dump(proj, f)
if disc_data:
disc_data = clean_nones(disc_data)
hist_path = get_history_path(path)
with open(hist_path, "wb") as f:
tomli_w.dump(disc_data, f)
# ── migration helper ─────────────────────────────────────────────────────────
def migrate_from_legacy_config(cfg: dict) -> dict:
def migrate_from_legacy_config(cfg: dict[str, Any]) -> dict[str, Any]:
"""Build a fresh project dict from a legacy flat config.toml. Does NOT save."""
name = cfg.get("output", {}).get("namespace", "project")
proj = default_project(name)
@@ -222,21 +177,16 @@ def migrate_from_legacy_config(cfg: dict) -> dict:
main_disc["history"] = disc.get("history", [])
main_disc["last_updated"] = now_ts()
return proj
# ── flat config for aggregate.run() ─────────────────────────────────────────
def flat_config(proj: dict, disc_name: str | None = None, track_id: str | None = None) -> dict:
def flat_config(proj: dict[str, Any], disc_name: Optional[str] = None, track_id: Optional[str] = None) -> dict[str, Any]:
"""Return a flat config dict compatible with aggregate.run()."""
disc_sec = proj.get("discussion", {})
if track_id:
history = load_track_history(track_id, proj.get("files", {}).get("base_dir", "."))
else:
name = disc_name or disc_sec.get("active", "main")
disc_data = disc_sec.get("discussions", {}).get(name, {})
history = disc_data.get("history", [])
return {
"project": proj.get("project", {}),
"output": proj.get("output", {}),
@@ -247,11 +197,8 @@ def flat_config(proj: dict, disc_name: str | None = None, track_id: str | None =
"history": history,
},
}
# ── track state persistence ─────────────────────────────────────────────────
def save_track_state(track_id: str, state: 'TrackState', base_dir: str | Path = "."):
def save_track_state(track_id: str, state: 'TrackState', base_dir: Union[str, Path] = ".") -> None:
"""
Saves a TrackState object to conductor/tracks/<track_id>/state.toml.
"""
@@ -261,9 +208,7 @@ def save_track_state(track_id: str, state: 'TrackState', base_dir: str | Path =
data = clean_nones(state.to_dict())
with open(state_file, "wb") as f:
tomli_w.dump(data, f)
def load_track_state(track_id: str, base_dir: str | Path = ".") -> 'TrackState':
def load_track_state(track_id: str, base_dir: Union[str, Path] = ".") -> Optional['TrackState']:
"""
Loads a TrackState object from conductor/tracks/<track_id>/state.toml.
"""
@@ -274,9 +219,7 @@ def load_track_state(track_id: str, base_dir: str | Path = ".") -> 'TrackState':
with open(state_file, "rb") as f:
data = tomllib.load(f)
return TrackState.from_dict(data)
def load_track_history(track_id: str, base_dir: str | Path = ".") -> list:
def load_track_history(track_id: str, base_dir: Union[str, Path] = ".") -> list[str]:
"""
Loads the discussion history for a specific track from its state.toml.
Returns a list of entry strings formatted with @timestamp.
@@ -285,8 +228,7 @@ def load_track_history(track_id: str, base_dir: str | Path = ".") -> list:
state = load_track_state(track_id, base_dir)
if not state:
return []
history = []
history: list[str] = []
for entry in state.discussion:
e = dict(entry)
ts = e.get("ts")
@@ -294,9 +236,7 @@ def load_track_history(track_id: str, base_dir: str | Path = ".") -> list:
e["ts"] = ts.strftime(TS_FMT)
history.append(entry_to_str(e))
return history
def save_track_history(track_id: str, history: list, base_dir: str | Path = "."):
def save_track_history(track_id: str, history: list[str], base_dir: Union[str, Path] = ".") -> None:
"""
Saves the discussion history for a specific track to its state.toml.
'history' is expected to be a list of formatted strings.
@@ -305,14 +245,11 @@ def save_track_history(track_id: str, history: list, base_dir: str | Path = ".")
state = load_track_state(track_id, base_dir)
if not state:
return
roles = ["User", "AI", "Vendor API", "System", "Reasoning"]
entries = [str_to_entry(h, roles) for h in history]
state.discussion = entries
save_track_state(track_id, state, base_dir)
def get_all_tracks(base_dir: str | Path = ".") -> list[dict]:
def get_all_tracks(base_dir: Union[str, Path] = ".") -> list[dict[str, Any]]:
"""
Scans the conductor/tracks/ directory and returns a list of dictionaries
containing track metadata: 'id', 'title', 'status', 'complete', 'total',
@@ -324,14 +261,12 @@ def get_all_tracks(base_dir: str | Path = ".") -> list[dict]:
tracks_dir = Path(base_dir) / "conductor" / "tracks"
if not tracks_dir.exists():
return []
results = []
results: list[dict[str, Any]] = []
for entry in tracks_dir.iterdir():
if not entry.is_dir():
continue
track_id = entry.name
track_info = {
track_info: dict[str, Any] = {
"id": track_id,
"title": track_id,
"status": "unknown",
@@ -339,9 +274,7 @@ def get_all_tracks(base_dir: str | Path = ".") -> list[dict]:
"total": 0,
"progress": 0.0
}
state_found = False
# Try loading state.toml
try:
state = load_track_state(track_id, base_dir)
if state:
@@ -355,9 +288,7 @@ def get_all_tracks(base_dir: str | Path = ".") -> list[dict]:
state_found = True
except Exception:
pass
if not state_found:
# Try loading metadata.json
metadata_file = entry / "metadata.json"
if metadata_file.exists():
try:
@@ -368,18 +299,12 @@ def get_all_tracks(base_dir: str | Path = ".") -> list[dict]:
track_info["status"] = data.get("status", "unknown")
except Exception:
pass
# Try parsing plan.md for complete/total if state was missing or empty
if track_info["total"] == 0:
plan_file = entry / "plan.md"
if plan_file.exists():
try:
with open(plan_file, "r", encoding="utf-8") as f:
content = f.read()
# Simple regex to count tasks
# - [ ] Task: ...
# - [x] Task: ...
# - [~] Task: ...
tasks = re.findall(r"^[ \t]*- \[[ x~]\] .*", content, re.MULTILINE)
completed_tasks = re.findall(r"^[ \t]*- \[x\] .*", content, re.MULTILINE)
track_info["total"] = len(tasks)
@@ -388,7 +313,5 @@ def get_all_tracks(base_dir: str | Path = ".") -> list[dict]:
track_info["progress"] = float(track_info["complete"]) / track_info["total"]
except Exception:
pass
results.append(track_info)
return results

10
refactor_ui_task.toml Normal file
View File

@@ -0,0 +1,10 @@
role = "tier3-worker"
prompt = """Implement strict type hints for ALL functions and methods in @gui_2.py and @gui_legacy.py.
1. Use specific types (e.g., dict[str, Any], list[str], Union[str, Path], etc.) for arguments and returns.
2. Maintain the 'AI-Optimized' style: 1-space indentation, NO blank lines within function bodies, and maximum 1 blank line between definitions.
3. Since these files are very large, you MUST use surgical tools (discovered_tool_py_update_definition, discovered_tool_py_set_signature, discovered_tool_py_set_var_declaration) to apply changes. Do NOT try to overwrite the entire file at once.
4. Do NOT change any logic.
5. Use discovered_tool_py_check_syntax after each major change to verify syntax.
6. Ensure 'from typing import Any, Union, Optional, Callable' etc. are present (use the builtin dict/list generics directly — dict and list are NOT importable from typing).
7. Focus on completing the task efficiently without hitting timeouts."""
docs = ["gui_2.py", "gui_legacy.py", "conductor/workflow.md"]

View File

@@ -5,27 +5,22 @@ from dag_engine import TrackDAG, ExecutionEngine
def test_auto_queue_and_step_mode():
 """Verify ExecutionEngine honors auto_queue on tick and gates step_mode tasks behind approve_task."""
 tickets = [
  Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker"),
  Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", step_mode=True),
 ]
 t1, t2 = tickets
 dag = TrackDAG(tickets)
 # The engine must expose an auto_queue constructor parameter
 try:
  engine = ExecutionEngine(dag, auto_queue=True)
 except TypeError:
  pytest.fail("ExecutionEngine does not accept auto_queue parameter")
 # First tick: auto_queue starts T1, while step_mode holds T2 at 'todo'
 engine.tick()
 assert t1.status == "in_progress"
 assert t2.status == "todo"
 # Approving T2 must move it into progress
 try:
  engine.approve_task("T2")
 except AttributeError:
  pytest.fail("ExecutionEngine does not have approve_task method")
 assert t2.status == "in_progress"
if __name__ == "__main__":

View File

@@ -0,0 +1,21 @@
import subprocess
import sys
def test_type_hints():
 """Run the type-hint scanner over target files and exit non-zero if any hints are missing."""
 files = ["project_manager.py", "session_logger.py"]
 all_missing = []
 for f in files:
  print(f"Scanning {f}...")
  cmd = ["uv", "run", "python", "scripts/type_hint_scanner.py", f]
  result = subprocess.run(cmd, capture_output=True, text=True)
  # Any stdout from the scanner means it reported missing annotations
  if result.stdout.strip():
   print(f"Missing hints in {f}:\n{result.stdout}")
   all_missing.append(f)
 if not all_missing:
  print("SUCCESS: All functions have type hints.")
  sys.exit(0)
 print(f"FAILURE: Missing type hints in: {', '.join(all_missing)}")
 sys.exit(1)
if __name__ == "__main__":
 test_type_hints()

View File

@@ -68,20 +68,16 @@ Example usage:
type=str,
help="Category of tests to run (e.g., 'unit', 'integration')."
)
# Parse known arguments for the script itself, then parse remaining args for pytest
args, remaining_pytest_args = parser.parse_known_args(sys.argv[1:])
selected_test_files = []
manifest_data = None
if args.manifest:
try:
manifest_data = load_manifest(args.manifest)
except (FileNotFoundError, tomllib.TOMLDecodeError):
# Error message already printed by load_manifest
sys.exit(1)
if args.category:
# Case 1: --manifest and --category provided
files = get_test_files(manifest_data, args.category)
@@ -94,7 +90,6 @@ Example usage:
print(f"Error: --manifest provided without --category, and no 'default_categories' found in manifest '{args.manifest}'.", file=sys.stderr)
parser.print_help(sys.stderr)
sys.exit(1)
print(f"DEBUG: Using default categories from manifest '{args.manifest}': {default_categories}", file=sys.stderr)
for cat in default_categories:
files = get_test_files(manifest_data, cat)
@@ -104,14 +99,11 @@ Example usage:
print("Error: --category requires --manifest to be specified.", file=sys.stderr)
parser.print_help(sys.stderr)
sys.exit(1)
# Combine selected test files with any remaining pytest arguments that were not parsed by this script.
# We also filter out the literal '--' if it was passed by the user to avoid pytest errors if it appears multiple times.
pytest_command_args = selected_test_files + [arg for arg in remaining_pytest_args if arg != '--']
# Filter out any empty strings that might have been included.
final_pytest_args = [arg for arg in pytest_command_args if arg]
# If no specific tests were selected from manifest/category and no manifest was provided,
# and no other pytest args were given, pytest.main([]) runs default test discovery.
print(f"Running pytest with arguments: {final_pytest_args}", file=sys.stderr)

View File

@@ -89,13 +89,20 @@ def main():
# This prevents the hook from affecting normal CLI usage.
hook_context = os.environ.get("GEMINI_CLI_HOOK_CONTEXT")
logging.debug(f"Checking GEMINI_CLI_HOOK_CONTEXT: '{hook_context}'")
if hook_context != "manual_slop":
logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is '{hook_context}', NOT 'manual_slop'. Allowing execution without confirmation.")
if hook_context != "manual_slop" and hook_context != "mma_headless":
logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is '{hook_context}', NOT 'manual_slop' or 'mma_headless'. Allowing execution without confirmation.")
print(json.dumps({
"decision": "allow",
"reason": f"Non-programmatic usage (GEMINI_CLI_HOOK_CONTEXT={hook_context})."
}))
return
if hook_context == "mma_headless":
logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is 'mma_headless'. Allowing execution for sub-agent.")
print(json.dumps({
"decision": "allow",
"reason": "Sub-agent headless mode (MMA)."
}))
return
# 5. Use 'ApiHookClient' (assuming GUI is on http://127.0.0.1:8999)
logging.debug("GEMINI_CLI_HOOK_CONTEXT is 'manual_slop'. Proceeding with API Hook Client.")
client = ApiHookClient(base_url="http://127.0.0.1:8999")

View File

@@ -189,15 +189,15 @@ def execute_agent(role: str, prompt: str, docs: list[str]) -> str:
command_text += f"\n\nTASK: {prompt}\n\n"
# Use subprocess with input to pipe the prompt via stdin, avoiding WinError 206.
# We use -p 'mma_task' to ensure non-interactive (headless) mode and valid parsing.
# Whitelist tools to ensure they are available to the model in headless mode.
allowed_tools = "read_file,write_file,replace,list_directory,glob,grep_search,discovered_tool_search_files,discovered_tool_get_file_summary,discovered_tool_py_get_skeleton,discovered_tool_py_get_code_outline,discovered_tool_py_get_definition,discovered_tool_py_update_definition,discovered_tool_py_get_signature,discovered_tool_py_set_signature,discovered_tool_py_get_class_summary,discovered_tool_py_get_var_declaration,discovered_tool_py_set_var_declaration,discovered_tool_get_git_diff,discovered_tool_run_powershell,activate_skill,codebase_investigator,discovered_tool_web_search,discovered_tool_fetch_url,discovered_tool_py_find_usages,discovered_tool_py_get_imports,discovered_tool_py_check_syntax,discovered_tool_py_get_hierarchy,discovered_tool_py_get_docstring,discovered_tool_get_tree"
ps_command = (
f"if (Test-Path 'C:\\projects\\misc\\setup_gemini.ps1') {{ . 'C:\\projects\\misc\\setup_gemini.ps1' }}; "
f"gemini -p 'mma_task' --allowed-tools {allowed_tools} --output-format json --model {model}"
f"gemini -p '{role}' --output-format json --model {model}"
)
cmd = ['powershell.exe', '-NoProfile', '-Command', ps_command]
try:
process = subprocess.run(cmd, input=command_text, capture_output=True, text=True, encoding='utf-8')
env = os.environ.copy()
env["GEMINI_CLI_HOOK_CONTEXT"] = "mma_headless"
process = subprocess.run(cmd, input=command_text, capture_output=True, text=True, encoding='utf-8', env=env)
result = process.stdout
if not process.stdout and process.stderr:
result = f"Error: {process.stderr}"

View File

@@ -3,7 +3,6 @@
Opens timestamped log/script files at startup and keeps them open for the
lifetime of the process. The next run of the GUI creates new files; the
previous run's files are simply closed when the process exits.
File layout
-----------
logs/
@@ -12,87 +11,64 @@ logs/
clicalls_<ts>.log - sequential record of every CLI subprocess call
scripts/generated/
<ts>_<seq:04d>.ps1 - each PowerShell script the AI generated, in order
Where <ts> = YYYYMMDD_HHMMSS of when this session was started.
"""
import atexit
import datetime
import json
import threading
from typing import Any, Optional, TextIO
from pathlib import Path
_LOG_DIR = Path("./logs")
_SCRIPTS_DIR = Path("./scripts/generated")
_LOG_DIR: Path = Path("./logs")
_SCRIPTS_DIR: Path = Path("./scripts/generated")
_ts: str = "" # session timestamp string e.g. "20260301_142233"
_session_id: str = "" # YYYYMMDD_HHMMSS[_Label]
_session_dir: Path = None # Path to the sub-directory for this session
_session_dir: Optional[Path] = None # Path to the sub-directory for this session
_seq: int = 0 # monotonic counter for script files this session
_seq_lock = threading.Lock()
_comms_fh = None # file handle: logs/<session_id>/comms.log
_tool_fh = None # file handle: logs/<session_id>/toolcalls.log
_api_fh = None # file handle: logs/<session_id>/apihooks.log
_cli_fh = None # file handle: logs/<session_id>/clicalls.log
_seq_lock: threading.Lock = threading.Lock()
_comms_fh: Optional[TextIO] = None # file handle: logs/<session_id>/comms.log
_tool_fh: Optional[TextIO] = None # file handle: logs/<session_id>/toolcalls.log
_api_fh: Optional[TextIO] = None # file handle: logs/<session_id>/apihooks.log
_cli_fh: Optional[TextIO] = None # file handle: logs/<session_id>/clicalls.log
def _now_ts() -> str:
return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
def open_session(label: Optional[str] = None) -> None:
 """
 Called once at GUI startup. Creates the log directories if needed and
 opens the log files for this session within a sub-directory.
 Idempotent: returns immediately if a session is already open.
 Args:
  label: Optional session label; sanitized and appended to the session id.
 """
 global _ts, _session_id, _session_dir, _comms_fh, _tool_fh, _api_fh, _cli_fh, _seq
 if _comms_fh is not None:
  return  # already open
 _ts = _now_ts()
 _session_id = _ts
 if label:
  # Sanitize label: keep alphanumerics, '-' and '_'; replace everything else
  safe_label = "".join(c if c.isalnum() or c in ("-", "_") else "_" for c in label)
  _session_id += f"_{safe_label}"
 _session_dir = _LOG_DIR / _session_id
 _session_dir.mkdir(parents=True, exist_ok=True)
 _SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
 _seq = 0
 # buffering=1 gives line-buffered writes so log lines land promptly
 _comms_fh = open(_session_dir / "comms.log", "w", encoding="utf-8", buffering=1)
 _tool_fh = open(_session_dir / "toolcalls.log", "w", encoding="utf-8", buffering=1)
 _api_fh = open(_session_dir / "apihooks.log", "w", encoding="utf-8", buffering=1)
 _cli_fh = open(_session_dir / "clicalls.log", "w", encoding="utf-8", buffering=1)
 _tool_fh.write(f"# Tool-call log — session {_session_id}\n\n")
 _tool_fh.flush()
 _cli_fh.write(f"# CLI Subprocess Call Log — session {_session_id}\n\n")
 _cli_fh.flush()
 # Register this session in the log registry (best-effort: logging still works if it fails)
 try:
  from log_registry import LogRegistry
  registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
  registry.register_session(_session_id, str(_session_dir), datetime.datetime.now())
 except Exception as e:
  print(f"Warning: Could not register session in LogRegistry: {e}")
 atexit.register(close_session)
def close_session():
def close_session() -> None:
"""Flush and close all log files. Called on clean exit."""
global _comms_fh, _tool_fh, _api_fh, _cli_fh, _session_id, _LOG_DIR
if _comms_fh is None:
return
# Close files first to ensure all data is flushed to disk
if _comms_fh:
_comms_fh.close()
_comms_fh = None
@@ -105,20 +81,14 @@ def close_session():
if _cli_fh:
_cli_fh.close()
_cli_fh = None
# Trigger auto-whitelist update for this session after closing
try:
from log_registry import LogRegistry
registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
registry.update_auto_whitelist_status(_session_id)
except Exception as e:
print(f"Warning: Could not update auto-whitelist on close: {e}")
def log_api_hook(method: str, path: str, payload: str):
"""
Log an API hook invocation.
"""
def log_api_hook(method: str, path: str, payload: str) -> None:
"""Log an API hook invocation."""
if _api_fh is None:
return
ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
@@ -127,9 +97,7 @@ def log_api_hook(method: str, path: str, payload: str):
_api_fh.flush()
except Exception:
pass
def log_comms(entry: dict):
def log_comms(entry: dict[str, Any]) -> None:
"""
Append one comms entry to the comms log file as a JSON-L line.
Thread-safe (GIL + line-buffered file).
@@ -140,34 +108,25 @@ def log_comms(entry: dict):
_comms_fh.write(json.dumps(entry, ensure_ascii=False, default=str) + "\n")
except Exception:
pass
def log_tool_call(script: str, result: str, script_path: str | None):
def log_tool_call(script: str, result: str, script_path: Optional[str]) -> Optional[str]:
"""
Append a tool-call record to the toolcalls log and write the PS1 script to
scripts/generated/. Returns the path of the written script file.
"""
global _seq
if _tool_fh is None:
return script_path # logger not open yet
return script_path
with _seq_lock:
_seq += 1
seq = _seq
ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
# Write the .ps1 file
ps1_name = f"{_ts}_{seq:04d}.ps1"
ps1_path = _SCRIPTS_DIR / ps1_name
ps1_path: Optional[Path] = _SCRIPTS_DIR / ps1_name
try:
ps1_path.write_text(script, encoding="utf-8")
except Exception as exc:
ps1_path = None
ps1_name = f"(write error: {exc})"
# Append to the tool-call sequence log (script body omitted - see .ps1 file)
try:
_tool_fh.write(
f"## Call #{seq} [{ts_entry}]\n"
@@ -179,17 +138,11 @@ def log_tool_call(script: str, result: str, script_path: str | None):
_tool_fh.flush()
except Exception:
pass
return str(ps1_path) if ps1_path else None
def log_cli_call(command: str, stdin_content: str | None, stdout_content: str | None, stderr_content: str | None, latency: float):
"""
Log details of a CLI subprocess execution.
"""
def log_cli_call(command: str, stdin_content: Optional[str], stdout_content: Optional[str], stderr_content: Optional[str], latency: float) -> None:
"""Log details of a CLI subprocess execution."""
if _cli_fh is None:
return
ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
try:
log_data = {

View File

@@ -3,7 +3,7 @@ import subprocess, shutil
from pathlib import Path
from typing import Callable, Optional
TIMEOUT_SECONDS = 60
TIMEOUT_SECONDS: int = 60
def run_powershell(script: str, base_dir: str, qa_callback: Optional[Callable[[str], str]] = None) -> str:
"""
@@ -12,28 +12,37 @@ def run_powershell(script: str, base_dir: str, qa_callback: Optional[Callable[[s
If qa_callback is provided and the command fails or has stderr,
the callback is called with the stderr content and its result is appended.
"""
safe_dir = str(base_dir).replace("'", "''")
full_script = f"Set-Location -LiteralPath '{safe_dir}'\n{script}"
safe_dir: str = str(base_dir).replace("'", "''")
full_script: str = f"Set-Location -LiteralPath '{safe_dir}'\n{script}"
# Try common executable names
exe = next((x for x in ["powershell.exe", "pwsh.exe", "powershell", "pwsh"] if shutil.which(x)), None)
exe: Optional[str] = next((x for x in ["powershell.exe", "pwsh.exe", "powershell", "pwsh"] if shutil.which(x)), None)
if not exe: return "ERROR: Neither powershell nor pwsh found in PATH"
try:
r = subprocess.run(
process = subprocess.Popen(
[exe, "-NoProfile", "-NonInteractive", "-Command", full_script],
capture_output=True, text=True, timeout=TIMEOUT_SECONDS, cwd=base_dir
stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=base_dir
)
parts = []
if r.stdout.strip(): parts.append(f"STDOUT:\n{r.stdout.strip()}")
if r.stderr.strip(): parts.append(f"STDERR:\n{r.stderr.strip()}")
parts.append(f"EXIT CODE: {r.returncode}")
stdout, stderr = process.communicate(timeout=TIMEOUT_SECONDS)
# QA Interceptor logic
if (r.returncode != 0 or r.stderr.strip()) and qa_callback:
qa_analysis = qa_callback(r.stderr.strip())
parts: list[str] = []
if stdout.strip(): parts.append(f"STDOUT:\n{stdout.strip()}")
if stderr.strip(): parts.append(f"STDERR:\n{stderr.strip()}")
parts.append(f"EXIT CODE: {process.returncode}")
if (process.returncode != 0 or stderr.strip()) and qa_callback:
qa_analysis: Optional[str] = qa_callback(stderr.strip())
if qa_analysis:
parts.append(f"\nQA ANALYSIS:\n{qa_analysis}")
return "\n".join(parts)
except subprocess.TimeoutExpired: return f"ERROR: timed out after {TIMEOUT_SECONDS}s"
except Exception as e: return f"ERROR: {e}"
except subprocess.TimeoutExpired:
if 'process' in locals() and process:
subprocess.run(["taskkill", "/F", "/T", "/PID", str(process.pid)], capture_output=True)
return f"ERROR: timed out after {TIMEOUT_SECONDS}s"
except KeyboardInterrupt:
if 'process' in locals() and process:
subprocess.run(["taskkill", "/F", "/T", "/PID", str(process.pid)], capture_output=True)
raise
except Exception as e:
if 'process' in locals() and process:
subprocess.run(["taskkill", "/F", "/T", "/PID", str(process.pid)], capture_output=True)
return f"ERROR: {e}"

View File

@@ -12,61 +12,46 @@ def main():
if not client.wait_for_server(timeout=10):
print("Error: Could not connect to GUI. Ensure it is running with --enable-test-hooks")
return
sim = WorkflowSimulator(client)
# 1. Start Clean
print("\n[Action] Resetting Session...")
client.click("btn_reset")
time.sleep(2)
# 2. Project Scaffolding
project_name = f"LiveTest_{int(time.time())}"
# Use actual project dir for realism
git_dir = os.path.abspath(".")
project_path = os.path.join(git_dir, "tests", f"{project_name}.toml")
print(f"\n[Action] Scaffolding Project: {project_name} at {project_path}")
sim.setup_new_project(project_name, git_dir, project_path)
# Enable auto-add so results appear in history automatically
client.set_value("auto_add_history", True)
time.sleep(1)
# 3. Discussion Loop (3 turns for speed, but logic supports more)
turns = [
"Hi! I want to create a simple python script called 'hello.py' that prints the current date and time. Can you write it for me?",
"That looks great. Can you also add a feature to print the name of the operating system?",
"Excellent. Now, please create a requirements.txt file with 'requests' in it."
]
for i, msg in enumerate(turns):
print(f"\n--- Turn {i+1} ---")
# Switch to Comms Log to see the send
client.select_tab("operations_tabs", "tab_comms")
sim.run_discussion_turn(msg)
# Check thinking indicator
state = client.get_indicator_state("thinking_indicator")
if state.get('shown'):
print("[Status] Thinking indicator is visible.")
# Switch to Tool Log halfway through wait
time.sleep(2)
client.select_tab("operations_tabs", "tab_tool")
# Wait for AI response if not already finished
# (run_discussion_turn already waits, so we just observe)
# 4. History Management
print("\n[Action] Creating new discussion thread...")
sim.create_discussion("Refinement")
print("\n[Action] Switching back to Default...")
sim.switch_discussion("Default")
# 5. Manual Sign-off Simulation
print("\n=== Walkthrough Complete ===")
print("Please verify the following in the GUI:")

View File

@@ -14,21 +14,17 @@ def main():
if not client.wait_for_server(timeout=5):
print("Hook server not found. Start GUI with --enable-test-hooks")
return
sim_agent = UserSimAgent(client)
# 1. Reset session to start clean
print("Resetting session...")
client.click("btn_reset")
time.sleep(2) # Give it time to clear
# 2. Initial message
initial_msg = "Hello! I want to create a simple python script that prints 'Hello World'. Can you help me?"
print(f"
[USER]: {initial_msg}")
[USER]: {initial_msg}")
client.set_value("ai_input", initial_msg)
client.click("btn_gen_send")
# 3. Wait for AI response
print("Waiting for AI response...", end="", flush=True)
last_entry_count = 0
@@ -37,21 +33,18 @@ def main():
print(".", end="", flush=True)
session = client.get_session()
entries = session.get('session', {}).get('entries', [])
if len(entries) > last_entry_count:
# Something happened
last_entry = entries[-1]
if last_entry.get('role') == 'AI' and last_entry.get('content'):
print(f"
[AI]: {last_entry.get('content')[:100]}...")
[AI]: {last_entry.get('content')[:100]}...")
print("
Ping-pong successful!")
Ping-pong successful!")
return
last_entry_count = len(entries)
print("
Timeout waiting for AI response")
Timeout waiting for AI response")
if __name__ == "__main__":
main()

View File

@@ -6,30 +6,25 @@ from simulation.sim_base import BaseSimulation, run_sim
class AISettingsSimulation(BaseSimulation):
def run(self):
print("\n--- Running AI Settings Simulation (Gemini Only) ---")
# 1. Verify initial model
provider = self.client.get_value("current_provider")
model = self.client.get_value("current_model")
print(f"[Sim] Initial Provider: {provider}, Model: {model}")
assert provider == "gemini", f"Expected gemini, got {provider}"
# 2. Switch to another Gemini model
other_gemini = "gemini-1.5-flash"
print(f"[Sim] Switching to {other_gemini}...")
self.client.set_value("current_model", other_gemini)
time.sleep(2)
# Verify
new_model = self.client.get_value("current_model")
print(f"[Sim] Updated Model: {new_model}")
assert new_model == other_gemini, f"Expected {other_gemini}, got {new_model}"
# 3. Switch back to flash-lite
target_model = "gemini-2.5-flash-lite"
print(f"[Sim] Switching back to {target_model}...")
self.client.set_value("current_model", target_model)
time.sleep(2)
final_model = self.client.get_value("current_model")
print(f"[Sim] Final Model: {final_model}")
assert final_model == target_model, f"Expected {target_model}, got {final_model}"

View File

@@ -14,7 +14,6 @@ class BaseSimulation:
self.client = ApiHookClient()
else:
self.client = client
self.sim = WorkflowSimulator(self.client)
self.project_path = None
@@ -22,19 +21,15 @@ class BaseSimulation:
print(f"\n[BaseSim] Connecting to GUI...")
if not self.client.wait_for_server(timeout=5):
raise RuntimeError("Could not connect to GUI. Ensure it is running with --enable-test-hooks")
print("[BaseSim] Resetting session...")
self.client.click("btn_reset")
time.sleep(0.5)
git_dir = os.path.abspath(".")
self.project_path = os.path.abspath(f"tests/temp_{project_name.lower()}.toml")
if os.path.exists(self.project_path):
os.remove(self.project_path)
print(f"[BaseSim] Scaffolding Project: {project_name}")
self.sim.setup_new_project(project_name, git_dir, self.project_path)
# Standard test settings
self.client.set_value("auto_add_history", True)
self.client.set_value("current_provider", "gemini")

View File

@@ -6,18 +6,15 @@ from simulation.sim_base import BaseSimulation, run_sim
class ContextSimulation(BaseSimulation):
def run(self):
print("\n--- Running Context & Chat Simulation ---")
# 1. Test Discussion Creation
disc_name = f"TestDisc_{int(time.time())}"
print(f"[Sim] Creating discussion: {disc_name}")
self.sim.create_discussion(disc_name)
time.sleep(1)
# Verify it's in the list
session = self.client.get_session()
# The session structure usually has discussions listed somewhere, or we can check the listbox
# For now, we'll trust the click and check the session update
# 2. Test File Aggregation & Context Refresh
print("[Sim] Testing context refresh and token budget...")
proj = self.client.get_project()
@@ -27,22 +24,18 @@ class ContextSimulation(BaseSimulation):
for f in all_py:
if f not in proj['project']['files']['paths']:
proj['project']['files']['paths'].append(f)
# Update project via hook
self.client.post_project(proj['project'])
time.sleep(1)
# Trigger MD Only to refresh context and token budget
print("[Sim] Clicking MD Only...")
self.client.click("btn_md_only")
time.sleep(5)
# Verify status
proj_updated = self.client.get_project()
status = self.client.get_value("ai_status")
print(f"[Sim] Status: {status}")
assert "md written" in status, f"Expected 'md written' in status, got {status}"
# Verify token budget
pct = self.client.get_value("token_budget_pct")
current = self.client.get_value("token_budget_current")
@@ -50,23 +43,19 @@ class ContextSimulation(BaseSimulation):
# We'll just warn if it's 0 but the MD was written, as it might be a small context
if pct == 0:
print("[Sim] WARNING: token_budget_pct is 0. This might be due to small context or estimation failure.")
# 3. Test Chat Turn
msg = "What is the current date and time? Answer in one sentence."
print(f"[Sim] Sending message: {msg}")
self.sim.run_discussion_turn(msg)
# 4. Verify History
print("[Sim] Verifying history...")
session = self.client.get_session()
entries = session.get('session', {}).get('entries', [])
# We expect at least 2 entries (User and AI)
assert len(entries) >= 2, f"Expected at least 2 entries, found {len(entries)}"
assert entries[-2]['role'] == 'User', "Expected second to last entry to be User"
assert entries[-1]['role'] == 'AI', "Expected last entry to be AI"
print(f"[Sim] AI responded: {entries[-1]['content'][:50]}...")
# 5. Test History Truncation
print("[Sim] Testing history truncation...")
self.sim.truncate_history(1)

View File

@@ -11,18 +11,15 @@ class ExecutionSimulation(BaseSimulation):
def run(self):
print("\n--- Running Execution & Modals Simulation ---")
# 1. Trigger script generation (Async so we don't block on the wait loop)
msg = "Create a hello.ps1 script that prints 'Simulation Test' and execute it."
print(f"[Sim] Sending message to trigger script: {msg}")
self.sim.run_discussion_turn_async(msg)
# 2. Monitor for events and text responses
print("[Sim] Monitoring for script approvals and AI text...")
start_wait = time.time()
approved_count = 0
success = False
consecutive_errors = 0
while time.time() - start_wait < 90:
# Check for error status (be lenient with transients)
@@ -34,7 +31,6 @@ class ExecutionSimulation(BaseSimulation):
break
else:
consecutive_errors = 0
# Check for script confirmation event
ev = self.client.wait_for_event("script_confirmation_required", timeout=1)
if ev:
@@ -43,16 +39,13 @@ class ExecutionSimulation(BaseSimulation):
approved_count += 1
# Give more time if we just approved a script
start_wait = time.time()
# Check if AI has responded with text yet
session = self.client.get_session()
entries = session.get('session', {}).get('entries', [])
# Debug: log last few roles/content
if entries:
last_few = entries[-3:]
print(f"[Sim] Waiting... Last {len(last_few)} roles: {[e.get('role') for e in last_few]}")
if any(e.get('role') == 'AI' and e.get('content') for e in entries):
# Double check content for our keyword
for e in entries:
@@ -61,7 +54,6 @@ class ExecutionSimulation(BaseSimulation):
success = True
break
if success: break
# Also check if output is already in history via tool role
for e in entries:
if e.get('role') in ['Tool', 'Function'] and "Simulation Test" in e.get('content', ''):
@@ -69,9 +61,7 @@ class ExecutionSimulation(BaseSimulation):
success = True
break
if success: break
time.sleep(1.0)
assert success, "Failed to observe script execution output or AI confirmation text"
print(f"[Sim] Final check: approved {approved_count} scripts.")

View File

@@ -6,30 +6,24 @@ from simulation.sim_base import BaseSimulation, run_sim
class ToolsSimulation(BaseSimulation):
def run(self):
print("\n--- Running Tools Simulation ---")
# 1. Trigger list_directory tool
msg = "List the files in the current directory."
print(f"[Sim] Sending message to trigger tool: {msg}")
self.sim.run_discussion_turn(msg)
# 2. Wait for AI to execute tool
print("[Sim] Waiting for tool execution...")
time.sleep(5) # Give it some time
# 3. Verify Tool Log
# We need a hook to get the tool log
# In gui_2.py, there is _on_tool_log which appends to self._tool_log
# We need a hook to read self._tool_log
# 4. Trigger read_file tool
msg = "Read the first 10 lines of aggregate.py."
print(f"[Sim] Sending message to trigger tool: {msg}")
self.sim.run_discussion_turn(msg)
# 5. Wait and Verify
print("[Sim] Waiting for tool execution...")
time.sleep(5)
session = self.client.get_session()
entries = session.get('session', {}).get('entries', [])
# Tool outputs are usually in the conversation history as 'Tool' role or similar
@@ -38,7 +32,6 @@ class ToolsSimulation(BaseSimulation):
# Actually in Gemini history, they might be nested.
# But our GUI disc_entries list usually has them as separate entries or
# they are part of the AI turn.
# Let's check if the AI mentions it in its response
last_ai_msg = entries[-1]['content']
print(f"[Sim] Final AI Response: {last_ai_msg[:100]}...")

View File

@@ -22,14 +22,12 @@ class UserSimAgent:
# ai_client expects md_content and user_message.
# It handles its own internal history.
# We want the 'User AI' to have context of what the 'Assistant AI' said.
# For now, let's just use the last message from Assistant as the prompt.
last_ai_msg = ""
for entry in reversed(conversation_history):
if entry.get('role') == 'AI':
last_ai_msg = entry.get('content', '')
break
# We need to set a custom system prompt for the User Simulator
try:
ai_client.set_custom_system_prompt(self.system_prompt)
@@ -38,7 +36,6 @@ class UserSimAgent:
response = ai_client.send(md_content="", user_message=last_ai_msg)
finally:
ai_client.set_custom_system_prompt("")
return response
def perform_action_with_delay(self, action_func, *args, **kwargs):

View File

@@ -54,7 +54,6 @@ class WorkflowSimulator:
session = self.client.get_session()
entries = session.get('session', {}).get('entries', [])
user_message = self.user_agent.generate_response(entries)
print(f"\n[USER]: {user_message}")
self.client.set_value("ai_input", user_message)
self.client.click("btn_gen_send")
@@ -63,14 +62,12 @@ class WorkflowSimulator:
print("Waiting for AI response...", end="", flush=True)
start_time = time.time()
last_count = len(self.client.get_session().get('session', {}).get('entries', []))
while time.time() - start_time < timeout:
# Check for error status first
status = self.client.get_value("ai_status")
if status and status.lower().startswith("error"):
print(f"\n[ABORT] GUI reported error status: {status}")
return {"role": "AI", "content": f"ERROR: {status}"}
time.sleep(1)
print(".", end="", flush=True)
entries = self.client.get_session().get('session', {}).get('entries', [])
@@ -82,6 +79,5 @@ class WorkflowSimulator:
if "error" in content.lower() or "blocked" in content.lower():
print(f"[WARN] AI response appears to contain an error message.")
return last_entry
print("\nTimeout waiting for AI")
return None

View File

@@ -27,20 +27,17 @@ import ast
import re
from pathlib import Path
# ------------------------------------------------------------------ per-type extractors
def _summarise_python(path: Path, content: str) -> str:
lines = content.splitlines()
line_count = len(lines)
parts = [f"**Python** — {line_count} lines"]
try:
tree = ast.parse(content.lstrip(chr(0xFEFF)), filename=str(path))
except SyntaxError as e:
parts.append(f"_Parse error: {e}_")
return "\n".join(parts)
# Imports
imports = []
for node in ast.walk(tree):
@@ -53,7 +50,6 @@ def _summarise_python(path: Path, content: str) -> str:
if imports:
unique_imports = sorted(set(imports))
parts.append(f"imports: {', '.join(unique_imports)}")
# Top-level constants (ALL_CAPS assignments)
constants = []
for node in ast.iter_child_nodes(tree):
@@ -66,7 +62,6 @@ def _summarise_python(path: Path, content: str) -> str:
constants.append(node.target.id)
if constants:
parts.append(f"constants: {', '.join(constants)}")
# Classes + their methods
for node in ast.iter_child_nodes(tree):
if isinstance(node, ast.ClassDef):
@@ -78,7 +73,6 @@ def _summarise_python(path: Path, content: str) -> str:
parts.append(f"class {node.name}: {', '.join(methods)}")
else:
parts.append(f"class {node.name}")
# Top-level functions
top_fns = [
node.name for node in ast.iter_child_nodes(tree)
@@ -86,15 +80,12 @@ def _summarise_python(path: Path, content: str) -> str:
]
if top_fns:
parts.append(f"functions: {', '.join(top_fns)}")
return "\n".join(parts)
def _summarise_toml(path: Path, content: str) -> str:
lines = content.splitlines()
line_count = len(lines)
parts = [f"**TOML** — {line_count} lines"]
# Extract top-level table headers [key] and [[key]]
table_pat = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}")
tables = []
@@ -104,7 +95,6 @@ def _summarise_toml(path: Path, content: str) -> str:
tables.append(m.group(1).strip())
if tables:
parts.append(f"tables: {', '.join(tables)}")
# Top-level key = value (not inside a [table])
kv_pat = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
in_table = False
@@ -119,15 +109,12 @@ def _summarise_toml(path: Path, content: str) -> str:
top_keys.append(m.group(1))
if top_keys:
parts.append(f"top-level keys: {', '.join(top_keys)}")
return "\n".join(parts)
def _summarise_markdown(path: Path, content: str) -> str:
lines = content.splitlines()
line_count = len(lines)
parts = [f"**Markdown** — {line_count} lines"]
headings = []
for line in lines:
m = re.match(r"^(#{1,3})\s+(.+)", line)
@@ -138,10 +125,8 @@ def _summarise_markdown(path: Path, content: str) -> str:
headings.append(f"{indent}{text}")
if headings:
parts.append("headings:\n" + "\n".join(f" {h}" for h in headings))
return "\n".join(parts)
def _summarise_generic(path: Path, content: str) -> str:
lines = content.splitlines()
line_count = len(lines)
@@ -151,9 +136,7 @@ def _summarise_generic(path: Path, content: str) -> str:
if preview:
parts.append("preview:\n```\n" + "\n".join(preview) + "\n```")
return "\n".join(parts)
# ------------------------------------------------------------------ dispatch
# ------------------------------------------------------------------ dispatch
_SUMMARISERS = {
".py": _summarise_python,
@@ -164,7 +147,6 @@ _SUMMARISERS = {
".ps1": _summarise_generic,
}
def summarise_file(path: Path, content: str) -> str:
"""
Return a compact markdown summary string for a single file.
@@ -177,7 +159,6 @@ def summarise_file(path: Path, content: str) -> str:
except Exception as e:
return f"_Summariser error: {e}_"
def summarise_items(file_items: list[dict]) -> list[dict]:
"""
Given a list of file_item dicts (as returned by aggregate.build_file_items),
@@ -196,7 +177,6 @@ def summarise_items(file_items: list[dict]) -> list[dict]:
result.append({**item, "summary": summary})
return result
def build_summary_markdown(file_items: list[dict]) -> str:
"""
Build a compact markdown string of file summaries, suitable for the

View File

@@ -14,7 +14,6 @@ class TestMMAPersistence(unittest.TestCase):
def test_save_load_mma(self):
proj = project_manager.default_project("test")
proj["mma"] = {"epic": "Test Epic", "tracks": [{"id": "track_1"}]}
test_file = Path("test_mma_proj.toml")
try:
project_manager.save_project(proj, test_file)

View File

@@ -47,10 +47,8 @@ def live_gui():
"""
gui_script = "gui_2.py"
print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks...")
os.makedirs("logs", exist_ok=True)
log_file = open(f"logs/{gui_script.replace('.', '_')}_test.log", "w", encoding="utf-8")
process = subprocess.Popen(
["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"],
stdout=log_file,
@@ -58,11 +56,9 @@ def live_gui():
text=True,
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0
)
max_retries = 15 # Slightly more time for gui_2
ready = False
print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...")
start_time = time.time()
while time.time() - start_time < max_retries:
try:
@@ -76,12 +72,10 @@ def live_gui():
print(f"[Fixture] {gui_script} process died unexpectedly during startup.")
break
time.sleep(0.5)
if not ready:
print(f"[Fixture] TIMEOUT/FAILURE: Hook server for {gui_script} failed to respond.")
kill_process_tree(process.pid)
pytest.fail(f"Failed to start {gui_script} with test hooks.")
try:
yield process, gui_script
finally:

21
tests/mock_alias_tool.py Normal file
View File

@@ -0,0 +1,21 @@
import sys, json, os, subprocess
prompt = sys.stdin.read()
if '"role": "tool"' in prompt:
print(json.dumps({"type": "message", "role": "assistant", "content": "Tool worked!"}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 20}}), flush=True)
else:
# We must call the bridge to trigger the GUI approval!
tool_call = {"name": "list_directory", "input": {"dir_path": "."}}
bridge_cmd = [sys.executable, "C:/projects/manual_slop/scripts/cli_tool_bridge.py"]
proc = subprocess.Popen(bridge_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
stdout, _ = proc.communicate(input=json.dumps(tool_call))
# Even if bridge says allow, we emit the tool_use to the adapter
print(json.dumps({"type": "message", "role": "assistant", "content": "I will list the directory."}), flush=True)
print(json.dumps({
"type": "tool_use",
"name": "list_directory",
"id": "alias_call",
"args": {"dir_path": "."}
}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)

View File

@@ -4,24 +4,20 @@ import subprocess
import os
def main():
# Debug log to stderr
# Debug log to stderr
sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n")
# Read prompt from stdin
try:
# On Windows, stdin might be closed or behave weirdly if not handled
prompt = sys.stdin.read()
except EOFError:
prompt = ""
sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n")
sys.stderr.flush()
# Skip management commands
if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]:
return
# If the prompt contains tool results, provide final answer
if '"role": "tool"' in prompt or '"tool_call_id"' in prompt:
print(json.dumps({
@@ -36,7 +32,6 @@ def main():
"session_id": "mock-session-final"
}), flush=True)
return
# Default flow: simulate a tool call
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
# Using format that bridge understands
@@ -44,10 +39,8 @@ def main():
"name": "read_file",
"input": {"path": "test.txt"}
}
sys.stderr.write(f"DEBUG: Calling bridge at {bridge_path}\n")
sys.stderr.flush()
try:
# CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed
process = subprocess.Popen(
@@ -59,16 +52,13 @@ def main():
env=os.environ
)
stdout, stderr = process.communicate(input=json.dumps(bridge_tool_call))
sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n")
sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n")
decision_data = json.loads(stdout.strip())
decision = decision_data.get("decision")
except Exception as e:
sys.stderr.write(f"DEBUG: Bridge failed: {e}\n")
decision = "deny"
if decision == "allow":
# Simulate REAL CLI field names for adapter normalization test
print(json.dumps({
@@ -77,13 +67,11 @@ def main():
"tool_id": "call_123",
"parameters": {"path": "test.txt"}
}), flush=True)
print(json.dumps({
"type": "message",
"role": "assistant",
"content": "I am reading the file now..."
}), flush=True)
print(json.dumps({
"type": "result",
"status": "success",

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-27T18:56:53"
last_updated = "2026-02-28T07:35:03"
history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -6,10 +6,10 @@ roles = [
"Reasoning",
]
history = []
active = "TestDisc_1772236592"
active = "TestDisc_1772282083"
auto_add = true
[discussions.TestDisc_1772236592]
[discussions.TestDisc_1772282083]
git_commit = ""
last_updated = "2026-02-27T18:56:46"
last_updated = "2026-02-28T07:34:56"
history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-27T18:57:53"
last_updated = "2026-02-28T07:35:49"
history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-27T18:57:10"
last_updated = "2026-02-28T07:35:20"
history = []

View File

@@ -18,7 +18,5 @@ history = [
[discussions.AutoDisc]
git_commit = ""
last_updated = "2026-02-27T23:54:05"
history = [
"@2026-02-27T19:08:37\nSystem:\n[PERFORMANCE ALERT] Frame time high: 62.2ms. Please consider optimizing recent changes or reducing load.",
]
last_updated = "2026-02-28T07:34:41"
history = []

View File

@@ -8,5 +8,5 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import ai_client
def test_agent_capabilities_listing():
# Verify that the agent exposes its available tools correctly
# Verify that the agent exposes its available tools correctly
pass

View File

@@ -9,13 +9,13 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from ai_client import set_agent_tools, _build_anthropic_tools
def test_set_agent_tools():
# Correct usage: pass a dict
# Correct usage: pass a dict
agent_tools = {"read_file": True, "list_directory": False}
set_agent_tools(agent_tools)
def test_build_anthropic_tools_conversion():
# _build_anthropic_tools takes no arguments and uses the global _agent_tools
# We set a tool to True and check if it appears in the output
# _build_anthropic_tools takes no arguments and uses the global _agent_tools
# We set a tool to True and check if it appears in the output
set_agent_tools({"read_file": True})
anthropic_tools = _build_anthropic_tools()
tool_names = [t["name"] for t in anthropic_tools]

View File

@@ -9,10 +9,8 @@ def test_ai_client_send_gemini_cli():
"""
test_message = "Hello, this is a test prompt for the CLI adapter."
test_response = "This is a dummy response from the Gemini CLI."
# Set provider to gemini_cli
ai_client.set_provider("gemini_cli", "gemini-2.5-flash-lite")
# 1. Mock 'ai_client.GeminiCliAdapter' (which we will add)
with patch('ai_client.GeminiCliAdapter') as MockAdapterClass:
mock_adapter_instance = MockAdapterClass.return_value
@@ -20,7 +18,6 @@ def test_ai_client_send_gemini_cli():
mock_adapter_instance.last_usage = {"total_tokens": 100}
mock_adapter_instance.last_latency = 0.5
mock_adapter_instance.session_id = "test-session"
# Verify that 'events' are emitted correctly
with patch.object(ai_client.events, 'emit') as mock_emit:
response = ai_client.send(
@@ -28,14 +25,11 @@ def test_ai_client_send_gemini_cli():
user_message=test_message,
base_dir="."
)
# Check that the adapter's send method was called.
mock_adapter_instance.send.assert_called()
# Verify that the expected lifecycle events were emitted.
emitted_event_names = [call.args[0] for call in mock_emit.call_args_list]
assert 'request_start' in emitted_event_names
assert 'response_received' in emitted_event_names
# Verify that the combined text returned by the adapter is returned by 'ai_client.send'.
assert response == test_response

View File

@@ -8,7 +8,6 @@ def test_list_models_gemini_cli():
for the 'gemini_cli' provider.
"""
models = ai_client.list_models("gemini_cli")
assert "gemini-3.1-pro-preview" in models
assert "gemini-3-flash-preview" in models
assert "gemini-2.5-pro" in models

View File

@@ -68,7 +68,6 @@ def test_multiline_string_safety():
# def a():
# '''
# This is a...
result = format_code(source)
assert " This is a multiline" in result
assert result.startswith("def a():\n '''")

View File

@@ -25,7 +25,7 @@ class MockCandidate:
self.finish_reason.name = "STOP"
def test_ai_client_event_emitter_exists():
# This should fail initially because 'events' won't exist on ai_client
# This should fail initially because 'events' won't exist on ai_client
assert hasattr(ai_client, 'events')
def test_event_emission():
@@ -37,18 +37,13 @@ def test_event_emission():
def test_send_emits_events():
with patch("ai_client._send_gemini") as mock_send_gemini, \
patch("ai_client._send_anthropic") as mock_send_anthropic:
mock_send_gemini.return_value = "gemini response"
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
# We mocked _send_gemini so it doesn't emit events inside.
# But wait, ai_client.send itself emits request_start and response_received?
# Actually, ai_client.send delegates to _send_gemini.
@@ -58,27 +53,20 @@ def test_send_emits_events():
def test_send_emits_events_proper():
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client:
mock_chat = MagicMock()
mock_client.chats.create.return_value = mock_chat
mock_response = MagicMock()
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])]
mock_response.usage_metadata = MockUsage()
mock_chat.send_message.return_value = mock_response
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
assert start_callback.called
assert response_callback.called
args, kwargs = start_callback.call_args
assert kwargs['payload']['provider'] == 'gemini'
@@ -87,42 +75,31 @@ def test_send_emits_tool_events():
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client, \
patch("mcp_client.dispatch") as mock_dispatch:
mock_chat = MagicMock()
mock_client.chats.create.return_value = mock_chat
# 1. Setup mock response with a tool call
mock_fc = MagicMock()
mock_fc.name = "read_file"
mock_fc.args = {"path": "test.txt"}
mock_response_with_tool = MagicMock()
mock_response_with_tool.candidates = [MockCandidate([MockPart("tool call text", mock_fc)])]
mock_response_with_tool.usage_metadata = MockUsage()
# 2. Setup second mock response (final answer)
mock_response_final = MagicMock()
mock_response_final.candidates = [MockCandidate([MockPart("final answer", None)])]
mock_response_final.usage_metadata = MockUsage()
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
mock_dispatch.return_value = "file content"
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
tool_callback = MagicMock()
ai_client.events.on("tool_execution", tool_callback)
ai_client.send("context", "message")
# Should be called twice: once for 'started', once for 'completed'
assert tool_callback.call_count == 2
# Check 'started' call
args, kwargs = tool_callback.call_args_list[0]
assert kwargs['payload']['status'] == 'started'
assert kwargs['payload']['tool'] == 'read_file'
# Check 'completed' call
args, kwargs = tool_callback.call_args_list[1]
assert kwargs['payload']['status'] == 'completed'

View File

@@ -71,7 +71,6 @@ def test_get_text_value():
client = ApiHookClient()
with patch.object(client, 'get_value', return_value=123):
assert client.get_text_value("dummy_tag") == "123"
with patch.object(client, 'get_value', return_value=None):
assert client.get_text_value("dummy_tag") is None
@@ -83,17 +82,14 @@ def test_get_node_status():
# When get_value returns a status directly
with patch.object(client, 'get_value', return_value="running"):
assert client.get_node_status("my_node") == "running"
# When get_value returns None and diagnostics provides a nodes dict
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'nodes': {'my_node': 'completed'}}):
assert client.get_node_status("my_node") == "completed"
# When get_value returns None and diagnostics provides a direct key
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'my_node': 'failed'}):
assert client.get_node_status("my_node") == "failed"
# When neither works
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={}):

View File

@@ -37,7 +37,6 @@ def test_app_processes_new_actions():
import gui_legacy
from unittest.mock import MagicMock, patch
import dearpygui.dearpygui as dpg
dpg.create_context()
try:
with patch('gui_legacy.load_config', return_value={}), \
@@ -46,11 +45,9 @@ def test_app_processes_new_actions():
patch('gui_legacy.project_manager'), \
patch.object(gui_legacy.App, '_load_active_project'):
app = gui_legacy.App()
with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
patch('dearpygui.dearpygui.get_item_callback') as mock_get_cb:
# Test select_tab
app._pending_gui_tasks.append({
"action": "select_tab",
@@ -59,7 +56,6 @@ def test_app_processes_new_actions():
})
app._process_pending_gui_tasks()
mock_set_value.assert_any_call("some_tab_bar", "some_tab")
# Test select_list_item
mock_cb = MagicMock()
mock_get_cb.return_value = mock_cb

View File

@@ -37,17 +37,14 @@ class MyClass:
return None
'''
skeleton = parser.get_skeleton(code)
# Check that signatures are preserved
assert "def complex_function(a, b):" in skeleton
assert "class MyClass:" in skeleton
assert "def method_without_docstring(self):" in skeleton
# Check that docstring is preserved
assert '"""' in skeleton
assert "This is a docstring." in skeleton
assert "It should be preserved." in skeleton
# Check that bodies are replaced with '...'
assert "..." in skeleton
assert "result = a + b" not in skeleton
@@ -87,19 +84,15 @@ class MyClass:
print("method preserved", x)
'''
curated = parser.get_curated_view(code)
# Check that core_func is preserved
assert 'print("this should be preserved")' in curated
assert 'return True' in curated
# Check that hot_func is preserved
assert '# [HOT]' in curated
assert 'print("this should also be preserved")' in curated
# Check that normal_func is stripped but docstring is preserved
assert '"""Normal doc."""' in curated
assert 'print("this should be stripped")' not in curated
assert '...' in curated
# Check that core_method is preserved
assert 'print("method preserved", x)' in curated

View File

@@ -26,19 +26,15 @@ class MyClass:
print("method preserved")
'''
curated = parser.get_curated_view(code)
# Check that core_func is preserved
assert 'print("this should be preserved")' in curated
assert 'return True' in curated
# Check that hot_func is preserved
assert '# [HOT]' in curated
assert 'print("this should also be preserved")' in curated
# Check that normal_func is stripped but docstring is preserved
assert '"""Normal doc."""' in curated
assert 'print("this should be stripped")' not in curated
assert '...' in curated
# Check that core_method is preserved
assert 'print("method preserved")' in curated

View File

@@ -4,44 +4,39 @@ from events import AsyncEventQueue
def test_async_event_queue_put_get():
"""Verify that an event can be asynchronously put and retrieved from the queue."""
async def run_test():
queue = AsyncEventQueue()
event_name = "test_event"
payload = {"data": "hello"}
await queue.put(event_name, payload)
ret_name, ret_payload = await queue.get()
assert ret_name == event_name
assert ret_payload == payload
asyncio.run(run_test())
def test_async_event_queue_multiple():
"""Verify that multiple events can be asynchronously put and retrieved in order."""
async def run_test():
queue = AsyncEventQueue()
await queue.put("event1", 1)
await queue.put("event2", 2)
name1, val1 = await queue.get()
name2, val2 = await queue.get()
assert name1 == "event1"
assert val1 == 1
assert name2 == "event2"
assert val2 == 2
asyncio.run(run_test())
def test_async_event_queue_none_payload():
"""Verify that an event with None payload works correctly."""
async def run_test():
queue = AsyncEventQueue()
await queue.put("no_payload")
name, payload = await queue.get()
assert name == "no_payload"
assert payload is None
asyncio.run(run_test())

View File

@@ -16,14 +16,11 @@ def test_auto_whitelist_keywords(registry_setup):
session_id = "test_kw"
session_dir = logs_dir / session_id
session_dir.mkdir()
# Create comms.log with ERROR
comms_log = session_dir / "comms.log"
comms_log.write_text("Some message\nAN ERROR OCCURRED\nMore text")
registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
assert registry.is_session_whitelisted(session_id)
assert "ERROR" in registry.data[session_id]["metadata"]["reason"]
@@ -32,14 +29,11 @@ def test_auto_whitelist_message_count(registry_setup):
session_id = "test_msg_count"
session_dir = logs_dir / session_id
session_dir.mkdir()
# Create comms.log with > 10 lines
comms_log = session_dir / "comms.log"
comms_log.write_text("\n".join(["msg"] * 15))
registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
assert registry.is_session_whitelisted(session_id)
assert registry.data[session_id]["metadata"]["message_count"] == 15
@@ -48,14 +42,11 @@ def test_auto_whitelist_large_size(registry_setup):
session_id = "test_large"
session_dir = logs_dir / session_id
session_dir.mkdir()
# Create large file (> 50KB)
large_file = session_dir / "large.log"
large_file.write_text("x" * 60000)
registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
assert registry.is_session_whitelisted(session_id)
assert "Large session size" in registry.data[session_id]["metadata"]["reason"]
@@ -64,13 +55,10 @@ def test_no_auto_whitelist_insignificant(registry_setup):
session_id = "test_insignificant"
session_dir = logs_dir / session_id
session_dir.mkdir()
# Small file, few lines, no keywords
comms_log = session_dir / "comms.log"
comms_log.write_text("hello\nworld")
registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
assert not registry.is_session_whitelisted(session_id)
assert registry.data[session_id]["metadata"]["message_count"] == 2

View File

@@ -26,13 +26,10 @@ class TestCliToolBridge(unittest.TestCase):
# 1. Mock stdin with a JSON string tool call
mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0)
# 2. Mock ApiHookClient to return approved
mock_request.return_value = {'approved': True}
# Run main
main()
# 3. Capture stdout and assert allow
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'allow')
@@ -44,12 +41,9 @@ class TestCliToolBridge(unittest.TestCase):
# Mock stdin
mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0)
# 4. Mock ApiHookClient to return denied
mock_request.return_value = {'approved': False}
main()
# Assert deny
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'deny')
@@ -61,12 +55,9 @@ class TestCliToolBridge(unittest.TestCase):
# Mock stdin
mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0)
# 5. Test case where hook server is unreachable (exception)
mock_request.side_effect = Exception("Connection refused")
main()
# Assert deny on error
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'deny')

View File

@@ -28,21 +28,16 @@ class TestCliToolBridgeMapping(unittest.TestCase):
'name': 'read_file',
'input': {'path': 'test.txt'}
}
# 1. Mock stdin with the API format JSON
mock_stdin.write(json.dumps(api_tool_call))
mock_stdin.seek(0)
# 2. Mock ApiHookClient to return approved
mock_request.return_value = {'approved': True}
# Run main
main()
# 3. Verify that request_confirmation was called with mapped values
# If it's not mapped, it will likely be called with None or fail
mock_request.assert_called_once_with('read_file', {'path': 'test.txt'})
# 4. Capture stdout and assert allow
output_str = mock_stdout.getvalue().strip()
self.assertTrue(output_str, "Stdout should not be empty")

View File

@@ -20,7 +20,6 @@ def simulate_conductor_phase_completion(client: ApiHookClient):
"verification_successful": False,
"verification_message": ""
}
try:
status = client.get_status()
if status.get('status') == 'ok':
@@ -32,7 +31,6 @@ def simulate_conductor_phase_completion(client: ApiHookClient):
except Exception as e:
results["verification_successful"] = False
results["verification_message"] = f"Automated verification failed: {e}"
return results
def test_conductor_integrates_api_hook_client_for_verification(live_gui):
@@ -42,7 +40,6 @@ def test_conductor_integrates_api_hook_client_for_verification(live_gui):
"""
client = ApiHookClient()
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is True
assert "successfully" in results["verification_message"]
@@ -52,11 +49,9 @@ def test_conductor_handles_api_hook_failure(live_gui):
We patch the client's get_status to simulate failure even with live GUI.
"""
client = ApiHookClient()
with patch.object(ApiHookClient, 'get_status') as mock_get_status:
mock_get_status.return_value = {'status': 'failed', 'error': 'Something went wrong'}
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is False
assert "failed" in results["verification_message"]
@@ -66,7 +61,6 @@ def test_conductor_handles_api_hook_connection_error():
"""
client = ApiHookClient(base_url="http://127.0.0.1:9998", max_retries=0)
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is False
# Check for expected error substrings from ApiHookClient
msg = results["verification_message"]

View File

@@ -23,30 +23,25 @@ async def test_conductor_engine_run_linear_executes_tickets_in_order(monkeypatch
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker2", depends_on=["T1"])
track = Track(id="track1", description="Track 1", tickets=[ticket1, ticket2])
from multi_agent_conductor import ConductorEngine
engine = ConductorEngine(track=track)
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# We mock run_worker_lifecycle as it is expected to be in the same module
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved
def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete()
return "Success"
mock_lifecycle.side_effect = side_effect
await engine.run_linear()
# Track.get_executable_tickets() should be called repeatedly until all are done
# T1 should run first, then T2.
assert mock_lifecycle.call_count == 2
assert ticket1.status == "completed"
assert ticket2.status == "completed"
# Verify sequence: T1 before T2
calls = mock_lifecycle.call_args_list
assert calls[0][0][0].id == "T1"
@@ -59,21 +54,15 @@ async def test_run_worker_lifecycle_calls_ai_client_send(monkeypatch):
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
mock_send.return_value = "Task complete. I have updated the file."
result = run_worker_lifecycle(ticket, context)
assert result == "Task complete. I have updated the file."
assert ticket.status == "completed"
mock_send.assert_called_once()
# Check if description was passed to send()
args, kwargs = mock_send.call_args
# user_message is passed as a keyword argument
@@ -87,17 +76,13 @@ async def test_run_worker_lifecycle_context_injection(monkeypatch):
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
context_files = ["primary.py", "secondary.py"]
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# We mock ASTParser which is expected to be imported in multi_agent_conductor
with patch("multi_agent_conductor.ASTParser") as mock_ast_parser_class, \
patch("builtins.open", new_callable=MagicMock) as mock_open:
# Setup open mock to return different content for different files
file_contents = {
"primary.py": "def primary(): pass",
@@ -110,23 +95,17 @@ async def test_run_worker_lifecycle_context_injection(monkeypatch):
mock_file.read.return_value = content
mock_file.__enter__.return_value = mock_file
return mock_file
mock_open.side_effect = mock_open_side_effect
# Setup ASTParser mock
mock_ast_parser = mock_ast_parser_class.return_value
mock_ast_parser.get_curated_view.return_value = "CURATED VIEW"
mock_ast_parser.get_skeleton.return_value = "SKELETON VIEW"
mock_send.return_value = "Success"
run_worker_lifecycle(ticket, context, context_files=context_files)
# Verify ASTParser calls:
# First file (primary) should get curated view, others (secondary) get skeleton
mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass")
mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass")
# Verify user_message contains the views
_, kwargs = mock_send.call_args
user_message = kwargs["user_message"]
@@ -142,18 +121,13 @@ async def test_run_worker_lifecycle_handles_blocked_response(monkeypatch):
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# Simulate a response indicating a block
mock_send.return_value = "I am BLOCKED because I don't have enough information."
run_worker_lifecycle(ticket, context)
assert ticket.status == "blocked"
assert "BLOCKED" in ticket.blocked_reason
@@ -166,29 +140,23 @@ async def test_run_worker_lifecycle_step_mode_confirmation(monkeypatch):
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
# We simulate ai_client.send by making it call the pre_tool_callback it received
def mock_send_side_effect(md_content, user_message, **kwargs):
callback = kwargs.get("pre_tool_callback")
if callback:
# Simulate calling it with some payload
callback('{"tool": "read_file", "args": {"path": "test.txt"}}')
return "Success"
mock_send.side_effect = mock_send_side_effect
mock_confirm.return_value = True
mock_event_queue = MagicMock()
run_worker_lifecycle(ticket, context, event_queue=mock_event_queue)
# Verify confirm_execution was called
mock_confirm.assert_called_once()
assert ticket.status == "completed"
@@ -201,24 +169,17 @@ async def test_run_worker_lifecycle_step_mode_rejection(monkeypatch):
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
mock_confirm.return_value = False
mock_send.return_value = "Task failed because tool execution was rejected."
run_worker_lifecycle(ticket, context)
# Verify it was passed to send
args, kwargs = mock_send.call_args
assert kwargs["pre_tool_callback"] is not None
# Since we've already tested ai_client's implementation of pre_tool_callback (mentally or via other tests),
# here we just verify the wiring.
@@ -229,10 +190,8 @@ async def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch):
"""
import json
from multi_agent_conductor import ConductorEngine
track = Track(id="dynamic_track", description="Dynamic Track")
engine = ConductorEngine(track=track)
tickets_json = json.dumps([
{
"id": "T1",
@@ -256,35 +215,26 @@ async def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch):
"depends_on": []
}
])
engine.parse_json_tickets(tickets_json)
assert len(engine.track.tickets) == 3
assert engine.track.tickets[0].id == "T1"
assert engine.track.tickets[1].id == "T2"
assert engine.track.tickets[2].id == "T3"
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# Mock run_worker_lifecycle to mark tickets as complete
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete()
return "Success"
mock_lifecycle.side_effect = side_effect
await engine.run_linear()
assert mock_lifecycle.call_count == 3
# Verify dependency order: T1 must be called before T2
calls = [call[0][0].id for call in mock_lifecycle.call_args_list]
t1_idx = calls.index("T1")
t2_idx = calls.index("T2")
assert t1_idx < t2_idx
# T3 can be anywhere relative to T1 and T2, but T1 < T2 is mandatory
assert "T3" in calls

View File

@@ -20,23 +20,19 @@ class TestConductorTechLead(unittest.TestCase):
}
]
mock_send.return_value = "```json\n" + json.dumps(mock_tickets) + "\n```"
track_brief = "Test track brief"
module_skeletons = "Test skeletons"
# Call the function
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
# Verify set_provider was called
mock_set_provider.assert_called_with('gemini', 'gemini-2.5-flash-lite')
mock_reset_session.assert_called_once()
# Verify send was called
mock_send.assert_called_once()
args, kwargs = mock_send.call_args
self.assertEqual(kwargs['md_content'], "")
self.assertIn(track_brief, kwargs['user_message'])
self.assertIn(module_skeletons, kwargs['user_message'])
# Verify tickets were parsed correctly
self.assertEqual(tickets, mock_tickets)
@@ -46,10 +42,8 @@ class TestConductorTechLead(unittest.TestCase):
def test_generate_tickets_parse_error(self, mock_reset_session, mock_set_provider, mock_send):
# Setup mock invalid response
mock_send.return_value = "Invalid JSON"
# Call the function
tickets = conductor_tech_lead.generate_tickets("brief", "skeletons")
# Verify it returns an empty list on parse error
self.assertEqual(tickets, [])

View File

@@ -6,7 +6,6 @@ def test_get_ready_tasks_linear():
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T2"])
dag = TrackDAG([t1, t2, t3])
ready = dag.get_ready_tasks()
assert len(ready) == 1
@@ -16,7 +15,6 @@ def test_get_ready_tasks_branching():
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2, t3])
ready = dag.get_ready_tasks()
assert len(ready) == 2
@@ -26,14 +24,12 @@ def test_get_ready_tasks_branching():
def test_has_cycle_no_cycle():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
assert not dag.has_cycle()
def test_has_cycle_direct_cycle():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
assert dag.has_cycle()
@@ -41,17 +37,15 @@ def test_has_cycle_indirect_cycle():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T3"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2, t3])
assert dag.has_cycle()
def test_has_cycle_complex_no_cycle():
# T1 -> T2, T1 -> T3, T2 -> T4, T3 -> T4
# T1 -> T2, T1 -> T3, T2 -> T4, T3 -> T4
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T4"])
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T4"])
t4 = Ticket(id="T4", description="T4", status="todo", assigned_to="worker")
dag = TrackDAG([t1, t2, t3, t4])
assert not dag.has_cycle()
@@ -59,10 +53,8 @@ def test_get_ready_tasks_multiple_deps():
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="worker")
t2 = Ticket(id="T2", description="T2", status="completed", assigned_to="worker")
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T1", "T2"])
dag = TrackDAG([t1, t2, t3])
assert [t.id for t in dag.get_ready_tasks()] == ["T3"]
t2.status = "todo"
assert [t.id for t in dag.get_ready_tasks()] == ["T2"]
@@ -70,7 +62,6 @@ def test_topological_sort():
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T2"])
dag = TrackDAG([t1, t2, t3])
sort = dag.topological_sort()
assert sort == ["T1", "T2", "T3"]
@@ -78,7 +69,6 @@ def test_topological_sort():
def test_topological_sort_cycle():
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2"])
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
with pytest.raises(ValueError, match="Dependency cycle detected"):
dag.topological_sort()

View File

@@ -18,10 +18,8 @@ def test_credentials_error_mentions_deepseek(monkeypatch):
"""
# Monkeypatch SLOP_CREDENTIALS to a non-existent file
monkeypatch.setenv("SLOP_CREDENTIALS", "non_existent_credentials_file.toml")
with pytest.raises(FileNotFoundError) as excinfo:
ai_client._load_credentials()
err_msg = str(excinfo.value)
assert "[deepseek]" in err_msg
assert "api_key" in err_msg
@@ -58,7 +56,6 @@ def test_gui_provider_list_via_hooks(live_gui):
import time
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
# Attempt to set provider to deepseek to verify it's an allowed value
client.set_value('current_provider', 'deepseek')
time.sleep(0.5)

View File

@@ -15,7 +15,6 @@ def test_deepseek_completion_logic():
Verifies that ai_client.send() correctly calls the DeepSeek API and returns content.
"""
ai_client.set_provider("deepseek", "deepseek-chat")
with patch("requests.post") as mock_post:
mock_response = MagicMock()
mock_response.status_code = 200
@@ -27,7 +26,6 @@ def test_deepseek_completion_logic():
"usage": {"prompt_tokens": 10, "completion_tokens": 5}
}
mock_post.return_value = mock_response
result = ai_client.send(md_content="Context", user_message="Hello", base_dir=".")
assert result == "DeepSeek Response"
assert mock_post.called
@@ -37,7 +35,6 @@ def test_deepseek_reasoning_logic():
Verifies that reasoning_content is captured and wrapped in <thinking> tags.
"""
ai_client.set_provider("deepseek", "deepseek-reasoner")
with patch("requests.post") as mock_post:
mock_response = MagicMock()
mock_response.status_code = 200
@@ -53,7 +50,6 @@ def test_deepseek_reasoning_logic():
"usage": {"prompt_tokens": 10, "completion_tokens": 20}
}
mock_post.return_value = mock_response
result = ai_client.send(md_content="Context", user_message="Reasoning test", base_dir=".")
assert "<thinking>\nChain of thought\n</thinking>" in result
assert "Final Answer" in result
@@ -63,10 +59,8 @@ def test_deepseek_tool_calling():
Verifies that DeepSeek provider correctly identifies and executes tool calls.
"""
ai_client.set_provider("deepseek", "deepseek-chat")
with patch("requests.post") as mock_post, \
patch("mcp_client.dispatch") as mock_dispatch:
# 1. Mock first response with a tool call
mock_resp1 = MagicMock()
mock_resp1.status_code = 200
@@ -88,7 +82,6 @@ def test_deepseek_tool_calling():
}],
"usage": {"prompt_tokens": 50, "completion_tokens": 10}
}
# 2. Mock second response (final answer)
mock_resp2 = MagicMock()
mock_resp2.status_code = 200
@@ -102,12 +95,9 @@ def test_deepseek_tool_calling():
}],
"usage": {"prompt_tokens": 100, "completion_tokens": 20}
}
mock_post.side_effect = [mock_resp1, mock_resp2]
mock_dispatch.return_value = "Hello World"
result = ai_client.send(md_content="Context", user_message="Read test.txt", base_dir=".")
assert "File content is: Hello World" in result
assert mock_dispatch.called
assert mock_dispatch.call_args[0][0] == "read_file"
@@ -118,12 +108,10 @@ def test_deepseek_streaming():
Verifies that DeepSeek provider correctly aggregates streaming chunks.
"""
ai_client.set_provider("deepseek", "deepseek-chat")
with patch("requests.post") as mock_post:
# Mock a streaming response
mock_response = MagicMock()
mock_response.status_code = 200
# Simulate OpenAI-style server-sent events (SSE) for streaming
# Each line starts with 'data: ' and contains a JSON object
chunks = [
@@ -134,6 +122,5 @@ def test_deepseek_streaming():
]
mock_response.iter_lines.return_value = [c.encode('utf-8') for c in chunks]
mock_post.return_value = mock_response
result = ai_client.send(md_content="Context", user_message="Stream test", base_dir=".", stream=True)
assert result == "Hello World"

View File

@@ -3,48 +3,38 @@ from models import Ticket
from dag_engine import TrackDAG, ExecutionEngine
def test_execution_engine_basic_flow():
# Setup tickets with dependencies
# Setup tickets with dependencies
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
t4 = Ticket(id="T4", description="Task 4", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
dag = TrackDAG([t1, t2, t3, t4])
engine = ExecutionEngine(dag)
# Tick 1: Only T1 should be ready
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T1"
# Complete T1
engine.update_task_status("T1", "completed")
# Tick 2: T2 and T3 should be ready
ready = engine.tick()
assert len(ready) == 2
ids = {t.id for t in ready}
assert ids == {"T2", "T3"}
# Complete T2
engine.update_task_status("T2", "completed")
# Tick 3: Only T3 should be ready (T4 depends on T2 AND T3)
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T3"
# Complete T3
engine.update_task_status("T3", "completed")
# Tick 4: T4 should be ready
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T4"
# Complete T4
engine.update_task_status("T4", "completed")
# Tick 5: Nothing ready
ready = engine.tick()
assert len(ready) == 0
@@ -59,34 +49,27 @@ def test_execution_engine_status_persistence():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
dag = TrackDAG([t1])
engine = ExecutionEngine(dag)
engine.update_task_status("T1", "in_progress")
assert t1.status == "in_progress"
ready = engine.tick()
assert len(ready) == 0 # Only 'todo' tasks should be returned by tick() if they are ready
def test_execution_engine_auto_queue():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
engine = ExecutionEngine(dag, auto_queue=True)
# Tick 1: T1 is ready and should be automatically marked as 'in_progress'
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T1"
assert t1.status == "in_progress"
# Tick 2: T1 is in_progress, so T2 is NOT ready yet (T1 must be 'completed')
ready = engine.tick()
assert len(ready) == 0
assert t2.status == "todo"
# Complete T1
engine.update_task_status("T1", "completed")
# Tick 3: T2 is now ready and should be automatically marked as 'in_progress'
ready = engine.tick()
assert len(ready) == 1
@@ -95,20 +78,16 @@ def test_execution_engine_auto_queue():
def test_execution_engine_step_mode():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", step_mode=True)
dag = TrackDAG([t1])
engine = ExecutionEngine(dag, auto_queue=True)
# Tick 1: T1 is ready, but step_mode=True, so it should NOT be automatically marked as 'in_progress'
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T1"
assert t1.status == "todo"
# Manual approval
engine.approve_task("T1")
assert t1.status == "in_progress"
# Tick 2: T1 is already in_progress, should not be returned by tick() (it's not 'ready'/todo)
ready = engine.tick()
assert len(ready) == 0
@@ -117,7 +96,6 @@ def test_execution_engine_approve_task():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
dag = TrackDAG([t1])
engine = ExecutionEngine(dag, auto_queue=False)
# Should be able to approve even if auto_queue is False
engine.approve_task("T1")
assert t1.status == "in_progress"

View File

@@ -17,7 +17,6 @@ def test_context_sim_live(live_gui):
"""Run the Context & Chat simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ContextSimulation(client)
sim.setup("LiveContextSim")
sim.run()
@@ -28,7 +27,6 @@ def test_ai_settings_sim_live(live_gui):
"""Run the AI Settings simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = AISettingsSimulation(client)
sim.setup("LiveAISettingsSim")
sim.run()
@@ -39,7 +37,6 @@ def test_tools_sim_live(live_gui):
"""Run the Tools & Search simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ToolsSimulation(client)
sim.setup("LiveToolsSim")
sim.run()
@@ -50,7 +47,6 @@ def test_execution_sim_live(live_gui):
"""Run the Execution & Modals simulation against a live GUI."""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
sim = ExecutionSimulation(client)
sim.setup("LiveExecutionSim")
sim.run()

View File

@@ -28,25 +28,20 @@ class TestGeminiCliAdapter(unittest.TestCase):
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
message = "Hello Gemini CLI"
self.adapter.send(message)
# Verify subprocess.Popen call
mock_popen.assert_called_once()
args, kwargs = mock_popen.call_args
cmd = args[0]
# Check mandatory CLI components
self.assertIn("gemini", cmd)
self.assertIn("--output-format", cmd)
self.assertIn("stream-json", cmd)
# Message should NOT be in cmd now
self.assertNotIn(message, cmd)
# Verify message was sent via communicate
process_mock.communicate.assert_called_once_with(input=message)
# Check process configuration
self.assertEqual(kwargs.get('stdout'), subprocess.PIPE)
self.assertEqual(kwargs.get('stdin'), subprocess.PIPE)
@@ -64,15 +59,12 @@ class TestGeminiCliAdapter(unittest.TestCase):
json.dumps({"type": "result", "usage": {"prompt_tokens": 5, "candidates_tokens": 5}})
]
stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
result = self.adapter.send("test message")
self.assertEqual(result["text"], "The quick brown fox jumps.")
self.assertEqual(result["tool_calls"], [])
@@ -89,15 +81,12 @@ class TestGeminiCliAdapter(unittest.TestCase):
json.dumps({"type": "result", "usage": {}})
]
stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
result = self.adapter.send("read test.txt")
# Result should contain the combined text from all 'message' events
self.assertEqual(result["text"], "Calling tool...\nFile read successfully.")
self.assertEqual(len(result["tool_calls"]), 1)
@@ -114,15 +103,12 @@ class TestGeminiCliAdapter(unittest.TestCase):
json.dumps({"type": "result", "usage": usage_data})
]
stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
self.adapter.send("usage test")
# Verify the usage was captured in the adapter instance
self.assertEqual(self.adapter.last_usage, usage_data)

View File

@@ -10,8 +10,7 @@ import os
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if project_root not in sys.path:
sys.path.append(project_root)
# Import the class to be tested
# Import the class to be tested
from gemini_cli_adapter import GeminiCliAdapter
class TestGeminiCliAdapterParity(unittest.TestCase):
@@ -21,7 +20,6 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
# Patch session_logger to prevent file operations during tests
self.session_logger_patcher = patch('gemini_cli_adapter.session_logger')
self.mock_session_logger = self.session_logger_patcher.start()
self.adapter = GeminiCliAdapter(binary_path="gemini")
self.adapter.session_id = None
self.adapter.last_usage = None
@@ -38,10 +36,8 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
contents_to_count = ["This is the first line.", "This is the second line."]
expected_chars = len("\n".join(contents_to_count))
expected_tokens = expected_chars // 4
token_count = self.adapter.count_tokens(contents=contents_to_count)
self.assertEqual(token_count, expected_tokens)
# Verify that NO subprocess was started for counting
mock_popen.assert_not_called()
@@ -56,18 +52,14 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0
mock_popen.return_value = process_mock
message_content = "User's prompt here."
safety_settings = [
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"},
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
]
self.adapter.send(message=message_content, safety_settings=safety_settings)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that no --safety flags were added to the command
self.assertNotIn("--safety", command)
# Verify that the message was passed correctly via stdin
@@ -83,14 +75,11 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0
mock_popen.return_value = process_mock
message_content = "Another prompt."
self.adapter.send(message=message_content, safety_settings=None)
args_none, _ = mock_popen.call_args
self.assertNotIn("--safety", args_none[0])
mock_popen.reset_mock()
self.adapter.send(message=message_content, safety_settings=[])
args_empty, _ = mock_popen.call_args
self.assertNotIn("--safety", args_empty[0])
@@ -106,19 +95,14 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0
mock_popen.return_value = process_mock
message_content = "User's prompt here."
system_instruction_text = "Some instruction"
expected_input = f"{system_instruction_text}\n\n{message_content}"
self.adapter.send(message=message_content, system_instruction=system_instruction_text)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that the system instruction was prepended to the input sent to communicate
process_mock.communicate.assert_called_once_with(input=expected_input)
# Verify that no --system flag was added to the command
self.assertNotIn("--system", command)
@@ -132,16 +116,12 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0
mock_popen.return_value = process_mock
message_content = "User's prompt here."
model_name = "gemini-1.5-flash"
expected_command_part = f'-m "{model_name}"'
self.adapter.send(message=message_content, model=model_name)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that the -m <model> flag was added to the command
self.assertIn(expected_command_part, command)
# Verify that the message was passed correctly via stdin
@@ -155,20 +135,15 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
"""
mock_process = MagicMock()
mock_popen.return_value = mock_process
# Define an exception to simulate
simulated_exception = RuntimeError("Simulated communicate error")
mock_process.communicate.side_effect = simulated_exception
message_content = "User message"
# Assert that the exception is raised and process is killed
with self.assertRaises(RuntimeError) as cm:
self.adapter.send(message=message_content)
# Verify that the process's kill method was called
mock_process.kill.assert_called_once()
# Verify that the correct exception was re-raised
self.assertIs(cm.exception, simulated_exception)

View File

@@ -14,7 +14,6 @@ def test_gemini_cli_context_bleed_prevention(live_gui):
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
client.set_value("auto_add_history", True)
# Create a specialized mock for context bleed
bleed_mock = os.path.abspath("tests/mock_context_bleed.py")
with open(bleed_mock, "w") as f:
@@ -24,26 +23,20 @@ print(json.dumps({"type": "message", "role": "user", "content": "I am echoing yo
print(json.dumps({"type": "message", "role": "assistant", "content": "Actual AI Response"}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)
''')
cli_cmd = f'"{sys.executable}" "{bleed_mock}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
client.set_value("ai_input", "Test context bleed")
client.click("btn_gen_send")
# Wait for completion
time.sleep(3)
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
# Verify: We expect exactly one AI entry, and it must NOT contain the echoed user message
ai_entries = [e for e in entries if e.get("role") == "AI"]
assert len(ai_entries) == 1
assert ai_entries[0].get("content") == "Actual AI Response"
assert "echoing you" not in ai_entries[0].get("content")
os.remove(bleed_mock)
def test_gemini_cli_parameter_resilience(live_gui):
@@ -55,7 +48,6 @@ def test_gemini_cli_parameter_resilience(live_gui):
client.click("btn_reset")
client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop")
# Create a mock that uses dir_path for list_directory
alias_mock = os.path.abspath("tests/mock_alias_tool.py")
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
@@ -64,7 +56,6 @@ def test_gemini_cli_parameter_resilience(live_gui):
bridge_path_str = bridge_path.replace("\\", "/")
else:
bridge_path_str = bridge_path
with open(alias_tool_content := "tests/mock_alias_tool.py", "w") as f:
f.write(f'''import sys, json, os, subprocess
prompt = sys.stdin.read()
@@ -88,14 +79,11 @@ else:
}}), flush=True)
print(json.dumps({{"type": "result", "stats": {{"total_tokens": 10}}}}), flush=True)
''')
cli_cmd = f'"{sys.executable}" "{alias_mock}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
client.set_value("ai_input", "Test parameter aliases")
client.click("btn_gen_send")
# Handle approval
timeout = 15
start_time = time.time()
@@ -108,18 +96,14 @@ else:
approved = True
if approved: break
time.sleep(0.5)
assert approved, "Tool approval event never received"
# Verify tool result in history
time.sleep(2)
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
# Check for "Tool worked!" which implies the tool execution was successful
found = any("Tool worked!" in e.get("content", "") for e in entries)
assert found, "Tool result indicating success not found in history"
os.remove(alias_mock)
def test_gemini_cli_loop_termination(live_gui):
@@ -131,16 +115,13 @@ def test_gemini_cli_loop_termination(live_gui):
client.click("btn_reset")
client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop")
# This uses the existing mock_gemini_cli.py which is already designed for 2 rounds
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
cli_cmd = f'"{sys.executable}" "{mock_script}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
client.set_value("ai_input", "Perform multi-round tool test")
client.click("btn_gen_send")
# Handle approvals (mock does one tool call)
timeout = 20
start_time = time.time()
@@ -153,7 +134,6 @@ def test_gemini_cli_loop_termination(live_gui):
approved = True
if approved: break
time.sleep(0.5)
# Wait for the second round and final answer
found_final = False
start_time = time.time()
@@ -166,5 +146,4 @@ def test_gemini_cli_loop_termination(live_gui):
break
if found_final: break
time.sleep(1)
assert found_final, "Final message after multi-round tool loop not found"

View File

@@ -11,41 +11,33 @@ def test_gemini_cli_full_integration(live_gui):
Handles 'ask_received' events from the bridge and any other approval requests.
"""
client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session and enable history
client.click("btn_reset")
client.set_value("auto_add_history", True)
# Switch to manual_slop project explicitly
client.select_list_item("proj_files", "manual_slop")
# 1. Setup paths and configure the GUI
# Use the real gemini CLI if available, otherwise use mock
# For CI/testing we prefer mock
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
cli_cmd = f'"{sys.executable}" "{mock_script}"'
print(f"[TEST] Setting current_provider to gemini_cli")
client.set_value("current_provider", "gemini_cli")
print(f"[TEST] Setting gcli_path to {cli_cmd}")
client.set_value("gcli_path", cli_cmd)
# Verify settings
assert client.get_value("current_provider") == "gemini_cli"
# Clear events
client.get_events()
# 2. Trigger a message in the GUI
print("[TEST] Sending user message...")
client.set_value("ai_input", "Please read test.txt")
client.click("btn_gen_send")
# 3. Monitor for approval events
print("[TEST] Waiting for approval events...")
timeout = 45
start_time = time.time()
approved_count = 0
while time.time() - start_time < timeout:
events = client.get_events()
if events:
@@ -53,7 +45,6 @@ def test_gemini_cli_full_integration(live_gui):
etype = ev.get("type")
eid = ev.get("request_id") or ev.get("action_id")
print(f"[TEST] Received event: {etype} (ID: {eid})")
if etype in ["ask_received", "glob_approval_required", "script_confirmation_required"]:
print(f"[TEST] Approving {etype} {eid}")
if etype == "script_confirmation_required":
@@ -63,7 +54,6 @@ def test_gemini_cli_full_integration(live_gui):
json={"request_id": eid, "response": {"approved": True}})
assert resp.status_code == 200
approved_count += 1
# Check if we got a final response in history
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
@@ -74,12 +64,9 @@ def test_gemini_cli_full_integration(live_gui):
print(f"[TEST] Success! Found final message in history.")
found_final = True
break
if found_final:
break
time.sleep(1.0)
assert approved_count > 0, "No approval events were processed"
assert found_final, "Final message from mock CLI was not found in the GUI history"
@@ -88,22 +75,18 @@ def test_gemini_cli_rejection_and_history(live_gui):
Integration test for the Gemini CLI provider: Rejection flow and history.
"""
client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session
client.click("btn_reset")
client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop")
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
cli_cmd = f'"{sys.executable}" "{mock_script}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
# 2. Trigger a message
print("[TEST] Sending user message (to be denied)...")
client.set_value("ai_input", "Deny me")
client.click("btn_gen_send")
# 3. Wait for event and reject
timeout = 20
start_time = time.time()
@@ -121,9 +104,7 @@ def test_gemini_cli_rejection_and_history(live_gui):
break
if denied: break
time.sleep(0.5)
assert denied, "No ask_received event to deny"
# 4. Verify rejection in history
print("[TEST] Waiting for rejection in history...")
rejection_found = False
@@ -137,5 +118,4 @@ def test_gemini_cli_rejection_and_history(live_gui):
break
if rejection_found: break
time.sleep(1.0)
assert rejection_found, "Rejection message not found in history"

View File

@@ -26,9 +26,7 @@ def test_send_invokes_adapter_send(mock_prompt, mock_adapter_class):
mock_instance.last_usage = {"input_tokens": 10}
mock_instance.last_latency = 0.1
mock_instance.session_id = None
ai_client.send("context", "message", discussion_history="hist")
expected_payload = "[DISCUSSION HISTORY]\n\nhist\n\n---\n\nmessage"
assert mock_instance.send.called
args, kwargs = mock_instance.send.call_args
@@ -42,11 +40,8 @@ def test_get_history_bleed_stats(mock_adapter_class):
mock_instance.last_usage = {"input_tokens": 1500}
mock_instance.last_latency = 0.5
mock_instance.session_id = "sess"
# Initialize by sending a message
ai_client.send("context", "msg")
stats = ai_client.get_history_bleed_stats()
assert stats["provider"] == "gemini_cli"
assert stats["current"] == 1500

View File

@@ -16,33 +16,26 @@ def test_get_gemini_cache_stats_with_mock_client():
"""
# Ensure a clean state before the test by resetting the session
reset_session()
# 1. Create a mock for the cache object that the client will return
mock_cache = MagicMock()
mock_cache.name = "cachedContents/test-cache"
mock_cache.display_name = "Test Cache"
mock_cache.model = "models/gemini-1.5-pro-001"
mock_cache.size_bytes = 1024
# 2. Create a mock for the client instance
mock_client_instance = MagicMock()
# Configure its `caches.list` method to return our mock cache
mock_client_instance.caches.list.return_value = [mock_cache]
# 3. Patch the Client constructor to return our mock instance
# This intercepts the `_ensure_gemini_client` call inside the function
with patch('google.genai.Client', return_value=mock_client_instance) as mock_client_constructor:
# 4. Call the function under test
stats = get_gemini_cache_stats()
# 5. Assert that the function behaved as expected
# It should have constructed the client
mock_client_constructor.assert_called_once()
# It should have called the `list` method on the `caches` attribute
mock_client_instance.caches.list.assert_called_once()
# The returned stats dictionary should be correct
assert "cache_count" in stats
assert "total_size_bytes" in stats

View File

@@ -12,7 +12,6 @@ def app_instance():
"""
if not hasattr(ai_client, 'events') or ai_client.events is None:
ai_client.events = EventEmitter()
with (
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.save_config'),
@@ -35,13 +34,11 @@ def test_app_subscribes_to_events(app_instance):
with patch.object(ai_client.events, 'on') as mock_on:
app = app_instance()
mock_on.assert_called()
calls = mock_on.call_args_list
event_names = [call.args[0] for call in calls]
assert "request_start" in event_names
assert "response_received" in event_names
assert "tool_execution" in event_names
for call in calls:
handler = call.args[1]
assert hasattr(handler, '__self__')

View File

@@ -30,7 +30,6 @@ def test_gui2_hubs_exist_in_show_windows(app_instance):
"Files & Media",
"Theme",
]
for hub in expected_hubs:
assert hub in app_instance.show_windows, f"Expected hub window '{hub}' not found in show_windows"
@@ -43,6 +42,5 @@ def test_gui2_old_windows_removed_from_show_windows(app_instance):
"Provider", "System Prompts",
"Message", "Response", "Tool Calls", "Comms History"
]
for old_win in old_windows:
assert old_win not in app_instance.show_windows, f"Old window '{old_win}' should have been removed from show_windows"

View File

@@ -8,7 +8,6 @@ from events import EventEmitter
def app_instance():
if not hasattr(ai_client, 'events') or ai_client.events is None:
ai_client.events = EventEmitter()
with (
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.save_config'),
@@ -32,7 +31,6 @@ def test_mcp_tool_call_is_dispatched(app_instance):
mock_fc = MagicMock()
mock_fc.name = "read_file"
mock_fc.args = {"file_path": "test.txt"}
# 2. Construct the mock AI response (Gemini format)
mock_response_with_tool = MagicMock()
mock_part = MagicMock()
@@ -47,25 +45,19 @@ def test_mcp_tool_call_is_dispatched(app_instance):
prompt_token_count = 100
candidates_token_count = 10
cached_content_token_count = 0
mock_response_with_tool.usage_metadata = DummyUsage()
# 3. Create a mock for the final AI response after the tool call
mock_response_final = MagicMock()
mock_response_final.text = "Final answer"
mock_response_final.candidates = []
mock_response_final.usage_metadata = DummyUsage()
# 4. Patch the necessary components
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client, \
patch('mcp_client.dispatch', return_value="file content") as mock_dispatch:
mock_chat = mock_client.chats.create.return_value
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
ai_client.set_provider("gemini", "mock-model")
# 5. Call the send function
ai_client.send(
md_content="some context",
@@ -74,6 +66,5 @@ def test_mcp_tool_call_is_dispatched(app_instance):
file_items=[],
discussion_history=""
)
# 6. Assert that the MCP dispatch function was called
mock_dispatch.assert_called_once_with("read_file", {"file_path": "test.txt"})

View File

@@ -30,10 +30,8 @@ def test_gui2_set_value_hook_works(live_gui):
assert client.wait_for_server(timeout=10)
test_value = f"New value set by test: {uuid.uuid4()}"
gui_data = {'action': 'set_value', 'item': 'ai_input', 'value': test_value}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
# Verify the value was actually set using the new get_value hook
time.sleep(0.5)
current_value = client.get_value('ai_input')
@@ -45,17 +43,14 @@ def test_gui2_click_hook_works(live_gui):
"""
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
# First, set some state that 'Reset' would clear.
test_value = "This text should be cleared by the reset button."
client.set_value('ai_input', test_value)
time.sleep(0.5)
assert client.get_value('ai_input') == test_value
# Now, trigger the click
client.click('btn_reset')
time.sleep(0.5)
# Verify it was reset
assert client.get_value('ai_input') == ""
@@ -66,7 +61,6 @@ def test_gui2_custom_callback_hook_works(live_gui):
client = ApiHookClient()
assert client.wait_for_server(timeout=10)
test_data = f"Callback executed: {uuid.uuid4()}"
gui_data = {
'action': 'custom_callback',
'callback': '_test_callback_func_write_to_file',
@@ -74,9 +68,7 @@ def test_gui2_custom_callback_hook_works(live_gui):
}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
time.sleep(1) # Give gui_2.py time to process its task queue
# Assert that the file WAS created and contains the correct data
assert TEST_CALLBACK_FILE.exists(), "Custom callback was NOT executed, or file path is wrong!"
with open(TEST_CALLBACK_FILE, "r") as f:

View File

@@ -17,15 +17,12 @@ def test_performance_benchmarking(live_gui):
"""
process, gui_script = live_gui
client = ApiHookClient()
# Wait for app to stabilize and render some frames
time.sleep(3.0)
# Collect metrics over 5 seconds
fps_values = []
cpu_values = []
frame_time_values = []
start_time = time.time()
while time.time() - start_time < 5:
try:
@@ -35,7 +32,6 @@ def test_performance_benchmarking(live_gui):
fps = metrics.get('fps', 0.0)
cpu = metrics.get('cpu_percent', 0.0)
ft = metrics.get('last_frame_time_ms', 0.0)
# In some CI environments without a display, metrics might be 0
# We only record positive ones to avoid skewing averages if hooks are failing
if fps > 0:
@@ -45,19 +41,15 @@ def test_performance_benchmarking(live_gui):
time.sleep(0.1)
except Exception:
break
avg_fps = sum(fps_values) / len(fps_values) if fps_values else 0
avg_cpu = sum(cpu_values) / len(cpu_values) if cpu_values else 0
avg_ft = sum(frame_time_values) / len(frame_time_values) if frame_time_values else 0
_shared_metrics[gui_script] = {
"avg_fps": avg_fps,
"avg_cpu": avg_cpu,
"avg_ft": avg_ft
}
print(f"\n[Test] Results for {gui_script}: FPS={avg_fps:.2f}, CPU={avg_cpu:.2f}%, FT={avg_ft:.2f}ms")
# Absolute minimum requirements
if avg_fps > 0:
assert avg_fps >= 30, f"{gui_script} FPS {avg_fps:.2f} is below 30 FPS threshold"
@@ -70,19 +62,15 @@ def test_performance_parity():
if "gui_legacy.py" not in _shared_metrics or "gui_2.py" not in _shared_metrics:
if len(_shared_metrics) < 2:
pytest.skip("Metrics for both GUIs not yet collected.")
gui_m = _shared_metrics["gui_legacy.py"]
gui2_m = _shared_metrics["gui_2.py"]
# FPS Parity Check (+/- 15% leeway for now, target is 5%)
# Actually I'll use 0.15 for assertion and log the actual.
fps_diff_pct = abs(gui_m["avg_fps"] - gui2_m["avg_fps"]) / gui_m["avg_fps"] if gui_m["avg_fps"] > 0 else 0
cpu_diff_pct = abs(gui_m["avg_cpu"] - gui2_m["avg_cpu"]) / gui_m["avg_cpu"] if gui_m["avg_cpu"] > 0 else 0
print(f"\n--- Performance Parity Results ---")
print(f"FPS Diff: {fps_diff_pct*100:.2f}%")
print(f"CPU Diff: {cpu_diff_pct*100:.2f}%")
# We follow the 5% requirement for FPS
# For CPU we might need more leeway
assert fps_diff_pct <= 0.15, f"FPS difference {fps_diff_pct*100:.2f}% exceeds 15% threshold"

View File

@@ -23,23 +23,19 @@ def mock_gui():
return gui
def test_handle_generate_send_pushes_event(mock_gui):
# Mock _do_generate to return sample data
# Mock _do_generate to return sample data
mock_gui._do_generate = MagicMock(return_value=(
"full_md", "path", [], "stable_md", "disc_text"
))
mock_gui.ui_ai_input = "test prompt"
mock_gui.ui_files_base_dir = "."
# Mock event_queue.put
mock_gui.event_queue.put = MagicMock()
# We need to mock asyncio.run_coroutine_threadsafe to immediately execute
with patch('asyncio.run_coroutine_threadsafe') as mock_run:
mock_gui._handle_generate_send()
# Verify run_coroutine_threadsafe was called
assert mock_run.called
# Verify the call to event_queue.put was correct
# This is a bit tricky since the first arg to run_coroutine_threadsafe
# is the coroutine returned by event_queue.put().
@@ -62,7 +58,6 @@ def test_user_request_event_payload():
disc_text="disc",
base_dir="."
)
d = payload.to_dict()
assert d["prompt"] == "hello"
assert d["stable_md"] == "md"

View File

@@ -25,7 +25,6 @@ def app_instance():
patch.object(App, '_rebuild_disc_roles_list'), \
patch.object(App, '_rebuild_discussion_selector'), \
patch.object(App, '_refresh_project_widgets'):
app = App()
yield app
dpg.destroy_context()
@@ -37,7 +36,7 @@ def test_diagnostics_panel_initialization(app_instance):
assert len(app_instance.perf_history["frame_time"]) == 100
def test_diagnostics_panel_updates(app_instance):
# Mock dependencies
# Mock dependencies
mock_metrics = {
'last_frame_time_ms': 10.0,
'fps': 100.0,
@@ -45,21 +44,17 @@ def test_diagnostics_panel_updates(app_instance):
'input_lag_ms': 5.0
}
app_instance.perf_monitor.get_metrics = MagicMock(return_value=mock_metrics)
with patch('dearpygui.dearpygui.is_item_shown', return_value=True), \
patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
patch('dearpygui.dearpygui.does_item_exist', return_value=True):
# We also need to mock ai_client stats
with patch('ai_client.get_history_bleed_stats', return_value={}):
app_instance._update_performance_diagnostics()
# Verify UI updates
mock_set_value.assert_any_call("perf_fps_text", "100.0")
mock_set_value.assert_any_call("perf_frame_text", "10.0ms")
mock_set_value.assert_any_call("perf_cpu_text", "50.0%")
mock_set_value.assert_any_call("perf_lag_text", "5.0ms")
# Verify history update
assert app_instance.perf_history["frame_time"][-1] == 10.0

View File

@@ -14,7 +14,6 @@ def app_instance():
render a window or block execution.
"""
dpg.create_context()
with patch('dearpygui.dearpygui.create_viewport'), \
patch('dearpygui.dearpygui.setup_dearpygui'), \
patch('dearpygui.dearpygui.show_viewport'), \
@@ -30,32 +29,25 @@ def app_instance():
patch.object(App, '_rebuild_disc_roles_list'), \
patch.object(App, '_rebuild_discussion_selector'), \
patch.object(App, '_refresh_project_widgets'):
app = App()
yield app
dpg.destroy_context()
def test_gui_updates_on_event(app_instance):
# Patch dependencies for the test
# Patch dependencies for the test
with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
patch('dearpygui.dearpygui.configure_item'), \
patch('ai_client.get_history_bleed_stats') as mock_stats:
mock_stats.return_value = {"percentage": 50.0, "current": 500, "limit": 1000}
# We'll use patch.object to see if _refresh_api_metrics is called
with patch.object(app_instance, '_refresh_api_metrics', wraps=app_instance._refresh_api_metrics) as mock_refresh:
# Simulate event
ai_client.events.emit("response_received", payload={})
# Process tasks manually
app_instance._process_pending_gui_tasks()
# Verify that _refresh_api_metrics was called
mock_refresh.assert_called_once()
# Verify that dpg.set_value was called for the metrics widgets
calls = [call.args[0] for call in mock_set_value.call_args_list]
assert "token_budget_bar" in calls

View File

@@ -13,28 +13,23 @@ def test_idle_performance_requirements(live_gui):
Requirement: GUI must maintain stable performance on idle.
"""
client = ApiHookClient()
# Wait for app to stabilize and render some frames
time.sleep(2.0)
# Get multiple samples to be sure
samples = []
for _ in range(5):
perf_data = client.get_performance()
samples.append(perf_data)
time.sleep(0.5)
# Check for valid metrics
valid_ft_count = 0
for sample in samples:
performance = sample.get('performance', {})
frame_time = performance.get('last_frame_time_ms', 0.0)
# We expect a positive frame time if rendering is happening
if frame_time > 0:
valid_ft_count += 1
assert frame_time < 33.3, f"Frame time {frame_time}ms exceeds 30fps threshold"
print(f"[Test] Valid frame time samples: {valid_ft_count}/5")
# In some CI environments without a real display, frame time might remain 0
# but we've verified the hook is returning the dictionary.

View File

@@ -13,13 +13,11 @@ def test_comms_volume_stress_performance(live_gui):
Stress test: Inject many session entries and verify performance doesn't degrade.
"""
client = ApiHookClient()
# 1. Capture baseline
time.sleep(2.0) # Wait for stability
baseline_resp = client.get_performance()
baseline = baseline_resp.get('performance', {})
baseline_ft = baseline.get('last_frame_time_ms', 0.0)
# 2. Inject 50 "dummy" session entries
# Role must match DISC_ROLES in gui_legacy.py (User, AI, Vendor API, System)
large_session = []
@@ -30,23 +28,17 @@ def test_comms_volume_stress_performance(live_gui):
"ts": time.time(),
"collapsed": False
})
client.post_session(large_session)
# Give it a moment to process UI updates
time.sleep(1.0)
# 3. Capture stress performance
stress_resp = client.get_performance()
stress = stress_resp.get('performance', {})
stress_ft = stress.get('last_frame_time_ms', 0.0)
print(f"Baseline FT: {baseline_ft:.2f}ms, Stress FT: {stress_ft:.2f}ms")
# If we got valid timing, assert it's within reason
if stress_ft > 0:
assert stress_ft < 33.3, f"Stress frame time {stress_ft:.2f}ms exceeds 30fps threshold"
# Ensure the session actually updated
session_data = client.get_session()
entries = session_data.get('session', {}).get('entries', [])

View File

@@ -23,7 +23,6 @@ def app_instance():
render a window or block execution.
"""
dpg.create_context()
# Patch only the functions that would show a window or block,
# and the App methods that rebuild UI on init.
with patch('dearpygui.dearpygui.create_viewport'), \
@@ -37,10 +36,8 @@ def app_instance():
patch.object(App, '_rebuild_disc_roles_list'), \
patch.object(App, '_rebuild_discussion_selector'), \
patch.object(App, '_refresh_project_widgets'):
app = App()
yield app
dpg.destroy_context()
def test_telemetry_panel_updates_correctly(app_instance):
@@ -50,7 +47,6 @@ def test_telemetry_panel_updates_correctly(app_instance):
"""
# 1. Set the provider to anthropic
app_instance.current_provider = "anthropic"
# 2. Define the mock stats
mock_stats = {
"provider": "anthropic",
@@ -58,7 +54,6 @@ def test_telemetry_panel_updates_correctly(app_instance):
"current": 135000,
"percentage": 75.0,
}
# 3. Patch the dependencies
app_instance._last_bleed_update_time = 0 # Force update
with patch('ai_client.get_history_bleed_stats', return_value=mock_stats) as mock_get_stats, \
@@ -66,17 +61,13 @@ def test_telemetry_panel_updates_correctly(app_instance):
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
patch('dearpygui.dearpygui.is_item_shown', return_value=False), \
patch('dearpygui.dearpygui.does_item_exist', return_value=True) as mock_does_item_exist:
# 4. Call the method under test
app_instance._refresh_api_metrics()
# 5. Assert the results
mock_get_stats.assert_called_once()
# Assert history bleed widgets were updated
mock_set_value.assert_any_call("token_budget_bar", 0.75)
mock_set_value.assert_any_call("token_budget_label", "135,000 / 180,000")
# Assert Gemini-specific widget was hidden
mock_configure_item.assert_any_call("gemini_cache_label", show=False)
@@ -87,7 +78,6 @@ def test_cache_data_display_updates_correctly(app_instance):
"""
# 1. Set the provider to Gemini
app_instance.current_provider = "gemini"
# 2. Define mock cache stats
mock_cache_stats = {
'cache_count': 5,
@@ -95,7 +85,6 @@ def test_cache_data_display_updates_correctly(app_instance):
}
# Expected formatted string
expected_text = "Gemini Caches: 5 (12.1 KB)"
# 3. Patch dependencies
app_instance._last_bleed_update_time = 0 # Force update
with patch('ai_client.get_gemini_cache_stats', return_value=mock_cache_stats) as mock_get_cache_stats, \
@@ -103,16 +92,12 @@ def test_cache_data_display_updates_correctly(app_instance):
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
patch('dearpygui.dearpygui.is_item_shown', return_value=False), \
patch('dearpygui.dearpygui.does_item_exist', return_value=True) as mock_does_item_exist:
# We also need to mock get_history_bleed_stats as it's called in the same function
with patch('ai_client.get_history_bleed_stats', return_value={}):
# 4. Call the method under test with payload
app_instance._refresh_api_metrics(payload={'cache_stats': mock_cache_stats})
# 5. Assert the results
# mock_get_cache_stats.assert_called_once() # No longer called synchronously
# Check that the UI item was shown and its value was set
mock_configure_item.assert_any_call("gemini_cache_label", show=True)
mock_set_value.assert_any_call("gemini_cache_label", expected_text)

Some files were not shown because too many files have changed in this diff Show More