checkpoint: massive refactor
This commit is contained in:
57
aggregate.py
57
aggregate.py
@@ -15,6 +15,7 @@ import tomllib
|
||||
import re
|
||||
import glob
|
||||
from pathlib import Path, PureWindowsPath
|
||||
from typing import Any
|
||||
import summarize
|
||||
import project_manager
|
||||
from file_cache import ASTParser
|
||||
@@ -39,7 +40,6 @@ def is_absolute_with_drive(entry: str) -> bool:
|
||||
def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
|
||||
has_drive = is_absolute_with_drive(entry)
|
||||
is_wildcard = "*" in entry
|
||||
|
||||
matches = []
|
||||
if is_wildcard:
|
||||
root = Path(entry) if has_drive else base_dir / entry
|
||||
@@ -47,7 +47,6 @@ def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
|
||||
else:
|
||||
p = Path(entry) if has_drive else (base_dir / entry).resolve()
|
||||
matches = [p]
|
||||
|
||||
# Blacklist filter
|
||||
filtered = []
|
||||
for p in matches:
|
||||
@@ -55,7 +54,6 @@ def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
|
||||
if name == "history.toml" or name.endswith("_history.toml"):
|
||||
continue
|
||||
filtered.append(p)
|
||||
|
||||
return sorted(filtered)
|
||||
|
||||
def build_discussion_section(history: list[str]) -> str:
|
||||
@@ -64,14 +62,13 @@ def build_discussion_section(history: list[str]) -> str:
|
||||
sections.append(f"### Discussion Excerpt {i}\n\n{paste.strip()}")
|
||||
return "\n\n---\n\n".join(sections)
|
||||
|
||||
def build_files_section(base_dir: Path, files: list[str | dict]) -> str:
|
||||
def build_files_section(base_dir: Path, files: list[str | dict[str, Any]]) -> str:
|
||||
sections = []
|
||||
for entry_raw in files:
|
||||
if isinstance(entry_raw, dict):
|
||||
entry = entry_raw.get("path")
|
||||
else:
|
||||
entry = entry_raw
|
||||
|
||||
paths = resolve_paths(base_dir, entry)
|
||||
if not paths:
|
||||
sections.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```")
|
||||
@@ -104,8 +101,7 @@ def build_screenshots_section(base_dir: Path, screenshots: list[str]) -> str:
|
||||
sections.append(f"### `{original}`\n\n})")
|
||||
return "\n\n---\n\n".join(sections)
|
||||
|
||||
|
||||
def build_file_items(base_dir: Path, files: list[str | dict]) -> list[dict]:
|
||||
def build_file_items(base_dir: Path, files: list[str | dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Return a list of dicts describing each file, for use by ai_client when it
|
||||
wants to upload individual files rather than inline everything as markdown.
|
||||
@@ -126,7 +122,6 @@ def build_file_items(base_dir: Path, files: list[str | dict]) -> list[dict]:
|
||||
else:
|
||||
entry = entry_raw
|
||||
tier = None
|
||||
|
||||
paths = resolve_paths(base_dir, entry)
|
||||
if not paths:
|
||||
items.append({"path": None, "entry": entry, "content": f"ERROR: no files matched: {entry}", "error": True, "mtime": 0.0, "tier": tier})
|
||||
@@ -147,7 +142,7 @@ def build_file_items(base_dir: Path, files: list[str | dict]) -> list[dict]:
|
||||
items.append({"path": path, "entry": entry, "content": content, "error": error, "mtime": mtime, "tier": tier})
|
||||
return items
|
||||
|
||||
def build_summary_section(base_dir: Path, files: list[str | dict]) -> str:
|
||||
def build_summary_section(base_dir: Path, files: list[str | dict[str, Any]]) -> str:
|
||||
"""
|
||||
Build a compact summary section using summarize.py — one short block per file.
|
||||
Used as the initial <context> block instead of full file contents.
|
||||
@@ -155,7 +150,7 @@ def build_summary_section(base_dir: Path, files: list[str | dict]) -> str:
|
||||
items = build_file_items(base_dir, files)
|
||||
return summarize.build_summary_markdown(items)
|
||||
|
||||
def _build_files_section_from_items(file_items: list[dict]) -> str:
|
||||
def _build_files_section_from_items(file_items: list[dict[str, Any]]) -> str:
|
||||
"""Build the files markdown section from pre-read file items (avoids double I/O)."""
|
||||
sections = []
|
||||
for item in file_items:
|
||||
@@ -171,8 +166,7 @@ def _build_files_section_from_items(file_items: list[dict]) -> str:
|
||||
sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
|
||||
return "\n\n---\n\n".join(sections)
|
||||
|
||||
|
||||
def build_markdown_from_items(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
|
||||
def build_markdown_from_items(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
|
||||
"""Build markdown from pre-read file items instead of re-reading from disk."""
|
||||
parts = []
|
||||
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
|
||||
@@ -188,35 +182,29 @@ def build_markdown_from_items(file_items: list[dict], screenshot_base_dir: Path,
|
||||
parts.append("## Discussion History\n\n" + build_discussion_section(history))
|
||||
return "\n\n---\n\n".join(parts)
|
||||
|
||||
|
||||
def build_markdown_no_history(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
|
||||
def build_markdown_no_history(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
|
||||
"""Build markdown with only files + screenshots (no history). Used for stable caching."""
|
||||
return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history=[], summary_only=summary_only)
|
||||
|
||||
|
||||
def build_discussion_text(history: list[str]) -> str:
|
||||
"""Build just the discussion history section text. Returns empty string if no history."""
|
||||
if not history:
|
||||
return ""
|
||||
return "## Discussion History\n\n" + build_discussion_section(history)
|
||||
|
||||
|
||||
def build_tier1_context(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
|
||||
def build_tier1_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
|
||||
"""
|
||||
Tier 1 Context: Strategic/Orchestration.
|
||||
Full content for core conductor files and files with tier=1, summaries for others.
|
||||
"""
|
||||
core_files = {"product.md", "tech-stack.md", "workflow.md", "tracks.md"}
|
||||
|
||||
parts = []
|
||||
|
||||
# Files section
|
||||
if file_items:
|
||||
sections = []
|
||||
for item in file_items:
|
||||
path = item.get("path")
|
||||
name = path.name if path else ""
|
||||
|
||||
if name in core_files or item.get("tier") == 1:
|
||||
# Include in full
|
||||
sections.append("### `" + (item.get("entry") or str(path)) + "`\n\n" +
|
||||
@@ -225,47 +213,38 @@ def build_tier1_context(file_items: list[dict], screenshot_base_dir: Path, scree
|
||||
# Summarize
|
||||
sections.append("### `" + (item.get("entry") or str(path)) + "`\n\n" +
|
||||
summarize.summarise_file(path, item.get("content", "")))
|
||||
|
||||
parts.append("## Files (Tier 1 - Mixed)\n\n" + "\n\n---\n\n".join(sections))
|
||||
|
||||
if screenshots:
|
||||
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
|
||||
|
||||
if history:
|
||||
parts.append("## Discussion History\n\n" + build_discussion_section(history))
|
||||
|
||||
return "\n\n---\n\n".join(parts)
|
||||
|
||||
|
||||
def build_tier2_context(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
|
||||
def build_tier2_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
|
||||
"""
|
||||
Tier 2 Context: Architectural/Tech Lead.
|
||||
Full content for all files (standard behavior).
|
||||
"""
|
||||
return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history, summary_only=False)
|
||||
|
||||
|
||||
def build_tier3_context(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], focus_files: list[str]) -> str:
|
||||
def build_tier3_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], focus_files: list[str]) -> str:
|
||||
"""
|
||||
Tier 3 Context: Execution/Worker.
|
||||
Full content for focus_files and files with tier=3, summaries/skeletons for others.
|
||||
"""
|
||||
parts = []
|
||||
|
||||
if file_items:
|
||||
sections = []
|
||||
for item in file_items:
|
||||
path = item.get("path")
|
||||
entry = item.get("entry", "")
|
||||
path_str = str(path) if path else ""
|
||||
|
||||
# Check if this file is in focus_files (by name or path)
|
||||
is_focus = False
|
||||
for focus in focus_files:
|
||||
if focus == entry or (path and focus == path.name) or focus in path_str:
|
||||
is_focus = True
|
||||
break
|
||||
|
||||
if is_focus or item.get("tier") == 3:
|
||||
sections.append("### `" + (entry or path_str) + "`\n\n" +
|
||||
f"```{path.suffix.lstrip('.') if path and path.suffix else 'text'}\n{item.get('content', '')}\n```")
|
||||
@@ -281,19 +260,14 @@ def build_tier3_context(file_items: list[dict], screenshot_base_dir: Path, scree
|
||||
sections.append(f"### `{entry or path_str}`\n\n" + summarize.summarise_file(path, content))
|
||||
else:
|
||||
sections.append(f"### `{entry or path_str}`\n\n" + summarize.summarise_file(path, content))
|
||||
|
||||
parts.append("## Files (Tier 3 - Focused)\n\n" + "\n\n---\n\n".join(sections))
|
||||
|
||||
if screenshots:
|
||||
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
|
||||
|
||||
if history:
|
||||
parts.append("## Discussion History\n\n" + build_discussion_section(history))
|
||||
|
||||
return "\n\n---\n\n".join(parts)
|
||||
|
||||
|
||||
def build_markdown(base_dir: Path, files: list[str | dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
|
||||
def build_markdown(base_dir: Path, files: list[str | dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
|
||||
parts = []
|
||||
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
|
||||
if files:
|
||||
@@ -308,7 +282,7 @@ def build_markdown(base_dir: Path, files: list[str | dict], screenshot_base_dir:
|
||||
parts.append("## Discussion History\n\n" + build_discussion_section(history))
|
||||
return "\n\n---\n\n".join(parts)
|
||||
|
||||
def run(config: dict) -> tuple[str, Path, list[dict]]:
|
||||
def run(config: dict[str, Any]) -> tuple[str, Path, list[dict[str, Any]]]:
|
||||
namespace = config.get("project", {}).get("name")
|
||||
if not namespace:
|
||||
namespace = config.get("output", {}).get("namespace", "project")
|
||||
@@ -318,7 +292,6 @@ def run(config: dict) -> tuple[str, Path, list[dict]]:
|
||||
screenshot_base_dir = Path(config.get("screenshots", {}).get("base_dir", "."))
|
||||
screenshots = config.get("screenshots", {}).get("paths", [])
|
||||
history = config.get("discussion", {}).get("history", [])
|
||||
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
increment = find_next_increment(output_dir, namespace)
|
||||
output_file = output_dir / f"{namespace}_{increment:03d}.md"
|
||||
@@ -330,26 +303,22 @@ def run(config: dict) -> tuple[str, Path, list[dict]]:
|
||||
output_file.write_text(markdown, encoding="utf-8")
|
||||
return markdown, output_file, file_items
|
||||
|
||||
def main():
|
||||
def main() -> None:
|
||||
# Load global config to find active project
|
||||
config_path = Path("config.toml")
|
||||
if not config_path.exists():
|
||||
print("config.toml not found.")
|
||||
return
|
||||
|
||||
with open(config_path, "rb") as f:
|
||||
global_cfg = tomllib.load(f)
|
||||
|
||||
active_path = global_cfg.get("projects", {}).get("active")
|
||||
if not active_path:
|
||||
print("No active project found in config.toml.")
|
||||
return
|
||||
|
||||
# Use project_manager to load project (handles history segregation)
|
||||
proj = project_manager.load_project(active_path)
|
||||
# Use flat_config to make it compatible with aggregate.run()
|
||||
config = project_manager.flat_config(proj)
|
||||
|
||||
markdown, output_file, _ = run(config)
|
||||
print(f"Written: {output_file}")
|
||||
|
||||
|
||||
316
ai_client.py
316
ai_client.py
File diff suppressed because it is too large
Load Diff
@@ -24,11 +24,9 @@ class ApiHookClient:
|
||||
def _make_request(self, method, endpoint, data=None, timeout=None):
|
||||
url = f"{self.base_url}{endpoint}"
|
||||
headers = {'Content-Type': 'application/json'}
|
||||
|
||||
last_exception = None
|
||||
# Increase default request timeout for local server
|
||||
req_timeout = timeout if timeout is not None else 2.0
|
||||
|
||||
for attempt in range(self.max_retries + 1):
|
||||
try:
|
||||
if method == 'GET':
|
||||
@@ -37,7 +35,6 @@ class ApiHookClient:
|
||||
response = requests.post(url, json=data, headers=headers, timeout=req_timeout)
|
||||
else:
|
||||
raise ValueError(f"Unsupported HTTP method: {method}")
|
||||
|
||||
response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
|
||||
return response.json()
|
||||
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
|
||||
@@ -54,7 +51,6 @@ class ApiHookClient:
|
||||
raise requests.exceptions.HTTPError(f"HTTP error {e.response.status_code} for {endpoint}: {e.response.text}") from e
|
||||
except json.JSONDecodeError as e:
|
||||
raise ValueError(f"Failed to decode JSON from response for {endpoint}: {response.text}") from e
|
||||
|
||||
if last_exception:
|
||||
raise last_exception
|
||||
|
||||
@@ -133,7 +129,6 @@ class ApiHookClient:
|
||||
return v
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
# Try GET fallback
|
||||
res = self._make_request('GET', f'/api/gui/value/{item}')
|
||||
@@ -143,7 +138,6 @@ class ApiHookClient:
|
||||
return v
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
# Fallback for thinking/live/prior which are in diagnostics
|
||||
diag = self._make_request('GET', '/api/gui/diagnostics')
|
||||
|
||||
29
api_hooks.py
29
api_hooks.py
@@ -7,12 +7,14 @@ import session_logger
|
||||
|
||||
class HookServerInstance(ThreadingHTTPServer):
|
||||
"""Custom HTTPServer that carries a reference to the main App instance."""
|
||||
|
||||
def __init__(self, server_address, RequestHandlerClass, app):
|
||||
super().__init__(server_address, RequestHandlerClass)
|
||||
self.app = app
|
||||
|
||||
class HookHandler(BaseHTTPRequestHandler):
|
||||
"""Handles incoming HTTP requests for the API hooks."""
|
||||
|
||||
def do_GET(self):
|
||||
app = self.server.app
|
||||
session_logger.log_api_hook("GET", self.path, "")
|
||||
@@ -61,7 +63,6 @@ class HookHandler(BaseHTTPRequestHandler):
|
||||
data = json.loads(body.decode('utf-8'))
|
||||
field_tag = data.get("field")
|
||||
print(f"[DEBUG] Hook Server: get_value for {field_tag}")
|
||||
|
||||
event = threading.Event()
|
||||
result = {"value": None}
|
||||
|
||||
@@ -76,13 +77,11 @@ class HookHandler(BaseHTTPRequestHandler):
|
||||
print(f"[DEBUG] Hook Server: {field_tag} NOT in settable_fields")
|
||||
finally:
|
||||
event.set()
|
||||
|
||||
with app._pending_gui_tasks_lock:
|
||||
app._pending_gui_tasks.append({
|
||||
"action": "custom_callback",
|
||||
"callback": get_val
|
||||
})
|
||||
|
||||
if event.wait(timeout=2):
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'application/json')
|
||||
@@ -104,13 +103,11 @@ class HookHandler(BaseHTTPRequestHandler):
|
||||
result["value"] = getattr(app, attr, None)
|
||||
finally:
|
||||
event.set()
|
||||
|
||||
with app._pending_gui_tasks_lock:
|
||||
app._pending_gui_tasks.append({
|
||||
"action": "custom_callback",
|
||||
"callback": get_val
|
||||
})
|
||||
|
||||
if event.wait(timeout=2):
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'application/json')
|
||||
@@ -133,13 +130,11 @@ class HookHandler(BaseHTTPRequestHandler):
|
||||
result["pending_approval"] = app._pending_mma_approval is not None
|
||||
finally:
|
||||
event.set()
|
||||
|
||||
with app._pending_gui_tasks_lock:
|
||||
app._pending_gui_tasks.append({
|
||||
"action": "custom_callback",
|
||||
"callback": get_mma
|
||||
})
|
||||
|
||||
if event.wait(timeout=2):
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'application/json')
|
||||
@@ -162,13 +157,11 @@ class HookHandler(BaseHTTPRequestHandler):
|
||||
result["prior"] = getattr(app, "is_viewing_prior_session", False)
|
||||
finally:
|
||||
event.set()
|
||||
|
||||
with app._pending_gui_tasks_lock:
|
||||
app._pending_gui_tasks.append({
|
||||
"action": "custom_callback",
|
||||
"callback": check_all
|
||||
})
|
||||
|
||||
if event.wait(timeout=2):
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'application/json')
|
||||
@@ -188,7 +181,6 @@ class HookHandler(BaseHTTPRequestHandler):
|
||||
body = self.rfile.read(content_length)
|
||||
body_str = body.decode('utf-8') if body else ""
|
||||
session_logger.log_api_hook("POST", self.path, body_str)
|
||||
|
||||
try:
|
||||
data = json.loads(body_str) if body_str else {}
|
||||
if self.path == '/api/project':
|
||||
@@ -209,7 +201,6 @@ class HookHandler(BaseHTTPRequestHandler):
|
||||
elif self.path == '/api/gui':
|
||||
with app._pending_gui_tasks_lock:
|
||||
app._pending_gui_tasks.append(data)
|
||||
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'application/json')
|
||||
self.end_headers()
|
||||
@@ -218,14 +209,11 @@ class HookHandler(BaseHTTPRequestHandler):
|
||||
elif self.path == '/api/ask':
|
||||
request_id = str(uuid.uuid4())
|
||||
event = threading.Event()
|
||||
|
||||
if not hasattr(app, '_pending_asks'):
|
||||
app._pending_asks = {}
|
||||
if not hasattr(app, '_ask_responses'):
|
||||
app._ask_responses = {}
|
||||
|
||||
app._pending_asks[request_id] = event
|
||||
|
||||
# Emit event for test/client discovery
|
||||
with app._api_event_queue_lock:
|
||||
app._api_event_queue.append({
|
||||
@@ -233,20 +221,17 @@ class HookHandler(BaseHTTPRequestHandler):
|
||||
"request_id": request_id,
|
||||
"data": data
|
||||
})
|
||||
|
||||
with app._pending_gui_tasks_lock:
|
||||
app._pending_gui_tasks.append({
|
||||
"type": "ask",
|
||||
"request_id": request_id,
|
||||
"data": data
|
||||
})
|
||||
|
||||
if event.wait(timeout=60.0):
|
||||
response_data = app._ask_responses.get(request_id)
|
||||
# Clean up response after reading
|
||||
if request_id in app._ask_responses:
|
||||
del app._ask_responses[request_id]
|
||||
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'application/json')
|
||||
self.end_headers()
|
||||
@@ -257,26 +242,21 @@ class HookHandler(BaseHTTPRequestHandler):
|
||||
self.send_response(504)
|
||||
self.end_headers()
|
||||
self.wfile.write(json.dumps({'error': 'timeout'}).encode('utf-8'))
|
||||
|
||||
elif self.path == '/api/ask/respond':
|
||||
request_id = data.get('request_id')
|
||||
response_data = data.get('response')
|
||||
|
||||
if request_id and hasattr(app, '_pending_asks') and request_id in app._pending_asks:
|
||||
app._ask_responses[request_id] = response_data
|
||||
event = app._pending_asks[request_id]
|
||||
event.set()
|
||||
|
||||
# Clean up pending ask entry
|
||||
del app._pending_asks[request_id]
|
||||
|
||||
# Queue GUI task to clear the dialog
|
||||
with app._pending_gui_tasks_lock:
|
||||
app._pending_gui_tasks.append({
|
||||
"action": "clear_ask",
|
||||
"request_id": request_id
|
||||
})
|
||||
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'application/json')
|
||||
self.end_headers()
|
||||
@@ -306,29 +286,24 @@ class HookServer:
|
||||
def start(self):
|
||||
if self.thread and self.thread.is_alive():
|
||||
return
|
||||
|
||||
is_gemini_cli = getattr(self.app, 'current_provider', '') == 'gemini_cli'
|
||||
if not getattr(self.app, 'test_hooks_enabled', False) and not is_gemini_cli:
|
||||
return
|
||||
|
||||
# Ensure the app has the task queue and lock initialized
|
||||
if not hasattr(self.app, '_pending_gui_tasks'):
|
||||
self.app._pending_gui_tasks = []
|
||||
if not hasattr(self.app, '_pending_gui_tasks_lock'):
|
||||
self.app._pending_gui_tasks_lock = threading.Lock()
|
||||
|
||||
# Initialize ask-related dictionaries
|
||||
if not hasattr(self.app, '_pending_asks'):
|
||||
self.app._pending_asks = {}
|
||||
if not hasattr(self.app, '_ask_responses'):
|
||||
self.app._ask_responses = {}
|
||||
|
||||
# Event queue for test script subscriptions
|
||||
if not hasattr(self.app, '_api_event_queue'):
|
||||
self.app._api_event_queue = []
|
||||
if not hasattr(self.app, '_api_event_queue_lock'):
|
||||
self.app._api_event_queue_lock = threading.Lock()
|
||||
|
||||
self.server = HookServerInstance(('127.0.0.1', self.port), HookHandler, self.app)
|
||||
self.thread = threading.Thread(target=self.server.serve_forever, daemon=True)
|
||||
self.thread.start()
|
||||
|
||||
@@ -20,6 +20,5 @@ if __name__ == "__main__":
|
||||
# Test 1: Simple read
|
||||
print("TEST 1: read_file")
|
||||
run_diag("tier3-worker", "Read the file 'pyproject.toml' and tell me the version of the project. ONLY the version string.")
|
||||
|
||||
print("\nTEST 2: run_shell_command")
|
||||
run_diag("tier3-worker", "Use run_shell_command to execute 'echo HELLO_SUBAGENT' and return the output. ONLY the output.")
|
||||
|
||||
@@ -22,7 +22,6 @@ def test_subagent_script_qa_live():
|
||||
"""Verify that the QA role works and returns a compressed fix."""
|
||||
prompt = "Traceback (most recent call last): File 'test.py', line 1, in <module> 1/0 ZeroDivisionError: division by zero"
|
||||
result = run_ps_script("QA", prompt)
|
||||
|
||||
assert result.returncode == 0
|
||||
# Expected output should mention the fix for division by zero
|
||||
assert "zero" in result.stdout.lower()
|
||||
@@ -33,7 +32,6 @@ def test_subagent_script_worker_live():
|
||||
"""Verify that the Worker role works and returns code."""
|
||||
prompt = "Write a python function that returns 'hello world'"
|
||||
result = run_ps_script("Worker", prompt)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert "def" in result.stdout.lower()
|
||||
assert "hello" in result.stdout.lower()
|
||||
@@ -42,7 +40,6 @@ def test_subagent_script_utility_live():
|
||||
"""Verify that the Utility role works."""
|
||||
prompt = "Tell me 'True' if 1+1=2, otherwise 'False'"
|
||||
result = run_ps_script("Utility", prompt)
|
||||
|
||||
assert result.returncode == 0
|
||||
assert "true" in result.stdout.lower()
|
||||
|
||||
@@ -51,7 +48,6 @@ def test_subagent_isolation_live():
|
||||
# This prompt asks the sub-agent about a 'secret' mentioned only here, not in its prompt.
|
||||
prompt = "What is the secret code I just told you? If I didn't tell you, say 'UNKNOWN'."
|
||||
result = run_ps_script("Utility", prompt)
|
||||
|
||||
assert result.returncode == 0
|
||||
# A stateless agent should not know any previous context.
|
||||
assert "unknown" in result.stdout.lower()
|
||||
|
||||
@@ -8,7 +8,6 @@ def test_parser_role_choices():
|
||||
parser = create_parser()
|
||||
valid_roles = ['tier1', 'tier2', 'tier3', 'tier4']
|
||||
test_prompt = "Analyze the codebase for bottlenecks."
|
||||
|
||||
for role in valid_roles:
|
||||
args = parser.parse_args(['--role', role, test_prompt])
|
||||
assert args.role == role
|
||||
@@ -57,37 +56,28 @@ def test_execute_agent():
|
||||
role = "tier3-worker"
|
||||
prompt = "Write a unit test."
|
||||
docs = ["file1.py", "docs/spec.md"]
|
||||
|
||||
expected_model = "gemini-2.5-flash-lite"
|
||||
|
||||
mock_stdout = "Mocked AI Response"
|
||||
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_process = MagicMock()
|
||||
mock_process.stdout = mock_stdout
|
||||
mock_process.returncode = 0
|
||||
mock_run.return_value = mock_process
|
||||
|
||||
result = execute_agent(role, prompt, docs)
|
||||
|
||||
mock_run.assert_called_once()
|
||||
args, kwargs = mock_run.call_args
|
||||
cmd_list = args[0]
|
||||
|
||||
assert cmd_list[0] == "powershell.exe"
|
||||
assert "-Command" in cmd_list
|
||||
ps_cmd = cmd_list[cmd_list.index("-Command") + 1]
|
||||
assert "gemini" in ps_cmd
|
||||
assert f"--model {expected_model}" in ps_cmd
|
||||
|
||||
# Verify input contains the prompt and system directive
|
||||
input_text = kwargs.get("input")
|
||||
assert "STRICT SYSTEM DIRECTIVE" in input_text
|
||||
assert "TASK: Write a unit test." in input_text
|
||||
|
||||
assert kwargs.get("capture_output") is True
|
||||
assert kwargs.get("text") is True
|
||||
|
||||
assert result == mock_stdout
|
||||
|
||||
def test_get_dependencies(tmp_path):
|
||||
@@ -102,8 +92,8 @@ def test_get_dependencies(tmp_path):
|
||||
dependencies = get_dependencies(str(filepath))
|
||||
assert dependencies == ['os', 'sys', 'file_cache', 'mcp_client']
|
||||
|
||||
|
||||
import re
|
||||
|
||||
def test_execute_agent_logging(tmp_path):
|
||||
log_file = tmp_path / "mma_delegation.log"
|
||||
# mma_exec now uses logs/agents/ for individual logs and logs/mma_delegation.log for master
|
||||
@@ -130,7 +120,6 @@ def test_execute_agent_tier3_injection(tmp_path):
|
||||
dep_content = "def do_work():\n pass\n\ndef other_func():\n print('hello')\n"
|
||||
dep_file = tmp_path / "dependency.py"
|
||||
dep_file.write_text(dep_content)
|
||||
|
||||
# We need to ensure generate_skeleton is mockable or working
|
||||
old_cwd = os.getcwd()
|
||||
os.chdir(tmp_path)
|
||||
|
||||
@@ -15,24 +15,19 @@ def log_message(msg):
|
||||
timestamp = "2026-02-25"
|
||||
print(f"[{timestamp}] {msg}")
|
||||
'''
|
||||
|
||||
skeleton = generate_skeleton(sample_code)
|
||||
|
||||
# Check that signatures are preserved
|
||||
assert "class Calculator:" in skeleton
|
||||
assert "def add(self, a: int, b: int) -> int:" in skeleton
|
||||
assert "def log_message(msg):" in skeleton
|
||||
|
||||
# Check that docstrings are preserved
|
||||
assert '"""Performs basic math operations."""' in skeleton
|
||||
assert '"""Adds two numbers."""' in skeleton
|
||||
|
||||
# Check that implementation details are removed
|
||||
assert "result = a + b" not in skeleton
|
||||
assert "return result" not in skeleton
|
||||
assert "timestamp =" not in skeleton
|
||||
assert "print(" not in skeleton
|
||||
|
||||
# Check that bodies are replaced with ellipsis
|
||||
assert "..." in skeleton
|
||||
|
||||
|
||||
@@ -9,5 +9,5 @@ This file tracks all major tracks for the project. Each track has its own detail
|
||||
|
||||
---
|
||||
|
||||
- [ ] **Track: AI-Optimized Python Style Refactor**
|
||||
- [~] **Track: AI-Optimized Python Style Refactor**
|
||||
*Link: [./tracks/python_style_refactor_20260227/](./tracks/python_style_refactor_20260227/)*
|
||||
@@ -6,14 +6,18 @@
|
||||
- [x] Task: Conductor - User Manual Verification 'Phase 1: Pilot and Tooling' (Protocol in workflow.md) [checkpoint: Phase1]
|
||||
|
||||
## Phase 2: Core Refactor - Indentation and Newlines
|
||||
- [~] Task: Conductor - Refactor Primary Engine Modules (`ai_client.py`, `aggregate.py`, `mcp_client.py`, `shell_runner.py`).
|
||||
- [ ] Task: Conductor - Refactor Project & Session Management Modules (`project_manager.py`, `session_logger.py`).
|
||||
- [ ] Task: Conductor - Refactor UI Modules (`gui_2.py`, `gui_legacy.py`, `theme.py`, `theme_2.py`).
|
||||
- [ ] Task: Conductor - Refactor Remaining Utility and Support Modules (`events.py`, `file_cache.py`, `models.py`, `mma_prompts.py`).
|
||||
- [ ] Task: Conductor - User Manual Verification 'Phase 2: Indentation and Newline Refactor' (Protocol in workflow.md)
|
||||
- [x] Task: Conductor - Refactor Primary Engine Modules (`ai_client.py`, `aggregate.py`, `mcp_client.py`, `shell_runner.py`). [db65162]
|
||||
- [x] Task: Conductor - Refactor Project & Session Management Modules (`project_manager.py`, `session_logger.py`). [db65162]
|
||||
- [x] Task: Conductor - Refactor UI Modules (`gui_2.py`, `gui_legacy.py`, `theme.py`, `theme_2.py`). [db65162]
|
||||
- [x] Task: Conductor - Refactor Remaining Utility and Support Modules (`events.py`, `file_cache.py`, `models.py`, `mma_prompts.py`). [db65162]
|
||||
- [x] Task: Conductor - User Manual Verification 'Phase 2: Indentation and Newline Refactor' (Protocol in workflow.md) [checkpoint: Phase2]
|
||||
|
||||
## Phase 3: AI-Optimized Metadata and Final Cleanup
|
||||
- [ ] Task: Conductor - Implement Strict Type Hinting and Compact Imports across the Entire Codebase.
|
||||
- [~] Task: Conductor - Implement Strict Type Hinting across the Entire Codebase.
|
||||
- [x] Engine Core (`ai_client.py`, `mcp_client.py`, `aggregate.py`, `shell_runner.py`)
|
||||
- [x] Develop/Integrate Surgical AST Tools in `mcp_client.py` and `tools.json`.
|
||||
- [x] Management Modules (project_manager.py, session_logger.py) [19c28a1]
|
||||
- [~] UI Modules (`gui_2.py`, `gui_legacy.py`)
|
||||
- [ ] Task: Conductor - Update `conductor/code_styleguides/python.md` with the new AI-optimized standard.
|
||||
- [ ] Task: Conductor - User Manual Verification 'Phase 3: Metadata and Final Documentation' (Protocol in workflow.md)
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ Refactor the Python codebase to a "Single-Space, Ultra-Compact" style specifical
|
||||
- **Newlines (Ultra-Compact):**
|
||||
- Maximum **one (1)** blank line between top-level definitions (classes, functions).
|
||||
- **Zero (0)** blank lines inside function or method bodies.
|
||||
- **Imports (Compact):** Consolidate imports into compact blocks to reduce vertical space.
|
||||
- **Typing (Strict):** Ensure all function and method signatures include strict type hints for `Args` and `Returns`.
|
||||
- **Scope:**
|
||||
- Target: All `.py` files in the project root and subdirectories.
|
||||
@@ -19,14 +18,22 @@ Refactor the Python codebase to a "Single-Space, Ultra-Compact" style specifical
|
||||
- **AST Compatibility:** The style must not interfere with existing AST tools (`ast`, `tree-sitter`) used for interface extraction and code outlines.
|
||||
- **Token Efficiency:** The primary goal is to reduce the total token count of the codebase.
|
||||
|
||||
## 4. Acceptance Criteria
|
||||
- [ ] Codebase indentation is uniformly 1 space.
|
||||
- [ ] No `.py` file contains consecutive blank lines.
|
||||
- [ ] No `.py` file contains blank lines within function or method bodies.
|
||||
- [ ] All functions/methods have complete type hints.
|
||||
- [ ] Application remains functional and passes existing tests.
|
||||
## 4. Current Status (Progress Checkpoint)
|
||||
- **Phase 1: Completed.** Tooling developed (`scripts/ai_style_formatter.py`) and verified.
|
||||
- **Phase 2: Completed.** Global codebase refactor for indentation and ultra-compact newlines (including 1-line gap before definitions) applied to all Python files.
|
||||
- **Phase 3: In Progress.**
|
||||
- **Surgical Tooling:** New tools added to `mcp_client.py` and `.gemini/tools.json`: `get_file_slice`, `set_file_slice`, `py_update_definition`, `py_get_signature`, `py_set_signature`, `py_get_class_summary`, `py_get_var_declaration`, `py_set_var_declaration`.
|
||||
- **Core Typing:** `ai_client.py`, `mcp_client.py`, `aggregate.py`, `shell_runner.py` fully updated with strict type hints.
|
||||
- **Remaining:** `project_manager.py`, `session_logger.py`, `gui_2.py`, `gui_legacy.py` need strict typing.
|
||||
|
||||
## 5. Out of Scope
|
||||
## 5. Acceptance Criteria
|
||||
- [x] Codebase indentation is uniformly 1 space.
|
||||
- [x] No `.py` file contains consecutive blank lines.
|
||||
- [x] No `.py` file contains blank lines within function or method bodies.
|
||||
- [~] All functions/methods have complete type hints (Core Engine complete, UI/Manager pending).
|
||||
- [x] Application remains functional and passes existing tests.
|
||||
|
||||
## 6. Out of Scope
|
||||
- Architectural changes or logic refactoring.
|
||||
- Modification of non-Python files (e.g., `.md`, `.toml`, `.ps1`).
|
||||
- Breaking PEP 8 compliance where it's not strictly necessary for token reduction (though indentation and blank lines are explicitly targeted).
|
||||
- Import compaction (discarded per user request).
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
- [x] Task: Implement helper methods in `ApiHookClient` for querying specific DearPyGui item states (e.g., `get_text_value`, `get_node_status`). 2a30e62
|
||||
|
||||
## Phase 2: Epic & Track Verification
|
||||
- [ ] Task: Write the simulation routine to trigger a new Epic and verify the Track Browser updates correctly.
|
||||
- [~] Task: Write the simulation routine to trigger a new Epic and verify the Track Browser updates correctly.
|
||||
- [ ] Task: Verify that selecting a newly generated track successfully loads its initial (empty) state into the DAG visualizer.
|
||||
|
||||
## Phase 3: DAG & Spawn Interception Verification
|
||||
|
||||
@@ -11,27 +11,22 @@ def generate_tickets(track_brief: str, module_skeletons: str) -> list[dict]:
|
||||
# 1. Set Tier 2 Model (Tech Lead - Flash)
|
||||
ai_client.set_provider('gemini', 'gemini-2.5-flash-lite')
|
||||
ai_client.reset_session()
|
||||
|
||||
# 2. Construct Prompt
|
||||
system_prompt = mma_prompts.PROMPTS.get("tier2_sprint_planning")
|
||||
|
||||
user_message = (
|
||||
f"### TRACK BRIEF:\n{track_brief}\n\n"
|
||||
f"### MODULE SKELETONS:\n{module_skeletons}\n\n"
|
||||
"Please generate the implementation tickets for this track."
|
||||
)
|
||||
|
||||
# Set custom system prompt for this call
|
||||
old_system_prompt = ai_client._custom_system_prompt
|
||||
ai_client.set_custom_system_prompt(system_prompt)
|
||||
|
||||
try:
|
||||
# 3. Call Tier 2 Model
|
||||
response = ai_client.send(
|
||||
md_content="",
|
||||
user_message=user_message
|
||||
)
|
||||
|
||||
# 4. Parse JSON Output
|
||||
# Extract JSON array from markdown code blocks if present
|
||||
json_match = response.strip()
|
||||
@@ -39,13 +34,11 @@ def generate_tickets(track_brief: str, module_skeletons: str) -> list[dict]:
|
||||
json_match = json_match.split("```json")[1].split("```")[0].strip()
|
||||
elif "```" in json_match:
|
||||
json_match = json_match.split("```")[1].split("```")[0].strip()
|
||||
|
||||
# If it's still not valid JSON, try to find a [ ... ] block
|
||||
if not (json_match.startswith('[') and json_match.endswith(']')):
|
||||
match = re.search(r'\[\s*\{.*\}\s*\]', json_match, re.DOTALL)
|
||||
if match:
|
||||
json_match = match.group(0)
|
||||
|
||||
tickets = json.loads(json_match)
|
||||
return tickets
|
||||
except Exception as e:
|
||||
@@ -68,14 +61,12 @@ def topological_sort(tickets: list[dict]) -> list[dict]:
|
||||
ticket_objs = []
|
||||
for t_data in tickets:
|
||||
ticket_objs.append(Ticket.from_dict(t_data))
|
||||
|
||||
# 2. Use TrackDAG for validation and sorting
|
||||
dag = TrackDAG(ticket_objs)
|
||||
try:
|
||||
sorted_ids = dag.topological_sort()
|
||||
except ValueError as e:
|
||||
raise ValueError(f"DAG Validation Error: {e}")
|
||||
|
||||
# 3. Return sorted dictionaries
|
||||
ticket_map = {t['id']: t for t in tickets}
|
||||
return [ticket_map[tid] for tid in sorted_ids]
|
||||
|
||||
@@ -22,7 +22,7 @@ paths = [
|
||||
"C:\\projects\\manual_slop\\tests\\temp_livetoolssim.toml",
|
||||
"C:\\projects\\manual_slop\\tests\\temp_liveexecutionsim.toml",
|
||||
]
|
||||
active = "C:\\projects\\manual_slop\\tests\\temp_project.toml"
|
||||
active = "C:\\projects\\manual_slop\\tests\\temp_liveexecutionsim.toml"
|
||||
|
||||
[gui.show_windows]
|
||||
"Context Hub" = true
|
||||
|
||||
@@ -6,6 +6,7 @@ class TrackDAG:
|
||||
Manages a Directed Acyclic Graph of implementation tickets.
|
||||
Provides methods for dependency resolution, cycle detection, and topological sorting.
|
||||
"""
|
||||
|
||||
def __init__(self, tickets: List[Ticket]):
|
||||
"""
|
||||
Initializes the TrackDAG with a list of Ticket objects.
|
||||
@@ -50,19 +51,15 @@ class TrackDAG:
|
||||
return True
|
||||
if ticket_id in visited:
|
||||
return False
|
||||
|
||||
visited.add(ticket_id)
|
||||
rec_stack.add(ticket_id)
|
||||
|
||||
ticket = self.ticket_map.get(ticket_id)
|
||||
if ticket:
|
||||
for neighbor in ticket.depends_on:
|
||||
if is_cyclic(neighbor):
|
||||
return True
|
||||
|
||||
rec_stack.remove(ticket_id)
|
||||
return False
|
||||
|
||||
for ticket in self.tickets:
|
||||
if ticket.id not in visited:
|
||||
if is_cyclic(ticket.id):
|
||||
@@ -79,7 +76,6 @@ class TrackDAG:
|
||||
"""
|
||||
if self.has_cycle():
|
||||
raise ValueError("Dependency cycle detected")
|
||||
|
||||
visited = set()
|
||||
stack = []
|
||||
|
||||
@@ -93,10 +89,8 @@ class TrackDAG:
|
||||
for dep_id in ticket.depends_on:
|
||||
visit(dep_id)
|
||||
stack.append(ticket_id)
|
||||
|
||||
for ticket in self.tickets:
|
||||
visit(ticket.id)
|
||||
|
||||
return stack
|
||||
|
||||
class ExecutionEngine:
|
||||
@@ -104,6 +98,7 @@ class ExecutionEngine:
|
||||
A state machine that governs the progression of tasks within a TrackDAG.
|
||||
Handles automatic queueing and manual task approval.
|
||||
"""
|
||||
|
||||
def __init__(self, dag: TrackDAG, auto_queue: bool = False):
|
||||
"""
|
||||
Initializes the ExecutionEngine.
|
||||
@@ -122,12 +117,10 @@ class ExecutionEngine:
|
||||
A list of ready Ticket objects.
|
||||
"""
|
||||
ready = self.dag.get_ready_tasks()
|
||||
|
||||
if self.auto_queue:
|
||||
for ticket in ready:
|
||||
if not ticket.step_mode:
|
||||
ticket.status = "in_progress"
|
||||
|
||||
return ready
|
||||
|
||||
def approve_task(self, task_id: str):
|
||||
@@ -145,7 +138,6 @@ class ExecutionEngine:
|
||||
if not dep or dep.status != "completed":
|
||||
all_done = False
|
||||
break
|
||||
|
||||
if all_done:
|
||||
ticket.status = "in_progress"
|
||||
|
||||
|
||||
@@ -49,7 +49,6 @@ class ASTParser:
|
||||
if body and body.type == "block":
|
||||
preserve = has_core_logic_decorator(node) or has_hot_comment(node)
|
||||
print(f"Function {code[node.start_byte:node.start_byte+20].strip()}, preserve={preserve}")
|
||||
|
||||
if not preserve:
|
||||
indent = " " * body.start_point.column
|
||||
first_stmt = None
|
||||
@@ -57,7 +56,6 @@ class ASTParser:
|
||||
if child.type != "comment":
|
||||
first_stmt = child
|
||||
break
|
||||
|
||||
if first_stmt and is_docstring(first_stmt):
|
||||
start_byte = first_stmt.end_byte
|
||||
end_byte = body.end_byte
|
||||
@@ -67,10 +65,8 @@ class ASTParser:
|
||||
start_byte = body.start_byte
|
||||
end_byte = body.end_byte
|
||||
edits.append((start_byte, end_byte, "..."))
|
||||
|
||||
for child in node.children:
|
||||
walk(child)
|
||||
|
||||
walk(tree.root_node)
|
||||
edits.sort(key=lambda x: x[0], reverse=True)
|
||||
code_bytes = bytearray(code, "utf8")
|
||||
|
||||
@@ -8,6 +8,7 @@ class EventEmitter:
|
||||
"""
|
||||
Simple event emitter for decoupled communication between modules.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initializes the EventEmitter with an empty listener map."""
|
||||
self._listeners: Dict[str, List[Callable]] = {}
|
||||
@@ -41,6 +42,7 @@ class AsyncEventQueue:
|
||||
"""
|
||||
Asynchronous event queue for decoupled communication using asyncio.Queue.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initializes the AsyncEventQueue with an internal asyncio.Queue."""
|
||||
self._queue: asyncio.Queue = asyncio.Queue()
|
||||
@@ -68,6 +70,7 @@ class UserRequestEvent:
|
||||
"""
|
||||
Payload for a user request event.
|
||||
"""
|
||||
|
||||
def __init__(self, prompt: str, stable_md: str, file_items: List[Any], disc_text: str, base_dir: str):
|
||||
self.prompt = prompt
|
||||
self.stable_md = stable_md
|
||||
|
||||
@@ -10,12 +10,12 @@ from typing import Optional
|
||||
import tree_sitter
|
||||
import tree_sitter_python
|
||||
|
||||
|
||||
class ASTParser:
|
||||
"""
|
||||
Parser for extracting AST-based views of source code.
|
||||
Currently supports Python.
|
||||
"""
|
||||
|
||||
def __init__(self, language: str):
|
||||
if language != "python":
|
||||
raise ValueError(f"Language '{language}' not supported yet.")
|
||||
@@ -51,7 +51,6 @@ class ASTParser:
|
||||
if child.type != "comment":
|
||||
first_stmt = child
|
||||
break
|
||||
|
||||
if first_stmt and is_docstring(first_stmt):
|
||||
start_byte = first_stmt.end_byte
|
||||
end_byte = body.end_byte
|
||||
@@ -61,18 +60,14 @@ class ASTParser:
|
||||
start_byte = body.start_byte
|
||||
end_byte = body.end_byte
|
||||
edits.append((start_byte, end_byte, "..."))
|
||||
|
||||
for child in node.children:
|
||||
walk(child)
|
||||
|
||||
walk(tree.root_node)
|
||||
|
||||
# Apply edits in reverse to maintain byte offsets
|
||||
edits.sort(key=lambda x: x[0], reverse=True)
|
||||
code_bytes = bytearray(code, "utf8")
|
||||
for start, end, replacement in edits:
|
||||
code_bytes[start:end] = bytes(replacement, "utf8")
|
||||
|
||||
return code_bytes.decode("utf8")
|
||||
|
||||
def get_curated_view(self, code: str) -> str:
|
||||
@@ -120,7 +115,6 @@ class ASTParser:
|
||||
if body and body.type == "block":
|
||||
# Check if we should preserve it
|
||||
preserve = has_core_logic_decorator(node) or has_hot_comment(node)
|
||||
|
||||
if not preserve:
|
||||
indent = " " * body.start_point.column
|
||||
first_stmt = None
|
||||
@@ -128,7 +122,6 @@ class ASTParser:
|
||||
if child.type != "comment":
|
||||
first_stmt = child
|
||||
break
|
||||
|
||||
if first_stmt and is_docstring(first_stmt):
|
||||
start_byte = first_stmt.end_byte
|
||||
end_byte = body.end_byte
|
||||
@@ -138,36 +131,27 @@ class ASTParser:
|
||||
start_byte = body.start_byte
|
||||
end_byte = body.end_byte
|
||||
edits.append((start_byte, end_byte, "..."))
|
||||
|
||||
for child in node.children:
|
||||
walk(child)
|
||||
|
||||
walk(tree.root_node)
|
||||
|
||||
# Apply edits in reverse to maintain byte offsets
|
||||
edits.sort(key=lambda x: x[0], reverse=True)
|
||||
code_bytes = bytearray(code, "utf8")
|
||||
for start, end, replacement in edits:
|
||||
code_bytes[start:end] = bytes(replacement, "utf8")
|
||||
|
||||
return code_bytes.decode("utf8")
|
||||
|
||||
|
||||
def reset_client():
|
||||
pass
|
||||
|
||||
|
||||
def content_block_type(path: Path) -> str:
|
||||
return "unsupported"
|
||||
|
||||
|
||||
def get_file_id(path: Path) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def evict(path: Path):
|
||||
pass
|
||||
|
||||
|
||||
def list_cached() -> list[dict]:
|
||||
return []
|
||||
|
||||
@@ -31,38 +31,27 @@ class GeminiCliAdapter:
|
||||
Uses --prompt flag with a placeholder and sends the content via stdin.
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
command_parts = [self.binary_path]
|
||||
|
||||
if model:
|
||||
command_parts.extend(['-m', f'"{model}"'])
|
||||
|
||||
# Use an empty string placeholder.
|
||||
command_parts.extend(['--prompt', '""'])
|
||||
|
||||
if self.session_id:
|
||||
command_parts.extend(['--resume', self.session_id])
|
||||
|
||||
command_parts.extend(['--output-format', 'stream-json'])
|
||||
|
||||
command = " ".join(command_parts)
|
||||
|
||||
# Construct the prompt text by prepending system_instruction if available
|
||||
prompt_text = message
|
||||
if system_instruction:
|
||||
prompt_text = f"{system_instruction}\n\n{message}"
|
||||
|
||||
accumulated_text = ""
|
||||
tool_calls = []
|
||||
|
||||
env = os.environ.copy()
|
||||
env["GEMINI_CLI_HOOK_CONTEXT"] = "manual_slop"
|
||||
|
||||
process = None
|
||||
stdout_content = ""
|
||||
stderr_content = ""
|
||||
stdin_content = prompt_text
|
||||
|
||||
try:
|
||||
process = subprocess.Popen(
|
||||
command,
|
||||
@@ -73,12 +62,9 @@ class GeminiCliAdapter:
|
||||
shell=True,
|
||||
env=env
|
||||
)
|
||||
|
||||
stdout_output, stderr_output = process.communicate(input=prompt_text)
|
||||
|
||||
stdout_content = stdout_output
|
||||
stderr_content = stderr_output
|
||||
|
||||
for line in stdout_content.splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
@@ -86,7 +72,6 @@ class GeminiCliAdapter:
|
||||
try:
|
||||
data = json.loads(line)
|
||||
msg_type = data.get("type")
|
||||
|
||||
if msg_type == "init":
|
||||
if "session_id" in data:
|
||||
self.session_id = data.get("session_id")
|
||||
@@ -115,7 +100,6 @@ class GeminiCliAdapter:
|
||||
tool_calls.append(tc)
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
if process:
|
||||
process.kill()
|
||||
@@ -132,7 +116,6 @@ class GeminiCliAdapter:
|
||||
latency=current_latency
|
||||
)
|
||||
self.last_latency = current_latency
|
||||
|
||||
return {
|
||||
"text": accumulated_text,
|
||||
"tool_calls": tool_calls,
|
||||
|
||||
189
gui_legacy.py
189
gui_legacy.py
File diff suppressed because it is too large
Load Diff
@@ -9,6 +9,7 @@ class LogPruner:
|
||||
Ensures that only whitelisted or significant sessions (based on size/content)
|
||||
are preserved long-term.
|
||||
"""
|
||||
|
||||
def __init__(self, log_registry: LogRegistry, logs_dir: str):
|
||||
"""
|
||||
Initializes the LogPruner.
|
||||
@@ -31,22 +32,17 @@ class LogPruner:
|
||||
"""
|
||||
now = datetime.now()
|
||||
cutoff_time = now - timedelta(hours=24)
|
||||
|
||||
# Ensure the base logs directory exists.
|
||||
if not os.path.isdir(self.logs_dir):
|
||||
return
|
||||
|
||||
# Get sessions that are old and not whitelisted from the registry
|
||||
old_sessions_to_check = self.log_registry.get_old_non_whitelisted_sessions(cutoff_time)
|
||||
|
||||
# Prune sessions if their size is less than 2048 bytes
|
||||
for session_info in old_sessions_to_check:
|
||||
session_id = session_info['session_id']
|
||||
session_path = session_info['path']
|
||||
|
||||
if not session_path or not os.path.isdir(session_path):
|
||||
continue
|
||||
|
||||
# Calculate total size of files in the directory
|
||||
total_size = 0
|
||||
try:
|
||||
@@ -55,7 +51,6 @@ class LogPruner:
|
||||
total_size += entry.stat().st_size
|
||||
except OSError:
|
||||
continue
|
||||
|
||||
# Prune if the total size is less than 2KB (2048 bytes)
|
||||
if total_size < 2048: # 2KB
|
||||
try:
|
||||
|
||||
@@ -8,6 +8,7 @@ class LogRegistry:
|
||||
Manages a persistent registry of session logs using a TOML file.
|
||||
Tracks session paths, start times, whitelisting status, and metadata.
|
||||
"""
|
||||
|
||||
def __init__(self, registry_path):
|
||||
"""
|
||||
Initializes the LogRegistry with a path to the registry file.
|
||||
@@ -75,7 +76,6 @@ class LogRegistry:
|
||||
else:
|
||||
session_data_copy[k] = v
|
||||
data_to_save[session_id] = session_data_copy
|
||||
|
||||
with open(self.registry_path, 'wb') as f:
|
||||
tomli_w.dump(data_to_save, f)
|
||||
except Exception as e:
|
||||
@@ -92,13 +92,11 @@ class LogRegistry:
|
||||
"""
|
||||
if session_id in self.data:
|
||||
print(f"Warning: Session ID '{session_id}' already exists. Overwriting.")
|
||||
|
||||
# Store start_time internally as a string to satisfy tests
|
||||
if isinstance(start_time, datetime):
|
||||
start_time_str = start_time.isoformat()
|
||||
else:
|
||||
start_time_str = start_time
|
||||
|
||||
self.data[session_id] = {
|
||||
'path': path,
|
||||
'start_time': start_time_str,
|
||||
@@ -122,11 +120,9 @@ class LogRegistry:
|
||||
if session_id not in self.data:
|
||||
print(f"Error: Session ID '{session_id}' not found for metadata update.")
|
||||
return
|
||||
|
||||
# Ensure metadata exists
|
||||
if self.data[session_id].get('metadata') is None:
|
||||
self.data[session_id]['metadata'] = {}
|
||||
|
||||
# Update fields
|
||||
self.data[session_id]['metadata']['message_count'] = message_count
|
||||
self.data[session_id]['metadata']['errors'] = errors
|
||||
@@ -134,11 +130,9 @@ class LogRegistry:
|
||||
self.data[session_id]['metadata']['whitelisted'] = whitelisted
|
||||
self.data[session_id]['metadata']['reason'] = reason
|
||||
# self.data[session_id]['metadata']['timestamp'] = datetime.utcnow() # Optionally add a timestamp
|
||||
|
||||
# Also update the top-level whitelisted flag if provided
|
||||
if whitelisted is not None:
|
||||
self.data[session_id]['whitelisted'] = whitelisted
|
||||
|
||||
self.save_registry() # Save after update
|
||||
|
||||
def is_session_whitelisted(self, session_id):
|
||||
@@ -154,7 +148,6 @@ class LogRegistry:
|
||||
session_data = self.data.get(session_id)
|
||||
if session_data is None:
|
||||
return False # Non-existent sessions are not whitelisted
|
||||
|
||||
# Check the top-level 'whitelisted' flag. If it's not set or False, it's not whitelisted.
|
||||
return session_data.get('whitelisted', False)
|
||||
|
||||
@@ -169,23 +162,19 @@ class LogRegistry:
|
||||
"""
|
||||
if session_id not in self.data:
|
||||
return
|
||||
|
||||
session_data = self.data[session_id]
|
||||
session_path = session_data.get('path')
|
||||
if not session_path or not os.path.isdir(session_path):
|
||||
return
|
||||
|
||||
total_size_bytes = 0
|
||||
message_count = 0
|
||||
found_keywords = []
|
||||
keywords_to_check = ['ERROR', 'WARNING', 'EXCEPTION']
|
||||
|
||||
try:
|
||||
for entry in os.scandir(session_path):
|
||||
if entry.is_file():
|
||||
size = entry.stat().st_size
|
||||
total_size_bytes += size
|
||||
|
||||
# Analyze comms.log for messages and keywords
|
||||
if entry.name == "comms.log":
|
||||
try:
|
||||
@@ -199,11 +188,9 @@ class LogRegistry:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
size_kb = total_size_bytes / 1024
|
||||
whitelisted = False
|
||||
reason = ""
|
||||
|
||||
if found_keywords:
|
||||
whitelisted = True
|
||||
reason = f"Found keywords: {', '.join(found_keywords)}"
|
||||
@@ -213,7 +200,6 @@ class LogRegistry:
|
||||
elif size_kb > 50:
|
||||
whitelisted = True
|
||||
reason = f"Large session size: {size_kb:.1f} KB"
|
||||
|
||||
self.update_session_metadata(
|
||||
session_id,
|
||||
message_count=message_count,
|
||||
@@ -245,9 +231,7 @@ class LogRegistry:
|
||||
start_time = None
|
||||
else:
|
||||
start_time = start_time_raw
|
||||
|
||||
is_whitelisted = session_data.get('whitelisted', False)
|
||||
|
||||
if start_time is not None and start_time < cutoff_datetime and not is_whitelisted:
|
||||
old_sessions.append({
|
||||
'session_id': session_id,
|
||||
|
||||
750
mcp_client.py
750
mcp_client.py
File diff suppressed because it is too large
Load Diff
@@ -68,12 +68,10 @@ class Track:
|
||||
"""
|
||||
# Map ticket IDs to their current status for efficient lookup
|
||||
status_map = {t.id: t.status for t in self.tickets}
|
||||
|
||||
executable = []
|
||||
for ticket in self.tickets:
|
||||
if ticket.status != "todo":
|
||||
continue
|
||||
|
||||
# Check if all dependencies are completed
|
||||
all_deps_completed = True
|
||||
for dep_id in ticket.depends_on:
|
||||
@@ -81,10 +79,8 @@ class Track:
|
||||
if status_map.get(dep_id) != "completed":
|
||||
all_deps_completed = False
|
||||
break
|
||||
|
||||
if all_deps_completed:
|
||||
executable.append(ticket)
|
||||
|
||||
return executable
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -16,6 +16,7 @@ class ConductorEngine:
|
||||
"""
|
||||
Orchestrates the execution of tickets within a track.
|
||||
"""
|
||||
|
||||
def __init__(self, track: Track, event_queue: Optional[events.AsyncEventQueue] = None, auto_queue: bool = False):
|
||||
self.track = track
|
||||
self.event_queue = event_queue
|
||||
@@ -31,7 +32,6 @@ class ConductorEngine:
|
||||
async def _push_state(self, status: str = "running", active_tier: str = None):
|
||||
if not self.event_queue:
|
||||
return
|
||||
|
||||
payload = {
|
||||
"status": status,
|
||||
"active_tier": active_tier,
|
||||
@@ -54,7 +54,6 @@ class ConductorEngine:
|
||||
if not isinstance(data, list):
|
||||
print("Error: JSON input must be a list of ticket definitions.")
|
||||
return
|
||||
|
||||
for ticket_data in data:
|
||||
# Construct Ticket object, using defaults for optional fields
|
||||
ticket = Ticket(
|
||||
@@ -66,11 +65,9 @@ class ConductorEngine:
|
||||
step_mode=ticket_data.get("step_mode", False)
|
||||
)
|
||||
self.track.tickets.append(ticket)
|
||||
|
||||
# Rebuild DAG and Engine after parsing new tickets
|
||||
self.dag = TrackDAG(self.track.tickets)
|
||||
self.engine = ExecutionEngine(self.dag, auto_queue=self.engine.auto_queue)
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error parsing JSON tickets: {e}")
|
||||
except KeyError as e:
|
||||
@@ -83,11 +80,9 @@ class ConductorEngine:
|
||||
md_content: The full markdown context (history + files) for AI workers.
|
||||
"""
|
||||
await self._push_state(status="running", active_tier="Tier 2 (Tech Lead)")
|
||||
|
||||
while True:
|
||||
# 1. Identify ready tasks
|
||||
ready_tasks = self.engine.tick()
|
||||
|
||||
# 2. Check for completion or blockage
|
||||
if not ready_tasks:
|
||||
all_done = all(t.status == "completed" for t in self.track.tickets)
|
||||
@@ -100,11 +95,9 @@ class ConductorEngine:
|
||||
# Wait for async tasks to complete
|
||||
await asyncio.sleep(1)
|
||||
continue
|
||||
|
||||
print("No more executable tickets. Track is blocked or finished.")
|
||||
await self._push_state(status="blocked", active_tier=None)
|
||||
break
|
||||
|
||||
# 3. Process ready tasks
|
||||
loop = asyncio.get_event_loop()
|
||||
for ticket in ready_tasks:
|
||||
@@ -114,13 +107,11 @@ class ConductorEngine:
|
||||
ticket.status = "in_progress"
|
||||
print(f"Executing ticket {ticket.id}: {ticket.description}")
|
||||
await self._push_state(active_tier=f"Tier 3 (Worker): {ticket.id}")
|
||||
|
||||
context = WorkerContext(
|
||||
ticket_id=ticket.id,
|
||||
model_name="gemini-2.5-flash-lite",
|
||||
messages=[]
|
||||
)
|
||||
|
||||
# Offload the blocking lifecycle call to a thread to avoid blocking the async event loop.
|
||||
# We pass the md_content so the worker has full context.
|
||||
context_files = ticket.context_requirements if ticket.context_requirements else None
|
||||
@@ -135,7 +126,6 @@ class ConductorEngine:
|
||||
md_content
|
||||
)
|
||||
await self._push_state(active_tier="Tier 2 (Tech Lead)")
|
||||
|
||||
elif ticket.status == "todo" and (ticket.step_mode or not self.engine.auto_queue):
|
||||
# Task is ready but needs approval
|
||||
print(f"Ticket {ticket.id} is ready and awaiting approval.")
|
||||
@@ -151,14 +141,12 @@ def confirm_execution(payload: str, event_queue: events.AsyncEventQueue, ticket_
|
||||
# We use a list container so the GUI can inject the actual Dialog object back to us
|
||||
# since the dialog is created in the GUI thread.
|
||||
dialog_container = [None]
|
||||
|
||||
task = {
|
||||
"action": "mma_step_approval",
|
||||
"ticket_id": ticket_id,
|
||||
"payload": payload,
|
||||
"dialog_container": dialog_container
|
||||
}
|
||||
|
||||
# Push to queue
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
@@ -169,16 +157,13 @@ def confirm_execution(payload: str, event_queue: events.AsyncEventQueue, ticket_
|
||||
except Exception:
|
||||
# Fallback if no loop
|
||||
event_queue._queue.put_nowait(("mma_step_approval", task))
|
||||
|
||||
# Wait for the GUI to create the dialog and for the user to respond
|
||||
start = time.time()
|
||||
while dialog_container[0] is None and time.time() - start < 60:
|
||||
time.sleep(0.1)
|
||||
|
||||
if dialog_container[0]:
|
||||
approved, final_payload = dialog_container[0].wait()
|
||||
return approved
|
||||
|
||||
return False
|
||||
|
||||
def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.AsyncEventQueue, ticket_id: str) -> Tuple[bool, str, str]:
|
||||
@@ -186,9 +171,7 @@ def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.A
|
||||
Pushes a spawn approval request to the GUI and waits for response.
|
||||
Returns (approved, modified_prompt, modified_context)
|
||||
"""
|
||||
|
||||
dialog_container = [None]
|
||||
|
||||
task = {
|
||||
"action": "mma_spawn_approval",
|
||||
"ticket_id": ticket_id,
|
||||
@@ -197,7 +180,6 @@ def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.A
|
||||
"context_md": context_md,
|
||||
"dialog_container": dialog_container
|
||||
}
|
||||
|
||||
# Push to queue
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
@@ -208,15 +190,12 @@ def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.A
|
||||
except Exception:
|
||||
# Fallback if no loop
|
||||
event_queue._queue.put_nowait(("mma_spawn_approval", task))
|
||||
|
||||
# Wait for the GUI to create the dialog and for the user to respond
|
||||
start = time.time()
|
||||
while dialog_container[0] is None and time.time() - start < 60:
|
||||
time.sleep(0.1)
|
||||
|
||||
if dialog_container[0]:
|
||||
res = dialog_container[0].wait()
|
||||
|
||||
if isinstance(res, dict):
|
||||
approved = res.get("approved", False)
|
||||
abort = res.get("abort", False)
|
||||
@@ -232,10 +211,8 @@ def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.A
|
||||
modified_prompt = final_payload.get("prompt", prompt)
|
||||
modified_context = final_payload.get("context_md", context_md)
|
||||
return approved, modified_prompt, modified_context
|
||||
|
||||
return False, prompt, context_md
|
||||
|
||||
|
||||
def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files: List[str] = None, event_queue: events.AsyncEventQueue = None, engine: Optional['ConductorEngine'] = None, md_content: str = ""):
|
||||
"""
|
||||
Simulates the lifecycle of a single agent working on a ticket.
|
||||
@@ -250,7 +227,6 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
|
||||
"""
|
||||
# Enforce Context Amnesia: each ticket starts with a clean slate.
|
||||
ai_client.reset_session()
|
||||
|
||||
context_injection = ""
|
||||
if context_files:
|
||||
parser = ASTParser(language="python")
|
||||
@@ -267,7 +243,6 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
|
||||
context_injection += f"\nFile: {file_path}\n{view}\n"
|
||||
except Exception as e:
|
||||
context_injection += f"\nError reading {file_path}: {e}\n"
|
||||
|
||||
# Build a prompt for the worker
|
||||
user_message = (
|
||||
f"You are assigned to Ticket {ticket.id}.\n"
|
||||
@@ -275,12 +250,10 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
|
||||
)
|
||||
if context_injection:
|
||||
user_message += f"\nContext Files:\n{context_injection}\n"
|
||||
|
||||
user_message += (
|
||||
"Please complete this task. If you are blocked and cannot proceed, "
|
||||
"start your response with 'BLOCKED' and explain why."
|
||||
)
|
||||
|
||||
# HITL Clutch: call confirm_spawn if event_queue is provided
|
||||
if event_queue:
|
||||
approved, modified_prompt, modified_context = confirm_spawn(
|
||||
@@ -293,16 +266,14 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
|
||||
if not approved:
|
||||
ticket.mark_blocked("Spawn rejected by user.")
|
||||
return "BLOCKED: Spawn rejected by user."
|
||||
|
||||
user_message = modified_prompt
|
||||
md_content = modified_context
|
||||
|
||||
# HITL Clutch: pass the queue and ticket_id to confirm_execution
|
||||
|
||||
def clutch_callback(payload: str) -> bool:
|
||||
if not event_queue:
|
||||
return True
|
||||
return confirm_execution(payload, event_queue, ticket.id)
|
||||
|
||||
response = ai_client.send(
|
||||
md_content=md_content,
|
||||
user_message=user_message,
|
||||
@@ -310,16 +281,13 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
|
||||
pre_tool_callback=clutch_callback if ticket.step_mode else None,
|
||||
qa_callback=ai_client.run_tier4_analysis
|
||||
)
|
||||
|
||||
# Update usage in engine if provided
|
||||
if engine:
|
||||
stats = {} # ai_client.get_token_stats() is not available
|
||||
engine.tier_usage["Tier 3"]["input"] += stats.get("prompt_tokens", 0)
|
||||
engine.tier_usage["Tier 3"]["output"] += stats.get("candidates_tokens", 0)
|
||||
|
||||
if "BLOCKED" in response.upper():
|
||||
ticket.mark_blocked(response)
|
||||
else:
|
||||
ticket.mark_complete()
|
||||
|
||||
return response
|
||||
|
||||
@@ -13,27 +13,21 @@ def get_track_history_summary() -> str:
|
||||
Scans conductor/archive/ and conductor/tracks/ to build a summary of past work.
|
||||
"""
|
||||
summary_parts = []
|
||||
|
||||
archive_path = CONDUCTOR_PATH / "archive"
|
||||
tracks_path = CONDUCTOR_PATH / "tracks"
|
||||
|
||||
paths_to_scan = []
|
||||
if archive_path.exists():
|
||||
paths_to_scan.extend(list(archive_path.iterdir()))
|
||||
if tracks_path.exists():
|
||||
paths_to_scan.extend(list(tracks_path.iterdir()))
|
||||
|
||||
for track_dir in paths_to_scan:
|
||||
if not track_dir.is_dir():
|
||||
continue
|
||||
|
||||
metadata_file = track_dir / "metadata.json"
|
||||
spec_file = track_dir / "spec.md"
|
||||
|
||||
title = track_dir.name
|
||||
status = "unknown"
|
||||
overview = "No overview available."
|
||||
|
||||
if metadata_file.exists():
|
||||
try:
|
||||
with open(metadata_file, "r", encoding="utf-8") as f:
|
||||
@@ -42,7 +36,6 @@ def get_track_history_summary() -> str:
|
||||
status = meta.get("status", status)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if spec_file.exists():
|
||||
try:
|
||||
with open(spec_file, "r", encoding="utf-8") as f:
|
||||
@@ -55,12 +48,9 @@ def get_track_history_summary() -> str:
|
||||
overview = content[:200] + "..."
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
summary_parts.append(f"Track: {title}\nStatus: {status}\nOverview: {overview}\n---")
|
||||
|
||||
if not summary_parts:
|
||||
return "No previous tracks found."
|
||||
|
||||
return "\n".join(summary_parts)
|
||||
|
||||
def generate_tracks(user_request: str, project_config: dict, file_items: list[dict], history_summary: str = None) -> list[dict]:
|
||||
@@ -70,26 +60,19 @@ def generate_tracks(user_request: str, project_config: dict, file_items: list[di
|
||||
"""
|
||||
# 1. Build Repository Map (Summary View)
|
||||
repo_map = summarize.build_summary_markdown(file_items)
|
||||
|
||||
# 2. Construct Prompt
|
||||
system_prompt = mma_prompts.PROMPTS.get("tier1_epic_init")
|
||||
|
||||
user_message_parts = [
|
||||
f"### USER REQUEST:\n{user_request}\n",
|
||||
f"### REPOSITORY MAP:\n{repo_map}\n"
|
||||
]
|
||||
|
||||
if history_summary:
|
||||
user_message_parts.append(f"### TRACK HISTORY:\n{history_summary}\n")
|
||||
|
||||
user_message_parts.append("Please generate the implementation tracks for this request.")
|
||||
|
||||
user_message = "\n".join(user_message_parts)
|
||||
|
||||
# Set custom system prompt for this call
|
||||
old_system_prompt = ai_client._custom_system_prompt
|
||||
ai_client.set_custom_system_prompt(system_prompt)
|
||||
|
||||
try:
|
||||
# 3. Call Tier 1 Model (Strategic - Pro)
|
||||
# Note: We use gemini-1.5-pro or similar high-reasoning model for Tier 1
|
||||
@@ -97,7 +80,6 @@ def generate_tracks(user_request: str, project_config: dict, file_items: list[di
|
||||
md_content="", # We pass everything in user_message for clarity
|
||||
user_message=user_message
|
||||
)
|
||||
|
||||
# 4. Parse JSON Output
|
||||
try:
|
||||
# The prompt asks for a JSON array. We need to extract it if the AI added markdown blocks.
|
||||
@@ -106,7 +88,6 @@ def generate_tracks(user_request: str, project_config: dict, file_items: list[di
|
||||
json_match = json_match.split("```json")[1].split("```")[0].strip()
|
||||
elif "```" in json_match:
|
||||
json_match = json_match.split("```")[1].split("```")[0].strip()
|
||||
|
||||
tracks = json.loads(json_match)
|
||||
# Ensure each track has a 'title' for the GUI
|
||||
for t in tracks:
|
||||
@@ -127,7 +108,6 @@ if __name__ == "__main__":
|
||||
proj = project_manager.load_project("manual_slop.toml")
|
||||
flat = project_manager.flat_config(proj)
|
||||
file_items = aggregate.build_file_items(Path("."), flat.get("files", {}).get("paths", []))
|
||||
|
||||
print("Testing Tier 1 Track Generation...")
|
||||
history = get_track_history_summary()
|
||||
tracks = generate_tracks("Implement a basic unit test for the ai_client.py module.", flat, file_items, history_summary=history)
|
||||
|
||||
@@ -11,7 +11,6 @@ class CodeOutliner:
|
||||
tree = ast.parse(code)
|
||||
except SyntaxError as e:
|
||||
return f"ERROR parsing code: {e}"
|
||||
|
||||
output = []
|
||||
|
||||
def get_docstring(node):
|
||||
@@ -30,26 +29,21 @@ class CodeOutliner:
|
||||
output.append(f"{' ' * (indent + 1)}\"\"\"{doc}\"\"\"")
|
||||
for item in node.body:
|
||||
walk(item, indent + 1)
|
||||
|
||||
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
||||
start_line = node.lineno
|
||||
end_line = getattr(node, "end_lineno", start_line)
|
||||
prefix = "[Async Func]" if isinstance(node, ast.AsyncFunctionDef) else "[Func]"
|
||||
|
||||
# Check if it's a method
|
||||
# We can check the indent or the parent, but in AST walk we know if we are inside a ClassDef
|
||||
# Let's use a simpler heuristic for the outline: if indent > 0, it's likely a method.
|
||||
if indent > 0:
|
||||
prefix = "[Method]"
|
||||
|
||||
output.append(f"{' ' * indent}{prefix} {node.name} (Lines {start_line}-{end_line})")
|
||||
doc = get_docstring(node)
|
||||
if doc:
|
||||
output.append(f"{' ' * (indent + 1)}\"\"\"{doc}\"\"\"")
|
||||
|
||||
for node in tree.body:
|
||||
walk(node)
|
||||
|
||||
return "\n".join(output)
|
||||
|
||||
def get_outline(path: Path, code: str) -> str:
|
||||
|
||||
@@ -12,11 +12,9 @@ class PerformanceMonitor:
|
||||
self._process = psutil.Process()
|
||||
self._cpu_usage = 0.0
|
||||
self._cpu_lock = threading.Lock()
|
||||
|
||||
# Input lag tracking
|
||||
self._last_input_time = None
|
||||
self._input_lag_ms = 0.0
|
||||
|
||||
# Alerts
|
||||
self.alert_callback = None
|
||||
self.thresholds = {
|
||||
@@ -26,11 +24,9 @@ class PerformanceMonitor:
|
||||
}
|
||||
self._last_alert_time = 0
|
||||
self._alert_cooldown = 30 # seconds
|
||||
|
||||
# Detailed profiling
|
||||
self._component_timings = {}
|
||||
self._comp_start = {}
|
||||
|
||||
# Start CPU usage monitoring thread
|
||||
self._stop_event = threading.Event()
|
||||
self._cpu_thread = threading.Thread(target=self._monitor_cpu, daemon=True)
|
||||
@@ -47,7 +43,6 @@ class PerformanceMonitor:
|
||||
self._cpu_usage = usage
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Sleep in small increments to stay responsive to stop_event
|
||||
for _ in range(10):
|
||||
if self._stop_event.is_set():
|
||||
@@ -71,18 +66,14 @@ class PerformanceMonitor:
|
||||
def end_frame(self):
|
||||
if self._start_time is None:
|
||||
return
|
||||
|
||||
end_time = time.time()
|
||||
self._last_frame_time = (end_time - self._start_time) * 1000.0
|
||||
self._frame_count += 1
|
||||
|
||||
# Calculate input lag if an input occurred during this frame
|
||||
if self._last_input_time is not None:
|
||||
self._input_lag_ms = (end_time - self._last_input_time) * 1000.0
|
||||
self._last_input_time = None
|
||||
|
||||
self._check_alerts()
|
||||
|
||||
elapsed_since_fps = end_time - self._fps_last_time
|
||||
if elapsed_since_fps >= 1.0:
|
||||
self._fps = self._frame_count / elapsed_since_fps
|
||||
@@ -92,11 +83,9 @@ class PerformanceMonitor:
|
||||
def _check_alerts(self):
|
||||
if not self.alert_callback:
|
||||
return
|
||||
|
||||
now = time.time()
|
||||
if now - self._last_alert_time < self._alert_cooldown:
|
||||
return
|
||||
|
||||
metrics = self.get_metrics()
|
||||
alerts = []
|
||||
if metrics['last_frame_time_ms'] > self.thresholds['frame_time_ms']:
|
||||
@@ -105,7 +94,6 @@ class PerformanceMonitor:
|
||||
alerts.append(f"CPU usage high: {metrics['cpu_percent']:.1f}%")
|
||||
if metrics['input_lag_ms'] > self.thresholds['input_lag_ms']:
|
||||
alerts.append(f"Input lag high: {metrics['input_lag_ms']:.1f}ms")
|
||||
|
||||
if alerts:
|
||||
self._last_alert_time = now
|
||||
self.alert_callback("; ".join(alerts))
|
||||
@@ -113,7 +101,6 @@ class PerformanceMonitor:
|
||||
def get_metrics(self):
|
||||
with self._cpu_lock:
|
||||
cpu_usage = self._cpu_usage
|
||||
|
||||
metrics = {
|
||||
'last_frame_time_ms': self._last_frame_time,
|
||||
'fps': self._fps,
|
||||
@@ -122,11 +109,9 @@ class PerformanceMonitor:
|
||||
}
|
||||
# Oops, fixed the input lag logic in previous turn, let's keep it consistent
|
||||
metrics['input_lag_ms'] = self._input_lag_ms
|
||||
|
||||
# Add detailed timings
|
||||
for name, elapsed in self._component_timings.items():
|
||||
metrics[f'time_{name}_ms'] = elapsed
|
||||
|
||||
return metrics
|
||||
|
||||
def stop(self):
|
||||
|
||||
@@ -11,25 +11,20 @@ import tomllib
|
||||
import tomli_w
|
||||
import re
|
||||
import json
|
||||
from typing import Any, Optional, TYPE_CHECKING, Union
|
||||
from pathlib import Path
|
||||
|
||||
TS_FMT = "%Y-%m-%dT%H:%M:%S"
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from models import TrackState
|
||||
TS_FMT: str = "%Y-%m-%dT%H:%M:%S"
|
||||
def now_ts() -> str:
|
||||
return datetime.datetime.now().strftime(TS_FMT)
|
||||
|
||||
|
||||
def parse_ts(s: str):
|
||||
def parse_ts(s: str) -> Optional[datetime.datetime]:
|
||||
try:
|
||||
return datetime.datetime.strptime(s, TS_FMT)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
# ── entry serialisation ──────────────────────────────────────────────────────
|
||||
|
||||
def entry_to_str(entry: dict) -> str:
|
||||
def entry_to_str(entry: dict[str, Any]) -> str:
|
||||
"""Serialise a disc entry dict -> stored string."""
|
||||
ts = entry.get("ts", "")
|
||||
role = entry.get("role", "User")
|
||||
@@ -37,9 +32,7 @@ def entry_to_str(entry: dict) -> str:
|
||||
if ts:
|
||||
return f"@{ts}\n{role}:\n{content}"
|
||||
return f"{role}:\n{content}"
|
||||
|
||||
|
||||
def str_to_entry(raw: str, roles: list[str]) -> dict:
|
||||
def str_to_entry(raw: str, roles: list[str]) -> dict[str, Any]:
|
||||
"""Parse a stored string back to a disc entry dict."""
|
||||
ts = ""
|
||||
rest = raw
|
||||
@@ -63,10 +56,7 @@ def str_to_entry(raw: str, roles: list[str]) -> dict:
|
||||
matched_role = next((r for r in known if r.lower() == raw_role.lower()), raw_role)
|
||||
content = parts[1].strip() if len(parts) > 1 else ""
|
||||
return {"role": matched_role, "content": content, "collapsed": False, "ts": ts}
|
||||
|
||||
|
||||
# ── git helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
def get_git_commit(git_dir: str) -> str:
|
||||
try:
|
||||
r = subprocess.run(
|
||||
@@ -76,8 +66,6 @@ def get_git_commit(git_dir: str) -> str:
|
||||
return r.stdout.strip() if r.returncode == 0 else ""
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def get_git_log(git_dir: str, n: int = 5) -> str:
|
||||
try:
|
||||
r = subprocess.run(
|
||||
@@ -87,15 +75,10 @@ def get_git_log(git_dir: str, n: int = 5) -> str:
|
||||
return r.stdout.strip() if r.returncode == 0 else ""
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
# ── default structures ───────────────────────────────────────────────────────
|
||||
|
||||
def default_discussion() -> dict:
|
||||
def default_discussion() -> dict[str, Any]:
|
||||
return {"git_commit": "", "last_updated": now_ts(), "history": []}
|
||||
|
||||
|
||||
def default_project(name: str = "unnamed") -> dict:
|
||||
def default_project(name: str = "unnamed") -> dict[str, Any]:
|
||||
return {
|
||||
"project": {"name": name, "git_dir": "", "system_prompt": "", "main_context": ""},
|
||||
"output": {"output_dir": "./md_gen"},
|
||||
@@ -125,91 +108,63 @@ def default_project(name: str = "unnamed") -> dict:
|
||||
"tracks": []
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# ── load / save ──────────────────────────────────────────────────────────────
|
||||
|
||||
def get_history_path(project_path: str | Path) -> Path:
|
||||
def get_history_path(project_path: Union[str, Path]) -> Path:
|
||||
"""Return the Path to the sibling history TOML file for a given project."""
|
||||
p = Path(project_path)
|
||||
return p.parent / f"{p.stem}_history.toml"
|
||||
|
||||
|
||||
def load_project(path: str | Path) -> dict:
|
||||
def load_project(path: Union[str, Path]) -> dict[str, Any]:
|
||||
"""
|
||||
Load a project TOML file.
|
||||
Automatically migrates legacy 'discussion' keys to a sibling history file.
|
||||
"""
|
||||
with open(path, "rb") as f:
|
||||
proj = tomllib.load(f)
|
||||
|
||||
# Automatic Migration: move legacy 'discussion' to sibling file
|
||||
hist_path = get_history_path(path)
|
||||
if "discussion" in proj:
|
||||
disc = proj.pop("discussion")
|
||||
# Save to history file if it doesn't exist yet (or overwrite to migrate)
|
||||
with open(hist_path, "wb") as f:
|
||||
tomli_w.dump(disc, f)
|
||||
# Save the stripped project file
|
||||
save_project(proj, path)
|
||||
# Restore for the returned dict so GUI works as before
|
||||
proj["discussion"] = disc
|
||||
else:
|
||||
# Load from sibling if it exists
|
||||
if hist_path.exists():
|
||||
proj["discussion"] = load_history(path)
|
||||
|
||||
return proj
|
||||
|
||||
|
||||
def load_history(project_path: str | Path) -> dict:
|
||||
def load_history(project_path: Union[str, Path]) -> dict[str, Any]:
|
||||
"""Load the segregated discussion history from its dedicated TOML file."""
|
||||
hist_path = get_history_path(project_path)
|
||||
if hist_path.exists():
|
||||
with open(hist_path, "rb") as f:
|
||||
return tomllib.load(f)
|
||||
return {}
|
||||
|
||||
|
||||
def clean_nones(data):
|
||||
def clean_nones(data: Any) -> Any:
|
||||
"""Recursively remove None values from a dictionary/list."""
|
||||
if isinstance(data, dict):
|
||||
return {k: clean_nones(v) for k, v in data.items() if v is not None}
|
||||
elif isinstance(data, list):
|
||||
return [clean_nones(v) for v in data if v is not None]
|
||||
return data
|
||||
|
||||
|
||||
def save_project(proj: dict, path: str | Path, disc_data: dict | None = None):
|
||||
def save_project(proj: dict[str, Any], path: Union[str, Path], disc_data: Optional[dict[str, Any]] = None) -> None:
|
||||
"""
|
||||
Save the project TOML.
|
||||
If 'discussion' is present in proj, it is moved to the sibling history file.
|
||||
"""
|
||||
# Clean None values as TOML doesn't support them
|
||||
proj = clean_nones(proj)
|
||||
|
||||
# Ensure 'discussion' is NOT in the main project dict
|
||||
if "discussion" in proj:
|
||||
# If disc_data wasn't provided, use the one from proj
|
||||
if disc_data is None:
|
||||
disc_data = proj["discussion"]
|
||||
# Remove it so it doesn't get saved to the main file
|
||||
proj = dict(proj) # shallow copy to avoid mutating caller's dict
|
||||
proj = dict(proj)
|
||||
del proj["discussion"]
|
||||
|
||||
with open(path, "wb") as f:
|
||||
tomli_w.dump(proj, f)
|
||||
|
||||
if disc_data:
|
||||
disc_data = clean_nones(disc_data)
|
||||
hist_path = get_history_path(path)
|
||||
with open(hist_path, "wb") as f:
|
||||
tomli_w.dump(disc_data, f)
|
||||
|
||||
|
||||
# ── migration helper ─────────────────────────────────────────────────────────
|
||||
|
||||
def migrate_from_legacy_config(cfg: dict) -> dict:
|
||||
def migrate_from_legacy_config(cfg: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Build a fresh project dict from a legacy flat config.toml. Does NOT save."""
|
||||
name = cfg.get("output", {}).get("namespace", "project")
|
||||
proj = default_project(name)
|
||||
@@ -222,21 +177,16 @@ def migrate_from_legacy_config(cfg: dict) -> dict:
|
||||
main_disc["history"] = disc.get("history", [])
|
||||
main_disc["last_updated"] = now_ts()
|
||||
return proj
|
||||
|
||||
|
||||
# ── flat config for aggregate.run() ─────────────────────────────────────────
|
||||
|
||||
def flat_config(proj: dict, disc_name: str | None = None, track_id: str | None = None) -> dict:
|
||||
def flat_config(proj: dict[str, Any], disc_name: Optional[str] = None, track_id: Optional[str] = None) -> dict[str, Any]:
|
||||
"""Return a flat config dict compatible with aggregate.run()."""
|
||||
disc_sec = proj.get("discussion", {})
|
||||
|
||||
if track_id:
|
||||
history = load_track_history(track_id, proj.get("files", {}).get("base_dir", "."))
|
||||
else:
|
||||
name = disc_name or disc_sec.get("active", "main")
|
||||
disc_data = disc_sec.get("discussions", {}).get(name, {})
|
||||
history = disc_data.get("history", [])
|
||||
|
||||
return {
|
||||
"project": proj.get("project", {}),
|
||||
"output": proj.get("output", {}),
|
||||
@@ -247,11 +197,8 @@ def flat_config(proj: dict, disc_name: str | None = None, track_id: str | None =
|
||||
"history": history,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# ── track state persistence ─────────────────────────────────────────────────
|
||||
|
||||
def save_track_state(track_id: str, state: 'TrackState', base_dir: str | Path = "."):
|
||||
def save_track_state(track_id: str, state: 'TrackState', base_dir: Union[str, Path] = ".") -> None:
|
||||
"""
|
||||
Saves a TrackState object to conductor/tracks/<track_id>/state.toml.
|
||||
"""
|
||||
@@ -261,9 +208,7 @@ def save_track_state(track_id: str, state: 'TrackState', base_dir: str | Path =
|
||||
data = clean_nones(state.to_dict())
|
||||
with open(state_file, "wb") as f:
|
||||
tomli_w.dump(data, f)
|
||||
|
||||
|
||||
def load_track_state(track_id: str, base_dir: str | Path = ".") -> 'TrackState':
|
||||
def load_track_state(track_id: str, base_dir: Union[str, Path] = ".") -> Optional['TrackState']:
|
||||
"""
|
||||
Loads a TrackState object from conductor/tracks/<track_id>/state.toml.
|
||||
"""
|
||||
@@ -274,9 +219,7 @@ def load_track_state(track_id: str, base_dir: str | Path = ".") -> 'TrackState':
|
||||
with open(state_file, "rb") as f:
|
||||
data = tomllib.load(f)
|
||||
return TrackState.from_dict(data)
|
||||
|
||||
|
||||
def load_track_history(track_id: str, base_dir: str | Path = ".") -> list:
|
||||
def load_track_history(track_id: str, base_dir: Union[str, Path] = ".") -> list[str]:
|
||||
"""
|
||||
Loads the discussion history for a specific track from its state.toml.
|
||||
Returns a list of entry strings formatted with @timestamp.
|
||||
@@ -285,8 +228,7 @@ def load_track_history(track_id: str, base_dir: str | Path = ".") -> list:
|
||||
state = load_track_state(track_id, base_dir)
|
||||
if not state:
|
||||
return []
|
||||
|
||||
history = []
|
||||
history: list[str] = []
|
||||
for entry in state.discussion:
|
||||
e = dict(entry)
|
||||
ts = e.get("ts")
|
||||
@@ -294,9 +236,7 @@ def load_track_history(track_id: str, base_dir: str | Path = ".") -> list:
|
||||
e["ts"] = ts.strftime(TS_FMT)
|
||||
history.append(entry_to_str(e))
|
||||
return history
|
||||
|
||||
|
||||
def save_track_history(track_id: str, history: list, base_dir: str | Path = "."):
|
||||
def save_track_history(track_id: str, history: list[str], base_dir: Union[str, Path] = ".") -> None:
|
||||
"""
|
||||
Saves the discussion history for a specific track to its state.toml.
|
||||
'history' is expected to be a list of formatted strings.
|
||||
@@ -305,14 +245,11 @@ def save_track_history(track_id: str, history: list, base_dir: str | Path = ".")
|
||||
state = load_track_state(track_id, base_dir)
|
||||
if not state:
|
||||
return
|
||||
|
||||
roles = ["User", "AI", "Vendor API", "System", "Reasoning"]
|
||||
entries = [str_to_entry(h, roles) for h in history]
|
||||
state.discussion = entries
|
||||
save_track_state(track_id, state, base_dir)
|
||||
|
||||
|
||||
def get_all_tracks(base_dir: str | Path = ".") -> list[dict]:
|
||||
def get_all_tracks(base_dir: Union[str, Path] = ".") -> list[dict[str, Any]]:
|
||||
"""
|
||||
Scans the conductor/tracks/ directory and returns a list of dictionaries
|
||||
containing track metadata: 'id', 'title', 'status', 'complete', 'total',
|
||||
@@ -324,14 +261,12 @@ def get_all_tracks(base_dir: str | Path = ".") -> list[dict]:
|
||||
tracks_dir = Path(base_dir) / "conductor" / "tracks"
|
||||
if not tracks_dir.exists():
|
||||
return []
|
||||
|
||||
results = []
|
||||
results: list[dict[str, Any]] = []
|
||||
for entry in tracks_dir.iterdir():
|
||||
if not entry.is_dir():
|
||||
continue
|
||||
|
||||
track_id = entry.name
|
||||
track_info = {
|
||||
track_info: dict[str, Any] = {
|
||||
"id": track_id,
|
||||
"title": track_id,
|
||||
"status": "unknown",
|
||||
@@ -339,9 +274,7 @@ def get_all_tracks(base_dir: str | Path = ".") -> list[dict]:
|
||||
"total": 0,
|
||||
"progress": 0.0
|
||||
}
|
||||
|
||||
state_found = False
|
||||
# Try loading state.toml
|
||||
try:
|
||||
state = load_track_state(track_id, base_dir)
|
||||
if state:
|
||||
@@ -355,9 +288,7 @@ def get_all_tracks(base_dir: str | Path = ".") -> list[dict]:
|
||||
state_found = True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not state_found:
|
||||
# Try loading metadata.json
|
||||
metadata_file = entry / "metadata.json"
|
||||
if metadata_file.exists():
|
||||
try:
|
||||
@@ -368,18 +299,12 @@ def get_all_tracks(base_dir: str | Path = ".") -> list[dict]:
|
||||
track_info["status"] = data.get("status", "unknown")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Try parsing plan.md for complete/total if state was missing or empty
|
||||
if track_info["total"] == 0:
|
||||
plan_file = entry / "plan.md"
|
||||
if plan_file.exists():
|
||||
try:
|
||||
with open(plan_file, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
# Simple regex to count tasks
|
||||
# - [ ] Task: ...
|
||||
# - [x] Task: ...
|
||||
# - [~] Task: ...
|
||||
tasks = re.findall(r"^[ \t]*- \[[ x~]\] .*", content, re.MULTILINE)
|
||||
completed_tasks = re.findall(r"^[ \t]*- \[x\] .*", content, re.MULTILINE)
|
||||
track_info["total"] = len(tasks)
|
||||
@@ -388,7 +313,5 @@ def get_all_tracks(base_dir: str | Path = ".") -> list[dict]:
|
||||
track_info["progress"] = float(track_info["complete"]) / track_info["total"]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
results.append(track_info)
|
||||
|
||||
return results
|
||||
|
||||
10
refactor_ui_task.toml
Normal file
10
refactor_ui_task.toml
Normal file
@@ -0,0 +1,10 @@
|
||||
role = "tier3-worker"
|
||||
prompt = """Implement strict type hints for ALL functions and methods in @gui_2.py and @gui_legacy.py.
|
||||
1. Use specific types (e.g., dict[str, Any], list[str], Union[str, Path], etc.) for arguments and returns.
|
||||
2. Maintain the 'AI-Optimized' style: 1-space indentation, NO blank lines within function bodies, and maximum 1 blank line between definitions.
|
||||
3. Since these files are very large, you MUST use surgical tools (discovered_tool_py_update_definition, discovered_tool_py_set_signature, discovered_tool_py_set_var_declaration) to apply changes. Do NOT try to overwrite the entire file at once.
|
||||
4. Do NOT change any logic.
|
||||
5. Use discovered_tool_py_check_syntax after each major change to verify syntax.
|
||||
6. Ensure 'from typing import Any, dict, list, Union, Optional, Callable' etc. are present.
|
||||
7. Focus on completing the task efficiently without hitting timeouts."""
|
||||
docs = ["gui_2.py", "gui_legacy.py", "conductor/workflow.md"]
|
||||
@@ -5,27 +5,22 @@ from dag_engine import TrackDAG, ExecutionEngine
|
||||
def test_auto_queue_and_step_mode():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", step_mode=True)
|
||||
|
||||
dag = TrackDAG([t1, t2])
|
||||
# Expectation: ExecutionEngine takes auto_queue parameter
|
||||
try:
|
||||
engine = ExecutionEngine(dag, auto_queue=True)
|
||||
except TypeError:
|
||||
pytest.fail("ExecutionEngine does not accept auto_queue parameter")
|
||||
|
||||
# Tick 1: T1 should be 'in-progress' because auto_queue=True
|
||||
# T2 should remain 'todo' because step_mode=True
|
||||
engine.tick()
|
||||
|
||||
assert t1.status == "in_progress"
|
||||
assert t2.status == "todo"
|
||||
|
||||
# Approve T2
|
||||
try:
|
||||
engine.approve_task("T2")
|
||||
except AttributeError:
|
||||
pytest.fail("ExecutionEngine does not have approve_task method")
|
||||
|
||||
assert t2.status == "in_progress"
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
21
reproduce_missing_hints.py
Normal file
21
reproduce_missing_hints.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
def test_type_hints():
|
||||
files = ["project_manager.py", "session_logger.py"]
|
||||
all_missing = []
|
||||
for f in files:
|
||||
print(f"Scanning {f}...")
|
||||
result = subprocess.run(["uv", "run", "python", "scripts/type_hint_scanner.py", f], capture_output=True, text=True)
|
||||
if result.stdout.strip():
|
||||
print(f"Missing hints in {f}:\n{result.stdout}")
|
||||
all_missing.append(f)
|
||||
if all_missing:
|
||||
print(f"FAILURE: Missing type hints in: {', '.join(all_missing)}")
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("SUCCESS: All functions have type hints.")
|
||||
sys.exit(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_type_hints()
|
||||
@@ -68,20 +68,16 @@ Example usage:
|
||||
type=str,
|
||||
help="Category of tests to run (e.g., 'unit', 'integration')."
|
||||
)
|
||||
|
||||
# Parse known arguments for the script itself, then parse remaining args for pytest
|
||||
args, remaining_pytest_args = parser.parse_known_args(sys.argv[1:])
|
||||
|
||||
selected_test_files = []
|
||||
manifest_data = None
|
||||
|
||||
if args.manifest:
|
||||
try:
|
||||
manifest_data = load_manifest(args.manifest)
|
||||
except (FileNotFoundError, tomllib.TOMLDecodeError):
|
||||
# Error message already printed by load_manifest
|
||||
sys.exit(1)
|
||||
|
||||
if args.category:
|
||||
# Case 1: --manifest and --category provided
|
||||
files = get_test_files(manifest_data, args.category)
|
||||
@@ -94,7 +90,6 @@ Example usage:
|
||||
print(f"Error: --manifest provided without --category, and no 'default_categories' found in manifest '{args.manifest}'.", file=sys.stderr)
|
||||
parser.print_help(sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
print(f"DEBUG: Using default categories from manifest '{args.manifest}': {default_categories}", file=sys.stderr)
|
||||
for cat in default_categories:
|
||||
files = get_test_files(manifest_data, cat)
|
||||
@@ -104,14 +99,11 @@ Example usage:
|
||||
print("Error: --category requires --manifest to be specified.", file=sys.stderr)
|
||||
parser.print_help(sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Combine selected test files with any remaining pytest arguments that were not parsed by this script.
|
||||
# We also filter out the literal '--' if it was passed by the user to avoid pytest errors if it appears multiple times.
|
||||
pytest_command_args = selected_test_files + [arg for arg in remaining_pytest_args if arg != '--']
|
||||
|
||||
# Filter out any empty strings that might have been included.
|
||||
final_pytest_args = [arg for arg in pytest_command_args if arg]
|
||||
|
||||
# If no specific tests were selected from manifest/category and no manifest was provided,
|
||||
# and no other pytest args were given, pytest.main([]) runs default test discovery.
|
||||
print(f"Running pytest with arguments: {final_pytest_args}", file=sys.stderr)
|
||||
|
||||
@@ -89,13 +89,20 @@ def main():
|
||||
# This prevents the hook from affecting normal CLI usage.
|
||||
hook_context = os.environ.get("GEMINI_CLI_HOOK_CONTEXT")
|
||||
logging.debug(f"Checking GEMINI_CLI_HOOK_CONTEXT: '{hook_context}'")
|
||||
if hook_context != "manual_slop":
|
||||
logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is '{hook_context}', NOT 'manual_slop'. Allowing execution without confirmation.")
|
||||
if hook_context != "manual_slop" and hook_context != "mma_headless":
|
||||
logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is '{hook_context}', NOT 'manual_slop' or 'mma_headless'. Allowing execution without confirmation.")
|
||||
print(json.dumps({
|
||||
"decision": "allow",
|
||||
"reason": f"Non-programmatic usage (GEMINI_CLI_HOOK_CONTEXT={hook_context})."
|
||||
}))
|
||||
return
|
||||
if hook_context == "mma_headless":
|
||||
logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is 'mma_headless'. Allowing execution for sub-agent.")
|
||||
print(json.dumps({
|
||||
"decision": "allow",
|
||||
"reason": "Sub-agent headless mode (MMA)."
|
||||
}))
|
||||
return
|
||||
# 5. Use 'ApiHookClient' (assuming GUI is on http://127.0.0.1:8999)
|
||||
logging.debug("GEMINI_CLI_HOOK_CONTEXT is 'manual_slop'. Proceeding with API Hook Client.")
|
||||
client = ApiHookClient(base_url="http://127.0.0.1:8999")
|
||||
|
||||
@@ -189,15 +189,15 @@ def execute_agent(role: str, prompt: str, docs: list[str]) -> str:
|
||||
command_text += f"\n\nTASK: {prompt}\n\n"
|
||||
# Use subprocess with input to pipe the prompt via stdin, avoiding WinError 206.
|
||||
# We use -p 'mma_task' to ensure non-interactive (headless) mode and valid parsing.
|
||||
# Whitelist tools to ensure they are available to the model in headless mode.
|
||||
allowed_tools = "read_file,write_file,replace,list_directory,glob,grep_search,discovered_tool_search_files,discovered_tool_get_file_summary,discovered_tool_py_get_skeleton,discovered_tool_py_get_code_outline,discovered_tool_py_get_definition,discovered_tool_py_update_definition,discovered_tool_py_get_signature,discovered_tool_py_set_signature,discovered_tool_py_get_class_summary,discovered_tool_py_get_var_declaration,discovered_tool_py_set_var_declaration,discovered_tool_get_git_diff,discovered_tool_run_powershell,activate_skill,codebase_investigator,discovered_tool_web_search,discovered_tool_fetch_url,discovered_tool_py_find_usages,discovered_tool_py_get_imports,discovered_tool_py_check_syntax,discovered_tool_py_get_hierarchy,discovered_tool_py_get_docstring,discovered_tool_get_tree"
|
||||
ps_command = (
|
||||
f"if (Test-Path 'C:\\projects\\misc\\setup_gemini.ps1') {{ . 'C:\\projects\\misc\\setup_gemini.ps1' }}; "
|
||||
f"gemini -p 'mma_task' --allowed-tools {allowed_tools} --output-format json --model {model}"
|
||||
f"gemini -p '{role}' --output-format json --model {model}"
|
||||
)
|
||||
cmd = ['powershell.exe', '-NoProfile', '-Command', ps_command]
|
||||
try:
|
||||
process = subprocess.run(cmd, input=command_text, capture_output=True, text=True, encoding='utf-8')
|
||||
env = os.environ.copy()
|
||||
env["GEMINI_CLI_HOOK_CONTEXT"] = "mma_headless"
|
||||
process = subprocess.run(cmd, input=command_text, capture_output=True, text=True, encoding='utf-8', env=env)
|
||||
result = process.stdout
|
||||
if not process.stdout and process.stderr:
|
||||
result = f"Error: {process.stderr}"
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
Opens timestamped log/script files at startup and keeps them open for the
|
||||
lifetime of the process. The next run of the GUI creates new files; the
|
||||
previous run's files are simply closed when the process exits.
|
||||
|
||||
File layout
|
||||
-----------
|
||||
logs/
|
||||
@@ -12,87 +11,64 @@ logs/
|
||||
clicalls_<ts>.log - sequential record of every CLI subprocess call
|
||||
scripts/generated/
|
||||
<ts>_<seq:04d>.ps1 - each PowerShell script the AI generated, in order
|
||||
|
||||
Where <ts> = YYYYMMDD_HHMMSS of when this session was started.
|
||||
"""
|
||||
|
||||
import atexit
|
||||
import datetime
|
||||
import json
|
||||
import threading
|
||||
from typing import Any, Optional, TextIO
|
||||
from pathlib import Path
|
||||
|
||||
_LOG_DIR = Path("./logs")
|
||||
_SCRIPTS_DIR = Path("./scripts/generated")
|
||||
|
||||
_LOG_DIR: Path = Path("./logs")
|
||||
_SCRIPTS_DIR: Path = Path("./scripts/generated")
|
||||
_ts: str = "" # session timestamp string e.g. "20260301_142233"
|
||||
_session_id: str = "" # YYYYMMDD_HHMMSS[_Label]
|
||||
_session_dir: Path = None # Path to the sub-directory for this session
|
||||
_session_dir: Optional[Path] = None # Path to the sub-directory for this session
|
||||
_seq: int = 0 # monotonic counter for script files this session
|
||||
_seq_lock = threading.Lock()
|
||||
|
||||
_comms_fh = None # file handle: logs/<session_id>/comms.log
|
||||
_tool_fh = None # file handle: logs/<session_id>/toolcalls.log
|
||||
_api_fh = None # file handle: logs/<session_id>/apihooks.log
|
||||
_cli_fh = None # file handle: logs/<session_id>/clicalls.log
|
||||
|
||||
|
||||
_seq_lock: threading.Lock = threading.Lock()
|
||||
_comms_fh: Optional[TextIO] = None # file handle: logs/<session_id>/comms.log
|
||||
_tool_fh: Optional[TextIO] = None # file handle: logs/<session_id>/toolcalls.log
|
||||
_api_fh: Optional[TextIO] = None # file handle: logs/<session_id>/apihooks.log
|
||||
_cli_fh: Optional[TextIO] = None # file handle: logs/<session_id>/clicalls.log
|
||||
def _now_ts() -> str:
|
||||
return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
|
||||
def open_session(label: str | None = None):
|
||||
def open_session(label: Optional[str] = None) -> None:
|
||||
"""
|
||||
Called once at GUI startup. Creates the log directories if needed and
|
||||
opens the log files for this session within a sub-directory.
|
||||
"""
|
||||
global _ts, _session_id, _session_dir, _comms_fh, _tool_fh, _api_fh, _cli_fh, _seq
|
||||
|
||||
if _comms_fh is not None:
|
||||
return # already open
|
||||
|
||||
return
|
||||
_ts = _now_ts()
|
||||
_session_id = _ts
|
||||
if label:
|
||||
# Sanitize label: remove non-alphanumeric chars
|
||||
safe_label = "".join(c if c.isalnum() or c in ("-", "_") else "_" for c in label)
|
||||
_session_id += f"_{safe_label}"
|
||||
|
||||
_session_dir = _LOG_DIR / _session_id
|
||||
_session_dir.mkdir(parents=True, exist_ok=True)
|
||||
_SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
_seq = 0
|
||||
|
||||
_comms_fh = open(_session_dir / "comms.log", "w", encoding="utf-8", buffering=1)
|
||||
_tool_fh = open(_session_dir / "toolcalls.log", "w", encoding="utf-8", buffering=1)
|
||||
_api_fh = open(_session_dir / "apihooks.log", "w", encoding="utf-8", buffering=1)
|
||||
_cli_fh = open(_session_dir / "clicalls.log", "w", encoding="utf-8", buffering=1)
|
||||
|
||||
_tool_fh.write(f"# Tool-call log — session {_session_id}\n\n")
|
||||
_tool_fh.flush()
|
||||
_cli_fh.write(f"# CLI Subprocess Call Log — session {_session_id}\n\n")
|
||||
_cli_fh.flush()
|
||||
|
||||
# Register this session in the log registry
|
||||
try:
|
||||
from log_registry import LogRegistry
|
||||
registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
|
||||
registry.register_session(_session_id, str(_session_dir), datetime.datetime.now())
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not register session in LogRegistry: {e}")
|
||||
|
||||
atexit.register(close_session)
|
||||
|
||||
|
||||
def close_session():
|
||||
def close_session() -> None:
|
||||
"""Flush and close all log files. Called on clean exit."""
|
||||
global _comms_fh, _tool_fh, _api_fh, _cli_fh, _session_id, _LOG_DIR
|
||||
|
||||
if _comms_fh is None:
|
||||
return
|
||||
|
||||
# Close files first to ensure all data is flushed to disk
|
||||
if _comms_fh:
|
||||
_comms_fh.close()
|
||||
_comms_fh = None
|
||||
@@ -105,20 +81,14 @@ def close_session():
|
||||
if _cli_fh:
|
||||
_cli_fh.close()
|
||||
_cli_fh = None
|
||||
|
||||
# Trigger auto-whitelist update for this session after closing
|
||||
try:
|
||||
from log_registry import LogRegistry
|
||||
registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
|
||||
registry.update_auto_whitelist_status(_session_id)
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not update auto-whitelist on close: {e}")
|
||||
|
||||
|
||||
def log_api_hook(method: str, path: str, payload: str):
|
||||
"""
|
||||
Log an API hook invocation.
|
||||
"""
|
||||
def log_api_hook(method: str, path: str, payload: str) -> None:
|
||||
"""Log an API hook invocation."""
|
||||
if _api_fh is None:
|
||||
return
|
||||
ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
|
||||
@@ -127,9 +97,7 @@ def log_api_hook(method: str, path: str, payload: str):
|
||||
_api_fh.flush()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def log_comms(entry: dict):
|
||||
def log_comms(entry: dict[str, Any]) -> None:
|
||||
"""
|
||||
Append one comms entry to the comms log file as a JSON-L line.
|
||||
Thread-safe (GIL + line-buffered file).
|
||||
@@ -140,34 +108,25 @@ def log_comms(entry: dict):
|
||||
_comms_fh.write(json.dumps(entry, ensure_ascii=False, default=str) + "\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def log_tool_call(script: str, result: str, script_path: str | None):
|
||||
def log_tool_call(script: str, result: str, script_path: Optional[str]) -> Optional[str]:
|
||||
"""
|
||||
Append a tool-call record to the toolcalls log and write the PS1 script to
|
||||
scripts/generated/. Returns the path of the written script file.
|
||||
"""
|
||||
global _seq
|
||||
|
||||
if _tool_fh is None:
|
||||
return script_path # logger not open yet
|
||||
|
||||
return script_path
|
||||
with _seq_lock:
|
||||
_seq += 1
|
||||
seq = _seq
|
||||
|
||||
ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
|
||||
|
||||
# Write the .ps1 file
|
||||
ps1_name = f"{_ts}_{seq:04d}.ps1"
|
||||
ps1_path = _SCRIPTS_DIR / ps1_name
|
||||
ps1_path: Optional[Path] = _SCRIPTS_DIR / ps1_name
|
||||
try:
|
||||
ps1_path.write_text(script, encoding="utf-8")
|
||||
except Exception as exc:
|
||||
ps1_path = None
|
||||
ps1_name = f"(write error: {exc})"
|
||||
|
||||
# Append to the tool-call sequence log (script body omitted - see .ps1 file)
|
||||
try:
|
||||
_tool_fh.write(
|
||||
f"## Call #{seq} [{ts_entry}]\n"
|
||||
@@ -179,17 +138,11 @@ def log_tool_call(script: str, result: str, script_path: str | None):
|
||||
_tool_fh.flush()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return str(ps1_path) if ps1_path else None
|
||||
|
||||
|
||||
def log_cli_call(command: str, stdin_content: str | None, stdout_content: str | None, stderr_content: str | None, latency: float):
|
||||
"""
|
||||
Log details of a CLI subprocess execution.
|
||||
"""
|
||||
def log_cli_call(command: str, stdin_content: Optional[str], stdout_content: Optional[str], stderr_content: Optional[str], latency: float) -> None:
|
||||
"""Log details of a CLI subprocess execution."""
|
||||
if _cli_fh is None:
|
||||
return
|
||||
|
||||
ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
|
||||
try:
|
||||
log_data = {
|
||||
|
||||
@@ -3,7 +3,7 @@ import subprocess, shutil
|
||||
from pathlib import Path
|
||||
from typing import Callable, Optional
|
||||
|
||||
TIMEOUT_SECONDS = 60
|
||||
TIMEOUT_SECONDS: int = 60
|
||||
|
||||
def run_powershell(script: str, base_dir: str, qa_callback: Optional[Callable[[str], str]] = None) -> str:
|
||||
"""
|
||||
@@ -12,28 +12,37 @@ def run_powershell(script: str, base_dir: str, qa_callback: Optional[Callable[[s
|
||||
If qa_callback is provided and the command fails or has stderr,
|
||||
the callback is called with the stderr content and its result is appended.
|
||||
"""
|
||||
safe_dir = str(base_dir).replace("'", "''")
|
||||
full_script = f"Set-Location -LiteralPath '{safe_dir}'\n{script}"
|
||||
safe_dir: str = str(base_dir).replace("'", "''")
|
||||
full_script: str = f"Set-Location -LiteralPath '{safe_dir}'\n{script}"
|
||||
# Try common executable names
|
||||
exe = next((x for x in ["powershell.exe", "pwsh.exe", "powershell", "pwsh"] if shutil.which(x)), None)
|
||||
exe: Optional[str] = next((x for x in ["powershell.exe", "pwsh.exe", "powershell", "pwsh"] if shutil.which(x)), None)
|
||||
if not exe: return "ERROR: Neither powershell nor pwsh found in PATH"
|
||||
|
||||
try:
|
||||
r = subprocess.run(
|
||||
process = subprocess.Popen(
|
||||
[exe, "-NoProfile", "-NonInteractive", "-Command", full_script],
|
||||
capture_output=True, text=True, timeout=TIMEOUT_SECONDS, cwd=base_dir
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=base_dir
|
||||
)
|
||||
parts = []
|
||||
if r.stdout.strip(): parts.append(f"STDOUT:\n{r.stdout.strip()}")
|
||||
if r.stderr.strip(): parts.append(f"STDERR:\n{r.stderr.strip()}")
|
||||
parts.append(f"EXIT CODE: {r.returncode}")
|
||||
stdout, stderr = process.communicate(timeout=TIMEOUT_SECONDS)
|
||||
|
||||
# QA Interceptor logic
|
||||
if (r.returncode != 0 or r.stderr.strip()) and qa_callback:
|
||||
qa_analysis = qa_callback(r.stderr.strip())
|
||||
parts: list[str] = []
|
||||
if stdout.strip(): parts.append(f"STDOUT:\n{stdout.strip()}")
|
||||
if stderr.strip(): parts.append(f"STDERR:\n{stderr.strip()}")
|
||||
parts.append(f"EXIT CODE: {process.returncode}")
|
||||
|
||||
if (process.returncode != 0 or stderr.strip()) and qa_callback:
|
||||
qa_analysis: Optional[str] = qa_callback(stderr.strip())
|
||||
if qa_analysis:
|
||||
parts.append(f"\nQA ANALYSIS:\n{qa_analysis}")
|
||||
|
||||
return "\n".join(parts)
|
||||
except subprocess.TimeoutExpired: return f"ERROR: timed out after {TIMEOUT_SECONDS}s"
|
||||
except Exception as e: return f"ERROR: {e}"
|
||||
except subprocess.TimeoutExpired:
|
||||
if 'process' in locals() and process:
|
||||
subprocess.run(["taskkill", "/F", "/T", "/PID", str(process.pid)], capture_output=True)
|
||||
return f"ERROR: timed out after {TIMEOUT_SECONDS}s"
|
||||
except KeyboardInterrupt:
|
||||
if 'process' in locals() and process:
|
||||
subprocess.run(["taskkill", "/F", "/T", "/PID", str(process.pid)], capture_output=True)
|
||||
raise
|
||||
except Exception as e:
|
||||
if 'process' in locals() and process:
|
||||
subprocess.run(["taskkill", "/F", "/T", "/PID", str(process.pid)], capture_output=True)
|
||||
return f"ERROR: {e}"
|
||||
|
||||
@@ -12,61 +12,46 @@ def main():
|
||||
if not client.wait_for_server(timeout=10):
|
||||
print("Error: Could not connect to GUI. Ensure it is running with --enable-test-hooks")
|
||||
return
|
||||
|
||||
sim = WorkflowSimulator(client)
|
||||
|
||||
# 1. Start Clean
|
||||
print("\n[Action] Resetting Session...")
|
||||
client.click("btn_reset")
|
||||
time.sleep(2)
|
||||
|
||||
# 2. Project Scaffolding
|
||||
project_name = f"LiveTest_{int(time.time())}"
|
||||
# Use actual project dir for realism
|
||||
git_dir = os.path.abspath(".")
|
||||
project_path = os.path.join(git_dir, "tests", f"{project_name}.toml")
|
||||
|
||||
print(f"\n[Action] Scaffolding Project: {project_name} at {project_path}")
|
||||
sim.setup_new_project(project_name, git_dir, project_path)
|
||||
|
||||
# Enable auto-add so results appear in history automatically
|
||||
client.set_value("auto_add_history", True)
|
||||
time.sleep(1)
|
||||
|
||||
# 3. Discussion Loop (3 turns for speed, but logic supports more)
|
||||
turns = [
|
||||
"Hi! I want to create a simple python script called 'hello.py' that prints the current date and time. Can you write it for me?",
|
||||
"That looks great. Can you also add a feature to print the name of the operating system?",
|
||||
"Excellent. Now, please create a requirements.txt file with 'requests' in it."
|
||||
]
|
||||
|
||||
for i, msg in enumerate(turns):
|
||||
print(f"\n--- Turn {i+1} ---")
|
||||
|
||||
# Switch to Comms Log to see the send
|
||||
client.select_tab("operations_tabs", "tab_comms")
|
||||
|
||||
sim.run_discussion_turn(msg)
|
||||
|
||||
# Check thinking indicator
|
||||
state = client.get_indicator_state("thinking_indicator")
|
||||
if state.get('shown'):
|
||||
print("[Status] Thinking indicator is visible.")
|
||||
|
||||
# Switch to Tool Log halfway through wait
|
||||
time.sleep(2)
|
||||
client.select_tab("operations_tabs", "tab_tool")
|
||||
|
||||
# Wait for AI response if not already finished
|
||||
# (run_discussion_turn already waits, so we just observe)
|
||||
|
||||
# 4. History Management
|
||||
print("\n[Action] Creating new discussion thread...")
|
||||
sim.create_discussion("Refinement")
|
||||
|
||||
print("\n[Action] Switching back to Default...")
|
||||
sim.switch_discussion("Default")
|
||||
|
||||
# 5. Manual Sign-off Simulation
|
||||
print("\n=== Walkthrough Complete ===")
|
||||
print("Please verify the following in the GUI:")
|
||||
|
||||
@@ -14,21 +14,17 @@ def main():
|
||||
if not client.wait_for_server(timeout=5):
|
||||
print("Hook server not found. Start GUI with --enable-test-hooks")
|
||||
return
|
||||
|
||||
sim_agent = UserSimAgent(client)
|
||||
|
||||
# 1. Reset session to start clean
|
||||
print("Resetting session...")
|
||||
client.click("btn_reset")
|
||||
time.sleep(2) # Give it time to clear
|
||||
|
||||
# 2. Initial message
|
||||
initial_msg = "Hello! I want to create a simple python script that prints 'Hello World'. Can you help me?"
|
||||
print(f"
|
||||
[USER]: {initial_msg}")
|
||||
client.set_value("ai_input", initial_msg)
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# 3. Wait for AI response
|
||||
print("Waiting for AI response...", end="", flush=True)
|
||||
last_entry_count = 0
|
||||
@@ -37,19 +33,16 @@ def main():
|
||||
print(".", end="", flush=True)
|
||||
session = client.get_session()
|
||||
entries = session.get('session', {}).get('entries', [])
|
||||
|
||||
if len(entries) > last_entry_count:
|
||||
# Something happened
|
||||
last_entry = entries[-1]
|
||||
if last_entry.get('role') == 'AI' and last_entry.get('content'):
|
||||
print(f"
|
||||
|
||||
[AI]: {last_entry.get('content')[:100]}...")
|
||||
print("
|
||||
Ping-pong successful!")
|
||||
return
|
||||
last_entry_count = len(entries)
|
||||
|
||||
print("
|
||||
Timeout waiting for AI response")
|
||||
|
||||
|
||||
@@ -6,30 +6,25 @@ from simulation.sim_base import BaseSimulation, run_sim
|
||||
class AISettingsSimulation(BaseSimulation):
|
||||
def run(self):
|
||||
print("\n--- Running AI Settings Simulation (Gemini Only) ---")
|
||||
|
||||
# 1. Verify initial model
|
||||
provider = self.client.get_value("current_provider")
|
||||
model = self.client.get_value("current_model")
|
||||
print(f"[Sim] Initial Provider: {provider}, Model: {model}")
|
||||
assert provider == "gemini", f"Expected gemini, got {provider}"
|
||||
|
||||
# 2. Switch to another Gemini model
|
||||
other_gemini = "gemini-1.5-flash"
|
||||
print(f"[Sim] Switching to {other_gemini}...")
|
||||
self.client.set_value("current_model", other_gemini)
|
||||
time.sleep(2)
|
||||
|
||||
# Verify
|
||||
new_model = self.client.get_value("current_model")
|
||||
print(f"[Sim] Updated Model: {new_model}")
|
||||
assert new_model == other_gemini, f"Expected {other_gemini}, got {new_model}"
|
||||
|
||||
# 3. Switch back to flash-lite
|
||||
target_model = "gemini-2.5-flash-lite"
|
||||
print(f"[Sim] Switching back to {target_model}...")
|
||||
self.client.set_value("current_model", target_model)
|
||||
time.sleep(2)
|
||||
|
||||
final_model = self.client.get_value("current_model")
|
||||
print(f"[Sim] Final Model: {final_model}")
|
||||
assert final_model == target_model, f"Expected {target_model}, got {final_model}"
|
||||
|
||||
@@ -14,7 +14,6 @@ class BaseSimulation:
|
||||
self.client = ApiHookClient()
|
||||
else:
|
||||
self.client = client
|
||||
|
||||
self.sim = WorkflowSimulator(self.client)
|
||||
self.project_path = None
|
||||
|
||||
@@ -22,19 +21,15 @@ class BaseSimulation:
|
||||
print(f"\n[BaseSim] Connecting to GUI...")
|
||||
if not self.client.wait_for_server(timeout=5):
|
||||
raise RuntimeError("Could not connect to GUI. Ensure it is running with --enable-test-hooks")
|
||||
|
||||
print("[BaseSim] Resetting session...")
|
||||
self.client.click("btn_reset")
|
||||
time.sleep(0.5)
|
||||
|
||||
git_dir = os.path.abspath(".")
|
||||
self.project_path = os.path.abspath(f"tests/temp_{project_name.lower()}.toml")
|
||||
if os.path.exists(self.project_path):
|
||||
os.remove(self.project_path)
|
||||
|
||||
print(f"[BaseSim] Scaffolding Project: {project_name}")
|
||||
self.sim.setup_new_project(project_name, git_dir, self.project_path)
|
||||
|
||||
# Standard test settings
|
||||
self.client.set_value("auto_add_history", True)
|
||||
self.client.set_value("current_provider", "gemini")
|
||||
|
||||
@@ -6,18 +6,15 @@ from simulation.sim_base import BaseSimulation, run_sim
|
||||
class ContextSimulation(BaseSimulation):
|
||||
def run(self):
|
||||
print("\n--- Running Context & Chat Simulation ---")
|
||||
|
||||
# 1. Test Discussion Creation
|
||||
disc_name = f"TestDisc_{int(time.time())}"
|
||||
print(f"[Sim] Creating discussion: {disc_name}")
|
||||
self.sim.create_discussion(disc_name)
|
||||
time.sleep(1)
|
||||
|
||||
# Verify it's in the list
|
||||
session = self.client.get_session()
|
||||
# The session structure usually has discussions listed somewhere, or we can check the listbox
|
||||
# For now, we'll trust the click and check the session update
|
||||
|
||||
# 2. Test File Aggregation & Context Refresh
|
||||
print("[Sim] Testing context refresh and token budget...")
|
||||
proj = self.client.get_project()
|
||||
@@ -27,22 +24,18 @@ class ContextSimulation(BaseSimulation):
|
||||
for f in all_py:
|
||||
if f not in proj['project']['files']['paths']:
|
||||
proj['project']['files']['paths'].append(f)
|
||||
|
||||
# Update project via hook
|
||||
self.client.post_project(proj['project'])
|
||||
time.sleep(1)
|
||||
|
||||
# Trigger MD Only to refresh context and token budget
|
||||
print("[Sim] Clicking MD Only...")
|
||||
self.client.click("btn_md_only")
|
||||
time.sleep(5)
|
||||
|
||||
# Verify status
|
||||
proj_updated = self.client.get_project()
|
||||
status = self.client.get_value("ai_status")
|
||||
print(f"[Sim] Status: {status}")
|
||||
assert "md written" in status, f"Expected 'md written' in status, got {status}"
|
||||
|
||||
# Verify token budget
|
||||
pct = self.client.get_value("token_budget_pct")
|
||||
current = self.client.get_value("token_budget_current")
|
||||
@@ -50,23 +43,19 @@ class ContextSimulation(BaseSimulation):
|
||||
# We'll just warn if it's 0 but the MD was written, as it might be a small context
|
||||
if pct == 0:
|
||||
print("[Sim] WARNING: token_budget_pct is 0. This might be due to small context or estimation failure.")
|
||||
|
||||
# 3. Test Chat Turn
|
||||
msg = "What is the current date and time? Answer in one sentence."
|
||||
print(f"[Sim] Sending message: {msg}")
|
||||
self.sim.run_discussion_turn(msg)
|
||||
|
||||
# 4. Verify History
|
||||
print("[Sim] Verifying history...")
|
||||
session = self.client.get_session()
|
||||
entries = session.get('session', {}).get('entries', [])
|
||||
|
||||
# We expect at least 2 entries (User and AI)
|
||||
assert len(entries) >= 2, f"Expected at least 2 entries, found {len(entries)}"
|
||||
assert entries[-2]['role'] == 'User', "Expected second to last entry to be User"
|
||||
assert entries[-1]['role'] == 'AI', "Expected last entry to be AI"
|
||||
print(f"[Sim] AI responded: {entries[-1]['content'][:50]}...")
|
||||
|
||||
# 5. Test History Truncation
|
||||
print("[Sim] Testing history truncation...")
|
||||
self.sim.truncate_history(1)
|
||||
|
||||
@@ -11,18 +11,15 @@ class ExecutionSimulation(BaseSimulation):
|
||||
|
||||
def run(self):
|
||||
print("\n--- Running Execution & Modals Simulation ---")
|
||||
|
||||
# 1. Trigger script generation (Async so we don't block on the wait loop)
|
||||
msg = "Create a hello.ps1 script that prints 'Simulation Test' and execute it."
|
||||
print(f"[Sim] Sending message to trigger script: {msg}")
|
||||
self.sim.run_discussion_turn_async(msg)
|
||||
|
||||
# 2. Monitor for events and text responses
|
||||
print("[Sim] Monitoring for script approvals and AI text...")
|
||||
start_wait = time.time()
|
||||
approved_count = 0
|
||||
success = False
|
||||
|
||||
consecutive_errors = 0
|
||||
while time.time() - start_wait < 90:
|
||||
# Check for error status (be lenient with transients)
|
||||
@@ -34,7 +31,6 @@ class ExecutionSimulation(BaseSimulation):
|
||||
break
|
||||
else:
|
||||
consecutive_errors = 0
|
||||
|
||||
# Check for script confirmation event
|
||||
ev = self.client.wait_for_event("script_confirmation_required", timeout=1)
|
||||
if ev:
|
||||
@@ -43,16 +39,13 @@ class ExecutionSimulation(BaseSimulation):
|
||||
approved_count += 1
|
||||
# Give more time if we just approved a script
|
||||
start_wait = time.time()
|
||||
|
||||
# Check if AI has responded with text yet
|
||||
session = self.client.get_session()
|
||||
entries = session.get('session', {}).get('entries', [])
|
||||
|
||||
# Debug: log last few roles/content
|
||||
if entries:
|
||||
last_few = entries[-3:]
|
||||
print(f"[Sim] Waiting... Last {len(last_few)} roles: {[e.get('role') for e in last_few]}")
|
||||
|
||||
if any(e.get('role') == 'AI' and e.get('content') for e in entries):
|
||||
# Double check content for our keyword
|
||||
for e in entries:
|
||||
@@ -61,7 +54,6 @@ class ExecutionSimulation(BaseSimulation):
|
||||
success = True
|
||||
break
|
||||
if success: break
|
||||
|
||||
# Also check if output is already in history via tool role
|
||||
for e in entries:
|
||||
if e.get('role') in ['Tool', 'Function'] and "Simulation Test" in e.get('content', ''):
|
||||
@@ -69,9 +61,7 @@ class ExecutionSimulation(BaseSimulation):
|
||||
success = True
|
||||
break
|
||||
if success: break
|
||||
|
||||
time.sleep(1.0)
|
||||
|
||||
assert success, "Failed to observe script execution output or AI confirmation text"
|
||||
print(f"[Sim] Final check: approved {approved_count} scripts.")
|
||||
|
||||
|
||||
@@ -6,30 +6,24 @@ from simulation.sim_base import BaseSimulation, run_sim
|
||||
class ToolsSimulation(BaseSimulation):
|
||||
def run(self):
|
||||
print("\n--- Running Tools Simulation ---")
|
||||
|
||||
# 1. Trigger list_directory tool
|
||||
msg = "List the files in the current directory."
|
||||
print(f"[Sim] Sending message to trigger tool: {msg}")
|
||||
self.sim.run_discussion_turn(msg)
|
||||
|
||||
# 2. Wait for AI to execute tool
|
||||
print("[Sim] Waiting for tool execution...")
|
||||
time.sleep(5) # Give it some time
|
||||
|
||||
# 3. Verify Tool Log
|
||||
# We need a hook to get the tool log
|
||||
# In gui_2.py, there is _on_tool_log which appends to self._tool_log
|
||||
# We need a hook to read self._tool_log
|
||||
|
||||
# 4. Trigger read_file tool
|
||||
msg = "Read the first 10 lines of aggregate.py."
|
||||
print(f"[Sim] Sending message to trigger tool: {msg}")
|
||||
self.sim.run_discussion_turn(msg)
|
||||
|
||||
# 5. Wait and Verify
|
||||
print("[Sim] Waiting for tool execution...")
|
||||
time.sleep(5)
|
||||
|
||||
session = self.client.get_session()
|
||||
entries = session.get('session', {}).get('entries', [])
|
||||
# Tool outputs are usually in the conversation history as 'Tool' role or similar
|
||||
@@ -38,7 +32,6 @@ class ToolsSimulation(BaseSimulation):
|
||||
# Actually in Gemini history, they might be nested.
|
||||
# But our GUI disc_entries list usually has them as separate entries or
|
||||
# they are part of the AI turn.
|
||||
|
||||
# Let's check if the AI mentions it in its response
|
||||
last_ai_msg = entries[-1]['content']
|
||||
print(f"[Sim] Final AI Response: {last_ai_msg[:100]}...")
|
||||
|
||||
@@ -22,14 +22,12 @@ class UserSimAgent:
|
||||
# ai_client expects md_content and user_message.
|
||||
# It handles its own internal history.
|
||||
# We want the 'User AI' to have context of what the 'Assistant AI' said.
|
||||
|
||||
# For now, let's just use the last message from Assistant as the prompt.
|
||||
last_ai_msg = ""
|
||||
for entry in reversed(conversation_history):
|
||||
if entry.get('role') == 'AI':
|
||||
last_ai_msg = entry.get('content', '')
|
||||
break
|
||||
|
||||
# We need to set a custom system prompt for the User Simulator
|
||||
try:
|
||||
ai_client.set_custom_system_prompt(self.system_prompt)
|
||||
@@ -38,7 +36,6 @@ class UserSimAgent:
|
||||
response = ai_client.send(md_content="", user_message=last_ai_msg)
|
||||
finally:
|
||||
ai_client.set_custom_system_prompt("")
|
||||
|
||||
return response
|
||||
|
||||
def perform_action_with_delay(self, action_func, *args, **kwargs):
|
||||
|
||||
@@ -54,7 +54,6 @@ class WorkflowSimulator:
|
||||
session = self.client.get_session()
|
||||
entries = session.get('session', {}).get('entries', [])
|
||||
user_message = self.user_agent.generate_response(entries)
|
||||
|
||||
print(f"\n[USER]: {user_message}")
|
||||
self.client.set_value("ai_input", user_message)
|
||||
self.client.click("btn_gen_send")
|
||||
@@ -63,14 +62,12 @@ class WorkflowSimulator:
|
||||
print("Waiting for AI response...", end="", flush=True)
|
||||
start_time = time.time()
|
||||
last_count = len(self.client.get_session().get('session', {}).get('entries', []))
|
||||
|
||||
while time.time() - start_time < timeout:
|
||||
# Check for error status first
|
||||
status = self.client.get_value("ai_status")
|
||||
if status and status.lower().startswith("error"):
|
||||
print(f"\n[ABORT] GUI reported error status: {status}")
|
||||
return {"role": "AI", "content": f"ERROR: {status}"}
|
||||
|
||||
time.sleep(1)
|
||||
print(".", end="", flush=True)
|
||||
entries = self.client.get_session().get('session', {}).get('entries', [])
|
||||
@@ -82,6 +79,5 @@ class WorkflowSimulator:
|
||||
if "error" in content.lower() or "blocked" in content.lower():
|
||||
print(f"[WARN] AI response appears to contain an error message.")
|
||||
return last_entry
|
||||
|
||||
print("\nTimeout waiting for AI")
|
||||
return None
|
||||
|
||||
20
summarize.py
20
summarize.py
@@ -27,20 +27,17 @@ import ast
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ per-type extractors
|
||||
|
||||
def _summarise_python(path: Path, content: str) -> str:
|
||||
lines = content.splitlines()
|
||||
line_count = len(lines)
|
||||
parts = [f"**Python** — {line_count} lines"]
|
||||
|
||||
try:
|
||||
tree = ast.parse(content.lstrip(chr(0xFEFF)), filename=str(path))
|
||||
except SyntaxError as e:
|
||||
parts.append(f"_Parse error: {e}_")
|
||||
return "\n".join(parts)
|
||||
|
||||
# Imports
|
||||
imports = []
|
||||
for node in ast.walk(tree):
|
||||
@@ -53,7 +50,6 @@ def _summarise_python(path: Path, content: str) -> str:
|
||||
if imports:
|
||||
unique_imports = sorted(set(imports))
|
||||
parts.append(f"imports: {', '.join(unique_imports)}")
|
||||
|
||||
# Top-level constants (ALL_CAPS assignments)
|
||||
constants = []
|
||||
for node in ast.iter_child_nodes(tree):
|
||||
@@ -66,7 +62,6 @@ def _summarise_python(path: Path, content: str) -> str:
|
||||
constants.append(node.target.id)
|
||||
if constants:
|
||||
parts.append(f"constants: {', '.join(constants)}")
|
||||
|
||||
# Classes + their methods
|
||||
for node in ast.iter_child_nodes(tree):
|
||||
if isinstance(node, ast.ClassDef):
|
||||
@@ -78,7 +73,6 @@ def _summarise_python(path: Path, content: str) -> str:
|
||||
parts.append(f"class {node.name}: {', '.join(methods)}")
|
||||
else:
|
||||
parts.append(f"class {node.name}")
|
||||
|
||||
# Top-level functions
|
||||
top_fns = [
|
||||
node.name for node in ast.iter_child_nodes(tree)
|
||||
@@ -86,15 +80,12 @@ def _summarise_python(path: Path, content: str) -> str:
|
||||
]
|
||||
if top_fns:
|
||||
parts.append(f"functions: {', '.join(top_fns)}")
|
||||
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _summarise_toml(path: Path, content: str) -> str:
|
||||
lines = content.splitlines()
|
||||
line_count = len(lines)
|
||||
parts = [f"**TOML** — {line_count} lines"]
|
||||
|
||||
# Extract top-level table headers [key] and [[key]]
|
||||
table_pat = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}")
|
||||
tables = []
|
||||
@@ -104,7 +95,6 @@ def _summarise_toml(path: Path, content: str) -> str:
|
||||
tables.append(m.group(1).strip())
|
||||
if tables:
|
||||
parts.append(f"tables: {', '.join(tables)}")
|
||||
|
||||
# Top-level key = value (not inside a [table])
|
||||
kv_pat = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
|
||||
in_table = False
|
||||
@@ -119,15 +109,12 @@ def _summarise_toml(path: Path, content: str) -> str:
|
||||
top_keys.append(m.group(1))
|
||||
if top_keys:
|
||||
parts.append(f"top-level keys: {', '.join(top_keys)}")
|
||||
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _summarise_markdown(path: Path, content: str) -> str:
|
||||
lines = content.splitlines()
|
||||
line_count = len(lines)
|
||||
parts = [f"**Markdown** — {line_count} lines"]
|
||||
|
||||
headings = []
|
||||
for line in lines:
|
||||
m = re.match(r"^(#{1,3})\s+(.+)", line)
|
||||
@@ -138,10 +125,8 @@ def _summarise_markdown(path: Path, content: str) -> str:
|
||||
headings.append(f"{indent}{text}")
|
||||
if headings:
|
||||
parts.append("headings:\n" + "\n".join(f" {h}" for h in headings))
|
||||
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _summarise_generic(path: Path, content: str) -> str:
|
||||
lines = content.splitlines()
|
||||
line_count = len(lines)
|
||||
@@ -151,8 +136,6 @@ def _summarise_generic(path: Path, content: str) -> str:
|
||||
if preview:
|
||||
parts.append("preview:\n```\n" + "\n".join(preview) + "\n```")
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ dispatch
|
||||
|
||||
_SUMMARISERS = {
|
||||
@@ -164,7 +147,6 @@ _SUMMARISERS = {
|
||||
".ps1": _summarise_generic,
|
||||
}
|
||||
|
||||
|
||||
def summarise_file(path: Path, content: str) -> str:
|
||||
"""
|
||||
Return a compact markdown summary string for a single file.
|
||||
@@ -177,7 +159,6 @@ def summarise_file(path: Path, content: str) -> str:
|
||||
except Exception as e:
|
||||
return f"_Summariser error: {e}_"
|
||||
|
||||
|
||||
def summarise_items(file_items: list[dict]) -> list[dict]:
|
||||
"""
|
||||
Given a list of file_item dicts (as returned by aggregate.build_file_items),
|
||||
@@ -196,7 +177,6 @@ def summarise_items(file_items: list[dict]) -> list[dict]:
|
||||
result.append({**item, "summary": summary})
|
||||
return result
|
||||
|
||||
|
||||
def build_summary_markdown(file_items: list[dict]) -> str:
|
||||
"""
|
||||
Build a compact markdown string of file summaries, suitable for the
|
||||
|
||||
@@ -14,7 +14,6 @@ class TestMMAPersistence(unittest.TestCase):
|
||||
def test_save_load_mma(self):
|
||||
proj = project_manager.default_project("test")
|
||||
proj["mma"] = {"epic": "Test Epic", "tracks": [{"id": "track_1"}]}
|
||||
|
||||
test_file = Path("test_mma_proj.toml")
|
||||
try:
|
||||
project_manager.save_project(proj, test_file)
|
||||
|
||||
@@ -47,10 +47,8 @@ def live_gui():
|
||||
"""
|
||||
gui_script = "gui_2.py"
|
||||
print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks...")
|
||||
|
||||
os.makedirs("logs", exist_ok=True)
|
||||
log_file = open(f"logs/{gui_script.replace('.', '_')}_test.log", "w", encoding="utf-8")
|
||||
|
||||
process = subprocess.Popen(
|
||||
["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"],
|
||||
stdout=log_file,
|
||||
@@ -58,11 +56,9 @@ def live_gui():
|
||||
text=True,
|
||||
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0
|
||||
)
|
||||
|
||||
max_retries = 15 # Slightly more time for gui_2
|
||||
ready = False
|
||||
print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...")
|
||||
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < max_retries:
|
||||
try:
|
||||
@@ -76,12 +72,10 @@ def live_gui():
|
||||
print(f"[Fixture] {gui_script} process died unexpectedly during startup.")
|
||||
break
|
||||
time.sleep(0.5)
|
||||
|
||||
if not ready:
|
||||
print(f"[Fixture] TIMEOUT/FAILURE: Hook server for {gui_script} failed to respond.")
|
||||
kill_process_tree(process.pid)
|
||||
pytest.fail(f"Failed to start {gui_script} with test hooks.")
|
||||
|
||||
try:
|
||||
yield process, gui_script
|
||||
finally:
|
||||
|
||||
21
tests/mock_alias_tool.py
Normal file
21
tests/mock_alias_tool.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import sys, json, os, subprocess
|
||||
prompt = sys.stdin.read()
|
||||
if '"role": "tool"' in prompt:
|
||||
print(json.dumps({"type": "message", "role": "assistant", "content": "Tool worked!"}), flush=True)
|
||||
print(json.dumps({"type": "result", "stats": {"total_tokens": 20}}), flush=True)
|
||||
else:
|
||||
# We must call the bridge to trigger the GUI approval!
|
||||
tool_call = {"name": "list_directory", "input": {"dir_path": "."}}
|
||||
bridge_cmd = [sys.executable, "C:/projects/manual_slop/scripts/cli_tool_bridge.py"]
|
||||
proc = subprocess.Popen(bridge_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
|
||||
stdout, _ = proc.communicate(input=json.dumps(tool_call))
|
||||
|
||||
# Even if bridge says allow, we emit the tool_use to the adapter
|
||||
print(json.dumps({"type": "message", "role": "assistant", "content": "I will list the directory."}), flush=True)
|
||||
print(json.dumps({
|
||||
"type": "tool_use",
|
||||
"name": "list_directory",
|
||||
"id": "alias_call",
|
||||
"args": {"dir_path": "."}
|
||||
}), flush=True)
|
||||
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)
|
||||
@@ -7,21 +7,17 @@ def main():
|
||||
# Debug log to stderr
|
||||
sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
|
||||
sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n")
|
||||
|
||||
# Read prompt from stdin
|
||||
try:
|
||||
# On Windows, stdin might be closed or behave weirdly if not handled
|
||||
prompt = sys.stdin.read()
|
||||
except EOFError:
|
||||
prompt = ""
|
||||
|
||||
sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n")
|
||||
sys.stderr.flush()
|
||||
|
||||
# Skip management commands
|
||||
if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]:
|
||||
return
|
||||
|
||||
# If the prompt contains tool results, provide final answer
|
||||
if '"role": "tool"' in prompt or '"tool_call_id"' in prompt:
|
||||
print(json.dumps({
|
||||
@@ -36,7 +32,6 @@ def main():
|
||||
"session_id": "mock-session-final"
|
||||
}), flush=True)
|
||||
return
|
||||
|
||||
# Default flow: simulate a tool call
|
||||
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
|
||||
# Using format that bridge understands
|
||||
@@ -44,10 +39,8 @@ def main():
|
||||
"name": "read_file",
|
||||
"input": {"path": "test.txt"}
|
||||
}
|
||||
|
||||
sys.stderr.write(f"DEBUG: Calling bridge at {bridge_path}\n")
|
||||
sys.stderr.flush()
|
||||
|
||||
try:
|
||||
# CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed
|
||||
process = subprocess.Popen(
|
||||
@@ -59,16 +52,13 @@ def main():
|
||||
env=os.environ
|
||||
)
|
||||
stdout, stderr = process.communicate(input=json.dumps(bridge_tool_call))
|
||||
|
||||
sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n")
|
||||
sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n")
|
||||
|
||||
decision_data = json.loads(stdout.strip())
|
||||
decision = decision_data.get("decision")
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"DEBUG: Bridge failed: {e}\n")
|
||||
decision = "deny"
|
||||
|
||||
if decision == "allow":
|
||||
# Simulate REAL CLI field names for adapter normalization test
|
||||
print(json.dumps({
|
||||
@@ -77,13 +67,11 @@ def main():
|
||||
"tool_id": "call_123",
|
||||
"parameters": {"path": "test.txt"}
|
||||
}), flush=True)
|
||||
|
||||
print(json.dumps({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": "I am reading the file now..."
|
||||
}), flush=True)
|
||||
|
||||
print(json.dumps({
|
||||
"type": "result",
|
||||
"status": "success",
|
||||
|
||||
@@ -15,6 +15,8 @@ output_dir = "./md_gen"
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
[files.tier_assignments]
|
||||
|
||||
[screenshots]
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
@@ -10,5 +10,5 @@ auto_add = true
|
||||
|
||||
[discussions.main]
|
||||
git_commit = ""
|
||||
last_updated = "2026-02-27T18:56:53"
|
||||
last_updated = "2026-02-28T07:35:03"
|
||||
history = []
|
||||
|
||||
@@ -15,6 +15,8 @@ output_dir = "./md_gen"
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
[files.tier_assignments]
|
||||
|
||||
[screenshots]
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
@@ -6,10 +6,10 @@ roles = [
|
||||
"Reasoning",
|
||||
]
|
||||
history = []
|
||||
active = "TestDisc_1772236592"
|
||||
active = "TestDisc_1772282083"
|
||||
auto_add = true
|
||||
|
||||
[discussions.TestDisc_1772236592]
|
||||
[discussions.TestDisc_1772282083]
|
||||
git_commit = ""
|
||||
last_updated = "2026-02-27T18:56:46"
|
||||
last_updated = "2026-02-28T07:34:56"
|
||||
history = []
|
||||
|
||||
@@ -15,6 +15,8 @@ output_dir = "./md_gen"
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
[files.tier_assignments]
|
||||
|
||||
[screenshots]
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
@@ -10,5 +10,5 @@ auto_add = true
|
||||
|
||||
[discussions.main]
|
||||
git_commit = ""
|
||||
last_updated = "2026-02-27T18:57:53"
|
||||
last_updated = "2026-02-28T07:35:49"
|
||||
history = []
|
||||
|
||||
@@ -15,6 +15,8 @@ output_dir = "./md_gen"
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
[files.tier_assignments]
|
||||
|
||||
[screenshots]
|
||||
base_dir = "."
|
||||
paths = []
|
||||
|
||||
@@ -10,5 +10,5 @@ auto_add = true
|
||||
|
||||
[discussions.main]
|
||||
git_commit = ""
|
||||
last_updated = "2026-02-27T18:57:10"
|
||||
last_updated = "2026-02-28T07:35:20"
|
||||
history = []
|
||||
|
||||
@@ -18,7 +18,5 @@ history = [
|
||||
|
||||
[discussions.AutoDisc]
|
||||
git_commit = ""
|
||||
last_updated = "2026-02-27T23:54:05"
|
||||
history = [
|
||||
"@2026-02-27T19:08:37\nSystem:\n[PERFORMANCE ALERT] Frame time high: 62.2ms. Please consider optimizing recent changes or reducing load.",
|
||||
]
|
||||
last_updated = "2026-02-28T07:34:41"
|
||||
history = []
|
||||
|
||||
@@ -9,10 +9,8 @@ def test_ai_client_send_gemini_cli():
|
||||
"""
|
||||
test_message = "Hello, this is a test prompt for the CLI adapter."
|
||||
test_response = "This is a dummy response from the Gemini CLI."
|
||||
|
||||
# Set provider to gemini_cli
|
||||
ai_client.set_provider("gemini_cli", "gemini-2.5-flash-lite")
|
||||
|
||||
# 1. Mock 'ai_client.GeminiCliAdapter' (which we will add)
|
||||
with patch('ai_client.GeminiCliAdapter') as MockAdapterClass:
|
||||
mock_adapter_instance = MockAdapterClass.return_value
|
||||
@@ -20,7 +18,6 @@ def test_ai_client_send_gemini_cli():
|
||||
mock_adapter_instance.last_usage = {"total_tokens": 100}
|
||||
mock_adapter_instance.last_latency = 0.5
|
||||
mock_adapter_instance.session_id = "test-session"
|
||||
|
||||
# Verify that 'events' are emitted correctly
|
||||
with patch.object(ai_client.events, 'emit') as mock_emit:
|
||||
response = ai_client.send(
|
||||
@@ -28,14 +25,11 @@ def test_ai_client_send_gemini_cli():
|
||||
user_message=test_message,
|
||||
base_dir="."
|
||||
)
|
||||
|
||||
# Check that the adapter's send method was called.
|
||||
mock_adapter_instance.send.assert_called()
|
||||
|
||||
# Verify that the expected lifecycle events were emitted.
|
||||
emitted_event_names = [call.args[0] for call in mock_emit.call_args_list]
|
||||
assert 'request_start' in emitted_event_names
|
||||
assert 'response_received' in emitted_event_names
|
||||
|
||||
# Verify that the combined text returned by the adapter is returned by 'ai_client.send'.
|
||||
assert response == test_response
|
||||
|
||||
@@ -8,7 +8,6 @@ def test_list_models_gemini_cli():
|
||||
for the 'gemini_cli' provider.
|
||||
"""
|
||||
models = ai_client.list_models("gemini_cli")
|
||||
|
||||
assert "gemini-3.1-pro-preview" in models
|
||||
assert "gemini-3-flash-preview" in models
|
||||
assert "gemini-2.5-pro" in models
|
||||
|
||||
@@ -68,7 +68,6 @@ def test_multiline_string_safety():
|
||||
# def a():
|
||||
# '''
|
||||
# This is a...
|
||||
|
||||
result = format_code(source)
|
||||
assert " This is a multiline" in result
|
||||
assert result.startswith("def a():\n '''")
|
||||
|
||||
@@ -37,18 +37,13 @@ def test_event_emission():
|
||||
def test_send_emits_events():
|
||||
with patch("ai_client._send_gemini") as mock_send_gemini, \
|
||||
patch("ai_client._send_anthropic") as mock_send_anthropic:
|
||||
|
||||
mock_send_gemini.return_value = "gemini response"
|
||||
|
||||
start_callback = MagicMock()
|
||||
response_callback = MagicMock()
|
||||
|
||||
ai_client.events.on("request_start", start_callback)
|
||||
ai_client.events.on("response_received", response_callback)
|
||||
|
||||
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||
ai_client.send("context", "message")
|
||||
|
||||
# We mocked _send_gemini so it doesn't emit events inside.
|
||||
# But wait, ai_client.send itself emits request_start and response_received?
|
||||
# Actually, ai_client.send delegates to _send_gemini.
|
||||
@@ -58,27 +53,20 @@ def test_send_emits_events():
|
||||
def test_send_emits_events_proper():
|
||||
with patch("ai_client._ensure_gemini_client"), \
|
||||
patch("ai_client._gemini_client") as mock_client:
|
||||
|
||||
mock_chat = MagicMock()
|
||||
mock_client.chats.create.return_value = mock_chat
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])]
|
||||
mock_response.usage_metadata = MockUsage()
|
||||
mock_chat.send_message.return_value = mock_response
|
||||
|
||||
start_callback = MagicMock()
|
||||
response_callback = MagicMock()
|
||||
|
||||
ai_client.events.on("request_start", start_callback)
|
||||
ai_client.events.on("response_received", response_callback)
|
||||
|
||||
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||
ai_client.send("context", "message")
|
||||
|
||||
assert start_callback.called
|
||||
assert response_callback.called
|
||||
|
||||
args, kwargs = start_callback.call_args
|
||||
assert kwargs['payload']['provider'] == 'gemini'
|
||||
|
||||
@@ -87,42 +75,31 @@ def test_send_emits_tool_events():
|
||||
with patch("ai_client._ensure_gemini_client"), \
|
||||
patch("ai_client._gemini_client") as mock_client, \
|
||||
patch("mcp_client.dispatch") as mock_dispatch:
|
||||
|
||||
mock_chat = MagicMock()
|
||||
mock_client.chats.create.return_value = mock_chat
|
||||
|
||||
# 1. Setup mock response with a tool call
|
||||
mock_fc = MagicMock()
|
||||
mock_fc.name = "read_file"
|
||||
mock_fc.args = {"path": "test.txt"}
|
||||
|
||||
mock_response_with_tool = MagicMock()
|
||||
mock_response_with_tool.candidates = [MockCandidate([MockPart("tool call text", mock_fc)])]
|
||||
mock_response_with_tool.usage_metadata = MockUsage()
|
||||
|
||||
# 2. Setup second mock response (final answer)
|
||||
mock_response_final = MagicMock()
|
||||
mock_response_final.candidates = [MockCandidate([MockPart("final answer", None)])]
|
||||
mock_response_final.usage_metadata = MockUsage()
|
||||
|
||||
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
|
||||
mock_dispatch.return_value = "file content"
|
||||
|
||||
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
|
||||
|
||||
tool_callback = MagicMock()
|
||||
ai_client.events.on("tool_execution", tool_callback)
|
||||
|
||||
ai_client.send("context", "message")
|
||||
|
||||
# Should be called twice: once for 'started', once for 'completed'
|
||||
assert tool_callback.call_count == 2
|
||||
|
||||
# Check 'started' call
|
||||
args, kwargs = tool_callback.call_args_list[0]
|
||||
assert kwargs['payload']['status'] == 'started'
|
||||
assert kwargs['payload']['tool'] == 'read_file'
|
||||
|
||||
# Check 'completed' call
|
||||
args, kwargs = tool_callback.call_args_list[1]
|
||||
assert kwargs['payload']['status'] == 'completed'
|
||||
|
||||
@@ -71,7 +71,6 @@ def test_get_text_value():
|
||||
client = ApiHookClient()
|
||||
with patch.object(client, 'get_value', return_value=123):
|
||||
assert client.get_text_value("dummy_tag") == "123"
|
||||
|
||||
with patch.object(client, 'get_value', return_value=None):
|
||||
assert client.get_text_value("dummy_tag") is None
|
||||
|
||||
@@ -83,17 +82,14 @@ def test_get_node_status():
|
||||
# When get_value returns a status directly
|
||||
with patch.object(client, 'get_value', return_value="running"):
|
||||
assert client.get_node_status("my_node") == "running"
|
||||
|
||||
# When get_value returns None and diagnostics provides a nodes dict
|
||||
with patch.object(client, 'get_value', return_value=None):
|
||||
with patch.object(client, '_make_request', return_value={'nodes': {'my_node': 'completed'}}):
|
||||
assert client.get_node_status("my_node") == "completed"
|
||||
|
||||
# When get_value returns None and diagnostics provides a direct key
|
||||
with patch.object(client, 'get_value', return_value=None):
|
||||
with patch.object(client, '_make_request', return_value={'my_node': 'failed'}):
|
||||
assert client.get_node_status("my_node") == "failed"
|
||||
|
||||
# When neither works
|
||||
with patch.object(client, 'get_value', return_value=None):
|
||||
with patch.object(client, '_make_request', return_value={}):
|
||||
|
||||
@@ -37,7 +37,6 @@ def test_app_processes_new_actions():
|
||||
import gui_legacy
|
||||
from unittest.mock import MagicMock, patch
|
||||
import dearpygui.dearpygui as dpg
|
||||
|
||||
dpg.create_context()
|
||||
try:
|
||||
with patch('gui_legacy.load_config', return_value={}), \
|
||||
@@ -46,11 +45,9 @@ def test_app_processes_new_actions():
|
||||
patch('gui_legacy.project_manager'), \
|
||||
patch.object(gui_legacy.App, '_load_active_project'):
|
||||
app = gui_legacy.App()
|
||||
|
||||
with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
|
||||
patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
|
||||
patch('dearpygui.dearpygui.get_item_callback') as mock_get_cb:
|
||||
|
||||
# Test select_tab
|
||||
app._pending_gui_tasks.append({
|
||||
"action": "select_tab",
|
||||
@@ -59,7 +56,6 @@ def test_app_processes_new_actions():
|
||||
})
|
||||
app._process_pending_gui_tasks()
|
||||
mock_set_value.assert_any_call("some_tab_bar", "some_tab")
|
||||
|
||||
# Test select_list_item
|
||||
mock_cb = MagicMock()
|
||||
mock_get_cb.return_value = mock_cb
|
||||
|
||||
@@ -37,17 +37,14 @@ class MyClass:
|
||||
return None
|
||||
'''
|
||||
skeleton = parser.get_skeleton(code)
|
||||
|
||||
# Check that signatures are preserved
|
||||
assert "def complex_function(a, b):" in skeleton
|
||||
assert "class MyClass:" in skeleton
|
||||
assert "def method_without_docstring(self):" in skeleton
|
||||
|
||||
# Check that docstring is preserved
|
||||
assert '"""' in skeleton
|
||||
assert "This is a docstring." in skeleton
|
||||
assert "It should be preserved." in skeleton
|
||||
|
||||
# Check that bodies are replaced with '...'
|
||||
assert "..." in skeleton
|
||||
assert "result = a + b" not in skeleton
|
||||
@@ -87,19 +84,15 @@ class MyClass:
|
||||
print("method preserved", x)
|
||||
'''
|
||||
curated = parser.get_curated_view(code)
|
||||
|
||||
# Check that core_func is preserved
|
||||
assert 'print("this should be preserved")' in curated
|
||||
assert 'return True' in curated
|
||||
|
||||
# Check that hot_func is preserved
|
||||
assert '# [HOT]' in curated
|
||||
assert 'print("this should also be preserved")' in curated
|
||||
|
||||
# Check that normal_func is stripped but docstring is preserved
|
||||
assert '"""Normal doc."""' in curated
|
||||
assert 'print("this should be stripped")' not in curated
|
||||
assert '...' in curated
|
||||
|
||||
# Check that core_method is preserved
|
||||
assert 'print("method preserved", x)' in curated
|
||||
|
||||
@@ -26,19 +26,15 @@ class MyClass:
|
||||
print("method preserved")
|
||||
'''
|
||||
curated = parser.get_curated_view(code)
|
||||
|
||||
# Check that core_func is preserved
|
||||
assert 'print("this should be preserved")' in curated
|
||||
assert 'return True' in curated
|
||||
|
||||
# Check that hot_func is preserved
|
||||
assert '# [HOT]' in curated
|
||||
assert 'print("this should also be preserved")' in curated
|
||||
|
||||
# Check that normal_func is stripped but docstring is preserved
|
||||
assert '"""Normal doc."""' in curated
|
||||
assert 'print("this should be stripped")' not in curated
|
||||
assert '...' in curated
|
||||
|
||||
# Check that core_method is preserved
|
||||
assert 'print("method preserved")' in curated
|
||||
|
||||
@@ -4,44 +4,39 @@ from events import AsyncEventQueue
|
||||
|
||||
def test_async_event_queue_put_get():
|
||||
"""Verify that an event can be asynchronously put and retrieved from the queue."""
|
||||
|
||||
async def run_test():
|
||||
queue = AsyncEventQueue()
|
||||
event_name = "test_event"
|
||||
payload = {"data": "hello"}
|
||||
|
||||
await queue.put(event_name, payload)
|
||||
ret_name, ret_payload = await queue.get()
|
||||
|
||||
assert ret_name == event_name
|
||||
assert ret_payload == payload
|
||||
|
||||
asyncio.run(run_test())
|
||||
|
||||
def test_async_event_queue_multiple():
|
||||
"""Verify that multiple events can be asynchronously put and retrieved in order."""
|
||||
|
||||
async def run_test():
|
||||
queue = AsyncEventQueue()
|
||||
|
||||
await queue.put("event1", 1)
|
||||
await queue.put("event2", 2)
|
||||
|
||||
name1, val1 = await queue.get()
|
||||
name2, val2 = await queue.get()
|
||||
|
||||
assert name1 == "event1"
|
||||
assert val1 == 1
|
||||
assert name2 == "event2"
|
||||
assert val2 == 2
|
||||
|
||||
asyncio.run(run_test())
|
||||
|
||||
def test_async_event_queue_none_payload():
|
||||
"""Verify that an event with None payload works correctly."""
|
||||
|
||||
async def run_test():
|
||||
queue = AsyncEventQueue()
|
||||
await queue.put("no_payload")
|
||||
name, payload = await queue.get()
|
||||
assert name == "no_payload"
|
||||
assert payload is None
|
||||
|
||||
asyncio.run(run_test())
|
||||
|
||||
@@ -16,14 +16,11 @@ def test_auto_whitelist_keywords(registry_setup):
|
||||
session_id = "test_kw"
|
||||
session_dir = logs_dir / session_id
|
||||
session_dir.mkdir()
|
||||
|
||||
# Create comms.log with ERROR
|
||||
comms_log = session_dir / "comms.log"
|
||||
comms_log.write_text("Some message\nAN ERROR OCCURRED\nMore text")
|
||||
|
||||
registry.register_session(session_id, str(session_dir), datetime.now())
|
||||
registry.update_auto_whitelist_status(session_id)
|
||||
|
||||
assert registry.is_session_whitelisted(session_id)
|
||||
assert "ERROR" in registry.data[session_id]["metadata"]["reason"]
|
||||
|
||||
@@ -32,14 +29,11 @@ def test_auto_whitelist_message_count(registry_setup):
|
||||
session_id = "test_msg_count"
|
||||
session_dir = logs_dir / session_id
|
||||
session_dir.mkdir()
|
||||
|
||||
# Create comms.log with > 10 lines
|
||||
comms_log = session_dir / "comms.log"
|
||||
comms_log.write_text("\n".join(["msg"] * 15))
|
||||
|
||||
registry.register_session(session_id, str(session_dir), datetime.now())
|
||||
registry.update_auto_whitelist_status(session_id)
|
||||
|
||||
assert registry.is_session_whitelisted(session_id)
|
||||
assert registry.data[session_id]["metadata"]["message_count"] == 15
|
||||
|
||||
@@ -48,14 +42,11 @@ def test_auto_whitelist_large_size(registry_setup):
|
||||
session_id = "test_large"
|
||||
session_dir = logs_dir / session_id
|
||||
session_dir.mkdir()
|
||||
|
||||
# Create large file (> 50KB)
|
||||
large_file = session_dir / "large.log"
|
||||
large_file.write_text("x" * 60000)
|
||||
|
||||
registry.register_session(session_id, str(session_dir), datetime.now())
|
||||
registry.update_auto_whitelist_status(session_id)
|
||||
|
||||
assert registry.is_session_whitelisted(session_id)
|
||||
assert "Large session size" in registry.data[session_id]["metadata"]["reason"]
|
||||
|
||||
@@ -64,13 +55,10 @@ def test_no_auto_whitelist_insignificant(registry_setup):
|
||||
session_id = "test_insignificant"
|
||||
session_dir = logs_dir / session_id
|
||||
session_dir.mkdir()
|
||||
|
||||
# Small file, few lines, no keywords
|
||||
comms_log = session_dir / "comms.log"
|
||||
comms_log.write_text("hello\nworld")
|
||||
|
||||
registry.register_session(session_id, str(session_dir), datetime.now())
|
||||
registry.update_auto_whitelist_status(session_id)
|
||||
|
||||
assert not registry.is_session_whitelisted(session_id)
|
||||
assert registry.data[session_id]["metadata"]["message_count"] == 2
|
||||
|
||||
@@ -26,13 +26,10 @@ class TestCliToolBridge(unittest.TestCase):
|
||||
# 1. Mock stdin with a JSON string tool call
|
||||
mock_stdin.write(json.dumps(self.tool_call))
|
||||
mock_stdin.seek(0)
|
||||
|
||||
# 2. Mock ApiHookClient to return approved
|
||||
mock_request.return_value = {'approved': True}
|
||||
|
||||
# Run main
|
||||
main()
|
||||
|
||||
# 3. Capture stdout and assert allow
|
||||
output = json.loads(mock_stdout.getvalue().strip())
|
||||
self.assertEqual(output.get('decision'), 'allow')
|
||||
@@ -44,12 +41,9 @@ class TestCliToolBridge(unittest.TestCase):
|
||||
# Mock stdin
|
||||
mock_stdin.write(json.dumps(self.tool_call))
|
||||
mock_stdin.seek(0)
|
||||
|
||||
# 4. Mock ApiHookClient to return denied
|
||||
mock_request.return_value = {'approved': False}
|
||||
|
||||
main()
|
||||
|
||||
# Assert deny
|
||||
output = json.loads(mock_stdout.getvalue().strip())
|
||||
self.assertEqual(output.get('decision'), 'deny')
|
||||
@@ -61,12 +55,9 @@ class TestCliToolBridge(unittest.TestCase):
|
||||
# Mock stdin
|
||||
mock_stdin.write(json.dumps(self.tool_call))
|
||||
mock_stdin.seek(0)
|
||||
|
||||
# 5. Test case where hook server is unreachable (exception)
|
||||
mock_request.side_effect = Exception("Connection refused")
|
||||
|
||||
main()
|
||||
|
||||
# Assert deny on error
|
||||
output = json.loads(mock_stdout.getvalue().strip())
|
||||
self.assertEqual(output.get('decision'), 'deny')
|
||||
|
||||
@@ -28,21 +28,16 @@ class TestCliToolBridgeMapping(unittest.TestCase):
|
||||
'name': 'read_file',
|
||||
'input': {'path': 'test.txt'}
|
||||
}
|
||||
|
||||
# 1. Mock stdin with the API format JSON
|
||||
mock_stdin.write(json.dumps(api_tool_call))
|
||||
mock_stdin.seek(0)
|
||||
|
||||
# 2. Mock ApiHookClient to return approved
|
||||
mock_request.return_value = {'approved': True}
|
||||
|
||||
# Run main
|
||||
main()
|
||||
|
||||
# 3. Verify that request_confirmation was called with mapped values
|
||||
# If it's not mapped, it will likely be called with None or fail
|
||||
mock_request.assert_called_once_with('read_file', {'path': 'test.txt'})
|
||||
|
||||
# 4. Capture stdout and assert allow
|
||||
output_str = mock_stdout.getvalue().strip()
|
||||
self.assertTrue(output_str, "Stdout should not be empty")
|
||||
|
||||
@@ -20,7 +20,6 @@ def simulate_conductor_phase_completion(client: ApiHookClient):
|
||||
"verification_successful": False,
|
||||
"verification_message": ""
|
||||
}
|
||||
|
||||
try:
|
||||
status = client.get_status()
|
||||
if status.get('status') == 'ok':
|
||||
@@ -32,7 +31,6 @@ def simulate_conductor_phase_completion(client: ApiHookClient):
|
||||
except Exception as e:
|
||||
results["verification_successful"] = False
|
||||
results["verification_message"] = f"Automated verification failed: {e}"
|
||||
|
||||
return results
|
||||
|
||||
def test_conductor_integrates_api_hook_client_for_verification(live_gui):
|
||||
@@ -42,7 +40,6 @@ def test_conductor_integrates_api_hook_client_for_verification(live_gui):
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
results = simulate_conductor_phase_completion(client)
|
||||
|
||||
assert results["verification_successful"] is True
|
||||
assert "successfully" in results["verification_message"]
|
||||
|
||||
@@ -52,11 +49,9 @@ def test_conductor_handles_api_hook_failure(live_gui):
|
||||
We patch the client's get_status to simulate failure even with live GUI.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
|
||||
with patch.object(ApiHookClient, 'get_status') as mock_get_status:
|
||||
mock_get_status.return_value = {'status': 'failed', 'error': 'Something went wrong'}
|
||||
results = simulate_conductor_phase_completion(client)
|
||||
|
||||
assert results["verification_successful"] is False
|
||||
assert "failed" in results["verification_message"]
|
||||
|
||||
@@ -66,7 +61,6 @@ def test_conductor_handles_api_hook_connection_error():
|
||||
"""
|
||||
client = ApiHookClient(base_url="http://127.0.0.1:9998", max_retries=0)
|
||||
results = simulate_conductor_phase_completion(client)
|
||||
|
||||
assert results["verification_successful"] is False
|
||||
# Check for expected error substrings from ApiHookClient
|
||||
msg = results["verification_message"]
|
||||
|
||||
@@ -23,30 +23,25 @@ async def test_conductor_engine_run_linear_executes_tickets_in_order(monkeypatch
|
||||
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker2", depends_on=["T1"])
|
||||
track = Track(id="track1", description="Track 1", tickets=[ticket1, ticket2])
|
||||
|
||||
from multi_agent_conductor import ConductorEngine
|
||||
engine = ConductorEngine(track=track)
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
# We mock run_worker_lifecycle as it is expected to be in the same module
|
||||
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
|
||||
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved
|
||||
|
||||
def side_effect(ticket, context, *args, **kwargs):
|
||||
ticket.mark_complete()
|
||||
return "Success"
|
||||
mock_lifecycle.side_effect = side_effect
|
||||
|
||||
await engine.run_linear()
|
||||
|
||||
# Track.get_executable_tickets() should be called repeatedly until all are done
|
||||
# T1 should run first, then T2.
|
||||
assert mock_lifecycle.call_count == 2
|
||||
assert ticket1.status == "completed"
|
||||
assert ticket2.status == "completed"
|
||||
|
||||
# Verify sequence: T1 before T2
|
||||
calls = mock_lifecycle.call_args_list
|
||||
assert calls[0][0][0].id == "T1"
|
||||
@@ -59,21 +54,15 @@ async def test_run_worker_lifecycle_calls_ai_client_send(monkeypatch):
|
||||
"""
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
mock_send.return_value = "Task complete. I have updated the file."
|
||||
|
||||
result = run_worker_lifecycle(ticket, context)
|
||||
|
||||
assert result == "Task complete. I have updated the file."
|
||||
assert ticket.status == "completed"
|
||||
mock_send.assert_called_once()
|
||||
|
||||
# Check if description was passed to send()
|
||||
args, kwargs = mock_send.call_args
|
||||
# user_message is passed as a keyword argument
|
||||
@@ -87,17 +76,13 @@ async def test_run_worker_lifecycle_context_injection(monkeypatch):
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
context_files = ["primary.py", "secondary.py"]
|
||||
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
# We mock ASTParser which is expected to be imported in multi_agent_conductor
|
||||
with patch("multi_agent_conductor.ASTParser") as mock_ast_parser_class, \
|
||||
patch("builtins.open", new_callable=MagicMock) as mock_open:
|
||||
|
||||
# Setup open mock to return different content for different files
|
||||
file_contents = {
|
||||
"primary.py": "def primary(): pass",
|
||||
@@ -110,23 +95,17 @@ async def test_run_worker_lifecycle_context_injection(monkeypatch):
|
||||
mock_file.read.return_value = content
|
||||
mock_file.__enter__.return_value = mock_file
|
||||
return mock_file
|
||||
|
||||
mock_open.side_effect = mock_open_side_effect
|
||||
|
||||
# Setup ASTParser mock
|
||||
mock_ast_parser = mock_ast_parser_class.return_value
|
||||
mock_ast_parser.get_curated_view.return_value = "CURATED VIEW"
|
||||
mock_ast_parser.get_skeleton.return_value = "SKELETON VIEW"
|
||||
|
||||
mock_send.return_value = "Success"
|
||||
|
||||
run_worker_lifecycle(ticket, context, context_files=context_files)
|
||||
|
||||
# Verify ASTParser calls:
|
||||
# First file (primary) should get curated view, others (secondary) get skeleton
|
||||
mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass")
|
||||
mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass")
|
||||
|
||||
# Verify user_message contains the views
|
||||
_, kwargs = mock_send.call_args
|
||||
user_message = kwargs["user_message"]
|
||||
@@ -142,18 +121,13 @@ async def test_run_worker_lifecycle_handles_blocked_response(monkeypatch):
|
||||
"""
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
# Simulate a response indicating a block
|
||||
mock_send.return_value = "I am BLOCKED because I don't have enough information."
|
||||
|
||||
run_worker_lifecycle(ticket, context)
|
||||
|
||||
assert ticket.status == "blocked"
|
||||
assert "BLOCKED" in ticket.blocked_reason
|
||||
|
||||
@@ -166,29 +140,23 @@ async def test_run_worker_lifecycle_step_mode_confirmation(monkeypatch):
|
||||
"""
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
|
||||
|
||||
# We simulate ai_client.send by making it call the pre_tool_callback it received
|
||||
|
||||
def mock_send_side_effect(md_content, user_message, **kwargs):
|
||||
callback = kwargs.get("pre_tool_callback")
|
||||
if callback:
|
||||
# Simulate calling it with some payload
|
||||
callback('{"tool": "read_file", "args": {"path": "test.txt"}}')
|
||||
return "Success"
|
||||
|
||||
mock_send.side_effect = mock_send_side_effect
|
||||
mock_confirm.return_value = True
|
||||
|
||||
mock_event_queue = MagicMock()
|
||||
run_worker_lifecycle(ticket, context, event_queue=mock_event_queue)
|
||||
|
||||
# Verify confirm_execution was called
|
||||
mock_confirm.assert_called_once()
|
||||
assert ticket.status == "completed"
|
||||
@@ -201,24 +169,17 @@ async def test_run_worker_lifecycle_step_mode_rejection(monkeypatch):
|
||||
"""
|
||||
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
|
||||
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
|
||||
|
||||
from multi_agent_conductor import run_worker_lifecycle
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
|
||||
|
||||
mock_confirm.return_value = False
|
||||
mock_send.return_value = "Task failed because tool execution was rejected."
|
||||
|
||||
run_worker_lifecycle(ticket, context)
|
||||
|
||||
# Verify it was passed to send
|
||||
args, kwargs = mock_send.call_args
|
||||
assert kwargs["pre_tool_callback"] is not None
|
||||
|
||||
# Since we've already tested ai_client's implementation of pre_tool_callback (mentally or via other tests),
|
||||
# here we just verify the wiring.
|
||||
|
||||
@@ -229,10 +190,8 @@ async def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch):
|
||||
"""
|
||||
import json
|
||||
from multi_agent_conductor import ConductorEngine
|
||||
|
||||
track = Track(id="dynamic_track", description="Dynamic Track")
|
||||
engine = ConductorEngine(track=track)
|
||||
|
||||
tickets_json = json.dumps([
|
||||
{
|
||||
"id": "T1",
|
||||
@@ -256,35 +215,26 @@ async def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch):
|
||||
"depends_on": []
|
||||
}
|
||||
])
|
||||
|
||||
engine.parse_json_tickets(tickets_json)
|
||||
|
||||
assert len(engine.track.tickets) == 3
|
||||
assert engine.track.tickets[0].id == "T1"
|
||||
assert engine.track.tickets[1].id == "T2"
|
||||
assert engine.track.tickets[2].id == "T3"
|
||||
|
||||
# Mock ai_client.send using monkeypatch
|
||||
mock_send = MagicMock()
|
||||
monkeypatch.setattr(ai_client, 'send', mock_send)
|
||||
|
||||
# Mock run_worker_lifecycle to mark tickets as complete
|
||||
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
|
||||
def side_effect(ticket, context, *args, **kwargs):
|
||||
ticket.mark_complete()
|
||||
return "Success"
|
||||
mock_lifecycle.side_effect = side_effect
|
||||
|
||||
await engine.run_linear()
|
||||
|
||||
assert mock_lifecycle.call_count == 3
|
||||
|
||||
# Verify dependency order: T1 must be called before T2
|
||||
calls = [call[0][0].id for call in mock_lifecycle.call_args_list]
|
||||
|
||||
t1_idx = calls.index("T1")
|
||||
t2_idx = calls.index("T2")
|
||||
assert t1_idx < t2_idx
|
||||
|
||||
# T3 can be anywhere relative to T1 and T2, but T1 < T2 is mandatory
|
||||
assert "T3" in calls
|
||||
|
||||
@@ -20,23 +20,19 @@ class TestConductorTechLead(unittest.TestCase):
|
||||
}
|
||||
]
|
||||
mock_send.return_value = "```json\n" + json.dumps(mock_tickets) + "\n```"
|
||||
|
||||
track_brief = "Test track brief"
|
||||
module_skeletons = "Test skeletons"
|
||||
# Call the function
|
||||
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
|
||||
|
||||
# Verify set_provider was called
|
||||
mock_set_provider.assert_called_with('gemini', 'gemini-2.5-flash-lite')
|
||||
mock_reset_session.assert_called_once()
|
||||
|
||||
# Verify send was called
|
||||
mock_send.assert_called_once()
|
||||
args, kwargs = mock_send.call_args
|
||||
self.assertEqual(kwargs['md_content'], "")
|
||||
self.assertIn(track_brief, kwargs['user_message'])
|
||||
self.assertIn(module_skeletons, kwargs['user_message'])
|
||||
|
||||
# Verify tickets were parsed correctly
|
||||
self.assertEqual(tickets, mock_tickets)
|
||||
|
||||
@@ -46,10 +42,8 @@ class TestConductorTechLead(unittest.TestCase):
|
||||
def test_generate_tickets_parse_error(self, mock_reset_session, mock_set_provider, mock_send):
|
||||
# Setup mock invalid response
|
||||
mock_send.return_value = "Invalid JSON"
|
||||
|
||||
# Call the function
|
||||
tickets = conductor_tech_lead.generate_tickets("brief", "skeletons")
|
||||
|
||||
# Verify it returns an empty list on parse error
|
||||
self.assertEqual(tickets, [])
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ def test_get_ready_tasks_linear():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
ready = dag.get_ready_tasks()
|
||||
assert len(ready) == 1
|
||||
@@ -16,7 +15,6 @@ def test_get_ready_tasks_branching():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
ready = dag.get_ready_tasks()
|
||||
assert len(ready) == 2
|
||||
@@ -26,14 +24,12 @@ def test_get_ready_tasks_branching():
|
||||
def test_has_cycle_no_cycle():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
|
||||
dag = TrackDAG([t1, t2])
|
||||
assert not dag.has_cycle()
|
||||
|
||||
def test_has_cycle_direct_cycle():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
|
||||
dag = TrackDAG([t1, t2])
|
||||
assert dag.has_cycle()
|
||||
|
||||
@@ -41,7 +37,6 @@ def test_has_cycle_indirect_cycle():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T3"])
|
||||
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
assert dag.has_cycle()
|
||||
|
||||
@@ -51,7 +46,6 @@ def test_has_cycle_complex_no_cycle():
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T4"])
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T4"])
|
||||
t4 = Ticket(id="T4", description="T4", status="todo", assigned_to="worker")
|
||||
|
||||
dag = TrackDAG([t1, t2, t3, t4])
|
||||
assert not dag.has_cycle()
|
||||
|
||||
@@ -59,10 +53,8 @@ def test_get_ready_tasks_multiple_deps():
|
||||
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="T2", status="completed", assigned_to="worker")
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T1", "T2"])
|
||||
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
assert [t.id for t in dag.get_ready_tasks()] == ["T3"]
|
||||
|
||||
t2.status = "todo"
|
||||
assert [t.id for t in dag.get_ready_tasks()] == ["T2"]
|
||||
|
||||
@@ -70,7 +62,6 @@ def test_topological_sort():
|
||||
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
|
||||
dag = TrackDAG([t1, t2, t3])
|
||||
sort = dag.topological_sort()
|
||||
assert sort == ["T1", "T2", "T3"]
|
||||
@@ -78,7 +69,6 @@ def test_topological_sort():
|
||||
def test_topological_sort_cycle():
|
||||
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2"])
|
||||
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
|
||||
dag = TrackDAG([t1, t2])
|
||||
with pytest.raises(ValueError, match="Dependency cycle detected"):
|
||||
dag.topological_sort()
|
||||
|
||||
@@ -18,10 +18,8 @@ def test_credentials_error_mentions_deepseek(monkeypatch):
|
||||
"""
|
||||
# Monkeypatch SLOP_CREDENTIALS to a non-existent file
|
||||
monkeypatch.setenv("SLOP_CREDENTIALS", "non_existent_credentials_file.toml")
|
||||
|
||||
with pytest.raises(FileNotFoundError) as excinfo:
|
||||
ai_client._load_credentials()
|
||||
|
||||
err_msg = str(excinfo.value)
|
||||
assert "[deepseek]" in err_msg
|
||||
assert "api_key" in err_msg
|
||||
@@ -58,7 +56,6 @@ def test_gui_provider_list_via_hooks(live_gui):
|
||||
import time
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=10)
|
||||
|
||||
# Attempt to set provider to deepseek to verify it's an allowed value
|
||||
client.set_value('current_provider', 'deepseek')
|
||||
time.sleep(0.5)
|
||||
|
||||
@@ -15,7 +15,6 @@ def test_deepseek_completion_logic():
|
||||
Verifies that ai_client.send() correctly calls the DeepSeek API and returns content.
|
||||
"""
|
||||
ai_client.set_provider("deepseek", "deepseek-chat")
|
||||
|
||||
with patch("requests.post") as mock_post:
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
@@ -27,7 +26,6 @@ def test_deepseek_completion_logic():
|
||||
"usage": {"prompt_tokens": 10, "completion_tokens": 5}
|
||||
}
|
||||
mock_post.return_value = mock_response
|
||||
|
||||
result = ai_client.send(md_content="Context", user_message="Hello", base_dir=".")
|
||||
assert result == "DeepSeek Response"
|
||||
assert mock_post.called
|
||||
@@ -37,7 +35,6 @@ def test_deepseek_reasoning_logic():
|
||||
Verifies that reasoning_content is captured and wrapped in <thinking> tags.
|
||||
"""
|
||||
ai_client.set_provider("deepseek", "deepseek-reasoner")
|
||||
|
||||
with patch("requests.post") as mock_post:
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
@@ -53,7 +50,6 @@ def test_deepseek_reasoning_logic():
|
||||
"usage": {"prompt_tokens": 10, "completion_tokens": 20}
|
||||
}
|
||||
mock_post.return_value = mock_response
|
||||
|
||||
result = ai_client.send(md_content="Context", user_message="Reasoning test", base_dir=".")
|
||||
assert "<thinking>\nChain of thought\n</thinking>" in result
|
||||
assert "Final Answer" in result
|
||||
@@ -63,10 +59,8 @@ def test_deepseek_tool_calling():
|
||||
Verifies that DeepSeek provider correctly identifies and executes tool calls.
|
||||
"""
|
||||
ai_client.set_provider("deepseek", "deepseek-chat")
|
||||
|
||||
with patch("requests.post") as mock_post, \
|
||||
patch("mcp_client.dispatch") as mock_dispatch:
|
||||
|
||||
# 1. Mock first response with a tool call
|
||||
mock_resp1 = MagicMock()
|
||||
mock_resp1.status_code = 200
|
||||
@@ -88,7 +82,6 @@ def test_deepseek_tool_calling():
|
||||
}],
|
||||
"usage": {"prompt_tokens": 50, "completion_tokens": 10}
|
||||
}
|
||||
|
||||
# 2. Mock second response (final answer)
|
||||
mock_resp2 = MagicMock()
|
||||
mock_resp2.status_code = 200
|
||||
@@ -102,12 +95,9 @@ def test_deepseek_tool_calling():
|
||||
}],
|
||||
"usage": {"prompt_tokens": 100, "completion_tokens": 20}
|
||||
}
|
||||
|
||||
mock_post.side_effect = [mock_resp1, mock_resp2]
|
||||
mock_dispatch.return_value = "Hello World"
|
||||
|
||||
result = ai_client.send(md_content="Context", user_message="Read test.txt", base_dir=".")
|
||||
|
||||
assert "File content is: Hello World" in result
|
||||
assert mock_dispatch.called
|
||||
assert mock_dispatch.call_args[0][0] == "read_file"
|
||||
@@ -118,12 +108,10 @@ def test_deepseek_streaming():
|
||||
Verifies that DeepSeek provider correctly aggregates streaming chunks.
|
||||
"""
|
||||
ai_client.set_provider("deepseek", "deepseek-chat")
|
||||
|
||||
with patch("requests.post") as mock_post:
|
||||
# Mock a streaming response
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
|
||||
# Simulate OpenAI-style server-sent events (SSE) for streaming
|
||||
# Each line starts with 'data: ' and contains a JSON object
|
||||
chunks = [
|
||||
@@ -134,6 +122,5 @@ def test_deepseek_streaming():
|
||||
]
|
||||
mock_response.iter_lines.return_value = [c.encode('utf-8') for c in chunks]
|
||||
mock_post.return_value = mock_response
|
||||
|
||||
result = ai_client.send(md_content="Context", user_message="Stream test", base_dir=".", stream=True)
|
||||
assert result == "Hello World"
|
||||
|
||||
@@ -8,43 +8,33 @@ def test_execution_engine_basic_flow():
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
t4 = Ticket(id="T4", description="Task 4", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
|
||||
|
||||
dag = TrackDAG([t1, t2, t3, t4])
|
||||
engine = ExecutionEngine(dag)
|
||||
|
||||
# Tick 1: Only T1 should be ready
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T1"
|
||||
|
||||
# Complete T1
|
||||
engine.update_task_status("T1", "completed")
|
||||
|
||||
# Tick 2: T2 and T3 should be ready
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 2
|
||||
ids = {t.id for t in ready}
|
||||
assert ids == {"T2", "T3"}
|
||||
|
||||
# Complete T2
|
||||
engine.update_task_status("T2", "completed")
|
||||
|
||||
# Tick 3: Only T3 should be ready (T4 depends on T2 AND T3)
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T3"
|
||||
|
||||
# Complete T3
|
||||
engine.update_task_status("T3", "completed")
|
||||
|
||||
# Tick 4: T4 should be ready
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T4"
|
||||
|
||||
# Complete T4
|
||||
engine.update_task_status("T4", "completed")
|
||||
|
||||
# Tick 5: Nothing ready
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 0
|
||||
@@ -59,34 +49,27 @@ def test_execution_engine_status_persistence():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
|
||||
dag = TrackDAG([t1])
|
||||
engine = ExecutionEngine(dag)
|
||||
|
||||
engine.update_task_status("T1", "in_progress")
|
||||
assert t1.status == "in_progress"
|
||||
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 0 # Only 'todo' tasks should be returned by tick() if they are ready
|
||||
|
||||
def test_execution_engine_auto_queue():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
|
||||
|
||||
dag = TrackDAG([t1, t2])
|
||||
engine = ExecutionEngine(dag, auto_queue=True)
|
||||
|
||||
# Tick 1: T1 is ready and should be automatically marked as 'in_progress'
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T1"
|
||||
assert t1.status == "in_progress"
|
||||
|
||||
# Tick 2: T1 is in_progress, so T2 is NOT ready yet (T1 must be 'completed')
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 0
|
||||
assert t2.status == "todo"
|
||||
|
||||
# Complete T1
|
||||
engine.update_task_status("T1", "completed")
|
||||
|
||||
# Tick 3: T2 is now ready and should be automatically marked as 'in_progress'
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 1
|
||||
@@ -95,20 +78,16 @@ def test_execution_engine_auto_queue():
|
||||
|
||||
def test_execution_engine_step_mode():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", step_mode=True)
|
||||
|
||||
dag = TrackDAG([t1])
|
||||
engine = ExecutionEngine(dag, auto_queue=True)
|
||||
|
||||
# Tick 1: T1 is ready, but step_mode=True, so it should NOT be automatically marked as 'in_progress'
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 1
|
||||
assert ready[0].id == "T1"
|
||||
assert t1.status == "todo"
|
||||
|
||||
# Manual approval
|
||||
engine.approve_task("T1")
|
||||
assert t1.status == "in_progress"
|
||||
|
||||
# Tick 2: T1 is already in_progress, should not be returned by tick() (it's not 'ready'/todo)
|
||||
ready = engine.tick()
|
||||
assert len(ready) == 0
|
||||
@@ -117,7 +96,6 @@ def test_execution_engine_approve_task():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
|
||||
dag = TrackDAG([t1])
|
||||
engine = ExecutionEngine(dag, auto_queue=False)
|
||||
|
||||
# Should be able to approve even if auto_queue is False
|
||||
engine.approve_task("T1")
|
||||
assert t1.status == "in_progress"
|
||||
|
||||
@@ -17,7 +17,6 @@ def test_context_sim_live(live_gui):
|
||||
"""Run the Context & Chat simulation against a live GUI."""
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=10)
|
||||
|
||||
sim = ContextSimulation(client)
|
||||
sim.setup("LiveContextSim")
|
||||
sim.run()
|
||||
@@ -28,7 +27,6 @@ def test_ai_settings_sim_live(live_gui):
|
||||
"""Run the AI Settings simulation against a live GUI."""
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=10)
|
||||
|
||||
sim = AISettingsSimulation(client)
|
||||
sim.setup("LiveAISettingsSim")
|
||||
sim.run()
|
||||
@@ -39,7 +37,6 @@ def test_tools_sim_live(live_gui):
|
||||
"""Run the Tools & Search simulation against a live GUI."""
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=10)
|
||||
|
||||
sim = ToolsSimulation(client)
|
||||
sim.setup("LiveToolsSim")
|
||||
sim.run()
|
||||
@@ -50,7 +47,6 @@ def test_execution_sim_live(live_gui):
|
||||
"""Run the Execution & Modals simulation against a live GUI."""
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=10)
|
||||
|
||||
sim = ExecutionSimulation(client)
|
||||
sim.setup("LiveExecutionSim")
|
||||
sim.run()
|
||||
|
||||
@@ -28,25 +28,20 @@ class TestGeminiCliAdapter(unittest.TestCase):
|
||||
process_mock.poll.return_value = 0
|
||||
process_mock.wait.return_value = 0
|
||||
mock_popen.return_value = process_mock
|
||||
|
||||
message = "Hello Gemini CLI"
|
||||
self.adapter.send(message)
|
||||
|
||||
# Verify subprocess.Popen call
|
||||
mock_popen.assert_called_once()
|
||||
args, kwargs = mock_popen.call_args
|
||||
cmd = args[0]
|
||||
|
||||
# Check mandatory CLI components
|
||||
self.assertIn("gemini", cmd)
|
||||
self.assertIn("--output-format", cmd)
|
||||
self.assertIn("stream-json", cmd)
|
||||
# Message should NOT be in cmd now
|
||||
self.assertNotIn(message, cmd)
|
||||
|
||||
# Verify message was sent via communicate
|
||||
process_mock.communicate.assert_called_once_with(input=message)
|
||||
|
||||
# Check process configuration
|
||||
self.assertEqual(kwargs.get('stdout'), subprocess.PIPE)
|
||||
self.assertEqual(kwargs.get('stdin'), subprocess.PIPE)
|
||||
@@ -64,15 +59,12 @@ class TestGeminiCliAdapter(unittest.TestCase):
|
||||
json.dumps({"type": "result", "usage": {"prompt_tokens": 5, "candidates_tokens": 5}})
|
||||
]
|
||||
stdout_content = "\n".join(jsonl_output) + "\n"
|
||||
|
||||
process_mock = MagicMock()
|
||||
process_mock.communicate.return_value = (stdout_content, "")
|
||||
process_mock.poll.return_value = 0
|
||||
process_mock.wait.return_value = 0
|
||||
mock_popen.return_value = process_mock
|
||||
|
||||
result = self.adapter.send("test message")
|
||||
|
||||
self.assertEqual(result["text"], "The quick brown fox jumps.")
|
||||
self.assertEqual(result["tool_calls"], [])
|
||||
|
||||
@@ -89,15 +81,12 @@ class TestGeminiCliAdapter(unittest.TestCase):
|
||||
json.dumps({"type": "result", "usage": {}})
|
||||
]
|
||||
stdout_content = "\n".join(jsonl_output) + "\n"
|
||||
|
||||
process_mock = MagicMock()
|
||||
process_mock.communicate.return_value = (stdout_content, "")
|
||||
process_mock.poll.return_value = 0
|
||||
process_mock.wait.return_value = 0
|
||||
mock_popen.return_value = process_mock
|
||||
|
||||
result = self.adapter.send("read test.txt")
|
||||
|
||||
# Result should contain the combined text from all 'message' events
|
||||
self.assertEqual(result["text"], "Calling tool...\nFile read successfully.")
|
||||
self.assertEqual(len(result["tool_calls"]), 1)
|
||||
@@ -114,15 +103,12 @@ class TestGeminiCliAdapter(unittest.TestCase):
|
||||
json.dumps({"type": "result", "usage": usage_data})
|
||||
]
|
||||
stdout_content = "\n".join(jsonl_output) + "\n"
|
||||
|
||||
process_mock = MagicMock()
|
||||
process_mock.communicate.return_value = (stdout_content, "")
|
||||
process_mock.poll.return_value = 0
|
||||
process_mock.wait.return_value = 0
|
||||
mock_popen.return_value = process_mock
|
||||
|
||||
self.adapter.send("usage test")
|
||||
|
||||
# Verify the usage was captured in the adapter instance
|
||||
self.assertEqual(self.adapter.last_usage, usage_data)
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@ import os
|
||||
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
||||
if project_root not in sys.path:
|
||||
sys.path.append(project_root)
|
||||
|
||||
# Import the class to be tested
|
||||
from gemini_cli_adapter import GeminiCliAdapter
|
||||
|
||||
@@ -21,7 +20,6 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
|
||||
# Patch session_logger to prevent file operations during tests
|
||||
self.session_logger_patcher = patch('gemini_cli_adapter.session_logger')
|
||||
self.mock_session_logger = self.session_logger_patcher.start()
|
||||
|
||||
self.adapter = GeminiCliAdapter(binary_path="gemini")
|
||||
self.adapter.session_id = None
|
||||
self.adapter.last_usage = None
|
||||
@@ -38,10 +36,8 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
|
||||
contents_to_count = ["This is the first line.", "This is the second line."]
|
||||
expected_chars = len("\n".join(contents_to_count))
|
||||
expected_tokens = expected_chars // 4
|
||||
|
||||
token_count = self.adapter.count_tokens(contents=contents_to_count)
|
||||
self.assertEqual(token_count, expected_tokens)
|
||||
|
||||
# Verify that NO subprocess was started for counting
|
||||
mock_popen.assert_not_called()
|
||||
|
||||
@@ -56,18 +52,14 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
|
||||
process_mock.communicate.return_value = (mock_stdout_content, "")
|
||||
process_mock.returncode = 0
|
||||
mock_popen.return_value = process_mock
|
||||
|
||||
message_content = "User's prompt here."
|
||||
safety_settings = [
|
||||
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"},
|
||||
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
|
||||
]
|
||||
|
||||
self.adapter.send(message=message_content, safety_settings=safety_settings)
|
||||
|
||||
args, kwargs = mock_popen.call_args
|
||||
command = args[0]
|
||||
|
||||
# Verify that no --safety flags were added to the command
|
||||
self.assertNotIn("--safety", command)
|
||||
# Verify that the message was passed correctly via stdin
|
||||
@@ -83,14 +75,11 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
|
||||
process_mock.communicate.return_value = (mock_stdout_content, "")
|
||||
process_mock.returncode = 0
|
||||
mock_popen.return_value = process_mock
|
||||
|
||||
message_content = "Another prompt."
|
||||
|
||||
self.adapter.send(message=message_content, safety_settings=None)
|
||||
args_none, _ = mock_popen.call_args
|
||||
self.assertNotIn("--safety", args_none[0])
|
||||
mock_popen.reset_mock()
|
||||
|
||||
self.adapter.send(message=message_content, safety_settings=[])
|
||||
args_empty, _ = mock_popen.call_args
|
||||
self.assertNotIn("--safety", args_empty[0])
|
||||
@@ -106,19 +95,14 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
|
||||
process_mock.communicate.return_value = (mock_stdout_content, "")
|
||||
process_mock.returncode = 0
|
||||
mock_popen.return_value = process_mock
|
||||
|
||||
message_content = "User's prompt here."
|
||||
system_instruction_text = "Some instruction"
|
||||
expected_input = f"{system_instruction_text}\n\n{message_content}"
|
||||
|
||||
self.adapter.send(message=message_content, system_instruction=system_instruction_text)
|
||||
|
||||
args, kwargs = mock_popen.call_args
|
||||
command = args[0]
|
||||
|
||||
# Verify that the system instruction was prepended to the input sent to communicate
|
||||
process_mock.communicate.assert_called_once_with(input=expected_input)
|
||||
|
||||
# Verify that no --system flag was added to the command
|
||||
self.assertNotIn("--system", command)
|
||||
|
||||
@@ -132,16 +116,12 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
|
||||
process_mock.communicate.return_value = (mock_stdout_content, "")
|
||||
process_mock.returncode = 0
|
||||
mock_popen.return_value = process_mock
|
||||
|
||||
message_content = "User's prompt here."
|
||||
model_name = "gemini-1.5-flash"
|
||||
expected_command_part = f'-m "{model_name}"'
|
||||
|
||||
self.adapter.send(message=message_content, model=model_name)
|
||||
|
||||
args, kwargs = mock_popen.call_args
|
||||
command = args[0]
|
||||
|
||||
# Verify that the -m <model> flag was added to the command
|
||||
self.assertIn(expected_command_part, command)
|
||||
# Verify that the message was passed correctly via stdin
|
||||
@@ -155,20 +135,15 @@ class TestGeminiCliAdapterParity(unittest.TestCase):
|
||||
"""
|
||||
mock_process = MagicMock()
|
||||
mock_popen.return_value = mock_process
|
||||
|
||||
# Define an exception to simulate
|
||||
simulated_exception = RuntimeError("Simulated communicate error")
|
||||
mock_process.communicate.side_effect = simulated_exception
|
||||
|
||||
message_content = "User message"
|
||||
|
||||
# Assert that the exception is raised and process is killed
|
||||
with self.assertRaises(RuntimeError) as cm:
|
||||
self.adapter.send(message=message_content)
|
||||
|
||||
# Verify that the process's kill method was called
|
||||
mock_process.kill.assert_called_once()
|
||||
|
||||
# Verify that the correct exception was re-raised
|
||||
self.assertIs(cm.exception, simulated_exception)
|
||||
|
||||
|
||||
@@ -14,7 +14,6 @@ def test_gemini_cli_context_bleed_prevention(live_gui):
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
|
||||
# Create a specialized mock for context bleed
|
||||
bleed_mock = os.path.abspath("tests/mock_context_bleed.py")
|
||||
with open(bleed_mock, "w") as f:
|
||||
@@ -24,26 +23,20 @@ print(json.dumps({"type": "message", "role": "user", "content": "I am echoing yo
|
||||
print(json.dumps({"type": "message", "role": "assistant", "content": "Actual AI Response"}), flush=True)
|
||||
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)
|
||||
''')
|
||||
|
||||
cli_cmd = f'"{sys.executable}" "{bleed_mock}"'
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
|
||||
client.set_value("ai_input", "Test context bleed")
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# Wait for completion
|
||||
time.sleep(3)
|
||||
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
|
||||
# Verify: We expect exactly one AI entry, and it must NOT contain the echoed user message
|
||||
ai_entries = [e for e in entries if e.get("role") == "AI"]
|
||||
assert len(ai_entries) == 1
|
||||
assert ai_entries[0].get("content") == "Actual AI Response"
|
||||
assert "echoing you" not in ai_entries[0].get("content")
|
||||
|
||||
os.remove(bleed_mock)
|
||||
|
||||
def test_gemini_cli_parameter_resilience(live_gui):
|
||||
@@ -55,7 +48,6 @@ def test_gemini_cli_parameter_resilience(live_gui):
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
|
||||
# Create a mock that uses dir_path for list_directory
|
||||
alias_mock = os.path.abspath("tests/mock_alias_tool.py")
|
||||
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
|
||||
@@ -64,7 +56,6 @@ def test_gemini_cli_parameter_resilience(live_gui):
|
||||
bridge_path_str = bridge_path.replace("\\", "/")
|
||||
else:
|
||||
bridge_path_str = bridge_path
|
||||
|
||||
with open(alias_tool_content := "tests/mock_alias_tool.py", "w") as f:
|
||||
f.write(f'''import sys, json, os, subprocess
|
||||
prompt = sys.stdin.read()
|
||||
@@ -88,14 +79,11 @@ else:
|
||||
}}), flush=True)
|
||||
print(json.dumps({{"type": "result", "stats": {{"total_tokens": 10}}}}), flush=True)
|
||||
''')
|
||||
|
||||
cli_cmd = f'"{sys.executable}" "{alias_mock}"'
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
|
||||
client.set_value("ai_input", "Test parameter aliases")
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# Handle approval
|
||||
timeout = 15
|
||||
start_time = time.time()
|
||||
@@ -108,18 +96,14 @@ else:
|
||||
approved = True
|
||||
if approved: break
|
||||
time.sleep(0.5)
|
||||
|
||||
assert approved, "Tool approval event never received"
|
||||
|
||||
# Verify tool result in history
|
||||
time.sleep(2)
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
|
||||
# Check for "Tool worked!" which implies the tool execution was successful
|
||||
found = any("Tool worked!" in e.get("content", "") for e in entries)
|
||||
assert found, "Tool result indicating success not found in history"
|
||||
|
||||
os.remove(alias_mock)
|
||||
|
||||
def test_gemini_cli_loop_termination(live_gui):
|
||||
@@ -131,16 +115,13 @@ def test_gemini_cli_loop_termination(live_gui):
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
|
||||
# This uses the existing mock_gemini_cli.py which is already designed for 2 rounds
|
||||
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
|
||||
cli_cmd = f'"{sys.executable}" "{mock_script}"'
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
|
||||
client.set_value("ai_input", "Perform multi-round tool test")
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# Handle approvals (mock does one tool call)
|
||||
timeout = 20
|
||||
start_time = time.time()
|
||||
@@ -153,7 +134,6 @@ def test_gemini_cli_loop_termination(live_gui):
|
||||
approved = True
|
||||
if approved: break
|
||||
time.sleep(0.5)
|
||||
|
||||
# Wait for the second round and final answer
|
||||
found_final = False
|
||||
start_time = time.time()
|
||||
@@ -166,5 +146,4 @@ def test_gemini_cli_loop_termination(live_gui):
|
||||
break
|
||||
if found_final: break
|
||||
time.sleep(1)
|
||||
|
||||
assert found_final, "Final message after multi-round tool loop not found"
|
||||
|
||||
@@ -11,41 +11,33 @@ def test_gemini_cli_full_integration(live_gui):
|
||||
Handles 'ask_received' events from the bridge and any other approval requests.
|
||||
"""
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
|
||||
# 0. Reset session and enable history
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
# Switch to manual_slop project explicitly
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
|
||||
# 1. Setup paths and configure the GUI
|
||||
# Use the real gemini CLI if available, otherwise use mock
|
||||
# For CI/testing we prefer mock
|
||||
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
|
||||
cli_cmd = f'"{sys.executable}" "{mock_script}"'
|
||||
|
||||
print(f"[TEST] Setting current_provider to gemini_cli")
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
print(f"[TEST] Setting gcli_path to {cli_cmd}")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
|
||||
# Verify settings
|
||||
assert client.get_value("current_provider") == "gemini_cli"
|
||||
|
||||
# Clear events
|
||||
client.get_events()
|
||||
|
||||
# 2. Trigger a message in the GUI
|
||||
print("[TEST] Sending user message...")
|
||||
client.set_value("ai_input", "Please read test.txt")
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# 3. Monitor for approval events
|
||||
print("[TEST] Waiting for approval events...")
|
||||
timeout = 45
|
||||
start_time = time.time()
|
||||
approved_count = 0
|
||||
|
||||
while time.time() - start_time < timeout:
|
||||
events = client.get_events()
|
||||
if events:
|
||||
@@ -53,7 +45,6 @@ def test_gemini_cli_full_integration(live_gui):
|
||||
etype = ev.get("type")
|
||||
eid = ev.get("request_id") or ev.get("action_id")
|
||||
print(f"[TEST] Received event: {etype} (ID: {eid})")
|
||||
|
||||
if etype in ["ask_received", "glob_approval_required", "script_confirmation_required"]:
|
||||
print(f"[TEST] Approving {etype} {eid}")
|
||||
if etype == "script_confirmation_required":
|
||||
@@ -63,7 +54,6 @@ def test_gemini_cli_full_integration(live_gui):
|
||||
json={"request_id": eid, "response": {"approved": True}})
|
||||
assert resp.status_code == 200
|
||||
approved_count += 1
|
||||
|
||||
# Check if we got a final response in history
|
||||
session = client.get_session()
|
||||
entries = session.get("session", {}).get("entries", [])
|
||||
@@ -74,12 +64,9 @@ def test_gemini_cli_full_integration(live_gui):
|
||||
print(f"[TEST] Success! Found final message in history.")
|
||||
found_final = True
|
||||
break
|
||||
|
||||
if found_final:
|
||||
break
|
||||
|
||||
time.sleep(1.0)
|
||||
|
||||
assert approved_count > 0, "No approval events were processed"
|
||||
assert found_final, "Final message from mock CLI was not found in the GUI history"
|
||||
|
||||
@@ -88,22 +75,18 @@ def test_gemini_cli_rejection_and_history(live_gui):
|
||||
Integration test for the Gemini CLI provider: Rejection flow and history.
|
||||
"""
|
||||
client = ApiHookClient("http://127.0.0.1:8999")
|
||||
|
||||
# 0. Reset session
|
||||
client.click("btn_reset")
|
||||
client.set_value("auto_add_history", True)
|
||||
client.select_list_item("proj_files", "manual_slop")
|
||||
|
||||
mock_script = os.path.abspath("tests/mock_gemini_cli.py")
|
||||
cli_cmd = f'"{sys.executable}" "{mock_script}"'
|
||||
client.set_value("current_provider", "gemini_cli")
|
||||
client.set_value("gcli_path", cli_cmd)
|
||||
|
||||
# 2. Trigger a message
|
||||
print("[TEST] Sending user message (to be denied)...")
|
||||
client.set_value("ai_input", "Deny me")
|
||||
client.click("btn_gen_send")
|
||||
|
||||
# 3. Wait for event and reject
|
||||
timeout = 20
|
||||
start_time = time.time()
|
||||
@@ -121,9 +104,7 @@ def test_gemini_cli_rejection_and_history(live_gui):
|
||||
break
|
||||
if denied: break
|
||||
time.sleep(0.5)
|
||||
|
||||
assert denied, "No ask_received event to deny"
|
||||
|
||||
# 4. Verify rejection in history
|
||||
print("[TEST] Waiting for rejection in history...")
|
||||
rejection_found = False
|
||||
@@ -137,5 +118,4 @@ def test_gemini_cli_rejection_and_history(live_gui):
|
||||
break
|
||||
if rejection_found: break
|
||||
time.sleep(1.0)
|
||||
|
||||
assert rejection_found, "Rejection message not found in history"
|
||||
|
||||
@@ -26,9 +26,7 @@ def test_send_invokes_adapter_send(mock_prompt, mock_adapter_class):
|
||||
mock_instance.last_usage = {"input_tokens": 10}
|
||||
mock_instance.last_latency = 0.1
|
||||
mock_instance.session_id = None
|
||||
|
||||
ai_client.send("context", "message", discussion_history="hist")
|
||||
|
||||
expected_payload = "[DISCUSSION HISTORY]\n\nhist\n\n---\n\nmessage"
|
||||
assert mock_instance.send.called
|
||||
args, kwargs = mock_instance.send.call_args
|
||||
@@ -42,11 +40,8 @@ def test_get_history_bleed_stats(mock_adapter_class):
|
||||
mock_instance.last_usage = {"input_tokens": 1500}
|
||||
mock_instance.last_latency = 0.5
|
||||
mock_instance.session_id = "sess"
|
||||
|
||||
# Initialize by sending a message
|
||||
ai_client.send("context", "msg")
|
||||
|
||||
stats = ai_client.get_history_bleed_stats()
|
||||
|
||||
assert stats["provider"] == "gemini_cli"
|
||||
assert stats["current"] == 1500
|
||||
|
||||
@@ -16,33 +16,26 @@ def test_get_gemini_cache_stats_with_mock_client():
|
||||
"""
|
||||
# Ensure a clean state before the test by resetting the session
|
||||
reset_session()
|
||||
|
||||
# 1. Create a mock for the cache object that the client will return
|
||||
mock_cache = MagicMock()
|
||||
mock_cache.name = "cachedContents/test-cache"
|
||||
mock_cache.display_name = "Test Cache"
|
||||
mock_cache.model = "models/gemini-1.5-pro-001"
|
||||
mock_cache.size_bytes = 1024
|
||||
|
||||
# 2. Create a mock for the client instance
|
||||
mock_client_instance = MagicMock()
|
||||
# Configure its `caches.list` method to return our mock cache
|
||||
mock_client_instance.caches.list.return_value = [mock_cache]
|
||||
|
||||
# 3. Patch the Client constructor to return our mock instance
|
||||
# This intercepts the `_ensure_gemini_client` call inside the function
|
||||
with patch('google.genai.Client', return_value=mock_client_instance) as mock_client_constructor:
|
||||
|
||||
# 4. Call the function under test
|
||||
stats = get_gemini_cache_stats()
|
||||
|
||||
# 5. Assert that the function behaved as expected
|
||||
|
||||
# It should have constructed the client
|
||||
mock_client_constructor.assert_called_once()
|
||||
# It should have called the `list` method on the `caches` attribute
|
||||
mock_client_instance.caches.list.assert_called_once()
|
||||
|
||||
# The returned stats dictionary should be correct
|
||||
assert "cache_count" in stats
|
||||
assert "total_size_bytes" in stats
|
||||
|
||||
@@ -12,7 +12,6 @@ def app_instance():
|
||||
"""
|
||||
if not hasattr(ai_client, 'events') or ai_client.events is None:
|
||||
ai_client.events = EventEmitter()
|
||||
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
|
||||
patch('gui_2.save_config'),
|
||||
@@ -35,13 +34,11 @@ def test_app_subscribes_to_events(app_instance):
|
||||
with patch.object(ai_client.events, 'on') as mock_on:
|
||||
app = app_instance()
|
||||
mock_on.assert_called()
|
||||
|
||||
calls = mock_on.call_args_list
|
||||
event_names = [call.args[0] for call in calls]
|
||||
assert "request_start" in event_names
|
||||
assert "response_received" in event_names
|
||||
assert "tool_execution" in event_names
|
||||
|
||||
for call in calls:
|
||||
handler = call.args[1]
|
||||
assert hasattr(handler, '__self__')
|
||||
|
||||
@@ -30,7 +30,6 @@ def test_gui2_hubs_exist_in_show_windows(app_instance):
|
||||
"Files & Media",
|
||||
"Theme",
|
||||
]
|
||||
|
||||
for hub in expected_hubs:
|
||||
assert hub in app_instance.show_windows, f"Expected hub window '{hub}' not found in show_windows"
|
||||
|
||||
@@ -43,6 +42,5 @@ def test_gui2_old_windows_removed_from_show_windows(app_instance):
|
||||
"Provider", "System Prompts",
|
||||
"Message", "Response", "Tool Calls", "Comms History"
|
||||
]
|
||||
|
||||
for old_win in old_windows:
|
||||
assert old_win not in app_instance.show_windows, f"Old window '{old_win}' should have been removed from show_windows"
|
||||
|
||||
@@ -8,7 +8,6 @@ from events import EventEmitter
|
||||
def app_instance():
|
||||
if not hasattr(ai_client, 'events') or ai_client.events is None:
|
||||
ai_client.events = EventEmitter()
|
||||
|
||||
with (
|
||||
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
|
||||
patch('gui_2.save_config'),
|
||||
@@ -32,7 +31,6 @@ def test_mcp_tool_call_is_dispatched(app_instance):
|
||||
mock_fc = MagicMock()
|
||||
mock_fc.name = "read_file"
|
||||
mock_fc.args = {"file_path": "test.txt"}
|
||||
|
||||
# 2. Construct the mock AI response (Gemini format)
|
||||
mock_response_with_tool = MagicMock()
|
||||
mock_part = MagicMock()
|
||||
@@ -47,25 +45,19 @@ def test_mcp_tool_call_is_dispatched(app_instance):
|
||||
prompt_token_count = 100
|
||||
candidates_token_count = 10
|
||||
cached_content_token_count = 0
|
||||
|
||||
mock_response_with_tool.usage_metadata = DummyUsage()
|
||||
|
||||
# 3. Create a mock for the final AI response after the tool call
|
||||
mock_response_final = MagicMock()
|
||||
mock_response_final.text = "Final answer"
|
||||
mock_response_final.candidates = []
|
||||
mock_response_final.usage_metadata = DummyUsage()
|
||||
|
||||
# 4. Patch the necessary components
|
||||
with patch("ai_client._ensure_gemini_client"), \
|
||||
patch("ai_client._gemini_client") as mock_client, \
|
||||
patch('mcp_client.dispatch', return_value="file content") as mock_dispatch:
|
||||
|
||||
mock_chat = mock_client.chats.create.return_value
|
||||
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
|
||||
|
||||
ai_client.set_provider("gemini", "mock-model")
|
||||
|
||||
# 5. Call the send function
|
||||
ai_client.send(
|
||||
md_content="some context",
|
||||
@@ -74,6 +66,5 @@ def test_mcp_tool_call_is_dispatched(app_instance):
|
||||
file_items=[],
|
||||
discussion_history=""
|
||||
)
|
||||
|
||||
# 6. Assert that the MCP dispatch function was called
|
||||
mock_dispatch.assert_called_once_with("read_file", {"file_path": "test.txt"})
|
||||
|
||||
@@ -30,10 +30,8 @@ def test_gui2_set_value_hook_works(live_gui):
|
||||
assert client.wait_for_server(timeout=10)
|
||||
test_value = f"New value set by test: {uuid.uuid4()}"
|
||||
gui_data = {'action': 'set_value', 'item': 'ai_input', 'value': test_value}
|
||||
|
||||
response = client.post_gui(gui_data)
|
||||
assert response == {'status': 'queued'}
|
||||
|
||||
# Verify the value was actually set using the new get_value hook
|
||||
time.sleep(0.5)
|
||||
current_value = client.get_value('ai_input')
|
||||
@@ -45,17 +43,14 @@ def test_gui2_click_hook_works(live_gui):
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=10)
|
||||
|
||||
# First, set some state that 'Reset' would clear.
|
||||
test_value = "This text should be cleared by the reset button."
|
||||
client.set_value('ai_input', test_value)
|
||||
time.sleep(0.5)
|
||||
assert client.get_value('ai_input') == test_value
|
||||
|
||||
# Now, trigger the click
|
||||
client.click('btn_reset')
|
||||
time.sleep(0.5)
|
||||
|
||||
# Verify it was reset
|
||||
assert client.get_value('ai_input') == ""
|
||||
|
||||
@@ -66,7 +61,6 @@ def test_gui2_custom_callback_hook_works(live_gui):
|
||||
client = ApiHookClient()
|
||||
assert client.wait_for_server(timeout=10)
|
||||
test_data = f"Callback executed: {uuid.uuid4()}"
|
||||
|
||||
gui_data = {
|
||||
'action': 'custom_callback',
|
||||
'callback': '_test_callback_func_write_to_file',
|
||||
@@ -74,9 +68,7 @@ def test_gui2_custom_callback_hook_works(live_gui):
|
||||
}
|
||||
response = client.post_gui(gui_data)
|
||||
assert response == {'status': 'queued'}
|
||||
|
||||
time.sleep(1) # Give gui_2.py time to process its task queue
|
||||
|
||||
# Assert that the file WAS created and contains the correct data
|
||||
assert TEST_CALLBACK_FILE.exists(), "Custom callback was NOT executed, or file path is wrong!"
|
||||
with open(TEST_CALLBACK_FILE, "r") as f:
|
||||
|
||||
@@ -17,15 +17,12 @@ def test_performance_benchmarking(live_gui):
|
||||
"""
|
||||
process, gui_script = live_gui
|
||||
client = ApiHookClient()
|
||||
|
||||
# Wait for app to stabilize and render some frames
|
||||
time.sleep(3.0)
|
||||
|
||||
# Collect metrics over 5 seconds
|
||||
fps_values = []
|
||||
cpu_values = []
|
||||
frame_time_values = []
|
||||
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < 5:
|
||||
try:
|
||||
@@ -35,7 +32,6 @@ def test_performance_benchmarking(live_gui):
|
||||
fps = metrics.get('fps', 0.0)
|
||||
cpu = metrics.get('cpu_percent', 0.0)
|
||||
ft = metrics.get('last_frame_time_ms', 0.0)
|
||||
|
||||
# In some CI environments without a display, metrics might be 0
|
||||
# We only record positive ones to avoid skewing averages if hooks are failing
|
||||
if fps > 0:
|
||||
@@ -45,19 +41,15 @@ def test_performance_benchmarking(live_gui):
|
||||
time.sleep(0.1)
|
||||
except Exception:
|
||||
break
|
||||
|
||||
avg_fps = sum(fps_values) / len(fps_values) if fps_values else 0
|
||||
avg_cpu = sum(cpu_values) / len(cpu_values) if cpu_values else 0
|
||||
avg_ft = sum(frame_time_values) / len(frame_time_values) if frame_time_values else 0
|
||||
|
||||
_shared_metrics[gui_script] = {
|
||||
"avg_fps": avg_fps,
|
||||
"avg_cpu": avg_cpu,
|
||||
"avg_ft": avg_ft
|
||||
}
|
||||
|
||||
print(f"\n[Test] Results for {gui_script}: FPS={avg_fps:.2f}, CPU={avg_cpu:.2f}%, FT={avg_ft:.2f}ms")
|
||||
|
||||
# Absolute minimum requirements
|
||||
if avg_fps > 0:
|
||||
assert avg_fps >= 30, f"{gui_script} FPS {avg_fps:.2f} is below 30 FPS threshold"
|
||||
@@ -70,19 +62,15 @@ def test_performance_parity():
|
||||
if "gui_legacy.py" not in _shared_metrics or "gui_2.py" not in _shared_metrics:
|
||||
if len(_shared_metrics) < 2:
|
||||
pytest.skip("Metrics for both GUIs not yet collected.")
|
||||
|
||||
gui_m = _shared_metrics["gui_legacy.py"]
|
||||
gui2_m = _shared_metrics["gui_2.py"]
|
||||
|
||||
# FPS Parity Check (+/- 15% leeway for now, target is 5%)
|
||||
# Actually I'll use 0.15 for assertion and log the actual.
|
||||
fps_diff_pct = abs(gui_m["avg_fps"] - gui2_m["avg_fps"]) / gui_m["avg_fps"] if gui_m["avg_fps"] > 0 else 0
|
||||
cpu_diff_pct = abs(gui_m["avg_cpu"] - gui2_m["avg_cpu"]) / gui_m["avg_cpu"] if gui_m["avg_cpu"] > 0 else 0
|
||||
|
||||
print(f"\n--- Performance Parity Results ---")
|
||||
print(f"FPS Diff: {fps_diff_pct*100:.2f}%")
|
||||
print(f"CPU Diff: {cpu_diff_pct*100:.2f}%")
|
||||
|
||||
# We follow the 5% requirement for FPS
|
||||
# For CPU we might need more leeway
|
||||
assert fps_diff_pct <= 0.15, f"FPS difference {fps_diff_pct*100:.2f}% exceeds 15% threshold"
|
||||
|
||||
@@ -29,17 +29,13 @@ def test_handle_generate_send_pushes_event(mock_gui):
|
||||
))
|
||||
mock_gui.ui_ai_input = "test prompt"
|
||||
mock_gui.ui_files_base_dir = "."
|
||||
|
||||
# Mock event_queue.put
|
||||
mock_gui.event_queue.put = MagicMock()
|
||||
|
||||
# We need to mock asyncio.run_coroutine_threadsafe to immediately execute
|
||||
with patch('asyncio.run_coroutine_threadsafe') as mock_run:
|
||||
mock_gui._handle_generate_send()
|
||||
|
||||
# Verify run_coroutine_threadsafe was called
|
||||
assert mock_run.called
|
||||
|
||||
# Verify the call to event_queue.put was correct
|
||||
# This is a bit tricky since the first arg to run_coroutine_threadsafe
|
||||
# is the coroutine returned by event_queue.put().
|
||||
@@ -62,7 +58,6 @@ def test_user_request_event_payload():
|
||||
disc_text="disc",
|
||||
base_dir="."
|
||||
)
|
||||
|
||||
d = payload.to_dict()
|
||||
assert d["prompt"] == "hello"
|
||||
assert d["stable_md"] == "md"
|
||||
|
||||
@@ -25,7 +25,6 @@ def app_instance():
|
||||
patch.object(App, '_rebuild_disc_roles_list'), \
|
||||
patch.object(App, '_rebuild_discussion_selector'), \
|
||||
patch.object(App, '_refresh_project_widgets'):
|
||||
|
||||
app = App()
|
||||
yield app
|
||||
dpg.destroy_context()
|
||||
@@ -45,21 +44,17 @@ def test_diagnostics_panel_updates(app_instance):
|
||||
'input_lag_ms': 5.0
|
||||
}
|
||||
app_instance.perf_monitor.get_metrics = MagicMock(return_value=mock_metrics)
|
||||
|
||||
with patch('dearpygui.dearpygui.is_item_shown', return_value=True), \
|
||||
patch('dearpygui.dearpygui.set_value') as mock_set_value, \
|
||||
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
|
||||
patch('dearpygui.dearpygui.does_item_exist', return_value=True):
|
||||
|
||||
# We also need to mock ai_client stats
|
||||
with patch('ai_client.get_history_bleed_stats', return_value={}):
|
||||
app_instance._update_performance_diagnostics()
|
||||
|
||||
# Verify UI updates
|
||||
mock_set_value.assert_any_call("perf_fps_text", "100.0")
|
||||
mock_set_value.assert_any_call("perf_frame_text", "10.0ms")
|
||||
mock_set_value.assert_any_call("perf_cpu_text", "50.0%")
|
||||
mock_set_value.assert_any_call("perf_lag_text", "5.0ms")
|
||||
|
||||
# Verify history update
|
||||
assert app_instance.perf_history["frame_time"][-1] == 10.0
|
||||
|
||||
@@ -14,7 +14,6 @@ def app_instance():
|
||||
render a window or block execution.
|
||||
"""
|
||||
dpg.create_context()
|
||||
|
||||
with patch('dearpygui.dearpygui.create_viewport'), \
|
||||
patch('dearpygui.dearpygui.setup_dearpygui'), \
|
||||
patch('dearpygui.dearpygui.show_viewport'), \
|
||||
@@ -30,10 +29,8 @@ def app_instance():
|
||||
patch.object(App, '_rebuild_disc_roles_list'), \
|
||||
patch.object(App, '_rebuild_discussion_selector'), \
|
||||
patch.object(App, '_refresh_project_widgets'):
|
||||
|
||||
app = App()
|
||||
yield app
|
||||
|
||||
dpg.destroy_context()
|
||||
|
||||
def test_gui_updates_on_event(app_instance):
|
||||
@@ -42,20 +39,15 @@ def test_gui_updates_on_event(app_instance):
|
||||
patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
|
||||
patch('dearpygui.dearpygui.configure_item'), \
|
||||
patch('ai_client.get_history_bleed_stats') as mock_stats:
|
||||
|
||||
mock_stats.return_value = {"percentage": 50.0, "current": 500, "limit": 1000}
|
||||
|
||||
# We'll use patch.object to see if _refresh_api_metrics is called
|
||||
with patch.object(app_instance, '_refresh_api_metrics', wraps=app_instance._refresh_api_metrics) as mock_refresh:
|
||||
# Simulate event
|
||||
ai_client.events.emit("response_received", payload={})
|
||||
|
||||
# Process tasks manually
|
||||
app_instance._process_pending_gui_tasks()
|
||||
|
||||
# Verify that _refresh_api_metrics was called
|
||||
mock_refresh.assert_called_once()
|
||||
|
||||
# Verify that dpg.set_value was called for the metrics widgets
|
||||
calls = [call.args[0] for call in mock_set_value.call_args_list]
|
||||
assert "token_budget_bar" in calls
|
||||
|
||||
@@ -13,28 +13,23 @@ def test_idle_performance_requirements(live_gui):
|
||||
Requirement: GUI must maintain stable performance on idle.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
|
||||
# Wait for app to stabilize and render some frames
|
||||
time.sleep(2.0)
|
||||
|
||||
# Get multiple samples to be sure
|
||||
samples = []
|
||||
for _ in range(5):
|
||||
perf_data = client.get_performance()
|
||||
samples.append(perf_data)
|
||||
time.sleep(0.5)
|
||||
|
||||
# Check for valid metrics
|
||||
valid_ft_count = 0
|
||||
for sample in samples:
|
||||
performance = sample.get('performance', {})
|
||||
frame_time = performance.get('last_frame_time_ms', 0.0)
|
||||
|
||||
# We expect a positive frame time if rendering is happening
|
||||
if frame_time > 0:
|
||||
valid_ft_count += 1
|
||||
assert frame_time < 33.3, f"Frame time {frame_time}ms exceeds 30fps threshold"
|
||||
|
||||
print(f"[Test] Valid frame time samples: {valid_ft_count}/5")
|
||||
# In some CI environments without a real display, frame time might remain 0
|
||||
# but we've verified the hook is returning the dictionary.
|
||||
|
||||
@@ -13,13 +13,11 @@ def test_comms_volume_stress_performance(live_gui):
|
||||
Stress test: Inject many session entries and verify performance doesn't degrade.
|
||||
"""
|
||||
client = ApiHookClient()
|
||||
|
||||
# 1. Capture baseline
|
||||
time.sleep(2.0) # Wait for stability
|
||||
baseline_resp = client.get_performance()
|
||||
baseline = baseline_resp.get('performance', {})
|
||||
baseline_ft = baseline.get('last_frame_time_ms', 0.0)
|
||||
|
||||
# 2. Inject 50 "dummy" session entries
|
||||
# Role must match DISC_ROLES in gui_legacy.py (User, AI, Vendor API, System)
|
||||
large_session = []
|
||||
@@ -30,23 +28,17 @@ def test_comms_volume_stress_performance(live_gui):
|
||||
"ts": time.time(),
|
||||
"collapsed": False
|
||||
})
|
||||
|
||||
client.post_session(large_session)
|
||||
|
||||
# Give it a moment to process UI updates
|
||||
time.sleep(1.0)
|
||||
|
||||
# 3. Capture stress performance
|
||||
stress_resp = client.get_performance()
|
||||
stress = stress_resp.get('performance', {})
|
||||
stress_ft = stress.get('last_frame_time_ms', 0.0)
|
||||
|
||||
print(f"Baseline FT: {baseline_ft:.2f}ms, Stress FT: {stress_ft:.2f}ms")
|
||||
|
||||
# If we got valid timing, assert it's within reason
|
||||
if stress_ft > 0:
|
||||
assert stress_ft < 33.3, f"Stress frame time {stress_ft:.2f}ms exceeds 30fps threshold"
|
||||
|
||||
# Ensure the session actually updated
|
||||
session_data = client.get_session()
|
||||
entries = session_data.get('session', {}).get('entries', [])
|
||||
|
||||
@@ -23,7 +23,6 @@ def app_instance():
|
||||
render a window or block execution.
|
||||
"""
|
||||
dpg.create_context()
|
||||
|
||||
# Patch only the functions that would show a window or block,
|
||||
# and the App methods that rebuild UI on init.
|
||||
with patch('dearpygui.dearpygui.create_viewport'), \
|
||||
@@ -37,10 +36,8 @@ def app_instance():
|
||||
patch.object(App, '_rebuild_disc_roles_list'), \
|
||||
patch.object(App, '_rebuild_discussion_selector'), \
|
||||
patch.object(App, '_refresh_project_widgets'):
|
||||
|
||||
app = App()
|
||||
yield app
|
||||
|
||||
dpg.destroy_context()
|
||||
|
||||
def test_telemetry_panel_updates_correctly(app_instance):
|
||||
@@ -50,7 +47,6 @@ def test_telemetry_panel_updates_correctly(app_instance):
|
||||
"""
|
||||
# 1. Set the provider to anthropic
|
||||
app_instance.current_provider = "anthropic"
|
||||
|
||||
# 2. Define the mock stats
|
||||
mock_stats = {
|
||||
"provider": "anthropic",
|
||||
@@ -58,7 +54,6 @@ def test_telemetry_panel_updates_correctly(app_instance):
|
||||
"current": 135000,
|
||||
"percentage": 75.0,
|
||||
}
|
||||
|
||||
# 3. Patch the dependencies
|
||||
app_instance._last_bleed_update_time = 0 # Force update
|
||||
with patch('ai_client.get_history_bleed_stats', return_value=mock_stats) as mock_get_stats, \
|
||||
@@ -66,17 +61,13 @@ def test_telemetry_panel_updates_correctly(app_instance):
|
||||
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
|
||||
patch('dearpygui.dearpygui.is_item_shown', return_value=False), \
|
||||
patch('dearpygui.dearpygui.does_item_exist', return_value=True) as mock_does_item_exist:
|
||||
|
||||
# 4. Call the method under test
|
||||
app_instance._refresh_api_metrics()
|
||||
|
||||
# 5. Assert the results
|
||||
mock_get_stats.assert_called_once()
|
||||
|
||||
# Assert history bleed widgets were updated
|
||||
mock_set_value.assert_any_call("token_budget_bar", 0.75)
|
||||
mock_set_value.assert_any_call("token_budget_label", "135,000 / 180,000")
|
||||
|
||||
# Assert Gemini-specific widget was hidden
|
||||
mock_configure_item.assert_any_call("gemini_cache_label", show=False)
|
||||
|
||||
@@ -87,7 +78,6 @@ def test_cache_data_display_updates_correctly(app_instance):
|
||||
"""
|
||||
# 1. Set the provider to Gemini
|
||||
app_instance.current_provider = "gemini"
|
||||
|
||||
# 2. Define mock cache stats
|
||||
mock_cache_stats = {
|
||||
'cache_count': 5,
|
||||
@@ -95,7 +85,6 @@ def test_cache_data_display_updates_correctly(app_instance):
|
||||
}
|
||||
# Expected formatted string
|
||||
expected_text = "Gemini Caches: 5 (12.1 KB)"
|
||||
|
||||
# 3. Patch dependencies
|
||||
app_instance._last_bleed_update_time = 0 # Force update
|
||||
with patch('ai_client.get_gemini_cache_stats', return_value=mock_cache_stats) as mock_get_cache_stats, \
|
||||
@@ -103,16 +92,12 @@ def test_cache_data_display_updates_correctly(app_instance):
|
||||
patch('dearpygui.dearpygui.configure_item') as mock_configure_item, \
|
||||
patch('dearpygui.dearpygui.is_item_shown', return_value=False), \
|
||||
patch('dearpygui.dearpygui.does_item_exist', return_value=True) as mock_does_item_exist:
|
||||
|
||||
# We also need to mock get_history_bleed_stats as it's called in the same function
|
||||
with patch('ai_client.get_history_bleed_stats', return_value={}):
|
||||
|
||||
# 4. Call the method under test with payload
|
||||
app_instance._refresh_api_metrics(payload={'cache_stats': mock_cache_stats})
|
||||
|
||||
# 5. Assert the results
|
||||
# mock_get_cache_stats.assert_called_once() # No longer called synchronously
|
||||
|
||||
# Check that the UI item was shown and its value was set
|
||||
mock_configure_item.assert_any_call("gemini_cache_label", show=True)
|
||||
mock_set_value.assert_any_call("gemini_cache_label", expected_text)
|
||||
|
||||
@@ -18,11 +18,9 @@ class TestHeadlessAPI(unittest.TestCase):
|
||||
self.test_api_key = "test-secret-key"
|
||||
self.app_instance.config["headless"] = {"api_key": self.test_api_key}
|
||||
self.headers = {"X-API-KEY": self.test_api_key}
|
||||
|
||||
# Clear any leftover state
|
||||
self.app_instance._pending_actions = {}
|
||||
self.app_instance._pending_dialog = None
|
||||
|
||||
self.api = self.app_instance.create_api()
|
||||
self.client = TestClient(self.api)
|
||||
|
||||
@@ -58,7 +56,6 @@ class TestHeadlessAPI(unittest.TestCase):
|
||||
"usage": {"input_tokens": 10, "output_tokens": 5}
|
||||
}
|
||||
}]
|
||||
|
||||
response = self.client.post("/api/v1/generate", json=payload, headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
data = response.json()
|
||||
@@ -71,7 +68,6 @@ class TestHeadlessAPI(unittest.TestCase):
|
||||
with patch('gui_2.uuid.uuid4', return_value="test-action-id"):
|
||||
dialog = gui_2.ConfirmDialog("dir", ".")
|
||||
self.app_instance._pending_actions[dialog._uid] = dialog
|
||||
|
||||
response = self.client.get("/api/v1/pending_actions", headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
data = response.json()
|
||||
@@ -83,7 +79,6 @@ class TestHeadlessAPI(unittest.TestCase):
|
||||
with patch('gui_2.uuid.uuid4', return_value="test-confirm-id"):
|
||||
dialog = gui_2.ConfirmDialog("dir", ".")
|
||||
self.app_instance._pending_actions[dialog._uid] = dialog
|
||||
|
||||
payload = {"approved": True}
|
||||
response = self.client.post("/api/v1/confirm/test-confirm-id", json=payload, headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
@@ -96,7 +91,6 @@ class TestHeadlessAPI(unittest.TestCase):
|
||||
# Create a dummy log
|
||||
dummy_log = Path("logs/test_session_api.log")
|
||||
dummy_log.write_text("dummy content")
|
||||
|
||||
try:
|
||||
response = self.client.get("/api/v1/sessions", headers=self.headers)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
@@ -131,17 +125,13 @@ class TestHeadlessStartup(unittest.TestCase):
|
||||
def test_headless_flag_prevents_gui_run(self, mock_uvicorn_run, mock_cleanup, mock_save_config, mock_hook_server, mock_immapp_run):
|
||||
# Setup mock argv with --headless
|
||||
test_args = ["gui_2.py", "--headless"]
|
||||
|
||||
with patch.object(sys, 'argv', test_args):
|
||||
with patch('gui_2.session_logger.close_session'), \
|
||||
patch('gui_2.session_logger.open_session'):
|
||||
app = gui_2.App()
|
||||
|
||||
# Mock _fetch_models to avoid network calls
|
||||
app._fetch_models = MagicMock()
|
||||
|
||||
app.run()
|
||||
|
||||
# Expectation: immapp.run should NOT be called in headless mode
|
||||
mock_immapp_run.assert_not_called()
|
||||
# Expectation: uvicorn.run SHOULD be called
|
||||
|
||||
@@ -16,26 +16,19 @@ async def test_headless_verification_full_run():
|
||||
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
|
||||
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker1", depends_on=["T1"])
|
||||
track = Track(id="track_verify", description="Verification Track", tickets=[t1, t2])
|
||||
|
||||
from events import AsyncEventQueue
|
||||
queue = AsyncEventQueue()
|
||||
engine = ConductorEngine(track=track, event_queue=queue)
|
||||
|
||||
with patch("ai_client.send") as mock_send, \
|
||||
patch("ai_client.reset_session") as mock_reset:
|
||||
|
||||
# We need mock_send to return something that doesn't contain "BLOCKED"
|
||||
mock_send.return_value = "Task completed successfully."
|
||||
|
||||
await engine.run_linear()
|
||||
|
||||
# Verify both tickets are completed
|
||||
assert t1.status == "completed"
|
||||
assert t2.status == "completed"
|
||||
|
||||
# Verify that ai_client.send was called twice (once for each ticket)
|
||||
assert mock_send.call_count == 2
|
||||
|
||||
# Verify Context Amnesia: reset_session should be called for each ticket
|
||||
assert mock_reset.call_count == 2
|
||||
|
||||
@@ -47,62 +40,52 @@ async def test_headless_verification_error_and_qa_interceptor():
|
||||
"""
|
||||
t1 = Ticket(id="T1", description="Task with error", status="todo", assigned_to="worker1")
|
||||
track = Track(id="track_error", description="Error Track", tickets=[t1])
|
||||
|
||||
from events import AsyncEventQueue
|
||||
queue = AsyncEventQueue()
|
||||
engine = ConductorEngine(track=track, event_queue=queue)
|
||||
|
||||
# We need to simulate the tool loop inside ai_client._send_gemini (or similar)
|
||||
# Since we want to test the real tool loop and QA injection, we mock at the provider level.
|
||||
|
||||
with patch("ai_client._provider", "gemini"), \
|
||||
patch("ai_client._gemini_client") as mock_genai_client, \
|
||||
patch("ai_client.confirm_and_run_callback") as mock_run, \
|
||||
patch("ai_client.run_tier4_analysis") as mock_qa, \
|
||||
patch("ai_client._ensure_gemini_client") as mock_ensure, \
|
||||
patch("ai_client._gemini_tool_declaration", return_value=None):
|
||||
|
||||
# Ensure _gemini_client is restored by the mock ensure function
|
||||
import ai_client
|
||||
|
||||
def restore_client():
|
||||
ai_client._gemini_client = mock_genai_client
|
||||
mock_ensure.side_effect = restore_client
|
||||
ai_client._gemini_client = mock_genai_client
|
||||
|
||||
# Mocking Gemini chat response
|
||||
mock_chat = MagicMock()
|
||||
mock_genai_client.chats.create.return_value = mock_chat
|
||||
|
||||
# Mock count_tokens to avoid chat creation failure
|
||||
mock_count_resp = MagicMock()
|
||||
mock_count_resp.total_tokens = 100
|
||||
mock_genai_client.models.count_tokens.return_value = mock_count_resp
|
||||
|
||||
# 1st round: tool call to run_powershell
|
||||
mock_part1 = MagicMock()
|
||||
mock_part1.text = "I will run a command."
|
||||
mock_part1.function_call = MagicMock()
|
||||
mock_part1.function_call.name = "run_powershell"
|
||||
mock_part1.function_call.args = {"script": "dir"}
|
||||
|
||||
mock_resp1 = MagicMock()
|
||||
mock_resp1.candidates = [MagicMock(content=MagicMock(parts=[mock_part1]), finish_reason=MagicMock(name="STOP"))]
|
||||
mock_resp1.usage_metadata.prompt_token_count = 10
|
||||
mock_resp1.usage_metadata.candidates_token_count = 5
|
||||
|
||||
# 2nd round: Final text after tool result
|
||||
mock_part2 = MagicMock()
|
||||
mock_part2.text = "The command failed but I understand why. Task done."
|
||||
mock_part2.function_call = None
|
||||
|
||||
mock_resp2 = MagicMock()
|
||||
mock_resp2.candidates = [MagicMock(content=MagicMock(parts=[mock_part2]), finish_reason=MagicMock(name="STOP"))]
|
||||
mock_resp2.usage_metadata.prompt_token_count = 20
|
||||
mock_resp2.usage_metadata.candidates_token_count = 10
|
||||
|
||||
mock_chat.send_message.side_effect = [mock_resp1, mock_resp2]
|
||||
|
||||
# Mock run_powershell behavior: it should call the qa_callback on error
|
||||
|
||||
def run_side_effect(script, base_dir, qa_callback):
|
||||
if qa_callback:
|
||||
analysis = qa_callback("Error: file not found")
|
||||
@@ -111,26 +94,20 @@ async def test_headless_verification_error_and_qa_interceptor():
|
||||
QA ANALYSIS:
|
||||
{analysis}"""
|
||||
return "Error: file not found"
|
||||
|
||||
mock_run.side_effect = run_side_effect
|
||||
mock_qa.return_value = "FIX: Check if path exists."
|
||||
|
||||
await engine.run_linear()
|
||||
|
||||
# Verify QA analysis was triggered
|
||||
mock_qa.assert_called_once_with("Error: file not found")
|
||||
|
||||
# Verify the 2nd send_message call includes the QA ANALYSIS in its payload (f_resps)
|
||||
# The first call is the user message, the second is the tool response.
|
||||
assert mock_chat.send_message.call_count == 2
|
||||
args, kwargs = mock_chat.send_message.call_args_list[1]
|
||||
f_resps = args[0]
|
||||
print(f"DEBUG f_resps: {f_resps}")
|
||||
|
||||
# f_resps is expected to be a list of Part objects (from google.genai.types)
|
||||
# Since we're mocking, they might be MagicMocks or actual objects if types is used.
|
||||
# In our case, ai_client.Part.from_function_response is used.
|
||||
|
||||
found_qa = False
|
||||
for part in f_resps:
|
||||
# Check if it's a function response and contains our QA analysis
|
||||
@@ -139,5 +116,4 @@ QA ANALYSIS:
|
||||
print(f"DEBUG part_str: {part_str}")
|
||||
if "QA ANALYSIS:" in part_str and "FIX: Check if path exists." in part_str:
|
||||
found_qa = True
|
||||
|
||||
assert found_qa, "QA Analysis was not injected into the next round"
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user