checkpoint: massive refactor

This commit is contained in:
2026-02-28 09:06:45 -05:00
parent f2512c30e9
commit d36632c21a
149 changed files with 16255 additions and 17722 deletions

View File

@@ -15,98 +15,94 @@ import tomllib
import re import re
import glob import glob
from pathlib import Path, PureWindowsPath from pathlib import Path, PureWindowsPath
from typing import Any
import summarize import summarize
import project_manager import project_manager
from file_cache import ASTParser from file_cache import ASTParser
def find_next_increment(output_dir: Path, namespace: str) -> int: def find_next_increment(output_dir: Path, namespace: str) -> int:
pattern = re.compile(rf"^{re.escape(namespace)}_(\d+)\.md$") pattern = re.compile(rf"^{re.escape(namespace)}_(\d+)\.md$")
max_num = 0 max_num = 0
for f in output_dir.iterdir(): for f in output_dir.iterdir():
if f.is_file(): if f.is_file():
match = pattern.match(f.name) match = pattern.match(f.name)
if match: if match:
max_num = max(max_num, int(match.group(1))) max_num = max(max_num, int(match.group(1)))
return max_num + 1 return max_num + 1
def is_absolute_with_drive(entry: str) -> bool: def is_absolute_with_drive(entry: str) -> bool:
try: try:
p = PureWindowsPath(entry) p = PureWindowsPath(entry)
return p.drive != "" return p.drive != ""
except Exception: except Exception:
return False return False
def resolve_paths(base_dir: Path, entry: str) -> list[Path]: def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
has_drive = is_absolute_with_drive(entry) has_drive = is_absolute_with_drive(entry)
is_wildcard = "*" in entry is_wildcard = "*" in entry
matches = []
matches = [] if is_wildcard:
if is_wildcard: root = Path(entry) if has_drive else base_dir / entry
root = Path(entry) if has_drive else base_dir / entry matches = [Path(p) for p in glob.glob(str(root), recursive=True) if Path(p).is_file()]
matches = [Path(p) for p in glob.glob(str(root), recursive=True) if Path(p).is_file()] else:
else: p = Path(entry) if has_drive else (base_dir / entry).resolve()
p = Path(entry) if has_drive else (base_dir / entry).resolve() matches = [p]
matches = [p] # Blacklist filter
filtered = []
# Blacklist filter for p in matches:
filtered = [] name = p.name.lower()
for p in matches: if name == "history.toml" or name.endswith("_history.toml"):
name = p.name.lower() continue
if name == "history.toml" or name.endswith("_history.toml"): filtered.append(p)
continue return sorted(filtered)
filtered.append(p)
return sorted(filtered)
def build_discussion_section(history: list[str]) -> str: def build_discussion_section(history: list[str]) -> str:
sections = [] sections = []
for i, paste in enumerate(history, start=1): for i, paste in enumerate(history, start=1):
sections.append(f"### Discussion Excerpt {i}\n\n{paste.strip()}") sections.append(f"### Discussion Excerpt {i}\n\n{paste.strip()}")
return "\n\n---\n\n".join(sections) return "\n\n---\n\n".join(sections)
def build_files_section(base_dir: Path, files: list[str | dict]) -> str: def build_files_section(base_dir: Path, files: list[str | dict[str, Any]]) -> str:
sections = [] sections = []
for entry_raw in files: for entry_raw in files:
if isinstance(entry_raw, dict): if isinstance(entry_raw, dict):
entry = entry_raw.get("path") entry = entry_raw.get("path")
else: else:
entry = entry_raw entry = entry_raw
paths = resolve_paths(base_dir, entry)
paths = resolve_paths(base_dir, entry) if not paths:
if not paths: sections.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```")
sections.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```") continue
continue for path in paths:
for path in paths: suffix = path.suffix.lstrip(".")
suffix = path.suffix.lstrip(".") lang = suffix if suffix else "text"
lang = suffix if suffix else "text" try:
try: content = path.read_text(encoding="utf-8")
content = path.read_text(encoding="utf-8") except FileNotFoundError:
except FileNotFoundError: content = f"ERROR: file not found: {path}"
content = f"ERROR: file not found: {path}" except Exception as e:
except Exception as e: content = f"ERROR: {e}"
content = f"ERROR: {e}" original = entry if "*" not in entry else str(path)
original = entry if "*" not in entry else str(path) sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```") return "\n\n---\n\n".join(sections)
return "\n\n---\n\n".join(sections)
def build_screenshots_section(base_dir: Path, screenshots: list[str]) -> str: def build_screenshots_section(base_dir: Path, screenshots: list[str]) -> str:
sections = [] sections = []
for entry in screenshots: for entry in screenshots:
paths = resolve_paths(base_dir, entry) paths = resolve_paths(base_dir, entry)
if not paths: if not paths:
sections.append(f"### `{entry}`\n\n_ERROR: no files matched: {entry}_") sections.append(f"### `{entry}`\n\n_ERROR: no files matched: {entry}_")
continue continue
for path in paths: for path in paths:
original = entry if "*" not in entry else str(path) original = entry if "*" not in entry else str(path)
if not path.exists(): if not path.exists():
sections.append(f"### `{original}`\n\n_ERROR: file not found: {path}_") sections.append(f"### `{original}`\n\n_ERROR: file not found: {path}_")
continue continue
sections.append(f"### `{original}`\n\n![{path.name}]({path.as_posix()})") sections.append(f"### `{original}`\n\n![{path.name}]({path.as_posix()})")
return "\n\n---\n\n".join(sections) return "\n\n---\n\n".join(sections)
def build_file_items(base_dir: Path, files: list[str | dict[str, Any]]) -> list[dict[str, Any]]:
def build_file_items(base_dir: Path, files: list[str | dict]) -> list[dict]: """
"""
Return a list of dicts describing each file, for use by ai_client when it Return a list of dicts describing each file, for use by ai_client when it
wants to upload individual files rather than inline everything as markdown. wants to upload individual files rather than inline everything as markdown.
@@ -118,240 +114,213 @@ def build_file_items(base_dir: Path, files: list[str | dict]) -> list[dict]:
mtime : float (last modification time, for skip-if-unchanged optimization) mtime : float (last modification time, for skip-if-unchanged optimization)
tier : int | None (optional tier for context management) tier : int | None (optional tier for context management)
""" """
items = [] items = []
for entry_raw in files: for entry_raw in files:
if isinstance(entry_raw, dict): if isinstance(entry_raw, dict):
entry = entry_raw.get("path") entry = entry_raw.get("path")
tier = entry_raw.get("tier") tier = entry_raw.get("tier")
else: else:
entry = entry_raw entry = entry_raw
tier = None tier = None
paths = resolve_paths(base_dir, entry)
if not paths:
items.append({"path": None, "entry": entry, "content": f"ERROR: no files matched: {entry}", "error": True, "mtime": 0.0, "tier": tier})
continue
for path in paths:
try:
content = path.read_text(encoding="utf-8")
mtime = path.stat().st_mtime
error = False
except FileNotFoundError:
content = f"ERROR: file not found: {path}"
mtime = 0.0
error = True
except Exception as e:
content = f"ERROR: {e}"
mtime = 0.0
error = True
items.append({"path": path, "entry": entry, "content": content, "error": error, "mtime": mtime, "tier": tier})
return items
paths = resolve_paths(base_dir, entry) def build_summary_section(base_dir: Path, files: list[str | dict[str, Any]]) -> str:
if not paths: """
items.append({"path": None, "entry": entry, "content": f"ERROR: no files matched: {entry}", "error": True, "mtime": 0.0, "tier": tier})
continue
for path in paths:
try:
content = path.read_text(encoding="utf-8")
mtime = path.stat().st_mtime
error = False
except FileNotFoundError:
content = f"ERROR: file not found: {path}"
mtime = 0.0
error = True
except Exception as e:
content = f"ERROR: {e}"
mtime = 0.0
error = True
items.append({"path": path, "entry": entry, "content": content, "error": error, "mtime": mtime, "tier": tier})
return items
def build_summary_section(base_dir: Path, files: list[str | dict]) -> str:
"""
Build a compact summary section using summarize.py — one short block per file. Build a compact summary section using summarize.py — one short block per file.
Used as the initial <context> block instead of full file contents. Used as the initial <context> block instead of full file contents.
""" """
items = build_file_items(base_dir, files) items = build_file_items(base_dir, files)
return summarize.build_summary_markdown(items) return summarize.build_summary_markdown(items)
def _build_files_section_from_items(file_items: list[dict]) -> str: def _build_files_section_from_items(file_items: list[dict[str, Any]]) -> str:
"""Build the files markdown section from pre-read file items (avoids double I/O).""" """Build the files markdown section from pre-read file items (avoids double I/O)."""
sections = [] sections = []
for item in file_items: for item in file_items:
path = item.get("path") path = item.get("path")
entry = item.get("entry", "unknown") entry = item.get("entry", "unknown")
content = item.get("content", "") content = item.get("content", "")
if path is None: if path is None:
sections.append(f"### `{entry}`\n\n```text\n{content}\n```") sections.append(f"### `{entry}`\n\n```text\n{content}\n```")
continue continue
suffix = path.suffix.lstrip(".") if hasattr(path, "suffix") else "text" suffix = path.suffix.lstrip(".") if hasattr(path, "suffix") else "text"
lang = suffix if suffix else "text" lang = suffix if suffix else "text"
original = entry if "*" not in entry else str(path) original = entry if "*" not in entry else str(path)
sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```") sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
return "\n\n---\n\n".join(sections) return "\n\n---\n\n".join(sections)
def build_markdown_from_items(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
"""Build markdown from pre-read file items instead of re-reading from disk."""
parts = []
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
if file_items:
if summary_only:
parts.append("## Files (Summary)\n\n" + summarize.build_summary_markdown(file_items))
else:
parts.append("## Files\n\n" + _build_files_section_from_items(file_items))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
# DYNAMIC SUFFIX: History changes every turn, must go last
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def build_markdown_from_items(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str: def build_markdown_no_history(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
"""Build markdown from pre-read file items instead of re-reading from disk.""" """Build markdown with only files + screenshots (no history). Used for stable caching."""
parts = [] return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history=[], summary_only=summary_only)
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
if file_items:
if summary_only:
parts.append("## Files (Summary)\n\n" + summarize.build_summary_markdown(file_items))
else:
parts.append("## Files\n\n" + _build_files_section_from_items(file_items))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
# DYNAMIC SUFFIX: History changes every turn, must go last
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def build_markdown_no_history(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
"""Build markdown with only files + screenshots (no history). Used for stable caching."""
return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history=[], summary_only=summary_only)
def build_discussion_text(history: list[str]) -> str: def build_discussion_text(history: list[str]) -> str:
"""Build just the discussion history section text. Returns empty string if no history.""" """Build just the discussion history section text. Returns empty string if no history."""
if not history: if not history:
return "" return ""
return "## Discussion History\n\n" + build_discussion_section(history) return "## Discussion History\n\n" + build_discussion_section(history)
def build_tier1_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
def build_tier1_context(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str: """
"""
Tier 1 Context: Strategic/Orchestration. Tier 1 Context: Strategic/Orchestration.
Full content for core conductor files and files with tier=1, summaries for others. Full content for core conductor files and files with tier=1, summaries for others.
""" """
core_files = {"product.md", "tech-stack.md", "workflow.md", "tracks.md"} core_files = {"product.md", "tech-stack.md", "workflow.md", "tracks.md"}
parts = []
parts = [] # Files section
if file_items:
# Files section sections = []
if file_items: for item in file_items:
sections = [] path = item.get("path")
for item in file_items: name = path.name if path else ""
path = item.get("path") if name in core_files or item.get("tier") == 1:
name = path.name if path else "" # Include in full
sections.append("### `" + (item.get("entry") or str(path)) + "`\n\n" +
if name in core_files or item.get("tier") == 1: f"```{path.suffix.lstrip('.') if path.suffix else 'text'}\n{item.get('content', '')}\n```")
# Include in full else:
sections.append("### `" + (item.get("entry") or str(path)) + "`\n\n" + # Summarize
f"```{path.suffix.lstrip('.') if path.suffix else 'text'}\n{item.get('content', '')}\n```") sections.append("### `" + (item.get("entry") or str(path)) + "`\n\n" +
else: summarize.summarise_file(path, item.get("content", "")))
# Summarize parts.append("## Files (Tier 1 - Mixed)\n\n" + "\n\n---\n\n".join(sections))
sections.append("### `" + (item.get("entry") or str(path)) + "`\n\n" + if screenshots:
summarize.summarise_file(path, item.get("content", ""))) parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
if history:
parts.append("## Files (Tier 1 - Mixed)\n\n" + "\n\n---\n\n".join(sections)) parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
if screenshots: def build_tier2_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots)) """
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def build_tier2_context(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
"""
Tier 2 Context: Architectural/Tech Lead. Tier 2 Context: Architectural/Tech Lead.
Full content for all files (standard behavior). Full content for all files (standard behavior).
""" """
return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history, summary_only=False) return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history, summary_only=False)
def build_tier3_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], focus_files: list[str]) -> str:
def build_tier3_context(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], focus_files: list[str]) -> str: """
"""
Tier 3 Context: Execution/Worker. Tier 3 Context: Execution/Worker.
Full content for focus_files and files with tier=3, summaries/skeletons for others. Full content for focus_files and files with tier=3, summaries/skeletons for others.
""" """
parts = [] parts = []
if file_items:
if file_items: sections = []
sections = [] for item in file_items:
for item in file_items: path = item.get("path")
path = item.get("path") entry = item.get("entry", "")
entry = item.get("entry", "") path_str = str(path) if path else ""
path_str = str(path) if path else "" # Check if this file is in focus_files (by name or path)
is_focus = False
# Check if this file is in focus_files (by name or path) for focus in focus_files:
is_focus = False if focus == entry or (path and focus == path.name) or focus in path_str:
for focus in focus_files: is_focus = True
if focus == entry or (path and focus == path.name) or focus in path_str: break
is_focus = True if is_focus or item.get("tier") == 3:
break sections.append("### `" + (entry or path_str) + "`\n\n" +
f"```{path.suffix.lstrip('.') if path and path.suffix else 'text'}\n{item.get('content', '')}\n```")
if is_focus or item.get("tier") == 3: else:
sections.append("### `" + (entry or path_str) + "`\n\n" + content = item.get("content", "")
f"```{path.suffix.lstrip('.') if path and path.suffix else 'text'}\n{item.get('content', '')}\n```") if path and path.suffix == ".py" and not item.get("error"):
else: try:
content = item.get("content", "") parser = ASTParser("python")
if path and path.suffix == ".py" and not item.get("error"): skeleton = parser.get_skeleton(content)
try: sections.append(f"### `{entry or path_str}` (AST Skeleton)\n\n```python\n{skeleton}\n```")
parser = ASTParser("python") except Exception as e:
skeleton = parser.get_skeleton(content) # Fallback to summary if AST parsing fails
sections.append(f"### `{entry or path_str}` (AST Skeleton)\n\n```python\n{skeleton}\n```") sections.append(f"### `{entry or path_str}`\n\n" + summarize.summarise_file(path, content))
except Exception as e: else:
# Fallback to summary if AST parsing fails sections.append(f"### `{entry or path_str}`\n\n" + summarize.summarise_file(path, content))
sections.append(f"### `{entry or path_str}`\n\n" + summarize.summarise_file(path, content)) parts.append("## Files (Tier 3 - Focused)\n\n" + "\n\n---\n\n".join(sections))
else: if screenshots:
sections.append(f"### `{entry or path_str}`\n\n" + summarize.summarise_file(path, content)) parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
if history:
parts.append("## Files (Tier 3 - Focused)\n\n" + "\n\n---\n\n".join(sections)) parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
if screenshots: def build_markdown(base_dir: Path, files: list[str | dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots)) parts = []
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
if history: if files:
parts.append("## Discussion History\n\n" + build_discussion_section(history)) if summary_only:
parts.append("## Files (Summary)\n\n" + build_summary_section(base_dir, files))
return "\n\n---\n\n".join(parts) else:
parts.append("## Files\n\n" + build_files_section(base_dir, files))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
# DYNAMIC SUFFIX: History changes every turn, must go last
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def run(config: dict[str, Any]) -> tuple[str, Path, list[dict[str, Any]]]:
namespace = config.get("project", {}).get("name")
if not namespace:
namespace = config.get("output", {}).get("namespace", "project")
output_dir = Path(config["output"]["output_dir"])
base_dir = Path(config["files"]["base_dir"])
files = config["files"].get("paths", [])
screenshot_base_dir = Path(config.get("screenshots", {}).get("base_dir", "."))
screenshots = config.get("screenshots", {}).get("paths", [])
history = config.get("discussion", {}).get("history", [])
output_dir.mkdir(parents=True, exist_ok=True)
increment = find_next_increment(output_dir, namespace)
output_file = output_dir / f"{namespace}_{increment:03d}.md"
# Build file items once, then construct markdown from them (avoids double I/O)
file_items = build_file_items(base_dir, files)
summary_only = config.get("project", {}).get("summary_only", False)
markdown = build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history,
summary_only=summary_only)
output_file.write_text(markdown, encoding="utf-8")
return markdown, output_file, file_items
def build_markdown(base_dir: Path, files: list[str | dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str: def main() -> None:
parts = [] # Load global config to find active project
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits config_path = Path("config.toml")
if files: if not config_path.exists():
if summary_only: print("config.toml not found.")
parts.append("## Files (Summary)\n\n" + build_summary_section(base_dir, files)) return
else: with open(config_path, "rb") as f:
parts.append("## Files\n\n" + build_files_section(base_dir, files)) global_cfg = tomllib.load(f)
if screenshots: active_path = global_cfg.get("projects", {}).get("active")
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots)) if not active_path:
# DYNAMIC SUFFIX: History changes every turn, must go last print("No active project found in config.toml.")
if history: return
parts.append("## Discussion History\n\n" + build_discussion_section(history)) # Use project_manager to load project (handles history segregation)
return "\n\n---\n\n".join(parts) proj = project_manager.load_project(active_path)
# Use flat_config to make it compatible with aggregate.run()
def run(config: dict) -> tuple[str, Path, list[dict]]: config = project_manager.flat_config(proj)
namespace = config.get("project", {}).get("name") markdown, output_file, _ = run(config)
if not namespace: print(f"Written: {output_file}")
namespace = config.get("output", {}).get("namespace", "project")
output_dir = Path(config["output"]["output_dir"])
base_dir = Path(config["files"]["base_dir"])
files = config["files"].get("paths", [])
screenshot_base_dir = Path(config.get("screenshots", {}).get("base_dir", "."))
screenshots = config.get("screenshots", {}).get("paths", [])
history = config.get("discussion", {}).get("history", [])
output_dir.mkdir(parents=True, exist_ok=True)
increment = find_next_increment(output_dir, namespace)
output_file = output_dir / f"{namespace}_{increment:03d}.md"
# Build file items once, then construct markdown from them (avoids double I/O)
file_items = build_file_items(base_dir, files)
summary_only = config.get("project", {}).get("summary_only", False)
markdown = build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history,
summary_only=summary_only)
output_file.write_text(markdown, encoding="utf-8")
return markdown, output_file, file_items
def main():
# Load global config to find active project
config_path = Path("config.toml")
if not config_path.exists():
print("config.toml not found.")
return
with open(config_path, "rb") as f:
global_cfg = tomllib.load(f)
active_path = global_cfg.get("projects", {}).get("active")
if not active_path:
print("No active project found in config.toml.")
return
# Use project_manager to load project (handles history segregation)
proj = project_manager.load_project(active_path)
# Use flat_config to make it compatible with aggregate.run()
config = project_manager.flat_config(proj)
markdown, output_file, _ = run(config)
print(f"Written: {output_file}")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

File diff suppressed because it is too large Load Diff

View File

@@ -3,246 +3,240 @@ import json
import time import time
class ApiHookClient: class ApiHookClient:
def __init__(self, base_url="http://127.0.0.1:8999", max_retries=5, retry_delay=0.2): def __init__(self, base_url="http://127.0.0.1:8999", max_retries=5, retry_delay=0.2):
self.base_url = base_url self.base_url = base_url
self.max_retries = max_retries self.max_retries = max_retries
self.retry_delay = retry_delay self.retry_delay = retry_delay
def wait_for_server(self, timeout=3): def wait_for_server(self, timeout=3):
""" """
Polls the /status endpoint until the server is ready or timeout is reached. Polls the /status endpoint until the server is ready or timeout is reached.
""" """
start_time = time.time() start_time = time.time()
while time.time() - start_time < timeout: while time.time() - start_time < timeout:
try: try:
if self.get_status().get('status') == 'ok': if self.get_status().get('status') == 'ok':
return True return True
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
time.sleep(0.1) time.sleep(0.1)
return False return False
def _make_request(self, method, endpoint, data=None, timeout=None): def _make_request(self, method, endpoint, data=None, timeout=None):
url = f"{self.base_url}{endpoint}" url = f"{self.base_url}{endpoint}"
headers = {'Content-Type': 'application/json'} headers = {'Content-Type': 'application/json'}
last_exception = None
last_exception = None # Increase default request timeout for local server
# Increase default request timeout for local server req_timeout = timeout if timeout is not None else 2.0
req_timeout = timeout if timeout is not None else 2.0 for attempt in range(self.max_retries + 1):
try:
for attempt in range(self.max_retries + 1): if method == 'GET':
try: response = requests.get(url, timeout=req_timeout)
if method == 'GET': elif method == 'POST':
response = requests.get(url, timeout=req_timeout) response = requests.post(url, json=data, headers=headers, timeout=req_timeout)
elif method == 'POST': else:
response = requests.post(url, json=data, headers=headers, timeout=req_timeout) raise ValueError(f"Unsupported HTTP method: {method}")
else: response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
raise ValueError(f"Unsupported HTTP method: {method}") return response.json()
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
last_exception = e
if attempt < self.max_retries:
time.sleep(self.retry_delay)
continue
else:
if isinstance(e, requests.exceptions.Timeout):
raise requests.exceptions.Timeout(f"Request to {endpoint} timed out after {self.max_retries} retries.") from e
else:
raise requests.exceptions.ConnectionError(f"Could not connect to API hook server at {self.base_url} after {self.max_retries} retries.") from e
except requests.exceptions.HTTPError as e:
raise requests.exceptions.HTTPError(f"HTTP error {e.response.status_code} for {endpoint}: {e.response.text}") from e
except json.JSONDecodeError as e:
raise ValueError(f"Failed to decode JSON from response for {endpoint}: {response.text}") from e
if last_exception:
raise last_exception
response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx) def get_status(self):
return response.json() """Checks the health of the hook server."""
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e: url = f"{self.base_url}/status"
last_exception = e try:
if attempt < self.max_retries: response = requests.get(url, timeout=0.2)
time.sleep(self.retry_delay) response.raise_for_status()
continue return response.json()
else: except Exception:
if isinstance(e, requests.exceptions.Timeout): raise requests.exceptions.ConnectionError(f"Could not reach /status at {self.base_url}")
raise requests.exceptions.Timeout(f"Request to {endpoint} timed out after {self.max_retries} retries.") from e
else:
raise requests.exceptions.ConnectionError(f"Could not connect to API hook server at {self.base_url} after {self.max_retries} retries.") from e
except requests.exceptions.HTTPError as e:
raise requests.exceptions.HTTPError(f"HTTP error {e.response.status_code} for {endpoint}: {e.response.text}") from e
except json.JSONDecodeError as e:
raise ValueError(f"Failed to decode JSON from response for {endpoint}: {response.text}") from e
if last_exception:
raise last_exception
def get_status(self): def get_project(self):
"""Checks the health of the hook server.""" return self._make_request('GET', '/api/project')
url = f"{self.base_url}/status"
try:
response = requests.get(url, timeout=0.2)
response.raise_for_status()
return response.json()
except Exception:
raise requests.exceptions.ConnectionError(f"Could not reach /status at {self.base_url}")
def get_project(self): def post_project(self, project_data):
return self._make_request('GET', '/api/project') return self._make_request('POST', '/api/project', data={'project': project_data})
def post_project(self, project_data): def get_session(self):
return self._make_request('POST', '/api/project', data={'project': project_data}) return self._make_request('GET', '/api/session')
def get_session(self): def get_mma_status(self):
return self._make_request('GET', '/api/session') """Retrieves current MMA status (track, tickets, tier, etc.)"""
return self._make_request('GET', '/api/gui/mma_status')
def get_mma_status(self): def push_event(self, event_type, payload):
"""Retrieves current MMA status (track, tickets, tier, etc.)""" """Pushes an event to the GUI's AsyncEventQueue via the /api/gui endpoint."""
return self._make_request('GET', '/api/gui/mma_status') return self.post_gui({
"action": event_type,
"payload": payload
})
def push_event(self, event_type, payload): def get_performance(self):
"""Pushes an event to the GUI's AsyncEventQueue via the /api/gui endpoint.""" """Retrieves UI performance metrics."""
return self.post_gui({ return self._make_request('GET', '/api/performance')
"action": event_type,
"payload": payload
})
def get_performance(self): def post_session(self, session_entries):
"""Retrieves UI performance metrics.""" return self._make_request('POST', '/api/session', data={'session': {'entries': session_entries}})
return self._make_request('GET', '/api/performance')
def post_session(self, session_entries): def post_gui(self, gui_data):
return self._make_request('POST', '/api/session', data={'session': {'entries': session_entries}}) return self._make_request('POST', '/api/gui', data=gui_data)
def post_gui(self, gui_data): def select_tab(self, tab_bar, tab):
return self._make_request('POST', '/api/gui', data=gui_data) """Tells the GUI to switch to a specific tab in a tab bar."""
return self.post_gui({
"action": "select_tab",
"tab_bar": tab_bar,
"tab": tab
})
def select_tab(self, tab_bar, tab): def select_list_item(self, listbox, item_value):
"""Tells the GUI to switch to a specific tab in a tab bar.""" """Tells the GUI to select an item in a listbox by its value."""
return self.post_gui({ return self.post_gui({
"action": "select_tab", "action": "select_list_item",
"tab_bar": tab_bar, "listbox": listbox,
"tab": tab "item_value": item_value
}) })
def select_list_item(self, listbox, item_value): def set_value(self, item, value):
"""Tells the GUI to select an item in a listbox by its value.""" """Sets the value of a GUI item."""
return self.post_gui({ return self.post_gui({
"action": "select_list_item", "action": "set_value",
"listbox": listbox, "item": item,
"item_value": item_value "value": value
}) })
def set_value(self, item, value): def get_value(self, item):
"""Sets the value of a GUI item.""" """Gets the value of a GUI item via its mapped field."""
return self.post_gui({ try:
"action": "set_value", # First try direct field querying via POST
"item": item, res = self._make_request('POST', '/api/gui/value', data={"field": item})
"value": value if res and "value" in res:
}) v = res.get("value")
if v is not None:
return v
except Exception:
pass
try:
# Try GET fallback
res = self._make_request('GET', f'/api/gui/value/{item}')
if res and "value" in res:
v = res.get("value")
if v is not None:
return v
except Exception:
pass
try:
# Fallback for thinking/live/prior which are in diagnostics
diag = self._make_request('GET', '/api/gui/diagnostics')
if item in diag:
return diag[item]
# Map common indicator tags to diagnostics keys
mapping = {
"thinking_indicator": "thinking",
"operations_live_indicator": "live",
"prior_session_indicator": "prior"
}
key = mapping.get(item)
if key and key in diag:
return diag[key]
except Exception:
pass
return None
def get_value(self, item): def get_text_value(self, item_tag):
"""Gets the value of a GUI item via its mapped field.""" """Wraps get_value and returns its string representation, or None."""
try: val = self.get_value(item_tag)
# First try direct field querying via POST return str(val) if val is not None else None
res = self._make_request('POST', '/api/gui/value', data={"field": item})
if res and "value" in res:
v = res.get("value")
if v is not None:
return v
except Exception:
pass
try: def get_node_status(self, node_tag):
# Try GET fallback """Wraps get_value for a DAG node or queries the diagnostic endpoint for its status."""
res = self._make_request('GET', f'/api/gui/value/{item}') val = self.get_value(node_tag)
if res and "value" in res: if val is not None:
v = res.get("value") return val
if v is not None: try:
return v diag = self._make_request('GET', '/api/gui/diagnostics')
except Exception: if 'nodes' in diag and node_tag in diag['nodes']:
pass return diag['nodes'][node_tag]
if node_tag in diag:
return diag[node_tag]
except Exception:
pass
return None
try: def click(self, item, *args, **kwargs):
# Fallback for thinking/live/prior which are in diagnostics """Simulates a click on a GUI button or item."""
diag = self._make_request('GET', '/api/gui/diagnostics') user_data = kwargs.pop('user_data', None)
if item in diag: return self.post_gui({
return diag[item] "action": "click",
# Map common indicator tags to diagnostics keys "item": item,
mapping = { "args": args,
"thinking_indicator": "thinking", "kwargs": kwargs,
"operations_live_indicator": "live", "user_data": user_data
"prior_session_indicator": "prior" })
}
key = mapping.get(item)
if key and key in diag:
return diag[key]
except Exception:
pass
return None
def get_text_value(self, item_tag): def get_indicator_state(self, tag):
"""Wraps get_value and returns its string representation, or None.""" """Checks if an indicator is shown using the diagnostics endpoint."""
val = self.get_value(item_tag) # Mapping tag to the keys used in diagnostics endpoint
return str(val) if val is not None else None mapping = {
"thinking_indicator": "thinking",
"operations_live_indicator": "live",
"prior_session_indicator": "prior"
}
key = mapping.get(tag, tag)
try:
diag = self._make_request('GET', '/api/gui/diagnostics')
return {"tag": tag, "shown": diag.get(key, False)}
except Exception as e:
return {"tag": tag, "shown": False, "error": str(e)}
def get_node_status(self, node_tag): def get_events(self):
"""Wraps get_value for a DAG node or queries the diagnostic endpoint for its status.""" """Fetches and clears the event queue from the server."""
val = self.get_value(node_tag) try:
if val is not None: return self._make_request('GET', '/api/events').get("events", [])
return val except Exception:
try: return []
diag = self._make_request('GET', '/api/gui/diagnostics')
if 'nodes' in diag and node_tag in diag['nodes']:
return diag['nodes'][node_tag]
if node_tag in diag:
return diag[node_tag]
except Exception:
pass
return None
def click(self, item, *args, **kwargs): def wait_for_event(self, event_type, timeout=5):
"""Simulates a click on a GUI button or item.""" """Polls for a specific event type."""
user_data = kwargs.pop('user_data', None) start = time.time()
return self.post_gui({ while time.time() - start < timeout:
"action": "click", events = self.get_events()
"item": item, for ev in events:
"args": args, if ev.get("type") == event_type:
"kwargs": kwargs, return ev
"user_data": user_data time.sleep(0.1) # Fast poll
}) return None
def get_indicator_state(self, tag): def wait_for_value(self, item, expected, timeout=5):
"""Checks if an indicator is shown using the diagnostics endpoint.""" """Polls until get_value(item) == expected."""
# Mapping tag to the keys used in diagnostics endpoint start = time.time()
mapping = { while time.time() - start < timeout:
"thinking_indicator": "thinking", if self.get_value(item) == expected:
"operations_live_indicator": "live", return True
"prior_session_indicator": "prior" time.sleep(0.1) # Fast poll
} return False
key = mapping.get(tag, tag)
try:
diag = self._make_request('GET', '/api/gui/diagnostics')
return {"tag": tag, "shown": diag.get(key, False)}
except Exception as e:
return {"tag": tag, "shown": False, "error": str(e)}
def get_events(self): def reset_session(self):
"""Fetches and clears the event queue from the server.""" """Simulates clicking the 'Reset Session' button in the GUI."""
try: return self.click("btn_reset")
return self._make_request('GET', '/api/events').get("events", [])
except Exception:
return []
def wait_for_event(self, event_type, timeout=5): def request_confirmation(self, tool_name, args):
"""Polls for a specific event type.""" """Asks the user for confirmation via the GUI (blocking call)."""
start = time.time() # Using a long timeout as this waits for human input (60 seconds)
while time.time() - start < timeout: res = self._make_request('POST', '/api/ask',
events = self.get_events() data={'type': 'tool_approval', 'tool': tool_name, 'args': args},
for ev in events: timeout=60.0)
if ev.get("type") == event_type: return res.get('response')
return ev
time.sleep(0.1) # Fast poll
return None
def wait_for_value(self, item, expected, timeout=5):
"""Polls until get_value(item) == expected."""
start = time.time()
while time.time() - start < timeout:
if self.get_value(item) == expected:
return True
time.sleep(0.1) # Fast poll
return False
def reset_session(self):
"""Simulates clicking the 'Reset Session' button in the GUI."""
return self.click("btn_reset")
def request_confirmation(self, tool_name, args):
"""Asks the user for confirmation via the GUI (blocking call)."""
# Using a long timeout as this waits for human input (60 seconds)
res = self._make_request('POST', '/api/ask',
data={'type': 'tool_approval', 'tool': tool_name, 'args': args},
timeout=60.0)
return res.get('response')

View File

@@ -6,338 +6,313 @@ import logging
import session_logger import session_logger
class HookServerInstance(ThreadingHTTPServer): class HookServerInstance(ThreadingHTTPServer):
"""Custom HTTPServer that carries a reference to the main App instance.""" """Custom HTTPServer that carries a reference to the main App instance."""
def __init__(self, server_address, RequestHandlerClass, app):
super().__init__(server_address, RequestHandlerClass) def __init__(self, server_address, RequestHandlerClass, app):
self.app = app super().__init__(server_address, RequestHandlerClass)
self.app = app
class HookHandler(BaseHTTPRequestHandler): class HookHandler(BaseHTTPRequestHandler):
"""Handles incoming HTTP requests for the API hooks.""" """Handles incoming HTTP requests for the API hooks."""
def do_GET(self):
app = self.server.app
session_logger.log_api_hook("GET", self.path, "")
if self.path == '/status':
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps({'status': 'ok'}).encode('utf-8'))
elif self.path == '/api/project':
import project_manager
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
flat = project_manager.flat_config(app.project)
self.wfile.write(json.dumps({'project': flat}).encode('utf-8'))
elif self.path == '/api/session':
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(
json.dumps({'session': {'entries': app.disc_entries}}).
encode('utf-8'))
elif self.path == '/api/performance':
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
metrics = {}
if hasattr(app, 'perf_monitor'):
metrics = app.perf_monitor.get_metrics()
self.wfile.write(json.dumps({'performance': metrics}).encode('utf-8'))
elif self.path == '/api/events':
# Long-poll or return current event queue
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
events = []
if hasattr(app, '_api_event_queue'):
with app._api_event_queue_lock:
events = list(app._api_event_queue)
app._api_event_queue.clear()
self.wfile.write(json.dumps({'events': events}).encode('utf-8'))
elif self.path == '/api/gui/value':
# POST with {"field": "field_tag"} to get value
content_length = int(self.headers.get('Content-Length', 0))
body = self.rfile.read(content_length)
data = json.loads(body.decode('utf-8'))
field_tag = data.get("field")
print(f"[DEBUG] Hook Server: get_value for {field_tag}")
event = threading.Event() def do_GET(self):
result = {"value": None} app = self.server.app
session_logger.log_api_hook("GET", self.path, "")
if self.path == '/status':
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps({'status': 'ok'}).encode('utf-8'))
elif self.path == '/api/project':
import project_manager
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
flat = project_manager.flat_config(app.project)
self.wfile.write(json.dumps({'project': flat}).encode('utf-8'))
elif self.path == '/api/session':
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(
json.dumps({'session': {'entries': app.disc_entries}}).
encode('utf-8'))
elif self.path == '/api/performance':
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
metrics = {}
if hasattr(app, 'perf_monitor'):
metrics = app.perf_monitor.get_metrics()
self.wfile.write(json.dumps({'performance': metrics}).encode('utf-8'))
elif self.path == '/api/events':
# Long-poll or return current event queue
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
events = []
if hasattr(app, '_api_event_queue'):
with app._api_event_queue_lock:
events = list(app._api_event_queue)
app._api_event_queue.clear()
self.wfile.write(json.dumps({'events': events}).encode('utf-8'))
elif self.path == '/api/gui/value':
# POST with {"field": "field_tag"} to get value
content_length = int(self.headers.get('Content-Length', 0))
body = self.rfile.read(content_length)
data = json.loads(body.decode('utf-8'))
field_tag = data.get("field")
print(f"[DEBUG] Hook Server: get_value for {field_tag}")
event = threading.Event()
result = {"value": None}
def get_val(): def get_val():
try: try:
if field_tag in app._settable_fields: if field_tag in app._settable_fields:
attr = app._settable_fields[field_tag] attr = app._settable_fields[field_tag]
val = getattr(app, attr, None) val = getattr(app, attr, None)
print(f"[DEBUG] Hook Server: attr={attr}, val={val}") print(f"[DEBUG] Hook Server: attr={attr}, val={val}")
result["value"] = val result["value"] = val
else: else:
print(f"[DEBUG] Hook Server: {field_tag} NOT in settable_fields") print(f"[DEBUG] Hook Server: {field_tag} NOT in settable_fields")
finally: finally:
event.set() event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": get_val
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
elif self.path.startswith('/api/gui/value/'):
# Generic endpoint to get the value of any settable field
field_tag = self.path.split('/')[-1]
event = threading.Event()
result = {"value": None}
with app._pending_gui_tasks_lock: def get_val():
app._pending_gui_tasks.append({ try:
"action": "custom_callback", if field_tag in app._settable_fields:
"callback": get_val attr = app._settable_fields[field_tag]
}) result["value"] = getattr(app, attr, None)
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": get_val
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
elif self.path == '/api/gui/mma_status':
event = threading.Event()
result = {}
if event.wait(timeout=2): def get_mma():
self.send_response(200) try:
self.send_header('Content-Type', 'application/json') result["mma_status"] = getattr(app, "mma_status", "idle")
self.end_headers() result["active_tier"] = getattr(app, "active_tier", None)
self.wfile.write(json.dumps(result).encode('utf-8')) result["active_track"] = getattr(app, "active_track", None)
else: result["active_tickets"] = getattr(app, "active_tickets", [])
self.send_response(504) result["mma_step_mode"] = getattr(app, "mma_step_mode", False)
self.end_headers() result["pending_approval"] = app._pending_mma_approval is not None
elif self.path.startswith('/api/gui/value/'): finally:
# Generic endpoint to get the value of any settable field event.set()
field_tag = self.path.split('/')[-1] with app._pending_gui_tasks_lock:
event = threading.Event() app._pending_gui_tasks.append({
result = {"value": None} "action": "custom_callback",
"callback": get_mma
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
elif self.path == '/api/gui/diagnostics':
# Safe way to query multiple states at once via the main thread queue
event = threading.Event()
result = {}
def get_val(): def check_all():
try: try:
if field_tag in app._settable_fields: # Generic state check based on App attributes (works for both DPG and ImGui versions)
attr = app._settable_fields[field_tag] status = getattr(app, "ai_status", "idle")
result["value"] = getattr(app, attr, None) result["thinking"] = status in ["sending...", "running powershell..."]
finally: result["live"] = status in ["running powershell...", "fetching url...", "searching web...", "powershell done, awaiting AI..."]
event.set() result["prior"] = getattr(app, "is_viewing_prior_session", False)
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": check_all
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
self.wfile.write(json.dumps({'error': 'timeout'}).encode('utf-8'))
else:
self.send_response(404)
self.end_headers()
with app._pending_gui_tasks_lock: def do_POST(self):
app._pending_gui_tasks.append({ app = self.server.app
"action": "custom_callback", content_length = int(self.headers.get('Content-Length', 0))
"callback": get_val body = self.rfile.read(content_length)
}) body_str = body.decode('utf-8') if body else ""
session_logger.log_api_hook("POST", self.path, body_str)
try:
data = json.loads(body_str) if body_str else {}
if self.path == '/api/project':
app.project = data.get('project', app.project)
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(
json.dumps({'status': 'updated'}).encode('utf-8'))
elif self.path == '/api/session':
app.disc_entries = data.get('session', {}).get(
'entries', app.disc_entries)
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(
json.dumps({'status': 'updated'}).encode('utf-8'))
elif self.path == '/api/gui':
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append(data)
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(
json.dumps({'status': 'queued'}).encode('utf-8'))
elif self.path == '/api/ask':
request_id = str(uuid.uuid4())
event = threading.Event()
if not hasattr(app, '_pending_asks'):
app._pending_asks = {}
if not hasattr(app, '_ask_responses'):
app._ask_responses = {}
app._pending_asks[request_id] = event
# Emit event for test/client discovery
with app._api_event_queue_lock:
app._api_event_queue.append({
"type": "ask_received",
"request_id": request_id,
"data": data
})
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"type": "ask",
"request_id": request_id,
"data": data
})
if event.wait(timeout=60.0):
response_data = app._ask_responses.get(request_id)
# Clean up response after reading
if request_id in app._ask_responses:
del app._ask_responses[request_id]
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps({'status': 'ok', 'response': response_data}).encode('utf-8'))
else:
if request_id in app._pending_asks:
del app._pending_asks[request_id]
self.send_response(504)
self.end_headers()
self.wfile.write(json.dumps({'error': 'timeout'}).encode('utf-8'))
elif self.path == '/api/ask/respond':
request_id = data.get('request_id')
response_data = data.get('response')
if request_id and hasattr(app, '_pending_asks') and request_id in app._pending_asks:
app._ask_responses[request_id] = response_data
event = app._pending_asks[request_id]
event.set()
# Clean up pending ask entry
del app._pending_asks[request_id]
# Queue GUI task to clear the dialog
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "clear_ask",
"request_id": request_id
})
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps({'status': 'ok'}).encode('utf-8'))
else:
self.send_response(404)
self.end_headers()
else:
self.send_response(404)
self.end_headers()
except Exception as e:
self.send_response(500)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps({'error': str(e)}).encode('utf-8'))
if event.wait(timeout=2): def log_message(self, format, *args):
self.send_response(200) logging.info("Hook API: " + format % args)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
elif self.path == '/api/gui/mma_status':
event = threading.Event()
result = {}
def get_mma():
try:
result["mma_status"] = getattr(app, "mma_status", "idle")
result["active_tier"] = getattr(app, "active_tier", None)
result["active_track"] = getattr(app, "active_track", None)
result["active_tickets"] = getattr(app, "active_tickets", [])
result["mma_step_mode"] = getattr(app, "mma_step_mode", False)
result["pending_approval"] = app._pending_mma_approval is not None
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": get_mma
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
elif self.path == '/api/gui/diagnostics':
# Safe way to query multiple states at once via the main thread queue
event = threading.Event()
result = {}
def check_all():
try:
# Generic state check based on App attributes (works for both DPG and ImGui versions)
status = getattr(app, "ai_status", "idle")
result["thinking"] = status in ["sending...", "running powershell..."]
result["live"] = status in ["running powershell...", "fetching url...", "searching web...", "powershell done, awaiting AI..."]
result["prior"] = getattr(app, "is_viewing_prior_session", False)
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": check_all
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
self.wfile.write(json.dumps({'error': 'timeout'}).encode('utf-8'))
else:
self.send_response(404)
self.end_headers()
def do_POST(self):
app = self.server.app
content_length = int(self.headers.get('Content-Length', 0))
body = self.rfile.read(content_length)
body_str = body.decode('utf-8') if body else ""
session_logger.log_api_hook("POST", self.path, body_str)
try:
data = json.loads(body_str) if body_str else {}
if self.path == '/api/project':
app.project = data.get('project', app.project)
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(
json.dumps({'status': 'updated'}).encode('utf-8'))
elif self.path == '/api/session':
app.disc_entries = data.get('session', {}).get(
'entries', app.disc_entries)
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(
json.dumps({'status': 'updated'}).encode('utf-8'))
elif self.path == '/api/gui':
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append(data)
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(
json.dumps({'status': 'queued'}).encode('utf-8'))
elif self.path == '/api/ask':
request_id = str(uuid.uuid4())
event = threading.Event()
if not hasattr(app, '_pending_asks'):
app._pending_asks = {}
if not hasattr(app, '_ask_responses'):
app._ask_responses = {}
app._pending_asks[request_id] = event
# Emit event for test/client discovery
with app._api_event_queue_lock:
app._api_event_queue.append({
"type": "ask_received",
"request_id": request_id,
"data": data
})
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"type": "ask",
"request_id": request_id,
"data": data
})
if event.wait(timeout=60.0):
response_data = app._ask_responses.get(request_id)
# Clean up response after reading
if request_id in app._ask_responses:
del app._ask_responses[request_id]
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps({'status': 'ok', 'response': response_data}).encode('utf-8'))
else:
if request_id in app._pending_asks:
del app._pending_asks[request_id]
self.send_response(504)
self.end_headers()
self.wfile.write(json.dumps({'error': 'timeout'}).encode('utf-8'))
elif self.path == '/api/ask/respond':
request_id = data.get('request_id')
response_data = data.get('response')
if request_id and hasattr(app, '_pending_asks') and request_id in app._pending_asks:
app._ask_responses[request_id] = response_data
event = app._pending_asks[request_id]
event.set()
# Clean up pending ask entry
del app._pending_asks[request_id]
# Queue GUI task to clear the dialog
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "clear_ask",
"request_id": request_id
})
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps({'status': 'ok'}).encode('utf-8'))
else:
self.send_response(404)
self.end_headers()
else:
self.send_response(404)
self.end_headers()
except Exception as e:
self.send_response(500)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps({'error': str(e)}).encode('utf-8'))
def log_message(self, format, *args):
logging.info("Hook API: " + format % args)
class HookServer: class HookServer:
def __init__(self, app, port=8999): def __init__(self, app, port=8999):
self.app = app self.app = app
self.port = port self.port = port
self.server = None self.server = None
self.thread = None self.thread = None
def start(self): def start(self):
if self.thread and self.thread.is_alive(): if self.thread and self.thread.is_alive():
return return
is_gemini_cli = getattr(self.app, 'current_provider', '') == 'gemini_cli'
if not getattr(self.app, 'test_hooks_enabled', False) and not is_gemini_cli:
return
# Ensure the app has the task queue and lock initialized
if not hasattr(self.app, '_pending_gui_tasks'):
self.app._pending_gui_tasks = []
if not hasattr(self.app, '_pending_gui_tasks_lock'):
self.app._pending_gui_tasks_lock = threading.Lock()
# Initialize ask-related dictionaries
if not hasattr(self.app, '_pending_asks'):
self.app._pending_asks = {}
if not hasattr(self.app, '_ask_responses'):
self.app._ask_responses = {}
# Event queue for test script subscriptions
if not hasattr(self.app, '_api_event_queue'):
self.app._api_event_queue = []
if not hasattr(self.app, '_api_event_queue_lock'):
self.app._api_event_queue_lock = threading.Lock()
self.server = HookServerInstance(('127.0.0.1', self.port), HookHandler, self.app)
self.thread = threading.Thread(target=self.server.serve_forever, daemon=True)
self.thread.start()
logging.info(f"Hook server started on port {self.port}")
is_gemini_cli = getattr(self.app, 'current_provider', '') == 'gemini_cli' def stop(self):
if not getattr(self.app, 'test_hooks_enabled', False) and not is_gemini_cli: if self.server:
return self.server.shutdown()
self.server.server_close()
# Ensure the app has the task queue and lock initialized if self.thread:
if not hasattr(self.app, '_pending_gui_tasks'): self.thread.join()
self.app._pending_gui_tasks = [] logging.info("Hook server stopped")
if not hasattr(self.app, '_pending_gui_tasks_lock'):
self.app._pending_gui_tasks_lock = threading.Lock()
# Initialize ask-related dictionaries
if not hasattr(self.app, '_pending_asks'):
self.app._pending_asks = {}
if not hasattr(self.app, '_ask_responses'):
self.app._ask_responses = {}
# Event queue for test script subscriptions
if not hasattr(self.app, '_api_event_queue'):
self.app._api_event_queue = []
if not hasattr(self.app, '_api_event_queue_lock'):
self.app._api_event_queue_lock = threading.Lock()
self.server = HookServerInstance(('127.0.0.1', self.port), HookHandler, self.app)
self.thread = threading.Thread(target=self.server.serve_forever, daemon=True)
self.thread.start()
logging.info(f"Hook server started on port {self.port}")
def stop(self):
if self.server:
self.server.shutdown()
self.server.server_close()
if self.thread:
self.thread.join()
logging.info("Hook server stopped")

View File

@@ -3,23 +3,22 @@ import sys
import os import os
def run_diag(role, prompt): def run_diag(role, prompt):
print(f"--- Running Diag for {role} ---") print(f"--- Running Diag for {role} ---")
cmd = [sys.executable, "scripts/mma_exec.py", "--role", role, prompt] cmd = [sys.executable, "scripts/mma_exec.py", "--role", role, prompt]
try: try:
result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8') result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
print("STDOUT:") print("STDOUT:")
print(result.stdout) print(result.stdout)
print("STDERR:") print("STDERR:")
print(result.stderr) print(result.stderr)
return result.stdout return result.stdout
except Exception as e: except Exception as e:
print(f"FAILED: {e}") print(f"FAILED: {e}")
return str(e) return str(e)
if __name__ == "__main__": if __name__ == "__main__":
# Test 1: Simple read # Test 1: Simple read
print("TEST 1: read_file") print("TEST 1: read_file")
run_diag("tier3-worker", "Read the file 'pyproject.toml' and tell me the version of the project. ONLY the version string.") run_diag("tier3-worker", "Read the file 'pyproject.toml' and tell me the version of the project. ONLY the version string.")
print("\nTEST 2: run_shell_command")
print("\nTEST 2: run_shell_command") run_diag("tier3-worker", "Use run_shell_command to execute 'echo HELLO_SUBAGENT' and return the output. ONLY the output.")
run_diag("tier3-worker", "Use run_shell_command to execute 'echo HELLO_SUBAGENT' and return the output. ONLY the output.")

View File

@@ -3,55 +3,51 @@ import pytest
import os import os
def run_ps_script(role, prompt): def run_ps_script(role, prompt):
"""Helper to run the run_subagent.ps1 script.""" """Helper to run the run_subagent.ps1 script."""
# Using -File is safer and handles arguments better # Using -File is safer and handles arguments better
cmd = [ cmd = [
"powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", "powershell", "-NoProfile", "-ExecutionPolicy", "Bypass",
"-File", "./scripts/run_subagent.ps1", "-File", "./scripts/run_subagent.ps1",
"-Role", role, "-Role", role,
"-Prompt", prompt "-Prompt", prompt
] ]
result = subprocess.run(cmd, capture_output=True, text=True) result = subprocess.run(cmd, capture_output=True, text=True)
if result.stdout: if result.stdout:
print(f"\n[Sub-Agent {role} Output]:\n{result.stdout}") print(f"\n[Sub-Agent {role} Output]:\n{result.stdout}")
if result.stderr: if result.stderr:
print(f"\n[Sub-Agent {role} Error]:\n{result.stderr}") print(f"\n[Sub-Agent {role} Error]:\n{result.stderr}")
return result return result
def test_subagent_script_qa_live(): def test_subagent_script_qa_live():
"""Verify that the QA role works and returns a compressed fix.""" """Verify that the QA role works and returns a compressed fix."""
prompt = "Traceback (most recent call last): File 'test.py', line 1, in <module> 1/0 ZeroDivisionError: division by zero" prompt = "Traceback (most recent call last): File 'test.py', line 1, in <module> 1/0 ZeroDivisionError: division by zero"
result = run_ps_script("QA", prompt) result = run_ps_script("QA", prompt)
assert result.returncode == 0
assert result.returncode == 0 # Expected output should mention the fix for division by zero
# Expected output should mention the fix for division by zero assert "zero" in result.stdout.lower()
assert "zero" in result.stdout.lower() # It should be short (QA agents compress)
# It should be short (QA agents compress) assert len(result.stdout.split()) < 40
assert len(result.stdout.split()) < 40
def test_subagent_script_worker_live(): def test_subagent_script_worker_live():
"""Verify that the Worker role works and returns code.""" """Verify that the Worker role works and returns code."""
prompt = "Write a python function that returns 'hello world'" prompt = "Write a python function that returns 'hello world'"
result = run_ps_script("Worker", prompt) result = run_ps_script("Worker", prompt)
assert result.returncode == 0
assert result.returncode == 0 assert "def" in result.stdout.lower()
assert "def" in result.stdout.lower() assert "hello" in result.stdout.lower()
assert "hello" in result.stdout.lower()
def test_subagent_script_utility_live(): def test_subagent_script_utility_live():
"""Verify that the Utility role works.""" """Verify that the Utility role works."""
prompt = "Tell me 'True' if 1+1=2, otherwise 'False'" prompt = "Tell me 'True' if 1+1=2, otherwise 'False'"
result = run_ps_script("Utility", prompt) result = run_ps_script("Utility", prompt)
assert result.returncode == 0
assert result.returncode == 0 assert "true" in result.stdout.lower()
assert "true" in result.stdout.lower()
def test_subagent_isolation_live(): def test_subagent_isolation_live():
"""Verify that the sub-agent is stateless and does not see the parent's conversation context.""" """Verify that the sub-agent is stateless and does not see the parent's conversation context."""
# This prompt asks the sub-agent about a 'secret' mentioned only here, not in its prompt. # This prompt asks the sub-agent about a 'secret' mentioned only here, not in its prompt.
prompt = "What is the secret code I just told you? If I didn't tell you, say 'UNKNOWN'." prompt = "What is the secret code I just told you? If I didn't tell you, say 'UNKNOWN'."
result = run_ps_script("Utility", prompt) result = run_ps_script("Utility", prompt)
assert result.returncode == 0
assert result.returncode == 0 # A stateless agent should not know any previous context.
# A stateless agent should not know any previous context. assert "unknown" in result.stdout.lower()
assert "unknown" in result.stdout.lower()

View File

@@ -4,148 +4,137 @@ from unittest.mock import patch, MagicMock
from scripts.mma_exec import create_parser, get_role_documents, execute_agent, get_model_for_role, get_dependencies from scripts.mma_exec import create_parser, get_role_documents, execute_agent, get_model_for_role, get_dependencies
def test_parser_role_choices(): def test_parser_role_choices():
"""Test that the parser accepts valid roles and the prompt argument.""" """Test that the parser accepts valid roles and the prompt argument."""
parser = create_parser() parser = create_parser()
valid_roles = ['tier1', 'tier2', 'tier3', 'tier4'] valid_roles = ['tier1', 'tier2', 'tier3', 'tier4']
test_prompt = "Analyze the codebase for bottlenecks." test_prompt = "Analyze the codebase for bottlenecks."
for role in valid_roles:
for role in valid_roles: args = parser.parse_args(['--role', role, test_prompt])
args = parser.parse_args(['--role', role, test_prompt]) assert args.role == role
assert args.role == role assert args.prompt == test_prompt
assert args.prompt == test_prompt
def test_parser_invalid_role(): def test_parser_invalid_role():
"""Test that the parser rejects roles outside the specified choices.""" """Test that the parser rejects roles outside the specified choices."""
parser = create_parser() parser = create_parser()
with pytest.raises(SystemExit): with pytest.raises(SystemExit):
parser.parse_args(['--role', 'tier5', 'Some prompt']) parser.parse_args(['--role', 'tier5', 'Some prompt'])
def test_parser_prompt_optional(): def test_parser_prompt_optional():
"""Test that the prompt argument is optional if role is provided (or handled in main).""" """Test that the prompt argument is optional if role is provided (or handled in main)."""
parser = create_parser() parser = create_parser()
# Prompt is now optional (nargs='?') # Prompt is now optional (nargs='?')
args = parser.parse_args(['--role', 'tier3']) args = parser.parse_args(['--role', 'tier3'])
assert args.role == 'tier3' assert args.role == 'tier3'
assert args.prompt is None assert args.prompt is None
def test_parser_help(): def test_parser_help():
"""Test that the help flag works without raising errors (exits with 0).""" """Test that the help flag works without raising errors (exits with 0)."""
parser = create_parser() parser = create_parser()
with pytest.raises(SystemExit) as excinfo: with pytest.raises(SystemExit) as excinfo:
parser.parse_args(['--help']) parser.parse_args(['--help'])
assert excinfo.value.code == 0 assert excinfo.value.code == 0
def test_get_role_documents(): def test_get_role_documents():
"""Test that get_role_documents returns the correct documentation paths for each tier.""" """Test that get_role_documents returns the correct documentation paths for each tier."""
assert get_role_documents('tier1') == ['conductor/product.md', 'conductor/product-guidelines.md'] assert get_role_documents('tier1') == ['conductor/product.md', 'conductor/product-guidelines.md']
assert get_role_documents('tier2') == ['conductor/tech-stack.md', 'conductor/workflow.md'] assert get_role_documents('tier2') == ['conductor/tech-stack.md', 'conductor/workflow.md']
assert get_role_documents('tier3') == ['conductor/workflow.md'] assert get_role_documents('tier3') == ['conductor/workflow.md']
assert get_role_documents('tier4') == [] assert get_role_documents('tier4') == []
def test_get_model_for_role(): def test_get_model_for_role():
"""Test that get_model_for_role returns the correct model for each role.""" """Test that get_model_for_role returns the correct model for each role."""
assert get_model_for_role('tier1-orchestrator') == 'gemini-3.1-pro-preview' assert get_model_for_role('tier1-orchestrator') == 'gemini-3.1-pro-preview'
assert get_model_for_role('tier2-tech-lead') == 'gemini-2.5-flash-lite' assert get_model_for_role('tier2-tech-lead') == 'gemini-2.5-flash-lite'
assert get_model_for_role('tier3-worker') == 'gemini-2.5-flash-lite' assert get_model_for_role('tier3-worker') == 'gemini-2.5-flash-lite'
assert get_model_for_role('tier4-qa') == 'gemini-2.5-flash-lite' assert get_model_for_role('tier4-qa') == 'gemini-2.5-flash-lite'
def test_execute_agent(): def test_execute_agent():
""" """
Test that execute_agent calls subprocess.run with powershell and the correct gemini CLI arguments Test that execute_agent calls subprocess.run with powershell and the correct gemini CLI arguments
including the model specified for the role. including the model specified for the role.
""" """
role = "tier3-worker" role = "tier3-worker"
prompt = "Write a unit test." prompt = "Write a unit test."
docs = ["file1.py", "docs/spec.md"] docs = ["file1.py", "docs/spec.md"]
expected_model = "gemini-2.5-flash-lite"
expected_model = "gemini-2.5-flash-lite" mock_stdout = "Mocked AI Response"
with patch("subprocess.run") as mock_run:
mock_stdout = "Mocked AI Response" mock_process = MagicMock()
mock_process.stdout = mock_stdout
with patch("subprocess.run") as mock_run: mock_process.returncode = 0
mock_process = MagicMock() mock_run.return_value = mock_process
mock_process.stdout = mock_stdout result = execute_agent(role, prompt, docs)
mock_process.returncode = 0 mock_run.assert_called_once()
mock_run.return_value = mock_process args, kwargs = mock_run.call_args
cmd_list = args[0]
result = execute_agent(role, prompt, docs) assert cmd_list[0] == "powershell.exe"
assert "-Command" in cmd_list
mock_run.assert_called_once() ps_cmd = cmd_list[cmd_list.index("-Command") + 1]
args, kwargs = mock_run.call_args assert "gemini" in ps_cmd
cmd_list = args[0] assert f"--model {expected_model}" in ps_cmd
# Verify input contains the prompt and system directive
assert cmd_list[0] == "powershell.exe" input_text = kwargs.get("input")
assert "-Command" in cmd_list assert "STRICT SYSTEM DIRECTIVE" in input_text
ps_cmd = cmd_list[cmd_list.index("-Command") + 1] assert "TASK: Write a unit test." in input_text
assert "gemini" in ps_cmd assert kwargs.get("capture_output") is True
assert f"--model {expected_model}" in ps_cmd assert kwargs.get("text") is True
assert result == mock_stdout
# Verify input contains the prompt and system directive
input_text = kwargs.get("input")
assert "STRICT SYSTEM DIRECTIVE" in input_text
assert "TASK: Write a unit test." in input_text
assert kwargs.get("capture_output") is True
assert kwargs.get("text") is True
assert result == mock_stdout
def test_get_dependencies(tmp_path): def test_get_dependencies(tmp_path):
content = ( content = (
"import os\n" "import os\n"
"import sys\n" "import sys\n"
"import file_cache\n" "import file_cache\n"
"from mcp_client import something\n" "from mcp_client import something\n"
) )
filepath = tmp_path / "mock_script.py" filepath = tmp_path / "mock_script.py"
filepath.write_text(content) filepath.write_text(content)
dependencies = get_dependencies(str(filepath)) dependencies = get_dependencies(str(filepath))
assert dependencies == ['os', 'sys', 'file_cache', 'mcp_client'] assert dependencies == ['os', 'sys', 'file_cache', 'mcp_client']
import re import re
def test_execute_agent_logging(tmp_path):
log_file = tmp_path / "mma_delegation.log" def test_execute_agent_logging(tmp_path):
# mma_exec now uses logs/agents/ for individual logs and logs/mma_delegation.log for master log_file = tmp_path / "mma_delegation.log"
# We will patch LOG_FILE to point to our temp location # mma_exec now uses logs/agents/ for individual logs and logs/mma_delegation.log for master
with patch("scripts.mma_exec.LOG_FILE", str(log_file)), \ # We will patch LOG_FILE to point to our temp location
patch("subprocess.run") as mock_run: with patch("scripts.mma_exec.LOG_FILE", str(log_file)), \
mock_process = MagicMock() patch("subprocess.run") as mock_run:
mock_process.stdout = "" mock_process = MagicMock()
mock_process.returncode = 0 mock_process.stdout = ""
mock_run.return_value = mock_process mock_process.returncode = 0
test_role = "tier1" mock_run.return_value = mock_process
test_prompt = "Plan the next phase" test_role = "tier1"
execute_agent(test_role, test_prompt, []) test_prompt = "Plan the next phase"
assert log_file.exists() execute_agent(test_role, test_prompt, [])
log_content = log_file.read_text() assert log_file.exists()
assert test_role in log_content log_content = log_file.read_text()
assert test_prompt in log_content # Master log should now have the summary prompt assert test_role in log_content
assert re.search(r"\d{4}-\d{2}-\d{2}", log_content) assert test_prompt in log_content # Master log should now have the summary prompt
assert re.search(r"\d{4}-\d{2}-\d{2}", log_content)
def test_execute_agent_tier3_injection(tmp_path):
main_content = "import dependency\n\ndef run():\n dependency.do_work()\n" def test_execute_agent_tier3_injection(tmp_path):
main_file = tmp_path / "main.py" main_content = "import dependency\n\ndef run():\n dependency.do_work()\n"
main_file.write_text(main_content) main_file = tmp_path / "main.py"
dep_content = "def do_work():\n pass\n\ndef other_func():\n print('hello')\n" main_file.write_text(main_content)
dep_file = tmp_path / "dependency.py" dep_content = "def do_work():\n pass\n\ndef other_func():\n print('hello')\n"
dep_file.write_text(dep_content) dep_file = tmp_path / "dependency.py"
dep_file.write_text(dep_content)
# We need to ensure generate_skeleton is mockable or working # We need to ensure generate_skeleton is mockable or working
old_cwd = os.getcwd() old_cwd = os.getcwd()
os.chdir(tmp_path) os.chdir(tmp_path)
try: try:
with patch("subprocess.run") as mock_run: with patch("subprocess.run") as mock_run:
mock_process = MagicMock() mock_process = MagicMock()
mock_process.stdout = "OK" mock_process.stdout = "OK"
mock_process.returncode = 0 mock_process.returncode = 0
mock_run.return_value = mock_process mock_run.return_value = mock_process
execute_agent('tier3-worker', 'Modify main.py', ['main.py']) execute_agent('tier3-worker', 'Modify main.py', ['main.py'])
assert mock_run.called assert mock_run.called
input_text = mock_run.call_args[1].get("input") input_text = mock_run.call_args[1].get("input")
assert "DEPENDENCY SKELETON: dependency.py" in input_text assert "DEPENDENCY SKELETON: dependency.py" in input_text
assert "def do_work():" in input_text assert "def do_work():" in input_text
assert "Modify main.py" in input_text assert "Modify main.py" in input_text
finally: finally:
os.chdir(old_cwd) os.chdir(old_cwd)

View File

@@ -2,7 +2,7 @@ import pytest
from scripts.mma_exec import generate_skeleton from scripts.mma_exec import generate_skeleton
def test_generate_skeleton(): def test_generate_skeleton():
sample_code = ''' sample_code = '''
class Calculator: class Calculator:
"""Performs basic math operations.""" """Performs basic math operations."""
@@ -15,26 +15,21 @@ def log_message(msg):
timestamp = "2026-02-25" timestamp = "2026-02-25"
print(f"[{timestamp}] {msg}") print(f"[{timestamp}] {msg}")
''' '''
skeleton = generate_skeleton(sample_code)
skeleton = generate_skeleton(sample_code) # Check that signatures are preserved
assert "class Calculator:" in skeleton
# Check that signatures are preserved assert "def add(self, a: int, b: int) -> int:" in skeleton
assert "class Calculator:" in skeleton assert "def log_message(msg):" in skeleton
assert "def add(self, a: int, b: int) -> int:" in skeleton # Check that docstrings are preserved
assert "def log_message(msg):" in skeleton assert '"""Performs basic math operations."""' in skeleton
assert '"""Adds two numbers."""' in skeleton
# Check that docstrings are preserved # Check that implementation details are removed
assert '"""Performs basic math operations."""' in skeleton assert "result = a + b" not in skeleton
assert '"""Adds two numbers."""' in skeleton assert "return result" not in skeleton
assert "timestamp =" not in skeleton
# Check that implementation details are removed assert "print(" not in skeleton
assert "result = a + b" not in skeleton # Check that bodies are replaced with ellipsis
assert "return result" not in skeleton assert "..." in skeleton
assert "timestamp =" not in skeleton
assert "print(" not in skeleton
# Check that bodies are replaced with ellipsis
assert "..." in skeleton
if __name__ == "__main__": if __name__ == "__main__":
pytest.main([__file__]) pytest.main([__file__])

View File

@@ -9,5 +9,5 @@ This file tracks all major tracks for the project. Each track has its own detail
--- ---
- [ ] **Track: AI-Optimized Python Style Refactor** - [~] **Track: AI-Optimized Python Style Refactor**
*Link: [./tracks/python_style_refactor_20260227/](./tracks/python_style_refactor_20260227/)* *Link: [./tracks/python_style_refactor_20260227/](./tracks/python_style_refactor_20260227/)*

View File

@@ -6,14 +6,18 @@
- [x] Task: Conductor - User Manual Verification 'Phase 1: Pilot and Tooling' (Protocol in workflow.md) [checkpoint: Phase1] - [x] Task: Conductor - User Manual Verification 'Phase 1: Pilot and Tooling' (Protocol in workflow.md) [checkpoint: Phase1]
## Phase 2: Core Refactor - Indentation and Newlines ## Phase 2: Core Refactor - Indentation and Newlines
- [~] Task: Conductor - Refactor Primary Engine Modules (`ai_client.py`, `aggregate.py`, `mcp_client.py`, `shell_runner.py`). - [x] Task: Conductor - Refactor Primary Engine Modules (`ai_client.py`, `aggregate.py`, `mcp_client.py`, `shell_runner.py`). [db65162]
- [ ] Task: Conductor - Refactor Project & Session Management Modules (`project_manager.py`, `session_logger.py`). - [x] Task: Conductor - Refactor Project & Session Management Modules (`project_manager.py`, `session_logger.py`). [db65162]
- [ ] Task: Conductor - Refactor UI Modules (`gui_2.py`, `gui_legacy.py`, `theme.py`, `theme_2.py`). - [x] Task: Conductor - Refactor UI Modules (`gui_2.py`, `gui_legacy.py`, `theme.py`, `theme_2.py`). [db65162]
- [ ] Task: Conductor - Refactor Remaining Utility and Support Modules (`events.py`, `file_cache.py`, `models.py`, `mma_prompts.py`). - [x] Task: Conductor - Refactor Remaining Utility and Support Modules (`events.py`, `file_cache.py`, `models.py`, `mma_prompts.py`). [db65162]
- [ ] Task: Conductor - User Manual Verification 'Phase 2: Indentation and Newline Refactor' (Protocol in workflow.md) - [x] Task: Conductor - User Manual Verification 'Phase 2: Indentation and Newline Refactor' (Protocol in workflow.md) [checkpoint: Phase2]
## Phase 3: AI-Optimized Metadata and Final Cleanup ## Phase 3: AI-Optimized Metadata and Final Cleanup
- [ ] Task: Conductor - Implement Strict Type Hinting and Compact Imports across the Entire Codebase. - [~] Task: Conductor - Implement Strict Type Hinting across the Entire Codebase.
- [x] Engine Core (`ai_client.py`, `mcp_client.py`, `aggregate.py`, `shell_runner.py`)
- [x] Develop/Integrate Surgical AST Tools in `mcp_client.py` and `tools.json`.
- [x] Management Modules (project_manager.py, session_logger.py) [19c28a1]
- [~] UI Modules (`gui_2.py`, `gui_legacy.py`)
- [ ] Task: Conductor - Update `conductor/code_styleguides/python.md` with the new AI-optimized standard. - [ ] Task: Conductor - Update `conductor/code_styleguides/python.md` with the new AI-optimized standard.
- [ ] Task: Conductor - User Manual Verification 'Phase 3: Metadata and Final Documentation' (Protocol in workflow.md) - [ ] Task: Conductor - User Manual Verification 'Phase 3: Metadata and Final Documentation' (Protocol in workflow.md)

View File

@@ -8,7 +8,6 @@ Refactor the Python codebase to a "Single-Space, Ultra-Compact" style specifical
- **Newlines (Ultra-Compact):** - **Newlines (Ultra-Compact):**
- Maximum **one (1)** blank line between top-level definitions (classes, functions). - Maximum **one (1)** blank line between top-level definitions (classes, functions).
- **Zero (0)** blank lines inside function or method bodies. - **Zero (0)** blank lines inside function or method bodies.
- **Imports (Compact):** Consolidate imports into compact blocks to reduce vertical space.
- **Typing (Strict):** Ensure all function and method signatures include strict type hints for `Args` and `Returns`. - **Typing (Strict):** Ensure all function and method signatures include strict type hints for `Args` and `Returns`.
- **Scope:** - **Scope:**
- Target: All `.py` files in the project root and subdirectories. - Target: All `.py` files in the project root and subdirectories.
@@ -19,14 +18,22 @@ Refactor the Python codebase to a "Single-Space, Ultra-Compact" style specifical
- **AST Compatibility:** The style must not interfere with existing AST tools (`ast`, `tree-sitter`) used for interface extraction and code outlines. - **AST Compatibility:** The style must not interfere with existing AST tools (`ast`, `tree-sitter`) used for interface extraction and code outlines.
- **Token Efficiency:** The primary goal is to reduce the total token count of the codebase. - **Token Efficiency:** The primary goal is to reduce the total token count of the codebase.
## 4. Acceptance Criteria ## 4. Current Status (Progress Checkpoint)
- [ ] Codebase indentation is uniformly 1 space. - **Phase 1: Completed.** Tooling developed (`scripts/ai_style_formatter.py`) and verified.
- [ ] No `.py` file contains consecutive blank lines. - **Phase 2: Completed.** Global codebase refactor for indentation and ultra-compact newlines (including 1-line gap before definitions) applied to all Python files.
- [ ] No `.py` file contains blank lines within function or method bodies. - **Phase 3: In Progress.**
- [ ] All functions/methods have complete type hints. - **Surgical Tooling:** New tools added to `mcp_client.py` and `.gemini/tools.json`: `get_file_slice`, `set_file_slice`, `py_update_definition`, `py_get_signature`, `py_set_signature`, `py_get_class_summary`, `py_get_var_declaration`, `py_set_var_declaration`.
- [ ] Application remains functional and passes existing tests. - **Core Typing:** `ai_client.py`, `mcp_client.py`, `aggregate.py`, `shell_runner.py` fully updated with strict type hints.
- **Remaining:** `project_manager.py`, `session_logger.py`, `gui_2.py`, `gui_legacy.py` need strict typing.
## 5. Out of Scope ## 5. Acceptance Criteria
- [x] Codebase indentation is uniformly 1 space.
- [x] No `.py` file contains consecutive blank lines.
- [x] No `.py` file contains blank lines within function or method bodies.
- [~] All functions/methods have complete type hints (Core Engine complete, UI/Manager pending).
- [x] Application remains functional and passes existing tests.
## 6. Out of Scope
- Architectural changes or logic refactoring. - Architectural changes or logic refactoring.
- Modification of non-Python files (e.g., `.md`, `.toml`, `.ps1`). - Modification of non-Python files (e.g., `.md`, `.toml`, `.ps1`).
- Breaking PEP 8 compliance where it's not strictly necessary for token reduction (though indentation and blank lines are explicitly targeted). - Import compaction (discarded per user request).

View File

@@ -5,7 +5,7 @@
- [x] Task: Implement helper methods in `ApiHookClient` for querying specific DearPyGui item states (e.g., `get_text_value`, `get_node_status`). 2a30e62 - [x] Task: Implement helper methods in `ApiHookClient` for querying specific DearPyGui item states (e.g., `get_text_value`, `get_node_status`). 2a30e62
## Phase 2: Epic & Track Verification ## Phase 2: Epic & Track Verification
- [ ] Task: Write the simulation routine to trigger a new Epic and verify the Track Browser updates correctly. - [~] Task: Write the simulation routine to trigger a new Epic and verify the Track Browser updates correctly.
- [ ] Task: Verify that selecting a newly generated track successfully loads its initial (empty) state into the DAG visualizer. - [ ] Task: Verify that selecting a newly generated track successfully loads its initial (empty) state into the DAG visualizer.
## Phase 3: DAG & Spawn Interception Verification ## Phase 3: DAG & Spawn Interception Verification

View File

@@ -4,85 +4,76 @@ import mma_prompts
import re import re
def generate_tickets(track_brief: str, module_skeletons: str) -> list[dict]: def generate_tickets(track_brief: str, module_skeletons: str) -> list[dict]:
""" """
Tier 2 (Tech Lead) call. Tier 2 (Tech Lead) call.
Breaks down a Track Brief and module skeletons into discrete Tier 3 Tickets. Breaks down a Track Brief and module skeletons into discrete Tier 3 Tickets.
""" """
# 1. Set Tier 2 Model (Tech Lead - Flash) # 1. Set Tier 2 Model (Tech Lead - Flash)
ai_client.set_provider('gemini', 'gemini-2.5-flash-lite') ai_client.set_provider('gemini', 'gemini-2.5-flash-lite')
ai_client.reset_session() ai_client.reset_session()
# 2. Construct Prompt
# 2. Construct Prompt system_prompt = mma_prompts.PROMPTS.get("tier2_sprint_planning")
system_prompt = mma_prompts.PROMPTS.get("tier2_sprint_planning") user_message = (
f"### TRACK BRIEF:\n{track_brief}\n\n"
user_message = ( f"### MODULE SKELETONS:\n{module_skeletons}\n\n"
f"### TRACK BRIEF:\n{track_brief}\n\n" "Please generate the implementation tickets for this track."
f"### MODULE SKELETONS:\n{module_skeletons}\n\n" )
"Please generate the implementation tickets for this track." # Set custom system prompt for this call
) old_system_prompt = ai_client._custom_system_prompt
ai_client.set_custom_system_prompt(system_prompt)
# Set custom system prompt for this call try:
old_system_prompt = ai_client._custom_system_prompt # 3. Call Tier 2 Model
ai_client.set_custom_system_prompt(system_prompt) response = ai_client.send(
md_content="",
try: user_message=user_message
# 3. Call Tier 2 Model )
response = ai_client.send( # 4. Parse JSON Output
md_content="", # Extract JSON array from markdown code blocks if present
user_message=user_message json_match = response.strip()
) if "```json" in json_match:
json_match = json_match.split("```json")[1].split("```")[0].strip()
# 4. Parse JSON Output elif "```" in json_match:
# Extract JSON array from markdown code blocks if present json_match = json_match.split("```")[1].split("```")[0].strip()
json_match = response.strip() # If it's still not valid JSON, try to find a [ ... ] block
if "```json" in json_match: if not (json_match.startswith('[') and json_match.endswith(']')):
json_match = json_match.split("```json")[1].split("```")[0].strip() match = re.search(r'\[\s*\{.*\}\s*\]', json_match, re.DOTALL)
elif "```" in json_match: if match:
json_match = json_match.split("```")[1].split("```")[0].strip() json_match = match.group(0)
tickets = json.loads(json_match)
# If it's still not valid JSON, try to find a [ ... ] block return tickets
if not (json_match.startswith('[') and json_match.endswith(']')): except Exception as e:
match = re.search(r'\[\s*\{.*\}\s*\]', json_match, re.DOTALL) print(f"Error parsing Tier 2 response: {e}")
if match: # print(f"Raw response: {response}")
json_match = match.group(0) return []
finally:
tickets = json.loads(json_match) # Restore old system prompt
return tickets ai_client.set_custom_system_prompt(old_system_prompt)
except Exception as e:
print(f"Error parsing Tier 2 response: {e}")
# print(f"Raw response: {response}")
return []
finally:
# Restore old system prompt
ai_client.set_custom_system_prompt(old_system_prompt)
from dag_engine import TrackDAG from dag_engine import TrackDAG
from models import Ticket from models import Ticket
def topological_sort(tickets: list[dict]) -> list[dict]: def topological_sort(tickets: list[dict]) -> list[dict]:
""" """
Sorts a list of tickets based on their 'depends_on' field. Sorts a list of tickets based on their 'depends_on' field.
Raises ValueError if a circular dependency or missing internal dependency is detected. Raises ValueError if a circular dependency or missing internal dependency is detected.
""" """
# 1. Convert to Ticket objects for TrackDAG # 1. Convert to Ticket objects for TrackDAG
ticket_objs = [] ticket_objs = []
for t_data in tickets: for t_data in tickets:
ticket_objs.append(Ticket.from_dict(t_data)) ticket_objs.append(Ticket.from_dict(t_data))
# 2. Use TrackDAG for validation and sorting
# 2. Use TrackDAG for validation and sorting dag = TrackDAG(ticket_objs)
dag = TrackDAG(ticket_objs) try:
try: sorted_ids = dag.topological_sort()
sorted_ids = dag.topological_sort() except ValueError as e:
except ValueError as e: raise ValueError(f"DAG Validation Error: {e}")
raise ValueError(f"DAG Validation Error: {e}") # 3. Return sorted dictionaries
ticket_map = {t['id']: t for t in tickets}
# 3. Return sorted dictionaries return [ticket_map[tid] for tid in sorted_ids]
ticket_map = {t['id']: t for t in tickets}
return [ticket_map[tid] for tid in sorted_ids]
if __name__ == "__main__": if __name__ == "__main__":
# Quick test if run directly # Quick test if run directly
test_brief = "Implement a new feature." test_brief = "Implement a new feature."
test_skeletons = "class NewFeature: pass" test_skeletons = "class NewFeature: pass"
tickets = generate_tickets(test_brief, test_skeletons) tickets = generate_tickets(test_brief, test_skeletons)
print(json.dumps(tickets, indent=2)) print(json.dumps(tickets, indent=2))

View File

@@ -22,7 +22,7 @@ paths = [
"C:\\projects\\manual_slop\\tests\\temp_livetoolssim.toml", "C:\\projects\\manual_slop\\tests\\temp_livetoolssim.toml",
"C:\\projects\\manual_slop\\tests\\temp_liveexecutionsim.toml", "C:\\projects\\manual_slop\\tests\\temp_liveexecutionsim.toml",
] ]
active = "C:\\projects\\manual_slop\\tests\\temp_project.toml" active = "C:\\projects\\manual_slop\\tests\\temp_liveexecutionsim.toml"
[gui.show_windows] [gui.show_windows]
"Context Hub" = true "Context Hub" = true

View File

@@ -2,160 +2,152 @@ from typing import List, Optional
from models import Ticket from models import Ticket
class TrackDAG: class TrackDAG:
""" """
Manages a Directed Acyclic Graph of implementation tickets. Manages a Directed Acyclic Graph of implementation tickets.
Provides methods for dependency resolution, cycle detection, and topological sorting. Provides methods for dependency resolution, cycle detection, and topological sorting.
""" """
def __init__(self, tickets: List[Ticket]):
""" def __init__(self, tickets: List[Ticket]):
"""
Initializes the TrackDAG with a list of Ticket objects. Initializes the TrackDAG with a list of Ticket objects.
Args: Args:
tickets: A list of Ticket instances defining the graph nodes and edges. tickets: A list of Ticket instances defining the graph nodes and edges.
""" """
self.tickets = tickets self.tickets = tickets
self.ticket_map = {t.id: t for t in tickets} self.ticket_map = {t.id: t for t in tickets}
def get_ready_tasks(self) -> List[Ticket]: def get_ready_tasks(self) -> List[Ticket]:
""" """
Returns a list of tickets that are in 'todo' status and whose dependencies are all 'completed'. Returns a list of tickets that are in 'todo' status and whose dependencies are all 'completed'.
Returns: Returns:
A list of Ticket objects ready for execution. A list of Ticket objects ready for execution.
""" """
ready = [] ready = []
for ticket in self.tickets: for ticket in self.tickets:
if ticket.status == 'todo': if ticket.status == 'todo':
# Check if all dependencies exist and are completed # Check if all dependencies exist and are completed
all_done = True all_done = True
for dep_id in ticket.depends_on: for dep_id in ticket.depends_on:
dep = self.ticket_map.get(dep_id) dep = self.ticket_map.get(dep_id)
if not dep or dep.status != 'completed': if not dep or dep.status != 'completed':
all_done = False all_done = False
break break
if all_done: if all_done:
ready.append(ticket) ready.append(ticket)
return ready return ready
def has_cycle(self) -> bool: def has_cycle(self) -> bool:
""" """
Performs a Depth-First Search to detect cycles in the dependency graph. Performs a Depth-First Search to detect cycles in the dependency graph.
Returns: Returns:
True if a cycle is detected, False otherwise. True if a cycle is detected, False otherwise.
""" """
visited = set() visited = set()
rec_stack = set() rec_stack = set()
def is_cyclic(ticket_id: str) -> bool: def is_cyclic(ticket_id: str) -> bool:
"""Internal recursive helper for cycle detection.""" """Internal recursive helper for cycle detection."""
if ticket_id in rec_stack: if ticket_id in rec_stack:
return True return True
if ticket_id in visited: if ticket_id in visited:
return False return False
visited.add(ticket_id)
rec_stack.add(ticket_id)
ticket = self.ticket_map.get(ticket_id)
if ticket:
for neighbor in ticket.depends_on:
if is_cyclic(neighbor):
return True
rec_stack.remove(ticket_id)
return False
for ticket in self.tickets:
if ticket.id not in visited:
if is_cyclic(ticket.id):
return True
return False
visited.add(ticket_id) def topological_sort(self) -> List[str]:
rec_stack.add(ticket_id) """
ticket = self.ticket_map.get(ticket_id)
if ticket:
for neighbor in ticket.depends_on:
if is_cyclic(neighbor):
return True
rec_stack.remove(ticket_id)
return False
for ticket in self.tickets:
if ticket.id not in visited:
if is_cyclic(ticket.id):
return True
return False
def topological_sort(self) -> List[str]:
"""
Returns a list of ticket IDs in topological order (dependencies before dependents). Returns a list of ticket IDs in topological order (dependencies before dependents).
Returns: Returns:
A list of ticket ID strings. A list of ticket ID strings.
Raises: Raises:
ValueError: If a dependency cycle is detected. ValueError: If a dependency cycle is detected.
""" """
if self.has_cycle(): if self.has_cycle():
raise ValueError("Dependency cycle detected") raise ValueError("Dependency cycle detected")
visited = set()
stack = []
visited = set() def visit(ticket_id: str):
stack = [] """Internal recursive helper for topological sorting."""
if ticket_id in visited:
def visit(ticket_id: str): return
"""Internal recursive helper for topological sorting.""" visited.add(ticket_id)
if ticket_id in visited: ticket = self.ticket_map.get(ticket_id)
return if ticket:
visited.add(ticket_id) for dep_id in ticket.depends_on:
ticket = self.ticket_map.get(ticket_id) visit(dep_id)
if ticket: stack.append(ticket_id)
for dep_id in ticket.depends_on: for ticket in self.tickets:
visit(dep_id) visit(ticket.id)
stack.append(ticket_id) return stack
for ticket in self.tickets:
visit(ticket.id)
return stack
class ExecutionEngine:
    """
    A state machine that governs the progression of tasks within a TrackDAG.
    Handles automatic queueing and manual task approval.
    """

    def __init__(self, dag: TrackDAG, auto_queue: bool = False):
        """
        Initializes the ExecutionEngine.
        Args:
            dag: The TrackDAG instance to manage.
            auto_queue: If True, ready tasks will automatically move to 'in_progress'.
        """
        self.dag = dag
        self.auto_queue = auto_queue

    def tick(self) -> List[Ticket]:
        """
        Evaluates the DAG and returns the tasks that are currently 'ready' for
        execution. With auto_queue enabled, ready tasks not flagged as
        step_mode are promoted straight to 'in_progress'.
        Returns:
            A list of ready Ticket objects.
        """
        ready_tickets = self.dag.get_ready_tasks()
        if not self.auto_queue:
            return ready_tickets
        for candidate in ready_tickets:
            if candidate.step_mode:
                continue  # step-mode tasks wait for an explicit approval
            candidate.status = "in_progress"
        return ready_tickets

    def approve_task(self, task_id: str):
        """
        Manually transitions a task from 'todo' to 'in_progress' if all of its
        dependencies are completed.
        Args:
            task_id: The ID of the task to approve.
        """
        ticket = self.dag.ticket_map.get(task_id)
        if not ticket or ticket.status != "todo":
            return
        dependencies = (self.dag.ticket_map.get(dep_id) for dep_id in ticket.depends_on)
        if all(dep and dep.status == "completed" for dep in dependencies):
            ticket.status = "in_progress"

    def update_task_status(self, task_id: str, status: str):
        """
        Force-updates the status of a specific task.
        Args:
            task_id: The ID of the task.
            status: The new status string (e.g., 'todo', 'in_progress', 'completed', 'blocked').
        """
        target = self.dag.ticket_map.get(task_id)
        if target:
            target.status = status

View File

@@ -10,9 +10,9 @@ parser = tree_sitter.Parser(PY_LANGUAGE)
tree = parser.parse(bytes(code, "utf8"))

def walk(node, indent=0):
    """Recursively print the parse tree with byte spans and a content preview."""
    snippet = code[node.start_byte:node.end_byte].strip()
    print(f"{' ' * indent}{node.type} ({node.start_byte}-{node.end_byte}): {snippet[:20]}")
    for sub in node.children:
        walk(sub, indent + 1)

walk(tree.root_node)

View File

@@ -2,81 +2,77 @@ import tree_sitter
import tree_sitter_python import tree_sitter_python
class ASTParser:
    """
    Debug variant of the file_cache ASTParser used to trace curated-view
    extraction; the verbose print statements are intentional.
    """

    def __init__(self, language: str):
        # NOTE(review): the language argument is currently ignored; the Python
        # grammar is always loaded — confirm whether other languages are planned.
        self.language = tree_sitter.Language(tree_sitter_python.language())
        self.parser = tree_sitter.Parser(self.language)

    def parse(self, code: str) -> tree_sitter.Tree:
        """Parse the given code and return the tree-sitter Tree."""
        return self.parser.parse(bytes(code, "utf8"))

    def get_curated_view(self, code: str) -> str:
        """
        Returns a curated skeleton: function bodies are replaced with '...'
        unless the function carries a @core_logic decorator or a [HOT]
        comment; leading docstrings are preserved.
        """
        tree = self.parse(code)
        # (start_byte, end_byte, replacement) triples, applied in reverse.
        edits = []

        def is_docstring(node):
            # A docstring is an expression_statement whose first child is a string.
            if node.type == "expression_statement" and node.child_count > 0:
                if node.children[0].type == "string":
                    return True
            return False

        def has_core_logic_decorator(node):
            # Decorated functions are wrapped in a decorated_definition parent.
            parent = node.parent
            if parent and parent.type == "decorated_definition":
                for child in parent.children:
                    if child.type == "decorator":
                        if "@core_logic" in code[child.start_byte:child.end_byte]:
                            return True
            return False

        def has_hot_comment(func_node):
            # Scan all descendants of the function for a [HOT] comment.
            print(f"Checking {code[func_node.start_byte:func_node.start_byte+20].strip()}...")
            stack = [func_node]
            while stack:
                curr = stack.pop()
                if curr.type == "comment":
                    comment_text = code[curr.start_byte:curr.end_byte]
                    print(f" Found comment: {comment_text}")
                    if "[HOT]" in comment_text:
                        print(" [HOT] FOUND!")
                        return True
                for child in curr.children:
                    stack.append(child)
            return False

        def walk(node):
            if node.type == "function_definition":
                body = node.child_by_field_name("body")
                if body and body.type == "block":
                    preserve = has_core_logic_decorator(node) or has_hot_comment(node)
                    print(f"Function {code[node.start_byte:node.start_byte+20].strip()}, preserve={preserve}")
                    if not preserve:
                        indent = " " * body.start_point.column
                        # Locate the first non-comment statement of the body.
                        first_stmt = None
                        for child in body.children:
                            if child.type != "comment":
                                first_stmt = child
                                break
                        if first_stmt and is_docstring(first_stmt):
                            start_byte = first_stmt.end_byte
                            end_byte = body.end_byte
                            if end_byte > start_byte:
                                # BUG FIX: was "\\n" (a literal backslash-n), which
                                # corrupted the output; the canonical file_cache
                                # version inserts a real newline here.
                                edits.append((start_byte, end_byte, "\n" + indent + "..."))
                        else:
                            start_byte = body.start_byte
                            end_byte = body.end_byte
                            edits.append((start_byte, end_byte, "..."))
            for child in node.children:
                walk(child)

        walk(tree.root_node)
        # Apply edits in reverse byte order so earlier offsets stay valid.
        edits.sort(key=lambda x: x[0], reverse=True)
        code_bytes = bytearray(code, "utf8")
        for start, end, replacement in edits:
            code_bytes[start:end] = bytes(replacement, "utf8")
        return code_bytes.decode("utf8")
parser = ASTParser("python") parser = ASTParser("python")
code = ''' code = '''

View File

@@ -5,27 +5,28 @@ import asyncio
from typing import Callable, Any, Dict, List, Tuple from typing import Callable, Any, Dict, List, Tuple
class EventEmitter:
    """
    Simple event emitter for decoupled communication between modules.
    """

    def __init__(self):
        """Initializes the EventEmitter with an empty listener map."""
        self._listeners: Dict[str, List[Callable]] = {}

    def on(self, event_name: str, callback: Callable):
        """
        Registers a callback for a specific event.
        Args:
            event_name: The name of the event to listen for.
            callback: The function to call when the event is emitted.
        """
        self._listeners.setdefault(event_name, []).append(callback)

    def emit(self, event_name: str, *args: Any, **kwargs: Any):
        """
        Emits an event, invoking every registered callback in registration order.
        Args:
            event_name: The name of the event to emit.
            *args: Positional arguments to pass to callbacks.
            **kwargs: Keyword arguments to pass to callbacks.
        """
        for listener in self._listeners.get(event_name, []):
            listener(*args, **kwargs)
class AsyncEventQueue:
    """
    Asynchronous event queue for decoupled communication using asyncio.Queue.
    """

    def __init__(self):
        """Initializes the AsyncEventQueue with an internal asyncio.Queue."""
        self._queue: asyncio.Queue = asyncio.Queue()

    async def put(self, event_name: str, payload: Any = None):
        """
        Enqueues an event.
        Args:
            event_name: The name of the event.
            payload: Optional data associated with the event.
        """
        item = (event_name, payload)
        await self._queue.put(item)

    async def get(self) -> Tuple[str, Any]:
        """
        Dequeues the next event, waiting if none is available.
        Returns:
            A tuple containing (event_name, payload).
        """
        event = await self._queue.get()
        return event
class UserRequestEvent:
    """
    Payload for a user request event.
    """

    def __init__(self, prompt: str, stable_md: str, file_items: List[Any], disc_text: str, base_dir: str):
        # Capture every request field verbatim.
        self.prompt = prompt
        self.stable_md = stable_md
        self.file_items = file_items
        self.disc_text = disc_text
        self.base_dir = base_dir

    def to_dict(self) -> Dict[str, Any]:
        """Return the event as a plain dict suitable for serialization."""
        field_names = ("prompt", "stable_md", "file_items", "disc_text", "base_dir")
        return {name: getattr(self, name) for name in field_names}

View File

@@ -10,164 +10,148 @@ from typing import Optional
import tree_sitter import tree_sitter
import tree_sitter_python import tree_sitter_python
class ASTParser:
    """
    Parser for extracting AST-based views of source code.
    Currently supports Python.
    """

    def __init__(self, language: str):
        # Only Python is wired up; fail fast for anything else.
        if language != "python":
            raise ValueError(f"Language '{language}' not supported yet.")
        self.language_name = language
        # Load the tree-sitter language grammar
        self.language = tree_sitter.Language(tree_sitter_python.language())
        self.parser = tree_sitter.Parser(self.language)

    def parse(self, code: str) -> tree_sitter.Tree:
        """Parse the given code and return the tree-sitter Tree."""
        return self.parser.parse(bytes(code, "utf8"))

    def get_skeleton(self, code: str) -> str:
        """
        Returns a skeleton of a Python file (preserving docstrings, stripping function bodies).
        """
        tree = self.parse(code)
        # Collected as (start_byte, end_byte, replacement) triples.
        edits = []

        def is_docstring(node):
            # A docstring is an expression_statement whose first child is a string.
            if node.type == "expression_statement" and node.child_count > 0:
                if node.children[0].type == "string":
                    return True
            return False

        def walk(node):
            if node.type == "function_definition":
                body = node.child_by_field_name("body")
                if body and body.type == "block":
                    indent = " " * body.start_point.column
                    # Locate the first non-comment statement of the body.
                    first_stmt = None
                    for child in body.children:
                        if child.type != "comment":
                            first_stmt = child
                            break
                    if first_stmt and is_docstring(first_stmt):
                        # Keep the docstring, elide the rest of the body.
                        start_byte = first_stmt.end_byte
                        end_byte = body.end_byte
                        if end_byte > start_byte:
                            edits.append((start_byte, end_byte, f"\n{indent}..."))
                    else:
                        # No docstring: elide the whole body.
                        start_byte = body.start_byte
                        end_byte = body.end_byte
                        edits.append((start_byte, end_byte, "..."))
            for child in node.children:
                walk(child)

        walk(tree.root_node)
        # Apply edits in reverse to maintain byte offsets
        edits.sort(key=lambda x: x[0], reverse=True)
        code_bytes = bytearray(code, "utf8")
        for start, end, replacement in edits:
            code_bytes[start:end] = bytes(replacement, "utf8")
        return code_bytes.decode("utf8")

    def get_curated_view(self, code: str) -> str:
        """
        Returns a curated skeleton of a Python file.
        Preserves function bodies if they have @core_logic decorator or # [HOT] comment.
        Otherwise strips bodies but preserves docstrings.
        """
        tree = self.parse(code)
        # Collected as (start_byte, end_byte, replacement) triples.
        edits = []

        def is_docstring(node):
            # A docstring is an expression_statement whose first child is a string.
            if node.type == "expression_statement" and node.child_count > 0:
                if node.children[0].type == "string":
                    return True
            return False

        def has_core_logic_decorator(node):
            # Check if parent is decorated_definition
            parent = node.parent
            if parent and parent.type == "decorated_definition":
                for child in parent.children:
                    if child.type == "decorator":
                        # decorator -> ( '@', identifier ) or ( '@', call )
                        if "@core_logic" in code[child.start_byte:child.end_byte]:
                            return True
            return False

        def has_hot_comment(func_node):
            # Check all descendants of the function_definition for a [HOT] comment
            stack = [func_node]
            while stack:
                curr = stack.pop()
                if curr.type == "comment":
                    comment_text = code[curr.start_byte:curr.end_byte]
                    if "[HOT]" in comment_text:
                        return True
                for child in curr.children:
                    stack.append(child)
            return False

        def walk(node):
            if node.type == "function_definition":
                body = node.child_by_field_name("body")
                if body and body.type == "block":
                    # Check if we should preserve it
                    preserve = has_core_logic_decorator(node) or has_hot_comment(node)
                    if not preserve:
                        indent = " " * body.start_point.column
                        # Locate the first non-comment statement of the body.
                        first_stmt = None
                        for child in body.children:
                            if child.type != "comment":
                                first_stmt = child
                                break
                        if first_stmt and is_docstring(first_stmt):
                            # Keep the docstring, elide the rest of the body.
                            start_byte = first_stmt.end_byte
                            end_byte = body.end_byte
                            if end_byte > start_byte:
                                edits.append((start_byte, end_byte, f"\n{indent}..."))
                        else:
                            # No docstring: elide the whole body.
                            start_byte = body.start_byte
                            end_byte = body.end_byte
                            edits.append((start_byte, end_byte, "..."))
            for child in node.children:
                walk(child)

        walk(tree.root_node)
        # Apply edits in reverse to maintain byte offsets
        edits.sort(key=lambda x: x[0], reverse=True)
        code_bytes = bytearray(code, "utf8")
        for start, end, replacement in edits:
            code_bytes[start:end] = bytes(replacement, "utf8")
        return code_bytes.decode("utf8")
def reset_client():
    """No-op stub of the cache API; presumably resets a cached client — confirm intended behavior."""
    pass
def content_block_type(path: Path) -> str:
    """Stub of the cache API: always reports 'unsupported' regardless of path."""
    return "unsupported"
def get_file_id(path: Path) -> Optional[str]:
    """Stub of the cache API: no file IDs are tracked, so this always returns None."""
    return None
def evict(path: Path):
    """No-op stub of the cache API: eviction does nothing in this build."""
    pass
def list_cached() -> list[dict]:
    """Stub of the cache API: no entries are tracked, so this always returns an empty list."""
    return []

View File

@@ -8,28 +8,28 @@ _client = None
_chat = None _chat = None
def _load_key() -> str: def _load_key() -> str:
with open("credentials.toml", "rb") as f: with open("credentials.toml", "rb") as f:
return tomllib.load(f)["gemini"]["api_key"] return tomllib.load(f)["gemini"]["api_key"]
def _ensure_client():
    """Lazily create the module-level genai client on first use."""
    global _client
    if _client is None:
        _client = genai.Client(api_key=_load_key())
def _ensure_chat():
    """Lazily create the module-level chat session (model gemini-2.0-flash), creating the client first if needed."""
    global _chat
    if _chat is None:
        _ensure_client()
        _chat = _client.chats.create(model="gemini-2.0-flash")
def send(md_content: str, user_message: str) -> str:
    """
    Send a message to the shared chat session, wrapping md_content in a
    <context> block, and return the model's reply text.
    """
    global _chat
    _ensure_chat()
    payload = f"<context>\n{md_content}\n</context>\n\n{user_message}"
    return _chat.send_message(payload).text
def reset_session():
    """Drop the cached client and chat so the next send() starts a fresh session."""
    global _client, _chat
    _client = None
    _chat = None

View File

@@ -6,135 +6,118 @@ import os
import session_logger # Import session_logger import session_logger # Import session_logger
class GeminiCliAdapter:
    """
    Adapter that drives the external `gemini` CLI binary via subprocess,
    parsing its stream-json output into reply text, tool calls and usage stats.
    """

    def __init__(self, binary_path="gemini"):
        # Path or name of the gemini CLI executable.
        self.binary_path = binary_path
        # Usage/stats dict from the most recent call, if the CLI reported one.
        self.last_usage = None
        # CLI session id captured from 'init'/'result' messages; used for --resume.
        self.session_id = None
        # Wall-clock latency in seconds of the most recent send().
        self.last_latency = 0.0

    def count_tokens(self, contents: list[str]) -> int:
        """
        Counts the tokens for a list of string contents using a character-based estimation.
        Approximates tokens by assuming 4 characters per token.
        This replaces the broken 'gemini count' CLI call.
        """
        input_text = "\n".join(contents)
        total_chars = len(input_text)
        estimated_tokens = total_chars // 4
        return estimated_tokens

    def send(self, message, safety_settings=None, system_instruction=None, model: str = None):
        """
        Sends a message to the Gemini CLI and processes the streaming JSON output.
        Logs the CLI call details using session_logger.log_cli_call.
        System instruction is prepended to the message.
        Uses --prompt flag with a placeholder and sends the content via stdin.

        Args:
            message: The user message text to send.
            safety_settings: Accepted for interface parity but not used here.
            system_instruction: Optional text prepended to the message.
            model: Optional model name passed to the CLI via -m.
        Returns:
            A dict with 'text' (accumulated assistant output), 'tool_calls'
            (normalized tool-use entries) and 'stderr' (raw CLI stderr).
        """
        start_time = time.time()
        command_parts = [self.binary_path]
        if model:
            command_parts.extend(['-m', f'"{model}"'])
        # Use an empty string placeholder.
        command_parts.extend(['--prompt', '""'])
        if self.session_id:
            command_parts.extend(['--resume', self.session_id])
        command_parts.extend(['--output-format', 'stream-json'])
        # NOTE(review): command is a joined string run with shell=True; values
        # such as binary_path/session_id are not shell-quoted — confirm inputs
        # are trusted or switch to a list with shell=False.
        command = " ".join(command_parts)
        # Construct the prompt text by prepending system_instruction if available
        prompt_text = message
        if system_instruction:
            prompt_text = f"{system_instruction}\n\n{message}"
        accumulated_text = ""
        tool_calls = []
        env = os.environ.copy()
        env["GEMINI_CLI_HOOK_CONTEXT"] = "manual_slop"
        process = None
        stdout_content = ""
        stderr_content = ""
        stdin_content = prompt_text
        try:
            process = subprocess.Popen(
                command,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                shell=True,
                env=env
            )
            # The real prompt goes over stdin; --prompt only carried a placeholder.
            stdout_output, stderr_output = process.communicate(input=prompt_text)
            stdout_content = stdout_output
            stderr_content = stderr_output
            # Each stdout line is (expected to be) one JSON object of the stream.
            for line in stdout_content.splitlines():
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                    msg_type = data.get("type")
                    if msg_type == "init":
                        if "session_id" in data:
                            self.session_id = data.get("session_id")
                    elif msg_type == "message":
                        # CRITICAL: Only accumulate content from the assistant/model role.
                        # The CLI echoes back the 'user' prompt in the stream, which we must skip.
                        role = data.get("role", "")
                        if role in ["assistant", "model"]:
                            content = data.get("content", data.get("text"))
                            if content:
                                accumulated_text += content
                    elif msg_type == "result":
                        self.last_usage = data.get("stats") or data.get("usage")
                        if "session_id" in data:
                            self.session_id = data.get("session_id")
                    elif msg_type == "tool_use":
                        # Standardize format for ai_client.py
                        # Real CLI might use 'tool_name'/'tool_id'/'parameters'
                        # or 'name'/'id'/'args'. We'll map to 'name'/'id'/'args'.
                        tc = {
                            "name": data.get("tool_name", data.get("name")),
                            "args": data.get("parameters", data.get("args", {})),
                            "id": data.get("tool_id", data.get("id"))
                        }
                        if tc["name"]:
                            tool_calls.append(tc)
                except json.JSONDecodeError:
                    # Non-JSON lines in the stream are ignored.
                    continue
        except Exception as e:
            # Kill the child before propagating so it does not linger.
            if process:
                process.kill()
            raise e
        finally:
            # Always record latency and log the call, even on failure paths.
            current_latency = time.time() - start_time
            if process:
                session_logger.open_session()
                session_logger.log_cli_call(
                    command=command,
                    stdin_content=stdin_content,
                    stdout_content=stdout_content,
                    stderr_content=stderr_content,
                    latency=current_latency
                )
            self.last_latency = current_latency
        return {
            "text": accumulated_text,
            "tool_calls": tool_calls,
            "stderr": stderr_content
        }

6029
gui_2.py

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -22,8 +22,8 @@ def normal_func():
tree = parser.parse(bytes(code, "utf8"))

def print_node(node, indent=0):
    """Dump the parse tree; include source text for decorator/comment/identifier nodes."""
    label = f"{node.type} [{node.start_byte}-{node.end_byte}] "
    if node.type in ["decorator", "comment", "identifier"]:
        label += f"'{code[node.start_byte:node.end_byte]}'"
    print(" " * indent + label)
    for sub in node.children:
        print_node(sub, indent + 1)

print_node(tree.root_node)

View File

@@ -4,24 +4,25 @@ from datetime import datetime, timedelta
from log_registry import LogRegistry from log_registry import LogRegistry
class LogPruner: class LogPruner:
""" """
Handles the automated deletion of old and insignificant session logs. Handles the automated deletion of old and insignificant session logs.
Ensures that only whitelisted or significant sessions (based on size/content) Ensures that only whitelisted or significant sessions (based on size/content)
are preserved long-term. are preserved long-term.
""" """
def __init__(self, log_registry: LogRegistry, logs_dir: str):
""" def __init__(self, log_registry: LogRegistry, logs_dir: str):
"""
Initializes the LogPruner. Initializes the LogPruner.
Args: Args:
log_registry: An instance of LogRegistry to check session data. log_registry: An instance of LogRegistry to check session data.
logs_dir: The path to the directory containing session sub-directories. logs_dir: The path to the directory containing session sub-directories.
""" """
self.log_registry = log_registry self.log_registry = log_registry
self.logs_dir = logs_dir self.logs_dir = logs_dir
def prune(self): def prune(self):
""" """
Prunes old and small session directories from the logs directory. Prunes old and small session directories from the logs directory.
Deletes session directories that meet the following criteria: Deletes session directories that meet the following criteria:
@@ -29,37 +30,31 @@ class LogPruner:
2. The session name is NOT in the whitelist provided by the LogRegistry. 2. The session name is NOT in the whitelist provided by the LogRegistry.
3. The total size of all files within the session directory is less than 2KB (2048 bytes). 3. The total size of all files within the session directory is less than 2KB (2048 bytes).
""" """
now = datetime.now() now = datetime.now()
cutoff_time = now - timedelta(hours=24) cutoff_time = now - timedelta(hours=24)
# Ensure the base logs directory exists.
# Ensure the base logs directory exists. if not os.path.isdir(self.logs_dir):
if not os.path.isdir(self.logs_dir): return
return # Get sessions that are old and not whitelisted from the registry
old_sessions_to_check = self.log_registry.get_old_non_whitelisted_sessions(cutoff_time)
# Get sessions that are old and not whitelisted from the registry # Prune sessions if their size is less than 2048 bytes
old_sessions_to_check = self.log_registry.get_old_non_whitelisted_sessions(cutoff_time) for session_info in old_sessions_to_check:
session_id = session_info['session_id']
# Prune sessions if their size is less than 2048 bytes session_path = session_info['path']
for session_info in old_sessions_to_check: if not session_path or not os.path.isdir(session_path):
session_id = session_info['session_id'] continue
session_path = session_info['path'] # Calculate total size of files in the directory
total_size = 0
if not session_path or not os.path.isdir(session_path): try:
continue for entry in os.scandir(session_path):
if entry.is_file():
# Calculate total size of files in the directory total_size += entry.stat().st_size
total_size = 0 except OSError:
try: continue
for entry in os.scandir(session_path): # Prune if the total size is less than 2KB (2048 bytes)
if entry.is_file(): if total_size < 2048: # 2KB
total_size += entry.stat().st_size try:
except OSError: shutil.rmtree(session_path)
continue # print(f"Pruned session '{session_id}' (Size: {total_size} bytes)")
except OSError:
# Prune if the total size is less than 2KB (2048 bytes) pass
if total_size < 2048: # 2KB
try:
shutil.rmtree(session_path)
# print(f"Pruned session '{session_id}' (Size: {total_size} bytes)")
except OSError:
pass

View File

@@ -4,85 +4,85 @@ from datetime import datetime
import os import os
class LogRegistry: class LogRegistry:
""" """
Manages a persistent registry of session logs using a TOML file. Manages a persistent registry of session logs using a TOML file.
Tracks session paths, start times, whitelisting status, and metadata. Tracks session paths, start times, whitelisting status, and metadata.
""" """
def __init__(self, registry_path):
""" def __init__(self, registry_path):
"""
Initializes the LogRegistry with a path to the registry file. Initializes the LogRegistry with a path to the registry file.
Args: Args:
registry_path (str): The file path to the TOML registry. registry_path (str): The file path to the TOML registry.
""" """
self.registry_path = registry_path self.registry_path = registry_path
self.data = {} self.data = {}
self.load_registry() self.load_registry()
def load_registry(self): def load_registry(self):
""" """
Loads the registry data from the TOML file into memory. Loads the registry data from the TOML file into memory.
Handles date/time conversions from TOML-native formats to strings for consistency. Handles date/time conversions from TOML-native formats to strings for consistency.
""" """
if os.path.exists(self.registry_path): if os.path.exists(self.registry_path):
try: try:
with open(self.registry_path, 'rb') as f: with open(self.registry_path, 'rb') as f:
loaded_data = tomllib.load(f) loaded_data = tomllib.load(f)
# Keep data as it is from TOML (strings or native datetimes) # Keep data as it is from TOML (strings or native datetimes)
# If we want to satisfy tests that expect strings, we ensure they are strings. # If we want to satisfy tests that expect strings, we ensure they are strings.
self.data = {} self.data = {}
for session_id, session_data in loaded_data.items(): for session_id, session_data in loaded_data.items():
new_session_data = session_data.copy() new_session_data = session_data.copy()
# If tomllib parsed it as a datetime, convert it back to string for the tests # If tomllib parsed it as a datetime, convert it back to string for the tests
if 'start_time' in new_session_data and isinstance(new_session_data['start_time'], datetime): if 'start_time' in new_session_data and isinstance(new_session_data['start_time'], datetime):
new_session_data['start_time'] = new_session_data['start_time'].isoformat() new_session_data['start_time'] = new_session_data['start_time'].isoformat()
if 'metadata' in new_session_data and isinstance(new_session_data['metadata'], dict): if 'metadata' in new_session_data and isinstance(new_session_data['metadata'], dict):
m = new_session_data['metadata'] m = new_session_data['metadata']
if 'timestamp' in m and isinstance(m['timestamp'], datetime): if 'timestamp' in m and isinstance(m['timestamp'], datetime):
m['timestamp'] = m['timestamp'].isoformat() m['timestamp'] = m['timestamp'].isoformat()
self.data[session_id] = new_session_data self.data[session_id] = new_session_data
except Exception as e: except Exception as e:
print(f"Error loading registry from {self.registry_path}: {e}") print(f"Error loading registry from {self.registry_path}: {e}")
self.data = {} self.data = {}
else: else:
self.data = {} self.data = {}
def save_registry(self): def save_registry(self):
""" """
Serializes and saves the current registry data to the TOML file. Serializes and saves the current registry data to the TOML file.
Converts internal datetime objects to ISO format strings for compatibility. Converts internal datetime objects to ISO format strings for compatibility.
""" """
try: try:
# Convert datetime objects to ISO format strings for TOML serialization # Convert datetime objects to ISO format strings for TOML serialization
data_to_save = {} data_to_save = {}
for session_id, session_data in self.data.items(): for session_id, session_data in self.data.items():
session_data_copy = {} session_data_copy = {}
for k, v in session_data.items(): for k, v in session_data.items():
if v is None: if v is None:
continue continue
if k == 'start_time' and isinstance(v, datetime): if k == 'start_time' and isinstance(v, datetime):
session_data_copy[k] = v.isoformat() session_data_copy[k] = v.isoformat()
elif k == 'metadata' and isinstance(v, dict): elif k == 'metadata' and isinstance(v, dict):
metadata_copy = {} metadata_copy = {}
for mk, mv in v.items(): for mk, mv in v.items():
if mv is None: if mv is None:
continue continue
if mk == 'timestamp' and isinstance(mv, datetime): if mk == 'timestamp' and isinstance(mv, datetime):
metadata_copy[mk] = mv.isoformat() metadata_copy[mk] = mv.isoformat()
else: else:
metadata_copy[mk] = mv metadata_copy[mk] = mv
session_data_copy[k] = metadata_copy session_data_copy[k] = metadata_copy
else: else:
session_data_copy[k] = v session_data_copy[k] = v
data_to_save[session_id] = session_data_copy data_to_save[session_id] = session_data_copy
with open(self.registry_path, 'wb') as f:
with open(self.registry_path, 'wb') as f: tomli_w.dump(data_to_save, f)
tomli_w.dump(data_to_save, f) except Exception as e:
except Exception as e: print(f"Error saving registry to {self.registry_path}: {e}")
print(f"Error saving registry to {self.registry_path}: {e}")
def register_session(self, session_id, path, start_time): def register_session(self, session_id, path, start_time):
""" """
Registers a new session in the registry. Registers a new session in the registry.
Args: Args:
@@ -90,25 +90,23 @@ class LogRegistry:
path (str): File path to the session's log directory. path (str): File path to the session's log directory.
start_time (datetime|str): The timestamp when the session started. start_time (datetime|str): The timestamp when the session started.
""" """
if session_id in self.data: if session_id in self.data:
print(f"Warning: Session ID '{session_id}' already exists. Overwriting.") print(f"Warning: Session ID '{session_id}' already exists. Overwriting.")
# Store start_time internally as a string to satisfy tests
if isinstance(start_time, datetime):
start_time_str = start_time.isoformat()
else:
start_time_str = start_time
self.data[session_id] = {
'path': path,
'start_time': start_time_str,
'whitelisted': False,
'metadata': None
}
self.save_registry()
# Store start_time internally as a string to satisfy tests def update_session_metadata(self, session_id, message_count, errors, size_kb, whitelisted, reason):
if isinstance(start_time, datetime): """
start_time_str = start_time.isoformat()
else:
start_time_str = start_time
self.data[session_id] = {
'path': path,
'start_time': start_time_str,
'whitelisted': False,
'metadata': None
}
self.save_registry()
def update_session_metadata(self, session_id, message_count, errors, size_kb, whitelisted, reason):
"""
Updates metadata fields for an existing session. Updates metadata fields for an existing session.
Args: Args:
@@ -119,30 +117,26 @@ class LogRegistry:
whitelisted (bool): Whether the session should be protected from pruning. whitelisted (bool): Whether the session should be protected from pruning.
reason (str): Explanation for the current whitelisting status. reason (str): Explanation for the current whitelisting status.
""" """
if session_id not in self.data: if session_id not in self.data:
print(f"Error: Session ID '{session_id}' not found for metadata update.") print(f"Error: Session ID '{session_id}' not found for metadata update.")
return return
# Ensure metadata exists
if self.data[session_id].get('metadata') is None:
self.data[session_id]['metadata'] = {}
# Update fields
self.data[session_id]['metadata']['message_count'] = message_count
self.data[session_id]['metadata']['errors'] = errors
self.data[session_id]['metadata']['size_kb'] = size_kb
self.data[session_id]['metadata']['whitelisted'] = whitelisted
self.data[session_id]['metadata']['reason'] = reason
# self.data[session_id]['metadata']['timestamp'] = datetime.utcnow() # Optionally add a timestamp
# Also update the top-level whitelisted flag if provided
if whitelisted is not None:
self.data[session_id]['whitelisted'] = whitelisted
self.save_registry() # Save after update
# Ensure metadata exists def is_session_whitelisted(self, session_id):
if self.data[session_id].get('metadata') is None: """
self.data[session_id]['metadata'] = {}
# Update fields
self.data[session_id]['metadata']['message_count'] = message_count
self.data[session_id]['metadata']['errors'] = errors
self.data[session_id]['metadata']['size_kb'] = size_kb
self.data[session_id]['metadata']['whitelisted'] = whitelisted
self.data[session_id]['metadata']['reason'] = reason
# self.data[session_id]['metadata']['timestamp'] = datetime.utcnow() # Optionally add a timestamp
# Also update the top-level whitelisted flag if provided
if whitelisted is not None:
self.data[session_id]['whitelisted'] = whitelisted
self.save_registry() # Save after update
def is_session_whitelisted(self, session_id):
"""
Checks if a specific session is marked as whitelisted. Checks if a specific session is marked as whitelisted.
Args: Args:
@@ -151,15 +145,14 @@ class LogRegistry:
Returns: Returns:
bool: True if whitelisted, False otherwise. bool: True if whitelisted, False otherwise.
""" """
session_data = self.data.get(session_id) session_data = self.data.get(session_id)
if session_data is None: if session_data is None:
return False # Non-existent sessions are not whitelisted return False # Non-existent sessions are not whitelisted
# Check the top-level 'whitelisted' flag. If it's not set or False, it's not whitelisted.
# Check the top-level 'whitelisted' flag. If it's not set or False, it's not whitelisted. return session_data.get('whitelisted', False)
return session_data.get('whitelisted', False)
def update_auto_whitelist_status(self, session_id: str): def update_auto_whitelist_status(self, session_id: str):
""" """
Analyzes session logs and updates whitelisting status based on heuristics. Analyzes session logs and updates whitelisting status based on heuristics.
Sessions are automatically whitelisted if they contain error keywords, Sessions are automatically whitelisted if they contain error keywords,
have a high message count, or exceed a size threshold. have a high message count, or exceed a size threshold.
@@ -167,64 +160,57 @@ class LogRegistry:
Args: Args:
session_id (str): Unique identifier for the session to analyze. session_id (str): Unique identifier for the session to analyze.
""" """
if session_id not in self.data: if session_id not in self.data:
return return
session_data = self.data[session_id]
session_path = session_data.get('path')
if not session_path or not os.path.isdir(session_path):
return
total_size_bytes = 0
message_count = 0
found_keywords = []
keywords_to_check = ['ERROR', 'WARNING', 'EXCEPTION']
try:
for entry in os.scandir(session_path):
if entry.is_file():
size = entry.stat().st_size
total_size_bytes += size
# Analyze comms.log for messages and keywords
if entry.name == "comms.log":
try:
with open(entry.path, 'r', encoding='utf-8', errors='ignore') as f:
for line in f:
message_count += 1
for kw in keywords_to_check:
if kw in line and kw not in found_keywords:
found_keywords.append(kw)
except Exception:
pass
except Exception:
pass
size_kb = total_size_bytes / 1024
whitelisted = False
reason = ""
if found_keywords:
whitelisted = True
reason = f"Found keywords: {', '.join(found_keywords)}"
elif message_count > 10:
whitelisted = True
reason = f"High message count: {message_count}"
elif size_kb > 50:
whitelisted = True
reason = f"Large session size: {size_kb:.1f} KB"
self.update_session_metadata(
session_id,
message_count=message_count,
errors=len(found_keywords),
size_kb=int(size_kb),
whitelisted=whitelisted,
reason=reason
)
session_data = self.data[session_id] def get_old_non_whitelisted_sessions(self, cutoff_datetime):
session_path = session_data.get('path') """
if not session_path or not os.path.isdir(session_path):
return
total_size_bytes = 0
message_count = 0
found_keywords = []
keywords_to_check = ['ERROR', 'WARNING', 'EXCEPTION']
try:
for entry in os.scandir(session_path):
if entry.is_file():
size = entry.stat().st_size
total_size_bytes += size
# Analyze comms.log for messages and keywords
if entry.name == "comms.log":
try:
with open(entry.path, 'r', encoding='utf-8', errors='ignore') as f:
for line in f:
message_count += 1
for kw in keywords_to_check:
if kw in line and kw not in found_keywords:
found_keywords.append(kw)
except Exception:
pass
except Exception:
pass
size_kb = total_size_bytes / 1024
whitelisted = False
reason = ""
if found_keywords:
whitelisted = True
reason = f"Found keywords: {', '.join(found_keywords)}"
elif message_count > 10:
whitelisted = True
reason = f"High message count: {message_count}"
elif size_kb > 50:
whitelisted = True
reason = f"Large session size: {size_kb:.1f} KB"
self.update_session_metadata(
session_id,
message_count=message_count,
errors=len(found_keywords),
size_kb=int(size_kb),
whitelisted=whitelisted,
reason=reason
)
def get_old_non_whitelisted_sessions(self, cutoff_datetime):
"""
Retrieves a list of sessions that are older than a specific cutoff time Retrieves a list of sessions that are older than a specific cutoff time
and are not marked as whitelisted. and are not marked as whitelisted.
@@ -234,24 +220,22 @@ class LogRegistry:
Returns: Returns:
list: A list of dictionaries containing session details (id, path, start_time). list: A list of dictionaries containing session details (id, path, start_time).
""" """
old_sessions = [] old_sessions = []
for session_id, session_data in self.data.items(): for session_id, session_data in self.data.items():
# Check if session is older than cutoff and not whitelisted # Check if session is older than cutoff and not whitelisted
start_time_raw = session_data.get('start_time') start_time_raw = session_data.get('start_time')
if isinstance(start_time_raw, str): if isinstance(start_time_raw, str):
try: try:
start_time = datetime.fromisoformat(start_time_raw) start_time = datetime.fromisoformat(start_time_raw)
except ValueError: except ValueError:
start_time = None start_time = None
else: else:
start_time = start_time_raw start_time = start_time_raw
is_whitelisted = session_data.get('whitelisted', False)
is_whitelisted = session_data.get('whitelisted', False) if start_time is not None and start_time < cutoff_datetime and not is_whitelisted:
old_sessions.append({
if start_time is not None and start_time < cutoff_datetime and not is_whitelisted: 'session_id': session_id,
old_sessions.append({ 'path': session_data.get('path'),
'session_id': session_id, 'start_time': start_time_raw
'path': session_data.get('path'), })
'start_time': start_time_raw return old_sessions
})
return old_sessions

File diff suppressed because it is too large Load Diff

View File

@@ -143,11 +143,11 @@ Return the Ticket set in Godot ECS Flat List format (JSON array).
""" """
PROMPTS: Dict[str, str] = { PROMPTS: Dict[str, str] = {
"tier1_epic_init": TIER1_EPIC_INIT, "tier1_epic_init": TIER1_EPIC_INIT,
"tier1_track_delegation": TIER1_TRACK_DELEGATION, "tier1_track_delegation": TIER1_TRACK_DELEGATION,
"tier1_macro_merge": TIER1_MACRO_MERGE, "tier1_macro_merge": TIER1_MACRO_MERGE,
"tier2_sprint_planning": TIER2_SPRINT_PLANNING, "tier2_sprint_planning": TIER2_SPRINT_PLANNING,
"tier2_code_review": TIER2_CODE_REVIEW, "tier2_code_review": TIER2_CODE_REVIEW,
"tier2_track_finalization": TIER2_TRACK_FINALIZATION, "tier2_track_finalization": TIER2_TRACK_FINALIZATION,
"tier2_contract_first": TIER2_CONTRACT_FIRST, "tier2_contract_first": TIER2_CONTRACT_FIRST,
} }

244
models.py
View File

@@ -4,156 +4,152 @@ from datetime import datetime
@dataclass @dataclass
class Ticket: class Ticket:
""" """
Represents a discrete unit of work within a track. Represents a discrete unit of work within a track.
""" """
id: str id: str
description: str description: str
status: str status: str
assigned_to: str assigned_to: str
target_file: Optional[str] = None target_file: Optional[str] = None
context_requirements: List[str] = field(default_factory=list) context_requirements: List[str] = field(default_factory=list)
depends_on: List[str] = field(default_factory=list) depends_on: List[str] = field(default_factory=list)
blocked_reason: Optional[str] = None blocked_reason: Optional[str] = None
step_mode: bool = False step_mode: bool = False
def mark_blocked(self, reason: str): def mark_blocked(self, reason: str):
"""Sets the ticket status to 'blocked' and records the reason.""" """Sets the ticket status to 'blocked' and records the reason."""
self.status = "blocked" self.status = "blocked"
self.blocked_reason = reason self.blocked_reason = reason
def mark_complete(self): def mark_complete(self):
"""Sets the ticket status to 'completed'.""" """Sets the ticket status to 'completed'."""
self.status = "completed" self.status = "completed"
def to_dict(self) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]:
return { return {
"id": self.id, "id": self.id,
"description": self.description, "description": self.description,
"status": self.status, "status": self.status,
"assigned_to": self.assigned_to, "assigned_to": self.assigned_to,
"target_file": self.target_file, "target_file": self.target_file,
"context_requirements": self.context_requirements, "context_requirements": self.context_requirements,
"depends_on": self.depends_on, "depends_on": self.depends_on,
"blocked_reason": self.blocked_reason, "blocked_reason": self.blocked_reason,
"step_mode": self.step_mode, "step_mode": self.step_mode,
} }
@classmethod @classmethod
def from_dict(cls, data: Dict[str, Any]) -> "Ticket": def from_dict(cls, data: Dict[str, Any]) -> "Ticket":
return cls( return cls(
id=data["id"], id=data["id"],
description=data.get("description"), description=data.get("description"),
status=data.get("status"), status=data.get("status"),
assigned_to=data.get("assigned_to"), assigned_to=data.get("assigned_to"),
target_file=data.get("target_file"), target_file=data.get("target_file"),
context_requirements=data.get("context_requirements", []), context_requirements=data.get("context_requirements", []),
depends_on=data.get("depends_on", []), depends_on=data.get("depends_on", []),
blocked_reason=data.get("blocked_reason"), blocked_reason=data.get("blocked_reason"),
step_mode=data.get("step_mode", False), step_mode=data.get("step_mode", False),
) )
@dataclass @dataclass
class Track: class Track:
""" """
Represents a collection of tickets that together form an architectural track or epic. Represents a collection of tickets that together form an architectural track or epic.
""" """
id: str id: str
description: str description: str
tickets: List[Ticket] = field(default_factory=list) tickets: List[Ticket] = field(default_factory=list)
def get_executable_tickets(self) -> List[Ticket]: def get_executable_tickets(self) -> List[Ticket]:
""" """
Returns all 'todo' tickets whose dependencies are all 'completed'. Returns all 'todo' tickets whose dependencies are all 'completed'.
""" """
# Map ticket IDs to their current status for efficient lookup # Map ticket IDs to their current status for efficient lookup
status_map = {t.id: t.status for t in self.tickets} status_map = {t.id: t.status for t in self.tickets}
executable = []
executable = [] for ticket in self.tickets:
for ticket in self.tickets: if ticket.status != "todo":
if ticket.status != "todo": continue
continue # Check if all dependencies are completed
all_deps_completed = True
# Check if all dependencies are completed for dep_id in ticket.depends_on:
all_deps_completed = True # If a dependency is missing from the track, we treat it as not completed (or we could raise an error)
for dep_id in ticket.depends_on: if status_map.get(dep_id) != "completed":
# If a dependency is missing from the track, we treat it as not completed (or we could raise an error) all_deps_completed = False
if status_map.get(dep_id) != "completed": break
all_deps_completed = False if all_deps_completed:
break executable.append(ticket)
return executable
if all_deps_completed:
executable.append(ticket)
return executable
@dataclass @dataclass
class WorkerContext: class WorkerContext:
""" """
Represents the context provided to a Tier 3 Worker for a specific ticket. Represents the context provided to a Tier 3 Worker for a specific ticket.
""" """
ticket_id: str ticket_id: str
model_name: str model_name: str
messages: List[dict] messages: List[dict]
@dataclass @dataclass
class Metadata: class Metadata:
id: str id: str
name: str name: str
status: str status: str
created_at: datetime created_at: datetime
updated_at: datetime updated_at: datetime
def to_dict(self) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]:
return { return {
"id": self.id, "id": self.id,
"name": self.name, "name": self.name,
"status": self.status, "status": self.status,
"created_at": self.created_at.isoformat() if self.created_at else None, "created_at": self.created_at.isoformat() if self.created_at else None,
"updated_at": self.updated_at.isoformat() if self.updated_at else None, "updated_at": self.updated_at.isoformat() if self.updated_at else None,
} }
@classmethod @classmethod
def from_dict(cls, data: Dict[str, Any]) -> "Metadata": def from_dict(cls, data: Dict[str, Any]) -> "Metadata":
return cls( return cls(
id=data["id"], id=data["id"],
name=data["name"], name=data["name"],
status=data.get("status"), status=data.get("status"),
created_at=datetime.fromisoformat(data['created_at']) if data.get('created_at') else None, created_at=datetime.fromisoformat(data['created_at']) if data.get('created_at') else None,
updated_at=datetime.fromisoformat(data['updated_at']) if data.get('updated_at') else None, updated_at=datetime.fromisoformat(data['updated_at']) if data.get('updated_at') else None,
) )
@dataclass @dataclass
class TrackState: class TrackState:
metadata: Metadata metadata: Metadata
discussion: List[Dict[str, Any]] discussion: List[Dict[str, Any]]
tasks: List[Ticket] tasks: List[Ticket]
def to_dict(self) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]:
return { return {
"metadata": self.metadata.to_dict(), "metadata": self.metadata.to_dict(),
"discussion": [ "discussion": [
{ {
k: v.isoformat() if isinstance(v, datetime) else v k: v.isoformat() if isinstance(v, datetime) else v
for k, v in item.items() for k, v in item.items()
} }
for item in self.discussion for item in self.discussion
], ],
"tasks": [task.to_dict() for task in self.tasks], "tasks": [task.to_dict() for task in self.tasks],
} }
@classmethod @classmethod
def from_dict(cls, data: Dict[str, Any]) -> "TrackState": def from_dict(cls, data: Dict[str, Any]) -> "TrackState":
metadata = Metadata.from_dict(data["metadata"]) metadata = Metadata.from_dict(data["metadata"])
tasks = [Ticket.from_dict(task_data) for task_data in data["tasks"]] tasks = [Ticket.from_dict(task_data) for task_data in data["tasks"]]
return cls( return cls(
metadata=metadata, metadata=metadata,
discussion=[ discussion=[
{ {
k: datetime.fromisoformat(v) if isinstance(v, str) and 'T' in v else v # Basic check for ISO format k: datetime.fromisoformat(v) if isinstance(v, str) and 'T' in v else v # Basic check for ISO format
for k, v in item.items() for k, v in item.items()
} }
for item in data["discussion"] for item in data["discussion"]
], ],
tasks=tasks, tasks=tasks,
) )

View File

@@ -13,231 +13,208 @@ from pathlib import Path
from dag_engine import TrackDAG, ExecutionEngine from dag_engine import TrackDAG, ExecutionEngine
class ConductorEngine: class ConductorEngine:
""" """
Orchestrates the execution of tickets within a track. Orchestrates the execution of tickets within a track.
""" """
def __init__(self, track: Track, event_queue: Optional[events.AsyncEventQueue] = None, auto_queue: bool = False):
self.track = track
self.event_queue = event_queue
self.tier_usage = {
"Tier 1": {"input": 0, "output": 0},
"Tier 2": {"input": 0, "output": 0},
"Tier 3": {"input": 0, "output": 0},
"Tier 4": {"input": 0, "output": 0},
}
self.dag = TrackDAG(self.track.tickets)
self.engine = ExecutionEngine(self.dag, auto_queue=auto_queue)
async def _push_state(self, status: str = "running", active_tier: str = None): def __init__(self, track: Track, event_queue: Optional[events.AsyncEventQueue] = None, auto_queue: bool = False):
if not self.event_queue: self.track = track
return self.event_queue = event_queue
self.tier_usage = {
payload = { "Tier 1": {"input": 0, "output": 0},
"status": status, "Tier 2": {"input": 0, "output": 0},
"active_tier": active_tier, "Tier 3": {"input": 0, "output": 0},
"tier_usage": self.tier_usage, "Tier 4": {"input": 0, "output": 0},
"track": { }
"id": self.track.id, self.dag = TrackDAG(self.track.tickets)
"title": self.track.description, self.engine = ExecutionEngine(self.dag, auto_queue=auto_queue)
},
"tickets": [asdict(t) for t in self.track.tickets]
}
await self.event_queue.put("mma_state_update", payload)
def parse_json_tickets(self, json_str: str): async def _push_state(self, status: str = "running", active_tier: str = None):
""" if not self.event_queue:
return
payload = {
"status": status,
"active_tier": active_tier,
"tier_usage": self.tier_usage,
"track": {
"id": self.track.id,
"title": self.track.description,
},
"tickets": [asdict(t) for t in self.track.tickets]
}
await self.event_queue.put("mma_state_update", payload)
def parse_json_tickets(self, json_str: str):
"""
Parses a JSON string of ticket definitions (Godot ECS Flat List format) Parses a JSON string of ticket definitions (Godot ECS Flat List format)
and populates the Track's ticket list. and populates the Track's ticket list.
""" """
try: try:
data = json.loads(json_str) data = json.loads(json_str)
if not isinstance(data, list): if not isinstance(data, list):
print("Error: JSON input must be a list of ticket definitions.") print("Error: JSON input must be a list of ticket definitions.")
return return
for ticket_data in data:
# Construct Ticket object, using defaults for optional fields
ticket = Ticket(
id=ticket_data["id"],
description=ticket_data["description"],
status=ticket_data.get("status", "todo"),
assigned_to=ticket_data.get("assigned_to", "unassigned"),
depends_on=ticket_data.get("depends_on", []),
step_mode=ticket_data.get("step_mode", False)
)
self.track.tickets.append(ticket)
# Rebuild DAG and Engine after parsing new tickets
self.dag = TrackDAG(self.track.tickets)
self.engine = ExecutionEngine(self.dag, auto_queue=self.engine.auto_queue)
except json.JSONDecodeError as e:
print(f"Error parsing JSON tickets: {e}")
except KeyError as e:
print(f"Missing required field in ticket definition: {e}")
for ticket_data in data: async def run(self, md_content: str = ""):
# Construct Ticket object, using defaults for optional fields """
ticket = Ticket(
id=ticket_data["id"],
description=ticket_data["description"],
status=ticket_data.get("status", "todo"),
assigned_to=ticket_data.get("assigned_to", "unassigned"),
depends_on=ticket_data.get("depends_on", []),
step_mode=ticket_data.get("step_mode", False)
)
self.track.tickets.append(ticket)
# Rebuild DAG and Engine after parsing new tickets
self.dag = TrackDAG(self.track.tickets)
self.engine = ExecutionEngine(self.dag, auto_queue=self.engine.auto_queue)
except json.JSONDecodeError as e:
print(f"Error parsing JSON tickets: {e}")
except KeyError as e:
print(f"Missing required field in ticket definition: {e}")
async def run(self, md_content: str = ""):
"""
Main execution loop using the DAG engine. Main execution loop using the DAG engine.
Args: Args:
md_content: The full markdown context (history + files) for AI workers. md_content: The full markdown context (history + files) for AI workers.
""" """
await self._push_state(status="running", active_tier="Tier 2 (Tech Lead)") await self._push_state(status="running", active_tier="Tier 2 (Tech Lead)")
while True:
while True: # 1. Identify ready tasks
# 1. Identify ready tasks ready_tasks = self.engine.tick()
ready_tasks = self.engine.tick() # 2. Check for completion or blockage
if not ready_tasks:
# 2. Check for completion or blockage all_done = all(t.status == "completed" for t in self.track.tickets)
if not ready_tasks: if all_done:
all_done = all(t.status == "completed" for t in self.track.tickets) print("Track completed successfully.")
if all_done: await self._push_state(status="done", active_tier=None)
print("Track completed successfully.") else:
await self._push_state(status="done", active_tier=None) # Check if any tasks are in-progress or could be ready
else: if any(t.status == "in_progress" for t in self.track.tickets):
# Check if any tasks are in-progress or could be ready # Wait for async tasks to complete
if any(t.status == "in_progress" for t in self.track.tickets): await asyncio.sleep(1)
# Wait for async tasks to complete continue
await asyncio.sleep(1) print("No more executable tickets. Track is blocked or finished.")
continue await self._push_state(status="blocked", active_tier=None)
break
print("No more executable tickets. Track is blocked or finished.") # 3. Process ready tasks
await self._push_state(status="blocked", active_tier=None) loop = asyncio.get_event_loop()
break for ticket in ready_tasks:
# If auto_queue is on and step_mode is off, engine.tick() already marked it 'in_progress'
# 3. Process ready tasks # but we need to verify and handle the lifecycle.
loop = asyncio.get_event_loop() if ticket.status == "in_progress" or (not ticket.step_mode and self.engine.auto_queue):
for ticket in ready_tasks: ticket.status = "in_progress"
# If auto_queue is on and step_mode is off, engine.tick() already marked it 'in_progress' print(f"Executing ticket {ticket.id}: {ticket.description}")
# but we need to verify and handle the lifecycle. await self._push_state(active_tier=f"Tier 3 (Worker): {ticket.id}")
if ticket.status == "in_progress" or (not ticket.step_mode and self.engine.auto_queue): context = WorkerContext(
ticket.status = "in_progress" ticket_id=ticket.id,
print(f"Executing ticket {ticket.id}: {ticket.description}") model_name="gemini-2.5-flash-lite",
await self._push_state(active_tier=f"Tier 3 (Worker): {ticket.id}") messages=[]
)
context = WorkerContext( # Offload the blocking lifecycle call to a thread to avoid blocking the async event loop.
ticket_id=ticket.id, # We pass the md_content so the worker has full context.
model_name="gemini-2.5-flash-lite", context_files = ticket.context_requirements if ticket.context_requirements else None
messages=[] await loop.run_in_executor(
) None,
run_worker_lifecycle,
# Offload the blocking lifecycle call to a thread to avoid blocking the async event loop. ticket,
# We pass the md_content so the worker has full context. context,
context_files = ticket.context_requirements if ticket.context_requirements else None context_files,
await loop.run_in_executor( self.event_queue,
None, self,
run_worker_lifecycle, md_content
ticket, )
context, await self._push_state(active_tier="Tier 2 (Tech Lead)")
context_files, elif ticket.status == "todo" and (ticket.step_mode or not self.engine.auto_queue):
self.event_queue, # Task is ready but needs approval
self, print(f"Ticket {ticket.id} is ready and awaiting approval.")
md_content await self._push_state(active_tier=f"Awaiting Approval: {ticket.id}")
) # In a real UI, this would wait for a user event.
await self._push_state(active_tier="Tier 2 (Tech Lead)") # For now, we'll treat it as a pause point if not auto-queued.
pass
elif ticket.status == "todo" and (ticket.step_mode or not self.engine.auto_queue):
# Task is ready but needs approval
print(f"Ticket {ticket.id} is ready and awaiting approval.")
await self._push_state(active_tier=f"Awaiting Approval: {ticket.id}")
# In a real UI, this would wait for a user event.
# For now, we'll treat it as a pause point if not auto-queued.
pass
def confirm_execution(payload: str, event_queue: events.AsyncEventQueue, ticket_id: str) -> bool: def confirm_execution(payload: str, event_queue: events.AsyncEventQueue, ticket_id: str) -> bool:
""" """
Pushes an approval request to the GUI and waits for response. Pushes an approval request to the GUI and waits for response.
""" """
# We use a list container so the GUI can inject the actual Dialog object back to us # We use a list container so the GUI can inject the actual Dialog object back to us
# since the dialog is created in the GUI thread. # since the dialog is created in the GUI thread.
dialog_container = [None] dialog_container = [None]
task = {
task = { "action": "mma_step_approval",
"action": "mma_step_approval", "ticket_id": ticket_id,
"ticket_id": ticket_id, "payload": payload,
"payload": payload, "dialog_container": dialog_container
"dialog_container": dialog_container }
} # Push to queue
try:
# Push to queue loop = asyncio.get_event_loop()
try: if loop.is_running():
loop = asyncio.get_event_loop() asyncio.run_coroutine_threadsafe(event_queue.put("mma_step_approval", task), loop)
if loop.is_running(): else:
asyncio.run_coroutine_threadsafe(event_queue.put("mma_step_approval", task), loop) event_queue._queue.put_nowait(("mma_step_approval", task))
else: except Exception:
event_queue._queue.put_nowait(("mma_step_approval", task)) # Fallback if no loop
except Exception: event_queue._queue.put_nowait(("mma_step_approval", task))
# Fallback if no loop # Wait for the GUI to create the dialog and for the user to respond
event_queue._queue.put_nowait(("mma_step_approval", task)) start = time.time()
while dialog_container[0] is None and time.time() - start < 60:
# Wait for the GUI to create the dialog and for the user to respond time.sleep(0.1)
start = time.time() if dialog_container[0]:
while dialog_container[0] is None and time.time() - start < 60: approved, final_payload = dialog_container[0].wait()
time.sleep(0.1) return approved
return False
if dialog_container[0]:
approved, final_payload = dialog_container[0].wait()
return approved
return False
def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.AsyncEventQueue, ticket_id: str) -> Tuple[bool, str, str]: def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.AsyncEventQueue, ticket_id: str) -> Tuple[bool, str, str]:
""" """
Pushes a spawn approval request to the GUI and waits for response. Pushes a spawn approval request to the GUI and waits for response.
Returns (approved, modified_prompt, modified_context) Returns (approved, modified_prompt, modified_context)
""" """
dialog_container = [None]
dialog_container = [None] task = {
"action": "mma_spawn_approval",
task = { "ticket_id": ticket_id,
"action": "mma_spawn_approval", "role": role,
"ticket_id": ticket_id, "prompt": prompt,
"role": role, "context_md": context_md,
"prompt": prompt, "dialog_container": dialog_container
"context_md": context_md, }
"dialog_container": dialog_container # Push to queue
} try:
loop = asyncio.get_event_loop()
# Push to queue if loop.is_running():
try: asyncio.run_coroutine_threadsafe(event_queue.put("mma_spawn_approval", task), loop)
loop = asyncio.get_event_loop() else:
if loop.is_running(): event_queue._queue.put_nowait(("mma_spawn_approval", task))
asyncio.run_coroutine_threadsafe(event_queue.put("mma_spawn_approval", task), loop) except Exception:
else: # Fallback if no loop
event_queue._queue.put_nowait(("mma_spawn_approval", task)) event_queue._queue.put_nowait(("mma_spawn_approval", task))
except Exception: # Wait for the GUI to create the dialog and for the user to respond
# Fallback if no loop start = time.time()
event_queue._queue.put_nowait(("mma_spawn_approval", task)) while dialog_container[0] is None and time.time() - start < 60:
time.sleep(0.1)
# Wait for the GUI to create the dialog and for the user to respond if dialog_container[0]:
start = time.time() res = dialog_container[0].wait()
while dialog_container[0] is None and time.time() - start < 60: if isinstance(res, dict):
time.sleep(0.1) approved = res.get("approved", False)
abort = res.get("abort", False)
if dialog_container[0]: modified_prompt = res.get("prompt", prompt)
res = dialog_container[0].wait() modified_context = res.get("context_md", context_md)
return approved and not abort, modified_prompt, modified_context
if isinstance(res, dict): else:
approved = res.get("approved", False) # Fallback for old tuple style if any
abort = res.get("abort", False) approved, final_payload = res
modified_prompt = res.get("prompt", prompt) modified_prompt = prompt
modified_context = res.get("context_md", context_md) modified_context = context_md
return approved and not abort, modified_prompt, modified_context if isinstance(final_payload, dict):
else: modified_prompt = final_payload.get("prompt", prompt)
# Fallback for old tuple style if any modified_context = final_payload.get("context_md", context_md)
approved, final_payload = res return approved, modified_prompt, modified_context
modified_prompt = prompt return False, prompt, context_md
modified_context = context_md
if isinstance(final_payload, dict):
modified_prompt = final_payload.get("prompt", prompt)
modified_context = final_payload.get("context_md", context_md)
return approved, modified_prompt, modified_context
return False, prompt, context_md
def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files: List[str] = None, event_queue: events.AsyncEventQueue = None, engine: Optional['ConductorEngine'] = None, md_content: str = ""): def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files: List[str] = None, event_queue: events.AsyncEventQueue = None, engine: Optional['ConductorEngine'] = None, md_content: str = ""):
""" """
Simulates the lifecycle of a single agent working on a ticket. Simulates the lifecycle of a single agent working on a ticket.
Calls the AI client and updates the ticket status based on the response. Calls the AI client and updates the ticket status based on the response.
Args: Args:
@@ -248,78 +225,69 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
engine: The conductor engine. engine: The conductor engine.
md_content: The markdown context (history + files) for AI workers. md_content: The markdown context (history + files) for AI workers.
""" """
# Enforce Context Amnesia: each ticket starts with a clean slate. # Enforce Context Amnesia: each ticket starts with a clean slate.
ai_client.reset_session() ai_client.reset_session()
context_injection = ""
context_injection = "" if context_files:
if context_files: parser = ASTParser(language="python")
parser = ASTParser(language="python") for i, file_path in enumerate(context_files):
for i, file_path in enumerate(context_files): try:
try: abs_path = Path(file_path)
abs_path = Path(file_path) # (This is a bit simplified, but helps)
# (This is a bit simplified, but helps) with open(file_path, 'r', encoding='utf-8') as f:
with open(file_path, 'r', encoding='utf-8') as f: content = f.read()
content = f.read() if i == 0:
if i == 0: view = parser.get_curated_view(content)
view = parser.get_curated_view(content)
else:
view = parser.get_skeleton(content)
context_injection += f"\nFile: {file_path}\n{view}\n"
except Exception as e:
context_injection += f"\nError reading {file_path}: {e}\n"
# Build a prompt for the worker
user_message = (
f"You are assigned to Ticket {ticket.id}.\n"
f"Task Description: {ticket.description}\n"
)
if context_injection:
user_message += f"\nContext Files:\n{context_injection}\n"
user_message += (
"Please complete this task. If you are blocked and cannot proceed, "
"start your response with 'BLOCKED' and explain why."
)
# HITL Clutch: call confirm_spawn if event_queue is provided
if event_queue:
approved, modified_prompt, modified_context = confirm_spawn(
role="Tier 3 Worker",
prompt=user_message,
context_md=md_content,
event_queue=event_queue,
ticket_id=ticket.id
)
if not approved:
ticket.mark_blocked("Spawn rejected by user.")
return "BLOCKED: Spawn rejected by user."
user_message = modified_prompt
md_content = modified_context
# HITL Clutch: pass the queue and ticket_id to confirm_execution
def clutch_callback(payload: str) -> bool:
if not event_queue:
return True
return confirm_execution(payload, event_queue, ticket.id)
response = ai_client.send(
md_content=md_content,
user_message=user_message,
base_dir=".",
pre_tool_callback=clutch_callback if ticket.step_mode else None,
qa_callback=ai_client.run_tier4_analysis
)
# Update usage in engine if provided
if engine:
stats = {} # ai_client.get_token_stats() is not available
engine.tier_usage["Tier 3"]["input"] += stats.get("prompt_tokens", 0)
engine.tier_usage["Tier 3"]["output"] += stats.get("candidates_tokens", 0)
if "BLOCKED" in response.upper():
ticket.mark_blocked(response)
else: else:
ticket.mark_complete() view = parser.get_skeleton(content)
context_injection += f"\nFile: {file_path}\n{view}\n"
return response except Exception as e:
context_injection += f"\nError reading {file_path}: {e}\n"
# Build a prompt for the worker
user_message = (
f"You are assigned to Ticket {ticket.id}.\n"
f"Task Description: {ticket.description}\n"
)
if context_injection:
user_message += f"\nContext Files:\n{context_injection}\n"
user_message += (
"Please complete this task. If you are blocked and cannot proceed, "
"start your response with 'BLOCKED' and explain why."
)
# HITL Clutch: call confirm_spawn if event_queue is provided
if event_queue:
approved, modified_prompt, modified_context = confirm_spawn(
role="Tier 3 Worker",
prompt=user_message,
context_md=md_content,
event_queue=event_queue,
ticket_id=ticket.id
)
if not approved:
ticket.mark_blocked("Spawn rejected by user.")
return "BLOCKED: Spawn rejected by user."
user_message = modified_prompt
md_content = modified_context
# HITL Clutch: pass the queue and ticket_id to confirm_execution
def clutch_callback(payload: str) -> bool:
if not event_queue:
return True
return confirm_execution(payload, event_queue, ticket.id)
response = ai_client.send(
md_content=md_content,
user_message=user_message,
base_dir=".",
pre_tool_callback=clutch_callback if ticket.step_mode else None,
qa_callback=ai_client.run_tier4_analysis
)
# Update usage in engine if provided
if engine:
stats = {} # ai_client.get_token_stats() is not available
engine.tier_usage["Tier 3"]["input"] += stats.get("prompt_tokens", 0)
engine.tier_usage["Tier 3"]["output"] += stats.get("candidates_tokens", 0)
if "BLOCKED" in response.upper():
ticket.mark_blocked(response)
else:
ticket.mark_complete()
return response

View File

@@ -9,126 +9,106 @@ from pathlib import Path
CONDUCTOR_PATH = Path("conductor") CONDUCTOR_PATH = Path("conductor")
def get_track_history_summary() -> str: def get_track_history_summary() -> str:
""" """
Scans conductor/archive/ and conductor/tracks/ to build a summary of past work. Scans conductor/archive/ and conductor/tracks/ to build a summary of past work.
""" """
summary_parts = [] summary_parts = []
archive_path = CONDUCTOR_PATH / "archive"
archive_path = CONDUCTOR_PATH / "archive" tracks_path = CONDUCTOR_PATH / "tracks"
tracks_path = CONDUCTOR_PATH / "tracks" paths_to_scan = []
if archive_path.exists():
paths_to_scan = [] paths_to_scan.extend(list(archive_path.iterdir()))
if archive_path.exists(): if tracks_path.exists():
paths_to_scan.extend(list(archive_path.iterdir())) paths_to_scan.extend(list(tracks_path.iterdir()))
if tracks_path.exists(): for track_dir in paths_to_scan:
paths_to_scan.extend(list(tracks_path.iterdir())) if not track_dir.is_dir():
continue
for track_dir in paths_to_scan: metadata_file = track_dir / "metadata.json"
if not track_dir.is_dir(): spec_file = track_dir / "spec.md"
continue title = track_dir.name
status = "unknown"
metadata_file = track_dir / "metadata.json" overview = "No overview available."
spec_file = track_dir / "spec.md" if metadata_file.exists():
try:
title = track_dir.name with open(metadata_file, "r", encoding="utf-8") as f:
status = "unknown" meta = json.load(f)
overview = "No overview available." title = meta.get("title", title)
status = meta.get("status", status)
if metadata_file.exists(): except Exception:
try: pass
with open(metadata_file, "r", encoding="utf-8") as f: if spec_file.exists():
meta = json.load(f) try:
title = meta.get("title", title) with open(spec_file, "r", encoding="utf-8") as f:
status = meta.get("status", status) content = f.read()
except Exception: # Basic extraction of Overview section if it exists
pass if "## Overview" in content:
overview = content.split("## Overview")[1].split("##")[0].strip()
if spec_file.exists(): else:
try: # Just take a snippet of the beginning
with open(spec_file, "r", encoding="utf-8") as f: overview = content[:200] + "..."
content = f.read() except Exception:
# Basic extraction of Overview section if it exists pass
if "## Overview" in content: summary_parts.append(f"Track: {title}\nStatus: {status}\nOverview: {overview}\n---")
overview = content.split("## Overview")[1].split("##")[0].strip() if not summary_parts:
else: return "No previous tracks found."
# Just take a snippet of the beginning return "\n".join(summary_parts)
overview = content[:200] + "..."
except Exception:
pass
summary_parts.append(f"Track: {title}\nStatus: {status}\nOverview: {overview}\n---")
if not summary_parts:
return "No previous tracks found."
return "\n".join(summary_parts)
def generate_tracks(user_request: str, project_config: dict, file_items: list[dict], history_summary: str = None) -> list[dict]: def generate_tracks(user_request: str, project_config: dict, file_items: list[dict], history_summary: str = None) -> list[dict]:
""" """
Tier 1 (Strategic PM) call. Tier 1 (Strategic PM) call.
Analyzes the project state and user request to generate a list of Tracks. Analyzes the project state and user request to generate a list of Tracks.
""" """
# 1. Build Repository Map (Summary View) # 1. Build Repository Map (Summary View)
repo_map = summarize.build_summary_markdown(file_items) repo_map = summarize.build_summary_markdown(file_items)
# 2. Construct Prompt
# 2. Construct Prompt system_prompt = mma_prompts.PROMPTS.get("tier1_epic_init")
system_prompt = mma_prompts.PROMPTS.get("tier1_epic_init") user_message_parts = [
f"### USER REQUEST:\n{user_request}\n",
user_message_parts = [ f"### REPOSITORY MAP:\n{repo_map}\n"
f"### USER REQUEST:\n{user_request}\n", ]
f"### REPOSITORY MAP:\n{repo_map}\n" if history_summary:
] user_message_parts.append(f"### TRACK HISTORY:\n{history_summary}\n")
user_message_parts.append("Please generate the implementation tracks for this request.")
if history_summary: user_message = "\n".join(user_message_parts)
user_message_parts.append(f"### TRACK HISTORY:\n{history_summary}\n") # Set custom system prompt for this call
old_system_prompt = ai_client._custom_system_prompt
user_message_parts.append("Please generate the implementation tracks for this request.") ai_client.set_custom_system_prompt(system_prompt)
try:
user_message = "\n".join(user_message_parts) # 3. Call Tier 1 Model (Strategic - Pro)
# Note: We use gemini-1.5-pro or similar high-reasoning model for Tier 1
# Set custom system prompt for this call response = ai_client.send(
old_system_prompt = ai_client._custom_system_prompt md_content="", # We pass everything in user_message for clarity
ai_client.set_custom_system_prompt(system_prompt) user_message=user_message
)
try: # 4. Parse JSON Output
# 3. Call Tier 1 Model (Strategic - Pro) try:
# Note: We use gemini-1.5-pro or similar high-reasoning model for Tier 1 # The prompt asks for a JSON array. We need to extract it if the AI added markdown blocks.
response = ai_client.send( json_match = response.strip()
md_content="", # We pass everything in user_message for clarity if "```json" in json_match:
user_message=user_message json_match = json_match.split("```json")[1].split("```")[0].strip()
) elif "```" in json_match:
json_match = json_match.split("```")[1].split("```")[0].strip()
# 4. Parse JSON Output tracks = json.loads(json_match)
try: # Ensure each track has a 'title' for the GUI
# The prompt asks for a JSON array. We need to extract it if the AI added markdown blocks. for t in tracks:
json_match = response.strip() if "title" not in t:
if "```json" in json_match: t["title"] = t.get("goal", "Untitled Track")[:50]
json_match = json_match.split("```json")[1].split("```")[0].strip() return tracks
elif "```" in json_match: except Exception as e:
json_match = json_match.split("```")[1].split("```")[0].strip() print(f"Error parsing Tier 1 response: {e}")
print(f"Raw response: {response}")
tracks = json.loads(json_match) return []
# Ensure each track has a 'title' for the GUI finally:
for t in tracks: # Restore old system prompt
if "title" not in t: ai_client.set_custom_system_prompt(old_system_prompt)
t["title"] = t.get("goal", "Untitled Track")[:50]
return tracks
except Exception as e:
print(f"Error parsing Tier 1 response: {e}")
print(f"Raw response: {response}")
return []
finally:
# Restore old system prompt
ai_client.set_custom_system_prompt(old_system_prompt)
if __name__ == "__main__": if __name__ == "__main__":
# Quick CLI test # Quick CLI test
import project_manager import project_manager
proj = project_manager.load_project("manual_slop.toml") proj = project_manager.load_project("manual_slop.toml")
flat = project_manager.flat_config(proj) flat = project_manager.flat_config(proj)
file_items = aggregate.build_file_items(Path("."), flat.get("files", {}).get("paths", [])) file_items = aggregate.build_file_items(Path("."), flat.get("files", {}).get("paths", []))
print("Testing Tier 1 Track Generation...")
print("Testing Tier 1 Track Generation...") history = get_track_history_summary()
history = get_track_history_summary() tracks = generate_tracks("Implement a basic unit test for the ai_client.py module.", flat, file_items, history_summary=history)
tracks = generate_tracks("Implement a basic unit test for the ai_client.py module.", flat, file_items, history_summary=history) print(json.dumps(tracks, indent=2))
print(json.dumps(tracks, indent=2))

View File

@@ -2,60 +2,54 @@ import ast
from pathlib import Path from pathlib import Path
class CodeOutliner: class CodeOutliner:
def __init__(self): def __init__(self):
pass pass
def outline(self, code: str) -> str: def outline(self, code: str) -> str:
code = code.lstrip(chr(0xFEFF)) code = code.lstrip(chr(0xFEFF))
try: try:
tree = ast.parse(code) tree = ast.parse(code)
except SyntaxError as e: except SyntaxError as e:
return f"ERROR parsing code: {e}" return f"ERROR parsing code: {e}"
output = []
output = [] def get_docstring(node):
doc = ast.get_docstring(node)
if doc:
return doc.splitlines()[0]
return None
def get_docstring(node): def walk(node, indent=0):
doc = ast.get_docstring(node) if isinstance(node, ast.ClassDef):
if doc: start_line = node.lineno
return doc.splitlines()[0] end_line = getattr(node, "end_lineno", start_line)
return None output.append(f"{' ' * indent}[Class] {node.name} (Lines {start_line}-{end_line})")
doc = get_docstring(node)
def walk(node, indent=0): if doc:
if isinstance(node, ast.ClassDef): output.append(f"{' ' * (indent + 1)}\"\"\"{doc}\"\"\"")
start_line = node.lineno for item in node.body:
end_line = getattr(node, "end_lineno", start_line) walk(item, indent + 1)
output.append(f"{' ' * indent}[Class] {node.name} (Lines {start_line}-{end_line})") elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
doc = get_docstring(node) start_line = node.lineno
if doc: end_line = getattr(node, "end_lineno", start_line)
output.append(f"{' ' * (indent + 1)}\"\"\"{doc}\"\"\"") prefix = "[Async Func]" if isinstance(node, ast.AsyncFunctionDef) else "[Func]"
for item in node.body: # Check if it's a method
walk(item, indent + 1) # We can check the indent or the parent, but in AST walk we know if we are inside a ClassDef
# Let's use a simpler heuristic for the outline: if indent > 0, it's likely a method.
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): if indent > 0:
start_line = node.lineno prefix = "[Method]"
end_line = getattr(node, "end_lineno", start_line) output.append(f"{' ' * indent}{prefix} {node.name} (Lines {start_line}-{end_line})")
prefix = "[Async Func]" if isinstance(node, ast.AsyncFunctionDef) else "[Func]" doc = get_docstring(node)
if doc:
# Check if it's a method output.append(f"{' ' * (indent + 1)}\"\"\"{doc}\"\"\"")
# We can check the indent or the parent, but in AST walk we know if we are inside a ClassDef for node in tree.body:
# Let's use a simpler heuristic for the outline: if indent > 0, it's likely a method. walk(node)
if indent > 0: return "\n".join(output)
prefix = "[Method]"
output.append(f"{' ' * indent}{prefix} {node.name} (Lines {start_line}-{end_line})")
doc = get_docstring(node)
if doc:
output.append(f"{' ' * (indent + 1)}\"\"\"{doc}\"\"\"")
for node in tree.body:
walk(node)
return "\n".join(output)
def get_outline(path: Path, code: str) -> str: def get_outline(path: Path, code: str) -> str:
suffix = path.suffix.lower() suffix = path.suffix.lower()
if suffix == ".py": if suffix == ".py":
outliner = CodeOutliner() outliner = CodeOutliner()
return outliner.outline(code) return outliner.outline(code)
else: else:
return f"Outlining not supported for {suffix} files yet." return f"Outlining not supported for {suffix} files yet."

View File

@@ -3,132 +3,117 @@ import psutil
import threading import threading
class PerformanceMonitor: class PerformanceMonitor:
def __init__(self): def __init__(self):
self._start_time = None self._start_time = None
self._last_frame_time = 0.0 self._last_frame_time = 0.0
self._fps = 0.0 self._fps = 0.0
self._frame_count = 0 self._frame_count = 0
self._fps_last_time = time.time() self._fps_last_time = time.time()
self._process = psutil.Process() self._process = psutil.Process()
self._cpu_usage = 0.0 self._cpu_usage = 0.0
self._cpu_lock = threading.Lock() self._cpu_lock = threading.Lock()
# Input lag tracking
# Input lag tracking self._last_input_time = None
self._last_input_time = None self._input_lag_ms = 0.0
self._input_lag_ms = 0.0 # Alerts
self.alert_callback = None
# Alerts self.thresholds = {
self.alert_callback = None 'frame_time_ms': 33.3, # < 30 FPS
self.thresholds = { 'cpu_percent': 80.0,
'frame_time_ms': 33.3, # < 30 FPS 'input_lag_ms': 100.0
'cpu_percent': 80.0, }
'input_lag_ms': 100.0 self._last_alert_time = 0
} self._alert_cooldown = 30 # seconds
self._last_alert_time = 0 # Detailed profiling
self._alert_cooldown = 30 # seconds self._component_timings = {}
self._comp_start = {}
# Start CPU usage monitoring thread
self._stop_event = threading.Event()
self._cpu_thread = threading.Thread(target=self._monitor_cpu, daemon=True)
self._cpu_thread.start()
# Detailed profiling def _monitor_cpu(self):
self._component_timings = {} while not self._stop_event.is_set():
self._comp_start = {} # psutil.cpu_percent with interval=1.0 is blocking for 1 second.
# To be responsive to stop_event, we use a smaller interval or no interval
# Start CPU usage monitoring thread # and handle the timing ourselves.
self._stop_event = threading.Event() try:
self._cpu_thread = threading.Thread(target=self._monitor_cpu, daemon=True) usage = self._process.cpu_percent()
self._cpu_thread.start() with self._cpu_lock:
self._cpu_usage = usage
except Exception:
pass
# Sleep in small increments to stay responsive to stop_event
for _ in range(10):
if self._stop_event.is_set():
break
time.sleep(0.1)
def _monitor_cpu(self): def start_frame(self):
while not self._stop_event.is_set(): self._start_time = time.time()
# psutil.cpu_percent with interval=1.0 is blocking for 1 second.
# To be responsive to stop_event, we use a smaller interval or no interval
# and handle the timing ourselves.
try:
usage = self._process.cpu_percent()
with self._cpu_lock:
self._cpu_usage = usage
except Exception:
pass
# Sleep in small increments to stay responsive to stop_event
for _ in range(10):
if self._stop_event.is_set():
break
time.sleep(0.1)
def start_frame(self): def record_input_event(self):
self._start_time = time.time() self._last_input_time = time.time()
def record_input_event(self): def start_component(self, name: str):
self._last_input_time = time.time() self._comp_start[name] = time.time()
def start_component(self, name: str): def end_component(self, name: str):
self._comp_start[name] = time.time() if name in self._comp_start:
elapsed = (time.time() - self._comp_start[name]) * 1000.0
self._component_timings[name] = elapsed
def end_component(self, name: str): def end_frame(self):
if name in self._comp_start: if self._start_time is None:
elapsed = (time.time() - self._comp_start[name]) * 1000.0 return
self._component_timings[name] = elapsed end_time = time.time()
self._last_frame_time = (end_time - self._start_time) * 1000.0
self._frame_count += 1
# Calculate input lag if an input occurred during this frame
if self._last_input_time is not None:
self._input_lag_ms = (end_time - self._last_input_time) * 1000.0
self._last_input_time = None
self._check_alerts()
elapsed_since_fps = end_time - self._fps_last_time
if elapsed_since_fps >= 1.0:
self._fps = self._frame_count / elapsed_since_fps
self._frame_count = 0
self._fps_last_time = end_time
def end_frame(self): def _check_alerts(self):
if self._start_time is None: if not self.alert_callback:
return return
now = time.time()
end_time = time.time() if now - self._last_alert_time < self._alert_cooldown:
self._last_frame_time = (end_time - self._start_time) * 1000.0 return
self._frame_count += 1 metrics = self.get_metrics()
alerts = []
# Calculate input lag if an input occurred during this frame if metrics['last_frame_time_ms'] > self.thresholds['frame_time_ms']:
if self._last_input_time is not None: alerts.append(f"Frame time high: {metrics['last_frame_time_ms']:.1f}ms")
self._input_lag_ms = (end_time - self._last_input_time) * 1000.0 if metrics['cpu_percent'] > self.thresholds['cpu_percent']:
self._last_input_time = None alerts.append(f"CPU usage high: {metrics['cpu_percent']:.1f}%")
if metrics['input_lag_ms'] > self.thresholds['input_lag_ms']:
alerts.append(f"Input lag high: {metrics['input_lag_ms']:.1f}ms")
if alerts:
self._last_alert_time = now
self.alert_callback("; ".join(alerts))
self._check_alerts() def get_metrics(self):
with self._cpu_lock:
elapsed_since_fps = end_time - self._fps_last_time cpu_usage = self._cpu_usage
if elapsed_since_fps >= 1.0: metrics = {
self._fps = self._frame_count / elapsed_since_fps 'last_frame_time_ms': self._last_frame_time,
self._frame_count = 0 'fps': self._fps,
self._fps_last_time = end_time 'cpu_percent': cpu_usage,
'input_lag_ms': self._last_input_time if self._last_input_time else 0.0 # Wait, this should be the calculated lag
}
# Oops, fixed the input lag logic in previous turn, let's keep it consistent
metrics['input_lag_ms'] = self._input_lag_ms
# Add detailed timings
for name, elapsed in self._component_timings.items():
metrics[f'time_{name}_ms'] = elapsed
return metrics
def _check_alerts(self): def stop(self):
if not self.alert_callback: self._stop_event.set()
return self._cpu_thread.join(timeout=2.0)
now = time.time()
if now - self._last_alert_time < self._alert_cooldown:
return
metrics = self.get_metrics()
alerts = []
if metrics['last_frame_time_ms'] > self.thresholds['frame_time_ms']:
alerts.append(f"Frame time high: {metrics['last_frame_time_ms']:.1f}ms")
if metrics['cpu_percent'] > self.thresholds['cpu_percent']:
alerts.append(f"CPU usage high: {metrics['cpu_percent']:.1f}%")
if metrics['input_lag_ms'] > self.thresholds['input_lag_ms']:
alerts.append(f"Input lag high: {metrics['input_lag_ms']:.1f}ms")
if alerts:
self._last_alert_time = now
self.alert_callback("; ".join(alerts))
def get_metrics(self):
with self._cpu_lock:
cpu_usage = self._cpu_usage
metrics = {
'last_frame_time_ms': self._last_frame_time,
'fps': self._fps,
'cpu_percent': cpu_usage,
'input_lag_ms': self._last_input_time if self._last_input_time else 0.0 # Wait, this should be the calculated lag
}
# Oops, fixed the input lag logic in previous turn, let's keep it consistent
metrics['input_lag_ms'] = self._input_lag_ms
# Add detailed timings
for name, elapsed in self._component_timings.items():
metrics[f'time_{name}_ms'] = elapsed
return metrics
def stop(self):
self._stop_event.set()
self._cpu_thread.join(timeout=2.0)

View File

@@ -3,7 +3,7 @@
Note(Gemini): Note(Gemini):
Handles loading/saving of project .toml configurations. Handles loading/saving of project .toml configurations.
Also handles serializing the discussion history into the TOML format using a special Also handles serializing the discussion history into the TOML format using a special
@timestamp prefix to preserve the exact sequence of events. @timestamp prefix to preserve the exact sequence of events.
""" """
import subprocess import subprocess
import datetime import datetime
@@ -11,384 +11,307 @@ import tomllib
import tomli_w import tomli_w
import re import re
import json import json
from typing import Any, Optional, TYPE_CHECKING, Union
from pathlib import Path from pathlib import Path
if TYPE_CHECKING:
TS_FMT = "%Y-%m-%dT%H:%M:%S" from models import TrackState
TS_FMT: str = "%Y-%m-%dT%H:%M:%S"
def now_ts() -> str: def now_ts() -> str:
return datetime.datetime.now().strftime(TS_FMT) return datetime.datetime.now().strftime(TS_FMT)
def parse_ts(s: str) -> Optional[datetime.datetime]:
try:
def parse_ts(s: str): return datetime.datetime.strptime(s, TS_FMT)
try: except Exception:
return datetime.datetime.strptime(s, TS_FMT) return None
except Exception:
return None
# ── entry serialisation ────────────────────────────────────────────────────── # ── entry serialisation ──────────────────────────────────────────────────────
def entry_to_str(entry: dict[str, Any]) -> str:
def entry_to_str(entry: dict) -> str: """Serialise a disc entry dict -> stored string."""
"""Serialise a disc entry dict -> stored string.""" ts = entry.get("ts", "")
ts = entry.get("ts", "") role = entry.get("role", "User")
role = entry.get("role", "User") content = entry.get("content", "")
content = entry.get("content", "") if ts:
if ts: return f"@{ts}\n{role}:\n{content}"
return f"@{ts}\n{role}:\n{content}" return f"{role}:\n{content}"
return f"{role}:\n{content}" def str_to_entry(raw: str, roles: list[str]) -> dict[str, Any]:
"""Parse a stored string back to a disc entry dict."""
ts = ""
def str_to_entry(raw: str, roles: list[str]) -> dict: rest = raw
"""Parse a stored string back to a disc entry dict.""" if rest.startswith("@"):
ts = "" nl = rest.find("\n")
rest = raw if nl != -1:
if rest.startswith("@"): ts = rest[1:nl]
nl = rest.find("\n") rest = rest[nl + 1:]
if nl != -1: known = roles or ["User", "AI", "Vendor API", "System"]
ts = rest[1:nl] role_pat = re.compile(
rest = rest[nl + 1:] r"^(?:\[)?(" + "|".join(re.escape(r) for r in known) + r")(?:\])?:?\s*$",
known = roles or ["User", "AI", "Vendor API", "System"] re.IGNORECASE,
role_pat = re.compile( )
r"^(?:\[)?(" + "|".join(re.escape(r) for r in known) + r")(?:\])?:?\s*$", parts = rest.split("\n", 1)
re.IGNORECASE, matched_role = "User"
) content = rest.strip()
parts = rest.split("\n", 1) if parts:
matched_role = "User" m = role_pat.match(parts[0].strip())
content = rest.strip() if m:
if parts: raw_role = m.group(1)
m = role_pat.match(parts[0].strip()) matched_role = next((r for r in known if r.lower() == raw_role.lower()), raw_role)
if m: content = parts[1].strip() if len(parts) > 1 else ""
raw_role = m.group(1) return {"role": matched_role, "content": content, "collapsed": False, "ts": ts}
matched_role = next((r for r in known if r.lower() == raw_role.lower()), raw_role)
content = parts[1].strip() if len(parts) > 1 else ""
return {"role": matched_role, "content": content, "collapsed": False, "ts": ts}
# ── git helpers ────────────────────────────────────────────────────────────── # ── git helpers ──────────────────────────────────────────────────────────────
def get_git_commit(git_dir: str) -> str:
    """Return the HEAD commit hash of the repo at *git_dir*, or '' on any failure."""
    cmd = ["git", "rev-parse", "HEAD"]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, cwd=git_dir, timeout=5)
    except Exception:
        # Missing git, bad cwd, timeout, etc. — callers only want best-effort info.
        return ""
    if result.returncode != 0:
        return ""
    return result.stdout.strip()
def get_git_log(git_dir: str, n: int = 5) -> str:
    """Return the last *n* commits at *git_dir* as one-line entries, or '' on failure."""
    cmd = ["git", "log", "--oneline", f"-{n}"]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, cwd=git_dir, timeout=5)
    except Exception:
        # Best-effort: any subprocess failure degrades to an empty log.
        return ""
    if result.returncode != 0:
        return ""
    return result.stdout.strip()
# ── default structures ─────────────────────────────────────────────────────── # ── default structures ───────────────────────────────────────────────────────
def default_discussion() -> dict[str, Any]:
    """Return a fresh, empty discussion record stamped with the current time."""
    return {
        "git_commit": "",
        "last_updated": now_ts(),
        "history": [],
    }
def default_project(name: str = "unnamed") -> dict[str, Any]:
    """Return a fresh project dict with every section populated with defaults."""
    return {
        # Core identity and prompt configuration.
        "project": {"name": name, "git_dir": "", "system_prompt": "", "main_context": ""},
        "output": {"output_dir": "./md_gen"},
        # Source files aggregated into generated markdown.
        "files": {"base_dir": ".", "paths": [], "tier_assignments": {}},
        "screenshots": {"base_dir": ".", "paths": []},
        "gemini_cli": {"binary_path": "gemini"},
        "deepseek": {"reasoning_effort": "medium"},
        # Per-tool enable flags for the agent; everything on by default.
        "agent": {
            "tools": {
                "run_powershell": True,
                "read_file": True,
                "list_directory": True,
                "search_files": True,
                "get_file_summary": True,
                "web_search": True,
                "fetch_url": True
            }
        },
        # Discussion threads; a 'main' thread always exists.
        "discussion": {
            "roles": ["User", "AI", "Vendor API", "System", "Reasoning"],
            "active": "main",
            "discussions": {"main": default_discussion()},
        },
        # Multi-agent (MMA) orchestration state.
        "mma": {
            "epic": "",
            "active_track_id": "",
            "tracks": []
        }
    }
# ── load / save ──────────────────────────────────────────────────────────────
def get_history_path(project_path: Union[str, Path]) -> Path:
    """Return the Path to the sibling history TOML file for a given project."""
    project_file = Path(project_path)
    return project_file.with_name(f"{project_file.stem}_history.toml")
def load_project(path: Union[str, Path]) -> dict[str, Any]:
    """
    Load a project TOML file.
    Automatically migrates legacy 'discussion' keys to a sibling history file.
    """
    with open(path, "rb") as f:
        proj = tomllib.load(f)
    hist_path = get_history_path(path)
    if "discussion" in proj:
        # Legacy layout: discussion history embedded in the project file.
        # Migrate it out to the sibling history file (overwriting any old one)...
        disc = proj.pop("discussion")
        with open(hist_path, "wb") as f:
            tomli_w.dump(disc, f)
        # ...persist the stripped project file...
        save_project(proj, path)
        # ...then restore the key on the returned dict so the GUI keeps working
        # as it did before the migration.
        proj["discussion"] = disc
    else:
        # Current layout: history lives in the sibling file (if one exists yet).
        if hist_path.exists():
            proj["discussion"] = load_history(path)
    return proj
def load_history(project_path: Union[str, Path]) -> dict[str, Any]:
    """Load the segregated discussion history from its dedicated TOML file.

    Returns an empty dict when no history file exists yet.
    """
    hist_path = get_history_path(project_path)
    if not hist_path.exists():
        return {}
    with open(hist_path, "rb") as f:
        return tomllib.load(f)
def clean_nones(data: Any) -> Any:
    """Recursively remove None values from a dictionary/list.

    TOML cannot represent null, so None dict values and None list items are
    dropped before serialisation; every other value passes through unchanged.
    """
    if isinstance(data, dict):
        return {key: clean_nones(value) for key, value in data.items() if value is not None}
    if isinstance(data, list):
        return [clean_nones(item) for item in data if item is not None]
    return data
def save_project(proj: dict[str, Any], path: Union[str, Path], disc_data: Optional[dict[str, Any]] = None) -> None:
    """
    Save the project TOML.
    If 'discussion' is present in proj, it is moved to the sibling history file.
    """
    # TOML cannot represent None; strip such values before dumping.
    proj = clean_nones(proj)
    # Ensure 'discussion' is NOT written to the main project file.
    if "discussion" in proj:
        # If disc_data wasn't provided, persist the discussion found in proj.
        if disc_data is None:
            disc_data = proj["discussion"]
        proj = dict(proj)  # shallow copy so the caller's dict is not mutated
        del proj["discussion"]
    with open(path, "wb") as f:
        tomli_w.dump(proj, f)
    # Write the discussion history to the sibling <stem>_history.toml file.
    if disc_data:
        disc_data = clean_nones(disc_data)
        hist_path = get_history_path(path)
        with open(hist_path, "wb") as f:
            tomli_w.dump(disc_data, f)
# ── migration helper ───────────────────────────────────────────────────────── # ── migration helper ─────────────────────────────────────────────────────────
def migrate_from_legacy_config(cfg: dict[str, Any]) -> dict[str, Any]:
    """Build a fresh project dict from a legacy flat config.toml. Does NOT save."""
    namespace = cfg.get("output", {}).get("namespace", "project")
    proj = default_project(namespace)
    # Carry over the sections that map 1:1 from the legacy layout.
    for section in ("output", "files", "screenshots"):
        if section in cfg:
            proj[section] = dict(cfg[section])
    # Legacy discussion data folds into the default 'main' thread.
    legacy_disc = cfg.get("discussion", {})
    proj["discussion"]["roles"] = legacy_disc.get("roles", ["User", "AI", "Vendor API", "System"])
    main_disc = proj["discussion"]["discussions"]["main"]
    main_disc["history"] = legacy_disc.get("history", [])
    main_disc["last_updated"] = now_ts()
    return proj
# ── flat config for aggregate.run() ───────────────────────────────────────── # ── flat config for aggregate.run() ─────────────────────────────────────────
def flat_config(proj: dict[str, Any], disc_name: Optional[str] = None, track_id: Optional[str] = None) -> dict[str, Any]:
    """Return a flat config dict compatible with aggregate.run()."""
    disc_sec = proj.get("discussion", {})
    if track_id:
        # Track-backed history lives in conductor/tracks/<id>/state.toml.
        history = load_track_history(track_id, proj.get("files", {}).get("base_dir", "."))
    else:
        active = disc_name or disc_sec.get("active", "main")
        history = disc_sec.get("discussions", {}).get(active, {}).get("history", [])
    flat = {key: proj.get(key, {}) for key in ("project", "output", "files", "screenshots")}
    flat["discussion"] = {
        "roles": disc_sec.get("roles", []),
        "history": history,
    }
    return flat
# ── track state persistence ───────────────────────────────────────────────── # ── track state persistence ─────────────────────────────────────────────────
def save_track_state(track_id: str, state: 'TrackState', base_dir: Union[str, Path] = ".") -> None:
    """
    Saves a TrackState object to conductor/tracks/<track_id>/state.toml.
    """
    track_dir = Path(base_dir) / "conductor" / "tracks" / track_id
    track_dir.mkdir(parents=True, exist_ok=True)
    # TOML cannot serialise None values, so scrub them first.
    payload = clean_nones(state.to_dict())
    with open(track_dir / "state.toml", "wb") as f:
        tomli_w.dump(payload, f)
def load_track_state(track_id: str, base_dir: Union[str, Path] = ".") -> Optional['TrackState']:
    """
    Loads a TrackState object from conductor/tracks/<track_id>/state.toml.
    Returns None when the track has no persisted state yet.
    """
    from models import TrackState
    state_file = Path(base_dir) / "conductor" / "tracks" / track_id / "state.toml"
    if not state_file.exists():
        return None
    with open(state_file, "rb") as f:
        return TrackState.from_dict(tomllib.load(f))
def load_track_history(track_id: str, base_dir: Union[str, Path] = ".") -> list[str]:
    """
    Loads the discussion history for a specific track from its state.toml.
    Returns a list of entry strings formatted with @timestamp.

    Returns an empty list when the track has no persisted state.
    """
    # NOTE: the previous version did `from models import TrackState` here but
    # never used it; the import has been removed.
    state = load_track_state(track_id, base_dir)
    if not state:
        return []
    history: list[str] = []
    for entry in state.discussion:
        e = dict(entry)  # copy so the stored entry is not mutated
        ts = e.get("ts")
        # TOML deserialises bare timestamps as datetime objects; render them
        # back to TS_FMT text before formatting the entry.
        if isinstance(ts, datetime.datetime):
            e["ts"] = ts.strftime(TS_FMT)
        history.append(entry_to_str(e))
    return history
def save_track_history(track_id: str, history: list[str], base_dir: Union[str, Path] = ".") -> None:
    """
    Saves the discussion history for a specific track to its state.toml.
    'history' is expected to be a list of formatted strings.

    No-op when the track has no persisted state to update.
    """
    # NOTE: the previous version did `from models import TrackState` here but
    # never used it; the import has been removed.
    state = load_track_state(track_id, base_dir)
    if not state:
        return
    # Parse the formatted strings back into structured entries.
    roles = ["User", "AI", "Vendor API", "System", "Reasoning"]
    state.discussion = [str_to_entry(h, roles) for h in history]
    save_track_state(track_id, state, base_dir)
def get_all_tracks(base_dir: Union[str, Path] = ".") -> list[dict[str, Any]]:
    """
    Scans the conductor/tracks/ directory and returns a list of dictionaries
    containing track metadata: 'id', 'title', 'status', 'complete', 'total',
    and 'progress' (0.0 to 1.0).
    Handles missing or malformed metadata.json or state.toml by falling back
    to available info or defaults.
    """
    from models import TrackState
    tracks_dir = Path(base_dir) / "conductor" / "tracks"
    if not tracks_dir.exists():
        return []
    results: list[dict[str, Any]] = []
    for entry in tracks_dir.iterdir():
        if not entry.is_dir():
            continue
        track_id = entry.name
        # Defaults used when neither state.toml nor metadata.json is usable.
        track_info: dict[str, Any] = {
            "id": track_id,
            "title": track_id,
            "status": "unknown",
            "complete": 0,
            "total": 0,
            "progress": 0.0
        }
        state_found = False
        # Preferred source: state.toml via load_track_state.
        try:
            state = load_track_state(track_id, base_dir)
            if state:
                track_info["id"] = state.metadata.id or track_id
                track_info["title"] = state.metadata.name or track_id
                track_info["status"] = state.metadata.status or "unknown"
                track_info["complete"] = len([t for t in state.tasks if t.status == "completed"])
                track_info["total"] = len(state.tasks)
                if track_info["total"] > 0:
                    track_info["progress"] = track_info["complete"] / track_info["total"]
                state_found = True
        except Exception:
            # Malformed state.toml: fall through to metadata.json.
            pass
        if not state_found:
            # Fallback source: metadata.json (several legacy key spellings).
            metadata_file = entry / "metadata.json"
            if metadata_file.exists():
                try:
                    with open(metadata_file, "r") as f:
                        data = json.load(f)
                    track_info["id"] = data.get("id", data.get("track_id", track_id))
                    track_info["title"] = data.get("title", data.get("name", data.get("description", track_id)))
                    track_info["status"] = data.get("status", "unknown")
                except Exception:
                    pass
        if track_info["total"] == 0:
            # Last resort for progress: count markdown checklist items in plan.md.
            plan_file = entry / "plan.md"
            if plan_file.exists():
                try:
                    with open(plan_file, "r", encoding="utf-8") as f:
                        content = f.read()
                    # Checklist forms counted:
                    # - [ ] Task: ...   (todo)
                    # - [x] Task: ...   (done)
                    # - [~] Task: ...   (in progress)
                    tasks = re.findall(r"^[ \t]*- \[[ x~]\] .*", content, re.MULTILINE)
                    completed_tasks = re.findall(r"^[ \t]*- \[x\] .*", content, re.MULTILINE)
                    track_info["total"] = len(tasks)
                    track_info["complete"] = len(completed_tasks)
                    if track_info["total"] > 0:
                        track_info["progress"] = float(track_info["complete"]) / track_info["total"]
                except Exception:
                    pass
        results.append(track_info)
    return results

10
refactor_ui_task.toml Normal file
View File

@@ -0,0 +1,10 @@
role = "tier3-worker"
prompt = """Implement strict type hints for ALL functions and methods in @gui_2.py and @gui_legacy.py.
1. Use specific types (e.g., dict[str, Any], list[str], Union[str, Path], etc.) for arguments and returns.
2. Maintain the 'AI-Optimized' style: 1-space indentation, NO blank lines within function bodies, and maximum 1 blank line between definitions.
3. Since these files are very large, you MUST use surgical tools (discovered_tool_py_update_definition, discovered_tool_py_set_signature, discovered_tool_py_set_var_declaration) to apply changes. Do NOT try to overwrite the entire file at once.
4. Do NOT change any logic.
5. Use discovered_tool_py_check_syntax after each major change to verify syntax.
6. Ensure 'from typing import Any, Union, Optional, Callable' etc. are present (builtin generics such as dict[str, Any] and list[str] need no typing import).
7. Focus on completing the task efficiently without hitting timeouts."""
docs = ["gui_2.py", "gui_legacy.py", "conductor/workflow.md"]

View File

@@ -3,34 +3,29 @@ from models import Ticket
from dag_engine import TrackDAG, ExecutionEngine from dag_engine import TrackDAG, ExecutionEngine
def test_auto_queue_and_step_mode():
    """Spec test: the engine should auto-queue ready tasks, but hold tasks
    flagged step_mode=True until they are explicitly approved."""
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
    t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", step_mode=True)
    dag = TrackDAG([t1, t2])
    # Expectation: ExecutionEngine takes auto_queue parameter
    try:
        engine = ExecutionEngine(dag, auto_queue=True)
    except TypeError:
        pytest.fail("ExecutionEngine does not accept auto_queue parameter")
    # Tick 1: T1 should be 'in-progress' because auto_queue=True
    # T2 should remain 'todo' because step_mode=True
    engine.tick()
    assert t1.status == "in_progress"
    assert t2.status == "todo"
    # Approve T2
    try:
        engine.approve_task("T2")
    except AttributeError:
        pytest.fail("ExecutionEngine does not have approve_task method")
    assert t2.status == "in_progress"
if __name__ == "__main__":
    # Allow running this spec directly; it is expected to fail until the
    # auto_queue / approve_task features are implemented.
    try:
        test_auto_queue_and_step_mode()
        print("Test passed (unexpectedly)")
    except Exception as e:
        print(f"Test failed as expected: {e}")

View File

@@ -0,0 +1,21 @@
import subprocess
import sys
def test_type_hints() -> None:
    """Run the type-hint scanner over the target files; exit 0 only if all pass.

    A file fails when the scanner reports missing hints on stdout OR when the
    scanner itself exits non-zero.  (Previously only stdout was inspected, so
    a crashing scanner produced empty output and looked like success.)
    """
    files = ["project_manager.py", "session_logger.py"]
    all_missing: list[str] = []
    for f in files:
        print(f"Scanning {f}...")
        result = subprocess.run(["uv", "run", "python", "scripts/type_hint_scanner.py", f], capture_output=True, text=True)
        if result.returncode != 0:
            # Scanner crashed: surface stderr and count the file as failing.
            print(f"Scanner error for {f} (exit {result.returncode}):\n{result.stderr}")
            all_missing.append(f)
        elif result.stdout.strip():
            print(f"Missing hints in {f}:\n{result.stdout}")
            all_missing.append(f)
    if all_missing:
        print(f"FAILURE: Missing type hints in: {', '.join(all_missing)}")
        sys.exit(1)
    else:
        print("SUCCESS: All functions have type hints.")
        sys.exit(0)
if __name__ == "__main__":
    test_type_hints()

View File

@@ -5,7 +5,7 @@ import pytest
from typing import Dict, List, Any from typing import Dict, List, Any
def load_manifest(path: str) -> Dict[str, Any]: def load_manifest(path: str) -> Dict[str, Any]:
""" """
Loads a manifest file (expected to be in TOML format) from the given path. Loads a manifest file (expected to be in TOML format) from the given path.
Args: Args:
@@ -18,18 +18,18 @@ def load_manifest(path: str) -> Dict[str, Any]:
FileNotFoundError: If the manifest file does not exist. FileNotFoundError: If the manifest file does not exist.
tomllib.TOMLDecodeError: If the manifest file is not valid TOML. tomllib.TOMLDecodeError: If the manifest file is not valid TOML.
""" """
try: try:
with open(path, 'rb') as f: with open(path, 'rb') as f:
return tomllib.load(f) return tomllib.load(f)
except FileNotFoundError: except FileNotFoundError:
print(f"Error: Manifest file not found at {path}", file=sys.stderr) print(f"Error: Manifest file not found at {path}", file=sys.stderr)
raise raise
except tomllib.TOMLDecodeError: except tomllib.TOMLDecodeError:
print(f"Error: Could not decode TOML from {path}", file=sys.stderr) print(f"Error: Could not decode TOML from {path}", file=sys.stderr)
raise raise
def get_test_files(manifest: Dict[str, Any], category: str) -> List[str]: def get_test_files(manifest: Dict[str, Any], category: str) -> List[str]:
""" """
Determines the list of test files based on the manifest and a specified category. Determines the list of test files based on the manifest and a specified category.
Args: Args:
@@ -40,16 +40,16 @@ def get_test_files(manifest: Dict[str, Any], category: str) -> List[str]:
A list of file paths corresponding to the tests in the given category. A list of file paths corresponding to the tests in the given category.
Returns an empty list if the category is not found or has no tests. Returns an empty list if the category is not found or has no tests.
""" """
print(f"DEBUG: Looking for category '{category}' in manifest.", file=sys.stderr) print(f"DEBUG: Looking for category '{category}' in manifest.", file=sys.stderr)
files = manifest.get("categories", {}).get(category, {}).get("files", []) files = manifest.get("categories", {}).get(category, {}).get("files", [])
print(f"DEBUG: Found test files for category '{category}': {files}", file=sys.stderr) print(f"DEBUG: Found test files for category '{category}': {files}", file=sys.stderr)
return files return files
def main(): def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Run tests with optional manifest and category filtering, passing additional pytest arguments.", description="Run tests with optional manifest and category filtering, passing additional pytest arguments.",
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""\ epilog="""\
Example usage: Example usage:
python run_tests.py --manifest tests.toml --category unit -- --verbose --cov=my_module python run_tests.py --manifest tests.toml --category unit -- --verbose --cov=my_module
python run_tests.py --manifest tests.toml --category integration python run_tests.py --manifest tests.toml --category integration
@@ -57,65 +57,57 @@ Example usage:
python run_tests.py --manifest tests.toml # Runs tests from default_categories python run_tests.py --manifest tests.toml # Runs tests from default_categories
python run_tests.py -- --capture=no # Runs all tests with pytest args python run_tests.py -- --capture=no # Runs all tests with pytest args
""" """
) )
parser.add_argument( parser.add_argument(
"--manifest", "--manifest",
type=str, type=str,
help="Path to the TOML manifest file containing test configurations." help="Path to the TOML manifest file containing test configurations."
) )
parser.add_argument( parser.add_argument(
"--category", "--category",
type=str, type=str,
help="Category of tests to run (e.g., 'unit', 'integration')." help="Category of tests to run (e.g., 'unit', 'integration')."
) )
# Parse known arguments for the script itself, then parse remaining args for pytest
# Parse known arguments for the script itself, then parse remaining args for pytest args, remaining_pytest_args = parser.parse_known_args(sys.argv[1:])
args, remaining_pytest_args = parser.parse_known_args(sys.argv[1:]) selected_test_files = []
manifest_data = None
selected_test_files = [] if args.manifest:
manifest_data = None try:
manifest_data = load_manifest(args.manifest)
if args.manifest: except (FileNotFoundError, tomllib.TOMLDecodeError):
try: # Error message already printed by load_manifest
manifest_data = load_manifest(args.manifest) sys.exit(1)
except (FileNotFoundError, tomllib.TOMLDecodeError): if args.category:
# Error message already printed by load_manifest # Case 1: --manifest and --category provided
sys.exit(1) files = get_test_files(manifest_data, args.category)
selected_test_files.extend(files)
if args.category: else:
# Case 1: --manifest and --category provided # Case 2: --manifest provided, but no --category
files = get_test_files(manifest_data, args.category) # Load default categories from manifest['execution']['default_categories']
selected_test_files.extend(files) default_categories = manifest_data.get("execution", {}).get("default_categories", [])
else: if not default_categories:
# Case 2: --manifest provided, but no --category print(f"Error: --manifest provided without --category, and no 'default_categories' found in manifest '{args.manifest}'.", file=sys.stderr)
# Load default categories from manifest['execution']['default_categories'] parser.print_help(sys.stderr)
default_categories = manifest_data.get("execution", {}).get("default_categories", []) sys.exit(1)
if not default_categories: print(f"DEBUG: Using default categories from manifest '{args.manifest}': {default_categories}", file=sys.stderr)
print(f"Error: --manifest provided without --category, and no 'default_categories' found in manifest '{args.manifest}'.", file=sys.stderr) for cat in default_categories:
parser.print_help(sys.stderr) files = get_test_files(manifest_data, cat)
sys.exit(1) selected_test_files.extend(files)
elif args.category:
print(f"DEBUG: Using default categories from manifest '{args.manifest}': {default_categories}", file=sys.stderr) # Case 3: --category provided without --manifest
for cat in default_categories: print("Error: --category requires --manifest to be specified.", file=sys.stderr)
files = get_test_files(manifest_data, cat) parser.print_help(sys.stderr)
selected_test_files.extend(files) sys.exit(1)
elif args.category: # Combine selected test files with any remaining pytest arguments that were not parsed by this script.
# Case 3: --category provided without --manifest # We also filter out the literal '--' if it was passed by the user to avoid pytest errors if it appears multiple times.
print("Error: --category requires --manifest to be specified.", file=sys.stderr) pytest_command_args = selected_test_files + [arg for arg in remaining_pytest_args if arg != '--']
parser.print_help(sys.stderr) # Filter out any empty strings that might have been included.
sys.exit(1) final_pytest_args = [arg for arg in pytest_command_args if arg]
# If no specific tests were selected from manifest/category and no manifest was provided,
# Combine selected test files with any remaining pytest arguments that were not parsed by this script. # and no other pytest args were given, pytest.main([]) runs default test discovery.
# We also filter out the literal '--' if it was passed by the user to avoid pytest errors if it appears multiple times. print(f"Running pytest with arguments: {final_pytest_args}", file=sys.stderr)
pytest_command_args = selected_test_files + [arg for arg in remaining_pytest_args if arg != '--'] sys.exit(pytest.main(final_pytest_args))
# Filter out any empty strings that might have been included.
final_pytest_args = [arg for arg in pytest_command_args if arg]
# If no specific tests were selected from manifest/category and no manifest was provided,
# and no other pytest args were given, pytest.main([]) runs default test discovery.
print(f"Running pytest with arguments: {final_pytest_args}", file=sys.stderr)
sys.exit(pytest.main(final_pytest_args))
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -89,13 +89,20 @@ def main():
# This prevents the hook from affecting normal CLI usage. # This prevents the hook from affecting normal CLI usage.
hook_context = os.environ.get("GEMINI_CLI_HOOK_CONTEXT") hook_context = os.environ.get("GEMINI_CLI_HOOK_CONTEXT")
logging.debug(f"Checking GEMINI_CLI_HOOK_CONTEXT: '{hook_context}'") logging.debug(f"Checking GEMINI_CLI_HOOK_CONTEXT: '{hook_context}'")
if hook_context != "manual_slop": if hook_context != "manual_slop" and hook_context != "mma_headless":
logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is '{hook_context}', NOT 'manual_slop'. Allowing execution without confirmation.") logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is '{hook_context}', NOT 'manual_slop' or 'mma_headless'. Allowing execution without confirmation.")
print(json.dumps({ print(json.dumps({
"decision": "allow", "decision": "allow",
"reason": f"Non-programmatic usage (GEMINI_CLI_HOOK_CONTEXT={hook_context})." "reason": f"Non-programmatic usage (GEMINI_CLI_HOOK_CONTEXT={hook_context})."
})) }))
return return
if hook_context == "mma_headless":
logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is 'mma_headless'. Allowing execution for sub-agent.")
print(json.dumps({
"decision": "allow",
"reason": "Sub-agent headless mode (MMA)."
}))
return
# 5. Use 'ApiHookClient' (assuming GUI is on http://127.0.0.1:8999) # 5. Use 'ApiHookClient' (assuming GUI is on http://127.0.0.1:8999)
logging.debug("GEMINI_CLI_HOOK_CONTEXT is 'manual_slop'. Proceeding with API Hook Client.") logging.debug("GEMINI_CLI_HOOK_CONTEXT is 'manual_slop'. Proceeding with API Hook Client.")
client = ApiHookClient(base_url="http://127.0.0.1:8999") client = ApiHookClient(base_url="http://127.0.0.1:8999")

View File

@@ -189,15 +189,15 @@ def execute_agent(role: str, prompt: str, docs: list[str]) -> str:
command_text += f"\n\nTASK: {prompt}\n\n" command_text += f"\n\nTASK: {prompt}\n\n"
# Use subprocess with input to pipe the prompt via stdin, avoiding WinError 206. # Use subprocess with input to pipe the prompt via stdin, avoiding WinError 206.
# We use -p 'mma_task' to ensure non-interactive (headless) mode and valid parsing. # We use -p 'mma_task' to ensure non-interactive (headless) mode and valid parsing.
# Whitelist tools to ensure they are available to the model in headless mode.
allowed_tools = "read_file,write_file,replace,list_directory,glob,grep_search,discovered_tool_search_files,discovered_tool_get_file_summary,discovered_tool_py_get_skeleton,discovered_tool_py_get_code_outline,discovered_tool_py_get_definition,discovered_tool_py_update_definition,discovered_tool_py_get_signature,discovered_tool_py_set_signature,discovered_tool_py_get_class_summary,discovered_tool_py_get_var_declaration,discovered_tool_py_set_var_declaration,discovered_tool_get_git_diff,discovered_tool_run_powershell,activate_skill,codebase_investigator,discovered_tool_web_search,discovered_tool_fetch_url,discovered_tool_py_find_usages,discovered_tool_py_get_imports,discovered_tool_py_check_syntax,discovered_tool_py_get_hierarchy,discovered_tool_py_get_docstring,discovered_tool_get_tree"
ps_command = ( ps_command = (
f"if (Test-Path 'C:\\projects\\misc\\setup_gemini.ps1') {{ . 'C:\\projects\\misc\\setup_gemini.ps1' }}; " f"if (Test-Path 'C:\\projects\\misc\\setup_gemini.ps1') {{ . 'C:\\projects\\misc\\setup_gemini.ps1' }}; "
f"gemini -p 'mma_task' --allowed-tools {allowed_tools} --output-format json --model {model}" f"gemini -p '{role}' --output-format json --model {model}"
) )
cmd = ['powershell.exe', '-NoProfile', '-Command', ps_command] cmd = ['powershell.exe', '-NoProfile', '-Command', ps_command]
try: try:
process = subprocess.run(cmd, input=command_text, capture_output=True, text=True, encoding='utf-8') env = os.environ.copy()
env["GEMINI_CLI_HOOK_CONTEXT"] = "mma_headless"
process = subprocess.run(cmd, input=command_text, capture_output=True, text=True, encoding='utf-8', env=env)
result = process.stdout result = process.stdout
if not process.stdout and process.stderr: if not process.stdout and process.stderr:
result = f"Error: {process.stderr}" result = f"Error: {process.stderr}"

View File

@@ -3,7 +3,6 @@
Opens timestamped log/script files at startup and keeps them open for the Opens timestamped log/script files at startup and keeps them open for the
lifetime of the process. The next run of the GUI creates new files; the lifetime of the process. The next run of the GUI creates new files; the
previous run's files are simply closed when the process exits. previous run's files are simply closed when the process exits.
File layout File layout
----------- -----------
logs/ logs/
@@ -12,195 +11,149 @@ logs/
clicalls_<ts>.log - sequential record of every CLI subprocess call clicalls_<ts>.log - sequential record of every CLI subprocess call
scripts/generated/ scripts/generated/
<ts>_<seq:04d>.ps1 - each PowerShell script the AI generated, in order <ts>_<seq:04d>.ps1 - each PowerShell script the AI generated, in order
Where <ts> = YYYYMMDD_HHMMSS of when this session was started. Where <ts> = YYYYMMDD_HHMMSS of when this session was started.
""" """
import atexit import atexit
import datetime import datetime
import json import json
import threading import threading
from typing import Any, Optional, TextIO
from pathlib import Path from pathlib import Path
_LOG_DIR: Path = Path("./logs")
_LOG_DIR = Path("./logs") _SCRIPTS_DIR: Path = Path("./scripts/generated")
_SCRIPTS_DIR = Path("./scripts/generated")
_ts: str = "" # session timestamp string e.g. "20260301_142233" _ts: str = "" # session timestamp string e.g. "20260301_142233"
_session_id: str = "" # YYYYMMDD_HHMMSS[_Label] _session_id: str = "" # YYYYMMDD_HHMMSS[_Label]
_session_dir: Path = None # Path to the sub-directory for this session _session_dir: Optional[Path] = None # Path to the sub-directory for this session
_seq: int = 0 # monotonic counter for script files this session _seq: int = 0 # monotonic counter for script files this session
_seq_lock = threading.Lock() _seq_lock: threading.Lock = threading.Lock()
_comms_fh: Optional[TextIO] = None # file handle: logs/<session_id>/comms.log
_comms_fh = None # file handle: logs/<session_id>/comms.log _tool_fh: Optional[TextIO] = None # file handle: logs/<session_id>/toolcalls.log
_tool_fh = None # file handle: logs/<session_id>/toolcalls.log _api_fh: Optional[TextIO] = None # file handle: logs/<session_id>/apihooks.log
_api_fh = None # file handle: logs/<session_id>/apihooks.log _cli_fh: Optional[TextIO] = None # file handle: logs/<session_id>/clicalls.log
_cli_fh = None # file handle: logs/<session_id>/clicalls.log
def _now_ts() -> str: def _now_ts() -> str:
return datetime.datetime.now().strftime("%Y%m%d_%H%M%S") return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
def open_session(label: Optional[str] = None) -> None:
"""
def open_session(label: str | None = None):
"""
Called once at GUI startup. Creates the log directories if needed and Called once at GUI startup. Creates the log directories if needed and
opens the log files for this session within a sub-directory. opens the log files for this session within a sub-directory.
""" """
global _ts, _session_id, _session_dir, _comms_fh, _tool_fh, _api_fh, _cli_fh, _seq global _ts, _session_id, _session_dir, _comms_fh, _tool_fh, _api_fh, _cli_fh, _seq
if _comms_fh is not None:
if _comms_fh is not None: return
return # already open _ts = _now_ts()
_session_id = _ts
_ts = _now_ts() if label:
_session_id = _ts safe_label = "".join(c if c.isalnum() or c in ("-", "_") else "_" for c in label)
if label: _session_id += f"_{safe_label}"
# Sanitize label: remove non-alphanumeric chars _session_dir = _LOG_DIR / _session_id
safe_label = "".join(c if c.isalnum() or c in ("-", "_") else "_" for c in label) _session_dir.mkdir(parents=True, exist_ok=True)
_session_id += f"_{safe_label}" _SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
_seq = 0
_session_dir = _LOG_DIR / _session_id _comms_fh = open(_session_dir / "comms.log", "w", encoding="utf-8", buffering=1)
_session_dir.mkdir(parents=True, exist_ok=True) _tool_fh = open(_session_dir / "toolcalls.log", "w", encoding="utf-8", buffering=1)
_SCRIPTS_DIR.mkdir(parents=True, exist_ok=True) _api_fh = open(_session_dir / "apihooks.log", "w", encoding="utf-8", buffering=1)
_cli_fh = open(_session_dir / "clicalls.log", "w", encoding="utf-8", buffering=1)
_seq = 0 _tool_fh.write(f"# Tool-call log — session {_session_id}\n\n")
_tool_fh.flush()
_comms_fh = open(_session_dir / "comms.log", "w", encoding="utf-8", buffering=1) _cli_fh.write(f"# CLI Subprocess Call Log — session {_session_id}\n\n")
_tool_fh = open(_session_dir / "toolcalls.log", "w", encoding="utf-8", buffering=1) _cli_fh.flush()
_api_fh = open(_session_dir / "apihooks.log", "w", encoding="utf-8", buffering=1) try:
_cli_fh = open(_session_dir / "clicalls.log", "w", encoding="utf-8", buffering=1) from log_registry import LogRegistry
registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
_tool_fh.write(f"# Tool-call log — session {_session_id}\n\n") registry.register_session(_session_id, str(_session_dir), datetime.datetime.now())
_tool_fh.flush() except Exception as e:
_cli_fh.write(f"# CLI Subprocess Call Log — session {_session_id}\n\n") print(f"Warning: Could not register session in LogRegistry: {e}")
_cli_fh.flush() atexit.register(close_session)
def close_session() -> None:
# Register this session in the log registry """Flush and close all log files. Called on clean exit."""
try: global _comms_fh, _tool_fh, _api_fh, _cli_fh, _session_id, _LOG_DIR
from log_registry import LogRegistry if _comms_fh is None:
registry = LogRegistry(str(_LOG_DIR / "log_registry.toml")) return
registry.register_session(_session_id, str(_session_dir), datetime.datetime.now()) if _comms_fh:
except Exception as e: _comms_fh.close()
print(f"Warning: Could not register session in LogRegistry: {e}") _comms_fh = None
if _tool_fh:
atexit.register(close_session) _tool_fh.close()
_tool_fh = None
if _api_fh:
def close_session(): _api_fh.close()
"""Flush and close all log files. Called on clean exit.""" _api_fh = None
global _comms_fh, _tool_fh, _api_fh, _cli_fh, _session_id, _LOG_DIR if _cli_fh:
_cli_fh.close()
if _comms_fh is None: _cli_fh = None
return try:
from log_registry import LogRegistry
# Close files first to ensure all data is flushed to disk registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
if _comms_fh: registry.update_auto_whitelist_status(_session_id)
_comms_fh.close() except Exception as e:
_comms_fh = None print(f"Warning: Could not update auto-whitelist on close: {e}")
if _tool_fh: def log_api_hook(method: str, path: str, payload: str) -> None:
_tool_fh.close() """Log an API hook invocation."""
_tool_fh = None if _api_fh is None:
if _api_fh: return
_api_fh.close() ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
_api_fh = None try:
if _cli_fh: _api_fh.write(f"[{ts_entry}] {method} {path} - Payload: {payload}\n")
_cli_fh.close() _api_fh.flush()
_cli_fh = None except Exception:
pass
# Trigger auto-whitelist update for this session after closing def log_comms(entry: dict[str, Any]) -> None:
try: """
from log_registry import LogRegistry
registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
registry.update_auto_whitelist_status(_session_id)
except Exception as e:
print(f"Warning: Could not update auto-whitelist on close: {e}")
def log_api_hook(method: str, path: str, payload: str):
"""
Log an API hook invocation.
"""
if _api_fh is None:
return
ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
try:
_api_fh.write(f"[{ts_entry}] {method} {path} - Payload: {payload}\n")
_api_fh.flush()
except Exception:
pass
def log_comms(entry: dict):
"""
Append one comms entry to the comms log file as a JSON-L line. Append one comms entry to the comms log file as a JSON-L line.
Thread-safe (GIL + line-buffered file). Thread-safe (GIL + line-buffered file).
""" """
if _comms_fh is None: if _comms_fh is None:
return return
try: try:
_comms_fh.write(json.dumps(entry, ensure_ascii=False, default=str) + "\n") _comms_fh.write(json.dumps(entry, ensure_ascii=False, default=str) + "\n")
except Exception: except Exception:
pass pass
def log_tool_call(script: str, result: str, script_path: Optional[str]) -> Optional[str]:
"""
def log_tool_call(script: str, result: str, script_path: str | None):
"""
Append a tool-call record to the toolcalls log and write the PS1 script to Append a tool-call record to the toolcalls log and write the PS1 script to
scripts/generated/. Returns the path of the written script file. scripts/generated/. Returns the path of the written script file.
""" """
global _seq global _seq
if _tool_fh is None:
if _tool_fh is None: return script_path
return script_path # logger not open yet with _seq_lock:
_seq += 1
with _seq_lock: seq = _seq
_seq += 1 ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
seq = _seq ps1_name = f"{_ts}_{seq:04d}.ps1"
ps1_path: Optional[Path] = _SCRIPTS_DIR / ps1_name
ts_entry = datetime.datetime.now().strftime("%H:%M:%S") try:
ps1_path.write_text(script, encoding="utf-8")
# Write the .ps1 file except Exception as exc:
ps1_name = f"{_ts}_{seq:04d}.ps1" ps1_path = None
ps1_path = _SCRIPTS_DIR / ps1_name ps1_name = f"(write error: {exc})"
try: try:
ps1_path.write_text(script, encoding="utf-8") _tool_fh.write(
except Exception as exc: f"## Call #{seq} [{ts_entry}]\n"
ps1_path = None f"Script file: {ps1_path}\n\n"
ps1_name = f"(write error: {exc})" f"### Result\n\n"
f"```\n{result}\n```\n\n"
# Append to the tool-call sequence log (script body omitted - see .ps1 file) f"---\n\n"
try: )
_tool_fh.write( _tool_fh.flush()
f"## Call #{seq} [{ts_entry}]\n" except Exception:
f"Script file: {ps1_path}\n\n" pass
f"### Result\n\n" return str(ps1_path) if ps1_path else None
f"```\n{result}\n```\n\n" def log_cli_call(command: str, stdin_content: Optional[str], stdout_content: Optional[str], stderr_content: Optional[str], latency: float) -> None:
f"---\n\n" """Log details of a CLI subprocess execution."""
) if _cli_fh is None:
_tool_fh.flush() return
except Exception: ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
pass try:
log_data = {
return str(ps1_path) if ps1_path else None "timestamp": ts_entry,
"command": command,
"stdin": stdin_content,
def log_cli_call(command: str, stdin_content: str | None, stdout_content: str | None, stderr_content: str | None, latency: float): "stdout": stdout_content,
""" "stderr": stderr_content,
Log details of a CLI subprocess execution. "latency_sec": latency
""" }
if _cli_fh is None: _cli_fh.write(json.dumps(log_data, ensure_ascii=False, default=str) + "\n")
return _cli_fh.flush()
except Exception:
ts_entry = datetime.datetime.now().strftime("%H:%M:%S") pass
try:
log_data = {
"timestamp": ts_entry,
"command": command,
"stdin": stdin_content,
"stdout": stdout_content,
"stderr": stderr_content,
"latency_sec": latency
}
_cli_fh.write(json.dumps(log_data, ensure_ascii=False, default=str) + "\n")
_cli_fh.flush()
except Exception:
pass

View File

@@ -3,37 +3,46 @@ import subprocess, shutil
from pathlib import Path from pathlib import Path
from typing import Callable, Optional from typing import Callable, Optional
TIMEOUT_SECONDS = 60 TIMEOUT_SECONDS: int = 60
def run_powershell(script: str, base_dir: str, qa_callback: Optional[Callable[[str], str]] = None) -> str: def run_powershell(script: str, base_dir: str, qa_callback: Optional[Callable[[str], str]] = None) -> str:
""" """
Run a PowerShell script with working directory set to base_dir. Run a PowerShell script with working directory set to base_dir.
Returns a string combining stdout, stderr, and exit code. Returns a string combining stdout, stderr, and exit code.
If qa_callback is provided and the command fails or has stderr, If qa_callback is provided and the command fails or has stderr,
the callback is called with the stderr content and its result is appended. the callback is called with the stderr content and its result is appended.
""" """
safe_dir = str(base_dir).replace("'", "''") safe_dir: str = str(base_dir).replace("'", "''")
full_script = f"Set-Location -LiteralPath '{safe_dir}'\n{script}" full_script: str = f"Set-Location -LiteralPath '{safe_dir}'\n{script}"
# Try common executable names # Try common executable names
exe = next((x for x in ["powershell.exe", "pwsh.exe", "powershell", "pwsh"] if shutil.which(x)), None) exe: Optional[str] = next((x for x in ["powershell.exe", "pwsh.exe", "powershell", "pwsh"] if shutil.which(x)), None)
if not exe: return "ERROR: Neither powershell nor pwsh found in PATH" if not exe: return "ERROR: Neither powershell nor pwsh found in PATH"
try:
try: process = subprocess.Popen(
r = subprocess.run( [exe, "-NoProfile", "-NonInteractive", "-Command", full_script],
[exe, "-NoProfile", "-NonInteractive", "-Command", full_script], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=base_dir
capture_output=True, text=True, timeout=TIMEOUT_SECONDS, cwd=base_dir )
) stdout, stderr = process.communicate(timeout=TIMEOUT_SECONDS)
parts = []
if r.stdout.strip(): parts.append(f"STDOUT:\n{r.stdout.strip()}") parts: list[str] = []
if r.stderr.strip(): parts.append(f"STDERR:\n{r.stderr.strip()}") if stdout.strip(): parts.append(f"STDOUT:\n{stdout.strip()}")
parts.append(f"EXIT CODE: {r.returncode}") if stderr.strip(): parts.append(f"STDERR:\n{stderr.strip()}")
parts.append(f"EXIT CODE: {process.returncode}")
# QA Interceptor logic
if (r.returncode != 0 or r.stderr.strip()) and qa_callback: if (process.returncode != 0 or stderr.strip()) and qa_callback:
qa_analysis = qa_callback(r.stderr.strip()) qa_analysis: Optional[str] = qa_callback(stderr.strip())
if qa_analysis: if qa_analysis:
parts.append(f"\nQA ANALYSIS:\n{qa_analysis}") parts.append(f"\nQA ANALYSIS:\n{qa_analysis}")
return "\n".join(parts)
return "\n".join(parts) except subprocess.TimeoutExpired:
except subprocess.TimeoutExpired: return f"ERROR: timed out after {TIMEOUT_SECONDS}s" if 'process' in locals() and process:
except Exception as e: return f"ERROR: {e}" subprocess.run(["taskkill", "/F", "/T", "/PID", str(process.pid)], capture_output=True)
return f"ERROR: timed out after {TIMEOUT_SECONDS}s"
except KeyboardInterrupt:
if 'process' in locals() and process:
subprocess.run(["taskkill", "/F", "/T", "/PID", str(process.pid)], capture_output=True)
raise
except Exception as e:
if 'process' in locals() and process:
subprocess.run(["taskkill", "/F", "/T", "/PID", str(process.pid)], capture_output=True)
return f"ERROR: {e}"

View File

@@ -6,74 +6,59 @@ from api_hook_client import ApiHookClient
from simulation.workflow_sim import WorkflowSimulator from simulation.workflow_sim import WorkflowSimulator
def main(): def main():
client = ApiHookClient() client = ApiHookClient()
print("=== Manual Slop: Live UX Walkthrough ===") print("=== Manual Slop: Live UX Walkthrough ===")
print("Connecting to GUI...") print("Connecting to GUI...")
if not client.wait_for_server(timeout=10): if not client.wait_for_server(timeout=10):
print("Error: Could not connect to GUI. Ensure it is running with --enable-test-hooks") print("Error: Could not connect to GUI. Ensure it is running with --enable-test-hooks")
return return
sim = WorkflowSimulator(client)
sim = WorkflowSimulator(client) # 1. Start Clean
print("\n[Action] Resetting Session...")
# 1. Start Clean client.click("btn_reset")
print("\n[Action] Resetting Session...") time.sleep(2)
client.click("btn_reset") # 2. Project Scaffolding
time.sleep(2) project_name = f"LiveTest_{int(time.time())}"
# Use actual project dir for realism
# 2. Project Scaffolding git_dir = os.path.abspath(".")
project_name = f"LiveTest_{int(time.time())}" project_path = os.path.join(git_dir, "tests", f"{project_name}.toml")
# Use actual project dir for realism print(f"\n[Action] Scaffolding Project: {project_name} at {project_path}")
git_dir = os.path.abspath(".") sim.setup_new_project(project_name, git_dir, project_path)
project_path = os.path.join(git_dir, "tests", f"{project_name}.toml") # Enable auto-add so results appear in history automatically
client.set_value("auto_add_history", True)
print(f"\n[Action] Scaffolding Project: {project_name} at {project_path}") time.sleep(1)
sim.setup_new_project(project_name, git_dir, project_path) # 3. Discussion Loop (3 turns for speed, but logic supports more)
turns = [
# Enable auto-add so results appear in history automatically "Hi! I want to create a simple python script called 'hello.py' that prints the current date and time. Can you write it for me?",
client.set_value("auto_add_history", True) "That looks great. Can you also add a feature to print the name of the operating system?",
time.sleep(1) "Excellent. Now, please create a requirements.txt file with 'requests' in it."
]
# 3. Discussion Loop (3 turns for speed, but logic supports more) for i, msg in enumerate(turns):
turns = [ print(f"\n--- Turn {i+1} ---")
"Hi! I want to create a simple python script called 'hello.py' that prints the current date and time. Can you write it for me?", # Switch to Comms Log to see the send
"That looks great. Can you also add a feature to print the name of the operating system?", client.select_tab("operations_tabs", "tab_comms")
"Excellent. Now, please create a requirements.txt file with 'requests' in it." sim.run_discussion_turn(msg)
] # Check thinking indicator
state = client.get_indicator_state("thinking_indicator")
for i, msg in enumerate(turns): if state.get('shown'):
print(f"\n--- Turn {i+1} ---") print("[Status] Thinking indicator is visible.")
# Switch to Tool Log halfway through wait
# Switch to Comms Log to see the send time.sleep(2)
client.select_tab("operations_tabs", "tab_comms") client.select_tab("operations_tabs", "tab_tool")
# Wait for AI response if not already finished
sim.run_discussion_turn(msg) # (run_discussion_turn already waits, so we just observe)
# 4. History Management
# Check thinking indicator print("\n[Action] Creating new discussion thread...")
state = client.get_indicator_state("thinking_indicator") sim.create_discussion("Refinement")
if state.get('shown'): print("\n[Action] Switching back to Default...")
print("[Status] Thinking indicator is visible.") sim.switch_discussion("Default")
# 5. Manual Sign-off Simulation
# Switch to Tool Log halfway through wait print("\n=== Walkthrough Complete ===")
time.sleep(2) print("Please verify the following in the GUI:")
client.select_tab("operations_tabs", "tab_tool") print("1. The project metadata reflects the new project.")
print("2. The discussion history contains the 3 turns.")
# Wait for AI response if not already finished print("3. The 'Refinement' discussion exists in the list.")
# (run_discussion_turn already waits, so we just observe) print("\nWalkthrough finished successfully.")
# 4. History Management
print("\n[Action] Creating new discussion thread...")
sim.create_discussion("Refinement")
print("\n[Action] Switching back to Default...")
sim.switch_discussion("Default")
# 5. Manual Sign-off Simulation
print("\n=== Walkthrough Complete ===")
print("Please verify the following in the GUI:")
print("1. The project metadata reflects the new project.")
print("2. The discussion history contains the 3 turns.")
print("3. The 'Refinement' discussion exists in the list.")
print("\nWalkthrough finished successfully.")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -9,49 +9,42 @@ from api_hook_client import ApiHookClient
from simulation.user_agent import UserSimAgent from simulation.user_agent import UserSimAgent
def main(): def main():
client = ApiHookClient() client = ApiHookClient()
print("Waiting for hook server...") print("Waiting for hook server...")
if not client.wait_for_server(timeout=5): if not client.wait_for_server(timeout=5):
print("Hook server not found. Start GUI with --enable-test-hooks") print("Hook server not found. Start GUI with --enable-test-hooks")
return return
sim_agent = UserSimAgent(client)
sim_agent = UserSimAgent(client) # 1. Reset session to start clean
print("Resetting session...")
# 1. Reset session to start clean client.click("btn_reset")
print("Resetting session...") time.sleep(2) # Give it time to clear
client.click("btn_reset") # 2. Initial message
time.sleep(2) # Give it time to clear initial_msg = "Hello! I want to create a simple python script that prints 'Hello World'. Can you help me?"
print(f"
# 2. Initial message [USER]: {initial_msg}")
initial_msg = "Hello! I want to create a simple python script that prints 'Hello World'. Can you help me?" client.set_value("ai_input", initial_msg)
client.click("btn_gen_send")
# 3. Wait for AI response
print("Waiting for AI response...", end="", flush=True)
last_entry_count = 0
for _ in range(60): # 60 seconds max
time.sleep(1)
print(".", end="", flush=True)
session = client.get_session()
entries = session.get('session', {}).get('entries', [])
if len(entries) > last_entry_count:
# Something happened
last_entry = entries[-1]
if last_entry.get('role') == 'AI' and last_entry.get('content'):
print(f" print(f"
[USER]: {initial_msg}") [AI]: {last_entry.get('content')[:100]}...")
client.set_value("ai_input", initial_msg)
client.click("btn_gen_send")
# 3. Wait for AI response
print("Waiting for AI response...", end="", flush=True)
last_entry_count = 0
for _ in range(60): # 60 seconds max
time.sleep(1)
print(".", end="", flush=True)
session = client.get_session()
entries = session.get('session', {}).get('entries', [])
if len(entries) > last_entry_count:
# Something happened
last_entry = entries[-1]
if last_entry.get('role') == 'AI' and last_entry.get('content'):
print(f"
[AI]: {last_entry.get('content')[:100]}...")
print("
Ping-pong successful!")
return
last_entry_count = len(entries)
print(" print("
Timeout waiting for AI response") Ping-pong successful!")
return
last_entry_count = len(entries)
print("
Timeout waiting for AI response")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -4,35 +4,30 @@ import time
from simulation.sim_base import BaseSimulation, run_sim from simulation.sim_base import BaseSimulation, run_sim
class AISettingsSimulation(BaseSimulation): class AISettingsSimulation(BaseSimulation):
def run(self): def run(self):
print("\n--- Running AI Settings Simulation (Gemini Only) ---") print("\n--- Running AI Settings Simulation (Gemini Only) ---")
# 1. Verify initial model
# 1. Verify initial model provider = self.client.get_value("current_provider")
provider = self.client.get_value("current_provider") model = self.client.get_value("current_model")
model = self.client.get_value("current_model") print(f"[Sim] Initial Provider: {provider}, Model: {model}")
print(f"[Sim] Initial Provider: {provider}, Model: {model}") assert provider == "gemini", f"Expected gemini, got {provider}"
assert provider == "gemini", f"Expected gemini, got {provider}" # 2. Switch to another Gemini model
other_gemini = "gemini-1.5-flash"
# 2. Switch to another Gemini model print(f"[Sim] Switching to {other_gemini}...")
other_gemini = "gemini-1.5-flash" self.client.set_value("current_model", other_gemini)
print(f"[Sim] Switching to {other_gemini}...") time.sleep(2)
self.client.set_value("current_model", other_gemini) # Verify
time.sleep(2) new_model = self.client.get_value("current_model")
print(f"[Sim] Updated Model: {new_model}")
# Verify assert new_model == other_gemini, f"Expected {other_gemini}, got {new_model}"
new_model = self.client.get_value("current_model") # 3. Switch back to flash-lite
print(f"[Sim] Updated Model: {new_model}") target_model = "gemini-2.5-flash-lite"
assert new_model == other_gemini, f"Expected {other_gemini}, got {new_model}" print(f"[Sim] Switching back to {target_model}...")
self.client.set_value("current_model", target_model)
# 3. Switch back to flash-lite time.sleep(2)
target_model = "gemini-2.5-flash-lite" final_model = self.client.get_value("current_model")
print(f"[Sim] Switching back to {target_model}...") print(f"[Sim] Final Model: {final_model}")
self.client.set_value("current_model", target_model) assert final_model == target_model, f"Expected {target_model}, got {final_model}"
time.sleep(2)
final_model = self.client.get_value("current_model")
print(f"[Sim] Final Model: {final_model}")
assert final_model == target_model, f"Expected {target_model}, got {final_model}"
if __name__ == "__main__": if __name__ == "__main__":
run_sim(AISettingsSimulation) run_sim(AISettingsSimulation)

View File

@@ -9,80 +9,75 @@ from simulation.workflow_sim import WorkflowSimulator
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
class BaseSimulation: class BaseSimulation:
def __init__(self, client: ApiHookClient = None): def __init__(self, client: ApiHookClient = None):
if client is None: if client is None:
self.client = ApiHookClient() self.client = ApiHookClient()
else: else:
self.client = client self.client = client
self.sim = WorkflowSimulator(self.client)
self.sim = WorkflowSimulator(self.client) self.project_path = None
self.project_path = None
def setup(self, project_name="SimProject"): def setup(self, project_name="SimProject"):
print(f"\n[BaseSim] Connecting to GUI...") print(f"\n[BaseSim] Connecting to GUI...")
if not self.client.wait_for_server(timeout=5): if not self.client.wait_for_server(timeout=5):
raise RuntimeError("Could not connect to GUI. Ensure it is running with --enable-test-hooks") raise RuntimeError("Could not connect to GUI. Ensure it is running with --enable-test-hooks")
print("[BaseSim] Resetting session...")
self.client.click("btn_reset")
time.sleep(0.5)
git_dir = os.path.abspath(".")
self.project_path = os.path.abspath(f"tests/temp_{project_name.lower()}.toml")
if os.path.exists(self.project_path):
os.remove(self.project_path)
print(f"[BaseSim] Scaffolding Project: {project_name}")
self.sim.setup_new_project(project_name, git_dir, self.project_path)
# Standard test settings
self.client.set_value("auto_add_history", True)
self.client.set_value("current_provider", "gemini")
self.client.set_value("current_model", "gemini-2.5-flash-lite")
time.sleep(0.2)
print("[BaseSim] Resetting session...") def teardown(self):
self.client.click("btn_reset") if self.project_path and os.path.exists(self.project_path):
time.sleep(0.5) # We keep it for debugging if it failed, but usually we'd clean up
# os.remove(self.project_path)
pass
print("[BaseSim] Teardown complete.")
git_dir = os.path.abspath(".") def get_value(self, tag):
self.project_path = os.path.abspath(f"tests/temp_{project_name.lower()}.toml") return self.client.get_value(tag)
if os.path.exists(self.project_path):
os.remove(self.project_path)
print(f"[BaseSim] Scaffolding Project: {project_name}") def wait_for_event(self, event_type, timeout=5):
self.sim.setup_new_project(project_name, git_dir, self.project_path) return self.client.wait_for_event(event_type, timeout)
# Standard test settings
self.client.set_value("auto_add_history", True)
self.client.set_value("current_provider", "gemini")
self.client.set_value("current_model", "gemini-2.5-flash-lite")
time.sleep(0.2)
def teardown(self): def assert_panel_visible(self, panel_tag, msg=None):
if self.project_path and os.path.exists(self.project_path): # This assumes we have a hook to check panel visibility or just check if an element in it exists
# We keep it for debugging if it failed, but usually we'd clean up # For now, we'll check if we can get a value from an element that should be in that panel
# os.remove(self.project_path) # or use a specific hook if available.
pass # Actually, let's just check if get_indicator_state or similar works for generic tags.
print("[BaseSim] Teardown complete.") pass
def get_value(self, tag): def wait_for_element(self, tag, timeout=2):
return self.client.get_value(tag) start = time.time()
while time.time() - start < timeout:
def wait_for_event(self, event_type, timeout=5): try:
return self.client.wait_for_event(event_type, timeout) # If we can get_value without error, it's likely there
self.client.get_value(tag)
def assert_panel_visible(self, panel_tag, msg=None): return True
# This assumes we have a hook to check panel visibility or just check if an element in it exists except:
# For now, we'll check if we can get a value from an element that should be in that panel time.sleep(0.1)
# or use a specific hook if available. return False
# Actually, let's just check if get_indicator_state or similar works for generic tags.
pass
def wait_for_element(self, tag, timeout=2):
start = time.time()
while time.time() - start < timeout:
try:
# If we can get_value without error, it's likely there
self.client.get_value(tag)
return True
except:
time.sleep(0.1)
return False
def run_sim(sim_class): def run_sim(sim_class):
"""Helper to run a simulation class standalone.""" """Helper to run a simulation class standalone."""
sim = sim_class() sim = sim_class()
try: try:
sim.setup() sim.setup()
sim.run() sim.run()
print(f"\n[SUCCESS] {sim_class.__name__} completed successfully.") print(f"\n[SUCCESS] {sim_class.__name__} completed successfully.")
except Exception as e: except Exception as e:
print(f"\n[FAILURE] {sim_class.__name__} failed: {e}") print(f"\n[FAILURE] {sim_class.__name__} failed: {e}")
import traceback import traceback
traceback.print_exc() traceback.print_exc()
sys.exit(1) sys.exit(1)
finally: finally:
sim.teardown() sim.teardown()

View File

@@ -4,78 +4,67 @@ import time
from simulation.sim_base import BaseSimulation, run_sim from simulation.sim_base import BaseSimulation, run_sim
class ContextSimulation(BaseSimulation): class ContextSimulation(BaseSimulation):
def run(self): def run(self):
print("\n--- Running Context & Chat Simulation ---") print("\n--- Running Context & Chat Simulation ---")
# 1. Test Discussion Creation
# 1. Test Discussion Creation disc_name = f"TestDisc_{int(time.time())}"
disc_name = f"TestDisc_{int(time.time())}" print(f"[Sim] Creating discussion: {disc_name}")
print(f"[Sim] Creating discussion: {disc_name}") self.sim.create_discussion(disc_name)
self.sim.create_discussion(disc_name) time.sleep(1)
time.sleep(1) # Verify it's in the list
session = self.client.get_session()
# Verify it's in the list # The session structure usually has discussions listed somewhere, or we can check the listbox
session = self.client.get_session() # For now, we'll trust the click and check the session update
# The session structure usually has discussions listed somewhere, or we can check the listbox # 2. Test File Aggregation & Context Refresh
# For now, we'll trust the click and check the session update print("[Sim] Testing context refresh and token budget...")
proj = self.client.get_project()
# 2. Test File Aggregation & Context Refresh # Add many files to ensure we cross the 1% threshold (~9000 tokens)
print("[Sim] Testing context refresh and token budget...") import glob
proj = self.client.get_project() all_py = [os.path.basename(f) for f in glob.glob("*.py")]
# Add many files to ensure we cross the 1% threshold (~9000 tokens) for f in all_py:
import glob if f not in proj['project']['files']['paths']:
all_py = [os.path.basename(f) for f in glob.glob("*.py")] proj['project']['files']['paths'].append(f)
for f in all_py: # Update project via hook
if f not in proj['project']['files']['paths']: self.client.post_project(proj['project'])
proj['project']['files']['paths'].append(f) time.sleep(1)
# Trigger MD Only to refresh context and token budget
# Update project via hook print("[Sim] Clicking MD Only...")
self.client.post_project(proj['project']) self.client.click("btn_md_only")
time.sleep(1) time.sleep(5)
# Verify status
# Trigger MD Only to refresh context and token budget proj_updated = self.client.get_project()
print("[Sim] Clicking MD Only...") status = self.client.get_value("ai_status")
self.client.click("btn_md_only") print(f"[Sim] Status: {status}")
time.sleep(5) assert "md written" in status, f"Expected 'md written' in status, got {status}"
# Verify token budget
# Verify status pct = self.client.get_value("token_budget_pct")
proj_updated = self.client.get_project() current = self.client.get_value("token_budget_current")
status = self.client.get_value("ai_status") print(f"[Sim] Token budget pct: {pct}, current={current}")
print(f"[Sim] Status: {status}") # We'll just warn if it's 0 but the MD was written, as it might be a small context
assert "md written" in status, f"Expected 'md written' in status, got {status}" if pct == 0:
print("[Sim] WARNING: token_budget_pct is 0. This might be due to small context or estimation failure.")
# Verify token budget # 3. Test Chat Turn
pct = self.client.get_value("token_budget_pct") msg = "What is the current date and time? Answer in one sentence."
current = self.client.get_value("token_budget_current") print(f"[Sim] Sending message: {msg}")
print(f"[Sim] Token budget pct: {pct}, current={current}") self.sim.run_discussion_turn(msg)
# We'll just warn if it's 0 but the MD was written, as it might be a small context # 4. Verify History
if pct == 0: print("[Sim] Verifying history...")
print("[Sim] WARNING: token_budget_pct is 0. This might be due to small context or estimation failure.") session = self.client.get_session()
entries = session.get('session', {}).get('entries', [])
# 3. Test Chat Turn # We expect at least 2 entries (User and AI)
msg = "What is the current date and time? Answer in one sentence." assert len(entries) >= 2, f"Expected at least 2 entries, found {len(entries)}"
print(f"[Sim] Sending message: {msg}") assert entries[-2]['role'] == 'User', "Expected second to last entry to be User"
self.sim.run_discussion_turn(msg) assert entries[-1]['role'] == 'AI', "Expected last entry to be AI"
print(f"[Sim] AI responded: {entries[-1]['content'][:50]}...")
# 4. Verify History # 5. Test History Truncation
print("[Sim] Verifying history...") print("[Sim] Testing history truncation...")
session = self.client.get_session() self.sim.truncate_history(1)
entries = session.get('session', {}).get('entries', []) time.sleep(1)
session = self.client.get_session()
# We expect at least 2 entries (User and AI) entries = session.get('session', {}).get('entries', [])
assert len(entries) >= 2, f"Expected at least 2 entries, found {len(entries)}" # Truncating to 1 pair means 2 entries max (if it's already at 2, it might not change,
assert entries[-2]['role'] == 'User', "Expected second to last entry to be User" # but if we had more, it would).
assert entries[-1]['role'] == 'AI', "Expected last entry to be AI" assert len(entries) <= 2, f"Expected <= 2 entries after truncation, found {len(entries)}"
print(f"[Sim] AI responded: {entries[-1]['content'][:50]}...")
# 5. Test History Truncation
print("[Sim] Testing history truncation...")
self.sim.truncate_history(1)
time.sleep(1)
session = self.client.get_session()
entries = session.get('session', {}).get('entries', [])
# Truncating to 1 pair means 2 entries max (if it's already at 2, it might not change,
# but if we had more, it would).
assert len(entries) <= 2, f"Expected <= 2 entries after truncation, found {len(entries)}"
if __name__ == "__main__": if __name__ == "__main__":
run_sim(ContextSimulation) run_sim(ContextSimulation)

View File

@@ -4,76 +4,66 @@ import time
from simulation.sim_base import BaseSimulation, run_sim from simulation.sim_base import BaseSimulation, run_sim
class ExecutionSimulation(BaseSimulation): class ExecutionSimulation(BaseSimulation):
def setup(self, project_name="SimProject"): def setup(self, project_name="SimProject"):
super().setup(project_name) super().setup(project_name)
if os.path.exists("hello.ps1"): if os.path.exists("hello.ps1"):
os.remove("hello.ps1") os.remove("hello.ps1")
def run(self): def run(self):
print("\n--- Running Execution & Modals Simulation ---") print("\n--- Running Execution & Modals Simulation ---")
# 1. Trigger script generation (Async so we don't block on the wait loop)
# 1. Trigger script generation (Async so we don't block on the wait loop) msg = "Create a hello.ps1 script that prints 'Simulation Test' and execute it."
msg = "Create a hello.ps1 script that prints 'Simulation Test' and execute it." print(f"[Sim] Sending message to trigger script: {msg}")
print(f"[Sim] Sending message to trigger script: {msg}") self.sim.run_discussion_turn_async(msg)
self.sim.run_discussion_turn_async(msg) # 2. Monitor for events and text responses
print("[Sim] Monitoring for script approvals and AI text...")
# 2. Monitor for events and text responses start_wait = time.time()
print("[Sim] Monitoring for script approvals and AI text...") approved_count = 0
start_wait = time.time() success = False
approved_count = 0 consecutive_errors = 0
success = False while time.time() - start_wait < 90:
# Check for error status (be lenient with transients)
consecutive_errors = 0 status = self.client.get_value("ai_status")
while time.time() - start_wait < 90: if status and status.lower().startswith("error"):
# Check for error status (be lenient with transients) consecutive_errors += 1
status = self.client.get_value("ai_status") if consecutive_errors >= 3:
if status and status.lower().startswith("error"): print(f"[ABORT] Execution simulation aborted due to persistent GUI error: {status}")
consecutive_errors += 1 break
if consecutive_errors >= 3: else:
print(f"[ABORT] Execution simulation aborted due to persistent GUI error: {status}") consecutive_errors = 0
break # Check for script confirmation event
else: ev = self.client.wait_for_event("script_confirmation_required", timeout=1)
consecutive_errors = 0 if ev:
print(f"[Sim] Approving script #{approved_count+1}: {ev.get('script', '')[:50]}...")
# Check for script confirmation event self.client.click("btn_approve_script")
ev = self.client.wait_for_event("script_confirmation_required", timeout=1) approved_count += 1
if ev: # Give more time if we just approved a script
print(f"[Sim] Approving script #{approved_count+1}: {ev.get('script', '')[:50]}...") start_wait = time.time()
self.client.click("btn_approve_script") # Check if AI has responded with text yet
approved_count += 1 session = self.client.get_session()
# Give more time if we just approved a script entries = session.get('session', {}).get('entries', [])
start_wait = time.time() # Debug: log last few roles/content
if entries:
# Check if AI has responded with text yet last_few = entries[-3:]
session = self.client.get_session() print(f"[Sim] Waiting... Last {len(last_few)} roles: {[e.get('role') for e in last_few]}")
entries = session.get('session', {}).get('entries', []) if any(e.get('role') == 'AI' and e.get('content') for e in entries):
# Double check content for our keyword
# Debug: log last few roles/content for e in entries:
if entries: if e.get('role') == 'AI' and "Simulation Test" in e.get('content', ''):
last_few = entries[-3:] print("[Sim] AI responded with expected text. Success.")
print(f"[Sim] Waiting... Last {len(last_few)} roles: {[e.get('role') for e in last_few]}") success = True
break
if any(e.get('role') == 'AI' and e.get('content') for e in entries): if success: break
# Double check content for our keyword # Also check if output is already in history via tool role
for e in entries: for e in entries:
if e.get('role') == 'AI' and "Simulation Test" in e.get('content', ''): if e.get('role') in ['Tool', 'Function'] and "Simulation Test" in e.get('content', ''):
print("[Sim] AI responded with expected text. Success.") print(f"[Sim] Expected output found in {e.get('role')} results. Success.")
success = True success = True
break break
if success: break if success: break
time.sleep(1.0)
# Also check if output is already in history via tool role assert success, "Failed to observe script execution output or AI confirmation text"
for e in entries: print(f"[Sim] Final check: approved {approved_count} scripts.")
if e.get('role') in ['Tool', 'Function'] and "Simulation Test" in e.get('content', ''):
print(f"[Sim] Expected output found in {e.get('role')} results. Success.")
success = True
break
if success: break
time.sleep(1.0)
assert success, "Failed to observe script execution output or AI confirmation text"
print(f"[Sim] Final check: approved {approved_count} scripts.")
if __name__ == "__main__": if __name__ == "__main__":
run_sim(ExecutionSimulation) run_sim(ExecutionSimulation)

View File

@@ -4,44 +4,37 @@ import time
from simulation.sim_base import BaseSimulation, run_sim from simulation.sim_base import BaseSimulation, run_sim
class ToolsSimulation(BaseSimulation): class ToolsSimulation(BaseSimulation):
def run(self): def run(self):
print("\n--- Running Tools Simulation ---") print("\n--- Running Tools Simulation ---")
# 1. Trigger list_directory tool
# 1. Trigger list_directory tool msg = "List the files in the current directory."
msg = "List the files in the current directory." print(f"[Sim] Sending message to trigger tool: {msg}")
print(f"[Sim] Sending message to trigger tool: {msg}") self.sim.run_discussion_turn(msg)
self.sim.run_discussion_turn(msg) # 2. Wait for AI to execute tool
print("[Sim] Waiting for tool execution...")
# 2. Wait for AI to execute tool time.sleep(5) # Give it some time
print("[Sim] Waiting for tool execution...") # 3. Verify Tool Log
time.sleep(5) # Give it some time # We need a hook to get the tool log
# In gui_2.py, there is _on_tool_log which appends to self._tool_log
# 3. Verify Tool Log # We need a hook to read self._tool_log
# We need a hook to get the tool log # 4. Trigger read_file tool
# In gui_2.py, there is _on_tool_log which appends to self._tool_log msg = "Read the first 10 lines of aggregate.py."
# We need a hook to read self._tool_log print(f"[Sim] Sending message to trigger tool: {msg}")
self.sim.run_discussion_turn(msg)
# 4. Trigger read_file tool # 5. Wait and Verify
msg = "Read the first 10 lines of aggregate.py." print("[Sim] Waiting for tool execution...")
print(f"[Sim] Sending message to trigger tool: {msg}") time.sleep(5)
self.sim.run_discussion_turn(msg) session = self.client.get_session()
entries = session.get('session', {}).get('entries', [])
# 5. Wait and Verify # Tool outputs are usually in the conversation history as 'Tool' role or similar
print("[Sim] Waiting for tool execution...") tool_outputs = [e for e in entries if e.get('role') in ['Tool', 'Function']]
time.sleep(5) print(f"[Sim] Found {len(tool_outputs)} tool outputs in history.")
# Actually in Gemini history, they might be nested.
session = self.client.get_session() # But our GUI disc_entries list usually has them as separate entries or
entries = session.get('session', {}).get('entries', []) # they are part of the AI turn.
# Tool outputs are usually in the conversation history as 'Tool' role or similar # Let's check if the AI mentions it in its response
tool_outputs = [e for e in entries if e.get('role') in ['Tool', 'Function']] last_ai_msg = entries[-1]['content']
print(f"[Sim] Found {len(tool_outputs)} tool outputs in history.") print(f"[Sim] Final AI Response: {last_ai_msg[:100]}...")
# Actually in Gemini history, they might be nested.
# But our GUI disc_entries list usually has them as separate entries or
# they are part of the AI turn.
# Let's check if the AI mentions it in its response
last_ai_msg = entries[-1]['content']
print(f"[Sim] Final AI Response: {last_ai_msg[:100]}...")
if __name__ == "__main__": if __name__ == "__main__":
run_sim(ToolsSimulation) run_sim(ToolsSimulation)

View File

@@ -3,48 +3,45 @@ import random
import ai_client import ai_client
class UserSimAgent: class UserSimAgent:
def __init__(self, hook_client, model="gemini-2.5-flash-lite"): def __init__(self, hook_client, model="gemini-2.5-flash-lite"):
self.hook_client = hook_client self.hook_client = hook_client
self.model = model self.model = model
self.system_prompt = ( self.system_prompt = (
"You are a software engineer testing an AI coding assistant called 'Manual Slop'. " "You are a software engineer testing an AI coding assistant called 'Manual Slop'. "
"You want to build a small Python project and verify the assistant's capabilities. " "You want to build a small Python project and verify the assistant's capabilities. "
"Keep your responses concise and human-like. " "Keep your responses concise and human-like. "
"Do not use markdown blocks for your main message unless you are providing code." "Do not use markdown blocks for your main message unless you are providing code."
) )
def generate_response(self, conversation_history): def generate_response(self, conversation_history):
""" """
Generates a human-like response based on the conversation history. Generates a human-like response based on the conversation history.
conversation_history: list of dicts with 'role' and 'content' conversation_history: list of dicts with 'role' and 'content'
""" """
# Format history for ai_client # Format history for ai_client
# ai_client expects md_content and user_message. # ai_client expects md_content and user_message.
# It handles its own internal history. # It handles its own internal history.
# We want the 'User AI' to have context of what the 'Assistant AI' said. # We want the 'User AI' to have context of what the 'Assistant AI' said.
# For now, let's just use the last message from Assistant as the prompt.
# For now, let's just use the last message from Assistant as the prompt. last_ai_msg = ""
last_ai_msg = "" for entry in reversed(conversation_history):
for entry in reversed(conversation_history): if entry.get('role') == 'AI':
if entry.get('role') == 'AI': last_ai_msg = entry.get('content', '')
last_ai_msg = entry.get('content', '') break
break # We need to set a custom system prompt for the User Simulator
try:
# We need to set a custom system prompt for the User Simulator ai_client.set_custom_system_prompt(self.system_prompt)
try: # We'll use a blank md_content for now as the 'User' doesn't need to read its own files
ai_client.set_custom_system_prompt(self.system_prompt) # via the same mechanism, but we could provide it if needed.
# We'll use a blank md_content for now as the 'User' doesn't need to read its own files response = ai_client.send(md_content="", user_message=last_ai_msg)
# via the same mechanism, but we could provide it if needed. finally:
response = ai_client.send(md_content="", user_message=last_ai_msg) ai_client.set_custom_system_prompt("")
finally: return response
ai_client.set_custom_system_prompt("")
return response
def perform_action_with_delay(self, action_func, *args, **kwargs): def perform_action_with_delay(self, action_func, *args, **kwargs):
""" """
Executes an action with a human-like delay. Executes an action with a human-like delay.
""" """
delay = random.uniform(0.5, 2.0) delay = random.uniform(0.5, 2.0)
time.sleep(delay) time.sleep(delay)
return action_func(*args, **kwargs) return action_func(*args, **kwargs)

View File

@@ -4,84 +4,80 @@ from api_hook_client import ApiHookClient
from simulation.user_agent import UserSimAgent from simulation.user_agent import UserSimAgent
class WorkflowSimulator: class WorkflowSimulator:
def __init__(self, hook_client: ApiHookClient): def __init__(self, hook_client: ApiHookClient):
self.client = hook_client self.client = hook_client
self.user_agent = UserSimAgent(hook_client) self.user_agent = UserSimAgent(hook_client)
def setup_new_project(self, name, git_dir, project_path=None): def setup_new_project(self, name, git_dir, project_path=None):
print(f"Setting up new project: {name}") print(f"Setting up new project: {name}")
if project_path: if project_path:
self.client.click("btn_project_new_automated", user_data=project_path) self.client.click("btn_project_new_automated", user_data=project_path)
else: else:
self.client.click("btn_project_new") self.client.click("btn_project_new")
time.sleep(1) time.sleep(1)
self.client.set_value("project_git_dir", git_dir) self.client.set_value("project_git_dir", git_dir)
self.client.click("btn_project_save") self.client.click("btn_project_save")
time.sleep(1) time.sleep(1)
def create_discussion(self, name): def create_discussion(self, name):
print(f"Creating discussion: {name}") print(f"Creating discussion: {name}")
self.client.set_value("disc_new_name_input", name) self.client.set_value("disc_new_name_input", name)
self.client.click("btn_disc_create") self.client.click("btn_disc_create")
time.sleep(1) time.sleep(1)
def switch_discussion(self, name): def switch_discussion(self, name):
print(f"Switching to discussion: {name}") print(f"Switching to discussion: {name}")
self.client.select_list_item("disc_listbox", name) self.client.select_list_item("disc_listbox", name)
time.sleep(1) time.sleep(1)
def load_prior_log(self): def load_prior_log(self):
print("Loading prior log") print("Loading prior log")
self.client.click("btn_load_log") self.client.click("btn_load_log")
# This usually opens a file dialog which we can't easily automate from here # This usually opens a file dialog which we can't easily automate from here
# without more hooks, but we can verify the button click. # without more hooks, but we can verify the button click.
time.sleep(1) time.sleep(1)
def truncate_history(self, pairs): def truncate_history(self, pairs):
print(f"Truncating history to {pairs} pairs") print(f"Truncating history to {pairs} pairs")
self.client.set_value("disc_truncate_pairs", pairs) self.client.set_value("disc_truncate_pairs", pairs)
self.client.click("btn_disc_truncate") self.client.click("btn_disc_truncate")
time.sleep(1) time.sleep(1)
def run_discussion_turn(self, user_message=None): def run_discussion_turn(self, user_message=None):
self.run_discussion_turn_async(user_message) self.run_discussion_turn_async(user_message)
# Wait for AI # Wait for AI
return self.wait_for_ai_response() return self.wait_for_ai_response()
def run_discussion_turn_async(self, user_message=None): def run_discussion_turn_async(self, user_message=None):
if user_message is None: if user_message is None:
# Generate from AI history # Generate from AI history
session = self.client.get_session() session = self.client.get_session()
entries = session.get('session', {}).get('entries', []) entries = session.get('session', {}).get('entries', [])
user_message = self.user_agent.generate_response(entries) user_message = self.user_agent.generate_response(entries)
print(f"\n[USER]: {user_message}")
self.client.set_value("ai_input", user_message)
self.client.click("btn_gen_send")
print(f"\n[USER]: {user_message}") def wait_for_ai_response(self, timeout=60):
self.client.set_value("ai_input", user_message) print("Waiting for AI response...", end="", flush=True)
self.client.click("btn_gen_send") start_time = time.time()
last_count = len(self.client.get_session().get('session', {}).get('entries', []))
def wait_for_ai_response(self, timeout=60): while time.time() - start_time < timeout:
print("Waiting for AI response...", end="", flush=True) # Check for error status first
start_time = time.time() status = self.client.get_value("ai_status")
last_count = len(self.client.get_session().get('session', {}).get('entries', [])) if status and status.lower().startswith("error"):
print(f"\n[ABORT] GUI reported error status: {status}")
while time.time() - start_time < timeout: return {"role": "AI", "content": f"ERROR: {status}"}
# Check for error status first time.sleep(1)
status = self.client.get_value("ai_status") print(".", end="", flush=True)
if status and status.lower().startswith("error"): entries = self.client.get_session().get('session', {}).get('entries', [])
print(f"\n[ABORT] GUI reported error status: {status}") if len(entries) > last_count:
return {"role": "AI", "content": f"ERROR: {status}"} last_entry = entries[-1]
if last_entry.get('role') == 'AI' and last_entry.get('content'):
time.sleep(1) content = last_entry.get('content')
print(".", end="", flush=True) print(f"\n[AI]: {content[:100]}...")
entries = self.client.get_session().get('session', {}).get('entries', []) if "error" in content.lower() or "blocked" in content.lower():
if len(entries) > last_count: print(f"[WARN] AI response appears to contain an error message.")
last_entry = entries[-1] return last_entry
if last_entry.get('role') == 'AI' and last_entry.get('content'): print("\nTimeout waiting for AI")
content = last_entry.get('content') return None
print(f"\n[AI]: {content[:100]}...")
if "error" in content.lower() or "blocked" in content.lower():
print(f"[WARN] AI response appears to contain an error message.")
return last_entry
print("\nTimeout waiting for AI")
return None

View File

@@ -27,185 +27,165 @@ import ast
import re import re
from pathlib import Path from pathlib import Path
# ------------------------------------------------------------------ per-type extractors # ------------------------------------------------------------------ per-type extractors
def _summarise_python(path: Path, content: str) -> str: def _summarise_python(path: Path, content: str) -> str:
lines = content.splitlines() lines = content.splitlines()
line_count = len(lines) line_count = len(lines)
parts = [f"**Python** — {line_count} lines"] parts = [f"**Python** — {line_count} lines"]
try:
try: tree = ast.parse(content.lstrip(chr(0xFEFF)), filename=str(path))
tree = ast.parse(content.lstrip(chr(0xFEFF)), filename=str(path)) except SyntaxError as e:
except SyntaxError as e: parts.append(f"_Parse error: {e}_")
parts.append(f"_Parse error: {e}_") return "\n".join(parts)
return "\n".join(parts) # Imports
imports = []
# Imports for node in ast.walk(tree):
imports = [] if isinstance(node, ast.Import):
for node in ast.walk(tree): for alias in node.names:
if isinstance(node, ast.Import): imports.append(alias.name.split(".")[0])
for alias in node.names: elif isinstance(node, ast.ImportFrom):
imports.append(alias.name.split(".")[0]) if node.module:
elif isinstance(node, ast.ImportFrom): imports.append(node.module.split(".")[0])
if node.module: if imports:
imports.append(node.module.split(".")[0]) unique_imports = sorted(set(imports))
if imports: parts.append(f"imports: {', '.join(unique_imports)}")
unique_imports = sorted(set(imports)) # Top-level constants (ALL_CAPS assignments)
parts.append(f"imports: {', '.join(unique_imports)}") constants = []
for node in ast.iter_child_nodes(tree):
# Top-level constants (ALL_CAPS assignments) if isinstance(node, ast.Assign):
constants = [] for t in node.targets:
for node in ast.iter_child_nodes(tree): if isinstance(t, ast.Name) and t.id.isupper():
if isinstance(node, ast.Assign): constants.append(t.id)
for t in node.targets: elif isinstance(node, (ast.AnnAssign,)):
if isinstance(t, ast.Name) and t.id.isupper(): if isinstance(node.target, ast.Name) and node.target.id.isupper():
constants.append(t.id) constants.append(node.target.id)
elif isinstance(node, (ast.AnnAssign,)): if constants:
if isinstance(node.target, ast.Name) and node.target.id.isupper(): parts.append(f"constants: {', '.join(constants)}")
constants.append(node.target.id) # Classes + their methods
if constants: for node in ast.iter_child_nodes(tree):
parts.append(f"constants: {', '.join(constants)}") if isinstance(node, ast.ClassDef):
methods = [
# Classes + their methods n.name for n in ast.iter_child_nodes(node)
for node in ast.iter_child_nodes(tree): if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))
if isinstance(node, ast.ClassDef): ]
methods = [ if methods:
n.name for n in ast.iter_child_nodes(node) parts.append(f"class {node.name}: {', '.join(methods)}")
if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef)) else:
] parts.append(f"class {node.name}")
if methods:
parts.append(f"class {node.name}: {', '.join(methods)}")
else:
parts.append(f"class {node.name}")
# Top-level functions # Top-level functions
top_fns = [ top_fns = [
node.name for node in ast.iter_child_nodes(tree) node.name for node in ast.iter_child_nodes(tree)
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
] ]
if top_fns: if top_fns:
parts.append(f"functions: {', '.join(top_fns)}") parts.append(f"functions: {', '.join(top_fns)}")
return "\n".join(parts)
return "\n".join(parts)
def _summarise_toml(path: Path, content: str) -> str: def _summarise_toml(path: Path, content: str) -> str:
lines = content.splitlines() lines = content.splitlines()
line_count = len(lines) line_count = len(lines)
parts = [f"**TOML** — {line_count} lines"] parts = [f"**TOML** — {line_count} lines"]
# Extract top-level table headers [key] and [[key]]
# Extract top-level table headers [key] and [[key]] table_pat = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}")
table_pat = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}") tables = []
tables = [] for line in lines:
for line in lines: m = table_pat.match(line)
m = table_pat.match(line) if m:
if m: tables.append(m.group(1).strip())
tables.append(m.group(1).strip()) if tables:
if tables: parts.append(f"tables: {', '.join(tables)}")
parts.append(f"tables: {', '.join(tables)}") # Top-level key = value (not inside a [table])
kv_pat = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
# Top-level key = value (not inside a [table]) in_table = False
kv_pat = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=") top_keys = []
in_table = False for line in lines:
top_keys = [] if table_pat.match(line):
for line in lines: in_table = True
if table_pat.match(line): continue
in_table = True if not in_table:
continue m = kv_pat.match(line)
if not in_table: if m:
m = kv_pat.match(line) top_keys.append(m.group(1))
if m: if top_keys:
top_keys.append(m.group(1)) parts.append(f"top-level keys: {', '.join(top_keys)}")
if top_keys: return "\n".join(parts)
parts.append(f"top-level keys: {', '.join(top_keys)}")
return "\n".join(parts)
def _summarise_markdown(path: Path, content: str) -> str: def _summarise_markdown(path: Path, content: str) -> str:
lines = content.splitlines() lines = content.splitlines()
line_count = len(lines) line_count = len(lines)
parts = [f"**Markdown** — {line_count} lines"] parts = [f"**Markdown** — {line_count} lines"]
headings = []
headings = [] for line in lines:
for line in lines: m = re.match(r"^(#{1,3})\s+(.+)", line)
m = re.match(r"^(#{1,3})\s+(.+)", line) if m:
if m: level = len(m.group(1))
level = len(m.group(1)) text = m.group(2).strip()
text = m.group(2).strip() indent = " " * (level - 1)
indent = " " * (level - 1) headings.append(f"{indent}{text}")
headings.append(f"{indent}{text}") if headings:
if headings: parts.append("headings:\n" + "\n".join(f" {h}" for h in headings))
parts.append("headings:\n" + "\n".join(f" {h}" for h in headings)) return "\n".join(parts)
return "\n".join(parts)
def _summarise_generic(path: Path, content: str) -> str: def _summarise_generic(path: Path, content: str) -> str:
lines = content.splitlines() lines = content.splitlines()
line_count = len(lines) line_count = len(lines)
suffix = path.suffix.lstrip(".").upper() or "TEXT" suffix = path.suffix.lstrip(".").upper() or "TEXT"
parts = [f"**{suffix}** — {line_count} lines"] parts = [f"**{suffix}** — {line_count} lines"]
preview = lines[:8] preview = lines[:8]
if preview: if preview:
parts.append("preview:\n```\n" + "\n".join(preview) + "\n```") parts.append("preview:\n```\n" + "\n".join(preview) + "\n```")
return "\n".join(parts) return "\n".join(parts)
# ------------------------------------------------------------------ dispatch
# ------------------------------------------------------------------ dispatch
_SUMMARISERS = { _SUMMARISERS = {
".py": _summarise_python, ".py": _summarise_python,
".toml": _summarise_toml, ".toml": _summarise_toml,
".md": _summarise_markdown, ".md": _summarise_markdown,
".ini": _summarise_generic, ".ini": _summarise_generic,
".txt": _summarise_generic, ".txt": _summarise_generic,
".ps1": _summarise_generic, ".ps1": _summarise_generic,
} }
def summarise_file(path: Path, content: str) -> str: def summarise_file(path: Path, content: str) -> str:
""" """
Return a compact markdown summary string for a single file. Return a compact markdown summary string for a single file.
`content` is the already-read file text (or an error string). `content` is the already-read file text (or an error string).
""" """
suffix = path.suffix.lower() if hasattr(path, "suffix") else "" suffix = path.suffix.lower() if hasattr(path, "suffix") else ""
fn = _SUMMARISERS.get(suffix, _summarise_generic) fn = _SUMMARISERS.get(suffix, _summarise_generic)
try: try:
return fn(path, content) return fn(path, content)
except Exception as e: except Exception as e:
return f"_Summariser error: {e}_" return f"_Summariser error: {e}_"
def summarise_items(file_items: list[dict]) -> list[dict]: def summarise_items(file_items: list[dict]) -> list[dict]:
""" """
Given a list of file_item dicts (as returned by aggregate.build_file_items), Given a list of file_item dicts (as returned by aggregate.build_file_items),
return a parallel list of dicts with an added `summary` key. return a parallel list of dicts with an added `summary` key.
""" """
result = [] result = []
for item in file_items: for item in file_items:
path = item.get("path") path = item.get("path")
content = item.get("content", "") content = item.get("content", "")
error = item.get("error", False) error = item.get("error", False)
if error or path is None: if error or path is None:
summary = f"_Error reading file_" summary = f"_Error reading file_"
else: else:
p = Path(path) if not isinstance(path, Path) else path p = Path(path) if not isinstance(path, Path) else path
summary = summarise_file(p, content) summary = summarise_file(p, content)
result.append({**item, "summary": summary}) result.append({**item, "summary": summary})
return result return result
def build_summary_markdown(file_items: list[dict]) -> str: def build_summary_markdown(file_items: list[dict]) -> str:
""" """
Build a compact markdown string of file summaries, suitable for the Build a compact markdown string of file summaries, suitable for the
initial <context> block instead of full file contents. initial <context> block instead of full file contents.
""" """
summarised = summarise_items(file_items) summarised = summarise_items(file_items)
parts = [] parts = []
for item in summarised: for item in summarised:
path = item.get("path") or item.get("entry", "unknown") path = item.get("path") or item.get("entry", "unknown")
summary = item.get("summary", "") summary = item.get("summary", "")
parts.append(f"### `{path}`\n\n{summary}") parts.append(f"### `{path}`\n\n{summary}")
return "\n\n---\n\n".join(parts) return "\n\n---\n\n".join(parts)

View File

@@ -6,28 +6,27 @@ import project_manager
from models import Track, Ticket from models import Track, Ticket
class TestMMAPersistence(unittest.TestCase): class TestMMAPersistence(unittest.TestCase):
def test_default_project_has_mma(self): def test_default_project_has_mma(self):
proj = project_manager.default_project("test") proj = project_manager.default_project("test")
self.assertIn("mma", proj) self.assertIn("mma", proj)
self.assertEqual(proj["mma"], {"epic": "", "active_track_id": "", "tracks": []}) self.assertEqual(proj["mma"], {"epic": "", "active_track_id": "", "tracks": []})
def test_save_load_mma(self): def test_save_load_mma(self):
proj = project_manager.default_project("test") proj = project_manager.default_project("test")
proj["mma"] = {"epic": "Test Epic", "tracks": [{"id": "track_1"}]} proj["mma"] = {"epic": "Test Epic", "tracks": [{"id": "track_1"}]}
test_file = Path("test_mma_proj.toml")
test_file = Path("test_mma_proj.toml") try:
try: project_manager.save_project(proj, test_file)
project_manager.save_project(proj, test_file) loaded = project_manager.load_project(test_file)
loaded = project_manager.load_project(test_file) self.assertIn("mma", loaded)
self.assertIn("mma", loaded) self.assertEqual(loaded["mma"]["epic"], "Test Epic")
self.assertEqual(loaded["mma"]["epic"], "Test Epic") self.assertEqual(len(loaded["mma"]["tracks"]), 1)
self.assertEqual(len(loaded["mma"]["tracks"]), 1) finally:
finally: if test_file.exists():
if test_file.exists(): test_file.unlink()
test_file.unlink() hist_file = Path("test_mma_proj_history.toml")
hist_file = Path("test_mma_proj_history.toml") if hist_file.exists():
if hist_file.exists(): hist_file.unlink()
hist_file.unlink()
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@@ -15,82 +15,76 @@ import ai_client
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def reset_ai_client(): def reset_ai_client():
"""Reset ai_client global state between every test to prevent state pollution.""" """Reset ai_client global state between every test to prevent state pollution."""
ai_client.reset_session() ai_client.reset_session()
# Default to a safe model # Default to a safe model
ai_client.set_provider("gemini", "gemini-2.5-flash-lite") ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
yield yield
def kill_process_tree(pid): def kill_process_tree(pid):
"""Robustly kills a process and all its children.""" """Robustly kills a process and all its children."""
if pid is None: if pid is None:
return return
try: try:
print(f"[Fixture] Attempting to kill process tree for PID {pid}...") print(f"[Fixture] Attempting to kill process tree for PID {pid}...")
if os.name == 'nt': if os.name == 'nt':
# /F is force, /T is tree (includes children) # /F is force, /T is tree (includes children)
subprocess.run(["taskkill", "/F", "/T", "/PID", str(pid)], subprocess.run(["taskkill", "/F", "/T", "/PID", str(pid)],
stdout=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
check=False) check=False)
else: else:
# On Unix, kill the process group # On Unix, kill the process group
os.killpg(os.getpgid(pid), signal.SIGKILL) os.killpg(os.getpgid(pid), signal.SIGKILL)
print(f"[Fixture] Process tree {pid} killed.") print(f"[Fixture] Process tree {pid} killed.")
except Exception as e: except Exception as e:
print(f"[Fixture] Error killing process tree {pid}: {e}") print(f"[Fixture] Error killing process tree {pid}: {e}")
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def live_gui(): def live_gui():
""" """
Session-scoped fixture that starts gui_2.py with --enable-test-hooks. Session-scoped fixture that starts gui_2.py with --enable-test-hooks.
""" """
gui_script = "gui_2.py" gui_script = "gui_2.py"
print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks...") print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks...")
os.makedirs("logs", exist_ok=True)
os.makedirs("logs", exist_ok=True) log_file = open(f"logs/{gui_script.replace('.', '_')}_test.log", "w", encoding="utf-8")
log_file = open(f"logs/{gui_script.replace('.', '_')}_test.log", "w", encoding="utf-8") process = subprocess.Popen(
["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"],
process = subprocess.Popen( stdout=log_file,
["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"], stderr=log_file,
stdout=log_file, text=True,
stderr=log_file, creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0
text=True, )
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if os.name == 'nt' else 0 max_retries = 15 # Slightly more time for gui_2
) ready = False
print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...")
max_retries = 15 # Slightly more time for gui_2 start_time = time.time()
ready = False while time.time() - start_time < max_retries:
print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...") try:
response = requests.get("http://127.0.0.1:8999/status", timeout=0.5)
start_time = time.time() if response.status_code == 200:
while time.time() - start_time < max_retries: ready = True
try: print(f"[Fixture] GUI Hook Server for {gui_script} is ready after {round(time.time() - start_time, 2)}s.")
response = requests.get("http://127.0.0.1:8999/status", timeout=0.5) break
if response.status_code == 200: except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
ready = True if process.poll() is not None:
print(f"[Fixture] GUI Hook Server for {gui_script} is ready after {round(time.time() - start_time, 2)}s.") print(f"[Fixture] {gui_script} process died unexpectedly during startup.")
break break
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): time.sleep(0.5)
if process.poll() is not None: if not ready:
print(f"[Fixture] {gui_script} process died unexpectedly during startup.") print(f"[Fixture] TIMEOUT/FAILURE: Hook server for {gui_script} failed to respond.")
break kill_process_tree(process.pid)
time.sleep(0.5) pytest.fail(f"Failed to start {gui_script} with test hooks.")
try:
if not ready: yield process, gui_script
print(f"[Fixture] TIMEOUT/FAILURE: Hook server for {gui_script} failed to respond.") finally:
kill_process_tree(process.pid) print(f"\n[Fixture] Finally block triggered: Shutting down {gui_script}...")
pytest.fail(f"Failed to start {gui_script} with test hooks.") # Reset the GUI state before shutting down
try:
try: client = ApiHookClient()
yield process, gui_script client.reset_session()
finally: time.sleep(0.5)
print(f"\n[Fixture] Finally block triggered: Shutting down {gui_script}...") except: pass
# Reset the GUI state before shutting down kill_process_tree(process.pid)
try: log_file.close()
client = ApiHookClient()
client.reset_session()
time.sleep(0.5)
except: pass
kill_process_tree(process.pid)
log_file.close()

21
tests/mock_alias_tool.py Normal file
View File

@@ -0,0 +1,21 @@
import sys, json, os, subprocess
prompt = sys.stdin.read()
if '"role": "tool"' in prompt:
print(json.dumps({"type": "message", "role": "assistant", "content": "Tool worked!"}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 20}}), flush=True)
else:
# We must call the bridge to trigger the GUI approval!
tool_call = {"name": "list_directory", "input": {"dir_path": "."}}
bridge_cmd = [sys.executable, "C:/projects/manual_slop/scripts/cli_tool_bridge.py"]
proc = subprocess.Popen(bridge_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
stdout, _ = proc.communicate(input=json.dumps(tool_call))
# Even if bridge says allow, we emit the tool_use to the adapter
print(json.dumps({"type": "message", "role": "assistant", "content": "I will list the directory."}), flush=True)
print(json.dumps({
"type": "tool_use",
"name": "list_directory",
"id": "alias_call",
"args": {"dir_path": "."}
}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)

View File

@@ -4,104 +4,92 @@ import subprocess
import os import os
def main(): def main():
# Debug log to stderr # Debug log to stderr
sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n") sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n") sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n")
# Read prompt from stdin
# Read prompt from stdin try:
try: # On Windows, stdin might be closed or behave weirdly if not handled
# On Windows, stdin might be closed or behave weirdly if not handled prompt = sys.stdin.read()
prompt = sys.stdin.read() except EOFError:
except EOFError: prompt = ""
prompt = "" sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n")
sys.stderr.flush()
sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n") # Skip management commands
sys.stderr.flush() if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]:
return
# Skip management commands # If the prompt contains tool results, provide final answer
if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]: if '"role": "tool"' in prompt or '"tool_call_id"' in prompt:
return print(json.dumps({
"type": "message",
# If the prompt contains tool results, provide final answer "role": "assistant",
if '"role": "tool"' in prompt or '"tool_call_id"' in prompt: "content": "I have processed the tool results. Everything looks good!"
print(json.dumps({ }), flush=True)
"type": "message", print(json.dumps({
"role": "assistant", "type": "result",
"content": "I have processed the tool results. Everything looks good!" "status": "success",
}), flush=True) "stats": {"total_tokens": 100, "input_tokens": 80, "output_tokens": 20},
print(json.dumps({ "session_id": "mock-session-final"
"type": "result", }), flush=True)
"status": "success", return
"stats": {"total_tokens": 100, "input_tokens": 80, "output_tokens": 20}, # Default flow: simulate a tool call
"session_id": "mock-session-final" bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
}), flush=True) # Using format that bridge understands
return bridge_tool_call = {
"name": "read_file",
# Default flow: simulate a tool call "input": {"path": "test.txt"}
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py") }
# Using format that bridge understands sys.stderr.write(f"DEBUG: Calling bridge at {bridge_path}\n")
bridge_tool_call = { sys.stderr.flush()
"name": "read_file", try:
"input": {"path": "test.txt"} # CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed
} process = subprocess.Popen(
[sys.executable, bridge_path],
sys.stderr.write(f"DEBUG: Calling bridge at {bridge_path}\n") stdin=subprocess.PIPE,
sys.stderr.flush() stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
try: text=True,
# CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed env=os.environ
process = subprocess.Popen( )
[sys.executable, bridge_path], stdout, stderr = process.communicate(input=json.dumps(bridge_tool_call))
stdin=subprocess.PIPE, sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n")
stdout=subprocess.PIPE, sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n")
stderr=subprocess.PIPE, decision_data = json.loads(stdout.strip())
text=True, decision = decision_data.get("decision")
env=os.environ except Exception as e:
) sys.stderr.write(f"DEBUG: Bridge failed: {e}\n")
stdout, stderr = process.communicate(input=json.dumps(bridge_tool_call)) decision = "deny"
if decision == "allow":
sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n") # Simulate REAL CLI field names for adapter normalization test
sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n") print(json.dumps({
"type": "tool_use",
decision_data = json.loads(stdout.strip()) "tool_name": "read_file",
decision = decision_data.get("decision") "tool_id": "call_123",
except Exception as e: "parameters": {"path": "test.txt"}
sys.stderr.write(f"DEBUG: Bridge failed: {e}\n") }), flush=True)
decision = "deny" print(json.dumps({
"type": "message",
if decision == "allow": "role": "assistant",
# Simulate REAL CLI field names for adapter normalization test "content": "I am reading the file now..."
print(json.dumps({ }), flush=True)
"type": "tool_use", print(json.dumps({
"tool_name": "read_file", "type": "result",
"tool_id": "call_123", "status": "success",
"parameters": {"path": "test.txt"} "stats": {"total_tokens": 50, "input_tokens": 40, "output_tokens": 10},
}), flush=True) "session_id": "mock-session-123"
}), flush=True)
print(json.dumps({ else:
"type": "message", print(json.dumps({
"role": "assistant", "type": "message",
"content": "I am reading the file now..." "role": "assistant",
}), flush=True) "content": f"Tool execution was denied. Decision: {decision}"
}), flush=True)
print(json.dumps({ print(json.dumps({
"type": "result", "type": "result",
"status": "success", "status": "success",
"stats": {"total_tokens": 50, "input_tokens": 40, "output_tokens": 10}, "stats": {"total_tokens": 10, "input_tokens": 10, "output_tokens": 0},
"session_id": "mock-session-123" "session_id": "mock-session-denied"
}), flush=True) }), flush=True)
else:
print(json.dumps({
"type": "message",
"role": "assistant",
"content": f"Tool execution was denied. Decision: {decision}"
}), flush=True)
print(json.dumps({
"type": "result",
"status": "success",
"stats": {"total_tokens": 10, "input_tokens": 10, "output_tokens": 0},
"session_id": "mock-session-denied"
}), flush=True)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "." base_dir = "."
paths = [] paths = []
[files.tier_assignments]
[screenshots] [screenshots]
base_dir = "." base_dir = "."
paths = [] paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main] [discussions.main]
git_commit = "" git_commit = ""
last_updated = "2026-02-27T18:56:53" last_updated = "2026-02-28T07:35:03"
history = [] history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "." base_dir = "."
paths = [] paths = []
[files.tier_assignments]
[screenshots] [screenshots]
base_dir = "." base_dir = "."
paths = [] paths = []

View File

@@ -6,10 +6,10 @@ roles = [
"Reasoning", "Reasoning",
] ]
history = [] history = []
active = "TestDisc_1772236592" active = "TestDisc_1772282083"
auto_add = true auto_add = true
[discussions.TestDisc_1772236592] [discussions.TestDisc_1772282083]
git_commit = "" git_commit = ""
last_updated = "2026-02-27T18:56:46" last_updated = "2026-02-28T07:34:56"
history = [] history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "." base_dir = "."
paths = [] paths = []
[files.tier_assignments]
[screenshots] [screenshots]
base_dir = "." base_dir = "."
paths = [] paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main] [discussions.main]
git_commit = "" git_commit = ""
last_updated = "2026-02-27T18:57:53" last_updated = "2026-02-28T07:35:49"
history = [] history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "." base_dir = "."
paths = [] paths = []
[files.tier_assignments]
[screenshots] [screenshots]
base_dir = "." base_dir = "."
paths = [] paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main] [discussions.main]
git_commit = "" git_commit = ""
last_updated = "2026-02-27T18:57:10" last_updated = "2026-02-28T07:35:20"
history = [] history = []

View File

@@ -18,7 +18,5 @@ history = [
[discussions.AutoDisc] [discussions.AutoDisc]
git_commit = "" git_commit = ""
last_updated = "2026-02-27T23:54:05" last_updated = "2026-02-28T07:34:41"
history = [ history = []
"@2026-02-27T19:08:37\nSystem:\n[PERFORMANCE ALERT] Frame time high: 62.2ms. Please consider optimizing recent changes or reducing load.",
]

View File

@@ -8,5 +8,5 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import ai_client import ai_client
def test_agent_capabilities_listing(): def test_agent_capabilities_listing():
# Verify that the agent exposes its available tools correctly # Verify that the agent exposes its available tools correctly
pass pass

View File

@@ -9,14 +9,14 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from ai_client import set_agent_tools, _build_anthropic_tools from ai_client import set_agent_tools, _build_anthropic_tools
def test_set_agent_tools(): def test_set_agent_tools():
# Correct usage: pass a dict # Correct usage: pass a dict
agent_tools = {"read_file": True, "list_directory": False} agent_tools = {"read_file": True, "list_directory": False}
set_agent_tools(agent_tools) set_agent_tools(agent_tools)
def test_build_anthropic_tools_conversion(): def test_build_anthropic_tools_conversion():
# _build_anthropic_tools takes no arguments and uses the global _agent_tools # _build_anthropic_tools takes no arguments and uses the global _agent_tools
# We set a tool to True and check if it appears in the output # We set a tool to True and check if it appears in the output
set_agent_tools({"read_file": True}) set_agent_tools({"read_file": True})
anthropic_tools = _build_anthropic_tools() anthropic_tools = _build_anthropic_tools()
tool_names = [t["name"] for t in anthropic_tools] tool_names = [t["name"] for t in anthropic_tools]
assert "read_file" in tool_names assert "read_file" in tool_names

View File

@@ -3,39 +3,33 @@ from unittest.mock import MagicMock, patch
import ai_client import ai_client
def test_ai_client_send_gemini_cli(): def test_ai_client_send_gemini_cli():
""" """
Verifies that 'ai_client.send' correctly interacts with 'GeminiCliAdapter' Verifies that 'ai_client.send' correctly interacts with 'GeminiCliAdapter'
when the 'gemini_cli' provider is specified. when the 'gemini_cli' provider is specified.
""" """
test_message = "Hello, this is a test prompt for the CLI adapter." test_message = "Hello, this is a test prompt for the CLI adapter."
test_response = "This is a dummy response from the Gemini CLI." test_response = "This is a dummy response from the Gemini CLI."
# Set provider to gemini_cli
# Set provider to gemini_cli ai_client.set_provider("gemini_cli", "gemini-2.5-flash-lite")
ai_client.set_provider("gemini_cli", "gemini-2.5-flash-lite") # 1. Mock 'ai_client.GeminiCliAdapter' (which we will add)
with patch('ai_client.GeminiCliAdapter') as MockAdapterClass:
# 1. Mock 'ai_client.GeminiCliAdapter' (which we will add) mock_adapter_instance = MockAdapterClass.return_value
with patch('ai_client.GeminiCliAdapter') as MockAdapterClass: mock_adapter_instance.send.return_value = {"text": test_response, "tool_calls": []}
mock_adapter_instance = MockAdapterClass.return_value mock_adapter_instance.last_usage = {"total_tokens": 100}
mock_adapter_instance.send.return_value = {"text": test_response, "tool_calls": []} mock_adapter_instance.last_latency = 0.5
mock_adapter_instance.last_usage = {"total_tokens": 100} mock_adapter_instance.session_id = "test-session"
mock_adapter_instance.last_latency = 0.5 # Verify that 'events' are emitted correctly
mock_adapter_instance.session_id = "test-session" with patch.object(ai_client.events, 'emit') as mock_emit:
response = ai_client.send(
# Verify that 'events' are emitted correctly md_content="<context></context>",
with patch.object(ai_client.events, 'emit') as mock_emit: user_message=test_message,
response = ai_client.send( base_dir="."
md_content="<context></context>", )
user_message=test_message, # Check that the adapter's send method was called.
base_dir="." mock_adapter_instance.send.assert_called()
) # Verify that the expected lifecycle events were emitted.
emitted_event_names = [call.args[0] for call in mock_emit.call_args_list]
# Check that the adapter's send method was called. assert 'request_start' in emitted_event_names
mock_adapter_instance.send.assert_called() assert 'response_received' in emitted_event_names
# Verify that the combined text returned by the adapter is returned by 'ai_client.send'.
# Verify that the expected lifecycle events were emitted. assert response == test_response
emitted_event_names = [call.args[0] for call in mock_emit.call_args_list]
assert 'request_start' in emitted_event_names
assert 'response_received' in emitted_event_names
# Verify that the combined text returned by the adapter is returned by 'ai_client.send'.
assert response == test_response

View File

@@ -3,15 +3,14 @@ from unittest.mock import patch, MagicMock
import ai_client import ai_client
def test_list_models_gemini_cli(): def test_list_models_gemini_cli():
""" """
Verifies that 'ai_client.list_models' correctly returns a list of models Verifies that 'ai_client.list_models' correctly returns a list of models
for the 'gemini_cli' provider. for the 'gemini_cli' provider.
""" """
models = ai_client.list_models("gemini_cli") models = ai_client.list_models("gemini_cli")
assert "gemini-3.1-pro-preview" in models
assert "gemini-3.1-pro-preview" in models assert "gemini-3-flash-preview" in models
assert "gemini-3-flash-preview" in models assert "gemini-2.5-pro" in models
assert "gemini-2.5-pro" in models assert "gemini-2.5-flash" in models
assert "gemini-2.5-flash" in models assert "gemini-2.5-flash-lite" in models
assert "gemini-2.5-flash-lite" in models assert len(models) == 5
assert len(models) == 5

View File

@@ -3,22 +3,22 @@ import textwrap
from scripts.ai_style_formatter import format_code from scripts.ai_style_formatter import format_code
def test_basic_indentation(): def test_basic_indentation():
source = textwrap.dedent("""\ source = textwrap.dedent("""\
def hello(): def hello():
print("world") print("world")
if True: if True:
print("nested") print("nested")
""") """)
expected = ( expected = (
"def hello():\n" "def hello():\n"
" print(\"world\")\n" " print(\"world\")\n"
" if True:\n" " if True:\n"
" print(\"nested\")\n" " print(\"nested\")\n"
) )
assert format_code(source) == expected assert format_code(source) == expected
def test_top_level_blank_lines(): def test_top_level_blank_lines():
source = textwrap.dedent("""\ source = textwrap.dedent("""\
def a(): def a():
pass pass
@@ -26,31 +26,31 @@ def test_top_level_blank_lines():
def b(): def b():
pass pass
""") """)
expected = ( expected = (
"def a():\n" "def a():\n"
" pass\n" " pass\n"
"\n" "\n"
"def b():\n" "def b():\n"
" pass\n" " pass\n"
) )
assert format_code(source) == expected assert format_code(source) == expected
def test_inner_blank_lines(): def test_inner_blank_lines():
source = textwrap.dedent("""\ source = textwrap.dedent("""\
def a(): def a():
print("start") print("start")
print("end") print("end")
""") """)
expected = ( expected = (
"def a():\n" "def a():\n"
" print(\"start\")\n" " print(\"start\")\n"
" print(\"end\")\n" " print(\"end\")\n"
) )
assert format_code(source) == expected assert format_code(source) == expected
def test_multiline_string_safety(): def test_multiline_string_safety():
source = textwrap.dedent("""\ source = textwrap.dedent("""\
def a(): def a():
''' '''
This is a multiline This is a multiline
@@ -60,21 +60,20 @@ def test_multiline_string_safety():
''' '''
pass pass
""") """)
# Note: the indentation of the ''' itself becomes 1 space. # Note: the indentation of the ''' itself becomes 1 space.
# The content inside remains exactly as in source. # The content inside remains exactly as in source.
# textwrap.dedent will remove the common leading whitespace from the source. # textwrap.dedent will remove the common leading whitespace from the source.
# The source's ''' is at 4 spaces. Content is at 4 spaces. # The source's ''' is at 4 spaces. Content is at 4 spaces.
# After dedent: # After dedent:
# def a(): # def a():
# ''' # '''
# This is a... # This is a...
result = format_code(source)
result = format_code(source) assert " This is a multiline" in result
assert " This is a multiline" in result assert result.startswith("def a():\n '''")
assert result.startswith("def a():\n '''")
def test_continuation_indentation(): def test_continuation_indentation():
source = textwrap.dedent("""\ source = textwrap.dedent("""\
def long_func( def long_func(
a, a,
b b
@@ -84,20 +83,20 @@ def test_continuation_indentation():
b b
) )
""") """)
expected = ( expected = (
"def long_func(\n" "def long_func(\n"
" a,\n" " a,\n"
" b\n" " b\n"
"):\n" "):\n"
" return (\n" " return (\n"
" a +\n" " a +\n"
" b\n" " b\n"
" )\n" " )\n"
) )
assert format_code(source) == expected assert format_code(source) == expected
def test_multiple_top_level_definitions(): def test_multiple_top_level_definitions():
source = textwrap.dedent("""\ source = textwrap.dedent("""\
class MyClass: class MyClass:
def __init__(self): def __init__(self):
self.x = 1 self.x = 1
@@ -109,14 +108,14 @@ def test_multiple_top_level_definitions():
def top_level(): def top_level():
pass pass
""") """)
expected = ( expected = (
"class MyClass:\n" "class MyClass:\n"
" def __init__(self):\n" " def __init__(self):\n"
" self.x = 1\n" " self.x = 1\n"
" def method(self):\n" " def method(self):\n"
" pass\n" " pass\n"
"\n" "\n"
"def top_level():\n" "def top_level():\n"
" pass\n" " pass\n"
) )
assert format_code(source) == expected assert format_code(source) == expected

View File

@@ -3,127 +3,104 @@ from unittest.mock import MagicMock, patch
import ai_client import ai_client
class MockUsage: class MockUsage:
def __init__(self): def __init__(self):
self.prompt_token_count = 10 self.prompt_token_count = 10
self.candidates_token_count = 5 self.candidates_token_count = 5
self.total_token_count = 15 self.total_token_count = 15
self.cached_content_token_count = 0 self.cached_content_token_count = 0
class MockPart: class MockPart:
def __init__(self, text, function_call): def __init__(self, text, function_call):
self.text = text self.text = text
self.function_call = function_call self.function_call = function_call
class MockContent: class MockContent:
def __init__(self, parts): def __init__(self, parts):
self.parts = parts self.parts = parts
class MockCandidate: class MockCandidate:
def __init__(self, parts): def __init__(self, parts):
self.content = MockContent(parts) self.content = MockContent(parts)
self.finish_reason = MagicMock() self.finish_reason = MagicMock()
self.finish_reason.name = "STOP" self.finish_reason.name = "STOP"
def test_ai_client_event_emitter_exists(): def test_ai_client_event_emitter_exists():
# This should fail initially because 'events' won't exist on ai_client # This should fail initially because 'events' won't exist on ai_client
assert hasattr(ai_client, 'events') assert hasattr(ai_client, 'events')
def test_event_emission(): def test_event_emission():
callback = MagicMock() callback = MagicMock()
ai_client.events.on("test_event", callback) ai_client.events.on("test_event", callback)
ai_client.events.emit("test_event", payload={"data": 123}) ai_client.events.emit("test_event", payload={"data": 123})
callback.assert_called_once_with(payload={"data": 123}) callback.assert_called_once_with(payload={"data": 123})
def test_send_emits_events(): def test_send_emits_events():
with patch("ai_client._send_gemini") as mock_send_gemini, \ with patch("ai_client._send_gemini") as mock_send_gemini, \
patch("ai_client._send_anthropic") as mock_send_anthropic: patch("ai_client._send_anthropic") as mock_send_anthropic:
mock_send_gemini.return_value = "gemini response"
mock_send_gemini.return_value = "gemini response" start_callback = MagicMock()
response_callback = MagicMock()
start_callback = MagicMock() ai_client.events.on("request_start", start_callback)
response_callback = MagicMock() ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.events.on("request_start", start_callback) ai_client.send("context", "message")
ai_client.events.on("response_received", response_callback) # We mocked _send_gemini so it doesn't emit events inside.
# But wait, ai_client.send itself emits request_start and response_received?
ai_client.set_provider("gemini", "gemini-2.5-flash-lite") # Actually, ai_client.send delegates to _send_gemini.
ai_client.send("context", "message") # Let's mock _gemini_client instead to let _send_gemini run and emit events.
pass
# We mocked _send_gemini so it doesn't emit events inside.
# But wait, ai_client.send itself emits request_start and response_received?
# Actually, ai_client.send delegates to _send_gemini.
# Let's mock _gemini_client instead to let _send_gemini run and emit events.
pass
def test_send_emits_events_proper(): def test_send_emits_events_proper():
with patch("ai_client._ensure_gemini_client"), \ with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client: patch("ai_client._gemini_client") as mock_client:
mock_chat = MagicMock()
mock_chat = MagicMock() mock_client.chats.create.return_value = mock_chat
mock_client.chats.create.return_value = mock_chat mock_response = MagicMock()
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])]
mock_response = MagicMock() mock_response.usage_metadata = MockUsage()
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])] mock_chat.send_message.return_value = mock_response
mock_response.usage_metadata = MockUsage() start_callback = MagicMock()
mock_chat.send_message.return_value = mock_response response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
start_callback = MagicMock() ai_client.events.on("response_received", response_callback)
response_callback = MagicMock() ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
ai_client.events.on("request_start", start_callback) assert start_callback.called
ai_client.events.on("response_received", response_callback) assert response_callback.called
args, kwargs = start_callback.call_args
ai_client.set_provider("gemini", "gemini-2.5-flash-lite") assert kwargs['payload']['provider'] == 'gemini'
ai_client.send("context", "message")
assert start_callback.called
assert response_callback.called
args, kwargs = start_callback.call_args
assert kwargs['payload']['provider'] == 'gemini'
def test_send_emits_tool_events(): def test_send_emits_tool_events():
import mcp_client import mcp_client
with patch("ai_client._ensure_gemini_client"), \ with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client, \ patch("ai_client._gemini_client") as mock_client, \
patch("mcp_client.dispatch") as mock_dispatch: patch("mcp_client.dispatch") as mock_dispatch:
mock_chat = MagicMock()
mock_chat = MagicMock() mock_client.chats.create.return_value = mock_chat
mock_client.chats.create.return_value = mock_chat # 1. Setup mock response with a tool call
mock_fc = MagicMock()
# 1. Setup mock response with a tool call mock_fc.name = "read_file"
mock_fc = MagicMock() mock_fc.args = {"path": "test.txt"}
mock_fc.name = "read_file" mock_response_with_tool = MagicMock()
mock_fc.args = {"path": "test.txt"} mock_response_with_tool.candidates = [MockCandidate([MockPart("tool call text", mock_fc)])]
mock_response_with_tool.usage_metadata = MockUsage()
mock_response_with_tool = MagicMock() # 2. Setup second mock response (final answer)
mock_response_with_tool.candidates = [MockCandidate([MockPart("tool call text", mock_fc)])] mock_response_final = MagicMock()
mock_response_with_tool.usage_metadata = MockUsage() mock_response_final.candidates = [MockCandidate([MockPart("final answer", None)])]
mock_response_final.usage_metadata = MockUsage()
# 2. Setup second mock response (final answer) mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
mock_response_final = MagicMock() mock_dispatch.return_value = "file content"
mock_response_final.candidates = [MockCandidate([MockPart("final answer", None)])] ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
mock_response_final.usage_metadata = MockUsage() tool_callback = MagicMock()
ai_client.events.on("tool_execution", tool_callback)
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final] ai_client.send("context", "message")
mock_dispatch.return_value = "file content" # Should be called twice: once for 'started', once for 'completed'
assert tool_callback.call_count == 2
ai_client.set_provider("gemini", "gemini-2.5-flash-lite") # Check 'started' call
args, kwargs = tool_callback.call_args_list[0]
tool_callback = MagicMock() assert kwargs['payload']['status'] == 'started'
ai_client.events.on("tool_execution", tool_callback) assert kwargs['payload']['tool'] == 'read_file'
# Check 'completed' call
ai_client.send("context", "message") args, kwargs = tool_callback.call_args_list[1]
assert kwargs['payload']['status'] == 'completed'
# Should be called twice: once for 'started', once for 'completed' assert kwargs['payload']['result'] == 'file content'
assert tool_callback.call_count == 2
# Check 'started' call
args, kwargs = tool_callback.call_args_list[0]
assert kwargs['payload']['status'] == 'started'
assert kwargs['payload']['tool'] == 'read_file'
# Check 'completed' call
args, kwargs = tool_callback.call_args_list[1]
assert kwargs['payload']['status'] == 'completed'
assert kwargs['payload']['result'] == 'file content'

View File

@@ -13,88 +13,84 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient from api_hook_client import ApiHookClient
def test_get_status_success(live_gui): def test_get_status_success(live_gui):
""" """
Test that get_status successfully retrieves the server status Test that get_status successfully retrieves the server status
when the live GUI is running. when the live GUI is running.
""" """
client = ApiHookClient() client = ApiHookClient()
status = client.get_status() status = client.get_status()
assert status == {'status': 'ok'} assert status == {'status': 'ok'}
def test_get_project_success(live_gui): def test_get_project_success(live_gui):
""" """
Test successful retrieval of project data from the live GUI. Test successful retrieval of project data from the live GUI.
""" """
client = ApiHookClient() client = ApiHookClient()
response = client.get_project() response = client.get_project()
assert 'project' in response assert 'project' in response
# We don't assert specific content as it depends on the environment's active project # We don't assert specific content as it depends on the environment's active project
def test_get_session_success(live_gui): def test_get_session_success(live_gui):
""" """
Test successful retrieval of session data. Test successful retrieval of session data.
""" """
client = ApiHookClient() client = ApiHookClient()
response = client.get_session() response = client.get_session()
assert 'session' in response assert 'session' in response
assert 'entries' in response['session'] assert 'entries' in response['session']
def test_post_gui_success(live_gui): def test_post_gui_success(live_gui):
""" """
Test successful posting of GUI data. Test successful posting of GUI data.
""" """
client = ApiHookClient() client = ApiHookClient()
gui_data = {'command': 'set_text', 'id': 'some_item', 'value': 'new_text'} gui_data = {'command': 'set_text', 'id': 'some_item', 'value': 'new_text'}
response = client.post_gui(gui_data) response = client.post_gui(gui_data)
assert response == {'status': 'queued'} assert response == {'status': 'queued'}
def test_get_performance_success(live_gui): def test_get_performance_success(live_gui):
""" """
Test successful retrieval of performance metrics. Test successful retrieval of performance metrics.
""" """
client = ApiHookClient() client = ApiHookClient()
response = client.get_performance() response = client.get_performance()
assert "performance" in response assert "performance" in response
def test_unsupported_method_error(): def test_unsupported_method_error():
""" """
Test that calling an unsupported HTTP method raises a ValueError. Test that calling an unsupported HTTP method raises a ValueError.
""" """
client = ApiHookClient() client = ApiHookClient()
with pytest.raises(ValueError, match="Unsupported HTTP method"): with pytest.raises(ValueError, match="Unsupported HTTP method"):
client._make_request('PUT', '/some_endpoint', data={'key': 'value'}) client._make_request('PUT', '/some_endpoint', data={'key': 'value'})
def test_get_text_value(): def test_get_text_value():
""" """
Test retrieval of string representation using get_text_value. Test retrieval of string representation using get_text_value.
""" """
client = ApiHookClient() client = ApiHookClient()
with patch.object(client, 'get_value', return_value=123): with patch.object(client, 'get_value', return_value=123):
assert client.get_text_value("dummy_tag") == "123" assert client.get_text_value("dummy_tag") == "123"
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, 'get_value', return_value=None): assert client.get_text_value("dummy_tag") is None
assert client.get_text_value("dummy_tag") is None
def test_get_node_status(): def test_get_node_status():
""" """
Test retrieval of DAG node status using get_node_status. Test retrieval of DAG node status using get_node_status.
""" """
client = ApiHookClient() client = ApiHookClient()
# When get_value returns a status directly # When get_value returns a status directly
with patch.object(client, 'get_value', return_value="running"): with patch.object(client, 'get_value', return_value="running"):
assert client.get_node_status("my_node") == "running" assert client.get_node_status("my_node") == "running"
# When get_value returns None and diagnostics provides a nodes dict
# When get_value returns None and diagnostics provides a nodes dict with patch.object(client, 'get_value', return_value=None):
with patch.object(client, 'get_value', return_value=None): with patch.object(client, '_make_request', return_value={'nodes': {'my_node': 'completed'}}):
with patch.object(client, '_make_request', return_value={'nodes': {'my_node': 'completed'}}): assert client.get_node_status("my_node") == "completed"
assert client.get_node_status("my_node") == "completed" # When get_value returns None and diagnostics provides a direct key
with patch.object(client, 'get_value', return_value=None):
# When get_value returns None and diagnostics provides a direct key with patch.object(client, '_make_request', return_value={'my_node': 'failed'}):
with patch.object(client, 'get_value', return_value=None): assert client.get_node_status("my_node") == "failed"
with patch.object(client, '_make_request', return_value={'my_node': 'failed'}): # When neither works
assert client.get_node_status("my_node") == "failed" with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={}):
# When neither works assert client.get_node_status("my_node") is None
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={}):
assert client.get_node_status("my_node") is None

View File

@@ -8,68 +8,64 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient from api_hook_client import ApiHookClient
def test_api_client_has_extensions(): def test_api_client_has_extensions():
client = ApiHookClient() client = ApiHookClient()
# These should fail initially as they are not implemented # These should fail initially as they are not implemented
assert hasattr(client, 'select_tab') assert hasattr(client, 'select_tab')
assert hasattr(client, 'select_list_item') assert hasattr(client, 'select_list_item')
def test_select_tab_integration(live_gui): def test_select_tab_integration(live_gui):
client = ApiHookClient() client = ApiHookClient()
# We'll need to make sure the tags exist in gui_legacy.py # We'll need to make sure the tags exist in gui_legacy.py
# For now, this is a placeholder for the integration test # For now, this is a placeholder for the integration test
response = client.select_tab("operations_tabs", "tab_tool") response = client.select_tab("operations_tabs", "tab_tool")
assert response == {'status': 'queued'} assert response == {'status': 'queued'}
def test_select_list_item_integration(live_gui): def test_select_list_item_integration(live_gui):
client = ApiHookClient() client = ApiHookClient()
# Assuming 'Default' discussion exists or we can just test that it queues # Assuming 'Default' discussion exists or we can just test that it queues
response = client.select_list_item("disc_listbox", "Default") response = client.select_list_item("disc_listbox", "Default")
assert response == {'status': 'queued'} assert response == {'status': 'queued'}
def test_get_indicator_state_integration(live_gui): def test_get_indicator_state_integration(live_gui):
client = ApiHookClient() client = ApiHookClient()
# thinking_indicator is usually hidden unless AI is running # thinking_indicator is usually hidden unless AI is running
response = client.get_indicator_state("thinking_indicator") response = client.get_indicator_state("thinking_indicator")
assert 'shown' in response assert 'shown' in response
assert response['tag'] == "thinking_indicator" assert response['tag'] == "thinking_indicator"
def test_app_processes_new_actions(): def test_app_processes_new_actions():
import gui_legacy import gui_legacy
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
import dearpygui.dearpygui as dpg import dearpygui.dearpygui as dpg
dpg.create_context()
dpg.create_context() try:
try: with patch('gui_legacy.load_config', return_value={}), \
with patch('gui_legacy.load_config', return_value={}), \ patch('gui_legacy.PerformanceMonitor'), \
patch('gui_legacy.PerformanceMonitor'), \ patch('gui_legacy.shell_runner'), \
patch('gui_legacy.shell_runner'), \ patch('gui_legacy.project_manager'), \
patch('gui_legacy.project_manager'), \ patch.object(gui_legacy.App, '_load_active_project'):
patch.object(gui_legacy.App, '_load_active_project'): app = gui_legacy.App()
app = gui_legacy.App() with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
with patch('dearpygui.dearpygui.set_value') as mock_set_value, \ patch('dearpygui.dearpygui.get_item_callback') as mock_get_cb:
patch('dearpygui.dearpygui.does_item_exist', return_value=True), \ # Test select_tab
patch('dearpygui.dearpygui.get_item_callback') as mock_get_cb: app._pending_gui_tasks.append({
"action": "select_tab",
# Test select_tab "tab_bar": "some_tab_bar",
app._pending_gui_tasks.append({ "tab": "some_tab"
"action": "select_tab", })
"tab_bar": "some_tab_bar", app._process_pending_gui_tasks()
"tab": "some_tab" mock_set_value.assert_any_call("some_tab_bar", "some_tab")
}) # Test select_list_item
app._process_pending_gui_tasks() mock_cb = MagicMock()
mock_set_value.assert_any_call("some_tab_bar", "some_tab") mock_get_cb.return_value = mock_cb
app._pending_gui_tasks.append({
# Test select_list_item "action": "select_list_item",
mock_cb = MagicMock() "listbox": "some_listbox",
mock_get_cb.return_value = mock_cb "item_value": "some_value"
app._pending_gui_tasks.append({ })
"action": "select_list_item", app._process_pending_gui_tasks()
"listbox": "some_listbox", mock_set_value.assert_any_call("some_listbox", "some_value")
"item_value": "some_value" mock_cb.assert_called_with("some_listbox", "some_value")
}) finally:
app._process_pending_gui_tasks() dpg.destroy_context()
mock_set_value.assert_any_call("some_listbox", "some_value")
mock_cb.assert_called_with("some_listbox", "some_value")
finally:
dpg.destroy_context()

View File

@@ -3,24 +3,24 @@ import tree_sitter
from file_cache import ASTParser from file_cache import ASTParser
def test_ast_parser_initialization(): def test_ast_parser_initialization():
"""Verify that ASTParser can be initialized with a language string.""" """Verify that ASTParser can be initialized with a language string."""
parser = ASTParser("python") parser = ASTParser("python")
assert parser.language_name == "python" assert parser.language_name == "python"
def test_ast_parser_parse(): def test_ast_parser_parse():
"""Verify that the parse method returns a tree_sitter.Tree.""" """Verify that the parse method returns a tree_sitter.Tree."""
parser = ASTParser("python") parser = ASTParser("python")
code = """def example_func(): code = """def example_func():
return 42""" return 42"""
tree = parser.parse(code) tree = parser.parse(code)
assert isinstance(tree, tree_sitter.Tree) assert isinstance(tree, tree_sitter.Tree)
# Basic check that it parsed something # Basic check that it parsed something
assert tree.root_node.type == "module" assert tree.root_node.type == "module"
def test_ast_parser_get_skeleton_python(): def test_ast_parser_get_skeleton_python():
"""Verify that get_skeleton replaces function bodies with '...' while preserving docstrings.""" """Verify that get_skeleton replaces function bodies with '...' while preserving docstrings."""
parser = ASTParser("python") parser = ASTParser("python")
code = ''' code = '''
def complex_function(a, b): def complex_function(a, b):
""" """
This is a docstring. This is a docstring.
@@ -36,35 +36,32 @@ class MyClass:
print("doing something") print("doing something")
return None return None
''' '''
skeleton = parser.get_skeleton(code) skeleton = parser.get_skeleton(code)
# Check that signatures are preserved
# Check that signatures are preserved assert "def complex_function(a, b):" in skeleton
assert "def complex_function(a, b):" in skeleton assert "class MyClass:" in skeleton
assert "class MyClass:" in skeleton assert "def method_without_docstring(self):" in skeleton
assert "def method_without_docstring(self):" in skeleton # Check that docstring is preserved
assert '"""' in skeleton
# Check that docstring is preserved assert "This is a docstring." in skeleton
assert '"""' in skeleton assert "It should be preserved." in skeleton
assert "This is a docstring." in skeleton # Check that bodies are replaced with '...'
assert "It should be preserved." in skeleton assert "..." in skeleton
assert "result = a + b" not in skeleton
# Check that bodies are replaced with '...' assert "return result" not in skeleton
assert "..." in skeleton assert 'print("doing something")' not in skeleton
assert "result = a + b" not in skeleton
assert "return result" not in skeleton
assert 'print("doing something")' not in skeleton
def test_ast_parser_invalid_language(): def test_ast_parser_invalid_language():
"""Verify handling of unsupported or invalid languages.""" """Verify handling of unsupported or invalid languages."""
# This might raise an error or return a default, depending on implementation # This might raise an error or return a default, depending on implementation
# For now, we expect it to either fail gracefully or raise an exception we can catch # For now, we expect it to either fail gracefully or raise an exception we can catch
with pytest.raises(Exception): with pytest.raises(Exception):
ASTParser("not-a-language") ASTParser("not-a-language")
def test_ast_parser_get_curated_view(): def test_ast_parser_get_curated_view():
"""Verify that get_curated_view preserves function bodies with @core_logic or # [HOT].""" """Verify that get_curated_view preserves function bodies with @core_logic or # [HOT]."""
parser = ASTParser("python") parser = ASTParser("python")
code = ''' code = '''
@core_logic @core_logic
def core_func(): def core_func():
"""Core logic doc.""" """Core logic doc."""
@@ -86,20 +83,16 @@ class MyClass:
def core_method(self, x): def core_method(self, x):
print("method preserved", x) print("method preserved", x)
''' '''
curated = parser.get_curated_view(code) curated = parser.get_curated_view(code)
# Check that core_func is preserved
# Check that core_func is preserved assert 'print("this should be preserved")' in curated
assert 'print("this should be preserved")' in curated assert 'return True' in curated
assert 'return True' in curated # Check that hot_func is preserved
assert '# [HOT]' in curated
# Check that hot_func is preserved assert 'print("this should also be preserved")' in curated
assert '# [HOT]' in curated # Check that normal_func is stripped but docstring is preserved
assert 'print("this should also be preserved")' in curated assert '"""Normal doc."""' in curated
assert 'print("this should be stripped")' not in curated
# Check that normal_func is stripped but docstring is preserved assert '...' in curated
assert '"""Normal doc."""' in curated # Check that core_method is preserved
assert 'print("this should be stripped")' not in curated assert 'print("method preserved", x)' in curated
assert '...' in curated
# Check that core_method is preserved
assert 'print("method preserved", x)' in curated

View File

@@ -2,8 +2,8 @@ import pytest
from file_cache import ASTParser from file_cache import ASTParser
def test_ast_parser_get_curated_view(): def test_ast_parser_get_curated_view():
parser = ASTParser("python") parser = ASTParser("python")
code = ''' code = '''
@core_logic @core_logic
def core_func(): def core_func():
"""Core logic doc.""" """Core logic doc."""
@@ -25,20 +25,16 @@ class MyClass:
def core_method(self): def core_method(self):
print("method preserved") print("method preserved")
''' '''
curated = parser.get_curated_view(code) curated = parser.get_curated_view(code)
# Check that core_func is preserved
# Check that core_func is preserved assert 'print("this should be preserved")' in curated
assert 'print("this should be preserved")' in curated assert 'return True' in curated
assert 'return True' in curated # Check that hot_func is preserved
assert '# [HOT]' in curated
# Check that hot_func is preserved assert 'print("this should also be preserved")' in curated
assert '# [HOT]' in curated # Check that normal_func is stripped but docstring is preserved
assert 'print("this should also be preserved")' in curated assert '"""Normal doc."""' in curated
assert 'print("this should be stripped")' not in curated
# Check that normal_func is stripped but docstring is preserved assert '...' in curated
assert '"""Normal doc."""' in curated # Check that core_method is preserved
assert 'print("this should be stripped")' not in curated assert 'print("method preserved")' in curated
assert '...' in curated
# Check that core_method is preserved
assert 'print("method preserved")' in curated

View File

@@ -3,45 +3,40 @@ import pytest
from events import AsyncEventQueue from events import AsyncEventQueue
def test_async_event_queue_put_get(): def test_async_event_queue_put_get():
"""Verify that an event can be asynchronously put and retrieved from the queue.""" """Verify that an event can be asynchronously put and retrieved from the queue."""
async def run_test():
queue = AsyncEventQueue()
event_name = "test_event"
payload = {"data": "hello"}
await queue.put(event_name, payload)
ret_name, ret_payload = await queue.get()
assert ret_name == event_name
assert ret_payload == payload
asyncio.run(run_test()) async def run_test():
queue = AsyncEventQueue()
event_name = "test_event"
payload = {"data": "hello"}
await queue.put(event_name, payload)
ret_name, ret_payload = await queue.get()
assert ret_name == event_name
assert ret_payload == payload
asyncio.run(run_test())
def test_async_event_queue_multiple(): def test_async_event_queue_multiple():
"""Verify that multiple events can be asynchronously put and retrieved in order.""" """Verify that multiple events can be asynchronously put and retrieved in order."""
async def run_test():
queue = AsyncEventQueue()
await queue.put("event1", 1)
await queue.put("event2", 2)
name1, val1 = await queue.get()
name2, val2 = await queue.get()
assert name1 == "event1"
assert val1 == 1
assert name2 == "event2"
assert val2 == 2
asyncio.run(run_test()) async def run_test():
queue = AsyncEventQueue()
await queue.put("event1", 1)
await queue.put("event2", 2)
name1, val1 = await queue.get()
name2, val2 = await queue.get()
assert name1 == "event1"
assert val1 == 1
assert name2 == "event2"
assert val2 == 2
asyncio.run(run_test())
def test_async_event_queue_none_payload(): def test_async_event_queue_none_payload():
"""Verify that an event with None payload works correctly.""" """Verify that an event with None payload works correctly."""
async def run_test():
queue = AsyncEventQueue()
await queue.put("no_payload")
name, payload = await queue.get()
assert name == "no_payload"
assert payload is None
asyncio.run(run_test()) async def run_test():
queue = AsyncEventQueue()
await queue.put("no_payload")
name, payload = await queue.get()
assert name == "no_payload"
assert payload is None
asyncio.run(run_test())

View File

@@ -5,72 +5,60 @@ from log_registry import LogRegistry
@pytest.fixture @pytest.fixture
def registry_setup(tmp_path): def registry_setup(tmp_path):
registry_path = tmp_path / "log_registry.toml" registry_path = tmp_path / "log_registry.toml"
logs_dir = tmp_path / "logs" logs_dir = tmp_path / "logs"
logs_dir.mkdir() logs_dir.mkdir()
registry = LogRegistry(str(registry_path)) registry = LogRegistry(str(registry_path))
return registry, logs_dir return registry, logs_dir
def test_auto_whitelist_keywords(registry_setup): def test_auto_whitelist_keywords(registry_setup):
registry, logs_dir = registry_setup registry, logs_dir = registry_setup
session_id = "test_kw" session_id = "test_kw"
session_dir = logs_dir / session_id session_dir = logs_dir / session_id
session_dir.mkdir() session_dir.mkdir()
# Create comms.log with ERROR
# Create comms.log with ERROR comms_log = session_dir / "comms.log"
comms_log = session_dir / "comms.log" comms_log.write_text("Some message\nAN ERROR OCCURRED\nMore text")
comms_log.write_text("Some message\nAN ERROR OCCURRED\nMore text") registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
registry.register_session(session_id, str(session_dir), datetime.now()) assert registry.is_session_whitelisted(session_id)
registry.update_auto_whitelist_status(session_id) assert "ERROR" in registry.data[session_id]["metadata"]["reason"]
assert registry.is_session_whitelisted(session_id)
assert "ERROR" in registry.data[session_id]["metadata"]["reason"]
def test_auto_whitelist_message_count(registry_setup): def test_auto_whitelist_message_count(registry_setup):
registry, logs_dir = registry_setup registry, logs_dir = registry_setup
session_id = "test_msg_count" session_id = "test_msg_count"
session_dir = logs_dir / session_id session_dir = logs_dir / session_id
session_dir.mkdir() session_dir.mkdir()
# Create comms.log with > 10 lines
# Create comms.log with > 10 lines comms_log = session_dir / "comms.log"
comms_log = session_dir / "comms.log" comms_log.write_text("\n".join(["msg"] * 15))
comms_log.write_text("\n".join(["msg"] * 15)) registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
registry.register_session(session_id, str(session_dir), datetime.now()) assert registry.is_session_whitelisted(session_id)
registry.update_auto_whitelist_status(session_id) assert registry.data[session_id]["metadata"]["message_count"] == 15
assert registry.is_session_whitelisted(session_id)
assert registry.data[session_id]["metadata"]["message_count"] == 15
def test_auto_whitelist_large_size(registry_setup): def test_auto_whitelist_large_size(registry_setup):
registry, logs_dir = registry_setup registry, logs_dir = registry_setup
session_id = "test_large" session_id = "test_large"
session_dir = logs_dir / session_id session_dir = logs_dir / session_id
session_dir.mkdir() session_dir.mkdir()
# Create large file (> 50KB)
# Create large file (> 50KB) large_file = session_dir / "large.log"
large_file = session_dir / "large.log" large_file.write_text("x" * 60000)
large_file.write_text("x" * 60000) registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
registry.register_session(session_id, str(session_dir), datetime.now()) assert registry.is_session_whitelisted(session_id)
registry.update_auto_whitelist_status(session_id) assert "Large session size" in registry.data[session_id]["metadata"]["reason"]
assert registry.is_session_whitelisted(session_id)
assert "Large session size" in registry.data[session_id]["metadata"]["reason"]
def test_no_auto_whitelist_insignificant(registry_setup): def test_no_auto_whitelist_insignificant(registry_setup):
registry, logs_dir = registry_setup registry, logs_dir = registry_setup
session_id = "test_insignificant" session_id = "test_insignificant"
session_dir = logs_dir / session_id session_dir = logs_dir / session_id
session_dir.mkdir() session_dir.mkdir()
# Small file, few lines, no keywords
# Small file, few lines, no keywords comms_log = session_dir / "comms.log"
comms_log = session_dir / "comms.log" comms_log.write_text("hello\nworld")
comms_log.write_text("hello\nworld") registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
registry.register_session(session_id, str(session_dir), datetime.now()) assert not registry.is_session_whitelisted(session_id)
registry.update_auto_whitelist_status(session_id) assert registry.data[session_id]["metadata"]["message_count"] == 2
assert not registry.is_session_whitelisted(session_id)
assert registry.data[session_id]["metadata"]["message_count"] == 2

View File

@@ -12,64 +12,55 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from scripts.cli_tool_bridge import main from scripts.cli_tool_bridge import main
class TestCliToolBridge(unittest.TestCase): class TestCliToolBridge(unittest.TestCase):
def setUp(self): def setUp(self):
os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop' os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
self.tool_call = { self.tool_call = {
'tool_name': 'read_file', 'tool_name': 'read_file',
'tool_input': {'path': 'test.txt'} 'tool_input': {'path': 'test.txt'}
} }
@patch('sys.stdin', new_callable=io.StringIO) @patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO) @patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation') @patch('api_hook_client.ApiHookClient.request_confirmation')
def test_allow_decision(self, mock_request, mock_stdout, mock_stdin): def test_allow_decision(self, mock_request, mock_stdout, mock_stdin):
# 1. Mock stdin with a JSON string tool call # 1. Mock stdin with a JSON string tool call
mock_stdin.write(json.dumps(self.tool_call)) mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0) mock_stdin.seek(0)
# 2. Mock ApiHookClient to return approved
mock_request.return_value = {'approved': True}
# Run main
main()
# 3. Capture stdout and assert allow
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'allow')
# 2. Mock ApiHookClient to return approved @patch('sys.stdin', new_callable=io.StringIO)
mock_request.return_value = {'approved': True} @patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_deny_decision(self, mock_request, mock_stdout, mock_stdin):
# Mock stdin
mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0)
# 4. Mock ApiHookClient to return denied
mock_request.return_value = {'approved': False}
main()
# Assert deny
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'deny')
# Run main @patch('sys.stdin', new_callable=io.StringIO)
main() @patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
# 3. Capture stdout and assert allow def test_unreachable_hook_server(self, mock_request, mock_stdout, mock_stdin):
output = json.loads(mock_stdout.getvalue().strip()) # Mock stdin
self.assertEqual(output.get('decision'), 'allow') mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0)
@patch('sys.stdin', new_callable=io.StringIO) # 5. Test case where hook server is unreachable (exception)
@patch('sys.stdout', new_callable=io.StringIO) mock_request.side_effect = Exception("Connection refused")
@patch('api_hook_client.ApiHookClient.request_confirmation') main()
def test_deny_decision(self, mock_request, mock_stdout, mock_stdin): # Assert deny on error
# Mock stdin output = json.loads(mock_stdout.getvalue().strip())
mock_stdin.write(json.dumps(self.tool_call)) self.assertEqual(output.get('decision'), 'deny')
mock_stdin.seek(0)
# 4. Mock ApiHookClient to return denied
mock_request.return_value = {'approved': False}
main()
# Assert deny
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'deny')
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_unreachable_hook_server(self, mock_request, mock_stdout, mock_stdin):
# Mock stdin
mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0)
# 5. Test case where hook server is unreachable (exception)
mock_request.side_effect = Exception("Connection refused")
main()
# Assert deny on error
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'deny')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -12,42 +12,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from scripts.cli_tool_bridge import main from scripts.cli_tool_bridge import main
class TestCliToolBridgeMapping(unittest.TestCase): class TestCliToolBridgeMapping(unittest.TestCase):
def setUp(self): def setUp(self):
os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop' os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
@patch('sys.stdin', new_callable=io.StringIO) @patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO) @patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation') @patch('api_hook_client.ApiHookClient.request_confirmation')
def test_mapping_from_api_format(self, mock_request, mock_stdout, mock_stdin): def test_mapping_from_api_format(self, mock_request, mock_stdout, mock_stdin):
""" """
Verify that bridge correctly maps 'id', 'name', 'input' (Gemini API format) Verify that bridge correctly maps 'id', 'name', 'input' (Gemini API format)
into tool_name and tool_input for the hook client. into tool_name and tool_input for the hook client.
""" """
api_tool_call = { api_tool_call = {
'id': 'call123', 'id': 'call123',
'name': 'read_file', 'name': 'read_file',
'input': {'path': 'test.txt'} 'input': {'path': 'test.txt'}
} }
# 1. Mock stdin with the API format JSON
# 1. Mock stdin with the API format JSON mock_stdin.write(json.dumps(api_tool_call))
mock_stdin.write(json.dumps(api_tool_call)) mock_stdin.seek(0)
mock_stdin.seek(0) # 2. Mock ApiHookClient to return approved
mock_request.return_value = {'approved': True}
# 2. Mock ApiHookClient to return approved # Run main
mock_request.return_value = {'approved': True} main()
# 3. Verify that request_confirmation was called with mapped values
# Run main # If it's not mapped, it will likely be called with None or fail
main() mock_request.assert_called_once_with('read_file', {'path': 'test.txt'})
# 4. Capture stdout and assert allow
# 3. Verify that request_confirmation was called with mapped values output_str = mock_stdout.getvalue().strip()
# If it's not mapped, it will likely be called with None or fail self.assertTrue(output_str, "Stdout should not be empty")
mock_request.assert_called_once_with('read_file', {'path': 'test.txt'}) output = json.loads(output_str)
self.assertEqual(output.get('decision'), 'allow')
# 4. Capture stdout and assert allow
output_str = mock_stdout.getvalue().strip()
self.assertTrue(output_str, "Stdout should not be empty")
output = json.loads(output_str)
self.assertEqual(output.get('decision'), 'allow')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -13,61 +13,55 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient from api_hook_client import ApiHookClient
def simulate_conductor_phase_completion(client: ApiHookClient): def simulate_conductor_phase_completion(client: ApiHookClient):
""" """
Simulates the Conductor agent's logic for phase completion using ApiHookClient. Simulates the Conductor agent's logic for phase completion using ApiHookClient.
""" """
results = { results = {
"verification_successful": False, "verification_successful": False,
"verification_message": "" "verification_message": ""
} }
try:
try: status = client.get_status()
status = client.get_status() if status.get('status') == 'ok':
if status.get('status') == 'ok': results["verification_successful"] = True
results["verification_successful"] = True results["verification_message"] = "Automated verification completed successfully."
results["verification_message"] = "Automated verification completed successfully." else:
else: results["verification_successful"] = False
results["verification_successful"] = False results["verification_message"] = f"Automated verification failed: {status}"
results["verification_message"] = f"Automated verification failed: {status}" except Exception as e:
except Exception as e: results["verification_successful"] = False
results["verification_successful"] = False results["verification_message"] = f"Automated verification failed: {e}"
results["verification_message"] = f"Automated verification failed: {e}" return results
return results
def test_conductor_integrates_api_hook_client_for_verification(live_gui): def test_conductor_integrates_api_hook_client_for_verification(live_gui):
""" """
Verify that Conductor's simulated phase completion logic properly integrates Verify that Conductor's simulated phase completion logic properly integrates
and uses the ApiHookClient for verification against the live GUI. and uses the ApiHookClient for verification against the live GUI.
""" """
client = ApiHookClient() client = ApiHookClient()
results = simulate_conductor_phase_completion(client) results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is True
assert results["verification_successful"] is True assert "successfully" in results["verification_message"]
assert "successfully" in results["verification_message"]
def test_conductor_handles_api_hook_failure(live_gui): def test_conductor_handles_api_hook_failure(live_gui):
""" """
Verify Conductor handles a simulated API hook verification failure. Verify Conductor handles a simulated API hook verification failure.
We patch the client's get_status to simulate failure even with live GUI. We patch the client's get_status to simulate failure even with live GUI.
""" """
client = ApiHookClient() client = ApiHookClient()
with patch.object(ApiHookClient, 'get_status') as mock_get_status:
with patch.object(ApiHookClient, 'get_status') as mock_get_status: mock_get_status.return_value = {'status': 'failed', 'error': 'Something went wrong'}
mock_get_status.return_value = {'status': 'failed', 'error': 'Something went wrong'} results = simulate_conductor_phase_completion(client)
results = simulate_conductor_phase_completion(client) assert results["verification_successful"] is False
assert "failed" in results["verification_message"]
assert results["verification_successful"] is False
assert "failed" in results["verification_message"]
def test_conductor_handles_api_hook_connection_error(): def test_conductor_handles_api_hook_connection_error():
""" """
Verify Conductor handles a simulated API hook connection error (server down). Verify Conductor handles a simulated API hook connection error (server down).
""" """
client = ApiHookClient(base_url="http://127.0.0.1:9998", max_retries=0) client = ApiHookClient(base_url="http://127.0.0.1:9998", max_retries=0)
results = simulate_conductor_phase_completion(client) results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is False
assert results["verification_successful"] is False # Check for expected error substrings from ApiHookClient
# Check for expected error substrings from ApiHookClient msg = results["verification_message"]
msg = results["verification_message"] assert any(term in msg for term in ["Could not connect", "timed out", "Could not reach"])
assert any(term in msg for term in ["Could not connect", "timed out", "Could not reach"])

View File

@@ -7,284 +7,234 @@ import ai_client
# which will be implemented in the next phase of TDD. # which will be implemented in the next phase of TDD.
def test_conductor_engine_initialization(): def test_conductor_engine_initialization():
""" """
Test that ConductorEngine can be initialized with a Track. Test that ConductorEngine can be initialized with a Track.
""" """
track = Track(id="test_track", description="Test Track") track = Track(id="test_track", description="Test Track")
from multi_agent_conductor import ConductorEngine from multi_agent_conductor import ConductorEngine
engine = ConductorEngine(track=track) engine = ConductorEngine(track=track)
assert engine.track == track assert engine.track == track
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_conductor_engine_run_linear_executes_tickets_in_order(monkeypatch): async def test_conductor_engine_run_linear_executes_tickets_in_order(monkeypatch):
""" """
Test that run_linear iterates through executable tickets and calls the worker lifecycle. Test that run_linear iterates through executable tickets and calls the worker lifecycle.
""" """
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1") ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker2", depends_on=["T1"]) ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker2", depends_on=["T1"])
track = Track(id="track1", description="Track 1", tickets=[ticket1, ticket2]) track = Track(id="track1", description="Track 1", tickets=[ticket1, ticket2])
from multi_agent_conductor import ConductorEngine
from multi_agent_conductor import ConductorEngine engine = ConductorEngine(track=track)
engine = ConductorEngine(track=track) # Mock ai_client.send using monkeypatch
mock_send = MagicMock()
# Mock ai_client.send using monkeypatch monkeypatch.setattr(ai_client, 'send', mock_send)
mock_send = MagicMock() # We mock run_worker_lifecycle as it is expected to be in the same module
monkeypatch.setattr(ai_client, 'send', mock_send) with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved
# We mock run_worker_lifecycle as it is expected to be in the same module
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle: def side_effect(ticket, context, *args, **kwargs):
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved ticket.mark_complete()
def side_effect(ticket, context, *args, **kwargs): return "Success"
ticket.mark_complete() mock_lifecycle.side_effect = side_effect
return "Success" await engine.run_linear()
mock_lifecycle.side_effect = side_effect # Track.get_executable_tickets() should be called repeatedly until all are done
# T1 should run first, then T2.
await engine.run_linear() assert mock_lifecycle.call_count == 2
assert ticket1.status == "completed"
# Track.get_executable_tickets() should be called repeatedly until all are done assert ticket2.status == "completed"
# T1 should run first, then T2. # Verify sequence: T1 before T2
assert mock_lifecycle.call_count == 2 calls = mock_lifecycle.call_args_list
assert ticket1.status == "completed" assert calls[0][0][0].id == "T1"
assert ticket2.status == "completed" assert calls[1][0][0].id == "T2"
# Verify sequence: T1 before T2
calls = mock_lifecycle.call_args_list
assert calls[0][0][0].id == "T1"
assert calls[1][0][0].id == "T2"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_run_worker_lifecycle_calls_ai_client_send(monkeypatch): async def test_run_worker_lifecycle_calls_ai_client_send(monkeypatch):
""" """
Test that run_worker_lifecycle triggers the AI client and updates ticket status on success. Test that run_worker_lifecycle triggers the AI client and updates ticket status on success.
""" """
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1") ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[]) context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
from multi_agent_conductor import run_worker_lifecycle # Mock ai_client.send using monkeypatch
mock_send = MagicMock()
# Mock ai_client.send using monkeypatch monkeypatch.setattr(ai_client, 'send', mock_send)
mock_send = MagicMock() mock_send.return_value = "Task complete. I have updated the file."
monkeypatch.setattr(ai_client, 'send', mock_send) result = run_worker_lifecycle(ticket, context)
assert result == "Task complete. I have updated the file."
mock_send.return_value = "Task complete. I have updated the file." assert ticket.status == "completed"
mock_send.assert_called_once()
result = run_worker_lifecycle(ticket, context) # Check if description was passed to send()
args, kwargs = mock_send.call_args
assert result == "Task complete. I have updated the file." # user_message is passed as a keyword argument
assert ticket.status == "completed" assert ticket.description in kwargs["user_message"]
mock_send.assert_called_once()
# Check if description was passed to send()
args, kwargs = mock_send.call_args
# user_message is passed as a keyword argument
assert ticket.description in kwargs["user_message"]
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_run_worker_lifecycle_context_injection(monkeypatch): async def test_run_worker_lifecycle_context_injection(monkeypatch):
""" """
Test that run_worker_lifecycle can take a context_files list and injects AST views into the prompt. Test that run_worker_lifecycle can take a context_files list and injects AST views into the prompt.
""" """
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1") ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[]) context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
context_files = ["primary.py", "secondary.py"] context_files = ["primary.py", "secondary.py"]
from multi_agent_conductor import run_worker_lifecycle
from multi_agent_conductor import run_worker_lifecycle # Mock ai_client.send using monkeypatch
mock_send = MagicMock()
# Mock ai_client.send using monkeypatch monkeypatch.setattr(ai_client, 'send', mock_send)
mock_send = MagicMock() # We mock ASTParser which is expected to be imported in multi_agent_conductor
monkeypatch.setattr(ai_client, 'send', mock_send) with patch("multi_agent_conductor.ASTParser") as mock_ast_parser_class, \
patch("builtins.open", new_callable=MagicMock) as mock_open:
# We mock ASTParser which is expected to be imported in multi_agent_conductor # Setup open mock to return different content for different files
with patch("multi_agent_conductor.ASTParser") as mock_ast_parser_class, \ file_contents = {
patch("builtins.open", new_callable=MagicMock) as mock_open: "primary.py": "def primary(): pass",
"secondary.py": "def secondary(): pass"
# Setup open mock to return different content for different files }
file_contents = {
"primary.py": "def primary(): pass", def mock_open_side_effect(file, *args, **kwargs):
"secondary.py": "def secondary(): pass" content = file_contents.get(file, "")
} mock_file = MagicMock()
mock_file.read.return_value = content
def mock_open_side_effect(file, *args, **kwargs): mock_file.__enter__.return_value = mock_file
content = file_contents.get(file, "") return mock_file
mock_file = MagicMock() mock_open.side_effect = mock_open_side_effect
mock_file.read.return_value = content # Setup ASTParser mock
mock_file.__enter__.return_value = mock_file mock_ast_parser = mock_ast_parser_class.return_value
return mock_file mock_ast_parser.get_curated_view.return_value = "CURATED VIEW"
mock_ast_parser.get_skeleton.return_value = "SKELETON VIEW"
mock_open.side_effect = mock_open_side_effect mock_send.return_value = "Success"
run_worker_lifecycle(ticket, context, context_files=context_files)
# Setup ASTParser mock # Verify ASTParser calls:
mock_ast_parser = mock_ast_parser_class.return_value # First file (primary) should get curated view, others (secondary) get skeleton
mock_ast_parser.get_curated_view.return_value = "CURATED VIEW" mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass")
mock_ast_parser.get_skeleton.return_value = "SKELETON VIEW" mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass")
# Verify user_message contains the views
mock_send.return_value = "Success" _, kwargs = mock_send.call_args
user_message = kwargs["user_message"]
run_worker_lifecycle(ticket, context, context_files=context_files) assert "CURATED VIEW" in user_message
assert "SKELETON VIEW" in user_message
# Verify ASTParser calls: assert "primary.py" in user_message
# First file (primary) should get curated view, others (secondary) get skeleton assert "secondary.py" in user_message
mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass")
mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass")
# Verify user_message contains the views
_, kwargs = mock_send.call_args
user_message = kwargs["user_message"]
assert "CURATED VIEW" in user_message
assert "SKELETON VIEW" in user_message
assert "primary.py" in user_message
assert "secondary.py" in user_message
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_run_worker_lifecycle_handles_blocked_response(monkeypatch): async def test_run_worker_lifecycle_handles_blocked_response(monkeypatch):
""" """
Test that run_worker_lifecycle marks the ticket as blocked if the AI indicates it cannot proceed. Test that run_worker_lifecycle marks the ticket as blocked if the AI indicates it cannot proceed.
""" """
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1") ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[]) context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
from multi_agent_conductor import run_worker_lifecycle # Mock ai_client.send using monkeypatch
mock_send = MagicMock()
# Mock ai_client.send using monkeypatch monkeypatch.setattr(ai_client, 'send', mock_send)
mock_send = MagicMock() # Simulate a response indicating a block
monkeypatch.setattr(ai_client, 'send', mock_send) mock_send.return_value = "I am BLOCKED because I don't have enough information."
run_worker_lifecycle(ticket, context)
# Simulate a response indicating a block assert ticket.status == "blocked"
mock_send.return_value = "I am BLOCKED because I don't have enough information." assert "BLOCKED" in ticket.blocked_reason
run_worker_lifecycle(ticket, context)
assert ticket.status == "blocked"
assert "BLOCKED" in ticket.blocked_reason
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_run_worker_lifecycle_step_mode_confirmation(monkeypatch): async def test_run_worker_lifecycle_step_mode_confirmation(monkeypatch):
""" """
Test that run_worker_lifecycle passes confirm_execution to ai_client.send when step_mode is True. Test that run_worker_lifecycle passes confirm_execution to ai_client.send when step_mode is True.
Verify that if confirm_execution is called (simulated by mocking ai_client.send to call its callback), Verify that if confirm_execution is called (simulated by mocking ai_client.send to call its callback),
the flow works as expected. the flow works as expected.
""" """
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True) ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[]) context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
from multi_agent_conductor import run_worker_lifecycle # Mock ai_client.send using monkeypatch
mock_send = MagicMock()
# Mock ai_client.send using monkeypatch monkeypatch.setattr(ai_client, 'send', mock_send)
mock_send = MagicMock() with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
monkeypatch.setattr(ai_client, 'send', mock_send) # We simulate ai_client.send by making it call the pre_tool_callback it received
with patch("multi_agent_conductor.confirm_execution") as mock_confirm: def mock_send_side_effect(md_content, user_message, **kwargs):
callback = kwargs.get("pre_tool_callback")
# We simulate ai_client.send by making it call the pre_tool_callback it received if callback:
def mock_send_side_effect(md_content, user_message, **kwargs): # Simulate calling it with some payload
callback = kwargs.get("pre_tool_callback") callback('{"tool": "read_file", "args": {"path": "test.txt"}}')
if callback: return "Success"
# Simulate calling it with some payload mock_send.side_effect = mock_send_side_effect
callback('{"tool": "read_file", "args": {"path": "test.txt"}}') mock_confirm.return_value = True
return "Success" mock_event_queue = MagicMock()
run_worker_lifecycle(ticket, context, event_queue=mock_event_queue)
mock_send.side_effect = mock_send_side_effect # Verify confirm_execution was called
mock_confirm.return_value = True mock_confirm.assert_called_once()
assert ticket.status == "completed"
mock_event_queue = MagicMock()
run_worker_lifecycle(ticket, context, event_queue=mock_event_queue)
# Verify confirm_execution was called
mock_confirm.assert_called_once()
assert ticket.status == "completed"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_run_worker_lifecycle_step_mode_rejection(monkeypatch): async def test_run_worker_lifecycle_step_mode_rejection(monkeypatch):
""" """
Verify that if confirm_execution returns False, the logic (in ai_client, which we simulate here) Verify that if confirm_execution returns False, the logic (in ai_client, which we simulate here)
would prevent execution. In run_worker_lifecycle, we just check if it's passed. would prevent execution. In run_worker_lifecycle, we just check if it's passed.
""" """
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True) ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[]) context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
from multi_agent_conductor import run_worker_lifecycle # Mock ai_client.send using monkeypatch
mock_send = MagicMock()
# Mock ai_client.send using monkeypatch monkeypatch.setattr(ai_client, 'send', mock_send)
mock_send = MagicMock() with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
monkeypatch.setattr(ai_client, 'send', mock_send) mock_confirm.return_value = False
mock_send.return_value = "Task failed because tool execution was rejected."
with patch("multi_agent_conductor.confirm_execution") as mock_confirm: run_worker_lifecycle(ticket, context)
# Verify it was passed to send
mock_confirm.return_value = False args, kwargs = mock_send.call_args
mock_send.return_value = "Task failed because tool execution was rejected." assert kwargs["pre_tool_callback"] is not None
# Since we've already tested ai_client's implementation of pre_tool_callback (mentally or via other tests),
run_worker_lifecycle(ticket, context) # here we just verify the wiring.
# Verify it was passed to send
args, kwargs = mock_send.call_args
assert kwargs["pre_tool_callback"] is not None
# Since we've already tested ai_client's implementation of pre_tool_callback (mentally or via other tests),
# here we just verify the wiring.
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch): async def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch):
""" """
Test that parse_json_tickets correctly populates the track and run_linear executes them in dependency order. Test that parse_json_tickets correctly populates the track and run_linear executes them in dependency order.
""" """
import json import json
from multi_agent_conductor import ConductorEngine from multi_agent_conductor import ConductorEngine
track = Track(id="dynamic_track", description="Dynamic Track")
track = Track(id="dynamic_track", description="Dynamic Track") engine = ConductorEngine(track=track)
engine = ConductorEngine(track=track) tickets_json = json.dumps([
{
tickets_json = json.dumps([ "id": "T1",
{ "description": "Initial task",
"id": "T1", "status": "todo",
"description": "Initial task", "assigned_to": "worker1",
"status": "todo", "depends_on": []
"assigned_to": "worker1", },
"depends_on": [] {
}, "id": "T2",
{ "description": "Dependent task",
"id": "T2", "status": "todo",
"description": "Dependent task", "assigned_to": "worker2",
"status": "todo", "depends_on": ["T1"]
"assigned_to": "worker2", },
"depends_on": ["T1"] {
}, "id": "T3",
{ "description": "Another initial task",
"id": "T3", "status": "todo",
"description": "Another initial task", "assigned_to": "worker3",
"status": "todo", "depends_on": []
"assigned_to": "worker3", }
"depends_on": [] ])
} engine.parse_json_tickets(tickets_json)
]) assert len(engine.track.tickets) == 3
assert engine.track.tickets[0].id == "T1"
engine.parse_json_tickets(tickets_json) assert engine.track.tickets[1].id == "T2"
assert engine.track.tickets[2].id == "T3"
assert len(engine.track.tickets) == 3 # Mock ai_client.send using monkeypatch
assert engine.track.tickets[0].id == "T1" mock_send = MagicMock()
assert engine.track.tickets[1].id == "T2" monkeypatch.setattr(ai_client, 'send', mock_send)
assert engine.track.tickets[2].id == "T3" # Mock run_worker_lifecycle to mark tickets as complete
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
# Mock ai_client.send using monkeypatch def side_effect(ticket, context, *args, **kwargs):
mock_send = MagicMock() ticket.mark_complete()
monkeypatch.setattr(ai_client, 'send', mock_send) return "Success"
mock_lifecycle.side_effect = side_effect
# Mock run_worker_lifecycle to mark tickets as complete await engine.run_linear()
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle: assert mock_lifecycle.call_count == 3
def side_effect(ticket, context, *args, **kwargs): # Verify dependency order: T1 must be called before T2
ticket.mark_complete() calls = [call[0][0].id for call in mock_lifecycle.call_args_list]
return "Success" t1_idx = calls.index("T1")
mock_lifecycle.side_effect = side_effect t2_idx = calls.index("T2")
assert t1_idx < t2_idx
await engine.run_linear() # T3 can be anywhere relative to T1 and T2, but T1 < T2 is mandatory
assert "T3" in calls
assert mock_lifecycle.call_count == 3
# Verify dependency order: T1 must be called before T2
calls = [call[0][0].id for call in mock_lifecycle.call_args_list]
t1_idx = calls.index("T1")
t2_idx = calls.index("T2")
assert t1_idx < t2_idx
# T3 can be anywhere relative to T1 and T2, but T1 < T2 is mandatory
assert "T3" in calls

View File

@@ -4,112 +4,106 @@ import json
import conductor_tech_lead import conductor_tech_lead
class TestConductorTechLead(unittest.TestCase): class TestConductorTechLead(unittest.TestCase):
@patch('ai_client.send') @patch('ai_client.send')
@patch('ai_client.set_provider') @patch('ai_client.set_provider')
@patch('ai_client.reset_session') @patch('ai_client.reset_session')
def test_generate_tickets_success(self, mock_reset_session, mock_set_provider, mock_send): def test_generate_tickets_success(self, mock_reset_session, mock_set_provider, mock_send):
# Setup mock response # Setup mock response
mock_tickets = [ mock_tickets = [
{ {
"id": "ticket_1", "id": "ticket_1",
"type": "Ticket", "type": "Ticket",
"goal": "Test goal", "goal": "Test goal",
"target_file": "test.py", "target_file": "test.py",
"depends_on": [], "depends_on": [],
"context_requirements": [] "context_requirements": []
} }
] ]
mock_send.return_value = "```json\n" + json.dumps(mock_tickets) + "\n```" mock_send.return_value = "```json\n" + json.dumps(mock_tickets) + "\n```"
track_brief = "Test track brief"
track_brief = "Test track brief" module_skeletons = "Test skeletons"
module_skeletons = "Test skeletons" # Call the function
# Call the function tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons) # Verify set_provider was called
mock_set_provider.assert_called_with('gemini', 'gemini-2.5-flash-lite')
mock_reset_session.assert_called_once()
# Verify send was called
mock_send.assert_called_once()
args, kwargs = mock_send.call_args
self.assertEqual(kwargs['md_content'], "")
self.assertIn(track_brief, kwargs['user_message'])
self.assertIn(module_skeletons, kwargs['user_message'])
# Verify tickets were parsed correctly
self.assertEqual(tickets, mock_tickets)
# Verify set_provider was called @patch('ai_client.send')
mock_set_provider.assert_called_with('gemini', 'gemini-2.5-flash-lite') @patch('ai_client.set_provider')
mock_reset_session.assert_called_once() @patch('ai_client.reset_session')
def test_generate_tickets_parse_error(self, mock_reset_session, mock_set_provider, mock_send):
# Verify send was called # Setup mock invalid response
mock_send.assert_called_once() mock_send.return_value = "Invalid JSON"
args, kwargs = mock_send.call_args # Call the function
self.assertEqual(kwargs['md_content'], "") tickets = conductor_tech_lead.generate_tickets("brief", "skeletons")
self.assertIn(track_brief, kwargs['user_message']) # Verify it returns an empty list on parse error
self.assertIn(module_skeletons, kwargs['user_message']) self.assertEqual(tickets, [])
# Verify tickets were parsed correctly
self.assertEqual(tickets, mock_tickets)
@patch('ai_client.send')
@patch('ai_client.set_provider')
@patch('ai_client.reset_session')
def test_generate_tickets_parse_error(self, mock_reset_session, mock_set_provider, mock_send):
# Setup mock invalid response
mock_send.return_value = "Invalid JSON"
# Call the function
tickets = conductor_tech_lead.generate_tickets("brief", "skeletons")
# Verify it returns an empty list on parse error
self.assertEqual(tickets, [])
class TestTopologicalSort(unittest.TestCase): class TestTopologicalSort(unittest.TestCase):
def test_topological_sort_empty(self): def test_topological_sort_empty(self):
tickets = [] tickets = []
sorted_tickets = conductor_tech_lead.topological_sort(tickets) sorted_tickets = conductor_tech_lead.topological_sort(tickets)
self.assertEqual(sorted_tickets, []) self.assertEqual(sorted_tickets, [])
def test_topological_sort_linear(self): def test_topological_sort_linear(self):
tickets = [ tickets = [
{"id": "t2", "depends_on": ["t1"]}, {"id": "t2", "depends_on": ["t1"]},
{"id": "t1", "depends_on": []}, {"id": "t1", "depends_on": []},
{"id": "t3", "depends_on": ["t2"]}, {"id": "t3", "depends_on": ["t2"]},
] ]
sorted_tickets = conductor_tech_lead.topological_sort(tickets) sorted_tickets = conductor_tech_lead.topological_sort(tickets)
ids = [t["id"] for t in sorted_tickets] ids = [t["id"] for t in sorted_tickets]
self.assertEqual(ids, ["t1", "t2", "t3"]) self.assertEqual(ids, ["t1", "t2", "t3"])
def test_topological_sort_complex(self): def test_topological_sort_complex(self):
# t1 # t1
# | \ # | \
# t2 t3 # t2 t3
# | / # | /
# t4 # t4
tickets = [ tickets = [
{"id": "t4", "depends_on": ["t2", "t3"]}, {"id": "t4", "depends_on": ["t2", "t3"]},
{"id": "t3", "depends_on": ["t1"]}, {"id": "t3", "depends_on": ["t1"]},
{"id": "t2", "depends_on": ["t1"]}, {"id": "t2", "depends_on": ["t1"]},
{"id": "t1", "depends_on": []}, {"id": "t1", "depends_on": []},
] ]
sorted_tickets = conductor_tech_lead.topological_sort(tickets) sorted_tickets = conductor_tech_lead.topological_sort(tickets)
ids = [t["id"] for t in sorted_tickets] ids = [t["id"] for t in sorted_tickets]
# Possible valid orders: [t1, t2, t3, t4] or [t1, t3, t2, t4] # Possible valid orders: [t1, t2, t3, t4] or [t1, t3, t2, t4]
self.assertEqual(ids[0], "t1") self.assertEqual(ids[0], "t1")
self.assertEqual(ids[-1], "t4") self.assertEqual(ids[-1], "t4")
self.assertSetEqual(set(ids[1:3]), {"t2", "t3"}) self.assertSetEqual(set(ids[1:3]), {"t2", "t3"})
def test_topological_sort_cycle(self): def test_topological_sort_cycle(self):
tickets = [ tickets = [
{"id": "t1", "depends_on": ["t2"]}, {"id": "t1", "depends_on": ["t2"]},
{"id": "t2", "depends_on": ["t1"]}, {"id": "t2", "depends_on": ["t1"]},
] ]
with self.assertRaises(ValueError) as cm: with self.assertRaises(ValueError) as cm:
conductor_tech_lead.topological_sort(tickets) conductor_tech_lead.topological_sort(tickets)
self.assertIn("Circular dependency detected", str(cm.exception)) self.assertIn("Circular dependency detected", str(cm.exception))
def test_topological_sort_missing_dependency(self): def test_topological_sort_missing_dependency(self):
# If a ticket depends on something not in the list, we should probably handle it or let it fail. # If a ticket depends on something not in the list, we should probably handle it or let it fail.
# Usually in our context, we only care about dependencies within the same track. # Usually in our context, we only care about dependencies within the same track.
tickets = [ tickets = [
{"id": "t1", "depends_on": ["missing"]}, {"id": "t1", "depends_on": ["missing"]},
] ]
# For now, let's assume it should raise an error if a dependency is missing within the set we are sorting, # For now, let's assume it should raise an error if a dependency is missing within the set we are sorting,
# OR it should just treat it as "ready" if it's external? # OR it should just treat it as "ready" if it's external?
# Actually, let's just test that it doesn't crash if it's not a cycle. # Actually, let's just test that it doesn't crash if it's not a cycle.
# But if 'missing' is not in tickets, it will never be satisfied. # But if 'missing' is not in tickets, it will never be satisfied.
# Let's say it raises ValueError for missing internal dependencies. # Let's say it raises ValueError for missing internal dependencies.
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
conductor_tech_lead.topological_sort(tickets) conductor_tech_lead.topological_sort(tickets)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -3,82 +3,72 @@ from models import Ticket
from dag_engine import TrackDAG from dag_engine import TrackDAG
def test_get_ready_tasks_linear(): def test_get_ready_tasks_linear():
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker") t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"]) t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T2"]) t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T2"])
dag = TrackDAG([t1, t2, t3])
dag = TrackDAG([t1, t2, t3]) ready = dag.get_ready_tasks()
ready = dag.get_ready_tasks() assert len(ready) == 1
assert len(ready) == 1 assert ready[0].id == "T2"
assert ready[0].id == "T2"
def test_get_ready_tasks_branching(): def test_get_ready_tasks_branching():
t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker") t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"]) t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"]) t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2, t3])
dag = TrackDAG([t1, t2, t3]) ready = dag.get_ready_tasks()
ready = dag.get_ready_tasks() assert len(ready) == 2
assert len(ready) == 2 ready_ids = {t.id for t in ready}
ready_ids = {t.id for t in ready} assert ready_ids == {"T2", "T3"}
assert ready_ids == {"T2", "T3"}
def test_has_cycle_no_cycle(): def test_has_cycle_no_cycle():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker") t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"]) t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
dag = TrackDAG([t1, t2]) assert not dag.has_cycle()
assert not dag.has_cycle()
def test_has_cycle_direct_cycle(): def test_has_cycle_direct_cycle():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"]) t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"]) t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
dag = TrackDAG([t1, t2]) assert dag.has_cycle()
assert dag.has_cycle()
def test_has_cycle_indirect_cycle(): def test_has_cycle_indirect_cycle():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"]) t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T3"]) t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T3"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"]) t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2, t3])
dag = TrackDAG([t1, t2, t3]) assert dag.has_cycle()
assert dag.has_cycle()
def test_has_cycle_complex_no_cycle(): def test_has_cycle_complex_no_cycle():
# T1 -> T2, T1 -> T3, T2 -> T4, T3 -> T4 # T1 -> T2, T1 -> T3, T2 -> T4, T3 -> T4
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2", "T3"]) t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T4"]) t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T4"])
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T4"]) t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T4"])
t4 = Ticket(id="T4", description="T4", status="todo", assigned_to="worker") t4 = Ticket(id="T4", description="T4", status="todo", assigned_to="worker")
dag = TrackDAG([t1, t2, t3, t4])
dag = TrackDAG([t1, t2, t3, t4]) assert not dag.has_cycle()
assert not dag.has_cycle()
def test_get_ready_tasks_multiple_deps(): def test_get_ready_tasks_multiple_deps():
t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="worker") t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="worker")
t2 = Ticket(id="T2", description="T2", status="completed", assigned_to="worker") t2 = Ticket(id="T2", description="T2", status="completed", assigned_to="worker")
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T1", "T2"]) t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T1", "T2"])
dag = TrackDAG([t1, t2, t3])
dag = TrackDAG([t1, t2, t3]) assert [t.id for t in dag.get_ready_tasks()] == ["T3"]
assert [t.id for t in dag.get_ready_tasks()] == ["T3"] t2.status = "todo"
assert [t.id for t in dag.get_ready_tasks()] == ["T2"]
t2.status = "todo"
assert [t.id for t in dag.get_ready_tasks()] == ["T2"]
def test_topological_sort(): def test_topological_sort():
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker") t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"]) t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T2"]) t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T2"])
dag = TrackDAG([t1, t2, t3])
dag = TrackDAG([t1, t2, t3]) sort = dag.topological_sort()
sort = dag.topological_sort() assert sort == ["T1", "T2", "T3"]
assert sort == ["T1", "T2", "T3"]
def test_topological_sort_cycle(): def test_topological_sort_cycle():
t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2"]) t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2"])
t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"]) t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
dag = TrackDAG([t1, t2]) with pytest.raises(ValueError, match="Dependency cycle detected"):
with pytest.raises(ValueError, match="Dependency cycle detected"): dag.topological_sort()
dag.topological_sort()

View File

@@ -12,54 +12,51 @@ import ai_client
import project_manager import project_manager
def test_credentials_error_mentions_deepseek(monkeypatch): def test_credentials_error_mentions_deepseek(monkeypatch):
""" """
Verify that the error message shown when credentials.toml is missing Verify that the error message shown when credentials.toml is missing
includes deepseek instructions. includes deepseek instructions.
""" """
# Monkeypatch SLOP_CREDENTIALS to a non-existent file # Monkeypatch SLOP_CREDENTIALS to a non-existent file
monkeypatch.setenv("SLOP_CREDENTIALS", "non_existent_credentials_file.toml") monkeypatch.setenv("SLOP_CREDENTIALS", "non_existent_credentials_file.toml")
with pytest.raises(FileNotFoundError) as excinfo:
with pytest.raises(FileNotFoundError) as excinfo: ai_client._load_credentials()
ai_client._load_credentials() err_msg = str(excinfo.value)
assert "[deepseek]" in err_msg
err_msg = str(excinfo.value) assert "api_key" in err_msg
assert "[deepseek]" in err_msg
assert "api_key" in err_msg
def test_default_project_includes_reasoning_role(): def test_default_project_includes_reasoning_role():
""" """
Verify that 'Reasoning' is included in the default discussion roles Verify that 'Reasoning' is included in the default discussion roles
to support DeepSeek-R1 reasoning traces. to support DeepSeek-R1 reasoning traces.
""" """
proj = project_manager.default_project("test") proj = project_manager.default_project("test")
roles = proj["discussion"]["roles"] roles = proj["discussion"]["roles"]
assert "Reasoning" in roles assert "Reasoning" in roles
def test_gui_providers_list(): def test_gui_providers_list():
""" """
Check if 'deepseek' is in the GUI's provider list. Check if 'deepseek' is in the GUI's provider list.
""" """
import gui_2 import gui_2
assert "deepseek" in gui_2.PROVIDERS assert "deepseek" in gui_2.PROVIDERS
def test_deepseek_model_listing(): def test_deepseek_model_listing():
""" """
Verify that list_models for deepseek returns expected models. Verify that list_models for deepseek returns expected models.
""" """
models = ai_client.list_models("deepseek") models = ai_client.list_models("deepseek")
assert "deepseek-chat" in models assert "deepseek-chat" in models
assert "deepseek-reasoner" in models assert "deepseek-reasoner" in models
def test_gui_provider_list_via_hooks(live_gui): def test_gui_provider_list_via_hooks(live_gui):
""" """
Verify 'deepseek' is present in the GUI provider list using API hooks. Verify 'deepseek' is present in the GUI provider list using API hooks.
""" """
from api_hook_client import ApiHookClient from api_hook_client import ApiHookClient
import time import time
client = ApiHookClient() client = ApiHookClient()
assert client.wait_for_server(timeout=10) assert client.wait_for_server(timeout=10)
# Attempt to set provider to deepseek to verify it's an allowed value
# Attempt to set provider to deepseek to verify it's an allowed value client.set_value('current_provider', 'deepseek')
client.set_value('current_provider', 'deepseek') time.sleep(0.5)
time.sleep(0.5) assert client.get_value('current_provider') == 'deepseek'
assert client.get_value('current_provider') == 'deepseek'

View File

@@ -3,137 +3,124 @@ from unittest.mock import patch, MagicMock
import ai_client import ai_client
def test_deepseek_model_selection(): def test_deepseek_model_selection():
""" """
Verifies that ai_client.set_provider('deepseek', 'deepseek-chat') correctly updates the internal state. Verifies that ai_client.set_provider('deepseek', 'deepseek-chat') correctly updates the internal state.
""" """
ai_client.set_provider("deepseek", "deepseek-chat") ai_client.set_provider("deepseek", "deepseek-chat")
assert ai_client._provider == "deepseek" assert ai_client._provider == "deepseek"
assert ai_client._model == "deepseek-chat" assert ai_client._model == "deepseek-chat"
def test_deepseek_completion_logic(): def test_deepseek_completion_logic():
""" """
Verifies that ai_client.send() correctly calls the DeepSeek API and returns content. Verifies that ai_client.send() correctly calls the DeepSeek API and returns content.
""" """
ai_client.set_provider("deepseek", "deepseek-chat") ai_client.set_provider("deepseek", "deepseek-chat")
with patch("requests.post") as mock_post:
with patch("requests.post") as mock_post: mock_response = MagicMock()
mock_response = MagicMock() mock_response.status_code = 200
mock_response.status_code = 200 mock_response.json.return_value = {
mock_response.json.return_value = { "choices": [{
"choices": [{ "message": {"role": "assistant", "content": "DeepSeek Response"},
"message": {"role": "assistant", "content": "DeepSeek Response"}, "finish_reason": "stop"
"finish_reason": "stop" }],
}], "usage": {"prompt_tokens": 10, "completion_tokens": 5}
"usage": {"prompt_tokens": 10, "completion_tokens": 5} }
} mock_post.return_value = mock_response
mock_post.return_value = mock_response result = ai_client.send(md_content="Context", user_message="Hello", base_dir=".")
assert result == "DeepSeek Response"
result = ai_client.send(md_content="Context", user_message="Hello", base_dir=".") assert mock_post.called
assert result == "DeepSeek Response"
assert mock_post.called
def test_deepseek_reasoning_logic(): def test_deepseek_reasoning_logic():
""" """
Verifies that reasoning_content is captured and wrapped in <thinking> tags. Verifies that reasoning_content is captured and wrapped in <thinking> tags.
""" """
ai_client.set_provider("deepseek", "deepseek-reasoner") ai_client.set_provider("deepseek", "deepseek-reasoner")
with patch("requests.post") as mock_post:
with patch("requests.post") as mock_post: mock_response = MagicMock()
mock_response = MagicMock() mock_response.status_code = 200
mock_response.status_code = 200 mock_response.json.return_value = {
mock_response.json.return_value = { "choices": [{
"choices": [{ "message": {
"message": { "role": "assistant",
"role": "assistant", "content": "Final Answer",
"content": "Final Answer", "reasoning_content": "Chain of thought"
"reasoning_content": "Chain of thought" },
}, "finish_reason": "stop"
"finish_reason": "stop" }],
}], "usage": {"prompt_tokens": 10, "completion_tokens": 20}
"usage": {"prompt_tokens": 10, "completion_tokens": 20} }
} mock_post.return_value = mock_response
mock_post.return_value = mock_response result = ai_client.send(md_content="Context", user_message="Reasoning test", base_dir=".")
assert "<thinking>\nChain of thought\n</thinking>" in result
result = ai_client.send(md_content="Context", user_message="Reasoning test", base_dir=".") assert "Final Answer" in result
assert "<thinking>\nChain of thought\n</thinking>" in result
assert "Final Answer" in result
def test_deepseek_tool_calling(): def test_deepseek_tool_calling():
""" """
Verifies that DeepSeek provider correctly identifies and executes tool calls. Verifies that DeepSeek provider correctly identifies and executes tool calls.
""" """
ai_client.set_provider("deepseek", "deepseek-chat") ai_client.set_provider("deepseek", "deepseek-chat")
with patch("requests.post") as mock_post, \
with patch("requests.post") as mock_post, \ patch("mcp_client.dispatch") as mock_dispatch:
patch("mcp_client.dispatch") as mock_dispatch: # 1. Mock first response with a tool call
mock_resp1 = MagicMock()
# 1. Mock first response with a tool call mock_resp1.status_code = 200
mock_resp1 = MagicMock() mock_resp1.json.return_value = {
mock_resp1.status_code = 200 "choices": [{
mock_resp1.json.return_value = { "message": {
"choices": [{ "role": "assistant",
"message": { "content": "Let me read that file.",
"role": "assistant", "tool_calls": [{
"content": "Let me read that file.", "id": "call_123",
"tool_calls": [{ "type": "function",
"id": "call_123", "function": {
"type": "function", "name": "read_file",
"function": { "arguments": '{"path": "test.txt"}'
"name": "read_file",
"arguments": '{"path": "test.txt"}'
}
}]
},
"finish_reason": "tool_calls"
}],
"usage": {"prompt_tokens": 50, "completion_tokens": 10}
} }
}]
# 2. Mock second response (final answer) },
mock_resp2 = MagicMock() "finish_reason": "tool_calls"
mock_resp2.status_code = 200 }],
mock_resp2.json.return_value = { "usage": {"prompt_tokens": 50, "completion_tokens": 10}
"choices": [{ }
"message": { # 2. Mock second response (final answer)
"role": "assistant", mock_resp2 = MagicMock()
"content": "File content is: Hello World" mock_resp2.status_code = 200
}, mock_resp2.json.return_value = {
"finish_reason": "stop" "choices": [{
}], "message": {
"usage": {"prompt_tokens": 100, "completion_tokens": 20} "role": "assistant",
} "content": "File content is: Hello World"
},
mock_post.side_effect = [mock_resp1, mock_resp2] "finish_reason": "stop"
mock_dispatch.return_value = "Hello World" }],
"usage": {"prompt_tokens": 100, "completion_tokens": 20}
result = ai_client.send(md_content="Context", user_message="Read test.txt", base_dir=".") }
mock_post.side_effect = [mock_resp1, mock_resp2]
assert "File content is: Hello World" in result mock_dispatch.return_value = "Hello World"
assert mock_dispatch.called result = ai_client.send(md_content="Context", user_message="Read test.txt", base_dir=".")
assert mock_dispatch.call_args[0][0] == "read_file" assert "File content is: Hello World" in result
assert mock_dispatch.call_args[0][1] == {"path": "test.txt"} assert mock_dispatch.called
assert mock_dispatch.call_args[0][0] == "read_file"
assert mock_dispatch.call_args[0][1] == {"path": "test.txt"}
def test_deepseek_streaming(): def test_deepseek_streaming():
""" """
Verifies that DeepSeek provider correctly aggregates streaming chunks. Verifies that DeepSeek provider correctly aggregates streaming chunks.
""" """
ai_client.set_provider("deepseek", "deepseek-chat") ai_client.set_provider("deepseek", "deepseek-chat")
with patch("requests.post") as mock_post:
with patch("requests.post") as mock_post: # Mock a streaming response
# Mock a streaming response mock_response = MagicMock()
mock_response = MagicMock() mock_response.status_code = 200
mock_response.status_code = 200 # Simulate OpenAI-style server-sent events (SSE) for streaming
# Each line starts with 'data: ' and contains a JSON object
# Simulate OpenAI-style server-sent events (SSE) for streaming chunks = [
# Each line starts with 'data: ' and contains a JSON object 'data: {"choices": [{"delta": {"role": "assistant", "content": "Hello"}, "index": 0, "finish_reason": null}]}',
chunks = [ 'data: {"choices": [{"delta": {"content": " World"}, "index": 0, "finish_reason": null}]}',
'data: {"choices": [{"delta": {"role": "assistant", "content": "Hello"}, "index": 0, "finish_reason": null}]}', 'data: {"choices": [{"delta": {}, "index": 0, "finish_reason": "stop"}]}',
'data: {"choices": [{"delta": {"content": " World"}, "index": 0, "finish_reason": null}]}', 'data: [DONE]'
'data: {"choices": [{"delta": {}, "index": 0, "finish_reason": "stop"}]}', ]
'data: [DONE]' mock_response.iter_lines.return_value = [c.encode('utf-8') for c in chunks]
] mock_post.return_value = mock_response
mock_response.iter_lines.return_value = [c.encode('utf-8') for c in chunks] result = ai_client.send(md_content="Context", user_message="Stream test", base_dir=".", stream=True)
mock_post.return_value = mock_response assert result == "Hello World"
result = ai_client.send(md_content="Context", user_message="Stream test", base_dir=".", stream=True)
assert result == "Hello World"

View File

@@ -3,121 +3,99 @@ from models import Ticket
from dag_engine import TrackDAG, ExecutionEngine from dag_engine import TrackDAG, ExecutionEngine
def test_execution_engine_basic_flow(): def test_execution_engine_basic_flow():
# Setup tickets with dependencies # Setup tickets with dependencies
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker") t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"]) t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"]) t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
t4 = Ticket(id="T4", description="Task 4", status="todo", assigned_to="worker", depends_on=["T2", "T3"]) t4 = Ticket(id="T4", description="Task 4", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
dag = TrackDAG([t1, t2, t3, t4])
dag = TrackDAG([t1, t2, t3, t4]) engine = ExecutionEngine(dag)
engine = ExecutionEngine(dag) # Tick 1: Only T1 should be ready
ready = engine.tick()
# Tick 1: Only T1 should be ready assert len(ready) == 1
ready = engine.tick() assert ready[0].id == "T1"
assert len(ready) == 1 # Complete T1
assert ready[0].id == "T1" engine.update_task_status("T1", "completed")
# Tick 2: T2 and T3 should be ready
# Complete T1 ready = engine.tick()
engine.update_task_status("T1", "completed") assert len(ready) == 2
ids = {t.id for t in ready}
# Tick 2: T2 and T3 should be ready assert ids == {"T2", "T3"}
ready = engine.tick() # Complete T2
assert len(ready) == 2 engine.update_task_status("T2", "completed")
ids = {t.id for t in ready} # Tick 3: Only T3 should be ready (T4 depends on T2 AND T3)
assert ids == {"T2", "T3"} ready = engine.tick()
assert len(ready) == 1
# Complete T2 assert ready[0].id == "T3"
engine.update_task_status("T2", "completed") # Complete T3
engine.update_task_status("T3", "completed")
# Tick 3: Only T3 should be ready (T4 depends on T2 AND T3) # Tick 4: T4 should be ready
ready = engine.tick() ready = engine.tick()
assert len(ready) == 1 assert len(ready) == 1
assert ready[0].id == "T3" assert ready[0].id == "T4"
# Complete T4
# Complete T3 engine.update_task_status("T4", "completed")
engine.update_task_status("T3", "completed") # Tick 5: Nothing ready
ready = engine.tick()
# Tick 4: T4 should be ready assert len(ready) == 0
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T4"
# Complete T4
engine.update_task_status("T4", "completed")
# Tick 5: Nothing ready
ready = engine.tick()
assert len(ready) == 0
def test_execution_engine_update_nonexistent_task(): def test_execution_engine_update_nonexistent_task():
dag = TrackDAG([]) dag = TrackDAG([])
engine = ExecutionEngine(dag) engine = ExecutionEngine(dag)
# Should not raise error, or handle gracefully # Should not raise error, or handle gracefully
engine.update_task_status("NONEXISTENT", "completed") engine.update_task_status("NONEXISTENT", "completed")
def test_execution_engine_status_persistence(): def test_execution_engine_status_persistence():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker") t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
dag = TrackDAG([t1]) dag = TrackDAG([t1])
engine = ExecutionEngine(dag) engine = ExecutionEngine(dag)
engine.update_task_status("T1", "in_progress")
engine.update_task_status("T1", "in_progress") assert t1.status == "in_progress"
assert t1.status == "in_progress" ready = engine.tick()
assert len(ready) == 0 # Only 'todo' tasks should be returned by tick() if they are ready
ready = engine.tick()
assert len(ready) == 0 # Only 'todo' tasks should be returned by tick() if they are ready
def test_execution_engine_auto_queue(): def test_execution_engine_auto_queue():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker") t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"]) t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
dag = TrackDAG([t1, t2])
dag = TrackDAG([t1, t2]) engine = ExecutionEngine(dag, auto_queue=True)
engine = ExecutionEngine(dag, auto_queue=True) # Tick 1: T1 is ready and should be automatically marked as 'in_progress'
ready = engine.tick()
# Tick 1: T1 is ready and should be automatically marked as 'in_progress' assert len(ready) == 1
ready = engine.tick() assert ready[0].id == "T1"
assert len(ready) == 1 assert t1.status == "in_progress"
assert ready[0].id == "T1" # Tick 2: T1 is in_progress, so T2 is NOT ready yet (T1 must be 'completed')
assert t1.status == "in_progress" ready = engine.tick()
assert len(ready) == 0
# Tick 2: T1 is in_progress, so T2 is NOT ready yet (T1 must be 'completed') assert t2.status == "todo"
ready = engine.tick() # Complete T1
assert len(ready) == 0 engine.update_task_status("T1", "completed")
assert t2.status == "todo" # Tick 3: T2 is now ready and should be automatically marked as 'in_progress'
ready = engine.tick()
# Complete T1 assert len(ready) == 1
engine.update_task_status("T1", "completed") assert ready[0].id == "T2"
assert t2.status == "in_progress"
# Tick 3: T2 is now ready and should be automatically marked as 'in_progress'
ready = engine.tick()
assert len(ready) == 1
assert ready[0].id == "T2"
assert t2.status == "in_progress"
def test_execution_engine_step_mode(): def test_execution_engine_step_mode():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", step_mode=True) t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", step_mode=True)
dag = TrackDAG([t1])
dag = TrackDAG([t1]) engine = ExecutionEngine(dag, auto_queue=True)
engine = ExecutionEngine(dag, auto_queue=True) # Tick 1: T1 is ready, but step_mode=True, so it should NOT be automatically marked as 'in_progress'
ready = engine.tick()
# Tick 1: T1 is ready, but step_mode=True, so it should NOT be automatically marked as 'in_progress' assert len(ready) == 1
ready = engine.tick() assert ready[0].id == "T1"
assert len(ready) == 1 assert t1.status == "todo"
assert ready[0].id == "T1" # Manual approval
assert t1.status == "todo" engine.approve_task("T1")
assert t1.status == "in_progress"
# Manual approval # Tick 2: T1 is already in_progress, should not be returned by tick() (it's not 'ready'/todo)
engine.approve_task("T1") ready = engine.tick()
assert t1.status == "in_progress" assert len(ready) == 0
# Tick 2: T1 is already in_progress, should not be returned by tick() (it's not 'ready'/todo)
ready = engine.tick()
assert len(ready) == 0
def test_execution_engine_approve_task(): def test_execution_engine_approve_task():
t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker") t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
dag = TrackDAG([t1]) dag = TrackDAG([t1])
engine = ExecutionEngine(dag, auto_queue=False) engine = ExecutionEngine(dag, auto_queue=False)
# Should be able to approve even if auto_queue is False
# Should be able to approve even if auto_queue is False engine.approve_task("T1")
engine.approve_task("T1") assert t1.status == "in_progress"
assert t1.status == "in_progress"

View File

@@ -14,44 +14,40 @@ from simulation.sim_execution import ExecutionSimulation
@pytest.mark.integration @pytest.mark.integration
def test_context_sim_live(live_gui): def test_context_sim_live(live_gui):
"""Run the Context & Chat simulation against a live GUI.""" """Run the Context & Chat simulation against a live GUI."""
client = ApiHookClient() client = ApiHookClient()
assert client.wait_for_server(timeout=10) assert client.wait_for_server(timeout=10)
sim = ContextSimulation(client)
sim = ContextSimulation(client) sim.setup("LiveContextSim")
sim.setup("LiveContextSim") sim.run()
sim.run() sim.teardown()
sim.teardown()
@pytest.mark.integration @pytest.mark.integration
def test_ai_settings_sim_live(live_gui): def test_ai_settings_sim_live(live_gui):
"""Run the AI Settings simulation against a live GUI.""" """Run the AI Settings simulation against a live GUI."""
client = ApiHookClient() client = ApiHookClient()
assert client.wait_for_server(timeout=10) assert client.wait_for_server(timeout=10)
sim = AISettingsSimulation(client)
sim = AISettingsSimulation(client) sim.setup("LiveAISettingsSim")
sim.setup("LiveAISettingsSim") sim.run()
sim.run() sim.teardown()
sim.teardown()
@pytest.mark.integration @pytest.mark.integration
def test_tools_sim_live(live_gui): def test_tools_sim_live(live_gui):
"""Run the Tools & Search simulation against a live GUI.""" """Run the Tools & Search simulation against a live GUI."""
client = ApiHookClient() client = ApiHookClient()
assert client.wait_for_server(timeout=10) assert client.wait_for_server(timeout=10)
sim = ToolsSimulation(client)
sim = ToolsSimulation(client) sim.setup("LiveToolsSim")
sim.setup("LiveToolsSim") sim.run()
sim.run() sim.teardown()
sim.teardown()
@pytest.mark.integration @pytest.mark.integration
def test_execution_sim_live(live_gui): def test_execution_sim_live(live_gui):
"""Run the Execution & Modals simulation against a live GUI.""" """Run the Execution & Modals simulation against a live GUI."""
client = ApiHookClient() client = ApiHookClient()
assert client.wait_for_server(timeout=10) assert client.wait_for_server(timeout=10)
sim = ExecutionSimulation(client)
sim = ExecutionSimulation(client) sim.setup("LiveExecutionSim")
sim.setup("LiveExecutionSim") sim.run()
sim.run() sim.teardown()
sim.teardown()

View File

@@ -12,119 +12,105 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from gemini_cli_adapter import GeminiCliAdapter from gemini_cli_adapter import GeminiCliAdapter
class TestGeminiCliAdapter(unittest.TestCase): class TestGeminiCliAdapter(unittest.TestCase):
def setUp(self): def setUp(self):
self.adapter = GeminiCliAdapter(binary_path="gemini") self.adapter = GeminiCliAdapter(binary_path="gemini")
@patch('subprocess.Popen') @patch('subprocess.Popen')
def test_send_starts_subprocess_with_correct_args(self, mock_popen): def test_send_starts_subprocess_with_correct_args(self, mock_popen):
""" """
Verify that send(message) correctly starts the subprocess with Verify that send(message) correctly starts the subprocess with
--output-format stream-json and the provided message via stdin using communicate. --output-format stream-json and the provided message via stdin using communicate.
""" """
# Setup mock process with a minimal valid JSONL termination # Setup mock process with a minimal valid JSONL termination
process_mock = MagicMock() process_mock = MagicMock()
stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n" stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (stdout_content, "") process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0 process_mock.poll.return_value = 0
process_mock.wait.return_value = 0 process_mock.wait.return_value = 0
mock_popen.return_value = process_mock mock_popen.return_value = process_mock
message = "Hello Gemini CLI"
self.adapter.send(message)
# Verify subprocess.Popen call
mock_popen.assert_called_once()
args, kwargs = mock_popen.call_args
cmd = args[0]
# Check mandatory CLI components
self.assertIn("gemini", cmd)
self.assertIn("--output-format", cmd)
self.assertIn("stream-json", cmd)
# Message should NOT be in cmd now
self.assertNotIn(message, cmd)
# Verify message was sent via communicate
process_mock.communicate.assert_called_once_with(input=message)
# Check process configuration
self.assertEqual(kwargs.get('stdout'), subprocess.PIPE)
self.assertEqual(kwargs.get('stdin'), subprocess.PIPE)
self.assertEqual(kwargs.get('text'), True)
message = "Hello Gemini CLI" @patch('subprocess.Popen')
self.adapter.send(message) def test_send_parses_jsonl_output(self, mock_popen):
"""
# Verify subprocess.Popen call
mock_popen.assert_called_once()
args, kwargs = mock_popen.call_args
cmd = args[0]
# Check mandatory CLI components
self.assertIn("gemini", cmd)
self.assertIn("--output-format", cmd)
self.assertIn("stream-json", cmd)
# Message should NOT be in cmd now
self.assertNotIn(message, cmd)
# Verify message was sent via communicate
process_mock.communicate.assert_called_once_with(input=message)
# Check process configuration
self.assertEqual(kwargs.get('stdout'), subprocess.PIPE)
self.assertEqual(kwargs.get('stdin'), subprocess.PIPE)
self.assertEqual(kwargs.get('text'), True)
@patch('subprocess.Popen')
def test_send_parses_jsonl_output(self, mock_popen):
"""
Verify that it correctly parses multiple JSONL 'message' events Verify that it correctly parses multiple JSONL 'message' events
and returns the combined text. and returns the combined text.
""" """
jsonl_output = [ jsonl_output = [
json.dumps({"type": "message", "role": "model", "text": "The quick brown "}), json.dumps({"type": "message", "role": "model", "text": "The quick brown "}),
json.dumps({"type": "message", "role": "model", "text": "fox jumps."}), json.dumps({"type": "message", "role": "model", "text": "fox jumps."}),
json.dumps({"type": "result", "usage": {"prompt_tokens": 5, "candidates_tokens": 5}}) json.dumps({"type": "result", "usage": {"prompt_tokens": 5, "candidates_tokens": 5}})
] ]
stdout_content = "\n".join(jsonl_output) + "\n" stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock()
process_mock = MagicMock() process_mock.communicate.return_value = (stdout_content, "")
process_mock.communicate.return_value = (stdout_content, "") process_mock.poll.return_value = 0
process_mock.poll.return_value = 0 process_mock.wait.return_value = 0
process_mock.wait.return_value = 0 mock_popen.return_value = process_mock
mock_popen.return_value = process_mock result = self.adapter.send("test message")
self.assertEqual(result["text"], "The quick brown fox jumps.")
self.assertEqual(result["tool_calls"], [])
result = self.adapter.send("test message") @patch('subprocess.Popen')
def test_send_handles_tool_use_events(self, mock_popen):
self.assertEqual(result["text"], "The quick brown fox jumps.") """
self.assertEqual(result["tool_calls"], [])
@patch('subprocess.Popen')
def test_send_handles_tool_use_events(self, mock_popen):
"""
Verify that it correctly handles 'tool_use' events in the stream Verify that it correctly handles 'tool_use' events in the stream
by continuing to read until the final 'result' event. by continuing to read until the final 'result' event.
""" """
jsonl_output = [ jsonl_output = [
json.dumps({"type": "message", "role": "assistant", "text": "Calling tool..."}), json.dumps({"type": "message", "role": "assistant", "text": "Calling tool..."}),
json.dumps({"type": "tool_use", "name": "read_file", "args": {"path": "test.txt"}}), json.dumps({"type": "tool_use", "name": "read_file", "args": {"path": "test.txt"}}),
json.dumps({"type": "message", "role": "assistant", "text": "\nFile read successfully."}), json.dumps({"type": "message", "role": "assistant", "text": "\nFile read successfully."}),
json.dumps({"type": "result", "usage": {}}) json.dumps({"type": "result", "usage": {}})
] ]
stdout_content = "\n".join(jsonl_output) + "\n" stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock()
process_mock.communicate.return_value = (stdout_content, "")
process_mock.poll.return_value = 0
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
result = self.adapter.send("read test.txt")
# Result should contain the combined text from all 'message' events
self.assertEqual(result["text"], "Calling tool...\nFile read successfully.")
self.assertEqual(len(result["tool_calls"]), 1)
self.assertEqual(result["tool_calls"][0]["name"], "read_file")
process_mock = MagicMock() @patch('subprocess.Popen')
process_mock.communicate.return_value = (stdout_content, "") def test_send_captures_usage_metadata(self, mock_popen):
process_mock.poll.return_value = 0 """
process_mock.wait.return_value = 0
mock_popen.return_value = process_mock
result = self.adapter.send("read test.txt")
# Result should contain the combined text from all 'message' events
self.assertEqual(result["text"], "Calling tool...\nFile read successfully.")
self.assertEqual(len(result["tool_calls"]), 1)
self.assertEqual(result["tool_calls"][0]["name"], "read_file")
@patch('subprocess.Popen')
def test_send_captures_usage_metadata(self, mock_popen):
"""
Verify that usage data is extracted from the 'result' event. Verify that usage data is extracted from the 'result' event.
""" """
usage_data = {"total_tokens": 42} usage_data = {"total_tokens": 42}
jsonl_output = [ jsonl_output = [
json.dumps({"type": "message", "text": "Finalizing"}), json.dumps({"type": "message", "text": "Finalizing"}),
json.dumps({"type": "result", "usage": usage_data}) json.dumps({"type": "result", "usage": usage_data})
] ]
stdout_content = "\n".join(jsonl_output) + "\n" stdout_content = "\n".join(jsonl_output) + "\n"
process_mock = MagicMock()
process_mock = MagicMock() process_mock.communicate.return_value = (stdout_content, "")
process_mock.communicate.return_value = (stdout_content, "") process_mock.poll.return_value = 0
process_mock.poll.return_value = 0 process_mock.wait.return_value = 0
process_mock.wait.return_value = 0 mock_popen.return_value = process_mock
mock_popen.return_value = process_mock self.adapter.send("usage test")
# Verify the usage was captured in the adapter instance
self.adapter.send("usage test") self.assertEqual(self.adapter.last_usage, usage_data)
# Verify the usage was captured in the adapter instance
self.assertEqual(self.adapter.last_usage, usage_data)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -9,168 +9,143 @@ import os
# Ensure the project root is in sys.path to resolve imports correctly # Ensure the project root is in sys.path to resolve imports correctly
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if project_root not in sys.path: if project_root not in sys.path:
sys.path.append(project_root) sys.path.append(project_root)
# Import the class to be tested
# Import the class to be tested
from gemini_cli_adapter import GeminiCliAdapter from gemini_cli_adapter import GeminiCliAdapter
class TestGeminiCliAdapterParity(unittest.TestCase): class TestGeminiCliAdapterParity(unittest.TestCase):
def setUp(self): def setUp(self):
"""Set up a fresh adapter instance and reset session state for each test.""" """Set up a fresh adapter instance and reset session state for each test."""
# Patch session_logger to prevent file operations during tests # Patch session_logger to prevent file operations during tests
self.session_logger_patcher = patch('gemini_cli_adapter.session_logger') self.session_logger_patcher = patch('gemini_cli_adapter.session_logger')
self.mock_session_logger = self.session_logger_patcher.start() self.mock_session_logger = self.session_logger_patcher.start()
self.adapter = GeminiCliAdapter(binary_path="gemini")
self.adapter = GeminiCliAdapter(binary_path="gemini") self.adapter.session_id = None
self.adapter.session_id = None self.adapter.last_usage = None
self.adapter.last_usage = None self.adapter.last_latency = 0.0
self.adapter.last_latency = 0.0
def tearDown(self): def tearDown(self):
self.session_logger_patcher.stop() self.session_logger_patcher.stop()
@patch('subprocess.Popen') @patch('subprocess.Popen')
def test_count_tokens_uses_estimation(self, mock_popen): def test_count_tokens_uses_estimation(self, mock_popen):
""" """
Test that count_tokens uses character-based estimation. Test that count_tokens uses character-based estimation.
""" """
contents_to_count = ["This is the first line.", "This is the second line."] contents_to_count = ["This is the first line.", "This is the second line."]
expected_chars = len("\n".join(contents_to_count)) expected_chars = len("\n".join(contents_to_count))
expected_tokens = expected_chars // 4 expected_tokens = expected_chars // 4
token_count = self.adapter.count_tokens(contents=contents_to_count)
token_count = self.adapter.count_tokens(contents=contents_to_count) self.assertEqual(token_count, expected_tokens)
self.assertEqual(token_count, expected_tokens) # Verify that NO subprocess was started for counting
mock_popen.assert_not_called()
# Verify that NO subprocess was started for counting
mock_popen.assert_not_called()
@patch('subprocess.Popen') @patch('subprocess.Popen')
def test_send_with_safety_settings_no_flags_added(self, mock_popen): def test_send_with_safety_settings_no_flags_added(self, mock_popen):
""" """
Test that the send method does NOT add --safety flags when safety_settings are provided, Test that the send method does NOT add --safety flags when safety_settings are provided,
as this functionality is no longer supported via CLI flags. as this functionality is no longer supported via CLI flags.
""" """
process_mock = MagicMock() process_mock = MagicMock()
mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n" mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (mock_stdout_content, "") process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0 process_mock.returncode = 0
mock_popen.return_value = process_mock mock_popen.return_value = process_mock
message_content = "User's prompt here."
safety_settings = [
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"},
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
]
self.adapter.send(message=message_content, safety_settings=safety_settings)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that no --safety flags were added to the command
self.assertNotIn("--safety", command)
# Verify that the message was passed correctly via stdin
process_mock.communicate.assert_called_once_with(input=message_content)
message_content = "User's prompt here." @patch('subprocess.Popen')
safety_settings = [ def test_send_without_safety_settings_no_flags(self, mock_popen):
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"}, """
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
]
self.adapter.send(message=message_content, safety_settings=safety_settings)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that no --safety flags were added to the command
self.assertNotIn("--safety", command)
# Verify that the message was passed correctly via stdin
process_mock.communicate.assert_called_once_with(input=message_content)
@patch('subprocess.Popen')
def test_send_without_safety_settings_no_flags(self, mock_popen):
"""
Test that when safety_settings is None or an empty list, no --safety flags are added. Test that when safety_settings is None or an empty list, no --safety flags are added.
""" """
process_mock = MagicMock() process_mock = MagicMock()
mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n" mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (mock_stdout_content, "") process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0 process_mock.returncode = 0
mock_popen.return_value = process_mock mock_popen.return_value = process_mock
message_content = "Another prompt."
self.adapter.send(message=message_content, safety_settings=None)
args_none, _ = mock_popen.call_args
self.assertNotIn("--safety", args_none[0])
mock_popen.reset_mock()
self.adapter.send(message=message_content, safety_settings=[])
args_empty, _ = mock_popen.call_args
self.assertNotIn("--safety", args_empty[0])
message_content = "Another prompt." @patch('subprocess.Popen')
def test_send_with_system_instruction_prepended_to_stdin(self, mock_popen):
self.adapter.send(message=message_content, safety_settings=None) """
args_none, _ = mock_popen.call_args
self.assertNotIn("--safety", args_none[0])
mock_popen.reset_mock()
self.adapter.send(message=message_content, safety_settings=[])
args_empty, _ = mock_popen.call_args
self.assertNotIn("--safety", args_empty[0])
@patch('subprocess.Popen')
def test_send_with_system_instruction_prepended_to_stdin(self, mock_popen):
"""
Test that the send method prepends the system instruction to the prompt Test that the send method prepends the system instruction to the prompt
sent via stdin, and does NOT add a --system flag to the command. sent via stdin, and does NOT add a --system flag to the command.
""" """
process_mock = MagicMock() process_mock = MagicMock()
mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n" mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (mock_stdout_content, "") process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0 process_mock.returncode = 0
mock_popen.return_value = process_mock mock_popen.return_value = process_mock
message_content = "User's prompt here."
system_instruction_text = "Some instruction"
expected_input = f"{system_instruction_text}\n\n{message_content}"
self.adapter.send(message=message_content, system_instruction=system_instruction_text)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that the system instruction was prepended to the input sent to communicate
process_mock.communicate.assert_called_once_with(input=expected_input)
# Verify that no --system flag was added to the command
self.assertNotIn("--system", command)
message_content = "User's prompt here." @patch('subprocess.Popen')
system_instruction_text = "Some instruction" def test_send_with_model_parameter(self, mock_popen):
expected_input = f"{system_instruction_text}\n\n{message_content}" """
self.adapter.send(message=message_content, system_instruction=system_instruction_text)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that the system instruction was prepended to the input sent to communicate
process_mock.communicate.assert_called_once_with(input=expected_input)
# Verify that no --system flag was added to the command
self.assertNotIn("--system", command)
@patch('subprocess.Popen')
def test_send_with_model_parameter(self, mock_popen):
"""
Test that the send method correctly adds the -m <model> flag when a model is specified. Test that the send method correctly adds the -m <model> flag when a model is specified.
""" """
process_mock = MagicMock() process_mock = MagicMock()
mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n" mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
process_mock.communicate.return_value = (mock_stdout_content, "") process_mock.communicate.return_value = (mock_stdout_content, "")
process_mock.returncode = 0 process_mock.returncode = 0
mock_popen.return_value = process_mock mock_popen.return_value = process_mock
message_content = "User's prompt here."
model_name = "gemini-1.5-flash"
expected_command_part = f'-m "{model_name}"'
self.adapter.send(message=message_content, model=model_name)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that the -m <model> flag was added to the command
self.assertIn(expected_command_part, command)
# Verify that the message was passed correctly via stdin
process_mock.communicate.assert_called_once_with(input=message_content)
message_content = "User's prompt here." @patch('subprocess.Popen')
model_name = "gemini-1.5-flash" def test_send_kills_process_on_communicate_exception(self, mock_popen):
expected_command_part = f'-m "{model_name}"' """
self.adapter.send(message=message_content, model=model_name)
args, kwargs = mock_popen.call_args
command = args[0]
# Verify that the -m <model> flag was added to the command
self.assertIn(expected_command_part, command)
# Verify that the message was passed correctly via stdin
process_mock.communicate.assert_called_once_with(input=message_content)
@patch('subprocess.Popen')
def test_send_kills_process_on_communicate_exception(self, mock_popen):
"""
Test that if subprocess.Popen().communicate() raises an exception, Test that if subprocess.Popen().communicate() raises an exception,
GeminiCliAdapter.send() kills the process and re-raises the exception. GeminiCliAdapter.send() kills the process and re-raises the exception.
""" """
mock_process = MagicMock() mock_process = MagicMock()
mock_popen.return_value = mock_process mock_popen.return_value = mock_process
# Define an exception to simulate
# Define an exception to simulate simulated_exception = RuntimeError("Simulated communicate error")
simulated_exception = RuntimeError("Simulated communicate error") mock_process.communicate.side_effect = simulated_exception
mock_process.communicate.side_effect = simulated_exception message_content = "User message"
# Assert that the exception is raised and process is killed
message_content = "User message" with self.assertRaises(RuntimeError) as cm:
self.adapter.send(message=message_content)
# Assert that the exception is raised and process is killed # Verify that the process's kill method was called
with self.assertRaises(RuntimeError) as cm: mock_process.kill.assert_called_once()
self.adapter.send(message=message_content) # Verify that the correct exception was re-raised
self.assertIs(cm.exception, simulated_exception)
# Verify that the process's kill method was called
mock_process.kill.assert_called_once()
# Verify that the correct exception was re-raised
self.assertIs(cm.exception, simulated_exception)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@@ -7,66 +7,57 @@ import json
from api_hook_client import ApiHookClient from api_hook_client import ApiHookClient
def test_gemini_cli_context_bleed_prevention(live_gui): def test_gemini_cli_context_bleed_prevention(live_gui):
""" """
Test that the GeminiCliAdapter correctly filters out echoed 'user' messages Test that the GeminiCliAdapter correctly filters out echoed 'user' messages
and only shows assistant content in the GUI history. and only shows assistant content in the GUI history.
""" """
client = ApiHookClient("http://127.0.0.1:8999") client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset") client.click("btn_reset")
client.set_value("auto_add_history", True) client.set_value("auto_add_history", True)
# Create a specialized mock for context bleed
# Create a specialized mock for context bleed bleed_mock = os.path.abspath("tests/mock_context_bleed.py")
bleed_mock = os.path.abspath("tests/mock_context_bleed.py") with open(bleed_mock, "w") as f:
with open(bleed_mock, "w") as f: f.write('''import sys, json
f.write('''import sys, json
print(json.dumps({"type": "init", "session_id": "bleed-test"}), flush=True) print(json.dumps({"type": "init", "session_id": "bleed-test"}), flush=True)
print(json.dumps({"type": "message", "role": "user", "content": "I am echoing you"}), flush=True) print(json.dumps({"type": "message", "role": "user", "content": "I am echoing you"}), flush=True)
print(json.dumps({"type": "message", "role": "assistant", "content": "Actual AI Response"}), flush=True) print(json.dumps({"type": "message", "role": "assistant", "content": "Actual AI Response"}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True) print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)
''') ''')
cli_cmd = f'"{sys.executable}" "{bleed_mock}"'
cli_cmd = f'"{sys.executable}" "{bleed_mock}"' client.set_value("current_provider", "gemini_cli")
client.set_value("current_provider", "gemini_cli") client.set_value("gcli_path", cli_cmd)
client.set_value("gcli_path", cli_cmd) client.set_value("ai_input", "Test context bleed")
client.click("btn_gen_send")
client.set_value("ai_input", "Test context bleed") # Wait for completion
client.click("btn_gen_send") time.sleep(3)
session = client.get_session()
# Wait for completion entries = session.get("session", {}).get("entries", [])
time.sleep(3) # Verify: We expect exactly one AI entry, and it must NOT contain the echoed user message
ai_entries = [e for e in entries if e.get("role") == "AI"]
session = client.get_session() assert len(ai_entries) == 1
entries = session.get("session", {}).get("entries", []) assert ai_entries[0].get("content") == "Actual AI Response"
assert "echoing you" not in ai_entries[0].get("content")
# Verify: We expect exactly one AI entry, and it must NOT contain the echoed user message os.remove(bleed_mock)
ai_entries = [e for e in entries if e.get("role") == "AI"]
assert len(ai_entries) == 1
assert ai_entries[0].get("content") == "Actual AI Response"
assert "echoing you" not in ai_entries[0].get("content")
os.remove(bleed_mock)
def test_gemini_cli_parameter_resilience(live_gui): def test_gemini_cli_parameter_resilience(live_gui):
""" """
Test that mcp_client correctly handles 'file_path' and 'dir_path' aliases Test that mcp_client correctly handles 'file_path' and 'dir_path' aliases
sent by the AI instead of 'path'. sent by the AI instead of 'path'.
""" """
client = ApiHookClient("http://127.0.0.1:8999") client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset") client.click("btn_reset")
client.set_value("auto_add_history", True) client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop") client.select_list_item("proj_files", "manual_slop")
# Create a mock that uses dir_path for list_directory
# Create a mock that uses dir_path for list_directory alias_mock = os.path.abspath("tests/mock_alias_tool.py")
alias_mock = os.path.abspath("tests/mock_alias_tool.py") bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py") # Avoid backslashes in f-string expression part
# Avoid backslashes in f-string expression part if sys.platform == "win32":
if sys.platform == "win32": bridge_path_str = bridge_path.replace("\\", "/")
bridge_path_str = bridge_path.replace("\\", "/") else:
else: bridge_path_str = bridge_path
bridge_path_str = bridge_path with open(alias_tool_content := "tests/mock_alias_tool.py", "w") as f:
f.write(f'''import sys, json, os, subprocess
with open(alias_tool_content := "tests/mock_alias_tool.py", "w") as f:
f.write(f'''import sys, json, os, subprocess
prompt = sys.stdin.read() prompt = sys.stdin.read()
if '"role": "tool"' in prompt: if '"role": "tool"' in prompt:
print(json.dumps({{"type": "message", "role": "assistant", "content": "Tool worked!"}}), flush=True) print(json.dumps({{"type": "message", "role": "assistant", "content": "Tool worked!"}}), flush=True)
@@ -88,83 +79,71 @@ else:
}}), flush=True) }}), flush=True)
print(json.dumps({{"type": "result", "stats": {{"total_tokens": 10}}}}), flush=True) print(json.dumps({{"type": "result", "stats": {{"total_tokens": 10}}}}), flush=True)
''') ''')
cli_cmd = f'"{sys.executable}" "{alias_mock}"'
cli_cmd = f'"{sys.executable}" "{alias_mock}"' client.set_value("current_provider", "gemini_cli")
client.set_value("current_provider", "gemini_cli") client.set_value("gcli_path", cli_cmd)
client.set_value("gcli_path", cli_cmd) client.set_value("ai_input", "Test parameter aliases")
client.click("btn_gen_send")
client.set_value("ai_input", "Test parameter aliases") # Handle approval
client.click("btn_gen_send") timeout = 15
start_time = time.time()
# Handle approval approved = False
timeout = 15 while time.time() - start_time < timeout:
start_time = time.time() for ev in client.get_events():
approved = False if ev.get("type") == "ask_received":
while time.time() - start_time < timeout: requests.post("http://127.0.0.1:8999/api/ask/respond",
for ev in client.get_events(): json={"request_id": ev.get("request_id"), "response": {"approved": True}})
if ev.get("type") == "ask_received": approved = True
requests.post("http://127.0.0.1:8999/api/ask/respond", if approved: break
json={"request_id": ev.get("request_id"), "response": {"approved": True}}) time.sleep(0.5)
approved = True assert approved, "Tool approval event never received"
if approved: break # Verify tool result in history
time.sleep(0.5) time.sleep(2)
session = client.get_session()
assert approved, "Tool approval event never received" entries = session.get("session", {}).get("entries", [])
# Check for "Tool worked!" which implies the tool execution was successful
# Verify tool result in history found = any("Tool worked!" in e.get("content", "") for e in entries)
time.sleep(2) assert found, "Tool result indicating success not found in history"
session = client.get_session() os.remove(alias_mock)
entries = session.get("session", {}).get("entries", [])
# Check for "Tool worked!" which implies the tool execution was successful
found = any("Tool worked!" in e.get("content", "") for e in entries)
assert found, "Tool result indicating success not found in history"
os.remove(alias_mock)
def test_gemini_cli_loop_termination(live_gui): def test_gemini_cli_loop_termination(live_gui):
""" """
Test that multi-round tool calling correctly terminates and preserves Test that multi-round tool calling correctly terminates and preserves
payload (session context) between rounds. payload (session context) between rounds.
""" """
client = ApiHookClient("http://127.0.0.1:8999") client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset") client.click("btn_reset")
client.set_value("auto_add_history", True) client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop") client.select_list_item("proj_files", "manual_slop")
# This uses the existing mock_gemini_cli.py which is already designed for 2 rounds
# This uses the existing mock_gemini_cli.py which is already designed for 2 rounds mock_script = os.path.abspath("tests/mock_gemini_cli.py")
mock_script = os.path.abspath("tests/mock_gemini_cli.py") cli_cmd = f'"{sys.executable}" "{mock_script}"'
cli_cmd = f'"{sys.executable}" "{mock_script}"' client.set_value("current_provider", "gemini_cli")
client.set_value("current_provider", "gemini_cli") client.set_value("gcli_path", cli_cmd)
client.set_value("gcli_path", cli_cmd) client.set_value("ai_input", "Perform multi-round tool test")
client.click("btn_gen_send")
client.set_value("ai_input", "Perform multi-round tool test") # Handle approvals (mock does one tool call)
client.click("btn_gen_send") timeout = 20
start_time = time.time()
# Handle approvals (mock does one tool call) approved = False
timeout = 20 while time.time() - start_time < timeout:
start_time = time.time() for ev in client.get_events():
approved = False if ev.get("type") == "ask_received":
while time.time() - start_time < timeout: requests.post("http://127.0.0.1:8999/api/ask/respond",
for ev in client.get_events(): json={"request_id": ev.get("request_id"), "response": {"approved": True}})
if ev.get("type") == "ask_received": approved = True
requests.post("http://127.0.0.1:8999/api/ask/respond", if approved: break
json={"request_id": ev.get("request_id"), "response": {"approved": True}}) time.sleep(0.5)
approved = True # Wait for the second round and final answer
if approved: break found_final = False
time.sleep(0.5) start_time = time.time()
while time.time() - start_time < 15:
# Wait for the second round and final answer session = client.get_session()
found_final = False entries = session.get("session", {}).get("entries", [])
start_time = time.time() for e in entries:
while time.time() - start_time < 15: if "processed the tool results" in e.get("content", ""):
session = client.get_session() found_final = True
entries = session.get("session", {}).get("entries", []) break
for e in entries: if found_final: break
if "processed the tool results" in e.get("content", ""): time.sleep(1)
found_final = True assert found_final, "Final message after multi-round tool loop not found"
break
if found_final: break
time.sleep(1)
assert found_final, "Final message after multi-round tool loop not found"

View File

@@ -6,136 +6,116 @@ import requests
from api_hook_client import ApiHookClient from api_hook_client import ApiHookClient
def test_gemini_cli_full_integration(live_gui): def test_gemini_cli_full_integration(live_gui):
""" """
Integration test for the Gemini CLI provider and tool bridge. Integration test for the Gemini CLI provider and tool bridge.
Handles 'ask_received' events from the bridge and any other approval requests. Handles 'ask_received' events from the bridge and any other approval requests.
""" """
client = ApiHookClient("http://127.0.0.1:8999") client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session and enable history
# 0. Reset session and enable history client.click("btn_reset")
client.click("btn_reset") client.set_value("auto_add_history", True)
client.set_value("auto_add_history", True) # Switch to manual_slop project explicitly
# Switch to manual_slop project explicitly client.select_list_item("proj_files", "manual_slop")
client.select_list_item("proj_files", "manual_slop") # 1. Setup paths and configure the GUI
# Use the real gemini CLI if available, otherwise use mock
# 1. Setup paths and configure the GUI # For CI/testing we prefer mock
# Use the real gemini CLI if available, otherwise use mock mock_script = os.path.abspath("tests/mock_gemini_cli.py")
# For CI/testing we prefer mock cli_cmd = f'"{sys.executable}" "{mock_script}"'
mock_script = os.path.abspath("tests/mock_gemini_cli.py") print(f"[TEST] Setting current_provider to gemini_cli")
cli_cmd = f'"{sys.executable}" "{mock_script}"' client.set_value("current_provider", "gemini_cli")
print(f"[TEST] Setting gcli_path to {cli_cmd}")
print(f"[TEST] Setting current_provider to gemini_cli") client.set_value("gcli_path", cli_cmd)
client.set_value("current_provider", "gemini_cli") # Verify settings
print(f"[TEST] Setting gcli_path to {cli_cmd}") assert client.get_value("current_provider") == "gemini_cli"
client.set_value("gcli_path", cli_cmd) # Clear events
client.get_events()
# Verify settings # 2. Trigger a message in the GUI
assert client.get_value("current_provider") == "gemini_cli" print("[TEST] Sending user message...")
client.set_value("ai_input", "Please read test.txt")
# Clear events client.click("btn_gen_send")
client.get_events() # 3. Monitor for approval events
print("[TEST] Waiting for approval events...")
# 2. Trigger a message in the GUI timeout = 45
print("[TEST] Sending user message...") start_time = time.time()
client.set_value("ai_input", "Please read test.txt") approved_count = 0
client.click("btn_gen_send") while time.time() - start_time < timeout:
events = client.get_events()
# 3. Monitor for approval events if events:
print("[TEST] Waiting for approval events...") for ev in events:
timeout = 45 etype = ev.get("type")
start_time = time.time() eid = ev.get("request_id") or ev.get("action_id")
approved_count = 0 print(f"[TEST] Received event: {etype} (ID: {eid})")
if etype in ["ask_received", "glob_approval_required", "script_confirmation_required"]:
while time.time() - start_time < timeout: print(f"[TEST] Approving {etype} {eid}")
events = client.get_events() if etype == "script_confirmation_required":
if events: resp = requests.post(f"http://127.0.0.1:8999/api/confirm/{eid}", json={"approved": True})
for ev in events: else:
etype = ev.get("type") resp = requests.post("http://127.0.0.1:8999/api/ask/respond",
eid = ev.get("request_id") or ev.get("action_id") json={"request_id": eid, "response": {"approved": True}})
print(f"[TEST] Received event: {etype} (ID: {eid})") assert resp.status_code == 200
approved_count += 1
if etype in ["ask_received", "glob_approval_required", "script_confirmation_required"]: # Check if we got a final response in history
print(f"[TEST] Approving {etype} {eid}") session = client.get_session()
if etype == "script_confirmation_required": entries = session.get("session", {}).get("entries", [])
resp = requests.post(f"http://127.0.0.1:8999/api/confirm/{eid}", json={"approved": True}) found_final = False
else: for entry in entries:
resp = requests.post("http://127.0.0.1:8999/api/ask/respond", content = entry.get("content", "")
json={"request_id": eid, "response": {"approved": True}}) if "Hello from mock!" in content or "processed the tool results" in content:
assert resp.status_code == 200 print(f"[TEST] Success! Found final message in history.")
approved_count += 1 found_final = True
break
# Check if we got a final response in history if found_final:
session = client.get_session() break
entries = session.get("session", {}).get("entries", []) time.sleep(1.0)
found_final = False assert approved_count > 0, "No approval events were processed"
for entry in entries: assert found_final, "Final message from mock CLI was not found in the GUI history"
content = entry.get("content", "")
if "Hello from mock!" in content or "processed the tool results" in content:
print(f"[TEST] Success! Found final message in history.")
found_final = True
break
if found_final:
break
time.sleep(1.0)
assert approved_count > 0, "No approval events were processed"
assert found_final, "Final message from mock CLI was not found in the GUI history"
def test_gemini_cli_rejection_and_history(live_gui): def test_gemini_cli_rejection_and_history(live_gui):
""" """
Integration test for the Gemini CLI provider: Rejection flow and history. Integration test for the Gemini CLI provider: Rejection flow and history.
""" """
client = ApiHookClient("http://127.0.0.1:8999") client = ApiHookClient("http://127.0.0.1:8999")
# 0. Reset session
# 0. Reset session client.click("btn_reset")
client.click("btn_reset") client.set_value("auto_add_history", True)
client.set_value("auto_add_history", True) client.select_list_item("proj_files", "manual_slop")
client.select_list_item("proj_files", "manual_slop") mock_script = os.path.abspath("tests/mock_gemini_cli.py")
cli_cmd = f'"{sys.executable}" "{mock_script}"'
mock_script = os.path.abspath("tests/mock_gemini_cli.py") client.set_value("current_provider", "gemini_cli")
cli_cmd = f'"{sys.executable}" "{mock_script}"' client.set_value("gcli_path", cli_cmd)
client.set_value("current_provider", "gemini_cli") # 2. Trigger a message
client.set_value("gcli_path", cli_cmd) print("[TEST] Sending user message (to be denied)...")
client.set_value("ai_input", "Deny me")
# 2. Trigger a message client.click("btn_gen_send")
print("[TEST] Sending user message (to be denied)...") # 3. Wait for event and reject
client.set_value("ai_input", "Deny me") timeout = 20
client.click("btn_gen_send") start_time = time.time()
denied = False
# 3. Wait for event and reject while time.time() - start_time < timeout:
timeout = 20 for ev in client.get_events():
start_time = time.time() etype = ev.get("type")
denied = False eid = ev.get("request_id")
while time.time() - start_time < timeout: print(f"[TEST] Received event: {etype}")
for ev in client.get_events(): if etype == "ask_received":
etype = ev.get("type") print(f"[TEST] Denying request {eid}")
eid = ev.get("request_id") requests.post("http://127.0.0.1:8999/api/ask/respond",
print(f"[TEST] Received event: {etype}") json={"request_id": eid, "response": {"approved": False}})
if etype == "ask_received": denied = True
print(f"[TEST] Denying request {eid}") break
requests.post("http://127.0.0.1:8999/api/ask/respond", if denied: break
json={"request_id": eid, "response": {"approved": False}}) time.sleep(0.5)
denied = True assert denied, "No ask_received event to deny"
break # 4. Verify rejection in history
if denied: break print("[TEST] Waiting for rejection in history...")
time.sleep(0.5) rejection_found = False
start_time = time.time()
assert denied, "No ask_received event to deny" while time.time() - start_time < 20:
session = client.get_session()
# 4. Verify rejection in history entries = session.get("session", {}).get("entries", [])
print("[TEST] Waiting for rejection in history...") for entry in entries:
rejection_found = False if "Tool execution was denied" in entry.get("content", ""):
start_time = time.time() rejection_found = True
while time.time() - start_time < 20: break
session = client.get_session() if rejection_found: break
entries = session.get("session", {}).get("entries", []) time.sleep(1.0)
for entry in entries: assert rejection_found, "Rejection message not found in history"
if "Tool execution was denied" in entry.get("content", ""):
rejection_found = True
break
if rejection_found: break
time.sleep(1.0)
assert rejection_found, "Rejection message not found in history"

View File

@@ -10,43 +10,38 @@ import ai_client
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def setup_ai_client(): def setup_ai_client():
ai_client.reset_session() ai_client.reset_session()
ai_client.set_provider("gemini_cli", "gemini-2.5-flash") ai_client.set_provider("gemini_cli", "gemini-2.5-flash")
ai_client.confirm_and_run_callback = lambda script, base_dir: "Mocked execution" ai_client.confirm_and_run_callback = lambda script, base_dir: "Mocked execution"
ai_client.comms_log_callback = lambda entry: None ai_client.comms_log_callback = lambda entry: None
ai_client.tool_log_callback = lambda script, result: None ai_client.tool_log_callback = lambda script, result: None
yield yield
@patch('ai_client.GeminiCliAdapter') @patch('ai_client.GeminiCliAdapter')
@patch('ai_client._get_combined_system_prompt') @patch('ai_client._get_combined_system_prompt')
def test_send_invokes_adapter_send(mock_prompt, mock_adapter_class): def test_send_invokes_adapter_send(mock_prompt, mock_adapter_class):
mock_prompt.return_value = "Mocked Prompt" mock_prompt.return_value = "Mocked Prompt"
mock_instance = mock_adapter_class.return_value mock_instance = mock_adapter_class.return_value
mock_instance.send.return_value = {"text": "Done", "tool_calls": []} mock_instance.send.return_value = {"text": "Done", "tool_calls": []}
mock_instance.last_usage = {"input_tokens": 10} mock_instance.last_usage = {"input_tokens": 10}
mock_instance.last_latency = 0.1 mock_instance.last_latency = 0.1
mock_instance.session_id = None mock_instance.session_id = None
ai_client.send("context", "message", discussion_history="hist")
ai_client.send("context", "message", discussion_history="hist") expected_payload = "[DISCUSSION HISTORY]\n\nhist\n\n---\n\nmessage"
assert mock_instance.send.called
expected_payload = "[DISCUSSION HISTORY]\n\nhist\n\n---\n\nmessage" args, kwargs = mock_instance.send.call_args
assert mock_instance.send.called assert args[0] == expected_payload
args, kwargs = mock_instance.send.call_args assert kwargs['system_instruction'] == "Mocked Prompt\n\n<context>\ncontext\n</context>"
assert args[0] == expected_payload
assert kwargs['system_instruction'] == "Mocked Prompt\n\n<context>\ncontext\n</context>"
@patch('ai_client.GeminiCliAdapter') @patch('ai_client.GeminiCliAdapter')
def test_get_history_bleed_stats(mock_adapter_class): def test_get_history_bleed_stats(mock_adapter_class):
mock_instance = mock_adapter_class.return_value mock_instance = mock_adapter_class.return_value
mock_instance.send.return_value = {"text": "txt", "tool_calls": []} mock_instance.send.return_value = {"text": "txt", "tool_calls": []}
mock_instance.last_usage = {"input_tokens": 1500} mock_instance.last_usage = {"input_tokens": 1500}
mock_instance.last_latency = 0.5 mock_instance.last_latency = 0.5
mock_instance.session_id = "sess" mock_instance.session_id = "sess"
# Initialize by sending a message
# Initialize by sending a message ai_client.send("context", "msg")
ai_client.send("context", "msg") stats = ai_client.get_history_bleed_stats()
assert stats["provider"] == "gemini_cli"
stats = ai_client.get_history_bleed_stats() assert stats["current"] == 1500
assert stats["provider"] == "gemini_cli"
assert stats["current"] == 1500

View File

@@ -10,41 +10,34 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from ai_client import get_gemini_cache_stats, reset_session from ai_client import get_gemini_cache_stats, reset_session
def test_get_gemini_cache_stats_with_mock_client(): def test_get_gemini_cache_stats_with_mock_client():
""" """
Test that get_gemini_cache_stats correctly processes cache lists Test that get_gemini_cache_stats correctly processes cache lists
from a mocked client instance. from a mocked client instance.
""" """
# Ensure a clean state before the test by resetting the session # Ensure a clean state before the test by resetting the session
reset_session() reset_session()
# 1. Create a mock for the cache object that the client will return
# 1. Create a mock for the cache object that the client will return mock_cache = MagicMock()
mock_cache = MagicMock() mock_cache.name = "cachedContents/test-cache"
mock_cache.name = "cachedContents/test-cache" mock_cache.display_name = "Test Cache"
mock_cache.display_name = "Test Cache" mock_cache.model = "models/gemini-1.5-pro-001"
mock_cache.model = "models/gemini-1.5-pro-001" mock_cache.size_bytes = 1024
mock_cache.size_bytes = 1024 # 2. Create a mock for the client instance
mock_client_instance = MagicMock()
# 2. Create a mock for the client instance # Configure its `caches.list` method to return our mock cache
mock_client_instance = MagicMock() mock_client_instance.caches.list.return_value = [mock_cache]
# Configure its `caches.list` method to return our mock cache # 3. Patch the Client constructor to return our mock instance
mock_client_instance.caches.list.return_value = [mock_cache] # This intercepts the `_ensure_gemini_client` call inside the function
with patch('google.genai.Client', return_value=mock_client_instance) as mock_client_constructor:
# 3. Patch the Client constructor to return our mock instance # 4. Call the function under test
# This intercepts the `_ensure_gemini_client` call inside the function stats = get_gemini_cache_stats()
with patch('google.genai.Client', return_value=mock_client_instance) as mock_client_constructor: # 5. Assert that the function behaved as expected
# It should have constructed the client
# 4. Call the function under test mock_client_constructor.assert_called_once()
stats = get_gemini_cache_stats() # It should have called the `list` method on the `caches` attribute
mock_client_instance.caches.list.assert_called_once()
# 5. Assert that the function behaved as expected # The returned stats dictionary should be correct
assert "cache_count" in stats
# It should have constructed the client assert "total_size_bytes" in stats
mock_client_constructor.assert_called_once() assert stats["cache_count"] == 1
# It should have called the `list` method on the `caches` attribute assert stats["total_size_bytes"] == 1024
mock_client_instance.caches.list.assert_called_once()
# The returned stats dictionary should be correct
assert "cache_count" in stats
assert "total_size_bytes" in stats
assert stats["cache_count"] == 1
assert stats["total_size_bytes"] == 1024

View File

@@ -6,43 +6,40 @@ from events import EventEmitter
@pytest.fixture @pytest.fixture
def app_instance(): def app_instance():
""" """
Fixture to create an instance of the gui_2.App class for testing. Fixture to create an instance of the gui_2.App class for testing.
It mocks functions that would render a window or block execution. It mocks functions that would render a window or block execution.
""" """
if not hasattr(ai_client, 'events') or ai_client.events is None: if not hasattr(ai_client, 'events') or ai_client.events is None:
ai_client.events = EventEmitter() ai_client.events = EventEmitter()
with (
with ( patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}), patch('gui_2.save_config'),
patch('gui_2.save_config'), patch('gui_2.project_manager'),
patch('gui_2.project_manager'), patch('gui_2.session_logger'),
patch('gui_2.session_logger'), patch('gui_2.immapp.run'),
patch('gui_2.immapp.run'), patch.object(App, '_load_active_project'),
patch.object(App, '_load_active_project'), patch.object(App, '_fetch_models'),
patch.object(App, '_fetch_models'), patch.object(App, '_load_fonts'),
patch.object(App, '_load_fonts'), patch.object(App, '_post_init')
patch.object(App, '_post_init') ):
): yield App
yield App
def test_app_subscribes_to_events(app_instance): def test_app_subscribes_to_events(app_instance):
""" """
This test checks that the App's __init__ method subscribes the necessary This test checks that the App's __init__ method subscribes the necessary
event handlers to the ai_client.events emitter. event handlers to the ai_client.events emitter.
This test will fail until the event subscription logic is added to gui_2.App. This test will fail until the event subscription logic is added to gui_2.App.
""" """
with patch.object(ai_client.events, 'on') as mock_on: with patch.object(ai_client.events, 'on') as mock_on:
app = app_instance() app = app_instance()
mock_on.assert_called() mock_on.assert_called()
calls = mock_on.call_args_list
calls = mock_on.call_args_list event_names = [call.args[0] for call in calls]
event_names = [call.args[0] for call in calls] assert "request_start" in event_names
assert "request_start" in event_names assert "response_received" in event_names
assert "response_received" in event_names assert "tool_execution" in event_names
assert "tool_execution" in event_names for call in calls:
handler = call.args[1]
for call in calls: assert hasattr(handler, '__self__')
handler = call.args[1] assert handler.__self__ is app
assert hasattr(handler, '__self__')
assert handler.__self__ is app

View File

@@ -4,45 +4,43 @@ from gui_2 import App
@pytest.fixture @pytest.fixture
def app_instance(): def app_instance():
with ( with (
patch('gui_2.load_config', return_value={'gui': {'show_windows': {}}}), patch('gui_2.load_config', return_value={'gui': {'show_windows': {}}}),
patch('gui_2.save_config'), patch('gui_2.save_config'),
patch('gui_2.project_manager'), patch('gui_2.project_manager'),
patch('gui_2.session_logger'), patch('gui_2.session_logger'),
patch('gui_2.immapp.run'), patch('gui_2.immapp.run'),
patch.object(App, '_load_active_project'), patch.object(App, '_load_active_project'),
patch.object(App, '_fetch_models'), patch.object(App, '_fetch_models'),
patch.object(App, '_load_fonts'), patch.object(App, '_load_fonts'),
patch.object(App, '_post_init') patch.object(App, '_post_init')
): ):
yield App() yield App()
def test_gui2_hubs_exist_in_show_windows(app_instance): def test_gui2_hubs_exist_in_show_windows(app_instance):
""" """
Verifies that the new consolidated Hub windows are defined in the App's show_windows. Verifies that the new consolidated Hub windows are defined in the App's show_windows.
This ensures they will be available in the 'Windows' menu. This ensures they will be available in the 'Windows' menu.
""" """
expected_hubs = [ expected_hubs = [
"Context Hub", "Context Hub",
"AI Settings", "AI Settings",
"Discussion Hub", "Discussion Hub",
"Operations Hub", "Operations Hub",
"Files & Media", "Files & Media",
"Theme", "Theme",
] ]
for hub in expected_hubs:
for hub in expected_hubs: assert hub in app_instance.show_windows, f"Expected hub window '{hub}' not found in show_windows"
assert hub in app_instance.show_windows, f"Expected hub window '{hub}' not found in show_windows"
def test_gui2_old_windows_removed_from_show_windows(app_instance): def test_gui2_old_windows_removed_from_show_windows(app_instance):
""" """
Verifies that the old fragmented windows are removed from show_windows. Verifies that the old fragmented windows are removed from show_windows.
""" """
old_windows = [ old_windows = [
"Projects", "Files", "Screenshots", "Projects", "Files", "Screenshots",
"Provider", "System Prompts", "Provider", "System Prompts",
"Message", "Response", "Tool Calls", "Comms History" "Message", "Response", "Tool Calls", "Comms History"
] ]
for old_win in old_windows:
for old_win in old_windows: assert old_win not in app_instance.show_windows, f"Old window '{old_win}' should have been removed from show_windows"
assert old_win not in app_instance.show_windows, f"Old window '{old_win}' should have been removed from show_windows"

View File

@@ -6,74 +6,65 @@ from events import EventEmitter
@pytest.fixture @pytest.fixture
def app_instance(): def app_instance():
if not hasattr(ai_client, 'events') or ai_client.events is None: if not hasattr(ai_client, 'events') or ai_client.events is None:
ai_client.events = EventEmitter() ai_client.events = EventEmitter()
with (
with ( patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}), patch('gui_2.save_config'),
patch('gui_2.save_config'), patch('gui_2.project_manager'),
patch('gui_2.project_manager'), patch('gui_2.session_logger'),
patch('gui_2.session_logger'), patch('gui_2.immapp.run'),
patch('gui_2.immapp.run'), patch.object(App, '_load_active_project'),
patch.object(App, '_load_active_project'), patch.object(App, '_fetch_models'),
patch.object(App, '_fetch_models'), patch.object(App, '_load_fonts'),
patch.object(App, '_load_fonts'), patch.object(App, '_post_init')
patch.object(App, '_post_init') ):
): yield App()
yield App()
def test_mcp_tool_call_is_dispatched(app_instance): def test_mcp_tool_call_is_dispatched(app_instance):
""" """
This test verifies that when the AI returns a tool call for an MCP function, This test verifies that when the AI returns a tool call for an MCP function,
the ai_client correctly dispatches it to mcp_client. the ai_client correctly dispatches it to mcp_client.
This will fail until mcp_client is properly integrated. This will fail until mcp_client is properly integrated.
""" """
# 1. Define the mock tool call from the AI # 1. Define the mock tool call from the AI
mock_fc = MagicMock() mock_fc = MagicMock()
mock_fc.name = "read_file" mock_fc.name = "read_file"
mock_fc.args = {"file_path": "test.txt"} mock_fc.args = {"file_path": "test.txt"}
# 2. Construct the mock AI response (Gemini format)
mock_response_with_tool = MagicMock()
mock_part = MagicMock()
mock_part.text = ""
mock_part.function_call = mock_fc
mock_candidate = MagicMock()
mock_candidate.content.parts = [mock_part]
mock_candidate.finish_reason.name = "TOOL_CALLING"
mock_response_with_tool.candidates = [mock_candidate]
# 2. Construct the mock AI response (Gemini format) class DummyUsage:
mock_response_with_tool = MagicMock() prompt_token_count = 100
mock_part = MagicMock() candidates_token_count = 10
mock_part.text = "" cached_content_token_count = 0
mock_part.function_call = mock_fc mock_response_with_tool.usage_metadata = DummyUsage()
mock_candidate = MagicMock() # 3. Create a mock for the final AI response after the tool call
mock_candidate.content.parts = [mock_part] mock_response_final = MagicMock()
mock_candidate.finish_reason.name = "TOOL_CALLING" mock_response_final.text = "Final answer"
mock_response_with_tool.candidates = [mock_candidate] mock_response_final.candidates = []
mock_response_final.usage_metadata = DummyUsage()
class DummyUsage: # 4. Patch the necessary components
prompt_token_count = 100 with patch("ai_client._ensure_gemini_client"), \
candidates_token_count = 10 patch("ai_client._gemini_client") as mock_client, \
cached_content_token_count = 0 patch('mcp_client.dispatch', return_value="file content") as mock_dispatch:
mock_chat = mock_client.chats.create.return_value
mock_response_with_tool.usage_metadata = DummyUsage() mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
ai_client.set_provider("gemini", "mock-model")
# 3. Create a mock for the final AI response after the tool call # 5. Call the send function
mock_response_final = MagicMock() ai_client.send(
mock_response_final.text = "Final answer" md_content="some context",
mock_response_final.candidates = [] user_message="read the file",
mock_response_final.usage_metadata = DummyUsage() base_dir=".",
file_items=[],
# 4. Patch the necessary components discussion_history=""
with patch("ai_client._ensure_gemini_client"), \ )
patch("ai_client._gemini_client") as mock_client, \ # 6. Assert that the MCP dispatch function was called
patch('mcp_client.dispatch', return_value="file content") as mock_dispatch: mock_dispatch.assert_called_once_with("read_file", {"file_path": "test.txt"})
mock_chat = mock_client.chats.create.return_value
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
ai_client.set_provider("gemini", "mock-model")
# 5. Call the send function
ai_client.send(
md_content="some context",
user_message="read the file",
base_dir=".",
file_items=[],
discussion_history=""
)
# 6. Assert that the MCP dispatch function was called
mock_dispatch.assert_called_once_with("read_file", {"file_path": "test.txt"})

View File

@@ -15,70 +15,62 @@ TEST_CALLBACK_FILE = Path("temp_callback_output.txt")
@pytest.fixture(scope="function", autouse=True) @pytest.fixture(scope="function", autouse=True)
def cleanup_callback_file(): def cleanup_callback_file():
"""Ensures the test callback file is cleaned up before and after each test.""" """Ensures the test callback file is cleaned up before and after each test."""
if TEST_CALLBACK_FILE.exists(): if TEST_CALLBACK_FILE.exists():
TEST_CALLBACK_FILE.unlink() TEST_CALLBACK_FILE.unlink()
yield yield
if TEST_CALLBACK_FILE.exists(): if TEST_CALLBACK_FILE.exists():
TEST_CALLBACK_FILE.unlink() TEST_CALLBACK_FILE.unlink()
def test_gui2_set_value_hook_works(live_gui): def test_gui2_set_value_hook_works(live_gui):
""" """
Tests that the 'set_value' GUI hook is correctly implemented. Tests that the 'set_value' GUI hook is correctly implemented.
""" """
client = ApiHookClient() client = ApiHookClient()
assert client.wait_for_server(timeout=10) assert client.wait_for_server(timeout=10)
test_value = f"New value set by test: {uuid.uuid4()}" test_value = f"New value set by test: {uuid.uuid4()}"
gui_data = {'action': 'set_value', 'item': 'ai_input', 'value': test_value} gui_data = {'action': 'set_value', 'item': 'ai_input', 'value': test_value}
response = client.post_gui(gui_data)
response = client.post_gui(gui_data) assert response == {'status': 'queued'}
assert response == {'status': 'queued'} # Verify the value was actually set using the new get_value hook
time.sleep(0.5)
# Verify the value was actually set using the new get_value hook current_value = client.get_value('ai_input')
time.sleep(0.5) assert current_value == test_value
current_value = client.get_value('ai_input')
assert current_value == test_value
def test_gui2_click_hook_works(live_gui): def test_gui2_click_hook_works(live_gui):
""" """
Tests that the 'click' GUI hook for the 'Reset' button is implemented. Tests that the 'click' GUI hook for the 'Reset' button is implemented.
""" """
client = ApiHookClient() client = ApiHookClient()
assert client.wait_for_server(timeout=10) assert client.wait_for_server(timeout=10)
# First, set some state that 'Reset' would clear.
# First, set some state that 'Reset' would clear. test_value = "This text should be cleared by the reset button."
test_value = "This text should be cleared by the reset button." client.set_value('ai_input', test_value)
client.set_value('ai_input', test_value) time.sleep(0.5)
time.sleep(0.5) assert client.get_value('ai_input') == test_value
assert client.get_value('ai_input') == test_value # Now, trigger the click
client.click('btn_reset')
# Now, trigger the click time.sleep(0.5)
client.click('btn_reset') # Verify it was reset
time.sleep(0.5) assert client.get_value('ai_input') == ""
# Verify it was reset
assert client.get_value('ai_input') == ""
def test_gui2_custom_callback_hook_works(live_gui): def test_gui2_custom_callback_hook_works(live_gui):
""" """
Tests that the 'custom_callback' GUI hook is correctly implemented. Tests that the 'custom_callback' GUI hook is correctly implemented.
""" """
client = ApiHookClient() client = ApiHookClient()
assert client.wait_for_server(timeout=10) assert client.wait_for_server(timeout=10)
test_data = f"Callback executed: {uuid.uuid4()}" test_data = f"Callback executed: {uuid.uuid4()}"
gui_data = {
gui_data = { 'action': 'custom_callback',
'action': 'custom_callback', 'callback': '_test_callback_func_write_to_file',
'callback': '_test_callback_func_write_to_file', 'args': [test_data]
'args': [test_data] }
} response = client.post_gui(gui_data)
response = client.post_gui(gui_data) assert response == {'status': 'queued'}
assert response == {'status': 'queued'} time.sleep(1) # Give gui_2.py time to process its task queue
# Assert that the file WAS created and contains the correct data
time.sleep(1) # Give gui_2.py time to process its task queue assert TEST_CALLBACK_FILE.exists(), "Custom callback was NOT executed, or file path is wrong!"
with open(TEST_CALLBACK_FILE, "r") as f:
# Assert that the file WAS created and contains the correct data content = f.read()
assert TEST_CALLBACK_FILE.exists(), "Custom callback was NOT executed, or file path is wrong!" assert content == test_data, "Callback executed, but file content is incorrect."
with open(TEST_CALLBACK_FILE, "r") as f:
content = f.read()
assert content == test_data, "Callback executed, but file content is incorrect."

View File

@@ -12,78 +12,66 @@ from api_hook_client import ApiHookClient
_shared_metrics = {} _shared_metrics = {}
def test_performance_benchmarking(live_gui): def test_performance_benchmarking(live_gui):
""" """
Collects performance metrics for the current GUI script (parameterized as gui.py and gui_2.py). Collects performance metrics for the current GUI script (parameterized as gui.py and gui_2.py).
""" """
process, gui_script = live_gui process, gui_script = live_gui
client = ApiHookClient() client = ApiHookClient()
# Wait for app to stabilize and render some frames
# Wait for app to stabilize and render some frames time.sleep(3.0)
time.sleep(3.0) # Collect metrics over 5 seconds
fps_values = []
# Collect metrics over 5 seconds cpu_values = []
fps_values = [] frame_time_values = []
cpu_values = [] start_time = time.time()
frame_time_values = [] while time.time() - start_time < 5:
try:
start_time = time.time() perf_data = client.get_performance()
while time.time() - start_time < 5: metrics = perf_data.get('performance', {})
try: if metrics:
perf_data = client.get_performance() fps = metrics.get('fps', 0.0)
metrics = perf_data.get('performance', {}) cpu = metrics.get('cpu_percent', 0.0)
if metrics: ft = metrics.get('last_frame_time_ms', 0.0)
fps = metrics.get('fps', 0.0) # In some CI environments without a display, metrics might be 0
cpu = metrics.get('cpu_percent', 0.0) # We only record positive ones to avoid skewing averages if hooks are failing
ft = metrics.get('last_frame_time_ms', 0.0) if fps > 0:
fps_values.append(fps)
# In some CI environments without a display, metrics might be 0 cpu_values.append(cpu)
# We only record positive ones to avoid skewing averages if hooks are failing frame_time_values.append(ft)
if fps > 0: time.sleep(0.1)
fps_values.append(fps) except Exception:
cpu_values.append(cpu) break
frame_time_values.append(ft) avg_fps = sum(fps_values) / len(fps_values) if fps_values else 0
time.sleep(0.1) avg_cpu = sum(cpu_values) / len(cpu_values) if cpu_values else 0
except Exception: avg_ft = sum(frame_time_values) / len(frame_time_values) if frame_time_values else 0
break _shared_metrics[gui_script] = {
"avg_fps": avg_fps,
avg_fps = sum(fps_values) / len(fps_values) if fps_values else 0 "avg_cpu": avg_cpu,
avg_cpu = sum(cpu_values) / len(cpu_values) if cpu_values else 0 "avg_ft": avg_ft
avg_ft = sum(frame_time_values) / len(frame_time_values) if frame_time_values else 0 }
print(f"\n[Test] Results for {gui_script}: FPS={avg_fps:.2f}, CPU={avg_cpu:.2f}%, FT={avg_ft:.2f}ms")
_shared_metrics[gui_script] = { # Absolute minimum requirements
"avg_fps": avg_fps, if avg_fps > 0:
"avg_cpu": avg_cpu, assert avg_fps >= 30, f"{gui_script} FPS {avg_fps:.2f} is below 30 FPS threshold"
"avg_ft": avg_ft assert avg_ft <= 33.3, f"{gui_script} Frame time {avg_ft:.2f}ms is above 33.3ms threshold"
}
print(f"\n[Test] Results for {gui_script}: FPS={avg_fps:.2f}, CPU={avg_cpu:.2f}%, FT={avg_ft:.2f}ms")
# Absolute minimum requirements
if avg_fps > 0:
assert avg_fps >= 30, f"{gui_script} FPS {avg_fps:.2f} is below 30 FPS threshold"
assert avg_ft <= 33.3, f"{gui_script} Frame time {avg_ft:.2f}ms is above 33.3ms threshold"
def test_performance_parity(): def test_performance_parity():
""" """
Compare the metrics collected in the parameterized test_performance_benchmarking. Compare the metrics collected in the parameterized test_performance_benchmarking.
""" """
if "gui_legacy.py" not in _shared_metrics or "gui_2.py" not in _shared_metrics: if "gui_legacy.py" not in _shared_metrics or "gui_2.py" not in _shared_metrics:
if len(_shared_metrics) < 2: if len(_shared_metrics) < 2:
pytest.skip("Metrics for both GUIs not yet collected.") pytest.skip("Metrics for both GUIs not yet collected.")
gui_m = _shared_metrics["gui_legacy.py"]
gui_m = _shared_metrics["gui_legacy.py"] gui2_m = _shared_metrics["gui_2.py"]
gui2_m = _shared_metrics["gui_2.py"] # FPS Parity Check (+/- 15% leeway for now, target is 5%)
# Actually I'll use 0.15 for assertion and log the actual.
# FPS Parity Check (+/- 15% leeway for now, target is 5%) fps_diff_pct = abs(gui_m["avg_fps"] - gui2_m["avg_fps"]) / gui_m["avg_fps"] if gui_m["avg_fps"] > 0 else 0
# Actually I'll use 0.15 for assertion and log the actual. cpu_diff_pct = abs(gui_m["avg_cpu"] - gui2_m["avg_cpu"]) / gui_m["avg_cpu"] if gui_m["avg_cpu"] > 0 else 0
fps_diff_pct = abs(gui_m["avg_fps"] - gui2_m["avg_fps"]) / gui_m["avg_fps"] if gui_m["avg_fps"] > 0 else 0 print(f"\n--- Performance Parity Results ---")
cpu_diff_pct = abs(gui_m["avg_cpu"] - gui2_m["avg_cpu"]) / gui_m["avg_cpu"] if gui_m["avg_cpu"] > 0 else 0 print(f"FPS Diff: {fps_diff_pct*100:.2f}%")
print(f"CPU Diff: {cpu_diff_pct*100:.2f}%")
print(f"\n--- Performance Parity Results ---") # We follow the 5% requirement for FPS
print(f"FPS Diff: {fps_diff_pct*100:.2f}%") # For CPU we might need more leeway
print(f"CPU Diff: {cpu_diff_pct*100:.2f}%") assert fps_diff_pct <= 0.15, f"FPS difference {fps_diff_pct*100:.2f}% exceeds 15% threshold"
assert cpu_diff_pct <= 3.0, f"CPU difference {cpu_diff_pct*100:.2f}% exceeds 300% threshold"
# We follow the 5% requirement for FPS
# For CPU we might need more leeway
assert fps_diff_pct <= 0.15, f"FPS difference {fps_diff_pct*100:.2f}% exceeds 15% threshold"
assert cpu_diff_pct <= 3.0, f"CPU difference {cpu_diff_pct*100:.2f}% exceeds 300% threshold"

View File

@@ -6,75 +6,70 @@ from events import UserRequestEvent
@pytest.fixture @pytest.fixture
def mock_gui(): def mock_gui():
with ( with (
patch('gui_2.load_config', return_value={ patch('gui_2.load_config', return_value={
"ai": {"provider": "gemini", "model": "model-1"}, "ai": {"provider": "gemini", "model": "model-1"},
"projects": {"paths": [], "active": ""}, "projects": {"paths": [], "active": ""},
"gui": {"show_windows": {}} "gui": {"show_windows": {}}
}), }),
patch('gui_2.project_manager.load_project', return_value={}), patch('gui_2.project_manager.load_project', return_value={}),
patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}), patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
patch('gui_2.project_manager.save_project'), patch('gui_2.project_manager.save_project'),
patch('gui_2.session_logger.open_session'), patch('gui_2.session_logger.open_session'),
patch('gui_2.App._init_ai_and_hooks'), patch('gui_2.App._init_ai_and_hooks'),
patch('gui_2.App._fetch_models') patch('gui_2.App._fetch_models')
): ):
gui = App() gui = App()
return gui return gui
def test_handle_generate_send_pushes_event(mock_gui): def test_handle_generate_send_pushes_event(mock_gui):
# Mock _do_generate to return sample data # Mock _do_generate to return sample data
mock_gui._do_generate = MagicMock(return_value=( mock_gui._do_generate = MagicMock(return_value=(
"full_md", "path", [], "stable_md", "disc_text" "full_md", "path", [], "stable_md", "disc_text"
)) ))
mock_gui.ui_ai_input = "test prompt" mock_gui.ui_ai_input = "test prompt"
mock_gui.ui_files_base_dir = "." mock_gui.ui_files_base_dir = "."
# Mock event_queue.put
# Mock event_queue.put mock_gui.event_queue.put = MagicMock()
mock_gui.event_queue.put = MagicMock() # We need to mock asyncio.run_coroutine_threadsafe to immediately execute
with patch('asyncio.run_coroutine_threadsafe') as mock_run:
# We need to mock asyncio.run_coroutine_threadsafe to immediately execute mock_gui._handle_generate_send()
with patch('asyncio.run_coroutine_threadsafe') as mock_run: # Verify run_coroutine_threadsafe was called
mock_gui._handle_generate_send() assert mock_run.called
# Verify the call to event_queue.put was correct
# Verify run_coroutine_threadsafe was called # This is a bit tricky since the first arg to run_coroutine_threadsafe
assert mock_run.called # is the coroutine returned by event_queue.put().
# Let's verify that the call to put occurred.
# Verify the call to event_queue.put was correct mock_gui.event_queue.put.assert_called_once()
# This is a bit tricky since the first arg to run_coroutine_threadsafe args, kwargs = mock_gui.event_queue.put.call_args
# is the coroutine returned by event_queue.put(). assert args[0] == "user_request"
# Let's verify that the call to put occurred. event = args[1]
mock_gui.event_queue.put.assert_called_once() assert isinstance(event, UserRequestEvent)
args, kwargs = mock_gui.event_queue.put.call_args assert event.prompt == "test prompt"
assert args[0] == "user_request" assert event.stable_md == "stable_md"
event = args[1] assert event.disc_text == "disc_text"
assert isinstance(event, UserRequestEvent) assert event.base_dir == "."
assert event.prompt == "test prompt"
assert event.stable_md == "stable_md"
assert event.disc_text == "disc_text"
assert event.base_dir == "."
def test_user_request_event_payload():
    """UserRequestEvent round-trips every constructor field through to_dict()."""
    expected = {
        "prompt": "hello",
        "stable_md": "md",
        "file_items": [],
        "disc_text": "disc",
        "base_dir": ".",
    }
    event = UserRequestEvent(
        prompt="hello",
        stable_md="md",
        file_items=[],
        disc_text="disc",
        base_dir="."
    )
    serialized = event.to_dict()
    # Each field must survive serialization unchanged.
    for key, value in expected.items():
        assert serialized[key] == value
@pytest.mark.asyncio
async def test_async_event_queue():
    """An event put on AsyncEventQueue comes back out with its name and payload intact."""
    from events import AsyncEventQueue

    queue = AsyncEventQueue()
    sent_payload = {"data": 123}
    await queue.put("test_event", sent_payload)
    received_name, received_payload = await queue.get()
    assert received_name == "test_event"
    assert received_payload["data"] == 123

View File

@@ -13,53 +13,48 @@ from gui_legacy import App
@pytest.fixture
def app_instance():
    """Yield an App inside a throwaway DPG context, with viewport setup and
    list-rebuild side effects stubbed out so construction stays headless."""
    dpg.create_context()
    # Start all patchers by hand (equivalent to the chained `with`), so the
    # stubs stay active for the whole lifetime of the yielded App.
    patchers = [
        patch('dearpygui.dearpygui.create_viewport'),
        patch('dearpygui.dearpygui.setup_dearpygui'),
        patch('dearpygui.dearpygui.show_viewport'),
        patch('dearpygui.dearpygui.start_dearpygui'),
        patch('gui_legacy.load_config', return_value={}),
        patch.object(App, '_rebuild_files_list'),
        patch.object(App, '_rebuild_shots_list'),
        patch.object(App, '_rebuild_disc_list'),
        patch.object(App, '_rebuild_disc_roles_list'),
        patch.object(App, '_rebuild_discussion_selector'),
        patch.object(App, '_refresh_project_widgets'),
    ]
    for active in patchers:
        active.start()
    try:
        yield App()
    finally:
        # Undo in reverse order, mirroring nested context-manager exit.
        for active in reversed(patchers):
            active.stop()
    dpg.destroy_context()
def test_diagnostics_panel_initialization(app_instance):
    """A fresh App registers the Diagnostics window and pre-fills its frame-time history."""
    window_map = app_instance.window_info
    assert "Diagnostics" in window_map
    assert window_map["Diagnostics"] == "win_diagnostics"

    history = app_instance.perf_history
    assert "frame_time" in history
    # History buffer is pre-sized to 100 samples at construction time.
    assert len(history["frame_time"]) == 100
def test_diagnostics_panel_updates(app_instance):
    """_update_performance_diagnostics pushes monitor metrics into the UI widgets
    and appends the frame time to the rolling history."""
    fake_metrics = {
        'last_frame_time_ms': 10.0,
        'fps': 100.0,
        'cpu_percent': 50.0,
        'input_lag_ms': 5.0
    }
    app_instance.perf_monitor.get_metrics = MagicMock(return_value=fake_metrics)

    # Stub out every DPG call the update path touches, plus the ai_client
    # stats lookup, so the method runs headless.
    with patch('dearpygui.dearpygui.is_item_shown', return_value=True), \
         patch('dearpygui.dearpygui.set_value') as set_value_mock, \
         patch('dearpygui.dearpygui.configure_item'), \
         patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
         patch('ai_client.get_history_bleed_stats', return_value={}):
        app_instance._update_performance_diagnostics()

    # Each metric must be rendered to its widget with the expected formatting.
    expected_widget_text = {
        "perf_fps_text": "100.0",
        "perf_frame_text": "10.0ms",
        "perf_cpu_text": "50.0%",
        "perf_lag_text": "5.0ms",
    }
    for tag, text in expected_widget_text.items():
        set_value_mock.assert_any_call(tag, text)

    # The frame time is also appended to the rolling history buffer.
    assert app_instance.perf_history["frame_time"][-1] == 10.0

Some files were not shown because too many files have changed in this diff Show More