checkpoint: massive refactor

This commit is contained in:
2026-02-28 09:06:45 -05:00
parent f2512c30e9
commit d36632c21a
149 changed files with 16255 additions and 17722 deletions

View File

@@ -15,98 +15,94 @@ import tomllib
import re
import glob
from pathlib import Path, PureWindowsPath
from typing import Any
import summarize
import project_manager
from file_cache import ASTParser
def find_next_increment(output_dir: Path, namespace: str) -> int:
    """Return the next unused sequence number for ``<namespace>_<N>.md``.

    Scans the regular files directly inside *output_dir* whose names match
    ``<namespace>_<digits>.md`` and returns the largest number found plus
    one, or ``1`` when nothing matches.

    Args:
        output_dir: Directory to scan. It must already exist, because
            ``Path.iterdir`` raises otherwise.
        namespace: File-name prefix; escaped before being used in the regex.
    """
    pattern = re.compile(rf"^{re.escape(namespace)}_(\d+)\.md$")
    numbers = (
        int(found.group(1))
        for candidate in output_dir.iterdir()
        if candidate.is_file() and (found := pattern.match(candidate.name))
    )
    return max(numbers, default=0) + 1
def is_absolute_with_drive(entry: str) -> bool:
    """Return True when *entry* carries a Windows drive or UNC share prefix.

    The check is purely lexical (``PureWindowsPath``), so it gives the same
    answer on every platform and never touches the filesystem. Only the
    drive component is inspected, so a drive-relative path such as ``C:foo``
    also counts.

    Input that cannot be interpreted as a path (e.g. ``None``) yields False
    instead of raising.
    """
    try:
        return PureWindowsPath(entry).drive != ""
    except (TypeError, ValueError):
        return False
def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
    """Expand one configured path entry into a sorted list of paths.

    Entries with a Windows drive/UNC prefix are used as given; all others
    are taken relative to *base_dir*. An entry containing ``*`` is expanded
    with ``glob`` (``**`` recurses) and only existing regular files are
    kept. Any other entry yields exactly one path whether or not it exists,
    so callers report missing files themselves.

    History files (``history.toml`` or ``*_history.toml``, compared
    case-insensitively) are always dropped from the result.
    """
    has_drive = is_absolute_with_drive(entry)
    if "*" in entry:
        root = Path(entry) if has_drive else base_dir / entry
        candidates = [
            Path(hit)
            for hit in glob.glob(str(root), recursive=True)
            if Path(hit).is_file()
        ]
    else:
        # Only base_dir-relative literals are resolved; drive-qualified
        # entries are kept verbatim.
        candidates = [Path(entry) if has_drive else (base_dir / entry).resolve()]

    def _is_history_file(path: Path) -> bool:
        name = path.name.lower()
        return name == "history.toml" or name.endswith("_history.toml")

    return sorted(p for p in candidates if not _is_history_file(p))
def build_discussion_section(history: list[str]) -> str:
    """Render discussion excerpts as numbered markdown sections.

    Each excerpt is stripped of surrounding whitespace, given a
    ``### Discussion Excerpt <n>`` heading (numbered from 1) and joined to
    its neighbours with a horizontal rule. An empty *history* yields ``""``.
    """
    sections = [
        f"### Discussion Excerpt {number}\n\n{paste.strip()}"
        for number, paste in enumerate(history, start=1)
    ]
    return "\n\n---\n\n".join(sections)
def build_files_section(base_dir: Path, files: list[str | dict]) -> str:
sections = []
for entry_raw in files:
if isinstance(entry_raw, dict):
entry = entry_raw.get("path")
else:
entry = entry_raw
paths = resolve_paths(base_dir, entry)
if not paths:
sections.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```")
continue
for path in paths:
suffix = path.suffix.lstrip(".")
lang = suffix if suffix else "text"
try:
content = path.read_text(encoding="utf-8")
except FileNotFoundError:
content = f"ERROR: file not found: {path}"
except Exception as e:
content = f"ERROR: {e}"
original = entry if "*" not in entry else str(path)
sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
return "\n\n---\n\n".join(sections)
def build_files_section(base_dir: Path, files: list[str | dict[str, Any]]) -> str:
sections = []
for entry_raw in files:
if isinstance(entry_raw, dict):
entry = entry_raw.get("path")
else:
entry = entry_raw
paths = resolve_paths(base_dir, entry)
if not paths:
sections.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```")
continue
for path in paths:
suffix = path.suffix.lstrip(".")
lang = suffix if suffix else "text"
try:
content = path.read_text(encoding="utf-8")
except FileNotFoundError:
content = f"ERROR: file not found: {path}"
except Exception as e:
content = f"ERROR: {e}"
original = entry if "*" not in entry else str(path)
sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
return "\n\n---\n\n".join(sections)
def build_screenshots_section(base_dir: Path, screenshots: list[str]) -> str:
    """Render screenshot entries as markdown image links.

    Each entry is expanded with ``resolve_paths`` relative to *base_dir*.
    Existing files become ``![name](posix/path)`` links; entries that match
    nothing, or paths that do not exist, become italic error lines instead
    of raising. Sections are joined with horizontal rules.
    """
    sections = []
    for entry in screenshots:
        paths = resolve_paths(base_dir, entry)
        if not paths:
            sections.append(f"### `{entry}`\n\n_ERROR: no files matched: {entry}_")
            continue
        for path in paths:
            # Wildcard entries are labelled with the concrete file they matched.
            original = entry if "*" not in entry else str(path)
            if path.exists():
                sections.append(f"### `{original}`\n\n![{path.name}]({path.as_posix()})")
            else:
                sections.append(f"### `{original}`\n\n_ERROR: file not found: {path}_")
    return "\n\n---\n\n".join(sections)
def build_file_items(base_dir: Path, files: list[str | dict]) -> list[dict]:
"""
def build_file_items(base_dir: Path, files: list[str | dict[str, Any]]) -> list[dict[str, Any]]:
"""
Return a list of dicts describing each file, for use by ai_client when it
wants to upload individual files rather than inline everything as markdown.
@@ -118,240 +114,213 @@ def build_file_items(base_dir: Path, files: list[str | dict]) -> list[dict]:
mtime : float (last modification time, for skip-if-unchanged optimization)
tier : int | None (optional tier for context management)
"""
items = []
for entry_raw in files:
if isinstance(entry_raw, dict):
entry = entry_raw.get("path")
tier = entry_raw.get("tier")
else:
entry = entry_raw
tier = None
items = []
for entry_raw in files:
if isinstance(entry_raw, dict):
entry = entry_raw.get("path")
tier = entry_raw.get("tier")
else:
entry = entry_raw
tier = None
paths = resolve_paths(base_dir, entry)
if not paths:
items.append({"path": None, "entry": entry, "content": f"ERROR: no files matched: {entry}", "error": True, "mtime": 0.0, "tier": tier})
continue
for path in paths:
try:
content = path.read_text(encoding="utf-8")
mtime = path.stat().st_mtime
error = False
except FileNotFoundError:
content = f"ERROR: file not found: {path}"
mtime = 0.0
error = True
except Exception as e:
content = f"ERROR: {e}"
mtime = 0.0
error = True
items.append({"path": path, "entry": entry, "content": content, "error": error, "mtime": mtime, "tier": tier})
return items
paths = resolve_paths(base_dir, entry)
if not paths:
items.append({"path": None, "entry": entry, "content": f"ERROR: no files matched: {entry}", "error": True, "mtime": 0.0, "tier": tier})
continue
for path in paths:
try:
content = path.read_text(encoding="utf-8")
mtime = path.stat().st_mtime
error = False
except FileNotFoundError:
content = f"ERROR: file not found: {path}"
mtime = 0.0
error = True
except Exception as e:
content = f"ERROR: {e}"
mtime = 0.0
error = True
items.append({"path": path, "entry": entry, "content": content, "error": error, "mtime": mtime, "tier": tier})
return items
def build_summary_section(base_dir: Path, files: list[str | dict[str, Any]]) -> str:
    """Build a compact summary section using summarize.py, one short block per file.

    Used as the initial <context> block instead of full file contents. The
    files are read via ``build_file_items`` and the resulting items are
    handed to ``summarize.build_summary_markdown``.
    """
    return summarize.build_summary_markdown(build_file_items(base_dir, files))
def _build_files_section_from_items(file_items: list[dict[str, Any]]) -> str:
    """Build the files markdown section from pre-read file items (avoids double I/O).

    Items whose ``"path"`` is None are emitted as ``text`` blocks holding
    their content verbatim. Every other item is fenced with its extension
    as the language tag (``text`` when there is none, or when the path
    object has no ``suffix`` attribute).
    """
    sections = []
    for item in file_items:
        path = item.get("path")
        entry = item.get("entry", "unknown")
        content = item.get("content", "")
        if path is None:
            sections.append(f"### `{entry}`\n\n```text\n{content}\n```")
            continue
        suffix = path.suffix.lstrip(".") if hasattr(path, "suffix") else "text"
        lang = suffix or "text"
        # Wildcard entries are labelled with the concrete file they matched.
        original = entry if "*" not in entry else str(path)
        sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
    return "\n\n---\n\n".join(sections)
def build_markdown_from_items(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
    """Build markdown from pre-read file items instead of re-reading from disk.

    Sections appear in a fixed order (files, screenshots, discussion
    history) and empty inputs are skipped. With *summary_only* the files
    section is produced by ``summarize.build_summary_markdown`` rather than
    containing full file contents.
    """
    parts = []
    # STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
    if file_items:
        if summary_only:
            parts.append("## Files (Summary)\n\n" + summarize.build_summary_markdown(file_items))
        else:
            parts.append("## Files\n\n" + _build_files_section_from_items(file_items))
    if screenshots:
        parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
    # DYNAMIC SUFFIX: History changes every turn, must go last
    if history:
        parts.append("## Discussion History\n\n" + build_discussion_section(history))
    return "\n\n---\n\n".join(parts)
def build_markdown_no_history(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
    """Build markdown with only files + screenshots (no history). Used for stable caching."""
    return build_markdown_from_items(
        file_items,
        screenshot_base_dir,
        screenshots,
        history=[],
        summary_only=summary_only,
    )
def build_discussion_text(history: list[str]) -> str:
    """Build just the discussion history section text.

    Returns an empty string if there is no history; otherwise the
    ``## Discussion History`` heading followed by the rendered excerpts.
    """
    if not history:
        return ""
    return "## Discussion History\n\n" + build_discussion_section(history)
def build_tier1_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
    """Tier 1 Context: Strategic/Orchestration.

    Full content for the core conductor files (``product.md``,
    ``tech-stack.md``, ``workflow.md``, ``tracks.md``, matched by file name)
    and for items with ``tier == 1``; every other item is passed to
    ``summarize.summarise_file``. Screenshots and discussion history follow
    when present.

    Items included in full that have no path (unmatched entries) are fenced
    as ``text`` instead of failing on the missing suffix.
    """
    core_files = {"product.md", "tech-stack.md", "workflow.md", "tracks.md"}
    parts = []
    if file_items:
        sections = []
        for item in file_items:
            path = item.get("path")
            name = path.name if path else ""
            heading = "### `" + (item.get("entry") or str(path)) + "`\n\n"
            if name in core_files or item.get("tier") == 1:
                # Include in full
                lang = path.suffix.lstrip(".") if path and path.suffix else "text"
                sections.append(heading + f"```{lang}\n{item.get('content', '')}\n```")
            else:
                # Summarize
                sections.append(heading + summarize.summarise_file(path, item.get("content", "")))
        parts.append("## Files (Tier 1 - Mixed)\n\n" + "\n\n---\n\n".join(sections))
    if screenshots:
        parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
    if history:
        parts.append("## Discussion History\n\n" + build_discussion_section(history))
    return "\n\n---\n\n".join(parts)
def build_tier2_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
    """Tier 2 Context: Architectural/Tech Lead.

    Full content for all files (standard behavior); delegates to
    ``build_markdown_from_items`` with ``summary_only=False``.
    """
    return build_markdown_from_items(
        file_items,
        screenshot_base_dir,
        screenshots,
        history,
        summary_only=False,
    )
def build_tier3_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], focus_files: list[str]) -> str:
    """Tier 3 Context: Execution/Worker.

    Full content for focus files and items with ``tier == 3``. Other
    ``.py`` items without an error are reduced to an AST skeleton, falling
    back to ``summarize.summarise_file`` if skeleton extraction raises; all
    remaining items are summarised. Screenshots and discussion history
    follow when present.

    An item is a focus file when a non-empty string in *focus_files* equals
    its entry, equals its file name, or occurs as a substring of its path.
    Empty focus strings are ignored, because the substring test would
    otherwise make them match every file.
    """
    parts = []
    if file_items:
        sections = []
        for item in file_items:
            path = item.get("path")
            entry = item.get("entry", "")
            path_str = str(path) if path else ""
            label = entry or path_str
            content = item.get("content", "")
            # Check if this file is in focus_files (by name or path)
            is_focus = any(
                focus and (focus == entry or (path and focus == path.name) or focus in path_str)
                for focus in focus_files
            )
            if is_focus or item.get("tier") == 3:
                lang = path.suffix.lstrip(".") if path and path.suffix else "text"
                sections.append(f"### `{label}`\n\n```{lang}\n{content}\n```")
            elif path and path.suffix == ".py" and not item.get("error"):
                try:
                    skeleton = ASTParser("python").get_skeleton(content)
                except Exception:
                    # Fallback to summary if AST parsing fails
                    sections.append(f"### `{label}`\n\n" + summarize.summarise_file(path, content))
                else:
                    sections.append(f"### `{label}` (AST Skeleton)\n\n```python\n{skeleton}\n```")
            else:
                sections.append(f"### `{label}`\n\n" + summarize.summarise_file(path, content))
        parts.append("## Files (Tier 3 - Focused)\n\n" + "\n\n---\n\n".join(sections))
    if screenshots:
        parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
    if history:
        parts.append("## Discussion History\n\n" + build_discussion_section(history))
    return "\n\n---\n\n".join(parts)
def build_markdown(base_dir: Path, files: list[str | dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
parts = []
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
if files:
if summary_only:
parts.append("## Files (Summary)\n\n" + build_summary_section(base_dir, files))
else:
parts.append("## Files\n\n" + build_files_section(base_dir, files))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
# DYNAMIC SUFFIX: History changes every turn, must go last
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def run(config: dict[str, Any]) -> tuple[str, Path, list[dict[str, Any]]]:
namespace = config.get("project", {}).get("name")
if not namespace:
namespace = config.get("output", {}).get("namespace", "project")
output_dir = Path(config["output"]["output_dir"])
base_dir = Path(config["files"]["base_dir"])
files = config["files"].get("paths", [])
screenshot_base_dir = Path(config.get("screenshots", {}).get("base_dir", "."))
screenshots = config.get("screenshots", {}).get("paths", [])
history = config.get("discussion", {}).get("history", [])
output_dir.mkdir(parents=True, exist_ok=True)
increment = find_next_increment(output_dir, namespace)
output_file = output_dir / f"{namespace}_{increment:03d}.md"
# Build file items once, then construct markdown from them (avoids double I/O)
file_items = build_file_items(base_dir, files)
summary_only = config.get("project", {}).get("summary_only", False)
markdown = build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history,
summary_only=summary_only)
output_file.write_text(markdown, encoding="utf-8")
return markdown, output_file, file_items
def build_markdown(base_dir: Path, files: list[str | dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
parts = []
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
if files:
if summary_only:
parts.append("## Files (Summary)\n\n" + build_summary_section(base_dir, files))
else:
parts.append("## Files\n\n" + build_files_section(base_dir, files))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
# DYNAMIC SUFFIX: History changes every turn, must go last
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def run(config: dict) -> tuple[str, Path, list[dict]]:
namespace = config.get("project", {}).get("name")
if not namespace:
namespace = config.get("output", {}).get("namespace", "project")
output_dir = Path(config["output"]["output_dir"])
base_dir = Path(config["files"]["base_dir"])
files = config["files"].get("paths", [])
screenshot_base_dir = Path(config.get("screenshots", {}).get("base_dir", "."))
screenshots = config.get("screenshots", {}).get("paths", [])
history = config.get("discussion", {}).get("history", [])
output_dir.mkdir(parents=True, exist_ok=True)
increment = find_next_increment(output_dir, namespace)
output_file = output_dir / f"{namespace}_{increment:03d}.md"
# Build file items once, then construct markdown from them (avoids double I/O)
file_items = build_file_items(base_dir, files)
summary_only = config.get("project", {}).get("summary_only", False)
markdown = build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history,
summary_only=summary_only)
output_file.write_text(markdown, encoding="utf-8")
return markdown, output_file, file_items
def main() -> None:
    """Aggregate the active project named in ``config.toml``.

    Looks for ``config.toml`` in the current working directory, reads
    ``projects.active`` from it, loads that project through
    ``project_manager`` and runs the aggregation. Prints a message and
    returns without doing anything when the config file or the active
    project setting is missing.
    """
    # Load global config to find active project
    config_path = Path("config.toml")
    if not config_path.exists():
        print("config.toml not found.")
        return
    with open(config_path, "rb") as f:
        global_cfg = tomllib.load(f)
    active_path = global_cfg.get("projects", {}).get("active")
    if not active_path:
        print("No active project found in config.toml.")
        return
    # Use project_manager to load project (handles history segregation)
    proj = project_manager.load_project(active_path)
    # Use flat_config to make it compatible with aggregate.run()
    config = project_manager.flat_config(proj)
    _markdown, output_file, _items = run(config)
    print(f"Written: {output_file}")


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

View File

@@ -3,246 +3,240 @@ import json
import time
class ApiHookClient:
def __init__(self, base_url="http://127.0.0.1:8999", max_retries=5, retry_delay=0.2):
self.base_url = base_url
self.max_retries = max_retries
self.retry_delay = retry_delay
def __init__(self, base_url="http://127.0.0.1:8999", max_retries=5, retry_delay=0.2):
self.base_url = base_url
self.max_retries = max_retries
self.retry_delay = retry_delay
def wait_for_server(self, timeout=3):
    """Poll the /status endpoint until the server is ready or timeout is reached.

    Returns True as soon as ``get_status()`` reports ``status == 'ok'``,
    and False once *timeout* seconds have elapsed. Polls are spaced 0.1 s
    apart whether the server was unreachable or answered with another
    status, so a server that is up but not yet ready is not hammered.
    """
    # Monotonic clock: elapsed time must not jump with wall-clock changes.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            if self.get_status().get('status') == 'ok':
                return True
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            pass  # server not reachable yet; poll again
        time.sleep(0.1)
    return False
def _make_request(self, method, endpoint, data=None, timeout=None):
url = f"{self.base_url}{endpoint}"
headers = {'Content-Type': 'application/json'}
last_exception = None
# Increase default request timeout for local server
req_timeout = timeout if timeout is not None else 2.0
for attempt in range(self.max_retries + 1):
try:
if method == 'GET':
response = requests.get(url, timeout=req_timeout)
elif method == 'POST':
response = requests.post(url, json=data, headers=headers, timeout=req_timeout)
else:
raise ValueError(f"Unsupported HTTP method: {method}")
def _make_request(self, method, endpoint, data=None, timeout=None):
url = f"{self.base_url}{endpoint}"
headers = {'Content-Type': 'application/json'}
last_exception = None
# Increase default request timeout for local server
req_timeout = timeout if timeout is not None else 2.0
for attempt in range(self.max_retries + 1):
try:
if method == 'GET':
response = requests.get(url, timeout=req_timeout)
elif method == 'POST':
response = requests.post(url, json=data, headers=headers, timeout=req_timeout)
else:
raise ValueError(f"Unsupported HTTP method: {method}")
response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
return response.json()
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
last_exception = e
if attempt < self.max_retries:
time.sleep(self.retry_delay)
continue
else:
if isinstance(e, requests.exceptions.Timeout):
raise requests.exceptions.Timeout(f"Request to {endpoint} timed out after {self.max_retries} retries.") from e
else:
raise requests.exceptions.ConnectionError(f"Could not connect to API hook server at {self.base_url} after {self.max_retries} retries.") from e
except requests.exceptions.HTTPError as e:
raise requests.exceptions.HTTPError(f"HTTP error {e.response.status_code} for {endpoint}: {e.response.text}") from e
except json.JSONDecodeError as e:
raise ValueError(f"Failed to decode JSON from response for {endpoint}: {response.text}") from e
if last_exception:
raise last_exception
response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
return response.json()
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
last_exception = e
if attempt < self.max_retries:
time.sleep(self.retry_delay)
continue
else:
if isinstance(e, requests.exceptions.Timeout):
raise requests.exceptions.Timeout(f"Request to {endpoint} timed out after {self.max_retries} retries.") from e
else:
raise requests.exceptions.ConnectionError(f"Could not connect to API hook server at {self.base_url} after {self.max_retries} retries.") from e
except requests.exceptions.HTTPError as e:
raise requests.exceptions.HTTPError(f"HTTP error {e.response.status_code} for {endpoint}: {e.response.text}") from e
except json.JSONDecodeError as e:
raise ValueError(f"Failed to decode JSON from response for {endpoint}: {response.text}") from e
if last_exception:
raise last_exception
def get_status(self):
    """Check the health of the hook server.

    Sends a single GET to ``/status`` with a 0.2 s timeout (no retries) and
    returns the decoded JSON body.

    Raises:
        requests.exceptions.ConnectionError: on any failure (unreachable
            server, HTTP error status, undecodable body); the underlying
            exception is chained as the cause.
    """
    url = f"{self.base_url}/status"
    try:
        response = requests.get(url, timeout=0.2)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        raise requests.exceptions.ConnectionError(f"Could not reach /status at {self.base_url}") from e
def get_project(self):
    """Return the decoded JSON response of ``GET /api/project``."""
    return self._make_request('GET', '/api/project')
def post_project(self, project_data):
    """POST *project_data* to ``/api/project``, wrapped as ``{'project': ...}``."""
    return self._make_request('POST', '/api/project', data={'project': project_data})
def get_session(self):
    """Return the decoded JSON response of ``GET /api/session``."""
    return self._make_request('GET', '/api/session')
def get_mma_status(self):
    """Retrieve current MMA status (track, tickets, tier, etc.) via ``GET /api/gui/mma_status``."""
    return self._make_request('GET', '/api/gui/mma_status')
def push_event(self, event_type, payload):
    """Push an event to the GUI's AsyncEventQueue via the /api/gui endpoint.

    *event_type* is sent as the ``"action"`` field and *payload* as
    ``"payload"``; the result of ``post_gui`` is returned.
    """
    return self.post_gui({
        "action": event_type,
        "payload": payload,
    })
def get_performance(self):
    """Retrieve UI performance metrics via ``GET /api/performance``."""
    return self._make_request('GET', '/api/performance')
def post_session(self, session_entries):
    """POST *session_entries* to ``/api/session`` as ``{'session': {'entries': ...}}``."""
    return self._make_request('POST', '/api/session', data={'session': {'entries': session_entries}})
def post_gui(self, gui_data):
    """POST *gui_data* unchanged to ``/api/gui`` and return the decoded response."""
    return self._make_request('POST', '/api/gui', data=gui_data)
def select_tab(self, tab_bar, tab):
    """Tell the GUI to switch to a specific tab in a tab bar.

    Posts a ``select_tab`` action carrying *tab_bar* and *tab* through
    ``post_gui`` and returns its result.
    """
    return self.post_gui({
        "action": "select_tab",
        "tab_bar": tab_bar,
        "tab": tab,
    })
def select_list_item(self, listbox, item_value):
    """Tell the GUI to select an item in a listbox by its value.

    Posts a ``select_list_item`` action carrying *listbox* and *item_value*
    through ``post_gui`` and returns its result.
    """
    return self.post_gui({
        "action": "select_list_item",
        "listbox": listbox,
        "item_value": item_value,
    })
def set_value(self, item, value):
    """Set the value of a GUI item.

    Posts a ``set_value`` action carrying *item* and *value* through
    ``post_gui`` and returns its result.
    """
    return self.post_gui({
        "action": "set_value",
        "item": item,
        "value": value,
    })
def get_value(self, item):
"""Gets the value of a GUI item via its mapped field."""
try:
# First try direct field querying via POST
res = self._make_request('POST', '/api/gui/value', data={"field": item})
if res and "value" in res:
v = res.get("value")
if v is not None:
return v
except Exception:
pass
try:
# Try GET fallback
res = self._make_request('GET', f'/api/gui/value/{item}')
if res and "value" in res:
v = res.get("value")
if v is not None:
return v
except Exception:
pass
try:
# Fallback for thinking/live/prior which are in diagnostics
diag = self._make_request('GET', '/api/gui/diagnostics')
if item in diag:
return diag[item]
# Map common indicator tags to diagnostics keys
mapping = {
"thinking_indicator": "thinking",
"operations_live_indicator": "live",
"prior_session_indicator": "prior"
}
key = mapping.get(item)
if key and key in diag:
return diag[key]
except Exception:
pass
return None
def get_value(self, item):
    """Get the value of a GUI item via its mapped field.

    Tries POST ``/api/gui/value``, then GET ``/api/gui/value/<item>``, then
    the diagnostics endpoint. Failures of an individual attempt are ignored
    so the next source can be tried.

    Returns:
        The first non-``None`` value found, otherwise ``None``.
    """
    try:
        # First try direct field querying via POST
        res = self._make_request('POST', '/api/gui/value', data={"field": item})
        if res and "value" in res:
            v = res.get("value")
            if v is not None:
                return v
    except Exception:
        pass
    try:
        # Try GET fallback
        res = self._make_request('GET', f'/api/gui/value/{item}')
        if res and "value" in res:
            v = res.get("value")
            if v is not None:
                return v
    except Exception:
        pass
    try:
        # Fallback for thinking/live/prior which are in diagnostics
        diag = self._make_request('GET', '/api/gui/diagnostics')
        if item in diag:
            return diag[item]
        # Map common indicator tags to diagnostics keys
        mapping = {
            "thinking_indicator": "thinking",
            "operations_live_indicator": "live",
            "prior_session_indicator": "prior"
        }
        key = mapping.get(item)
        if key and key in diag:
            return diag[key]
    except Exception:
        pass
    return None


def get_text_value(self, item_tag):
    """Wrap ``get_value`` and return its string representation, or ``None``."""
    val = self.get_value(item_tag)
    return str(val) if val is not None else None


def get_node_status(self, node_tag):
    """Return the status of a DAG node.

    Uses ``get_value`` first; when that yields ``None`` the diagnostics
    payload is consulted, looking under its ``nodes`` entry and then at the
    top level. Returns ``None`` when nothing is found or the request fails.
    """
    val = self.get_value(node_tag)
    if val is not None:
        return val
    try:
        diag = self._make_request('GET', '/api/gui/diagnostics')
        if 'nodes' in diag and node_tag in diag['nodes']:
            return diag['nodes'][node_tag]
        if node_tag in diag:
            return diag[node_tag]
    except Exception:
        pass
    return None
def click(self, item, *args, **kwargs):
    """Queue a simulated click on a GUI button or item.

    ``user_data`` is lifted out of ``kwargs`` and sent as its own field; the
    remaining positional and keyword arguments are forwarded unchanged.

    Returns:
        Whatever ``post_gui`` returns for the queued ``click`` action.
    """
    extra = kwargs.pop('user_data', None)
    payload = {
        "action": "click",
        "item": item,
        "args": args,
        "kwargs": kwargs,
        "user_data": extra,
    }
    return self.post_gui(payload)
def get_text_value(self, item_tag):
    """Return ``str(get_value(item_tag))``, or ``None`` when there is no value."""
    raw = self.get_value(item_tag)
    if raw is None:
        return None
    return str(raw)
def get_indicator_state(self, tag):
    """Report whether an indicator is shown, based on the diagnostics endpoint.

    Known indicator tags are translated to their diagnostics key; any other
    tag is used as the key itself.

    Returns:
        ``{"tag": tag, "shown": <value>}`` on success (``shown`` defaults to
        ``False`` when the key is absent), or the same dict with
        ``shown=False`` plus an ``error`` message when the lookup raises.
    """
    diag_keys = {
        "thinking_indicator": "thinking",
        "operations_live_indicator": "live",
        "prior_session_indicator": "prior"
    }
    key = diag_keys.get(tag, tag)
    try:
        shown = self._make_request('GET', '/api/gui/diagnostics').get(key, False)
    except Exception as exc:
        return {"tag": tag, "shown": False, "error": str(exc)}
    return {"tag": tag, "shown": shown}
def get_node_status(self, node_tag):
    """Return the status of a DAG node, falling back to diagnostics.

    ``get_value`` is asked first. When it returns ``None`` the diagnostics
    payload is searched: first under its ``nodes`` entry, then at the top
    level. Any exception during the fallback is swallowed.

    Returns:
        The status value, or ``None`` when it cannot be determined.
    """
    direct = self.get_value(node_tag)
    if direct is not None:
        return direct
    try:
        report = self._make_request('GET', '/api/gui/diagnostics')
        if 'nodes' in report and node_tag in report['nodes']:
            return report['nodes'][node_tag]
        if node_tag in report:
            return report[node_tag]
    except Exception:
        pass
    return None
def get_events(self):
    """Fetch the server's pending events via GET ``/api/events``.

    Returns:
        The ``events`` list from the response (empty when the key is
        missing), or ``[]`` when the request or its decoding raises.
    """
    try:
        payload = self._make_request('GET', '/api/events')
        return payload.get("events", [])
    except Exception:
        return []
def click(self, item, *args, **kwargs):
    """Queue a simulated click on a GUI button or item.

    ``user_data`` is lifted out of ``kwargs`` and sent as its own field; the
    remaining positional and keyword arguments are forwarded unchanged.

    Returns:
        Whatever ``post_gui`` returns for the queued ``click`` action.
    """
    extra = kwargs.pop('user_data', None)
    payload = {
        "action": "click",
        "item": item,
        "args": args,
        "kwargs": kwargs,
        "user_data": extra,
    }
    return self.post_gui(payload)
def wait_for_event(self, event_type, timeout=5):
    """Poll the event queue until an event of the requested type appears.

    Args:
        event_type: Value compared against each event's ``"type"`` entry.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The first matching event dict, or ``None`` once the timeout elapses.
        Non-matching events returned by a poll are not retained here.
    """
    # A monotonic clock keeps the timeout correct even if the system
    # wall clock is adjusted while we are polling.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        for ev in self.get_events():
            if ev.get("type") == event_type:
                return ev
        time.sleep(0.1)  # Fast poll
    return None
def get_indicator_state(self, tag):
    """Report whether an indicator is shown, based on the diagnostics endpoint.

    Known indicator tags are translated to their diagnostics key; any other
    tag is used as the key itself.

    Returns:
        ``{"tag": tag, "shown": <value>}`` on success (``shown`` defaults to
        ``False`` when the key is absent), or the same dict with
        ``shown=False`` plus an ``error`` message when the lookup raises.
    """
    diag_keys = {
        "thinking_indicator": "thinking",
        "operations_live_indicator": "live",
        "prior_session_indicator": "prior"
    }
    key = diag_keys.get(tag, tag)
    try:
        shown = self._make_request('GET', '/api/gui/diagnostics').get(key, False)
    except Exception as exc:
        return {"tag": tag, "shown": False, "error": str(exc)}
    return {"tag": tag, "shown": shown}
def wait_for_value(self, item, expected, timeout=5):
    """Poll until ``get_value(item)`` equals ``expected``.

    Args:
        item: Tag of the GUI item to read.
        expected: Value the item must reach.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        ``True`` as soon as the values compare equal, ``False`` on timeout.
    """
    # Monotonic clock: the timeout must not be affected by wall-clock jumps.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if self.get_value(item) == expected:
            return True
        time.sleep(0.1)  # Fast poll
    return False
def get_events(self):
    """Fetch the server's pending events via GET ``/api/events``.

    Returns:
        The ``events`` list from the response (empty when the key is
        missing), or ``[]`` when the request or its decoding raises.
    """
    try:
        payload = self._make_request('GET', '/api/events')
        return payload.get("events", [])
    except Exception:
        return []
def reset_session(self):
    """Simulate a click on the GUI item tagged ``btn_reset`` (Reset Session)."""
    outcome = self.click("btn_reset")
    return outcome
def wait_for_event(self, event_type, timeout=5):
    """Poll the event queue until an event of the requested type appears.

    Args:
        event_type: Value compared against each event's ``"type"`` entry.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The first matching event dict, or ``None`` once the timeout elapses.
        Non-matching events returned by a poll are not retained here.
    """
    # A monotonic clock keeps the timeout correct even if the system
    # wall clock is adjusted while we are polling.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        for ev in self.get_events():
            if ev.get("type") == event_type:
                return ev
        time.sleep(0.1)  # Fast poll
    return None
def wait_for_value(self, item, expected, timeout=5):
    """Poll until ``get_value(item)`` equals ``expected``.

    Args:
        item: Tag of the GUI item to read.
        expected: Value the item must reach.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        ``True`` as soon as the values compare equal, ``False`` on timeout.
    """
    # Monotonic clock: the timeout must not be affected by wall-clock jumps.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if self.get_value(item) == expected:
            return True
        time.sleep(0.1)  # Fast poll
    return False
def reset_session(self):
    """Simulate a click on the GUI item tagged ``btn_reset`` (Reset Session)."""
    outcome = self.click("btn_reset")
    return outcome
def request_confirmation(self, tool_name, args):
    """Ask the user to approve a tool call via the GUI (blocking call).

    Posts a ``tool_approval`` request to ``/api/ask`` and waits for the reply.

    Args:
        tool_name: Name of the tool awaiting approval.
        args: Arguments of the tool call, forwarded as-is.

    Returns:
        The ``response`` entry of the reply, or ``None`` when the request
        produced no reply object or the reply has no ``response`` entry.
    """
    # Using a long timeout as this waits for human input (60 seconds)
    res = self._make_request('POST', '/api/ask',
        data={'type': 'tool_approval', 'tool': tool_name, 'args': args},
        timeout=60.0)
    # Other callers of _make_request in this client guard against a falsy
    # result; do the same instead of failing with AttributeError.
    if not res:
        return None
    return res.get('response')

View File

@@ -6,338 +6,313 @@ import logging
import session_logger
class HookServerInstance(ThreadingHTTPServer):
    """Custom HTTPServer that carries a reference to the main App instance.

    Request handlers reach the application through ``self.server.app``.
    """

    def __init__(self, server_address, RequestHandlerClass, app):
        """Create the server and remember ``app`` for the request handlers.

        Args:
            server_address: ``(host, port)`` tuple passed to the base server.
            RequestHandlerClass: Handler class instantiated per request.
            app: Application object exposed to handlers as ``server.app``.
        """
        super().__init__(server_address, RequestHandlerClass)
        self.app = app
class HookHandler(BaseHTTPRequestHandler):
"""Handles incoming HTTP requests for the API hooks."""
def do_GET(self):
app = self.server.app
session_logger.log_api_hook("GET", self.path, "")
if self.path == '/status':
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps({'status': 'ok'}).encode('utf-8'))
elif self.path == '/api/project':
import project_manager
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
flat = project_manager.flat_config(app.project)
self.wfile.write(json.dumps({'project': flat}).encode('utf-8'))
elif self.path == '/api/session':
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(
json.dumps({'session': {'entries': app.disc_entries}}).
encode('utf-8'))
elif self.path == '/api/performance':
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
metrics = {}
if hasattr(app, 'perf_monitor'):
metrics = app.perf_monitor.get_metrics()
self.wfile.write(json.dumps({'performance': metrics}).encode('utf-8'))
elif self.path == '/api/events':
# Long-poll or return current event queue
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
events = []
if hasattr(app, '_api_event_queue'):
with app._api_event_queue_lock:
events = list(app._api_event_queue)
app._api_event_queue.clear()
self.wfile.write(json.dumps({'events': events}).encode('utf-8'))
elif self.path == '/api/gui/value':
# POST with {"field": "field_tag"} to get value
content_length = int(self.headers.get('Content-Length', 0))
body = self.rfile.read(content_length)
data = json.loads(body.decode('utf-8'))
field_tag = data.get("field")
print(f"[DEBUG] Hook Server: get_value for {field_tag}")
"""Handles incoming HTTP requests for the API hooks."""
event = threading.Event()
result = {"value": None}
def do_GET(self):
app = self.server.app
session_logger.log_api_hook("GET", self.path, "")
if self.path == '/status':
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps({'status': 'ok'}).encode('utf-8'))
elif self.path == '/api/project':
import project_manager
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
flat = project_manager.flat_config(app.project)
self.wfile.write(json.dumps({'project': flat}).encode('utf-8'))
elif self.path == '/api/session':
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(
json.dumps({'session': {'entries': app.disc_entries}}).
encode('utf-8'))
elif self.path == '/api/performance':
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
metrics = {}
if hasattr(app, 'perf_monitor'):
metrics = app.perf_monitor.get_metrics()
self.wfile.write(json.dumps({'performance': metrics}).encode('utf-8'))
elif self.path == '/api/events':
# Long-poll or return current event queue
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
events = []
if hasattr(app, '_api_event_queue'):
with app._api_event_queue_lock:
events = list(app._api_event_queue)
app._api_event_queue.clear()
self.wfile.write(json.dumps({'events': events}).encode('utf-8'))
elif self.path == '/api/gui/value':
# POST with {"field": "field_tag"} to get value
content_length = int(self.headers.get('Content-Length', 0))
body = self.rfile.read(content_length)
data = json.loads(body.decode('utf-8'))
field_tag = data.get("field")
print(f"[DEBUG] Hook Server: get_value for {field_tag}")
event = threading.Event()
result = {"value": None}
def get_val():
try:
if field_tag in app._settable_fields:
attr = app._settable_fields[field_tag]
val = getattr(app, attr, None)
print(f"[DEBUG] Hook Server: attr={attr}, val={val}")
result["value"] = val
else:
print(f"[DEBUG] Hook Server: {field_tag} NOT in settable_fields")
finally:
event.set()
def get_val():
try:
if field_tag in app._settable_fields:
attr = app._settable_fields[field_tag]
val = getattr(app, attr, None)
print(f"[DEBUG] Hook Server: attr={attr}, val={val}")
result["value"] = val
else:
print(f"[DEBUG] Hook Server: {field_tag} NOT in settable_fields")
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": get_val
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
elif self.path.startswith('/api/gui/value/'):
# Generic endpoint to get the value of any settable field
field_tag = self.path.split('/')[-1]
event = threading.Event()
result = {"value": None}
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": get_val
})
def get_val():
try:
if field_tag in app._settable_fields:
attr = app._settable_fields[field_tag]
result["value"] = getattr(app, attr, None)
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": get_val
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
elif self.path == '/api/gui/mma_status':
event = threading.Event()
result = {}
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
elif self.path.startswith('/api/gui/value/'):
# Generic endpoint to get the value of any settable field
field_tag = self.path.split('/')[-1]
event = threading.Event()
result = {"value": None}
def get_mma():
try:
result["mma_status"] = getattr(app, "mma_status", "idle")
result["active_tier"] = getattr(app, "active_tier", None)
result["active_track"] = getattr(app, "active_track", None)
result["active_tickets"] = getattr(app, "active_tickets", [])
result["mma_step_mode"] = getattr(app, "mma_step_mode", False)
result["pending_approval"] = app._pending_mma_approval is not None
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": get_mma
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
elif self.path == '/api/gui/diagnostics':
# Safe way to query multiple states at once via the main thread queue
event = threading.Event()
result = {}
def get_val():
try:
if field_tag in app._settable_fields:
attr = app._settable_fields[field_tag]
result["value"] = getattr(app, attr, None)
finally:
event.set()
def check_all():
try:
# Generic state check based on App attributes (works for both DPG and ImGui versions)
status = getattr(app, "ai_status", "idle")
result["thinking"] = status in ["sending...", "running powershell..."]
result["live"] = status in ["running powershell...", "fetching url...", "searching web...", "powershell done, awaiting AI..."]
result["prior"] = getattr(app, "is_viewing_prior_session", False)
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": check_all
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
self.wfile.write(json.dumps({'error': 'timeout'}).encode('utf-8'))
else:
self.send_response(404)
self.end_headers()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": get_val
})
def do_POST(self):
    """Dispatch a POST request to the matching hook endpoint.

    Routes handled:
        /api/project      replace ``app.project`` with the posted ``project``
                          value (kept unchanged when the key is absent).
        /api/session      replace ``app.disc_entries`` with ``session.entries``.
        /api/gui          append the posted JSON object to the GUI task queue.
        /api/ask          queue an ask and block up to 60 s for its answer.
        /api/ask/respond  deliver the answer for a pending ask.

    Unknown paths (and responses for unknown ask ids) get a bare 404. Any
    exception raised while handling the request is reported as a 500 with
    ``{"error": <message>}``.
    """
    app = self.server.app
    content_length = int(self.headers.get('Content-Length', 0))
    body = self.rfile.read(content_length)
    body_str = body.decode('utf-8') if body else ""
    session_logger.log_api_hook("POST", self.path, body_str)

    def send_json(status, payload):
        # Every JSON reply carries the Content-Type header; the 504 timeout
        # reply previously sent a JSON body without it.
        self.send_response(status)
        self.send_header('Content-Type', 'application/json')
        self.end_headers()
        self.wfile.write(json.dumps(payload).encode('utf-8'))

    def send_not_found():
        self.send_response(404)
        self.end_headers()

    try:
        data = json.loads(body_str) if body_str else {}
        if self.path == '/api/project':
            app.project = data.get('project', app.project)
            send_json(200, {'status': 'updated'})
        elif self.path == '/api/session':
            app.disc_entries = data.get('session', {}).get(
                'entries', app.disc_entries)
            send_json(200, {'status': 'updated'})
        elif self.path == '/api/gui':
            with app._pending_gui_tasks_lock:
                app._pending_gui_tasks.append(data)
            send_json(200, {'status': 'queued'})
        elif self.path == '/api/ask':
            request_id = str(uuid.uuid4())
            event = threading.Event()
            if not hasattr(app, '_pending_asks'):
                app._pending_asks = {}
            if not hasattr(app, '_ask_responses'):
                app._ask_responses = {}
            app._pending_asks[request_id] = event
            # Emit event for test/client discovery
            with app._api_event_queue_lock:
                app._api_event_queue.append({
                    "type": "ask_received",
                    "request_id": request_id,
                    "data": data
                })
            with app._pending_gui_tasks_lock:
                app._pending_gui_tasks.append({
                    "type": "ask",
                    "request_id": request_id,
                    "data": data
                })
            if event.wait(timeout=60.0):
                # Read the answer and clean it up in one step
                response_data = app._ask_responses.pop(request_id, None)
                send_json(200, {'status': 'ok', 'response': response_data})
            else:
                # Nobody answered in time: forget the pending ask
                app._pending_asks.pop(request_id, None)
                send_json(504, {'error': 'timeout'})
        elif self.path == '/api/ask/respond':
            request_id = data.get('request_id')
            response_data = data.get('response')
            if request_id and hasattr(app, '_pending_asks') and request_id in app._pending_asks:
                # Store the answer before waking the waiting /api/ask handler
                app._ask_responses[request_id] = response_data
                event = app._pending_asks[request_id]
                event.set()
                # Clean up pending ask entry
                del app._pending_asks[request_id]
                # Queue GUI task to clear the dialog
                with app._pending_gui_tasks_lock:
                    app._pending_gui_tasks.append({
                        "action": "clear_ask",
                        "request_id": request_id
                    })
                send_json(200, {'status': 'ok'})
            else:
                send_not_found()
        else:
            send_not_found()
    except Exception as e:
        send_json(500, {'error': str(e)})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
elif self.path == '/api/gui/mma_status':
event = threading.Event()
result = {}
def get_mma():
try:
result["mma_status"] = getattr(app, "mma_status", "idle")
result["active_tier"] = getattr(app, "active_tier", None)
result["active_track"] = getattr(app, "active_track", None)
result["active_tickets"] = getattr(app, "active_tickets", [])
result["mma_step_mode"] = getattr(app, "mma_step_mode", False)
result["pending_approval"] = app._pending_mma_approval is not None
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": get_mma
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
elif self.path == '/api/gui/diagnostics':
# Safe way to query multiple states at once via the main thread queue
event = threading.Event()
result = {}
def check_all():
try:
# Generic state check based on App attributes (works for both DPG and ImGui versions)
status = getattr(app, "ai_status", "idle")
result["thinking"] = status in ["sending...", "running powershell..."]
result["live"] = status in ["running powershell...", "fetching url...", "searching web...", "powershell done, awaiting AI..."]
result["prior"] = getattr(app, "is_viewing_prior_session", False)
finally:
event.set()
with app._pending_gui_tasks_lock:
app._pending_gui_tasks.append({
"action": "custom_callback",
"callback": check_all
})
if event.wait(timeout=2):
self.send_response(200)
self.send_header('Content-Type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(result).encode('utf-8'))
else:
self.send_response(504)
self.end_headers()
self.wfile.write(json.dumps({'error': 'timeout'}).encode('utf-8'))
else:
self.send_response(404)
self.end_headers()
def do_POST(self):
    """Dispatch a POST request to the matching hook endpoint.

    Routes handled:
        /api/project      replace ``app.project`` with the posted ``project``
                          value (kept unchanged when the key is absent).
        /api/session      replace ``app.disc_entries`` with ``session.entries``.
        /api/gui          append the posted JSON object to the GUI task queue.
        /api/ask          queue an ask and block up to 60 s for its answer.
        /api/ask/respond  deliver the answer for a pending ask.

    Unknown paths (and responses for unknown ask ids) get a bare 404. Any
    exception raised while handling the request is reported as a 500 with
    ``{"error": <message>}``.
    """
    app = self.server.app
    content_length = int(self.headers.get('Content-Length', 0))
    body = self.rfile.read(content_length)
    body_str = body.decode('utf-8') if body else ""
    session_logger.log_api_hook("POST", self.path, body_str)

    def send_json(status, payload):
        # Every JSON reply carries the Content-Type header; the 504 timeout
        # reply previously sent a JSON body without it.
        self.send_response(status)
        self.send_header('Content-Type', 'application/json')
        self.end_headers()
        self.wfile.write(json.dumps(payload).encode('utf-8'))

    def send_not_found():
        self.send_response(404)
        self.end_headers()

    try:
        data = json.loads(body_str) if body_str else {}
        if self.path == '/api/project':
            app.project = data.get('project', app.project)
            send_json(200, {'status': 'updated'})
        elif self.path == '/api/session':
            app.disc_entries = data.get('session', {}).get(
                'entries', app.disc_entries)
            send_json(200, {'status': 'updated'})
        elif self.path == '/api/gui':
            with app._pending_gui_tasks_lock:
                app._pending_gui_tasks.append(data)
            send_json(200, {'status': 'queued'})
        elif self.path == '/api/ask':
            request_id = str(uuid.uuid4())
            event = threading.Event()
            if not hasattr(app, '_pending_asks'):
                app._pending_asks = {}
            if not hasattr(app, '_ask_responses'):
                app._ask_responses = {}
            app._pending_asks[request_id] = event
            # Emit event for test/client discovery
            with app._api_event_queue_lock:
                app._api_event_queue.append({
                    "type": "ask_received",
                    "request_id": request_id,
                    "data": data
                })
            with app._pending_gui_tasks_lock:
                app._pending_gui_tasks.append({
                    "type": "ask",
                    "request_id": request_id,
                    "data": data
                })
            if event.wait(timeout=60.0):
                # Read the answer and clean it up in one step
                response_data = app._ask_responses.pop(request_id, None)
                send_json(200, {'status': 'ok', 'response': response_data})
            else:
                # Nobody answered in time: forget the pending ask
                app._pending_asks.pop(request_id, None)
                send_json(504, {'error': 'timeout'})
        elif self.path == '/api/ask/respond':
            request_id = data.get('request_id')
            response_data = data.get('response')
            if request_id and hasattr(app, '_pending_asks') and request_id in app._pending_asks:
                # Store the answer before waking the waiting /api/ask handler
                app._ask_responses[request_id] = response_data
                event = app._pending_asks[request_id]
                event.set()
                # Clean up pending ask entry
                del app._pending_asks[request_id]
                # Queue GUI task to clear the dialog
                with app._pending_gui_tasks_lock:
                    app._pending_gui_tasks.append({
                        "action": "clear_ask",
                        "request_id": request_id
                    })
                send_json(200, {'status': 'ok'})
            else:
                send_not_found()
        else:
            send_not_found()
    except Exception as e:
        send_json(500, {'error': str(e)})
def log_message(self, format, *args):
    """Send the handler's per-request log line to ``logging`` at INFO level.

    The parameter is named ``format`` to keep the signature used by the
    existing definition; the message is prefixed with ``"Hook API: "``.
    """
    # Defined once: the second, identical definition only shadowed the first.
    logging.info("Hook API: " + format % args)
class HookServer:
    """Owns the background HTTP hook server for one application object."""

    def __init__(self, app, port=8999):
        """Remember the app and port; nothing is started yet."""
        self.app = app
        self.port = port
        self.server = None
        self.thread = None

    def start(self):
        """Start serving on ``127.0.0.1:<port>`` in a daemon thread.

        Does nothing when a server thread is already alive, or when the app
        neither has ``test_hooks_enabled`` set nor uses the ``gemini_cli``
        provider. Before serving, any queue, lock, or dictionary the request
        handlers expect on the app is created if it is missing.
        """
        if self.thread and self.thread.is_alive():
            return
        is_gemini_cli = getattr(self.app, 'current_provider', '') == 'gemini_cli'
        if not getattr(self.app, 'test_hooks_enabled', False) and not is_gemini_cli:
            return
        # Ensure the app has the task queue and lock initialized
        if not hasattr(self.app, '_pending_gui_tasks'):
            self.app._pending_gui_tasks = []
        if not hasattr(self.app, '_pending_gui_tasks_lock'):
            self.app._pending_gui_tasks_lock = threading.Lock()
        # Initialize ask-related dictionaries
        if not hasattr(self.app, '_pending_asks'):
            self.app._pending_asks = {}
        if not hasattr(self.app, '_ask_responses'):
            self.app._ask_responses = {}
        # Event queue for test script subscriptions
        if not hasattr(self.app, '_api_event_queue'):
            self.app._api_event_queue = []
        if not hasattr(self.app, '_api_event_queue_lock'):
            self.app._api_event_queue_lock = threading.Lock()
        self.server = HookServerInstance(('127.0.0.1', self.port), HookHandler, self.app)
        self.thread = threading.Thread(target=self.server.serve_forever, daemon=True)
        self.thread.start()
        logging.info(f"Hook server started on port {self.port}")

    def stop(self):
        """Shut the server down, close its socket and join the thread."""
        if self.server:
            self.server.shutdown()
            self.server.server_close()
        if self.thread:
            self.thread.join()
        logging.info("Hook server stopped")

View File

@@ -3,23 +3,22 @@ import sys
import os
def run_diag(role, prompt):
    """Run ``scripts/mma_exec.py`` for one role and echo its output.

    Args:
        role: Value passed to the script's ``--role`` option.
        prompt: Prompt passed as the positional argument.

    Returns:
        The captured stdout of the child process, or the exception text
        when launching the process fails.
    """
    # The body used to appear twice; the second copy sat after ``return``
    # and could never run, so only one copy is kept.
    print(f"--- Running Diag for {role} ---")
    cmd = [sys.executable, "scripts/mma_exec.py", "--role", role, prompt]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, encoding='utf-8')
        print("STDOUT:")
        print(result.stdout)
        print("STDERR:")
        print(result.stderr)
        return result.stdout
    except Exception as e:
        print(f"FAILED: {e}")
        return str(e)
if __name__ == "__main__":
    # Each diagnostic runs once; the duplicated statements that followed
    # would have launched both sub-agent runs a second time.
    # Test 1: Simple read
    print("TEST 1: read_file")
    run_diag("tier3-worker", "Read the file 'pyproject.toml' and tell me the version of the project. ONLY the version string.")
    # Test 2: Shell command execution
    print("\nTEST 2: run_shell_command")
    run_diag("tier3-worker", "Use run_shell_command to execute 'echo HELLO_SUBAGENT' and return the output. ONLY the output.")

View File

@@ -3,55 +3,51 @@ import pytest
import os
def run_ps_script(role, prompt):
    """Run ``scripts/run_subagent.ps1`` through PowerShell and return the result.

    Args:
        role: Value passed to the script's ``-Role`` parameter.
        prompt: Value passed to the script's ``-Prompt`` parameter.

    Returns:
        The ``subprocess.CompletedProcess`` with captured text stdout/stderr.
        Non-empty stdout and stderr are also printed for test logs.
    """
    # Using -File is safer and handles arguments better
    cmd = [
        "powershell", "-NoProfile", "-ExecutionPolicy", "Bypass",
        "-File", "./scripts/run_subagent.ps1",
        "-Role", role,
        "-Prompt", prompt
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.stdout:
        print(f"\n[Sub-Agent {role} Output]:\n{result.stdout}")
    if result.stderr:
        print(f"\n[Sub-Agent {role} Error]:\n{result.stderr}")
    return result
def test_subagent_script_qa_live():
    """Verify that the QA role works and returns a compressed fix."""
    # Single invocation: the duplicated body would have run the live
    # sub-agent (and every assertion) twice.
    prompt = "Traceback (most recent call last): File 'test.py', line 1, in <module> 1/0 ZeroDivisionError: division by zero"
    result = run_ps_script("QA", prompt)
    assert result.returncode == 0
    # Expected output should mention the fix for division by zero
    assert "zero" in result.stdout.lower()
    # It should be short (QA agents compress)
    assert len(result.stdout.split()) < 40
def test_subagent_script_worker_live():
    """Verify that the Worker role works and returns code."""
    # Single invocation: the duplicated body would have run the live
    # sub-agent twice.
    prompt = "Write a python function that returns 'hello world'"
    result = run_ps_script("Worker", prompt)
    assert result.returncode == 0
    # The reply must contain a function definition mentioning "hello"
    assert "def" in result.stdout.lower()
    assert "hello" in result.stdout.lower()
def test_subagent_script_utility_live():
    """Verify that the Utility role works."""
    # Single invocation: the duplicated body would have run the live
    # sub-agent twice.
    prompt = "Tell me 'True' if 1+1=2, otherwise 'False'"
    result = run_ps_script("Utility", prompt)
    assert result.returncode == 0
    assert "true" in result.stdout.lower()
def test_subagent_isolation_live():
    """Verify that the sub-agent is stateless and does not see the parent's conversation context."""
    # Single invocation: the duplicated body would have run the live
    # sub-agent twice.
    # This prompt asks the sub-agent about a 'secret' mentioned only here, not in its prompt.
    prompt = "What is the secret code I just told you? If I didn't tell you, say 'UNKNOWN'."
    result = run_ps_script("Utility", prompt)
    assert result.returncode == 0
    # A stateless agent should not know any previous context.
    assert "unknown" in result.stdout.lower()

View File

@@ -4,148 +4,137 @@ from unittest.mock import patch, MagicMock
from scripts.mma_exec import create_parser, get_role_documents, execute_agent, get_model_for_role, get_dependencies
def test_parser_role_choices():
    """Test that the parser accepts valid roles and the prompt argument."""
    parser = create_parser()
    valid_roles = ['tier1', 'tier2', 'tier3', 'tier4']
    test_prompt = "Analyze the codebase for bottlenecks."
    # Each role is parsed once; the repeated body re-ran the same checks.
    for role in valid_roles:
        args = parser.parse_args(['--role', role, test_prompt])
        assert args.role == role
        assert args.prompt == test_prompt
def test_parser_invalid_role():
    """Test that the parser rejects roles outside the specified choices."""
    parser = create_parser()
    # argparse signals an invalid choice by exiting the process
    with pytest.raises(SystemExit):
        parser.parse_args(['--role', 'tier5', 'Some prompt'])
def test_parser_prompt_optional():
    """Test that the prompt argument is optional if role is provided (or handled in main)."""
    parser = create_parser()
    # Prompt is now optional (nargs='?')
    args = parser.parse_args(['--role', 'tier3'])
    assert args.role == 'tier3'
    assert args.prompt is None
def test_parser_help():
    """Test that the help flag works without raising errors (exits with 0)."""
    parser = create_parser()
    with pytest.raises(SystemExit) as excinfo:
        parser.parse_args(['--help'])
    # --help prints usage and exits cleanly, so the exit code must be 0.
    assert excinfo.value.code == 0
def test_get_role_documents():
    """Test that get_role_documents returns the correct documentation paths for each tier."""
    assert get_role_documents('tier1') == ['conductor/product.md', 'conductor/product-guidelines.md']
    assert get_role_documents('tier2') == ['conductor/tech-stack.md', 'conductor/workflow.md']
    assert get_role_documents('tier3') == ['conductor/workflow.md']
    assert get_role_documents('tier4') == []
def test_get_model_for_role():
    """Test that get_model_for_role returns the correct model for each role."""
    assert get_model_for_role('tier1-orchestrator') == 'gemini-3.1-pro-preview'
    assert get_model_for_role('tier2-tech-lead') == 'gemini-2.5-flash-lite'
    assert get_model_for_role('tier3-worker') == 'gemini-2.5-flash-lite'
    assert get_model_for_role('tier4-qa') == 'gemini-2.5-flash-lite'
def test_execute_agent():
    """
    Test that execute_agent calls subprocess.run with powershell and the correct gemini CLI arguments
    including the model specified for the role.
    """
    role = "tier3-worker"
    prompt = "Write a unit test."
    docs = ["file1.py", "docs/spec.md"]
    expected_model = "gemini-2.5-flash-lite"
    mock_stdout = "Mocked AI Response"
    # subprocess.run is patched so no real PowerShell / gemini process is spawned.
    with patch("subprocess.run") as mock_run:
        mock_process = MagicMock()
        mock_process.stdout = mock_stdout
        mock_process.returncode = 0
        mock_run.return_value = mock_process
        result = execute_agent(role, prompt, docs)
        mock_run.assert_called_once()
        args, kwargs = mock_run.call_args
        cmd_list = args[0]
        assert cmd_list[0] == "powershell.exe"
        assert "-Command" in cmd_list
        # The PowerShell command string is the argument immediately after -Command.
        ps_cmd = cmd_list[cmd_list.index("-Command") + 1]
        assert "gemini" in ps_cmd
        assert f"--model {expected_model}" in ps_cmd
        # Verify input contains the prompt and system directive
        input_text = kwargs.get("input")
        assert "STRICT SYSTEM DIRECTIVE" in input_text
        assert "TASK: Write a unit test." in input_text
        assert kwargs.get("capture_output") is True
        assert kwargs.get("text") is True
        assert result == mock_stdout
def test_get_dependencies(tmp_path):
    """Test that get_dependencies lists the modules imported by a file, in source order."""
    content = (
        "import os\n"
        "import sys\n"
        "import file_cache\n"
        "from mcp_client import something\n"
    )
    filepath = tmp_path / "mock_script.py"
    filepath.write_text(content)
    dependencies = get_dependencies(str(filepath))
    # Both `import x` and `from x import y` forms are expected to be reported by module name.
    assert dependencies == ['os', 'sys', 'file_cache', 'mcp_client']
import re
def test_execute_agent_logging(tmp_path):
    """Test that execute_agent records role, prompt and a date stamp in the master delegation log."""
    log_file = tmp_path / "mma_delegation.log"
    # mma_exec now uses logs/agents/ for individual logs and logs/mma_delegation.log for master
    # We will patch LOG_FILE to point to our temp location
    with patch("scripts.mma_exec.LOG_FILE", str(log_file)), \
            patch("subprocess.run") as mock_run:
        mock_process = MagicMock()
        mock_process.stdout = ""
        mock_process.returncode = 0
        mock_run.return_value = mock_process
        test_role = "tier1"
        test_prompt = "Plan the next phase"
        execute_agent(test_role, test_prompt, [])
        assert log_file.exists()
        log_content = log_file.read_text()
        assert test_role in log_content
        assert test_prompt in log_content  # Master log should now have the summary prompt
        # A YYYY-MM-DD date must appear somewhere in the log entry.
        assert re.search(r"\d{4}-\d{2}-\d{2}", log_content)


def test_execute_agent_tier3_injection(tmp_path):
    """Test that a tier3 run injects a skeleton of the target file's local dependency into the input."""
    main_content = "import dependency\n\ndef run():\n dependency.do_work()\n"
    main_file = tmp_path / "main.py"
    main_file.write_text(main_content)
    dep_content = "def do_work():\n pass\n\ndef other_func():\n print('hello')\n"
    dep_file = tmp_path / "dependency.py"
    dep_file.write_text(dep_content)
    # We need to ensure generate_skeleton is mockable or working
    # The relative document path 'main.py' is resolved against the CWD, so switch into tmp_path
    # for the call and always restore the previous directory afterwards.
    old_cwd = os.getcwd()
    os.chdir(tmp_path)
    try:
        with patch("subprocess.run") as mock_run:
            mock_process = MagicMock()
            mock_process.stdout = "OK"
            mock_process.returncode = 0
            mock_run.return_value = mock_process
            execute_agent('tier3-worker', 'Modify main.py', ['main.py'])
            assert mock_run.called
            input_text = mock_run.call_args[1].get("input")
            assert "DEPENDENCY SKELETON: dependency.py" in input_text
            assert "def do_work():" in input_text
            assert "Modify main.py" in input_text
    finally:
        os.chdir(old_cwd)

View File

@@ -2,7 +2,7 @@ import pytest
from scripts.mma_exec import generate_skeleton
def test_generate_skeleton():
sample_code = '''
sample_code = '''
class Calculator:
"""Performs basic math operations."""
@@ -15,26 +15,21 @@ def log_message(msg):
timestamp = "2026-02-25"
print(f"[{timestamp}] {msg}")
'''
skeleton = generate_skeleton(sample_code)
# Check that signatures are preserved
assert "class Calculator:" in skeleton
assert "def add(self, a: int, b: int) -> int:" in skeleton
assert "def log_message(msg):" in skeleton
# Check that docstrings are preserved
assert '"""Performs basic math operations."""' in skeleton
assert '"""Adds two numbers."""' in skeleton
# Check that implementation details are removed
assert "result = a + b" not in skeleton
assert "return result" not in skeleton
assert "timestamp =" not in skeleton
assert "print(" not in skeleton
# Check that bodies are replaced with ellipsis
assert "..." in skeleton
skeleton = generate_skeleton(sample_code)
# Check that signatures are preserved
assert "class Calculator:" in skeleton
assert "def add(self, a: int, b: int) -> int:" in skeleton
assert "def log_message(msg):" in skeleton
# Check that docstrings are preserved
assert '"""Performs basic math operations."""' in skeleton
assert '"""Adds two numbers."""' in skeleton
# Check that implementation details are removed
assert "result = a + b" not in skeleton
assert "return result" not in skeleton
assert "timestamp =" not in skeleton
assert "print(" not in skeleton
# Check that bodies are replaced with ellipsis
assert "..." in skeleton
if __name__ == "__main__":
    # Allow running this test module directly instead of through the pytest CLI.
    pytest.main([__file__])

View File

@@ -9,5 +9,5 @@ This file tracks all major tracks for the project. Each track has its own detail
---
- [ ] **Track: AI-Optimized Python Style Refactor**
- [~] **Track: AI-Optimized Python Style Refactor**
*Link: [./tracks/python_style_refactor_20260227/](./tracks/python_style_refactor_20260227/)*

View File

@@ -6,14 +6,18 @@
- [x] Task: Conductor - User Manual Verification 'Phase 1: Pilot and Tooling' (Protocol in workflow.md) [checkpoint: Phase1]
## Phase 2: Core Refactor - Indentation and Newlines
- [~] Task: Conductor - Refactor Primary Engine Modules (`ai_client.py`, `aggregate.py`, `mcp_client.py`, `shell_runner.py`).
- [ ] Task: Conductor - Refactor Project & Session Management Modules (`project_manager.py`, `session_logger.py`).
- [ ] Task: Conductor - Refactor UI Modules (`gui_2.py`, `gui_legacy.py`, `theme.py`, `theme_2.py`).
- [ ] Task: Conductor - Refactor Remaining Utility and Support Modules (`events.py`, `file_cache.py`, `models.py`, `mma_prompts.py`).
- [ ] Task: Conductor - User Manual Verification 'Phase 2: Indentation and Newline Refactor' (Protocol in workflow.md)
- [x] Task: Conductor - Refactor Primary Engine Modules (`ai_client.py`, `aggregate.py`, `mcp_client.py`, `shell_runner.py`). [db65162]
- [x] Task: Conductor - Refactor Project & Session Management Modules (`project_manager.py`, `session_logger.py`). [db65162]
- [x] Task: Conductor - Refactor UI Modules (`gui_2.py`, `gui_legacy.py`, `theme.py`, `theme_2.py`). [db65162]
- [x] Task: Conductor - Refactor Remaining Utility and Support Modules (`events.py`, `file_cache.py`, `models.py`, `mma_prompts.py`). [db65162]
- [x] Task: Conductor - User Manual Verification 'Phase 2: Indentation and Newline Refactor' (Protocol in workflow.md) [checkpoint: Phase2]
## Phase 3: AI-Optimized Metadata and Final Cleanup
- [ ] Task: Conductor - Implement Strict Type Hinting and Compact Imports across the Entire Codebase.
- [~] Task: Conductor - Implement Strict Type Hinting across the Entire Codebase.
- [x] Engine Core (`ai_client.py`, `mcp_client.py`, `aggregate.py`, `shell_runner.py`)
- [x] Develop/Integrate Surgical AST Tools in `mcp_client.py` and `tools.json`.
- [x] Management Modules (project_manager.py, session_logger.py) [19c28a1]
- [~] UI Modules (`gui_2.py`, `gui_legacy.py`)
- [ ] Task: Conductor - Update `conductor/code_styleguides/python.md` with the new AI-optimized standard.
- [ ] Task: Conductor - User Manual Verification 'Phase 3: Metadata and Final Documentation' (Protocol in workflow.md)

View File

@@ -8,7 +8,6 @@ Refactor the Python codebase to a "Single-Space, Ultra-Compact" style specifical
- **Newlines (Ultra-Compact):**
- Maximum **one (1)** blank line between top-level definitions (classes, functions).
- **Zero (0)** blank lines inside function or method bodies.
- **Imports (Compact):** Consolidate imports into compact blocks to reduce vertical space.
- **Typing (Strict):** Ensure all function and method signatures include strict type hints for `Args` and `Returns`.
- **Scope:**
- Target: All `.py` files in the project root and subdirectories.
@@ -19,14 +18,22 @@ Refactor the Python codebase to a "Single-Space, Ultra-Compact" style specifical
- **AST Compatibility:** The style must not interfere with existing AST tools (`ast`, `tree-sitter`) used for interface extraction and code outlines.
- **Token Efficiency:** The primary goal is to reduce the total token count of the codebase.
## 4. Acceptance Criteria
- [ ] Codebase indentation is uniformly 1 space.
- [ ] No `.py` file contains consecutive blank lines.
- [ ] No `.py` file contains blank lines within function or method bodies.
- [ ] All functions/methods have complete type hints.
- [ ] Application remains functional and passes existing tests.
## 4. Current Status (Progress Checkpoint)
- **Phase 1: Completed.** Tooling developed (`scripts/ai_style_formatter.py`) and verified.
- **Phase 2: Completed.** Global codebase refactor for indentation and ultra-compact newlines (including 1-line gap before definitions) applied to all Python files.
- **Phase 3: In Progress.**
- **Surgical Tooling:** New tools added to `mcp_client.py` and `.gemini/tools.json`: `get_file_slice`, `set_file_slice`, `py_update_definition`, `py_get_signature`, `py_set_signature`, `py_get_class_summary`, `py_get_var_declaration`, `py_set_var_declaration`.
- **Core Typing:** `ai_client.py`, `mcp_client.py`, `aggregate.py`, `shell_runner.py` fully updated with strict type hints.
- **Remaining:** `project_manager.py`, `session_logger.py`, `gui_2.py`, `gui_legacy.py` need strict typing.
## 5. Out of Scope
## 5. Acceptance Criteria
- [x] Codebase indentation is uniformly 1 space.
- [x] No `.py` file contains consecutive blank lines.
- [x] No `.py` file contains blank lines within function or method bodies.
- [~] All functions/methods have complete type hints (Core Engine complete, UI/Manager pending).
- [x] Application remains functional and passes existing tests.
## 6. Out of Scope
- Architectural changes or logic refactoring.
- Modification of non-Python files (e.g., `.md`, `.toml`, `.ps1`).
- Breaking PEP 8 compliance where it's not strictly necessary for token reduction (though indentation and blank lines are explicitly targeted).
- Import compaction (discarded per user request).

View File

@@ -5,7 +5,7 @@
- [x] Task: Implement helper methods in `ApiHookClient` for querying specific DearPyGui item states (e.g., `get_text_value`, `get_node_status`). 2a30e62
## Phase 2: Epic & Track Verification
- [ ] Task: Write the simulation routine to trigger a new Epic and verify the Track Browser updates correctly.
- [~] Task: Write the simulation routine to trigger a new Epic and verify the Track Browser updates correctly.
- [ ] Task: Verify that selecting a newly generated track successfully loads its initial (empty) state into the DAG visualizer.
## Phase 3: DAG & Spawn Interception Verification

View File

@@ -4,85 +4,76 @@ import mma_prompts
import re
def generate_tickets(track_brief: str, module_skeletons: str) -> list[dict]:
    """
    Tier 2 (Tech Lead) call.
    Breaks down a Track Brief and module skeletons into discrete Tier 3 Tickets.
    Args:
        track_brief: The brief describing the track to implement.
        module_skeletons: Skeleton source of the relevant modules, embedded verbatim in the prompt.
    Returns:
        The list of ticket dictionaries parsed from the model's JSON response, or an empty
        list if the call fails or the response cannot be parsed as a JSON array.
    """
    # 1. Set Tier 2 Model (Tech Lead - Flash)
    ai_client.set_provider('gemini', 'gemini-2.5-flash-lite')
    ai_client.reset_session()
    # 2. Construct Prompt
    system_prompt = mma_prompts.PROMPTS.get("tier2_sprint_planning")
    user_message = (
        f"### TRACK BRIEF:\n{track_brief}\n\n"
        f"### MODULE SKELETONS:\n{module_skeletons}\n\n"
        "Please generate the implementation tickets for this track."
    )
    # Set custom system prompt for this call; the previous one is restored in `finally`.
    old_system_prompt = ai_client._custom_system_prompt
    ai_client.set_custom_system_prompt(system_prompt)
    try:
        # 3. Call Tier 2 Model
        response = ai_client.send(
            md_content="",
            user_message=user_message
        )
        # 4. Parse JSON Output
        # Extract JSON array from markdown code blocks if present
        json_match = response.strip()
        if "```json" in json_match:
            json_match = json_match.split("```json")[1].split("```")[0].strip()
        elif "```" in json_match:
            json_match = json_match.split("```")[1].split("```")[0].strip()
        # If it's still not valid JSON, try to find a [ ... ] block
        if not (json_match.startswith('[') and json_match.endswith(']')):
            match = re.search(r'\[\s*\{.*\}\s*\]', json_match, re.DOTALL)
            if match:
                json_match = match.group(0)
        tickets = json.loads(json_match)
        # The declared contract is a list of tickets; treat any other JSON value as a parse failure
        # so callers never receive a bare dict/string. The raise is handled by the except below.
        if not isinstance(tickets, list):
            raise ValueError(f"expected a JSON array of tickets, got {type(tickets).__name__}")
        return tickets
    except Exception as e:
        # Best-effort by design: report the problem and fall back to "no tickets".
        print(f"Error parsing Tier 2 response: {e}")
        return []
    finally:
        # Restore old system prompt
        ai_client.set_custom_system_prompt(old_system_prompt)
from dag_engine import TrackDAG
from models import Ticket
def topological_sort(tickets: list[dict]) -> list[dict]:
    """
    Sorts a list of tickets based on their 'depends_on' field.
    Raises ValueError if a circular dependency or missing internal dependency is detected.
    Args:
        tickets: Ticket dictionaries; each must have an 'id' key and be accepted by Ticket.from_dict.
    Returns:
        The same dictionaries, ordered so that dependencies come before their dependents.
    """
    # 1. Convert to Ticket objects for TrackDAG
    ticket_objs = [Ticket.from_dict(t_data) for t_data in tickets]
    # 2. Use TrackDAG for validation and sorting
    dag = TrackDAG(ticket_objs)
    try:
        sorted_ids = dag.topological_sort()
    except ValueError as e:
        # Chain the original error so the underlying cause stays visible in tracebacks.
        raise ValueError(f"DAG Validation Error: {e}") from e
    # 3. Return sorted dictionaries
    ticket_map = {t['id']: t for t in tickets}
    # The sorted IDs can include IDs that are only referenced as dependencies and have no
    # ticket of their own; report those as a validation error instead of a bare KeyError.
    missing = [tid for tid in sorted_ids if tid not in ticket_map]
    if missing:
        raise ValueError(f"DAG Validation Error: Missing dependencies: {', '.join(map(str, missing))}")
    return [ticket_map[tid] for tid in sorted_ids]
if __name__ == "__main__":
    # Quick test if run directly
    test_brief = "Implement a new feature."
    test_skeletons = "class NewFeature: pass"
    tickets = generate_tickets(test_brief, test_skeletons)
    print(json.dumps(tickets, indent=2))

View File

@@ -22,7 +22,7 @@ paths = [
"C:\\projects\\manual_slop\\tests\\temp_livetoolssim.toml",
"C:\\projects\\manual_slop\\tests\\temp_liveexecutionsim.toml",
]
active = "C:\\projects\\manual_slop\\tests\\temp_project.toml"
active = "C:\\projects\\manual_slop\\tests\\temp_liveexecutionsim.toml"
[gui.show_windows]
"Context Hub" = true

View File

@@ -2,160 +2,152 @@ from typing import List, Optional
from models import Ticket
class TrackDAG:
    """
    Manages a Directed Acyclic Graph of implementation tickets.
    Provides methods for dependency resolution, cycle detection, and topological sorting.
    Tickets are only required to expose `id`, `depends_on` and `status` attributes.
    """
    def __init__(self, tickets: List["Ticket"]) -> None:
        """
        Initializes the TrackDAG with a list of Ticket objects.
        Args:
            tickets: A list of Ticket instances defining the graph nodes and edges.
        """
        self.tickets = tickets
        # ID -> ticket index used for all dependency lookups.
        self.ticket_map = {t.id: t for t in tickets}

    def get_ready_tasks(self) -> List["Ticket"]:
        """
        Returns a list of tickets that are in 'todo' status and whose dependencies are all 'completed'.
        A dependency ID that is not present in the graph counts as unmet.
        Returns:
            A list of Ticket objects ready for execution.
        """
        ready = []
        for ticket in self.tickets:
            if ticket.status != 'todo':
                continue
            # Check if all dependencies exist and are completed
            deps = (self.ticket_map.get(dep_id) for dep_id in ticket.depends_on)
            if all(dep and dep.status == 'completed' for dep in deps):
                ready.append(ticket)
        return ready

    def has_cycle(self) -> bool:
        """
        Performs a Depth-First Search to detect cycles in the dependency graph.
        Returns:
            True if a cycle is detected, False otherwise.
        """
        visited = set()
        # IDs on the current DFS path; meeting one of them again means a back edge, i.e. a cycle.
        rec_stack = set()

        def is_cyclic(ticket_id: str) -> bool:
            """Internal recursive helper for cycle detection."""
            if ticket_id in rec_stack:
                return True
            if ticket_id in visited:
                return False
            visited.add(ticket_id)
            rec_stack.add(ticket_id)
            ticket = self.ticket_map.get(ticket_id)
            if ticket:
                for neighbor in ticket.depends_on:
                    if is_cyclic(neighbor):
                        return True
            rec_stack.remove(ticket_id)
            return False

        for ticket in self.tickets:
            if ticket.id not in visited:
                if is_cyclic(ticket.id):
                    return True
        return False

    def topological_sort(self) -> List[str]:
        """
        Returns a list of ticket IDs in topological order (dependencies before dependents).
        IDs that appear only in a `depends_on` list (no matching ticket in the graph) are
        included in the result as well, ahead of the tickets that reference them.
        Returns:
            A list of ticket ID strings.
        Raises:
            ValueError: If a dependency cycle is detected.
        """
        if self.has_cycle():
            raise ValueError("Dependency cycle detected")
        visited = set()
        stack = []

        def visit(ticket_id: str) -> None:
            """Internal recursive helper for topological sorting."""
            if ticket_id in visited:
                return
            visited.add(ticket_id)
            ticket = self.ticket_map.get(ticket_id)
            if ticket:
                for dep_id in ticket.depends_on:
                    visit(dep_id)
            # Post-order append: every dependency is already on the stack at this point.
            stack.append(ticket_id)

        for ticket in self.tickets:
            visit(ticket.id)
        return stack
class ExecutionEngine:
    """
    A state machine that governs the progression of tasks within a TrackDAG.
    Handles automatic queueing and manual task approval.
    """
    def __init__(self, dag: "TrackDAG", auto_queue: bool = False) -> None:
        """
        Initializes the ExecutionEngine.
        Args:
            dag: The TrackDAG instance to manage.
            auto_queue: If True, ready tasks will automatically move to 'in_progress'.
        """
        self.dag = dag
        self.auto_queue = auto_queue

    def tick(self) -> List["Ticket"]:
        """
        Evaluates the DAG and returns a list of tasks that are currently 'ready' for execution.
        If auto_queue is enabled, tasks without 'step_mode' will be marked as 'in_progress'.
        Returns:
            A list of ready Ticket objects.
        """
        ready = self.dag.get_ready_tasks()
        if self.auto_queue:
            for ticket in ready:
                # step_mode tickets stay in 'todo' until approve_task() is called for them.
                if not ticket.step_mode:
                    ticket.status = "in_progress"
        return ready

    def approve_task(self, task_id: str) -> None:
        """
        Manually transitions a task from 'todo' to 'in_progress' if its dependencies are met.
        Unknown task IDs, tasks not in 'todo', and tasks with unmet or unknown dependencies
        are left unchanged.
        Args:
            task_id: The ID of the task to approve.
        """
        ticket = self.dag.ticket_map.get(task_id)
        if not (ticket and ticket.status == "todo"):
            return
        # Check if dependencies are met first
        for dep_id in ticket.depends_on:
            dep = self.dag.ticket_map.get(dep_id)
            if not dep or dep.status != "completed":
                return
        ticket.status = "in_progress"

    def update_task_status(self, task_id: str, status: str) -> None:
        """
        Force-updates the status of a specific task. Unknown task IDs are ignored.
        Args:
            task_id: The ID of the task.
            status: The new status string (e.g., 'todo', 'in_progress', 'completed', 'blocked').
        """
        ticket = self.dag.ticket_map.get(task_id)
        if ticket:
            ticket.status = status

View File

@@ -10,9 +10,9 @@ parser = tree_sitter.Parser(PY_LANGUAGE)
tree = parser.parse(bytes(code, "utf8"))
def walk(node, indent=0):
    """
    Print one line per syntax-tree node, depth-first: type, byte range and the first
    20 characters of its source text, indented by nesting depth.
    Args:
        node: The tree-sitter node to print (together with all of its descendants).
        indent: Current nesting depth; one space of indentation per level.
    """
    content = code[node.start_byte:node.end_byte].strip()
    print(f"{' ' * indent}{node.type} ({node.start_byte}-{node.end_byte}): {content[:20]}")
    for child in node.children:
        walk(child, indent + 1)
walk(tree.root_node)

View File

@@ -2,81 +2,77 @@ import tree_sitter
import tree_sitter_python
class ASTParser:
    """Tree-sitter backed Python parser that can reduce source code to a curated skeleton view."""
    def __init__(self, language: str):
        """
        Builds the tree-sitter parser.
        Args:
            language: Accepted for interface purposes but not consulted; the Python grammar is always loaded.
        """
        self.language = tree_sitter.Language(tree_sitter_python.language())
        self.parser = tree_sitter.Parser(self.language)

    def parse(self, code: str) -> tree_sitter.Tree:
        """Parses the given source text (encoded as UTF-8) and returns the syntax tree."""
        return self.parser.parse(bytes(code, "utf8"))

    def get_curated_view(self, code: str) -> str:
        """
        Returns the source with function bodies replaced by `...`, keeping signatures and
        leading docstrings. Functions decorated with `@core_logic` or containing a comment
        with `[HOT]` are kept verbatim. Progress is printed to stdout for debugging.
        Args:
            code: Python source text.
        Returns:
            The curated source text.
        """
        # Tree-sitter reports byte offsets, so all slicing is done on the UTF-8 bytes rather than
        # on the str (the two only coincide for pure-ASCII source).
        source = bytes(code, "utf8")
        tree = self.parser.parse(source)
        edits = []

        def text_of(start, end):
            # errors="replace" because a fixed-width preview slice may cut a multi-byte character.
            return source[start:end].decode("utf8", errors="replace")

        def is_docstring(node):
            if node.type == "expression_statement" and node.child_count > 0:
                if node.children[0].type == "string":
                    return True
            return False

        def has_core_logic_decorator(node):
            parent = node.parent
            if parent and parent.type == "decorated_definition":
                for child in parent.children:
                    if child.type == "decorator":
                        if "@core_logic" in text_of(child.start_byte, child.end_byte):
                            return True
            return False

        def has_hot_comment(func_node):
            print(f"Checking {text_of(func_node.start_byte, func_node.start_byte + 20).strip()}...")
            # Iterative scan of the whole function subtree for comment nodes.
            stack = [func_node]
            while stack:
                curr = stack.pop()
                if curr.type == "comment":
                    comment_text = text_of(curr.start_byte, curr.end_byte)
                    print(f" Found comment: {comment_text}")
                    if "[HOT]" in comment_text:
                        print(" [HOT] FOUND!")
                        return True
                for child in curr.children:
                    stack.append(child)
            return False

        def walk(node):
            if node.type == "function_definition":
                body = node.child_by_field_name("body")
                if body and body.type == "block":
                    preserve = has_core_logic_decorator(node) or has_hot_comment(node)
                    print(f"Function {text_of(node.start_byte, node.start_byte + 20).strip()}, preserve={preserve}")
                    if not preserve:
                        indent = " " * body.start_point.column
                        first_stmt = None
                        for child in body.children:
                            if child.type != "comment":
                                first_stmt = child
                                break
                        if first_stmt and is_docstring(first_stmt):
                            start_byte = first_stmt.end_byte
                            end_byte = body.end_byte
                            if end_byte > start_byte:
                                # Real newline (not a literal backslash-n) so the ellipsis lands on its own line.
                                edits.append((start_byte, end_byte, "\n" + indent + "..."))
                                # Everything below is being replaced; descending further would record
                                # overlapping edits whose offsets go stale once the inner one is applied.
                                return
                        else:
                            edits.append((body.start_byte, body.end_byte, "..."))
                            return
            for child in node.children:
                walk(child)

        walk(tree.root_node)
        # Apply from the end of the file backwards so earlier offsets stay valid.
        edits.sort(key=lambda x: x[0], reverse=True)
        code_bytes = bytearray(source)
        for start, end, replacement in edits:
            code_bytes[start:end] = bytes(replacement, "utf8")
        return code_bytes.decode("utf8")
parser = ASTParser("python")
code = '''

View File

@@ -5,27 +5,28 @@ import asyncio
from typing import Callable, Any, Dict, List, Tuple
class EventEmitter:
    """
    Simple event emitter for decoupled communication between modules.
    """

    def __init__(self):
        """Initializes the EventEmitter with an empty listener map."""
        # Maps an event name to its callbacks, kept in registration order.
        self._listeners: Dict[str, List[Callable]] = {}

    def on(self, event_name: str, callback: Callable):
        """
        Registers a callback for a specific event.

        Args:
            event_name: The name of the event to listen for.
            callback: The function to call when the event is emitted.
        """
        self._listeners.setdefault(event_name, []).append(callback)

    def emit(self, event_name: str, *args: Any, **kwargs: Any):
        """
        Emits an event, calling all registered callbacks in registration order.

        Emitting an event that has no listeners is a no-op.

        Args:
            event_name: The name of the event to emit.
            *args: Positional arguments to pass to callbacks.
            **kwargs: Keyword arguments to pass to callbacks.
        """
        # Iterate over a snapshot: a callback may call on() for this same event,
        # and appending to a list while iterating it would run the new callback
        # within this emit (and can loop forever if it keeps re-registering).
        for callback in tuple(self._listeners.get(event_name, ())):
            callback(*args, **kwargs)
class AsyncEventQueue:
    """
    Asynchronous event queue for decoupled communication using asyncio.Queue.
    """

    def __init__(self):
        """Initializes the AsyncEventQueue with an internal asyncio.Queue."""
        self._queue: asyncio.Queue = asyncio.Queue()

    async def put(self, event_name: str, payload: Any = None):
        """
        Puts an event into the queue.

        The event is stored as a single ``(event_name, payload)`` tuple.

        Args:
            event_name: The name of the event.
            payload: Optional data associated with the event.
        """
        await self._queue.put((event_name, payload))

    async def get(self) -> Tuple[str, Any]:
        """
        Gets an event from the queue, waiting until one is available.

        Returns:
            A tuple containing (event_name, payload).
        """
        return await self._queue.get()
class UserRequestEvent:
    """
    Payload for a user request event.
    """

    def __init__(self, prompt: str, stable_md: str, file_items: List[Any], disc_text: str, base_dir: str):
        """Stores the five request fields unchanged as instance attributes."""
        self.prompt = prompt
        self.stable_md = stable_md
        self.file_items = file_items
        self.disc_text = disc_text
        self.base_dir = base_dir

    def to_dict(self) -> Dict[str, Any]:
        """
        Returns the payload as a plain dictionary keyed by attribute name.

        The values are the stored objects themselves (``file_items`` is not copied).
        """
        return {
            "prompt": self.prompt,
            "stable_md": self.stable_md,
            "file_items": self.file_items,
            "disc_text": self.disc_text,
            "base_dir": self.base_dir,
        }

View File

@@ -10,164 +10,148 @@ from typing import Optional
import tree_sitter
import tree_sitter_python
class ASTParser:
    """
    Parser for extracting AST-based views of source code.
    Currently supports Python.
    """

    def __init__(self, language: str):
        """
        Builds a tree-sitter parser for the given language.

        Args:
            language: Name of the language to parse; only "python" is accepted.

        Raises:
            ValueError: If *language* is anything other than "python".
        """
        if language != "python":
            raise ValueError(f"Language '{language}' not supported yet.")
        self.language_name = language
        # Load the tree-sitter language grammar
        self.language = tree_sitter.Language(tree_sitter_python.language())
        self.parser = tree_sitter.Parser(self.language)

    def parse(self, code: str) -> "tree_sitter.Tree":
        """Parse the given code and return the tree-sitter Tree."""
        return self.parser.parse(bytes(code, "utf8"))

    def get_skeleton(self, code: str) -> str:
        """
        Returns a skeleton of a Python file (preserving docstrings, stripping function bodies).
        """
        return self._strip_function_bodies(code, honor_markers=False)

    def get_curated_view(self, code: str) -> str:
        """
        Returns a curated skeleton of a Python file.
        Preserves function bodies if they have @core_logic decorator or # [HOT] comment.
        Otherwise strips bodies but preserves docstrings.
        """
        return self._strip_function_bodies(code, honor_markers=True)

    @staticmethod
    def _is_docstring(node) -> bool:
        """True if *node* is an expression statement whose first child is a string."""
        return (
            node.type == "expression_statement"
            and node.child_count > 0
            and node.children[0].type == "string"
        )

    @staticmethod
    def _has_core_logic_decorator(func_node, source: bytes) -> bool:
        """True if the function's decorated_definition parent has a decorator containing '@core_logic'."""
        parent = func_node.parent
        if parent is None or parent.type != "decorated_definition":
            return False
        # decorator -> ( '@', identifier ) or ( '@', call )
        return any(
            child.type == "decorator"
            and b"@core_logic" in source[child.start_byte:child.end_byte]
            for child in parent.children
        )

    @staticmethod
    def _has_hot_comment(func_node, source: bytes) -> bool:
        """
        True if any comment among the function's descendants contains '[HOT]'.

        Because every descendant is searched, a marker inside a nested function
        also preserves the enclosing function.
        """
        stack = [func_node]
        while stack:
            curr = stack.pop()
            if curr.type == "comment" and b"[HOT]" in source[curr.start_byte:curr.end_byte]:
                return True
            stack.extend(curr.children)
        return False

    def _strip_function_bodies(self, code: str, honor_markers: bool) -> str:
        """
        Replaces function bodies with '...', keeping a leading docstring.

        Args:
            code: Python source text.
            honor_markers: When True, functions marked with @core_logic or a
                [HOT] comment keep their bodies.

        Returns:
            The source with every non-preserved function body replaced.
        """
        # tree-sitter reports byte offsets into the UTF-8 encoding, so all
        # slicing is done on bytes; slicing the str would be wrong as soon as
        # the source contains a non-ASCII character.
        source = bytes(code, "utf8")
        tree = self.parse(code)
        edits = []
        pending = [tree.root_node]
        while pending:
            node = pending.pop()
            body = node.child_by_field_name("body") if node.type == "function_definition" else None
            if body is not None and body.type == "block":
                preserve = honor_markers and (
                    self._has_core_logic_decorator(node, source)
                    or self._has_hot_comment(node, source)
                )
                if not preserve:
                    # Leading comments are skipped when looking for the docstring.
                    first_stmt = next((c for c in body.children if c.type != "comment"), None)
                    if first_stmt is not None and self._is_docstring(first_stmt):
                        # Keep the docstring; replace whatever follows it, if anything.
                        if body.end_byte > first_stmt.end_byte:
                            indent = " " * body.start_point.column
                            edits.append((first_stmt.end_byte, body.end_byte, f"\n{indent}..."))
                    else:
                        edits.append((body.start_byte, body.end_byte, "..."))
                    # Do not descend into a stripped body: an edit for a nested
                    # function would overlap this one, and applying it first
                    # would invalidate this edit's end offset.
                    continue
            pending.extend(node.children)
        # Apply edits in reverse to maintain byte offsets
        edits.sort(key=lambda edit: edit[0], reverse=True)
        code_bytes = bytearray(source)
        for start, end, replacement in edits:
            code_bytes[start:end] = bytes(replacement, "utf8")
        return code_bytes.decode("utf8")
def reset_client():
    """No-op placeholder; takes no arguments and returns None."""
    pass
def content_block_type(path: Path) -> str:
    """Always reports "unsupported", regardless of *path*."""
    return "unsupported"
def get_file_id(path: Path) -> Optional[str]:
    """Always returns None, regardless of *path*."""
    return None
def evict(path: Path):
    """No-op placeholder; *path* is ignored and None is returned."""
    pass
def list_cached() -> list[dict]:
    """Always returns a new empty list."""
    return []

View File

@@ -8,28 +8,28 @@ _client = None
_chat = None
def _load_key() -> str:
    """
    Reads the Gemini API key from ``credentials.toml``.

    The path is relative, so the file is looked up in the current working
    directory. The key is taken from ``[gemini] api_key``.

    Raises:
        OSError: If the file cannot be opened.
        tomllib.TOMLDecodeError: If the file is not valid TOML.
        KeyError: If the ``gemini`` table or its ``api_key`` entry is missing.
    """
    with open("credentials.toml", "rb") as f:
        return tomllib.load(f)["gemini"]["api_key"]
def _ensure_client():
    """Creates the module-level ``_client`` on first use; later calls do nothing."""
    global _client
    if _client is None:
        _client = genai.Client(api_key=_load_key())
def _ensure_chat():
    """
    Creates the module-level ``_chat`` session on first use; later calls do nothing.

    The client is created first if needed, and the chat is opened against the
    "gemini-2.0-flash" model.
    """
    global _chat
    if _chat is None:
        _ensure_client()
        _chat = _client.chats.create(model="gemini-2.0-flash")
def send(md_content: str, user_message: str) -> str:
    """
    Sends a message to the shared chat session and returns the reply text.

    Args:
        md_content: Context text; it is wrapped in ``<context>`` tags and placed
            before the user message.
        user_message: The message itself.

    Returns:
        The ``text`` attribute of the chat response.
    """
    # _chat is only read here, so no `global` declaration is needed.
    _ensure_chat()
    full_message = f"<context>\n{md_content}\n</context>\n\n{user_message}"
    response = _chat.send_message(full_message)
    return response.text
def reset_session():
    """Drops the cached client and chat so the next use creates fresh ones."""
    global _client, _chat
    _client = None
    _chat = None

View File

@@ -6,135 +6,118 @@ import os
import session_logger # Import session_logger
class GeminiCliAdapter:
    """Runs the Gemini CLI as a subprocess and parses its stream-json output."""

    def __init__(self, binary_path="gemini"):
        """
        Args:
            binary_path: Command used to launch the CLI; it becomes the first
                part of the shell command line.
        """
        self.binary_path = binary_path
        self.last_usage = None      # "stats"/"usage" payload of the last result event
        self.session_id = None      # session id reported by the CLI, reused via --resume
        self.last_latency = 0.0     # wall-clock seconds taken by the last send()

    def count_tokens(self, contents: list[str]) -> int:
        """
        Counts the tokens for a list of string contents using a character-based estimation.
        Approximates tokens by assuming 4 characters per token.
        This replaces the broken 'gemini count' CLI call.
        """
        return len("\n".join(contents)) // 4

    def _build_command(self, model) -> str:
        """Builds the shell command line for one CLI invocation."""
        command_parts = [self.binary_path]
        if model:
            command_parts.extend(['-m', f'"{model}"'])
        # Use an empty string placeholder; the real prompt is sent on stdin.
        command_parts.extend(['--prompt', '""'])
        if self.session_id:
            command_parts.extend(['--resume', self.session_id])
        command_parts.extend(['--output-format', 'stream-json'])
        return " ".join(command_parts)

    def _consume_stream(self, stdout_content: str):
        """
        Parses the CLI's line-delimited JSON output.

        Updates ``session_id`` and ``last_usage`` from init/result events.
        Lines that are blank, not valid JSON, or not JSON objects are skipped.

        Returns:
            A ``(text, tool_calls)`` tuple: the concatenated assistant text and
            the tool calls normalized to ``name``/``args``/``id`` dicts.
        """
        text_parts = []
        tool_calls = []
        for line in stdout_content.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(data, dict):
                # A bare JSON array/number/string has no .get(); without this
                # check one such line aborted the whole call.
                continue
            msg_type = data.get("type")
            if msg_type == "init":
                if "session_id" in data:
                    self.session_id = data.get("session_id")
            elif msg_type == "message":
                # CRITICAL: Only accumulate content from the assistant/model role.
                # The CLI echoes back the 'user' prompt in the stream, which we must skip.
                if data.get("role", "") in ["assistant", "model"]:
                    content = data.get("content", data.get("text"))
                    if content:
                        text_parts.append(content)
            elif msg_type == "result":
                self.last_usage = data.get("stats") or data.get("usage")
                if "session_id" in data:
                    self.session_id = data.get("session_id")
            elif msg_type == "tool_use":
                # Standardize format for ai_client.py
                # Real CLI might use 'tool_name'/'tool_id'/'parameters'
                # or 'name'/'id'/'args'. We'll map to 'name'/'id'/'args'.
                tc = {
                    "name": data.get("tool_name", data.get("name")),
                    "args": data.get("parameters", data.get("args", {})),
                    "id": data.get("tool_id", data.get("id")),
                }
                if tc["name"]:
                    tool_calls.append(tc)
        return "".join(text_parts), tool_calls

    def send(self, message, safety_settings=None, system_instruction=None, model: str = None):
        """
        Sends a message to the Gemini CLI and processes the streaming JSON output.
        Logs the CLI call details using session_logger.log_cli_call.
        System instruction is prepended to the message.
        Uses --prompt flag with a placeholder and sends the content via stdin.

        Args:
            message: The prompt text.
            safety_settings: Accepted for interface compatibility; not used here.
            system_instruction: Optional text placed before the message.
            model: Optional model name passed to the CLI with ``-m``.

        Returns:
            A dict with ``text``, ``tool_calls`` and ``stderr`` keys.
        """
        start_time = time.time()
        command = self._build_command(model)
        # Construct the prompt text by prepending system_instruction if available
        prompt_text = f"{system_instruction}\n\n{message}" if system_instruction else message
        accumulated_text = ""
        tool_calls = []
        env = os.environ.copy()
        env["GEMINI_CLI_HOOK_CONTEXT"] = "manual_slop"
        process = None
        stdout_content = ""
        stderr_content = ""
        try:
            # NOTE(review): the command is a single shell string (shell=True) and
            # binary_path, model and session_id are interpolated unescaped —
            # confirm none of them can come from untrusted input.
            process = subprocess.Popen(
                command,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                shell=True,
                env=env,
            )
            stdout_content, stderr_content = process.communicate(input=prompt_text)
            accumulated_text, tool_calls = self._consume_stream(stdout_content)
        except Exception:
            if process:
                process.kill()
            raise
        finally:
            # Latency is recorded, and the call logged, even when the call failed.
            current_latency = time.time() - start_time
            if process:
                session_logger.open_session()
                session_logger.log_cli_call(
                    command=command,
                    stdin_content=prompt_text,
                    stdout_content=stdout_content,
                    stderr_content=stderr_content,
                    latency=current_latency,
                )
            self.last_latency = current_latency
        return {
            "text": accumulated_text,
            "tool_calls": tool_calls,
            "stderr": stderr_content,
        }

6029
gui_2.py

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -22,8 +22,8 @@ def normal_func():
tree = parser.parse(bytes(code, "utf8"))
def print_node(node, indent=0):
    """
    Prints *node* and all its descendants, one line per node, depth-first.

    Each line shows the node type and byte range. For decorator, comment and
    identifier nodes the matching slice of the module-level ``code`` string is
    appended in quotes.

    Args:
        node: The tree-sitter node to print.
        indent: Nesting depth; the line is prefixed with that many spaces.
    """
    label = f"{node.type} [{node.start_byte}-{node.end_byte}] "
    if node.type in ["decorator", "comment", "identifier"]:
        label += f"'{code[node.start_byte:node.end_byte]}'"
    print(" " * indent + label)
    for child in node.children:
        print_node(child, indent + 1)
print_node(tree.root_node)

View File

@@ -4,24 +4,25 @@ from datetime import datetime, timedelta
from log_registry import LogRegistry
class LogPruner:
    """
    Handles the automated deletion of old and insignificant session logs.
    Ensures that only whitelisted or significant sessions (based on size/content)
    are preserved long-term.
    """

    def __init__(self, log_registry: "LogRegistry", logs_dir: str):
        """
        Initializes the LogPruner.

        Args:
            log_registry: An instance of LogRegistry to check session data.
            logs_dir: The path to the directory containing session sub-directories.
        """
        self.log_registry = log_registry
        self.logs_dir = logs_dir

    @staticmethod
    def _directory_size(path: str) -> int:
        """Returns the total size in bytes of the files directly inside *path*."""
        return sum(entry.stat().st_size for entry in os.scandir(path) if entry.is_file())

    def prune(self, max_age_hours: float = 24, min_size_bytes: int = 2048):
        """
        Prunes old and small session directories from the logs directory.

        A session directory is deleted when both of these hold:
        1. The registry returns it from ``get_old_non_whitelisted_sessions`` for
           a cutoff of ``max_age_hours`` before now.
        2. The total size of the files directly inside it is less than
           ``min_size_bytes``.

        Nothing happens if ``logs_dir`` is not an existing directory. Sessions
        whose directory cannot be read or removed are skipped silently.

        Args:
            max_age_hours: Age cutoff passed to the registry (default 24).
            min_size_bytes: Size below which a session is pruned (default 2048, i.e. 2KB).
        """
        # Ensure the base logs directory exists.
        if not os.path.isdir(self.logs_dir):
            return
        cutoff_time = datetime.now() - timedelta(hours=max_age_hours)
        # Get sessions that are old and not whitelisted from the registry
        for session_info in self.log_registry.get_old_non_whitelisted_sessions(cutoff_time):
            session_path = session_info['path']
            if not session_path or not os.path.isdir(session_path):
                continue
            try:
                total_size = self._directory_size(session_path)
            except OSError:
                # Best effort: an unreadable session is left alone.
                continue
            if total_size < min_size_bytes:
                try:
                    shutil.rmtree(session_path)
                except OSError:
                    # Best effort: a failed delete must not stop the remaining sessions.
                    pass

View File

@@ -4,85 +4,85 @@ from datetime import datetime
import os
class LogRegistry:
"""
"""
Manages a persistent registry of session logs using a TOML file.
Tracks session paths, start times, whitelisting status, and metadata.
"""
def __init__(self, registry_path):
    """
    Initializes the LogRegistry with a path to the registry file.

    Any existing registry at that path is loaded immediately.

    Args:
        registry_path (str): The file path to the TOML registry.
    """
    self.registry_path = registry_path
    self.data = {}
    self.load_registry()
def load_registry(self):
"""
def load_registry(self):
"""
Loads the registry data from the TOML file into memory.
Handles date/time conversions from TOML-native formats to strings for consistency.
"""
if os.path.exists(self.registry_path):
try:
with open(self.registry_path, 'rb') as f:
loaded_data = tomllib.load(f)
# Keep data as it is from TOML (strings or native datetimes)
# If we want to satisfy tests that expect strings, we ensure they are strings.
self.data = {}
for session_id, session_data in loaded_data.items():
new_session_data = session_data.copy()
# If tomllib parsed it as a datetime, convert it back to string for the tests
if 'start_time' in new_session_data and isinstance(new_session_data['start_time'], datetime):
new_session_data['start_time'] = new_session_data['start_time'].isoformat()
if 'metadata' in new_session_data and isinstance(new_session_data['metadata'], dict):
m = new_session_data['metadata']
if 'timestamp' in m and isinstance(m['timestamp'], datetime):
m['timestamp'] = m['timestamp'].isoformat()
self.data[session_id] = new_session_data
except Exception as e:
print(f"Error loading registry from {self.registry_path}: {e}")
self.data = {}
else:
self.data = {}
if os.path.exists(self.registry_path):
try:
with open(self.registry_path, 'rb') as f:
loaded_data = tomllib.load(f)
# Keep data as it is from TOML (strings or native datetimes)
# If we want to satisfy tests that expect strings, we ensure they are strings.
self.data = {}
for session_id, session_data in loaded_data.items():
new_session_data = session_data.copy()
# If tomllib parsed it as a datetime, convert it back to string for the tests
if 'start_time' in new_session_data and isinstance(new_session_data['start_time'], datetime):
new_session_data['start_time'] = new_session_data['start_time'].isoformat()
if 'metadata' in new_session_data and isinstance(new_session_data['metadata'], dict):
m = new_session_data['metadata']
if 'timestamp' in m and isinstance(m['timestamp'], datetime):
m['timestamp'] = m['timestamp'].isoformat()
self.data[session_id] = new_session_data
except Exception as e:
print(f"Error loading registry from {self.registry_path}: {e}")
self.data = {}
else:
self.data = {}
def save_registry(self):
    """
    Serializes and saves the current registry data to the TOML file.

    Entries whose value is None are left out, at both session and metadata
    level. ``start_time`` and ``metadata.timestamp`` datetimes are written as
    ISO-format strings.

    Errors are printed, not raised. The data is serialized before the file is
    opened, so a serialization failure leaves the existing file untouched.
    """
    def _clean(mapping, datetime_key):
        # Drop None values and stringify the one datetime-valued key.
        return {
            key: value.isoformat() if key == datetime_key and isinstance(value, datetime) else value
            for key, value in mapping.items()
            if value is not None
        }

    try:
        data_to_save = {}
        for session_id, session_data in self.data.items():
            session_copy = _clean(session_data, 'start_time')
            if isinstance(session_copy.get('metadata'), dict):
                session_copy['metadata'] = _clean(session_copy['metadata'], 'timestamp')
            data_to_save[session_id] = session_copy
        # Opening with 'wb' truncates the file, so serialize first.
        payload = tomli_w.dumps(data_to_save).encode("utf-8")
        with open(self.registry_path, 'wb') as f:
            f.write(payload)
    except Exception as e:
        print(f"Error saving registry to {self.registry_path}: {e}")
def register_session(self, session_id, path, start_time):
    """
    Registers a new session in the registry and saves the registry.

    An existing entry with the same id is overwritten after printing a warning.
    New sessions start as not whitelisted and without metadata.

    Args:
        session_id (str): Unique identifier for the session.
        path (str): File path to the session's log directory.
        start_time (datetime|str): The timestamp when the session started.
    """
    if session_id in self.data:
        print(f"Warning: Session ID '{session_id}' already exists. Overwriting.")
    # Store start_time internally as a string; other values are stored as given.
    start_time_str = start_time.isoformat() if isinstance(start_time, datetime) else start_time
    self.data[session_id] = {
        'path': path,
        'start_time': start_time_str,
        'whitelisted': False,
        'metadata': None,
    }
    self.save_registry()
def update_session_metadata(self, session_id, message_count, errors, size_kb, whitelisted, reason):
    """
    Updates metadata fields for an existing session and saves the registry.

    If the session id is unknown, an error is printed and nothing is saved.

    Args:
        session_id (str): Identifier of the session to update.
        message_count: Value stored under ``metadata['message_count']``.
        errors: Value stored under ``metadata['errors']``.
        size_kb: Value stored under ``metadata['size_kb']``.
        whitelisted (bool): Whether the session should be protected from pruning.
        reason (str): Explanation for the current whitelisting status.
    """
    if session_id not in self.data:
        print(f"Error: Session ID '{session_id}' not found for metadata update.")
        return
    session = self.data[session_id]
    # Ensure metadata exists
    if session.get('metadata') is None:
        session['metadata'] = {}
    session['metadata'].update({
        'message_count': message_count,
        'errors': errors,
        'size_kb': size_kb,
        'whitelisted': whitelisted,
        'reason': reason,
    })
    # Also update the top-level whitelisted flag if provided
    if whitelisted is not None:
        session['whitelisted'] = whitelisted
    self.save_registry()
def is_session_whitelisted(self, session_id):
    """
    Checks if a specific session is marked as whitelisted.

    Only the top-level ``whitelisted`` flag is consulted, not the copy kept
    inside the session's metadata.

    Args:
        session_id (str): Identifier of the session to check.

    Returns:
        bool: True if whitelisted, False otherwise (including unknown sessions).
    """
    session_data = self.data.get(session_id)
    if session_data is None:
        return False  # Non-existent sessions are not whitelisted
    return session_data.get('whitelisted', False)
def update_auto_whitelist_status(self, session_id: str):
    """
    Analyzes session logs and updates whitelisting status based on heuristics.

    Sessions are automatically whitelisted if they contain error keywords,
    have a high message count, or exceed a size threshold. The checks are
    applied in that order and the first match supplies the recorded reason.

    The scan is best-effort: filesystem errors are swallowed and whatever was
    gathered up to that point is still written to the registry through
    ``self.update_session_metadata``.

    Args:
        session_id (str): Unique identifier for the session to analyze.
            Unknown ids, and sessions whose 'path' is missing or is not a
            directory, are ignored.
    """
    if session_id not in self.data:
        return
    session_data = self.data[session_id]
    session_path = session_data.get('path')
    if not session_path or not os.path.isdir(session_path):
        return

    total_size_bytes = 0
    message_count = 0
    found_keywords = []
    keywords_to_check = ['ERROR', 'WARNING', 'EXCEPTION']
    try:
        # The context manager closes the directory iterator even when the
        # scan is abandoned half-way by an error.
        with os.scandir(session_path) as entries:
            for entry in entries:
                if not entry.is_file():
                    continue
                total_size_bytes += entry.stat().st_size
                # Only comms.log is analyzed for messages and keywords;
                # every line of it counts as one message.
                if entry.name != "comms.log":
                    continue
                try:
                    with open(entry.path, 'r', encoding='utf-8', errors='ignore') as f:
                        for line in f:
                            message_count += 1
                            for kw in keywords_to_check:
                                if kw in line and kw not in found_keywords:
                                    found_keywords.append(kw)
                except OSError:
                    # Unreadable log: keep the counts gathered so far.
                    pass
    except OSError:
        # Directory vanished or became unreadable mid-scan: best effort.
        pass

    size_kb = total_size_bytes / 1024
    whitelisted = False
    reason = ""
    if found_keywords:
        whitelisted = True
        reason = f"Found keywords: {', '.join(found_keywords)}"
    elif message_count > 10:
        whitelisted = True
        reason = f"High message count: {message_count}"
    elif size_kb > 50:
        whitelisted = True
        reason = f"Large session size: {size_kb:.1f} KB"

    self.update_session_metadata(
        session_id,
        message_count=message_count,
        # 'errors' is the number of distinct keywords seen, not the number
        # of matching lines.
        errors=len(found_keywords),
        size_kb=int(size_kb),
        whitelisted=whitelisted,
        reason=reason
    )
def get_old_non_whitelisted_sessions(self, cutoff_datetime):
    """
    Retrieves a list of sessions that are older than a specific cutoff time
    and are not marked as whitelisted.

    Args:
        cutoff_datetime: ``datetime`` to compare each session's start time
            against; sessions that started strictly before it qualify.
            NOTE(review): naive and timezone-aware datetimes cannot be
            compared, so this must match how 'start_time' was stored.

    Returns:
        list: A list of dictionaries containing session details
        ('session_id', 'path', 'start_time'). 'start_time' is returned
        exactly as stored (ISO string or datetime), not the parsed value.
    """
    old_sessions = []
    for session_id, session_data in self.data.items():
        start_time_raw = session_data.get('start_time')
        if isinstance(start_time_raw, str):
            try:
                start_time = datetime.fromisoformat(start_time_raw)
            except ValueError:
                start_time = None
        else:
            start_time = start_time_raw
        # Sessions without a usable start time are never reported: missing,
        # unparsable, or not a datetime at all (which would otherwise raise
        # TypeError in the comparison below).
        if not isinstance(start_time, datetime):
            continue
        is_whitelisted = session_data.get('whitelisted', False)
        if start_time < cutoff_datetime and not is_whitelisted:
            old_sessions.append({
                'session_id': session_id,
                'path': session_data.get('path'),
                'start_time': start_time_raw
            })
    return old_sessions

File diff suppressed because it is too large Load Diff

View File

@@ -143,11 +143,11 @@ Return the Ticket set in Godot ECS Flat List format (JSON array).
"""
# Lookup table from prompt key to the prompt template constant defined
# earlier in this module. Each key is listed exactly once.
PROMPTS: Dict[str, str] = {
    "tier1_epic_init": TIER1_EPIC_INIT,
    "tier1_track_delegation": TIER1_TRACK_DELEGATION,
    "tier1_macro_merge": TIER1_MACRO_MERGE,
    "tier2_sprint_planning": TIER2_SPRINT_PLANNING,
    "tier2_code_review": TIER2_CODE_REVIEW,
    "tier2_track_finalization": TIER2_TRACK_FINALIZATION,
    "tier2_contract_first": TIER2_CONTRACT_FIRST,
}

244
models.py
View File

@@ -4,156 +4,152 @@ from datetime import datetime
@dataclass
class Ticket:
    """
    Represents a discrete unit of work within a track.
    """
    id: str
    description: str
    # Status values used elsewhere in this codebase include "todo",
    # "in_progress", "blocked" and "completed".
    status: str
    assigned_to: str
    target_file: Optional[str] = None
    context_requirements: List[str] = field(default_factory=list)
    # IDs of tickets that must be completed before this one can run.
    depends_on: List[str] = field(default_factory=list)
    blocked_reason: Optional[str] = None
    step_mode: bool = False

    def mark_blocked(self, reason: str) -> None:
        """Sets the ticket status to 'blocked' and records the reason."""
        self.status = "blocked"
        self.blocked_reason = reason

    def mark_complete(self) -> None:
        """Sets the ticket status to 'completed'."""
        self.status = "completed"

    def to_dict(self) -> Dict[str, Any]:
        """Returns a plain-dict snapshot of the ticket.

        The list fields are copied so that mutating the returned dict does
        not silently mutate the ticket.
        """
        return {
            "id": self.id,
            "description": self.description,
            "status": self.status,
            "assigned_to": self.assigned_to,
            "target_file": self.target_file,
            "context_requirements": list(self.context_requirements),
            "depends_on": list(self.depends_on),
            "blocked_reason": self.blocked_reason,
            "step_mode": self.step_mode,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Ticket":
        """Builds a ticket from a dict such as the one produced by to_dict().

        Only 'id' is required (KeyError if absent). Missing or null list
        fields become fresh empty lists; present lists are copied so the
        ticket never aliases the caller's data.
        """
        return cls(
            id=data["id"],
            description=data.get("description"),
            status=data.get("status"),
            assigned_to=data.get("assigned_to"),
            target_file=data.get("target_file"),
            context_requirements=list(data.get("context_requirements") or []),
            depends_on=list(data.get("depends_on") or []),
            blocked_reason=data.get("blocked_reason"),
            step_mode=data.get("step_mode", False),
        )
@dataclass
class Track:
    """
    Represents a collection of tickets that together form an architectural track or epic.
    """
    id: str
    description: str
    tickets: List[Ticket] = field(default_factory=list)

    def get_executable_tickets(self) -> List[Ticket]:
        """
        Returns all 'todo' tickets whose dependencies are all 'completed'.

        A dependency ID that does not belong to any ticket in this track is
        treated as not completed, so the dependent ticket is never returned.
        Tickets are returned in their order within ``self.tickets``.
        """
        # Map ticket IDs to their current status for efficient lookup
        status_map = {t.id: t.status for t in self.tickets}
        return [
            ticket
            for ticket in self.tickets
            if ticket.status == "todo"
            and all(status_map.get(dep_id) == "completed" for dep_id in ticket.depends_on)
        ]
@dataclass
class WorkerContext:
    """
    Represents the context provided to a Tier 3 Worker for a specific ticket.
    """
    ticket_id: str
    model_name: str
    # Message dicts for the worker; their schema is not defined here.
    messages: List[dict]
@dataclass
class Metadata:
    """Identifying and bookkeeping information for a track."""
    id: str
    name: str
    status: str
    # Optional because from_dict() yields None when a timestamp is absent or
    # empty, and to_dict() already tolerates None.
    created_at: Optional[datetime]
    updated_at: Optional[datetime]

    def to_dict(self) -> Dict[str, Any]:
        """Returns a plain dict with the timestamps rendered as ISO-8601 strings (or None)."""
        return {
            "id": self.id,
            "name": self.name,
            "status": self.status,
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Metadata":
        """Builds a Metadata from a dict such as the one produced by to_dict().

        'id' and 'name' are required (KeyError if absent). Timestamps are
        parsed with datetime.fromisoformat, so a malformed value raises
        ValueError.
        """
        return cls(
            id=data["id"],
            name=data["name"],
            status=data.get("status"),
            created_at=datetime.fromisoformat(data['created_at']) if data.get('created_at') else None,
            updated_at=datetime.fromisoformat(data['updated_at']) if data.get('updated_at') else None,
        )
@dataclass
class TrackState:
    """Serializable snapshot of a track: its metadata, discussion log and tasks."""
    metadata: Metadata
    discussion: List[Dict[str, Any]]
    tasks: List[Ticket]

    @staticmethod
    def _parse_discussion_value(value: Any) -> Any:
        """Turns an ISO-8601 datetime string back into a datetime; returns anything else unchanged.

        The 'T' test is only a cheap pre-filter. Ordinary text containing a
        capital T (e.g. "Test") is not a timestamp, so a failed parse keeps
        the original string instead of raising.
        """
        if isinstance(value, str) and 'T' in value:
            try:
                return datetime.fromisoformat(value)
            except ValueError:
                return value
        return value

    def to_dict(self) -> Dict[str, Any]:
        """Returns a plain dict; datetime values inside discussion items become ISO strings."""
        return {
            "metadata": self.metadata.to_dict(),
            "discussion": [
                {
                    k: v.isoformat() if isinstance(v, datetime) else v
                    for k, v in item.items()
                }
                for item in self.discussion
            ],
            "tasks": [task.to_dict() for task in self.tasks],
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "TrackState":
        """Rebuilds a TrackState from a dict such as the one produced by to_dict().

        Requires the 'metadata', 'tasks' and 'discussion' keys (KeyError if
        absent).
        """
        metadata = Metadata.from_dict(data["metadata"])
        tasks = [Ticket.from_dict(task_data) for task_data in data["tasks"]]
        return cls(
            metadata=metadata,
            discussion=[
                {k: cls._parse_discussion_value(v) for k, v in item.items()}
                for item in data["discussion"]
            ],
            tasks=tasks,
        )

View File

@@ -13,231 +13,208 @@ from pathlib import Path
from dag_engine import TrackDAG, ExecutionEngine
class ConductorEngine:
    """
    Orchestrates the execution of tickets within a track.
    """

    def __init__(self, track: Track, event_queue: Optional[events.AsyncEventQueue] = None, auto_queue: bool = False):
        """
        Args:
            track: The track whose tickets are executed.
            event_queue: Optional queue that receives state updates and
                approval requests; when None, no events are pushed.
            auto_queue: Forwarded to the ExecutionEngine.
        """
        self.track = track
        self.event_queue = event_queue
        # Token counters per tier, reported with every state update.
        self.tier_usage = {
            "Tier 1": {"input": 0, "output": 0},
            "Tier 2": {"input": 0, "output": 0},
            "Tier 3": {"input": 0, "output": 0},
            "Tier 4": {"input": 0, "output": 0},
        }
        self.dag = TrackDAG(self.track.tickets)
        self.engine = ExecutionEngine(self.dag, auto_queue=auto_queue)

    async def _push_state(self, status: str = "running", active_tier: Optional[str] = None):
        """Publishes an 'mma_state_update' event with the current track state; no-op without a queue."""
        if not self.event_queue:
            return
        payload = {
            "status": status,
            "active_tier": active_tier,
            "tier_usage": self.tier_usage,
            "track": {
                "id": self.track.id,
                "title": self.track.description,
            },
            "tickets": [asdict(t) for t in self.track.tickets]
        }
        await self.event_queue.put("mma_state_update", payload)

    def parse_json_tickets(self, json_str: str):
        """
        Parses a JSON string of ticket definitions (Godot ECS Flat List format)
        and populates the Track's ticket list.

        Each definition needs 'id' and 'description'; the other fields fall
        back to defaults. Errors are printed, not raised. The batch is
        all-or-nothing: if any definition is invalid, no ticket is added, so
        the track and the DAG never get out of sync.
        """
        try:
            data = json.loads(json_str)
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON tickets: {e}")
            return
        if not isinstance(data, list):
            print("Error: JSON input must be a list of ticket definitions.")
            return
        try:
            # Construct every Ticket before touching the track, using
            # defaults for optional fields.
            new_tickets = [
                Ticket(
                    id=ticket_data["id"],
                    description=ticket_data["description"],
                    status=ticket_data.get("status", "todo"),
                    assigned_to=ticket_data.get("assigned_to", "unassigned"),
                    target_file=ticket_data.get("target_file"),
                    context_requirements=ticket_data.get("context_requirements", []),
                    depends_on=ticket_data.get("depends_on", []),
                    step_mode=ticket_data.get("step_mode", False)
                )
                for ticket_data in data
            ]
        except KeyError as e:
            print(f"Missing required field in ticket definition: {e}")
            return
        self.track.tickets.extend(new_tickets)
        # Rebuild DAG and Engine after parsing new tickets
        self.dag = TrackDAG(self.track.tickets)
        self.engine = ExecutionEngine(self.dag, auto_queue=self.engine.auto_queue)

    async def run(self, md_content: str = ""):
        """
        Main execution loop using the DAG engine.

        Runs until every ticket is completed ("done") or nothing can make
        progress ("blocked").

        Args:
            md_content: The full markdown context (history + files) for AI workers.
        """
        await self._push_state(status="running", active_tier="Tier 2 (Tech Lead)")
        # We are inside a coroutine, so a running loop is guaranteed.
        loop = asyncio.get_running_loop()
        while True:
            # 1. Identify ready tasks
            ready_tasks = self.engine.tick()
            # 2. Check for completion or blockage
            if not ready_tasks:
                if all(t.status == "completed" for t in self.track.tickets):
                    print("Track completed successfully.")
                    await self._push_state(status="done", active_tier=None)
                    break
                if any(t.status == "in_progress" for t in self.track.tickets):
                    # Wait for async tasks to complete
                    await asyncio.sleep(1)
                    continue
                print("No more executable tickets. Track is blocked or finished.")
                await self._push_state(status="blocked", active_tier=None)
                break
            # 3. Process ready tasks
            executed_any = False
            for ticket in ready_tasks:
                # If auto_queue is on and step_mode is off, engine.tick() already marked it 'in_progress'
                # but we need to verify and handle the lifecycle.
                if ticket.status == "in_progress" or (not ticket.step_mode and self.engine.auto_queue):
                    executed_any = True
                    ticket.status = "in_progress"
                    print(f"Executing ticket {ticket.id}: {ticket.description}")
                    await self._push_state(active_tier=f"Tier 3 (Worker): {ticket.id}")
                    context = WorkerContext(
                        ticket_id=ticket.id,
                        model_name="gemini-2.5-flash-lite",
                        messages=[]
                    )
                    # Offload the blocking lifecycle call to a thread to avoid blocking the async event loop.
                    # We pass the md_content so the worker has full context.
                    context_files = ticket.context_requirements if ticket.context_requirements else None
                    await loop.run_in_executor(
                        None,
                        run_worker_lifecycle,
                        ticket,
                        context,
                        context_files,
                        self.event_queue,
                        self,
                        md_content
                    )
                    await self._push_state(active_tier="Tier 2 (Tech Lead)")
                elif ticket.status == "todo" and (ticket.step_mode or not self.engine.auto_queue):
                    # Task is ready but needs approval
                    print(f"Ticket {ticket.id} is ready and awaiting approval.")
                    await self._push_state(active_tier=f"Awaiting Approval: {ticket.id}")
                    # In a real UI, this would wait for a user event.
                    # For now, we'll treat it as a pause point if not auto-queued.
            if not executed_any:
                # Every ready ticket is waiting for approval. Yield for a
                # moment instead of spinning and flooding the event queue
                # with identical "Awaiting Approval" updates.
                await asyncio.sleep(1)
def confirm_execution(payload: str, event_queue: events.AsyncEventQueue, ticket_id: str) -> bool:
    """
    Pushes an approval request to the GUI and waits for response.

    Blocks the calling thread: first for up to 60 seconds until the GUI
    places a dialog object into the shared container, then for as long as
    that dialog's ``wait()`` takes.

    Args:
        payload: Text describing what is about to be executed.
        event_queue: Queue the "mma_step_approval" request is pushed to.
        ticket_id: ID of the ticket the request belongs to.

    Returns:
        The approval flag reported by the dialog, or False if no dialog
        appeared before the timeout.
    """
    # We use a list container so the GUI can inject the actual Dialog object back to us
    # since the dialog is created in the GUI thread.
    dialog_container = [None]
    task = {
        "action": "mma_step_approval",
        "ticket_id": ticket_id,
        "payload": payload,
        "dialog_container": dialog_container
    }
    # Push to queue
    # NOTE(review): the fallback reaches into the queue's private `_queue`;
    # confirm that this is safe from the thread this function runs on.
    try:
        loop = asyncio.get_event_loop()
        if loop.is_running():
            asyncio.run_coroutine_threadsafe(event_queue.put("mma_step_approval", task), loop)
        else:
            event_queue._queue.put_nowait(("mma_step_approval", task))
    except Exception:
        # Fallback if no loop
        event_queue._queue.put_nowait(("mma_step_approval", task))
    # Wait for the GUI to create the dialog and for the user to respond.
    # A monotonic clock keeps the timeout correct if the wall clock jumps.
    deadline = time.monotonic() + 60
    while dialog_container[0] is None and time.monotonic() < deadline:
        time.sleep(0.1)
    if dialog_container[0]:
        approved, _final_payload = dialog_container[0].wait()
        return approved
    return False
def confirm_spawn(role: str, prompt: str, context_md: str, event_queue: events.AsyncEventQueue, ticket_id: str) -> Tuple[bool, str, str]:
    """
    Pushes a spawn approval request to the GUI and waits for response.
    Returns (approved, modified_prompt, modified_context)

    Blocks the calling thread: first for up to 60 seconds until the GUI
    places a dialog object into the shared container, then for as long as
    that dialog's ``wait()`` takes. If no dialog appears in time, the
    request counts as rejected and the original prompt/context are returned.

    The dialog may answer with a dict ('approved', 'abort', 'prompt',
    'context_md') or with the older ``(approved, payload)`` tuple.
    """
    dialog_container = [None]
    task = {
        "action": "mma_spawn_approval",
        "ticket_id": ticket_id,
        "role": role,
        "prompt": prompt,
        "context_md": context_md,
        "dialog_container": dialog_container
    }
    # Push to queue
    # NOTE(review): the fallback reaches into the queue's private `_queue`;
    # confirm that this is safe from the thread this function runs on.
    try:
        loop = asyncio.get_event_loop()
        if loop.is_running():
            asyncio.run_coroutine_threadsafe(event_queue.put("mma_spawn_approval", task), loop)
        else:
            event_queue._queue.put_nowait(("mma_spawn_approval", task))
    except Exception:
        # Fallback if no loop
        event_queue._queue.put_nowait(("mma_spawn_approval", task))
    # Wait for the GUI to create the dialog and for the user to respond.
    # A monotonic clock keeps the timeout correct if the wall clock jumps.
    deadline = time.monotonic() + 60
    while dialog_container[0] is None and time.monotonic() < deadline:
        time.sleep(0.1)
    if not dialog_container[0]:
        return False, prompt, context_md
    res = dialog_container[0].wait()
    if isinstance(res, dict):
        approved = res.get("approved", False)
        abort = res.get("abort", False)
        modified_prompt = res.get("prompt", prompt)
        modified_context = res.get("context_md", context_md)
        # An explicit abort overrides an approval.
        return approved and not abort, modified_prompt, modified_context
    # Fallback for old tuple style if any
    approved, final_payload = res
    modified_prompt = prompt
    modified_context = context_md
    if isinstance(final_payload, dict):
        modified_prompt = final_payload.get("prompt", prompt)
        modified_context = final_payload.get("context_md", context_md)
    return approved, modified_prompt, modified_context
def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files: Optional[List[str]] = None, event_queue: Optional[events.AsyncEventQueue] = None, engine: Optional['ConductorEngine'] = None, md_content: str = ""):
    """
    Simulates the lifecycle of a single agent working on a ticket.
    Calls the AI client and updates the ticket status based on the response.

    Args:
        ticket: The ticket to work on; it is marked blocked or completed
            in place.
        context: The worker context for this ticket. It is accepted for
            interface compatibility but is not read by this function.
        context_files: Paths of files to inject into the prompt. The first
            file is rendered with the parser's curated view, the rest with
            its skeleton view.
        event_queue: When provided, the spawn (and, in step mode, each tool
            execution) must be approved through the GUI.
        engine: The conductor engine.
        md_content: The markdown context (history + files) for AI workers.

    Returns:
        The AI response text, or "BLOCKED: Spawn rejected by user." when the
        spawn approval is denied.
    """
    # Enforce Context Amnesia: each ticket starts with a clean slate.
    ai_client.reset_session()
    context_injection = ""
    if context_files:
        parser = ASTParser(language="python")
        for i, file_path in enumerate(context_files):
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                if i == 0:
                    view = parser.get_curated_view(content)
                else:
                    view = parser.get_skeleton(content)
                context_injection += f"\nFile: {file_path}\n{view}\n"
            except Exception as e:
                # Best effort: a file that cannot be read or parsed is
                # reported inside the prompt instead of aborting the ticket.
                context_injection += f"\nError reading {file_path}: {e}\n"
    # Build a prompt for the worker
    user_message = (
        f"You are assigned to Ticket {ticket.id}.\n"
        f"Task Description: {ticket.description}\n"
    )
    if context_injection:
        user_message += f"\nContext Files:\n{context_injection}\n"
    user_message += (
        "Please complete this task. If you are blocked and cannot proceed, "
        "start your response with 'BLOCKED' and explain why."
    )
    # HITL Clutch: call confirm_spawn if event_queue is provided
    if event_queue:
        approved, modified_prompt, modified_context = confirm_spawn(
            role="Tier 3 Worker",
            prompt=user_message,
            context_md=md_content,
            event_queue=event_queue,
            ticket_id=ticket.id
        )
        if not approved:
            ticket.mark_blocked("Spawn rejected by user.")
            return "BLOCKED: Spawn rejected by user."
        # The user may have edited the prompt and the context in the dialog.
        user_message = modified_prompt
        md_content = modified_context

    # HITL Clutch: pass the queue and ticket_id to confirm_execution
    def clutch_callback(payload: str) -> bool:
        if not event_queue:
            return True
        return confirm_execution(payload, event_queue, ticket.id)

    response = ai_client.send(
        md_content=md_content,
        user_message=user_message,
        base_dir=".",
        pre_tool_callback=clutch_callback if ticket.step_mode else None,
        qa_callback=ai_client.run_tier4_analysis
    )
    # Update usage in engine if provided
    if engine:
        # Placeholder: with an empty stats dict these additions are no-ops.
        stats = {}  # ai_client.get_token_stats() is not available
        engine.tier_usage["Tier 3"]["input"] += stats.get("prompt_tokens", 0)
        engine.tier_usage["Tier 3"]["output"] += stats.get("candidates_tokens", 0)
    # NOTE(review): the prompt asks the model to *start* with 'BLOCKED', but
    # this matches the word anywhere in the response (case-insensitively),
    # so a reply that merely mentions "unblocked" is also treated as blocked.
    # Kept as is because a false "blocked" is safer than a false "completed".
    if "BLOCKED" in response.upper():
        ticket.mark_blocked(response)
    else:
        ticket.mark_complete()
    return response

View File

@@ -9,126 +9,106 @@ from pathlib import Path
CONDUCTOR_PATH = Path("conductor")
def get_track_history_summary() -> str:
    """
    Scans conductor/archive/ and conductor/tracks/ to build a summary of past work.

    For every sub-directory, the title and status come from metadata.json
    (falling back to the directory name and "unknown") and the overview from
    the "## Overview" section of spec.md (falling back to its first 200
    characters). Unreadable or malformed files are skipped silently.

    Returns:
        One "Track / Status / Overview" entry per track, joined by newlines,
        or "No previous tracks found." when there is none.
    """
    summary_parts = []
    paths_to_scan = []
    for root in (CONDUCTOR_PATH / "archive", CONDUCTOR_PATH / "tracks"):
        # is_dir() also rejects a plain file of that name, on which
        # iterdir() would raise. Sorting keeps the summary order stable
        # across runs and platforms.
        if root.is_dir():
            paths_to_scan.extend(sorted(root.iterdir()))
    for track_dir in paths_to_scan:
        if not track_dir.is_dir():
            continue
        metadata_file = track_dir / "metadata.json"
        spec_file = track_dir / "spec.md"
        title = track_dir.name
        status = "unknown"
        overview = "No overview available."
        if metadata_file.exists():
            try:
                with open(metadata_file, "r", encoding="utf-8") as f:
                    meta = json.load(f)
                if isinstance(meta, dict):
                    title = meta.get("title", title)
                    status = meta.get("status", status)
            except (OSError, ValueError):
                # ValueError covers malformed JSON and undecodable bytes.
                pass
        if spec_file.exists():
            try:
                with open(spec_file, "r", encoding="utf-8") as f:
                    content = f.read()
                # Basic extraction of Overview section if it exists
                if "## Overview" in content:
                    # Everything after the heading, up to the next "##".
                    overview = content.split("## Overview")[1].split("##")[0].strip()
                else:
                    # Just take a snippet of the beginning
                    overview = content[:200] + "..."
            except (OSError, ValueError):
                pass
        summary_parts.append(f"Track: {title}\nStatus: {status}\nOverview: {overview}\n---")
    if not summary_parts:
        return "No previous tracks found."
    return "\n".join(summary_parts)
def generate_tracks(user_request: str, project_config: dict, file_items: list[dict], history_summary: str = None) -> list[dict]:
    """
    Tier 1 (Strategic PM) call.

    Builds a prompt from the user request, a summary map of ``file_items`` and
    (optionally) a summary of earlier tracks, sends it through ``ai_client``
    with the ``tier1_epic_init`` system prompt, and parses the reply as a JSON
    list of tracks.

    Args:
        user_request: Free-text description of what the user wants done.
        project_config: Project configuration; not read by this function.
        file_items: File items handed to ``summarize.build_summary_markdown``.
        history_summary: Optional text appended under a "TRACK HISTORY" heading.

    Returns:
        The parsed tracks, each guaranteed to carry a ``title`` key, or an
        empty list when the reply cannot be parsed.
    """
    # 1. Repository map (summary view) and the Tier 1 system prompt.
    repository_map = summarize.build_summary_markdown(file_items)
    tier1_prompt = mma_prompts.PROMPTS.get("tier1_epic_init")

    # 2. Assemble the user message section by section.
    sections = [
        f"### USER REQUEST:\n{user_request}\n",
        f"### REPOSITORY MAP:\n{repository_map}\n"
    ]
    if history_summary:
        sections.append(f"### TRACK HISTORY:\n{history_summary}\n")
    sections.append("Please generate the implementation tracks for this request.")
    user_message = "\n".join(sections)

    # Swap in the Tier 1 system prompt for this call only; the previous one is
    # restored in the ``finally`` clause below.
    previous_prompt = ai_client._custom_system_prompt
    ai_client.set_custom_system_prompt(tier1_prompt)
    try:
        # 3. Call the model; everything is passed in user_message.
        response = ai_client.send(
            md_content="",
            user_message=user_message
        )
        # 4. Parse the JSON output, unwrapping a markdown code fence if present.
        try:
            payload = response.strip()
            for fence in ("```json", "```"):
                if fence in payload:
                    payload = payload.split(fence)[1].split("```")[0].strip()
                    break
            tracks = json.loads(payload)
            # The GUI needs a title on every track; derive one from the goal.
            for track in tracks:
                if "title" in track:
                    continue
                track["title"] = track.get("goal", "Untitled Track")[:50]
            return tracks
        except Exception as e:
            print(f"Error parsing Tier 1 response: {e}")
            print(f"Raw response: {response}")
            return []
    finally:
        ai_client.set_custom_system_prompt(previous_prompt)
if __name__ == "__main__":
    # Quick CLI smoke test: load the local project, build its file items and
    # print the tracks Tier 1 proposes for a fixed sample request.
    import project_manager
    flat = project_manager.flat_config(project_manager.load_project("manual_slop.toml"))
    configured_paths = flat.get("files", {}).get("paths", [])
    file_items = aggregate.build_file_items(Path("."), configured_paths)
    print("Testing Tier 1 Track Generation...")
    tracks = generate_tracks(
        "Implement a basic unit test for the ai_client.py module.",
        flat,
        file_items,
        history_summary=get_track_history_summary(),
    )
    print(json.dumps(tracks, indent=2))

View File

@@ -2,60 +2,54 @@ import ast
from pathlib import Path
class CodeOutliner:
    """Produces a compact text outline of the classes and functions in Python source."""

    def __init__(self):
        pass

    def outline(self, code: str) -> str:
        """Return one outline line per top-level class/function and per class member.

        Each entry shows a kind tag, the name and its line span; when the
        definition has a docstring, its first line follows on the next line.
        Function bodies are not descended into, so only members of classes are
        nested.

        Args:
            code: Python source text. Leading BOM characters are ignored.

        Returns:
            The outline joined with newlines, or ``"ERROR parsing code: ..."``
            when the source cannot be parsed.
        """
        source = code.lstrip(chr(0xFEFF))
        try:
            tree = ast.parse(source)
        except (SyntaxError, ValueError) as e:
            # ValueError: older interpreters reject NUL bytes with ValueError
            # rather than SyntaxError; report both the same way.
            return f"ERROR parsing code: {e}"

        lines: list[str] = []

        def emit(node, label, depth):
            """Append the header line (and docstring summary) for one definition."""
            first = node.lineno
            last = getattr(node, "end_lineno", first)
            lines.append(f"{' ' * depth}{label} {node.name} (Lines {first}-{last})")
            doc = ast.get_docstring(node)
            if doc:
                summary = doc.splitlines()[0]
                lines.append(f"{' ' * (depth + 1)}\"\"\"{summary}\"\"\"")

        def walk(node, depth=0):
            if isinstance(node, ast.ClassDef):
                emit(node, "[Class]", depth)
                for member in node.body:
                    walk(member, depth + 1)
            elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                # walk() only recurses into class bodies, so depth > 0 means
                # the function is defined inside a class.
                if depth > 0:
                    label = "[Method]"
                elif isinstance(node, ast.AsyncFunctionDef):
                    label = "[Async Func]"
                else:
                    label = "[Func]"
                emit(node, label, depth)

        for top_level in tree.body:
            walk(top_level)
        return "\n".join(lines)
def get_outline(path: Path, code: str) -> str:
    """Return an outline of ``code``, chosen by the file extension of ``path``.

    Only ``.py`` files (case-insensitive) are outlined; any other suffix yields
    a "not supported" message naming the lower-cased suffix.
    """
    extension = path.suffix.lower()
    if extension != ".py":
        return f"Outlining not supported for {extension} files yet."
    return CodeOutliner().outline(code)

View File

@@ -3,132 +3,117 @@ import psutil
import threading
class PerformanceMonitor:
    """Tracks frame time, FPS, CPU usage, input lag and per-component timings.

    CPU usage is sampled by a daemon thread started in ``__init__``; all other
    figures are updated by the caller through ``start_frame``/``end_frame`` and
    the input/component hooks. When ``alert_callback`` is set it receives a
    single message whenever a metric exceeds its entry in ``thresholds``,
    at most once per ``_alert_cooldown`` seconds.
    """

    def __init__(self):
        # Frame / FPS bookkeeping
        self._start_time = None
        self._last_frame_time = 0.0
        self._fps = 0.0
        self._frame_count = 0
        self._fps_last_time = time.time()
        # CPU sampling state (written by the monitor thread under _cpu_lock)
        self._process = psutil.Process()
        self._cpu_usage = 0.0
        self._cpu_lock = threading.Lock()
        # Input lag tracking
        self._last_input_time = None
        self._input_lag_ms = 0.0
        # Alerts
        self.alert_callback = None
        self.thresholds = {
            'frame_time_ms': 33.3,  # < 30 FPS
            'cpu_percent': 80.0,
            'input_lag_ms': 100.0
        }
        self._last_alert_time = 0
        self._alert_cooldown = 30  # seconds
        # Detailed profiling
        self._component_timings = {}
        self._comp_start = {}
        # Start CPU usage monitoring thread
        self._stop_event = threading.Event()
        self._cpu_thread = threading.Thread(target=self._monitor_cpu, daemon=True)
        self._cpu_thread.start()

    def _monitor_cpu(self):
        """Thread body: refresh the cached CPU percentage until stop() is called."""
        while not self._stop_event.is_set():
            # cpu_percent() is called without an interval so it does not block;
            # the pacing is done by the short sleeps below instead.
            try:
                sample = self._process.cpu_percent()
                with self._cpu_lock:
                    self._cpu_usage = sample
            except Exception:
                pass
            # Sleep in 0.1 s slices (up to 10) so a stop request is noticed quickly.
            for _ in range(10):
                if self._stop_event.is_set():
                    break
                time.sleep(0.1)

    def start_frame(self):
        """Mark the beginning of a frame."""
        self._start_time = time.time()

    def record_input_event(self):
        """Remember when the latest input arrived; consumed by the next end_frame()."""
        self._last_input_time = time.time()

    def start_component(self, name: str):
        """Start timing the component called ``name``."""
        self._comp_start[name] = time.time()

    def end_component(self, name: str):
        """Store the elapsed milliseconds for ``name``; ignored if it was never started."""
        if name not in self._comp_start:
            return
        started = self._comp_start[name]
        self._component_timings[name] = (time.time() - started) * 1000.0

    def end_frame(self):
        """Close the current frame: update frame time, input lag, alerts and FPS."""
        if self._start_time is None:
            return
        now = time.time()
        self._last_frame_time = (now - self._start_time) * 1000.0
        self._frame_count += 1
        # Input lag is measured from the recorded input to the end of this frame.
        if self._last_input_time is not None:
            self._input_lag_ms = (now - self._last_input_time) * 1000.0
            self._last_input_time = None
        self._check_alerts()
        # FPS is recomputed once at least a second has passed since the last update.
        window = now - self._fps_last_time
        if window >= 1.0:
            self._fps = self._frame_count / window
            self._frame_count = 0
            self._fps_last_time = now

    def _check_alerts(self):
        """Invoke alert_callback with every exceeded threshold, honouring the cooldown."""
        if not self.alert_callback:
            return
        now = time.time()
        if now - self._last_alert_time < self._alert_cooldown:
            return
        metrics = self.get_metrics()
        # (metric key, threshold key, message template)
        rules = (
            ('last_frame_time_ms', 'frame_time_ms', "Frame time high: {:.1f}ms"),
            ('cpu_percent', 'cpu_percent', "CPU usage high: {:.1f}%"),
            ('input_lag_ms', 'input_lag_ms', "Input lag high: {:.1f}ms"),
        )
        alerts = [
            template.format(metrics[metric_key])
            for metric_key, limit_key, template in rules
            if metrics[metric_key] > self.thresholds[limit_key]
        ]
        if alerts:
            self._last_alert_time = now
            self.alert_callback("; ".join(alerts))

    def get_metrics(self):
        """Return the current metrics plus one ``time_<name>_ms`` entry per timed component."""
        with self._cpu_lock:
            cpu_now = self._cpu_usage
        snapshot = {
            'last_frame_time_ms': self._last_frame_time,
            'fps': self._fps,
            'cpu_percent': cpu_now,
            'input_lag_ms': self._input_lag_ms,
        }
        snapshot.update(
            (f'time_{name}_ms', elapsed)
            for name, elapsed in self._component_timings.items()
        )
        return snapshot

    def stop(self):
        """Ask the CPU thread to exit and wait up to two seconds for it."""
        self._stop_event.set()
        self._cpu_thread.join(timeout=2.0)

View File

@@ -3,7 +3,7 @@
Note(Gemini):
Handles loading/saving of project .toml configurations.
Also handles serializing the discussion history into the TOML format using a special
@timestamp prefix to preserve the exact sequence of events.
@timestamp prefix to preserve the exact sequence of events.
"""
import subprocess
import datetime
@@ -11,384 +11,307 @@ import tomllib
import tomli_w
import re
import json
from typing import Any, Optional, TYPE_CHECKING, Union
from pathlib import Path
TS_FMT = "%Y-%m-%dT%H:%M:%S"
if TYPE_CHECKING:
from models import TrackState
TS_FMT: str = "%Y-%m-%dT%H:%M:%S"
def now_ts() -> str:
    """Return the current local time formatted with ``TS_FMT``."""
    current = datetime.datetime.now()
    return current.strftime(TS_FMT)
def parse_ts(s: str) -> Optional[datetime.datetime]:
    """Parse a ``TS_FMT`` timestamp string; return None when parsing fails."""
    try:
        parsed = datetime.datetime.strptime(s, TS_FMT)
    except Exception:
        # Best-effort: any failure is reported to the caller as "no timestamp".
        return None
    return parsed
# ── entry serialisation ──────────────────────────────────────────────────────
def entry_to_str(entry: dict[str, Any]) -> str:
    """Serialise a disc entry dict -> stored string.

    The result is ``"<role>:\\n<content>"``, preceded by an ``"@<ts>"`` line
    when the entry has a non-empty ``ts``. Missing keys default to role
    ``"User"`` and empty content.
    """
    body = f"{entry.get('role', 'User')}:\n{entry.get('content', '')}"
    stamp = entry.get("ts", "")
    return f"@{stamp}\n{body}" if stamp else body
def str_to_entry(raw: str, roles: list[str]) -> dict[str, Any]:
    """Parse a stored string back to a disc entry dict.

    Args:
        raw: Stored entry text, optionally starting with an ``@<timestamp>`` line.
        roles: Known role names; a built-in list is used when this is empty.

    Returns:
        A dict with ``role``, ``content``, ``collapsed`` (always False) and
        ``ts`` (empty string when no timestamp line was present). When the
        first line is not a recognised role header the role is ``"User"`` and
        the content is the whole remaining text, stripped.
    """
    stamp = ""
    remainder = raw
    # An "@..." first line is a timestamp only if something follows it.
    stamp_line, newline, after_stamp = raw.partition("\n")
    if raw.startswith("@") and newline:
        stamp = stamp_line[1:]
        remainder = after_stamp

    known_roles = roles or ["User", "AI", "Vendor API", "System"]
    alternatives = "|".join(re.escape(r) for r in known_roles)
    # Accepts "Role", "Role:", "[Role]" or "[Role]:" in any letter case.
    header_re = re.compile(
        r"^(?:\[)?(" + alternatives + r")(?:\])?:?\s*$",
        re.IGNORECASE,
    )

    header, _, tail = remainder.partition("\n")
    found = header_re.match(header.strip())
    if found is None:
        return {"role": "User", "content": remainder.strip(), "collapsed": False, "ts": stamp}

    written_role = found.group(1)
    # Normalise to the canonical spelling from the known-roles list.
    canonical = next(
        (r for r in known_roles if r.lower() == written_role.lower()),
        written_role,
    )
    return {"role": canonical, "content": tail.strip(), "collapsed": False, "ts": stamp}
# ── git helpers ──────────────────────────────────────────────────────────────
def get_git_commit(git_dir: str) -> str:
    """Return the output of ``git rev-parse HEAD`` run in ``git_dir``.

    Returns an empty string when git exits non-zero or cannot be run at all
    (including the 5 second timeout).
    """
    command = ["git", "rev-parse", "HEAD"]
    try:
        proc = subprocess.run(
            command,
            capture_output=True, text=True, cwd=git_dir, timeout=5,
        )
    except Exception:
        return ""
    if proc.returncode != 0:
        return ""
    return proc.stdout.strip()
def get_git_log(git_dir: str, n: int = 5) -> str:
    """Return ``git log --oneline -<n>`` output for the repository in ``git_dir``.

    Returns an empty string when git exits non-zero or cannot be run at all
    (including the 5 second timeout).
    """
    command = ["git", "log", "--oneline", f"-{n}"]
    try:
        proc = subprocess.run(
            command,
            capture_output=True, text=True, cwd=git_dir, timeout=5,
        )
    except Exception:
        return ""
    if proc.returncode != 0:
        return ""
    return proc.stdout.strip()
# ── default structures ───────────────────────────────────────────────────────
def default_discussion() -> dict[str, Any]:
    """Return an empty discussion record stamped with the current time."""
    discussion: dict[str, Any] = {"git_commit": ""}
    discussion["last_updated"] = now_ts()
    discussion["history"] = []
    return discussion
def default_project(name: str = "unnamed") -> dict[str, Any]:
    """Return a new project dict with every section set to its default.

    Sections are inserted in a fixed order: project, output, files,
    screenshots, gemini_cli, deepseek, agent, discussion, mma. The discussion
    section starts with a single empty "main" discussion.
    """
    # Every agent tool is enabled by default.
    tool_names = (
        "run_powershell",
        "read_file",
        "list_directory",
        "search_files",
        "get_file_summary",
        "web_search",
        "fetch_url",
    )
    project: dict[str, Any] = {}
    project["project"] = {"name": name, "git_dir": "", "system_prompt": "", "main_context": ""}
    project["output"] = {"output_dir": "./md_gen"}
    project["files"] = {"base_dir": ".", "paths": [], "tier_assignments": {}}
    project["screenshots"] = {"base_dir": ".", "paths": []}
    project["gemini_cli"] = {"binary_path": "gemini"}
    project["deepseek"] = {"reasoning_effort": "medium"}
    project["agent"] = {"tools": dict.fromkeys(tool_names, True)}
    project["discussion"] = {
        "roles": ["User", "AI", "Vendor API", "System", "Reasoning"],
        "active": "main",
        "discussions": {"main": default_discussion()},
    }
    project["mma"] = {"epic": "", "active_track_id": "", "tracks": []}
    return project
# ── load / save ──────────────────────────────────────────────────────────────
def get_history_path(project_path: Union[str, Path]) -> Path:
    """Return the Path to the sibling history TOML file for a given project.

    The history file lives next to the project file and is named
    ``<project stem>_history.toml``.
    """
    project_file = Path(project_path)
    history_name = f"{project_file.stem}_history.toml"
    return project_file.parent / history_name
def load_project(path: Union[str, Path]) -> dict[str, Any]:
    """
    Load a project TOML file.

    Automatically migrates a legacy 'discussion' key to the sibling history
    file: the section is written there, the project file is re-saved without
    it, and the section is put back on the returned dict. When the project
    file has no 'discussion' key, the section is loaded from the sibling
    history file if that file exists.
    """
    with open(path, "rb") as src:
        project = tomllib.load(src)
    history_file = get_history_path(path)

    if "discussion" not in project:
        if history_file.exists():
            project["discussion"] = load_history(path)
        return project

    # Legacy layout: move the embedded discussion out of the project file.
    legacy_discussion = project.pop("discussion")
    with open(history_file, "wb") as dst:
        tomli_w.dump(legacy_discussion, dst)
    save_project(project, path)
    # Restore it on the in-memory dict for the caller.
    project["discussion"] = legacy_discussion
    return project
def load_history(project_path: Union[str, Path]) -> dict[str, Any]:
    """Load the segregated discussion history from its dedicated TOML file.

    Returns an empty dict when the history file does not exist.
    """
    history_file = get_history_path(project_path)
    if not history_file.exists():
        return {}
    with open(history_file, "rb") as src:
        return tomllib.load(src)
def clean_nones(data: Any) -> Any:
    """Recursively remove None values from a dictionary/list.

    Dicts and lists are rebuilt without their None entries; any other value
    (including tuples) is returned unchanged.
    """
    if isinstance(data, dict):
        cleaned_map = {}
        for key, value in data.items():
            if value is not None:
                cleaned_map[key] = clean_nones(value)
        return cleaned_map
    if isinstance(data, list):
        cleaned_items = []
        for value in data:
            if value is not None:
                cleaned_items.append(clean_nones(value))
        return cleaned_items
    return data
def save_project(proj: dict[str, Any], path: Union[str, Path], disc_data: Optional[dict[str, Any]] = None) -> None:
    """
    Save the project TOML.

    If 'discussion' is present in proj, it is kept out of the main file and,
    unless ``disc_data`` was given explicitly, used as the discussion data.
    Non-empty discussion data is written to the sibling history file.
    None values are stripped before writing because TOML cannot express them.
    """
    cleaned = clean_nones(proj)
    if "discussion" in cleaned:
        # Work on a copy so the discussion section is only dropped locally.
        cleaned = dict(cleaned)
        embedded = cleaned.pop("discussion")
        if disc_data is None:
            disc_data = embedded

    with open(path, "wb") as project_out:
        tomli_w.dump(cleaned, project_out)

    if not disc_data:
        return
    history_payload = clean_nones(disc_data)
    with open(get_history_path(path), "wb") as history_out:
        tomli_w.dump(history_payload, history_out)
# ── migration helper ─────────────────────────────────────────────────────────
def migrate_from_legacy_config(cfg: dict[str, Any]) -> dict[str, Any]:
    """Build a fresh project dict from a legacy flat config.toml. Does NOT save.

    The project name comes from ``cfg["output"]["namespace"]`` (default
    "project"). The output, files and screenshots sections are copied over
    when present, and the legacy discussion roles/history are placed into the
    "main" discussion.
    """
    project_name = cfg.get("output", {}).get("namespace", "project")
    project = default_project(project_name)

    for section in ("output", "files", "screenshots"):
        if section in cfg:
            project[section] = dict(cfg[section])

    legacy_disc = cfg.get("discussion", {})
    discussion = project["discussion"]
    discussion["roles"] = legacy_disc.get("roles", ["User", "AI", "Vendor API", "System"])

    main_thread = discussion["discussions"]["main"]
    main_thread["history"] = legacy_disc.get("history", [])
    main_thread["last_updated"] = now_ts()
    return project
# ── flat config for aggregate.run() ─────────────────────────────────────────
def flat_config(proj: dict[str, Any], disc_name: Optional[str] = None, track_id: Optional[str] = None) -> dict[str, Any]:
    """Return a flat config dict compatible with aggregate.run().

    Args:
        proj: Project dict.
        disc_name: Discussion whose history to use; defaults to the project's
            active discussion (or "main"). Ignored when ``track_id`` is given.
        track_id: When set, the history is loaded from that track's state
            instead of from the project's discussions.

    Returns:
        A dict with the project, output, files and screenshots sections plus a
        ``discussion`` section holding only ``roles`` and ``history``.
    """
    discussion = proj.get("discussion", {})

    def _discussion_history() -> Any:
        selected = disc_name or discussion.get("active", "main")
        return discussion.get("discussions", {}).get(selected, {}).get("history", [])

    if track_id:
        base_dir = proj.get("files", {}).get("base_dir", ".")
        history = load_track_history(track_id, base_dir)
    else:
        history = _discussion_history()

    flat: dict[str, Any] = {
        section: proj.get(section, {})
        for section in ("project", "output", "files", "screenshots")
    }
    flat["discussion"] = {
        "roles": discussion.get("roles", []),
        "history": history,
    }
    return flat
# ── track state persistence ─────────────────────────────────────────────────
def save_track_state(track_id: str, state: 'TrackState', base_dir: Union[str, Path] = ".") -> None:
    """
    Saves a TrackState object to conductor/tracks/<track_id>/state.toml.

    The track directory is created if needed, and None values are stripped
    from ``state.to_dict()`` before it is written.
    """
    target_dir = Path(base_dir) / "conductor" / "tracks" / track_id
    target_dir.mkdir(parents=True, exist_ok=True)
    payload = clean_nones(state.to_dict())
    with open(target_dir / "state.toml", "wb") as out:
        tomli_w.dump(payload, out)
def load_track_state(track_id: str, base_dir: Union[str, Path] = ".") -> Optional['TrackState']:
    """
    Loads a TrackState object from conductor/tracks/<track_id>/state.toml.

    Returns None when the state file does not exist.
    """
    # Imported here rather than at module level (the module only imports
    # TrackState under TYPE_CHECKING).
    from models import TrackState
    state_path = Path(base_dir) / "conductor" / "tracks" / track_id / "state.toml"
    if state_path.exists():
        with open(state_path, "rb") as src:
            raw_state = tomllib.load(src)
        return TrackState.from_dict(raw_state)
    return None
def load_track_history(track_id: str, base_dir: Union[str, Path] = ".") -> list[str]:
    """
    Loads the discussion history for a specific track from its state.toml.

    Returns a list of entry strings formatted with @timestamp, or an empty
    list when the track has no saved state.
    """
    state = load_track_state(track_id, base_dir)
    if not state:
        return []

    def _serialise(entry: Any) -> str:
        record = dict(entry)
        stamp = record.get("ts")
        # datetime timestamps are converted to their TS_FMT string form first.
        if isinstance(stamp, datetime.datetime):
            record["ts"] = stamp.strftime(TS_FMT)
        return entry_to_str(record)

    return [_serialise(entry) for entry in state.discussion]
def save_track_history(track_id: str, history: list[str], base_dir: Union[str, Path] = ".") -> None:
    """
    Saves the discussion history for a specific track to its state.toml.

    'history' is expected to be a list of formatted strings; each one is
    parsed back into an entry dict. Nothing is written when the track has no
    saved state.
    """
    state = load_track_state(track_id, base_dir)
    if not state:
        return
    known_roles = ["User", "AI", "Vendor API", "System", "Reasoning"]
    state.discussion = [str_to_entry(raw, known_roles) for raw in history]
    save_track_state(track_id, state, base_dir)
def get_all_tracks(base_dir: Union[str, Path] = ".") -> list[dict[str, Any]]:
    """
    Scans the conductor/tracks/ directory and returns a list of dictionaries
    containing track metadata: 'id', 'title', 'status', 'complete', 'total',
    and 'progress' (0.0 to 1.0).

    Handles missing or malformed metadata.json or state.toml by falling back
    to available info or defaults: state.toml is tried first, then
    metadata.json, and task counts come from plan.md when the state supplied
    none. Returns an empty list when the tracks directory does not exist.
    """
    tracks_dir = Path(base_dir) / "conductor" / "tracks"
    if not tracks_dir.exists():
        return []
    results: list[dict[str, Any]] = []
    for entry in tracks_dir.iterdir():
        if not entry.is_dir():
            continue
        track_id = entry.name
        track_info: dict[str, Any] = {
            "id": track_id,
            "title": track_id,
            "status": "unknown",
            "complete": 0,
            "total": 0,
            "progress": 0.0
        }
        if not _track_info_from_state(track_info, track_id, base_dir):
            _track_info_from_metadata(track_info, entry / "metadata.json", track_id)
        if track_info["total"] == 0:
            _track_counts_from_plan(track_info, entry / "plan.md")
        results.append(track_info)
    return results


def _track_info_from_state(track_info: dict[str, Any], track_id: str, base_dir: Union[str, Path]) -> bool:
    """Fill track_info from state.toml; return True only if it was fully read."""
    try:
        state = load_track_state(track_id, base_dir)
        if not state:
            return False
        track_info["id"] = state.metadata.id or track_id
        track_info["title"] = state.metadata.name or track_id
        track_info["status"] = state.metadata.status or "unknown"
        track_info["complete"] = len([t for t in state.tasks if t.status == "completed"])
        track_info["total"] = len(state.tasks)
        if track_info["total"] > 0:
            track_info["progress"] = track_info["complete"] / track_info["total"]
        return True
    except Exception:
        # Best-effort: an unreadable state falls through to metadata.json.
        return False


def _track_info_from_metadata(track_info: dict[str, Any], metadata_file: Path, track_id: str) -> None:
    """Fill id/title/status from metadata.json when it exists and is readable."""
    if not metadata_file.exists():
        return
    try:
        # Read as UTF-8 explicitly, like plan.md, instead of the locale default.
        with open(metadata_file, "r", encoding="utf-8") as f:
            data = json.load(f)
        track_info["id"] = data.get("id", data.get("track_id", track_id))
        track_info["title"] = data.get("title", data.get("name", data.get("description", track_id)))
        track_info["status"] = data.get("status", "unknown")
    except Exception:
        # Best-effort: malformed metadata leaves the current values in place.
        pass


def _track_counts_from_plan(track_info: dict[str, Any], plan_file: Path) -> None:
    """Derive total/complete/progress by counting checkbox items in plan.md."""
    if not plan_file.exists():
        return
    try:
        with open(plan_file, "r", encoding="utf-8") as f:
            content = f.read()
        # Task lines look like "- [ ] ...", "- [x] ..." or "- [~] ...";
        # only "[x]" counts as completed.
        tasks = re.findall(r"^[ \t]*- \[[ x~]\] .*", content, re.MULTILINE)
        completed_tasks = re.findall(r"^[ \t]*- \[x\] .*", content, re.MULTILINE)
        track_info["total"] = len(tasks)
        track_info["complete"] = len(completed_tasks)
        if track_info["total"] > 0:
            track_info["progress"] = float(track_info["complete"]) / track_info["total"]
    except Exception:
        # Best-effort: an unreadable plan leaves the counts at their defaults.
        pass

10
refactor_ui_task.toml Normal file
View File

@@ -0,0 +1,10 @@
role = "tier3-worker"
prompt = """Implement strict type hints for ALL functions and methods in @gui_2.py and @gui_legacy.py.
1. Use specific types (e.g., dict[str, Any], list[str], Union[str, Path], etc.) for arguments and returns.
2. Maintain the 'AI-Optimized' style: 1-space indentation, NO blank lines within function bodies, and maximum 1 blank line between definitions.
3. Since these files are very large, you MUST use surgical tools (discovered_tool_py_update_definition, discovered_tool_py_set_signature, discovered_tool_py_set_var_declaration) to apply changes. Do NOT try to overwrite the entire file at once.
4. Do NOT change any logic.
5. Use discovered_tool_py_check_syntax after each major change to verify syntax.
6. Ensure 'from typing import Any, Union, Optional, Callable' etc. are present (dict and list are builtins and must NOT be imported from typing).
7. Focus on completing the task efficiently without hitting timeouts."""
docs = ["gui_2.py", "gui_legacy.py", "conductor/workflow.md"]

View File

@@ -3,34 +3,29 @@ from models import Ticket
from dag_engine import TrackDAG, ExecutionEngine
def test_auto_queue_and_step_mode():
 """With auto_queue on, a plain ticket starts on tick while a step_mode ticket waits for approve_task."""
 auto_ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
 gated_ticket = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", step_mode=True)
 graph = TrackDAG([auto_ticket, gated_ticket])
 # A TypeError here means the constructor has no auto_queue keyword yet; report that as a readable failure.
 try:
  engine = ExecutionEngine(graph, auto_queue=True)
 except TypeError:
  pytest.fail("ExecutionEngine does not accept auto_queue parameter")
 # First tick: T1 is expected to move to "in_progress" (auto_queue=True), T2 to stay "todo" (step_mode=True).
 engine.tick()
 assert auto_ticket.status == "in_progress"
 assert gated_ticket.status == "todo"
 # Explicit approval is what releases the step-mode ticket.
 try:
  engine.approve_task("T2")
 except AttributeError:
  pytest.fail("ExecutionEngine does not have approve_task method")
 assert gated_ticket.status == "in_progress"
if __name__ == "__main__":
 # Manual run outside pytest; the messages treat a failure as the expected outcome.
 try:
  test_auto_queue_and_step_mode()
  outcome = "Test passed (unexpectedly)"
 except Exception as exc:
  outcome = f"Test failed as expected: {exc}"
 print(outcome)

View File

@@ -0,0 +1,21 @@
import subprocess
import sys
def _scan_file_for_missing_hints(path: str) -> str:
 """Run scripts/type_hint_scanner.py on *path* via uv and return its report; an empty string means nothing was reported."""
 result = subprocess.run(["uv", "run", "python", "scripts/type_hint_scanner.py", path], capture_output=True, text=True)
 report = result.stdout.strip()
 if result.returncode != 0 and not report:
  # A scanner that crashed prints nothing on stdout; without this check that looked like a clean file.
  report = f"(scanner exited with code {result.returncode})\n{result.stderr.strip()}"
 return report
def test_type_hints() -> None:
 """
 Fail when the type-hint scanner reports anything for the tracked modules.
 Raises AssertionError on failure and returns normally on success, so the function
 works both as a pytest test and when called from the __main__ guard below.
 (Calling sys.exit() here would be reported by pytest as a failure even on success.)
 """
 files = ["project_manager.py", "session_logger.py"]
 all_missing: list[str] = []
 for f in files:
  print(f"Scanning {f}...")
  report = _scan_file_for_missing_hints(f)
  if report:
   print(f"Missing hints in {f}:\n{report}")
   all_missing.append(f)
 if all_missing:
  message = f"FAILURE: Missing type hints in: {', '.join(all_missing)}"
  print(message)
  raise AssertionError(message)
 print("SUCCESS: All functions have type hints.")
if __name__ == "__main__":
 # Script mode keeps the original exit codes: 1 when hints are missing, 0 otherwise.
 try:
  test_type_hints()
 except AssertionError:
  sys.exit(1)
 sys.exit(0)

View File

@@ -5,7 +5,7 @@ import pytest
from typing import Dict, List, Any
def load_manifest(path: str) -> Dict[str, Any]:
"""
"""
Loads a manifest file (expected to be in TOML format) from the given path.
Args:
@@ -18,18 +18,18 @@ def load_manifest(path: str) -> Dict[str, Any]:
FileNotFoundError: If the manifest file does not exist.
tomllib.TOMLDecodeError: If the manifest file is not valid TOML.
"""
try:
with open(path, 'rb') as f:
return tomllib.load(f)
except FileNotFoundError:
print(f"Error: Manifest file not found at {path}", file=sys.stderr)
raise
except tomllib.TOMLDecodeError:
print(f"Error: Could not decode TOML from {path}", file=sys.stderr)
raise
try:
with open(path, 'rb') as f:
return tomllib.load(f)
except FileNotFoundError:
print(f"Error: Manifest file not found at {path}", file=sys.stderr)
raise
except tomllib.TOMLDecodeError:
print(f"Error: Could not decode TOML from {path}", file=sys.stderr)
raise
def get_test_files(manifest: Dict[str, Any], category: str) -> List[str]:
"""
"""
Determines the list of test files based on the manifest and a specified category.
Args:
@@ -40,16 +40,16 @@ def get_test_files(manifest: Dict[str, Any], category: str) -> List[str]:
A list of file paths corresponding to the tests in the given category.
Returns an empty list if the category is not found or has no tests.
"""
print(f"DEBUG: Looking for category '{category}' in manifest.", file=sys.stderr)
files = manifest.get("categories", {}).get(category, {}).get("files", [])
print(f"DEBUG: Found test files for category '{category}': {files}", file=sys.stderr)
return files
print(f"DEBUG: Looking for category '{category}' in manifest.", file=sys.stderr)
files = manifest.get("categories", {}).get(category, {}).get("files", [])
print(f"DEBUG: Found test files for category '{category}': {files}", file=sys.stderr)
return files
def main():
parser = argparse.ArgumentParser(
description="Run tests with optional manifest and category filtering, passing additional pytest arguments.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""\
parser = argparse.ArgumentParser(
description="Run tests with optional manifest and category filtering, passing additional pytest arguments.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""\
Example usage:
python run_tests.py --manifest tests.toml --category unit -- --verbose --cov=my_module
python run_tests.py --manifest tests.toml --category integration
@@ -57,65 +57,57 @@ Example usage:
python run_tests.py --manifest tests.toml # Runs tests from default_categories
python run_tests.py -- --capture=no # Runs all tests with pytest args
"""
)
parser.add_argument(
"--manifest",
type=str,
help="Path to the TOML manifest file containing test configurations."
)
parser.add_argument(
"--category",
type=str,
help="Category of tests to run (e.g., 'unit', 'integration')."
)
# Parse known arguments for the script itself, then parse remaining args for pytest
args, remaining_pytest_args = parser.parse_known_args(sys.argv[1:])
selected_test_files = []
manifest_data = None
if args.manifest:
try:
manifest_data = load_manifest(args.manifest)
except (FileNotFoundError, tomllib.TOMLDecodeError):
# Error message already printed by load_manifest
sys.exit(1)
if args.category:
# Case 1: --manifest and --category provided
files = get_test_files(manifest_data, args.category)
selected_test_files.extend(files)
else:
# Case 2: --manifest provided, but no --category
# Load default categories from manifest['execution']['default_categories']
default_categories = manifest_data.get("execution", {}).get("default_categories", [])
if not default_categories:
print(f"Error: --manifest provided without --category, and no 'default_categories' found in manifest '{args.manifest}'.", file=sys.stderr)
parser.print_help(sys.stderr)
sys.exit(1)
print(f"DEBUG: Using default categories from manifest '{args.manifest}': {default_categories}", file=sys.stderr)
for cat in default_categories:
files = get_test_files(manifest_data, cat)
selected_test_files.extend(files)
elif args.category:
# Case 3: --category provided without --manifest
print("Error: --category requires --manifest to be specified.", file=sys.stderr)
parser.print_help(sys.stderr)
sys.exit(1)
# Combine selected test files with any remaining pytest arguments that were not parsed by this script.
# We also filter out the literal '--' if it was passed by the user to avoid pytest errors if it appears multiple times.
pytest_command_args = selected_test_files + [arg for arg in remaining_pytest_args if arg != '--']
# Filter out any empty strings that might have been included.
final_pytest_args = [arg for arg in pytest_command_args if arg]
# If no specific tests were selected from manifest/category and no manifest was provided,
# and no other pytest args were given, pytest.main([]) runs default test discovery.
print(f"Running pytest with arguments: {final_pytest_args}", file=sys.stderr)
sys.exit(pytest.main(final_pytest_args))
)
parser.add_argument(
"--manifest",
type=str,
help="Path to the TOML manifest file containing test configurations."
)
parser.add_argument(
"--category",
type=str,
help="Category of tests to run (e.g., 'unit', 'integration')."
)
# Parse known arguments for the script itself, then parse remaining args for pytest
args, remaining_pytest_args = parser.parse_known_args(sys.argv[1:])
selected_test_files = []
manifest_data = None
if args.manifest:
try:
manifest_data = load_manifest(args.manifest)
except (FileNotFoundError, tomllib.TOMLDecodeError):
# Error message already printed by load_manifest
sys.exit(1)
if args.category:
# Case 1: --manifest and --category provided
files = get_test_files(manifest_data, args.category)
selected_test_files.extend(files)
else:
# Case 2: --manifest provided, but no --category
# Load default categories from manifest['execution']['default_categories']
default_categories = manifest_data.get("execution", {}).get("default_categories", [])
if not default_categories:
print(f"Error: --manifest provided without --category, and no 'default_categories' found in manifest '{args.manifest}'.", file=sys.stderr)
parser.print_help(sys.stderr)
sys.exit(1)
print(f"DEBUG: Using default categories from manifest '{args.manifest}': {default_categories}", file=sys.stderr)
for cat in default_categories:
files = get_test_files(manifest_data, cat)
selected_test_files.extend(files)
elif args.category:
# Case 3: --category provided without --manifest
print("Error: --category requires --manifest to be specified.", file=sys.stderr)
parser.print_help(sys.stderr)
sys.exit(1)
# Combine selected test files with any remaining pytest arguments that were not parsed by this script.
# We also filter out the literal '--' if it was passed by the user to avoid pytest errors if it appears multiple times.
pytest_command_args = selected_test_files + [arg for arg in remaining_pytest_args if arg != '--']
# Filter out any empty strings that might have been included.
final_pytest_args = [arg for arg in pytest_command_args if arg]
# If no specific tests were selected from manifest/category and no manifest was provided,
# and no other pytest args were given, pytest.main([]) runs default test discovery.
print(f"Running pytest with arguments: {final_pytest_args}", file=sys.stderr)
sys.exit(pytest.main(final_pytest_args))
if __name__ == "__main__":
main()
main()

View File

@@ -89,13 +89,20 @@ def main():
# This prevents the hook from affecting normal CLI usage.
hook_context = os.environ.get("GEMINI_CLI_HOOK_CONTEXT")
logging.debug(f"Checking GEMINI_CLI_HOOK_CONTEXT: '{hook_context}'")
if hook_context != "manual_slop":
logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is '{hook_context}', NOT 'manual_slop'. Allowing execution without confirmation.")
if hook_context != "manual_slop" and hook_context != "mma_headless":
logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is '{hook_context}', NOT 'manual_slop' or 'mma_headless'. Allowing execution without confirmation.")
print(json.dumps({
"decision": "allow",
"reason": f"Non-programmatic usage (GEMINI_CLI_HOOK_CONTEXT={hook_context})."
}))
return
if hook_context == "mma_headless":
logging.debug(f"GEMINI_CLI_HOOK_CONTEXT is 'mma_headless'. Allowing execution for sub-agent.")
print(json.dumps({
"decision": "allow",
"reason": "Sub-agent headless mode (MMA)."
}))
return
# 5. Use 'ApiHookClient' (assuming GUI is on http://127.0.0.1:8999)
logging.debug("GEMINI_CLI_HOOK_CONTEXT is 'manual_slop'. Proceeding with API Hook Client.")
client = ApiHookClient(base_url="http://127.0.0.1:8999")

View File

@@ -189,15 +189,15 @@ def execute_agent(role: str, prompt: str, docs: list[str]) -> str:
command_text += f"\n\nTASK: {prompt}\n\n"
# Use subprocess with input to pipe the prompt via stdin, avoiding WinError 206.
# We use -p 'mma_task' to ensure non-interactive (headless) mode and valid parsing.
# Whitelist tools to ensure they are available to the model in headless mode.
allowed_tools = "read_file,write_file,replace,list_directory,glob,grep_search,discovered_tool_search_files,discovered_tool_get_file_summary,discovered_tool_py_get_skeleton,discovered_tool_py_get_code_outline,discovered_tool_py_get_definition,discovered_tool_py_update_definition,discovered_tool_py_get_signature,discovered_tool_py_set_signature,discovered_tool_py_get_class_summary,discovered_tool_py_get_var_declaration,discovered_tool_py_set_var_declaration,discovered_tool_get_git_diff,discovered_tool_run_powershell,activate_skill,codebase_investigator,discovered_tool_web_search,discovered_tool_fetch_url,discovered_tool_py_find_usages,discovered_tool_py_get_imports,discovered_tool_py_check_syntax,discovered_tool_py_get_hierarchy,discovered_tool_py_get_docstring,discovered_tool_get_tree"
ps_command = (
f"if (Test-Path 'C:\\projects\\misc\\setup_gemini.ps1') {{ . 'C:\\projects\\misc\\setup_gemini.ps1' }}; "
f"gemini -p 'mma_task' --allowed-tools {allowed_tools} --output-format json --model {model}"
f"gemini -p '{role}' --output-format json --model {model}"
)
cmd = ['powershell.exe', '-NoProfile', '-Command', ps_command]
try:
process = subprocess.run(cmd, input=command_text, capture_output=True, text=True, encoding='utf-8')
env = os.environ.copy()
env["GEMINI_CLI_HOOK_CONTEXT"] = "mma_headless"
process = subprocess.run(cmd, input=command_text, capture_output=True, text=True, encoding='utf-8', env=env)
result = process.stdout
if not process.stdout and process.stderr:
result = f"Error: {process.stderr}"

View File

@@ -3,7 +3,6 @@
Opens timestamped log/script files at startup and keeps them open for the
lifetime of the process. The next run of the GUI creates new files; the
previous run's files are simply closed when the process exits.
File layout
-----------
logs/
@@ -12,195 +11,149 @@ logs/
clicalls_<ts>.log - sequential record of every CLI subprocess call
scripts/generated/
<ts>_<seq:04d>.ps1 - each PowerShell script the AI generated, in order
Where <ts> = YYYYMMDD_HHMMSS of when this session was started.
"""
import atexit
import datetime
import json
import threading
from typing import Any, Optional, TextIO
from pathlib import Path
_LOG_DIR = Path("./logs")
_SCRIPTS_DIR = Path("./scripts/generated")
_LOG_DIR: Path = Path("./logs")
_SCRIPTS_DIR: Path = Path("./scripts/generated")
_ts: str = "" # session timestamp string e.g. "20260301_142233"
_session_id: str = "" # YYYYMMDD_HHMMSS[_Label]
_session_dir: Path = None # Path to the sub-directory for this session
_session_dir: Optional[Path] = None # Path to the sub-directory for this session
_seq: int = 0 # monotonic counter for script files this session
_seq_lock = threading.Lock()
_comms_fh = None # file handle: logs/<session_id>/comms.log
_tool_fh = None # file handle: logs/<session_id>/toolcalls.log
_api_fh = None # file handle: logs/<session_id>/apihooks.log
_cli_fh = None # file handle: logs/<session_id>/clicalls.log
_seq_lock: threading.Lock = threading.Lock()
_comms_fh: Optional[TextIO] = None # file handle: logs/<session_id>/comms.log
_tool_fh: Optional[TextIO] = None # file handle: logs/<session_id>/toolcalls.log
_api_fh: Optional[TextIO] = None # file handle: logs/<session_id>/apihooks.log
_cli_fh: Optional[TextIO] = None # file handle: logs/<session_id>/clicalls.log
def _now_ts() -> str:
return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
def open_session(label: str | None = None):
"""
return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
def open_session(label: Optional[str] = None) -> None:
"""
Called once at GUI startup. Creates the log directories if needed and
opens the log files for this session within a sub-directory.
"""
global _ts, _session_id, _session_dir, _comms_fh, _tool_fh, _api_fh, _cli_fh, _seq
if _comms_fh is not None:
return # already open
_ts = _now_ts()
_session_id = _ts
if label:
# Sanitize label: remove non-alphanumeric chars
safe_label = "".join(c if c.isalnum() or c in ("-", "_") else "_" for c in label)
_session_id += f"_{safe_label}"
_session_dir = _LOG_DIR / _session_id
_session_dir.mkdir(parents=True, exist_ok=True)
_SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
_seq = 0
_comms_fh = open(_session_dir / "comms.log", "w", encoding="utf-8", buffering=1)
_tool_fh = open(_session_dir / "toolcalls.log", "w", encoding="utf-8", buffering=1)
_api_fh = open(_session_dir / "apihooks.log", "w", encoding="utf-8", buffering=1)
_cli_fh = open(_session_dir / "clicalls.log", "w", encoding="utf-8", buffering=1)
_tool_fh.write(f"# Tool-call log — session {_session_id}\n\n")
_tool_fh.flush()
_cli_fh.write(f"# CLI Subprocess Call Log — session {_session_id}\n\n")
_cli_fh.flush()
# Register this session in the log registry
try:
from log_registry import LogRegistry
registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
registry.register_session(_session_id, str(_session_dir), datetime.datetime.now())
except Exception as e:
print(f"Warning: Could not register session in LogRegistry: {e}")
atexit.register(close_session)
def close_session():
"""Flush and close all log files. Called on clean exit."""
global _comms_fh, _tool_fh, _api_fh, _cli_fh, _session_id, _LOG_DIR
if _comms_fh is None:
return
# Close files first to ensure all data is flushed to disk
if _comms_fh:
_comms_fh.close()
_comms_fh = None
if _tool_fh:
_tool_fh.close()
_tool_fh = None
if _api_fh:
_api_fh.close()
_api_fh = None
if _cli_fh:
_cli_fh.close()
_cli_fh = None
# Trigger auto-whitelist update for this session after closing
try:
from log_registry import LogRegistry
registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
registry.update_auto_whitelist_status(_session_id)
except Exception as e:
print(f"Warning: Could not update auto-whitelist on close: {e}")
def log_api_hook(method: str, path: str, payload: str):
"""
Log an API hook invocation.
"""
if _api_fh is None:
return
ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
try:
_api_fh.write(f"[{ts_entry}] {method} {path} - Payload: {payload}\n")
_api_fh.flush()
except Exception:
pass
def log_comms(entry: dict):
"""
global _ts, _session_id, _session_dir, _comms_fh, _tool_fh, _api_fh, _cli_fh, _seq
if _comms_fh is not None:
return
_ts = _now_ts()
_session_id = _ts
if label:
safe_label = "".join(c if c.isalnum() or c in ("-", "_") else "_" for c in label)
_session_id += f"_{safe_label}"
_session_dir = _LOG_DIR / _session_id
_session_dir.mkdir(parents=True, exist_ok=True)
_SCRIPTS_DIR.mkdir(parents=True, exist_ok=True)
_seq = 0
_comms_fh = open(_session_dir / "comms.log", "w", encoding="utf-8", buffering=1)
_tool_fh = open(_session_dir / "toolcalls.log", "w", encoding="utf-8", buffering=1)
_api_fh = open(_session_dir / "apihooks.log", "w", encoding="utf-8", buffering=1)
_cli_fh = open(_session_dir / "clicalls.log", "w", encoding="utf-8", buffering=1)
_tool_fh.write(f"# Tool-call log — session {_session_id}\n\n")
_tool_fh.flush()
_cli_fh.write(f"# CLI Subprocess Call Log — session {_session_id}\n\n")
_cli_fh.flush()
try:
from log_registry import LogRegistry
registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
registry.register_session(_session_id, str(_session_dir), datetime.datetime.now())
except Exception as e:
print(f"Warning: Could not register session in LogRegistry: {e}")
atexit.register(close_session)
def close_session() -> None:
 """
 Flush and close all log files, then update the auto-whitelist status for this session. Called on clean exit.
 Does nothing when no session is open. Each handle is closed independently, so a failure
 on one file cannot leave the others open or leave the module pointing at stale handles.
 """
 global _comms_fh, _tool_fh, _api_fh, _cli_fh
 if _comms_fh is None:
  return
 handles = (_comms_fh, _tool_fh, _api_fh, _cli_fh)
 # Clear the module references first so the session reads as closed even if a close() below fails.
 _comms_fh = _tool_fh = _api_fh = _cli_fh = None
 for fh in handles:
  if fh is None:
   continue
  try:
   fh.close()
  except Exception as e:
   print(f"Warning: Could not close log file: {e}")
 # The registry update runs after the files are closed; it is best-effort and must never block shutdown.
 try:
  from log_registry import LogRegistry
  registry = LogRegistry(str(_LOG_DIR / "log_registry.toml"))
  registry.update_auto_whitelist_status(_session_id)
 except Exception as e:
  print(f"Warning: Could not update auto-whitelist on close: {e}")
def log_api_hook(method: str, path: str, payload: str) -> None:
 """
 Append one timestamped line describing an API hook invocation to the apihooks log.
 Does nothing when the log is not open; write errors are swallowed so logging never breaks the caller.
 """
 if _api_fh is not None:
  stamp = datetime.datetime.now().strftime("%H:%M:%S")
  try:
   line = f"[{stamp}] {method} {path} - Payload: {payload}\n"
   _api_fh.write(line)
   _api_fh.flush()
  except Exception:
   pass
def log_comms(entry: dict[str, Any]) -> None:
"""
Append one comms entry to the comms log file as a JSON-L line.
Thread-safe (GIL + line-buffered file).
"""
if _comms_fh is None:
return
try:
_comms_fh.write(json.dumps(entry, ensure_ascii=False, default=str) + "\n")
except Exception:
pass
def log_tool_call(script: str, result: str, script_path: str | None):
"""
if _comms_fh is None:
return
try:
_comms_fh.write(json.dumps(entry, ensure_ascii=False, default=str) + "\n")
except Exception:
pass
def log_tool_call(script: str, result: str, script_path: Optional[str]) -> Optional[str]:
"""
Append a tool-call record to the toolcalls log and write the PS1 script to
scripts/generated/. Returns the path of the written script file.
"""
global _seq
if _tool_fh is None:
return script_path # logger not open yet
with _seq_lock:
_seq += 1
seq = _seq
ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
# Write the .ps1 file
ps1_name = f"{_ts}_{seq:04d}.ps1"
ps1_path = _SCRIPTS_DIR / ps1_name
try:
ps1_path.write_text(script, encoding="utf-8")
except Exception as exc:
ps1_path = None
ps1_name = f"(write error: {exc})"
# Append to the tool-call sequence log (script body omitted - see .ps1 file)
try:
_tool_fh.write(
f"## Call #{seq} [{ts_entry}]\n"
f"Script file: {ps1_path}\n\n"
f"### Result\n\n"
f"```\n{result}\n```\n\n"
f"---\n\n"
)
_tool_fh.flush()
except Exception:
pass
return str(ps1_path) if ps1_path else None
def log_cli_call(command: str, stdin_content: str | None, stdout_content: str | None, stderr_content: str | None, latency: float):
"""
Log details of a CLI subprocess execution.
"""
if _cli_fh is None:
return
ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
try:
log_data = {
"timestamp": ts_entry,
"command": command,
"stdin": stdin_content,
"stdout": stdout_content,
"stderr": stderr_content,
"latency_sec": latency
}
_cli_fh.write(json.dumps(log_data, ensure_ascii=False, default=str) + "\n")
_cli_fh.flush()
except Exception:
pass
global _seq
if _tool_fh is None:
return script_path
with _seq_lock:
_seq += 1
seq = _seq
ts_entry = datetime.datetime.now().strftime("%H:%M:%S")
ps1_name = f"{_ts}_{seq:04d}.ps1"
ps1_path: Optional[Path] = _SCRIPTS_DIR / ps1_name
try:
ps1_path.write_text(script, encoding="utf-8")
except Exception as exc:
ps1_path = None
ps1_name = f"(write error: {exc})"
try:
_tool_fh.write(
f"## Call #{seq} [{ts_entry}]\n"
f"Script file: {ps1_path}\n\n"
f"### Result\n\n"
f"```\n{result}\n```\n\n"
f"---\n\n"
)
_tool_fh.flush()
except Exception:
pass
return str(ps1_path) if ps1_path else None
def log_cli_call(command: str, stdin_content: Optional[str], stdout_content: Optional[str], stderr_content: Optional[str], latency: float) -> None:
 """
 Append one JSON line describing a CLI subprocess execution to the clicalls log.
 Does nothing when the log is not open; serialization and write errors are swallowed.
 """
 if _cli_fh is not None:
  stamp = datetime.datetime.now().strftime("%H:%M:%S")
  try:
   record = dict(
    timestamp=stamp,
    command=command,
    stdin=stdin_content,
    stdout=stdout_content,
    stderr=stderr_content,
    latency_sec=latency,
   )
   # default=str keeps the line writable even when a value is not JSON-serializable.
   _cli_fh.write(json.dumps(record, ensure_ascii=False, default=str) + "\n")
   _cli_fh.flush()
  except Exception:
   pass

View File

@@ -3,37 +3,46 @@ import subprocess, shutil
from pathlib import Path
from typing import Callable, Optional
TIMEOUT_SECONDS = 60
TIMEOUT_SECONDS: int = 60
def _terminate_process_tree(process: subprocess.Popen) -> None:
 """Kill *process* if it is still running (with its children when taskkill is available), then reap it."""
 if process.poll() is not None:
  # Already exited: never signal the PID again, it may have been reused by another process.
  return
 try:
  if shutil.which("taskkill"):
   # /T also terminates child processes spawned by the script.
   subprocess.run(["taskkill", "/F", "/T", "/PID", str(process.pid)], capture_output=True)
  else:
   process.kill()
  # Finish communication so the pipes are closed and the exit status is collected.
  process.communicate(timeout=5)
 except (OSError, subprocess.SubprocessError):
  # Best-effort cleanup; the caller is already on an error path.
  pass
def run_powershell(script: str, base_dir: str, qa_callback: Optional[Callable[[str], str]] = None) -> str:
 """
 Run a PowerShell script with working directory set to base_dir.
 Returns a string combining stdout, stderr, and exit code.
 If qa_callback is provided and the command fails or has stderr,
 the callback is called with the stderr content and its result is appended.
 Failures are reported as a string starting with "ERROR:" (no executable found,
 timeout after TIMEOUT_SECONDS, or any other exception); only KeyboardInterrupt
 is re-raised. On every error path a still-running child is killed and reaped.
 """
 # Single quotes are doubled because the path is embedded in a single-quoted PowerShell literal.
 safe_dir: str = str(base_dir).replace("'", "''")
 full_script: str = f"Set-Location -LiteralPath '{safe_dir}'\n{script}"
 # Try common executable names
 exe: Optional[str] = next((x for x in ["powershell.exe", "pwsh.exe", "powershell", "pwsh"] if shutil.which(x)), None)
 if not exe:
  return "ERROR: Neither powershell nor pwsh found in PATH"
 process: Optional[subprocess.Popen] = None
 try:
  process = subprocess.Popen(
   [exe, "-NoProfile", "-NonInteractive", "-Command", full_script],
   stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, cwd=base_dir
  )
  stdout, stderr = process.communicate(timeout=TIMEOUT_SECONDS)
  parts: list[str] = []
  if stdout.strip():
   parts.append(f"STDOUT:\n{stdout.strip()}")
  if stderr.strip():
   parts.append(f"STDERR:\n{stderr.strip()}")
  parts.append(f"EXIT CODE: {process.returncode}")
  if (process.returncode != 0 or stderr.strip()) and qa_callback:
   qa_analysis: Optional[str] = qa_callback(stderr.strip())
   if qa_analysis:
    parts.append(f"\nQA ANALYSIS:\n{qa_analysis}")
  return "\n".join(parts)
 except subprocess.TimeoutExpired:
  if process is not None:
   _terminate_process_tree(process)
  return f"ERROR: timed out after {TIMEOUT_SECONDS}s"
 except KeyboardInterrupt:
  if process is not None:
   _terminate_process_tree(process)
  raise
 except Exception as e:
  if process is not None:
   _terminate_process_tree(process)
  return f"ERROR: {e}"

View File

@@ -6,74 +6,59 @@ from api_hook_client import ApiHookClient
from simulation.workflow_sim import WorkflowSimulator
def main():
 """
 Drive a running GUI through a scripted UX walkthrough using the API hook client.
 Steps: reset the session, scaffold a throwaway project, run three discussion turns,
 create and switch discussions, then print a checklist for manual verification.
 Returns early with an error message when the hook server cannot be reached.
 """
 hooks = ApiHookClient()
 print("=== Manual Slop: Live UX Walkthrough ===")
 print("Connecting to GUI...")
 if not hooks.wait_for_server(timeout=10):
  print("Error: Could not connect to GUI. Ensure it is running with --enable-test-hooks")
  return
 simulator = WorkflowSimulator(hooks)
 # 1. Start from a clean session
 print("\n[Action] Resetting Session...")
 hooks.click("btn_reset")
 time.sleep(2)
 # 2. Scaffold a uniquely named project inside the current working tree
 project_name = f"LiveTest_{int(time.time())}"
 git_dir = os.path.abspath(".")
 project_path = os.path.join(git_dir, "tests", f"{project_name}.toml")
 print(f"\n[Action] Scaffolding Project: {project_name} at {project_path}")
 simulator.setup_new_project(project_name, git_dir, project_path)
 # Auto-add makes results show up in history without manual steps
 hooks.set_value("auto_add_history", True)
 time.sleep(1)
 # 3. Discussion loop (three turns keep the run short)
 prompts = (
  "Hi! I want to create a simple python script called 'hello.py' that prints the current date and time. Can you write it for me?",
  "That looks great. Can you also add a feature to print the name of the operating system?",
  "Excellent. Now, please create a requirements.txt file with 'requests' in it.",
 )
 for turn_no, prompt in enumerate(prompts, start=1):
  print(f"\n--- Turn {turn_no} ---")
  # Show the Comms Log tab while the message is sent
  hooks.select_tab("operations_tabs", "tab_comms")
  simulator.run_discussion_turn(prompt)
  indicator = hooks.get_indicator_state("thinking_indicator")
  if indicator.get('shown'):
   print("[Status] Thinking indicator is visible.")
  # Then move to the Tool Log tab after a short pause
  time.sleep(2)
  hooks.select_tab("operations_tabs", "tab_tool")
 # 4. History management
 print("\n[Action] Creating new discussion thread...")
 simulator.create_discussion("Refinement")
 print("\n[Action] Switching back to Default...")
 simulator.switch_discussion("Default")
 # 5. Checklist for the human doing the sign-off
 closing_lines = (
  "\n=== Walkthrough Complete ===",
  "Please verify the following in the GUI:",
  "1. The project metadata reflects the new project.",
  "2. The discussion history contains the 3 turns.",
  "3. The 'Refinement' discussion exists in the list.",
  "\nWalkthrough finished successfully.",
 )
 for text in closing_lines:
  print(text)
if __name__ == "__main__":
 main()

View File

@@ -9,49 +9,42 @@ from api_hook_client import ApiHookClient
from simulation.user_agent import UserSimAgent
def main():
    """Run one ping-pong exchange against the GUI through the test hooks.

    Waits for the hook server, resets the session, sends a single user
    message and then polls the session once per second (at most 60 polls)
    until the newest entry is a non-empty AI reply. Progress and the outcome
    are printed; the function returns None in every case.
    """
    client = ApiHookClient()
    print("Waiting for hook server...")
    if not client.wait_for_server(timeout=5):
        print("Hook server not found. Start GUI with --enable-test-hooks")
        return
    # Constructed as in the original script; not used further below.
    sim_agent = UserSimAgent(client)

    # 1. Reset session to start clean
    print("Resetting session...")
    client.click("btn_reset")
    time.sleep(2)  # Give it time to clear

    # 2. Initial message
    initial_msg = "Hello! I want to create a simple python script that prints 'Hello World'. Can you help me?"
    # The line break is written as an escape: a raw newline inside a
    # single-quoted literal is a syntax error.
    print(f"\n[USER]: {initial_msg}")
    client.set_value("ai_input", initial_msg)
    client.click("btn_gen_send")

    # 3. Wait for AI response
    print("Waiting for AI response...", end="", flush=True)
    last_entry_count = 0
    for _ in range(60):  # 60 seconds max
        time.sleep(1)
        print(".", end="", flush=True)
        session = client.get_session()
        entries = session.get('session', {}).get('entries', [])
        if len(entries) > last_entry_count:
            # Something happened
            last_entry = entries[-1]
            if last_entry.get('role') == 'AI' and last_entry.get('content'):
                print(f"\n[AI]: {last_entry.get('content')[:100]}...")
                print("\nPing-pong successful!")
                return
            last_entry_count = len(entries)
    print("\nTimeout waiting for AI response")


if __name__ == "__main__":
    main()

View File

@@ -4,35 +4,30 @@ import time
from simulation.sim_base import BaseSimulation, run_sim
class AISettingsSimulation(BaseSimulation):
    """Simulation that switches the active Gemini model and verifies each switch."""

    def run(self):
        """Check the initial provider, then switch models twice and verify each.

        Every switch writes ``current_model`` through the hook client, waits
        two seconds, reads the value back and asserts that it matches.
        """
        print("\n--- Running AI Settings Simulation (Gemini Only) ---")

        # 1. Verify initial model
        provider = self.client.get_value("current_provider")
        model = self.client.get_value("current_model")
        print(f"[Sim] Initial Provider: {provider}, Model: {model}")
        assert provider == "gemini", f"Expected gemini, got {provider}"

        # 2. Switch to another Gemini model, 3. switch back to flash-lite.
        # Each step: (model to select, announcement wording, read-back label).
        steps = (
            ("gemini-1.5-flash", "Switching to", "Updated Model"),
            ("gemini-2.5-flash-lite", "Switching back to", "Final Model"),
        )
        for wanted, announcement, label in steps:
            print(f"[Sim] {announcement} {wanted}...")
            self.client.set_value("current_model", wanted)
            time.sleep(2)
            observed = self.client.get_value("current_model")
            print(f"[Sim] {label}: {observed}")
            assert observed == wanted, f"Expected {wanted}, got {observed}"


if __name__ == "__main__":
    run_sim(AISettingsSimulation)

View File

@@ -9,80 +9,75 @@ from simulation.workflow_sim import WorkflowSimulator
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
class BaseSimulation:
    """Common scaffolding for GUI simulations driven through the hook client.

    Holds the hook client, a WorkflowSimulator built on top of it, and the
    path of the temporary project file created by setup().
    """

    def __init__(self, client: ApiHookClient = None):
        """Store *client*, or create a default ApiHookClient when none is given."""
        self.client = ApiHookClient() if client is None else client
        self.sim = WorkflowSimulator(self.client)
        self.project_path = None

    def setup(self, project_name="SimProject"):
        """Connect to the GUI, reset the session and scaffold a fresh project.

        Raises:
            RuntimeError: if the hook server does not answer within 5 seconds.
        """
        print("\n[BaseSim] Connecting to GUI...")
        if not self.client.wait_for_server(timeout=5):
            raise RuntimeError("Could not connect to GUI. Ensure it is running with --enable-test-hooks")

        print("[BaseSim] Resetting session...")
        self.client.click("btn_reset")
        time.sleep(0.5)

        git_dir = os.path.abspath(".")
        self.project_path = os.path.abspath(f"tests/temp_{project_name.lower()}.toml")
        # Remove a project file left over from an earlier run.
        if os.path.exists(self.project_path):
            os.remove(self.project_path)

        print(f"[BaseSim] Scaffolding Project: {project_name}")
        self.sim.setup_new_project(project_name, git_dir, self.project_path)
        # Standard test settings
        self.client.set_value("auto_add_history", True)
        self.client.set_value("current_provider", "gemini")
        self.client.set_value("current_model", "gemini-2.5-flash-lite")
        time.sleep(0.2)

    def teardown(self):
        """Finish the simulation; the project file is deliberately left on disk."""
        # The file is kept for debugging after a failure, so nothing is removed.
        print("[BaseSim] Teardown complete.")

    def get_value(self, tag):
        """Return the hook client's value for *tag*."""
        return self.client.get_value(tag)

    def wait_for_event(self, event_type, timeout=5):
        """Delegate to the hook client's wait_for_event and return its result."""
        return self.client.wait_for_event(event_type, timeout)

    def assert_panel_visible(self, panel_tag, msg=None):
        """Placeholder: no visibility check is implemented yet; always returns None."""
        # A real check needs a hook that reports panel visibility.
        pass

    def wait_for_element(self, tag, timeout=2):
        """Poll until get_value(tag) stops raising, or *timeout* seconds elapse.

        Returns:
            True as soon as the value can be read without an exception,
            False if that never happens within the timeout.
        """
        # Monotonic clock: elapsed time must not be affected by wall-clock changes.
        start = time.monotonic()
        while time.monotonic() - start < timeout:
            try:
                # If we can get_value without error, it's likely there
                self.client.get_value(tag)
                return True
            except Exception:
                # Only ordinary errors are retried; KeyboardInterrupt and
                # SystemExit propagate so the run can be aborted.
                time.sleep(0.1)
        return False
def run_sim(sim_class):
    """Helper to run a simulation class standalone.

    Instantiates *sim_class* with no arguments, then calls setup() and run().
    On any Exception the failure and its traceback are printed and the
    process exits with status 1. teardown() is called in every case.
    """
    sim = sim_class()
    try:
        sim.setup()
        sim.run()
        print(f"\n[SUCCESS] {sim_class.__name__} completed successfully.")
    except Exception as e:
        print(f"\n[FAILURE] {sim_class.__name__} failed: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
    finally:
        # Runs on success and while SystemExit from the failure path unwinds.
        sim.teardown()

View File

@@ -4,78 +4,67 @@ import time
from simulation.sim_base import BaseSimulation, run_sim
class ContextSimulation(BaseSimulation):
    """Simulation covering discussion creation, context refresh, one chat turn and truncation."""

    def _entries(self):
        """Return the entry list of the current session ([] when absent)."""
        return self.client.get_session().get('session', {}).get('entries', [])

    def run(self):
        """Run the five context and chat checks in order; failed checks raise AssertionError."""
        print("\n--- Running Context & Chat Simulation ---")

        # 1. Test Discussion Creation
        disc_name = f"TestDisc_{int(time.time())}"
        print(f"[Sim] Creating discussion: {disc_name}")
        self.sim.create_discussion(disc_name)
        time.sleep(1)
        # The session is fetched but not inspected: for now the click is trusted.
        self.client.get_session()

        # 2. Test File Aggregation & Context Refresh
        print("[Sim] Testing context refresh and token budget...")
        proj = self.client.get_project()
        # Add many files to ensure we cross the 1% threshold (~9000 tokens)
        import glob
        tracked = proj['project']['files']['paths']
        for candidate in glob.glob("*.py"):
            base = os.path.basename(candidate)
            if base not in tracked:
                tracked.append(base)
        # Update project via hook
        self.client.post_project(proj['project'])
        time.sleep(1)

        # Trigger MD Only to refresh context and token budget
        print("[Sim] Clicking MD Only...")
        self.client.click("btn_md_only")
        time.sleep(5)

        # Verify status (the refreshed project is fetched but not inspected)
        self.client.get_project()
        status = self.client.get_value("ai_status")
        print(f"[Sim] Status: {status}")
        assert "md written" in status, f"Expected 'md written' in status, got {status}"

        # Verify token budget
        pct = self.client.get_value("token_budget_pct")
        current = self.client.get_value("token_budget_current")
        print(f"[Sim] Token budget pct: {pct}, current={current}")
        # Only warn on 0: the MD was written, so the context may simply be small.
        if pct == 0:
            print("[Sim] WARNING: token_budget_pct is 0. This might be due to small context or estimation failure.")

        # 3. Test Chat Turn
        msg = "What is the current date and time? Answer in one sentence."
        print(f"[Sim] Sending message: {msg}")
        self.sim.run_discussion_turn(msg)

        # 4. Verify History
        print("[Sim] Verifying history...")
        entries = self._entries()
        # We expect at least 2 entries (User and AI)
        assert len(entries) >= 2, f"Expected at least 2 entries, found {len(entries)}"
        assert entries[-2]['role'] == 'User', "Expected second to last entry to be User"
        assert entries[-1]['role'] == 'AI', "Expected last entry to be AI"
        print(f"[Sim] AI responded: {entries[-1]['content'][:50]}...")

        # 5. Test History Truncation
        print("[Sim] Testing history truncation...")
        self.sim.truncate_history(1)
        time.sleep(1)
        entries = self._entries()
        # Truncating to 1 pair means at most 2 entries remain.
        assert len(entries) <= 2, f"Expected <= 2 entries after truncation, found {len(entries)}"


if __name__ == "__main__":
    run_sim(ContextSimulation)

View File

@@ -4,76 +4,66 @@ import time
from simulation.sim_base import BaseSimulation, run_sim
class ExecutionSimulation(BaseSimulation):
    """Simulation that asks for a script, approves the confirmations and checks its output."""

    def setup(self, project_name="SimProject"):
        """Run the base setup, then delete any hello.ps1 left in the working directory."""
        super().setup(project_name)
        if os.path.exists("hello.ps1"):
            os.remove("hello.ps1")

    @staticmethod
    def _find_expected_output(entries):
        """Return the success message for *entries*, or None if the output is absent.

        AI entries are checked before Tool/Function entries. A content value
        of None is treated as empty text instead of raising TypeError.
        """
        for e in entries:
            if e.get('role') == 'AI' and "Simulation Test" in (e.get('content') or ''):
                return "[Sim] AI responded with expected text. Success."
        # Also check if output is already in history via tool role
        for e in entries:
            if e.get('role') in ['Tool', 'Function'] and "Simulation Test" in (e.get('content') or ''):
                return f"[Sim] Expected output found in {e.get('role')} results. Success."
        return None

    def run(self):
        """Trigger script generation and wait for evidence that the script ran.

        The loop ends on success, after three consecutive polls with an
        "error" status, or 90 seconds after the last approval (or the start).

        Raises:
            AssertionError: if the expected output was never observed.
        """
        print("\n--- Running Execution & Modals Simulation ---")
        # 1. Trigger script generation (Async so we don't block on the wait loop)
        msg = "Create a hello.ps1 script that prints 'Simulation Test' and execute it."
        print(f"[Sim] Sending message to trigger script: {msg}")
        self.sim.run_discussion_turn_async(msg)

        # 2. Monitor for events and text responses
        print("[Sim] Monitoring for script approvals and AI text...")
        # Monotonic clock: the window must not move with wall-clock changes.
        start_wait = time.monotonic()
        approved_count = 0
        success = False
        consecutive_errors = 0
        while time.monotonic() - start_wait < 90:
            # Check for error status (be lenient with transients)
            status = self.client.get_value("ai_status")
            if status and status.lower().startswith("error"):
                consecutive_errors += 1
                if consecutive_errors >= 3:
                    print(f"[ABORT] Execution simulation aborted due to persistent GUI error: {status}")
                    break
            else:
                consecutive_errors = 0

            # Check for script confirmation event
            ev = self.client.wait_for_event("script_confirmation_required", timeout=1)
            if ev:
                print(f"[Sim] Approving script #{approved_count+1}: {ev.get('script', '')[:50]}...")
                self.client.click("btn_approve_script")
                approved_count += 1
                # Give more time if we just approved a script
                start_wait = time.monotonic()

            # Check if the expected output has shown up in the history yet
            session = self.client.get_session()
            entries = session.get('session', {}).get('entries', [])
            # Debug: log last few roles
            if entries:
                last_few = entries[-3:]
                print(f"[Sim] Waiting... Last {len(last_few)} roles: {[e.get('role') for e in last_few]}")
            verdict = self._find_expected_output(entries)
            if verdict:
                print(verdict)
                success = True
                break
            time.sleep(1.0)

        assert success, "Failed to observe script execution output or AI confirmation text"
        print(f"[Sim] Final check: approved {approved_count} scripts.")


if __name__ == "__main__":
    run_sim(ExecutionSimulation)

View File

@@ -4,44 +4,37 @@ import time
from simulation.sim_base import BaseSimulation, run_sim
class ToolsSimulation(BaseSimulation):
    """Simulation that sends two tool-triggering prompts and reports what the history shows."""

    def run(self):
        """Send a directory-listing and a file-reading request, then inspect the history.

        Raises:
            AssertionError: if the session history is empty after both turns.
        """
        print("\n--- Running Tools Simulation ---")
        # 1. Trigger list_directory tool
        msg = "List the files in the current directory."
        print(f"[Sim] Sending message to trigger tool: {msg}")
        self.sim.run_discussion_turn(msg)

        # 2. Wait for AI to execute tool
        print("[Sim] Waiting for tool execution...")
        time.sleep(5)  # Give it some time

        # 3. Verify Tool Log
        # Not verified yet: this needs a hook that exposes the GUI's tool log.

        # 4. Trigger read_file tool
        msg = "Read the first 10 lines of aggregate.py."
        print(f"[Sim] Sending message to trigger tool: {msg}")
        self.sim.run_discussion_turn(msg)

        # 5. Wait and Verify
        print("[Sim] Waiting for tool execution...")
        time.sleep(5)
        session = self.client.get_session()
        entries = session.get('session', {}).get('entries', [])
        # Tool outputs are usually in the conversation history as 'Tool' role or similar
        tool_outputs = [e for e in entries if e.get('role') in ['Tool', 'Function']]
        print(f"[Sim] Found {len(tool_outputs)} tool outputs in history.")

        # Tool results may be folded into the AI turn, so the final response is
        # reported too. An empty history fails with a clear message instead of
        # a bare IndexError.
        assert entries, "Expected at least one history entry after the tool turns, found none"
        last_ai_msg = entries[-1].get('content') or ''
        print(f"[Sim] Final AI Response: {last_ai_msg[:100]}...")


if __name__ == "__main__":
    run_sim(ToolsSimulation)

View File

@@ -3,48 +3,45 @@ import random
import ai_client
class UserSimAgent:
    """Simulated human user whose replies are generated through ai_client."""

    def __init__(self, hook_client, model="gemini-2.5-flash-lite"):
        """Store the hook client, the model name and the persona system prompt."""
        self.hook_client = hook_client
        self.model = model
        self.system_prompt = (
            "You are a software engineer testing an AI coding assistant called 'Manual Slop'. "
            "You want to build a small Python project and verify the assistant's capabilities. "
            "Keep your responses concise and human-like. "
            "Do not use markdown blocks for your main message unless you are providing code."
        )

    def generate_response(self, conversation_history):
        """
        Generates a human-like response based on the conversation history.
        conversation_history: list of dicts with 'role' and 'content'

        Only the most recent entry with role 'AI' is used as the prompt; an
        empty string is sent when the history contains no such entry.
        """
        # ai_client takes md_content and user_message, so for now only the
        # assistant's latest message is forwarded as the prompt.
        last_ai_msg = ""
        for entry in reversed(conversation_history):
            if entry.get('role') == 'AI':
                last_ai_msg = entry.get('content', '')
                break
        # We need to set a custom system prompt for the User Simulator
        try:
            ai_client.set_custom_system_prompt(self.system_prompt)
            # md_content is blank: the simulated user is not given project files.
            response = ai_client.send(md_content="", user_message=last_ai_msg)
        finally:
            # Always clear the persona prompt, even when send() raises.
            ai_client.set_custom_system_prompt("")
        return response

    def perform_action_with_delay(self, action_func, *args, **kwargs):
        """
        Executes an action with a human-like delay.

        Sleeps a random 0.5-2.0 seconds, then returns action_func(*args, **kwargs).
        """
        delay = random.uniform(0.5, 2.0)
        time.sleep(delay)
        return action_func(*args, **kwargs)

View File

@@ -4,84 +4,80 @@ from api_hook_client import ApiHookClient
from simulation.user_agent import UserSimAgent
class WorkflowSimulator:
    """Drives common GUI workflows (projects, discussions, chat turns) via the hook client."""

    def __init__(self, hook_client: ApiHookClient):
        """Keep the hook client and build the simulated user agent on top of it."""
        self.client = hook_client
        self.user_agent = UserSimAgent(hook_client)

    def setup_new_project(self, name, git_dir, project_path=None):
        """Create a new project, set its git directory and save it.

        With *project_path* the automated button is clicked and the path is
        passed as user data; otherwise the plain "new project" button is used.
        """
        print(f"Setting up new project: {name}")
        if project_path:
            self.client.click("btn_project_new_automated", user_data=project_path)
        else:
            self.client.click("btn_project_new")
        time.sleep(1)
        self.client.set_value("project_git_dir", git_dir)
        self.client.click("btn_project_save")
        time.sleep(1)

    def create_discussion(self, name):
        """Enter *name* as the new discussion name and click create."""
        print(f"Creating discussion: {name}")
        self.client.set_value("disc_new_name_input", name)
        self.client.click("btn_disc_create")
        time.sleep(1)

    def switch_discussion(self, name):
        """Select the discussion called *name* in the discussion list."""
        print(f"Switching to discussion: {name}")
        self.client.select_list_item("disc_listbox", name)
        time.sleep(1)

    def load_prior_log(self):
        """Click the load-log button; the file dialog that follows is not automated."""
        print("Loading prior log")
        self.client.click("btn_load_log")
        # Only the button click is exercised; the dialog needs more hooks.
        time.sleep(1)

    def truncate_history(self, pairs):
        """Set the truncation size to *pairs* and click truncate."""
        print(f"Truncating history to {pairs} pairs")
        self.client.set_value("disc_truncate_pairs", pairs)
        self.client.click("btn_disc_truncate")
        time.sleep(1)

    def run_discussion_turn(self, user_message=None):
        """Send one user message and return the result of wait_for_ai_response()."""
        self.run_discussion_turn_async(user_message)
        # Wait for AI
        return self.wait_for_ai_response()

    def run_discussion_turn_async(self, user_message=None):
        """Send one user message without waiting for the reply.

        When *user_message* is None, the message is generated by the user
        agent from the current session entries.
        """
        if user_message is None:
            # Generate from AI history
            session = self.client.get_session()
            entries = session.get('session', {}).get('entries', [])
            user_message = self.user_agent.generate_response(entries)
        print(f"\n[USER]: {user_message}")
        self.client.set_value("ai_input", user_message)
        self.client.click("btn_gen_send")

    def wait_for_ai_response(self, timeout=60):
        """Poll once per second until a new, non-empty AI entry is the latest entry.

        Returns:
            The AI entry dict on success; a synthetic
            {"role": "AI", "content": "ERROR: <status>"} dict when the GUI
            status starts with "error"; None on timeout.
        """
        print("Waiting for AI response...", end="", flush=True)
        # Monotonic clock: the timeout must not move with wall-clock changes.
        start_time = time.monotonic()
        last_count = len(self.client.get_session().get('session', {}).get('entries', []))
        while time.monotonic() - start_time < timeout:
            # Check for error status first
            status = self.client.get_value("ai_status")
            if status and status.lower().startswith("error"):
                print(f"\n[ABORT] GUI reported error status: {status}")
                return {"role": "AI", "content": f"ERROR: {status}"}
            time.sleep(1)
            print(".", end="", flush=True)
            entries = self.client.get_session().get('session', {}).get('entries', [])
            if len(entries) > last_count:
                last_entry = entries[-1]
                if last_entry.get('role') == 'AI' and last_entry.get('content'):
                    content = last_entry.get('content')
                    print(f"\n[AI]: {content[:100]}...")
                    if "error" in content.lower() or "blocked" in content.lower():
                        print("[WARN] AI response appears to contain an error message.")
                    return last_entry
        print("\nTimeout waiting for AI")
        return None

View File

@@ -27,185 +27,165 @@ import ast
import re
from pathlib import Path
# ------------------------------------------------------------------ per-type extractors
def _summarise_python(path: Path, content: str) -> str:
lines = content.splitlines()
line_count = len(lines)
parts = [f"**Python** — {line_count} lines"]
try:
tree = ast.parse(content.lstrip(chr(0xFEFF)), filename=str(path))
except SyntaxError as e:
parts.append(f"_Parse error: {e}_")
return "\n".join(parts)
# Imports
imports = []
for node in ast.walk(tree):
if isinstance(node, ast.Import):
for alias in node.names:
imports.append(alias.name.split(".")[0])
elif isinstance(node, ast.ImportFrom):
if node.module:
imports.append(node.module.split(".")[0])
if imports:
unique_imports = sorted(set(imports))
parts.append(f"imports: {', '.join(unique_imports)}")
# Top-level constants (ALL_CAPS assignments)
constants = []
for node in ast.iter_child_nodes(tree):
if isinstance(node, ast.Assign):
for t in node.targets:
if isinstance(t, ast.Name) and t.id.isupper():
constants.append(t.id)
elif isinstance(node, (ast.AnnAssign,)):
if isinstance(node.target, ast.Name) and node.target.id.isupper():
constants.append(node.target.id)
if constants:
parts.append(f"constants: {', '.join(constants)}")
# Classes + their methods
for node in ast.iter_child_nodes(tree):
if isinstance(node, ast.ClassDef):
methods = [
n.name for n in ast.iter_child_nodes(node)
if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))
]
if methods:
parts.append(f"class {node.name}: {', '.join(methods)}")
else:
parts.append(f"class {node.name}")
lines = content.splitlines()
line_count = len(lines)
parts = [f"**Python** — {line_count} lines"]
try:
tree = ast.parse(content.lstrip(chr(0xFEFF)), filename=str(path))
except SyntaxError as e:
parts.append(f"_Parse error: {e}_")
return "\n".join(parts)
# Imports
imports = []
for node in ast.walk(tree):
if isinstance(node, ast.Import):
for alias in node.names:
imports.append(alias.name.split(".")[0])
elif isinstance(node, ast.ImportFrom):
if node.module:
imports.append(node.module.split(".")[0])
if imports:
unique_imports = sorted(set(imports))
parts.append(f"imports: {', '.join(unique_imports)}")
# Top-level constants (ALL_CAPS assignments)
constants = []
for node in ast.iter_child_nodes(tree):
if isinstance(node, ast.Assign):
for t in node.targets:
if isinstance(t, ast.Name) and t.id.isupper():
constants.append(t.id)
elif isinstance(node, (ast.AnnAssign,)):
if isinstance(node.target, ast.Name) and node.target.id.isupper():
constants.append(node.target.id)
if constants:
parts.append(f"constants: {', '.join(constants)}")
# Classes + their methods
for node in ast.iter_child_nodes(tree):
if isinstance(node, ast.ClassDef):
methods = [
n.name for n in ast.iter_child_nodes(node)
if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))
]
if methods:
parts.append(f"class {node.name}: {', '.join(methods)}")
else:
parts.append(f"class {node.name}")
# Top-level functions
top_fns = [
node.name for node in ast.iter_child_nodes(tree)
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
]
if top_fns:
parts.append(f"functions: {', '.join(top_fns)}")
return "\n".join(parts)
top_fns = [
node.name for node in ast.iter_child_nodes(tree)
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
]
if top_fns:
parts.append(f"functions: {', '.join(top_fns)}")
return "\n".join(parts)
def _summarise_toml(path: Path, content: str) -> str:
lines = content.splitlines()
line_count = len(lines)
parts = [f"**TOML** — {line_count} lines"]
# Extract top-level table headers [key] and [[key]]
table_pat = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}")
tables = []
for line in lines:
m = table_pat.match(line)
if m:
tables.append(m.group(1).strip())
if tables:
parts.append(f"tables: {', '.join(tables)}")
# Top-level key = value (not inside a [table])
kv_pat = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
in_table = False
top_keys = []
for line in lines:
if table_pat.match(line):
in_table = True
continue
if not in_table:
m = kv_pat.match(line)
if m:
top_keys.append(m.group(1))
if top_keys:
parts.append(f"top-level keys: {', '.join(top_keys)}")
return "\n".join(parts)
lines = content.splitlines()
line_count = len(lines)
parts = [f"**TOML** — {line_count} lines"]
# Extract top-level table headers [key] and [[key]]
table_pat = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}")
tables = []
for line in lines:
m = table_pat.match(line)
if m:
tables.append(m.group(1).strip())
if tables:
parts.append(f"tables: {', '.join(tables)}")
# Top-level key = value (not inside a [table])
kv_pat = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
in_table = False
top_keys = []
for line in lines:
if table_pat.match(line):
in_table = True
continue
if not in_table:
m = kv_pat.match(line)
if m:
top_keys.append(m.group(1))
if top_keys:
parts.append(f"top-level keys: {', '.join(top_keys)}")
return "\n".join(parts)
def _summarise_markdown(path: Path, content: str) -> str:
lines = content.splitlines()
line_count = len(lines)
parts = [f"**Markdown** — {line_count} lines"]
headings = []
for line in lines:
m = re.match(r"^(#{1,3})\s+(.+)", line)
if m:
level = len(m.group(1))
text = m.group(2).strip()
indent = " " * (level - 1)
headings.append(f"{indent}{text}")
if headings:
parts.append("headings:\n" + "\n".join(f" {h}" for h in headings))
return "\n".join(parts)
lines = content.splitlines()
line_count = len(lines)
parts = [f"**Markdown** — {line_count} lines"]
headings = []
for line in lines:
m = re.match(r"^(#{1,3})\s+(.+)", line)
if m:
level = len(m.group(1))
text = m.group(2).strip()
indent = " " * (level - 1)
headings.append(f"{indent}{text}")
if headings:
parts.append("headings:\n" + "\n".join(f" {h}" for h in headings))
return "\n".join(parts)
def _summarise_generic(path: Path, content: str) -> str:
lines = content.splitlines()
line_count = len(lines)
suffix = path.suffix.lstrip(".").upper() or "TEXT"
parts = [f"**{suffix}** — {line_count} lines"]
preview = lines[:8]
if preview:
parts.append("preview:\n```\n" + "\n".join(preview) + "\n```")
return "\n".join(parts)
# ------------------------------------------------------------------ dispatch
lines = content.splitlines()
line_count = len(lines)
suffix = path.suffix.lstrip(".").upper() or "TEXT"
parts = [f"**{suffix}** — {line_count} lines"]
preview = lines[:8]
if preview:
parts.append("preview:\n```\n" + "\n".join(preview) + "\n```")
return "\n".join(parts)
# ------------------------------------------------------------------ dispatch
# File suffix -> summariser function. Each key is listed exactly once.
_SUMMARISERS = {
    ".py": _summarise_python,
    ".toml": _summarise_toml,
    ".md": _summarise_markdown,
    ".ini": _summarise_generic,
    ".txt": _summarise_generic,
    ".ps1": _summarise_generic,
}
def summarise_file(path: Path, content: str) -> str:
    """
    Return a compact markdown summary string for a single file.
    `content` is the already-read file text (or an error string).

    The summariser is chosen by the lower-cased suffix of `path`, falling back
    to the generic one. Any exception it raises is turned into a
    "_Summariser error: ..._" string instead of propagating.
    """
    ext = getattr(path, "suffix", "").lower()
    summariser = _SUMMARISERS.get(ext, _summarise_generic)
    try:
        return summariser(path, content)
    except Exception as exc:
        return f"_Summariser error: {exc}_"
def summarise_items(file_items: list[dict]) -> list[dict]:
    """
    Given a list of file_item dicts (as returned by aggregate.build_file_items),
    return a parallel list of dicts with an added `summary` key.

    Items flagged with a truthy `error`, or lacking a `path`, get a fixed
    error summary; the input dicts themselves are not modified.
    """
    def _summary_for(item: dict) -> str:
        location = item.get("path")
        if item.get("error", False) or location is None:
            return "_Error reading file_"
        as_path = location if isinstance(location, Path) else Path(location)
        return summarise_file(as_path, item.get("content", ""))

    return [{**item, "summary": _summary_for(item)} for item in file_items]
def build_summary_markdown(file_items: list[dict]) -> str:
    """
    Build a compact markdown string of file summaries, suitable for the
    initial <context> block instead of full file contents.

    Each item becomes a "### `path`" section (falling back to the item's
    `entry`, then "unknown"); sections are separated by horizontal rules.
    """
    sections = []
    for entry in summarise_items(file_items):
        label = entry.get("path") or entry.get("entry", "unknown")
        body = entry.get("summary", "")
        sections.append(f"### `{label}`\n\n{body}")
    return "\n\n---\n\n".join(sections)

View File

@@ -6,28 +6,27 @@ import project_manager
from models import Track, Ticket
class TestMMAPersistence(unittest.TestCase):
    """Checks that the `mma` section of a project exists by default and survives save/load."""

    def test_default_project_has_mma(self):
        """A freshly created default project carries an empty `mma` section."""
        proj = project_manager.default_project("test")
        self.assertIn("mma", proj)
        self.assertEqual(proj["mma"], {"epic": "", "active_track_id": "", "tracks": []})

    def test_save_load_mma(self):
        """An `mma` section written by save_project is returned by load_project."""
        import tempfile  # local import: only this test needs it

        proj = project_manager.default_project("test")
        proj["mma"] = {"epic": "Test Epic", "tracks": [{"id": "track_1"}]}
        # Work in a throwaway directory so nothing is left in the working
        # directory, even if the test fails part-way.
        # NOTE(review): presumably the companion *_history.toml file is written
        # next to the project file, and is therefore removed too; confirm.
        with tempfile.TemporaryDirectory() as tmp_dir:
            test_file = Path(tmp_dir) / "test_mma_proj.toml"
            project_manager.save_project(proj, test_file)
            loaded = project_manager.load_project(test_file)
        self.assertIn("mma", loaded)
        self.assertEqual(loaded["mma"]["epic"], "Test Epic")
        self.assertEqual(len(loaded["mma"]["tracks"]), 1)


if __name__ == "__main__":
    unittest.main()

View File

@@ -15,82 +15,76 @@ import ai_client
@pytest.fixture(autouse=True)
def reset_ai_client():
    """Reset ai_client global state between every test to prevent state pollution."""
    ai_client.reset_session()
    # Every test starts on the same, safe default provider/model pair.
    ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
    yield
def kill_process_tree(pid):
    """Forcefully kill a process and, where possible, all of its children.

    Best-effort: any failure is printed and swallowed so that fixture
    teardown never raises.

    Args:
        pid: Process id to kill. ``None`` is accepted and ignored.
    """
    if pid is None:
        return
    try:
        print(f"[Fixture] Attempting to kill process tree for PID {pid}...")
        if os.name == 'nt':
            # /F is force, /T is tree (includes children)
            subprocess.run(
                ["taskkill", "/F", "/T", "/PID", str(pid)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=False,
            )
        else:
            pgid = os.getpgid(pid)
            if pgid == os.getpgid(0):
                # The target shares OUR process group (it was not started in
                # its own session/group). killpg would also kill this test
                # runner, so only signal the process itself.
                os.kill(pid, signal.SIGKILL)
            else:
                # On Unix, kill the process group
                os.killpg(pgid, signal.SIGKILL)
        print(f"[Fixture] Process tree {pid} killed.")
    except Exception as e:
        print(f"[Fixture] Error killing process tree {pid}: {e}")
@pytest.fixture(scope="session")
def live_gui():
    """
    Session-scoped fixture that starts gui_2.py with --enable-test-hooks.

    Launches the GUI through ``uv run python``, redirects its output to
    ``logs/gui_2_py_test.log`` and waits up to 15 seconds for the hook server
    at http://127.0.0.1:8999/status to answer with HTTP 200.

    Yields:
        ``(process, gui_script)``: the ``subprocess.Popen`` handle and the
        script name.

    On teardown the GUI session is reset (best-effort) and the process tree is
    killed. If the hook server never becomes ready, the process is killed and
    the test session fails via ``pytest.fail``.
    """
    gui_script = "gui_2.py"
    print(f"\n[Fixture] Starting {gui_script} --enable-test-hooks...")
    os.makedirs("logs", exist_ok=True)
    # Put the child in its own process group/session so that killing "the
    # tree" later cannot hit the test runner's own group.
    if os.name == 'nt':
        isolation = {"creationflags": subprocess.CREATE_NEW_PROCESS_GROUP}
    else:
        isolation = {"start_new_session": True}
    log_path = f"logs/{gui_script.replace('.', '_')}_test.log"
    # The context manager closes the log on every exit path, including the
    # startup-failure path below.
    with open(log_path, "w", encoding="utf-8") as log_file:
        process = subprocess.Popen(
            ["uv", "run", "python", "-u", gui_script, "--enable-test-hooks"],
            stdout=log_file,
            stderr=log_file,
            text=True,
            **isolation,
        )
        max_retries = 15  # Slightly more time for gui_2
        ready = False
        print(f"[Fixture] Waiting up to {max_retries}s for Hook Server on port 8999...")
        start_time = time.time()
        while time.time() - start_time < max_retries:
            try:
                response = requests.get("http://127.0.0.1:8999/status", timeout=0.5)
                if response.status_code == 200:
                    ready = True
                    print(f"[Fixture] GUI Hook Server for {gui_script} is ready after {round(time.time() - start_time, 2)}s.")
                    break
            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
                if process.poll() is not None:
                    print(f"[Fixture] {gui_script} process died unexpectedly during startup.")
                    break
                time.sleep(0.5)
        if not ready:
            print(f"[Fixture] TIMEOUT/FAILURE: Hook server for {gui_script} failed to respond.")
            kill_process_tree(process.pid)
            pytest.fail(f"Failed to start {gui_script} with test hooks.")
        try:
            yield process, gui_script
        finally:
            print(f"\n[Fixture] Finally block triggered: Shutting down {gui_script}...")
            # Reset the GUI state before shutting down
            try:
                client = ApiHookClient()
                client.reset_session()
                time.sleep(0.5)
            except Exception as e:
                # Best-effort only: the process is killed right after anyway.
                print(f"[Fixture] GUI reset before shutdown failed (ignored): {e}")
            kill_process_tree(process.pid)

21
tests/mock_alias_tool.py Normal file
View File

@@ -0,0 +1,21 @@
import sys, json, os, subprocess
# Mock CLI used by the tests: reads the prompt from stdin and answers with
# JSON-lines events on stdout.
prompt = sys.stdin.read()
if '"role": "tool"' in prompt:
    # Second round-trip: the prompt already carries a tool result.
    print(json.dumps({"type": "message", "role": "assistant", "content": "Tool worked!"}), flush=True)
    print(json.dumps({"type": "result", "stats": {"total_tokens": 20}}), flush=True)
else:
    # We must call the bridge to trigger the GUI approval!
    tool_call = {"name": "list_directory", "input": {"dir_path": "."}}
    # Locate the bridge relative to this file (<repo>/tests/mock_alias_tool.py)
    # rather than through a machine-specific absolute path.
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    bridge_cmd = [sys.executable, os.path.join(repo_root, "scripts", "cli_tool_bridge.py")]
    proc = subprocess.Popen(bridge_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
    # The bridge's answer is deliberately ignored:
    # even if bridge says allow, we emit the tool_use to the adapter
    proc.communicate(input=json.dumps(tool_call))
    print(json.dumps({"type": "message", "role": "assistant", "content": "I will list the directory."}), flush=True)
    print(json.dumps({
        "type": "tool_use",
        "name": "list_directory",
        "id": "alias_call",
        "args": {"dir_path": "."}
    }), flush=True)
    print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)

View File

@@ -4,104 +4,92 @@ import subprocess
import os
def _emit_json(payload):
    """Write one JSON object as a single line on stdout and flush immediately."""
    print(json.dumps(payload), flush=True)


def main():
    """Mock of the Gemini CLI: read a prompt on stdin, answer with JSON lines on stdout.

    Flow:
      * Management sub-commands (mcp/extensions/skills/hooks) produce no output.
      * A prompt that already contains tool results gets a final answer.
      * Otherwise a `read_file` tool call is submitted to
        scripts/cli_tool_bridge.py; if the bridge answers "allow" a tool_use
        event is emitted, else a denial message. Any bridge failure counts as
        a denial.
    """
    # Debug log to stderr
    sys.stderr.write(f"DEBUG: mock_gemini_cli called with args: {sys.argv}\n")
    sys.stderr.write(f"DEBUG: GEMINI_CLI_HOOK_CONTEXT: {os.environ.get('GEMINI_CLI_HOOK_CONTEXT')}\n")
    # Read prompt from stdin
    try:
        # On Windows, stdin might be closed or behave weirdly if not handled.
        # A missing stdin (None) raises AttributeError, a closed one ValueError,
        # an invalid handle OSError.
        prompt = sys.stdin.read()
    except (EOFError, OSError, ValueError, AttributeError):
        prompt = ""
    sys.stderr.write(f"DEBUG: Received prompt via stdin ({len(prompt)} chars)\n")
    sys.stderr.flush()
    # Skip management commands
    if len(sys.argv) > 1 and sys.argv[1] in ["mcp", "extensions", "skills", "hooks"]:
        return
    # If the prompt contains tool results, provide final answer
    if '"role": "tool"' in prompt or '"tool_call_id"' in prompt:
        _emit_json({
            "type": "message",
            "role": "assistant",
            "content": "I have processed the tool results. Everything looks good!"
        })
        _emit_json({
            "type": "result",
            "status": "success",
            "stats": {"total_tokens": 100, "input_tokens": 80, "output_tokens": 20},
            "session_id": "mock-session-final"
        })
        return
    # Default flow: simulate a tool call
    bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
    # Using format that bridge understands
    bridge_tool_call = {
        "name": "read_file",
        "input": {"path": "test.txt"}
    }
    sys.stderr.write(f"DEBUG: Calling bridge at {bridge_path}\n")
    sys.stderr.flush()
    try:
        # CRITICAL: Use the current process environment to ensure GEMINI_CLI_HOOK_CONTEXT is passed
        process = subprocess.Popen(
            [sys.executable, bridge_path],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=os.environ
        )
        stdout, stderr = process.communicate(input=json.dumps(bridge_tool_call))
        sys.stderr.write(f"DEBUG: Bridge stdout: {stdout}\n")
        sys.stderr.write(f"DEBUG: Bridge stderr: {stderr}\n")
        decision_data = json.loads(stdout.strip())
        decision = decision_data.get("decision")
    except Exception as e:
        # Deliberately broad: whatever goes wrong with the bridge is a denial.
        sys.stderr.write(f"DEBUG: Bridge failed: {e}\n")
        decision = "deny"
    if decision == "allow":
        # Simulate REAL CLI field names for adapter normalization test
        _emit_json({
            "type": "tool_use",
            "tool_name": "read_file",
            "tool_id": "call_123",
            "parameters": {"path": "test.txt"}
        })
        _emit_json({
            "type": "message",
            "role": "assistant",
            "content": "I am reading the file now..."
        })
        _emit_json({
            "type": "result",
            "status": "success",
            "stats": {"total_tokens": 50, "input_tokens": 40, "output_tokens": 10},
            "session_id": "mock-session-123"
        })
    else:
        _emit_json({
            "type": "message",
            "role": "assistant",
            "content": f"Tool execution was denied. Decision: {decision}"
        })
        _emit_json({
            "type": "result",
            "status": "success",
            "stats": {"total_tokens": 10, "input_tokens": 10, "output_tokens": 0},
            "session_id": "mock-session-denied"
        })


if __name__ == "__main__":
    main()

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-27T18:56:53"
last_updated = "2026-02-28T07:35:03"
history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -6,10 +6,10 @@ roles = [
"Reasoning",
]
history = []
active = "TestDisc_1772236592"
active = "TestDisc_1772282083"
auto_add = true
[discussions.TestDisc_1772236592]
[discussions.TestDisc_1772282083]
git_commit = ""
last_updated = "2026-02-27T18:56:46"
last_updated = "2026-02-28T07:34:56"
history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-27T18:57:53"
last_updated = "2026-02-28T07:35:49"
history = []

View File

@@ -15,6 +15,8 @@ output_dir = "./md_gen"
base_dir = "."
paths = []
[files.tier_assignments]
[screenshots]
base_dir = "."
paths = []

View File

@@ -10,5 +10,5 @@ auto_add = true
[discussions.main]
git_commit = ""
last_updated = "2026-02-27T18:57:10"
last_updated = "2026-02-28T07:35:20"
history = []

View File

@@ -18,7 +18,5 @@ history = [
[discussions.AutoDisc]
git_commit = ""
last_updated = "2026-02-27T23:54:05"
history = [
"@2026-02-27T19:08:37\nSystem:\n[PERFORMANCE ALERT] Frame time high: 62.2ms. Please consider optimizing recent changes or reducing load.",
]
last_updated = "2026-02-28T07:34:41"
history = []

View File

@@ -8,5 +8,5 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import ai_client
def test_agent_capabilities_listing():
    """Placeholder: should verify that the agent exposes its available tools correctly."""

View File

@@ -9,14 +9,14 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from ai_client import set_agent_tools, _build_anthropic_tools
def test_set_agent_tools():
    """set_agent_tools is called the correct way: with a dict of tool name -> flag."""
    set_agent_tools({"read_file": True, "list_directory": False})
def test_build_anthropic_tools_conversion():
    """A tool switched on via set_agent_tools shows up in _build_anthropic_tools().

    _build_anthropic_tools takes no arguments and uses the global _agent_tools.
    """
    set_agent_tools({"read_file": True})
    exposed_names = [tool["name"] for tool in _build_anthropic_tools()]
    assert "read_file" in exposed_names

View File

@@ -3,39 +3,33 @@ from unittest.mock import MagicMock, patch
import ai_client
def test_ai_client_send_gemini_cli():
    """
    Verifies that 'ai_client.send' correctly interacts with 'GeminiCliAdapter'
    when the 'gemini_cli' provider is specified.
    """
    prompt_text = "Hello, this is a test prompt for the CLI adapter."
    canned_reply = "This is a dummy response from the Gemini CLI."
    # Route requests through the CLI provider.
    ai_client.set_provider("gemini_cli", "gemini-2.5-flash-lite")
    # Replace the adapter class and the event emitter for the duration of the call.
    with patch('ai_client.GeminiCliAdapter') as adapter_cls, \
            patch.object(ai_client.events, 'emit') as emit_spy:
        adapter = adapter_cls.return_value
        adapter.send.return_value = {"text": canned_reply, "tool_calls": []}
        adapter.last_usage = {"total_tokens": 100}
        adapter.last_latency = 0.5
        adapter.session_id = "test-session"
        response = ai_client.send(
            md_content="<context></context>",
            user_message=prompt_text,
            base_dir="."
        )
        # The adapter must have been asked to send something.
        adapter.send.assert_called()
        # Both lifecycle events must have been emitted.
        emitted = [call.args[0] for call in emit_spy.call_args_list]
        assert 'request_start' in emitted
        assert 'response_received' in emitted
        # The adapter's text is what 'ai_client.send' hands back.
        assert response == canned_reply

View File

@@ -3,15 +3,14 @@ from unittest.mock import patch, MagicMock
import ai_client
def test_list_models_gemini_cli():
    """
    Verifies that 'ai_client.list_models' correctly returns a list of models
    for the 'gemini_cli' provider.
    """
    models = ai_client.list_models("gemini_cli")
    expected_models = (
        "gemini-3.1-pro-preview",
        "gemini-3-flash-preview",
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gemini-2.5-flash-lite",
    )
    for model_name in expected_models:
        assert model_name in models
    assert len(models) == 5

View File

@@ -3,22 +3,22 @@ import textwrap
from scripts.ai_style_formatter import format_code
def test_basic_indentation():
source = textwrap.dedent("""\
source = textwrap.dedent("""\
def hello():
print("world")
if True:
print("nested")
""")
expected = (
"def hello():\n"
" print(\"world\")\n"
" if True:\n"
" print(\"nested\")\n"
)
assert format_code(source) == expected
expected = (
"def hello():\n"
" print(\"world\")\n"
" if True:\n"
" print(\"nested\")\n"
)
assert format_code(source) == expected
def test_top_level_blank_lines():
source = textwrap.dedent("""\
source = textwrap.dedent("""\
def a():
pass
@@ -26,31 +26,31 @@ def test_top_level_blank_lines():
def b():
pass
""")
expected = (
"def a():\n"
" pass\n"
"\n"
"def b():\n"
" pass\n"
)
assert format_code(source) == expected
expected = (
"def a():\n"
" pass\n"
"\n"
"def b():\n"
" pass\n"
)
assert format_code(source) == expected
def test_inner_blank_lines():
source = textwrap.dedent("""\
source = textwrap.dedent("""\
def a():
print("start")
print("end")
""")
expected = (
"def a():\n"
" print(\"start\")\n"
" print(\"end\")\n"
)
assert format_code(source) == expected
expected = (
"def a():\n"
" print(\"start\")\n"
" print(\"end\")\n"
)
assert format_code(source) == expected
def test_multiline_string_safety():
source = textwrap.dedent("""\
source = textwrap.dedent("""\
def a():
'''
This is a multiline
@@ -60,21 +60,20 @@ def test_multiline_string_safety():
'''
pass
""")
# Note: the indentation of the ''' itself becomes 1 space.
# The content inside remains exactly as in source.
# textwrap.dedent will remove the common leading whitespace from the source.
# The source's ''' is at 4 spaces. Content is at 4 spaces.
# After dedent:
# def a():
# '''
# This is a...
result = format_code(source)
assert " This is a multiline" in result
assert result.startswith("def a():\n '''")
# Note: the indentation of the ''' itself becomes 1 space.
# The content inside remains exactly as in source.
# textwrap.dedent will remove the common leading whitespace from the source.
# The source's ''' is at 4 spaces. Content is at 4 spaces.
# After dedent:
# def a():
# '''
# This is a...
result = format_code(source)
assert " This is a multiline" in result
assert result.startswith("def a():\n '''")
def test_continuation_indentation():
source = textwrap.dedent("""\
source = textwrap.dedent("""\
def long_func(
a,
b
@@ -84,20 +83,20 @@ def test_continuation_indentation():
b
)
""")
expected = (
"def long_func(\n"
" a,\n"
" b\n"
"):\n"
" return (\n"
" a +\n"
" b\n"
" )\n"
)
assert format_code(source) == expected
expected = (
"def long_func(\n"
" a,\n"
" b\n"
"):\n"
" return (\n"
" a +\n"
" b\n"
" )\n"
)
assert format_code(source) == expected
def test_multiple_top_level_definitions():
source = textwrap.dedent("""\
source = textwrap.dedent("""\
class MyClass:
def __init__(self):
self.x = 1
@@ -109,14 +108,14 @@ def test_multiple_top_level_definitions():
def top_level():
pass
""")
expected = (
"class MyClass:\n"
" def __init__(self):\n"
" self.x = 1\n"
" def method(self):\n"
" pass\n"
"\n"
"def top_level():\n"
" pass\n"
)
assert format_code(source) == expected
expected = (
"class MyClass:\n"
" def __init__(self):\n"
" self.x = 1\n"
" def method(self):\n"
" pass\n"
"\n"
"def top_level():\n"
" pass\n"
)
assert format_code(source) == expected

View File

@@ -3,127 +3,104 @@ from unittest.mock import MagicMock, patch
import ai_client
class MockUsage:
def __init__(self):
self.prompt_token_count = 10
self.candidates_token_count = 5
self.total_token_count = 15
self.cached_content_token_count = 0
def __init__(self):
self.prompt_token_count = 10
self.candidates_token_count = 5
self.total_token_count = 15
self.cached_content_token_count = 0
class MockPart:
def __init__(self, text, function_call):
self.text = text
self.function_call = function_call
def __init__(self, text, function_call):
self.text = text
self.function_call = function_call
class MockContent:
def __init__(self, parts):
self.parts = parts
def __init__(self, parts):
self.parts = parts
class MockCandidate:
def __init__(self, parts):
self.content = MockContent(parts)
self.finish_reason = MagicMock()
self.finish_reason.name = "STOP"
def __init__(self, parts):
self.content = MockContent(parts)
self.finish_reason = MagicMock()
self.finish_reason.name = "STOP"
def test_ai_client_event_emitter_exists():
# This should fail initially because 'events' won't exist on ai_client
assert hasattr(ai_client, 'events')
# This should fail initially because 'events' won't exist on ai_client
assert hasattr(ai_client, 'events')
def test_event_emission():
callback = MagicMock()
ai_client.events.on("test_event", callback)
ai_client.events.emit("test_event", payload={"data": 123})
callback.assert_called_once_with(payload={"data": 123})
callback = MagicMock()
ai_client.events.on("test_event", callback)
ai_client.events.emit("test_event", payload={"data": 123})
callback.assert_called_once_with(payload={"data": 123})
def test_send_emits_events():
with patch("ai_client._send_gemini") as mock_send_gemini, \
patch("ai_client._send_anthropic") as mock_send_anthropic:
mock_send_gemini.return_value = "gemini response"
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
# We mocked _send_gemini so it doesn't emit events inside.
# But wait, ai_client.send itself emits request_start and response_received?
# Actually, ai_client.send delegates to _send_gemini.
# Let's mock _gemini_client instead to let _send_gemini run and emit events.
pass
with patch("ai_client._send_gemini") as mock_send_gemini, \
patch("ai_client._send_anthropic") as mock_send_anthropic:
mock_send_gemini.return_value = "gemini response"
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
# We mocked _send_gemini so it doesn't emit events inside.
# But wait, ai_client.send itself emits request_start and response_received?
# Actually, ai_client.send delegates to _send_gemini.
# Let's mock _gemini_client instead to let _send_gemini run and emit events.
pass
def test_send_emits_events_proper():
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client:
mock_chat = MagicMock()
mock_client.chats.create.return_value = mock_chat
mock_response = MagicMock()
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])]
mock_response.usage_metadata = MockUsage()
mock_chat.send_message.return_value = mock_response
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
assert start_callback.called
assert response_callback.called
args, kwargs = start_callback.call_args
assert kwargs['payload']['provider'] == 'gemini'
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client:
mock_chat = MagicMock()
mock_client.chats.create.return_value = mock_chat
mock_response = MagicMock()
mock_response.candidates = [MockCandidate([MockPart("gemini response", None)])]
mock_response.usage_metadata = MockUsage()
mock_chat.send_message.return_value = mock_response
start_callback = MagicMock()
response_callback = MagicMock()
ai_client.events.on("request_start", start_callback)
ai_client.events.on("response_received", response_callback)
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
ai_client.send("context", "message")
assert start_callback.called
assert response_callback.called
args, kwargs = start_callback.call_args
assert kwargs['payload']['provider'] == 'gemini'
def test_send_emits_tool_events():
import mcp_client
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client, \
patch("mcp_client.dispatch") as mock_dispatch:
mock_chat = MagicMock()
mock_client.chats.create.return_value = mock_chat
# 1. Setup mock response with a tool call
mock_fc = MagicMock()
mock_fc.name = "read_file"
mock_fc.args = {"path": "test.txt"}
mock_response_with_tool = MagicMock()
mock_response_with_tool.candidates = [MockCandidate([MockPart("tool call text", mock_fc)])]
mock_response_with_tool.usage_metadata = MockUsage()
# 2. Setup second mock response (final answer)
mock_response_final = MagicMock()
mock_response_final.candidates = [MockCandidate([MockPart("final answer", None)])]
mock_response_final.usage_metadata = MockUsage()
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
mock_dispatch.return_value = "file content"
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
tool_callback = MagicMock()
ai_client.events.on("tool_execution", tool_callback)
ai_client.send("context", "message")
# Should be called twice: once for 'started', once for 'completed'
assert tool_callback.call_count == 2
# Check 'started' call
args, kwargs = tool_callback.call_args_list[0]
assert kwargs['payload']['status'] == 'started'
assert kwargs['payload']['tool'] == 'read_file'
# Check 'completed' call
args, kwargs = tool_callback.call_args_list[1]
assert kwargs['payload']['status'] == 'completed'
assert kwargs['payload']['result'] == 'file content'
import mcp_client
with patch("ai_client._ensure_gemini_client"), \
patch("ai_client._gemini_client") as mock_client, \
patch("mcp_client.dispatch") as mock_dispatch:
mock_chat = MagicMock()
mock_client.chats.create.return_value = mock_chat
# 1. Setup mock response with a tool call
mock_fc = MagicMock()
mock_fc.name = "read_file"
mock_fc.args = {"path": "test.txt"}
mock_response_with_tool = MagicMock()
mock_response_with_tool.candidates = [MockCandidate([MockPart("tool call text", mock_fc)])]
mock_response_with_tool.usage_metadata = MockUsage()
# 2. Setup second mock response (final answer)
mock_response_final = MagicMock()
mock_response_final.candidates = [MockCandidate([MockPart("final answer", None)])]
mock_response_final.usage_metadata = MockUsage()
mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
mock_dispatch.return_value = "file content"
ai_client.set_provider("gemini", "gemini-2.5-flash-lite")
tool_callback = MagicMock()
ai_client.events.on("tool_execution", tool_callback)
ai_client.send("context", "message")
# Should be called twice: once for 'started', once for 'completed'
assert tool_callback.call_count == 2
# Check 'started' call
args, kwargs = tool_callback.call_args_list[0]
assert kwargs['payload']['status'] == 'started'
assert kwargs['payload']['tool'] == 'read_file'
# Check 'completed' call
args, kwargs = tool_callback.call_args_list[1]
assert kwargs['payload']['status'] == 'completed'
assert kwargs['payload']['result'] == 'file content'

View File

@@ -13,88 +13,84 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def test_get_status_success(live_gui):
"""
"""
Test that get_status successfully retrieves the server status
when the live GUI is running.
"""
client = ApiHookClient()
status = client.get_status()
assert status == {'status': 'ok'}
client = ApiHookClient()
status = client.get_status()
assert status == {'status': 'ok'}
def test_get_project_success(live_gui):
"""
"""
Test successful retrieval of project data from the live GUI.
"""
client = ApiHookClient()
response = client.get_project()
assert 'project' in response
# We don't assert specific content as it depends on the environment's active project
client = ApiHookClient()
response = client.get_project()
assert 'project' in response
# We don't assert specific content as it depends on the environment's active project
def test_get_session_success(live_gui):
"""
"""
Test successful retrieval of session data.
"""
client = ApiHookClient()
response = client.get_session()
assert 'session' in response
assert 'entries' in response['session']
client = ApiHookClient()
response = client.get_session()
assert 'session' in response
assert 'entries' in response['session']
def test_post_gui_success(live_gui):
"""
"""
Test successful posting of GUI data.
"""
client = ApiHookClient()
gui_data = {'command': 'set_text', 'id': 'some_item', 'value': 'new_text'}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
client = ApiHookClient()
gui_data = {'command': 'set_text', 'id': 'some_item', 'value': 'new_text'}
response = client.post_gui(gui_data)
assert response == {'status': 'queued'}
def test_get_performance_success(live_gui):
"""
"""
Test successful retrieval of performance metrics.
"""
client = ApiHookClient()
response = client.get_performance()
assert "performance" in response
client = ApiHookClient()
response = client.get_performance()
assert "performance" in response
def test_unsupported_method_error():
"""
"""
Test that calling an unsupported HTTP method raises a ValueError.
"""
client = ApiHookClient()
with pytest.raises(ValueError, match="Unsupported HTTP method"):
client._make_request('PUT', '/some_endpoint', data={'key': 'value'})
client = ApiHookClient()
with pytest.raises(ValueError, match="Unsupported HTTP method"):
client._make_request('PUT', '/some_endpoint', data={'key': 'value'})
def test_get_text_value():
"""
"""
Test retrieval of string representation using get_text_value.
"""
client = ApiHookClient()
with patch.object(client, 'get_value', return_value=123):
assert client.get_text_value("dummy_tag") == "123"
with patch.object(client, 'get_value', return_value=None):
assert client.get_text_value("dummy_tag") is None
client = ApiHookClient()
with patch.object(client, 'get_value', return_value=123):
assert client.get_text_value("dummy_tag") == "123"
with patch.object(client, 'get_value', return_value=None):
assert client.get_text_value("dummy_tag") is None
def test_get_node_status():
"""
"""
Test retrieval of DAG node status using get_node_status.
"""
client = ApiHookClient()
# When get_value returns a status directly
with patch.object(client, 'get_value', return_value="running"):
assert client.get_node_status("my_node") == "running"
# When get_value returns None and diagnostics provides a nodes dict
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'nodes': {'my_node': 'completed'}}):
assert client.get_node_status("my_node") == "completed"
# When get_value returns None and diagnostics provides a direct key
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'my_node': 'failed'}):
assert client.get_node_status("my_node") == "failed"
# When neither works
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={}):
assert client.get_node_status("my_node") is None
client = ApiHookClient()
# When get_value returns a status directly
with patch.object(client, 'get_value', return_value="running"):
assert client.get_node_status("my_node") == "running"
# When get_value returns None and diagnostics provides a nodes dict
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'nodes': {'my_node': 'completed'}}):
assert client.get_node_status("my_node") == "completed"
# When get_value returns None and diagnostics provides a direct key
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={'my_node': 'failed'}):
assert client.get_node_status("my_node") == "failed"
# When neither works
with patch.object(client, 'get_value', return_value=None):
with patch.object(client, '_make_request', return_value={}):
assert client.get_node_status("my_node") is None

View File

@@ -8,68 +8,64 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def test_api_client_has_extensions():
client = ApiHookClient()
# These should fail initially as they are not implemented
assert hasattr(client, 'select_tab')
assert hasattr(client, 'select_list_item')
client = ApiHookClient()
# These should fail initially as they are not implemented
assert hasattr(client, 'select_tab')
assert hasattr(client, 'select_list_item')
def test_select_tab_integration(live_gui):
client = ApiHookClient()
# We'll need to make sure the tags exist in gui_legacy.py
# For now, this is a placeholder for the integration test
response = client.select_tab("operations_tabs", "tab_tool")
assert response == {'status': 'queued'}
client = ApiHookClient()
# We'll need to make sure the tags exist in gui_legacy.py
# For now, this is a placeholder for the integration test
response = client.select_tab("operations_tabs", "tab_tool")
assert response == {'status': 'queued'}
def test_select_list_item_integration(live_gui):
client = ApiHookClient()
# Assuming 'Default' discussion exists or we can just test that it queues
response = client.select_list_item("disc_listbox", "Default")
assert response == {'status': 'queued'}
client = ApiHookClient()
# Assuming 'Default' discussion exists or we can just test that it queues
response = client.select_list_item("disc_listbox", "Default")
assert response == {'status': 'queued'}
def test_get_indicator_state_integration(live_gui):
client = ApiHookClient()
# thinking_indicator is usually hidden unless AI is running
response = client.get_indicator_state("thinking_indicator")
assert 'shown' in response
assert response['tag'] == "thinking_indicator"
client = ApiHookClient()
# thinking_indicator is usually hidden unless AI is running
response = client.get_indicator_state("thinking_indicator")
assert 'shown' in response
assert response['tag'] == "thinking_indicator"
def test_app_processes_new_actions():
import gui_legacy
from unittest.mock import MagicMock, patch
import dearpygui.dearpygui as dpg
dpg.create_context()
try:
with patch('gui_legacy.load_config', return_value={}), \
patch('gui_legacy.PerformanceMonitor'), \
patch('gui_legacy.shell_runner'), \
patch('gui_legacy.project_manager'), \
patch.object(gui_legacy.App, '_load_active_project'):
app = gui_legacy.App()
with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
patch('dearpygui.dearpygui.get_item_callback') as mock_get_cb:
# Test select_tab
app._pending_gui_tasks.append({
"action": "select_tab",
"tab_bar": "some_tab_bar",
"tab": "some_tab"
})
app._process_pending_gui_tasks()
mock_set_value.assert_any_call("some_tab_bar", "some_tab")
# Test select_list_item
mock_cb = MagicMock()
mock_get_cb.return_value = mock_cb
app._pending_gui_tasks.append({
"action": "select_list_item",
"listbox": "some_listbox",
"item_value": "some_value"
})
app._process_pending_gui_tasks()
mock_set_value.assert_any_call("some_listbox", "some_value")
mock_cb.assert_called_with("some_listbox", "some_value")
finally:
dpg.destroy_context()
import gui_legacy
from unittest.mock import MagicMock, patch
import dearpygui.dearpygui as dpg
dpg.create_context()
try:
with patch('gui_legacy.load_config', return_value={}), \
patch('gui_legacy.PerformanceMonitor'), \
patch('gui_legacy.shell_runner'), \
patch('gui_legacy.project_manager'), \
patch.object(gui_legacy.App, '_load_active_project'):
app = gui_legacy.App()
with patch('dearpygui.dearpygui.set_value') as mock_set_value, \
patch('dearpygui.dearpygui.does_item_exist', return_value=True), \
patch('dearpygui.dearpygui.get_item_callback') as mock_get_cb:
# Test select_tab
app._pending_gui_tasks.append({
"action": "select_tab",
"tab_bar": "some_tab_bar",
"tab": "some_tab"
})
app._process_pending_gui_tasks()
mock_set_value.assert_any_call("some_tab_bar", "some_tab")
# Test select_list_item
mock_cb = MagicMock()
mock_get_cb.return_value = mock_cb
app._pending_gui_tasks.append({
"action": "select_list_item",
"listbox": "some_listbox",
"item_value": "some_value"
})
app._process_pending_gui_tasks()
mock_set_value.assert_any_call("some_listbox", "some_value")
mock_cb.assert_called_with("some_listbox", "some_value")
finally:
dpg.destroy_context()

View File

@@ -3,24 +3,24 @@ import tree_sitter
from file_cache import ASTParser
def test_ast_parser_initialization():
"""Verify that ASTParser can be initialized with a language string."""
parser = ASTParser("python")
assert parser.language_name == "python"
"""Verify that ASTParser can be initialized with a language string."""
parser = ASTParser("python")
assert parser.language_name == "python"
def test_ast_parser_parse():
"""Verify that the parse method returns a tree_sitter.Tree."""
parser = ASTParser("python")
code = """def example_func():
"""Verify that the parse method returns a tree_sitter.Tree."""
parser = ASTParser("python")
code = """def example_func():
return 42"""
tree = parser.parse(code)
assert isinstance(tree, tree_sitter.Tree)
# Basic check that it parsed something
assert tree.root_node.type == "module"
tree = parser.parse(code)
assert isinstance(tree, tree_sitter.Tree)
# Basic check that it parsed something
assert tree.root_node.type == "module"
def test_ast_parser_get_skeleton_python():
"""Verify that get_skeleton replaces function bodies with '...' while preserving docstrings."""
parser = ASTParser("python")
code = '''
"""Verify that get_skeleton replaces function bodies with '...' while preserving docstrings."""
parser = ASTParser("python")
code = '''
def complex_function(a, b):
"""
This is a docstring.
@@ -36,35 +36,32 @@ class MyClass:
print("doing something")
return None
'''
skeleton = parser.get_skeleton(code)
# Check that signatures are preserved
assert "def complex_function(a, b):" in skeleton
assert "class MyClass:" in skeleton
assert "def method_without_docstring(self):" in skeleton
# Check that docstring is preserved
assert '"""' in skeleton
assert "This is a docstring." in skeleton
assert "It should be preserved." in skeleton
# Check that bodies are replaced with '...'
assert "..." in skeleton
assert "result = a + b" not in skeleton
assert "return result" not in skeleton
assert 'print("doing something")' not in skeleton
skeleton = parser.get_skeleton(code)
# Check that signatures are preserved
assert "def complex_function(a, b):" in skeleton
assert "class MyClass:" in skeleton
assert "def method_without_docstring(self):" in skeleton
# Check that docstring is preserved
assert '"""' in skeleton
assert "This is a docstring." in skeleton
assert "It should be preserved." in skeleton
# Check that bodies are replaced with '...'
assert "..." in skeleton
assert "result = a + b" not in skeleton
assert "return result" not in skeleton
assert 'print("doing something")' not in skeleton
def test_ast_parser_invalid_language():
"""Verify handling of unsupported or invalid languages."""
# This might raise an error or return a default, depending on implementation
# For now, we expect it to either fail gracefully or raise an exception we can catch
with pytest.raises(Exception):
ASTParser("not-a-language")
"""Verify handling of unsupported or invalid languages."""
# This might raise an error or return a default, depending on implementation
# For now, we expect it to either fail gracefully or raise an exception we can catch
with pytest.raises(Exception):
ASTParser("not-a-language")
def test_ast_parser_get_curated_view():
"""Verify that get_curated_view preserves function bodies with @core_logic or # [HOT]."""
parser = ASTParser("python")
code = '''
"""Verify that get_curated_view preserves function bodies with @core_logic or # [HOT]."""
parser = ASTParser("python")
code = '''
@core_logic
def core_func():
"""Core logic doc."""
@@ -86,20 +83,16 @@ class MyClass:
def core_method(self, x):
print("method preserved", x)
'''
curated = parser.get_curated_view(code)
# Check that core_func is preserved
assert 'print("this should be preserved")' in curated
assert 'return True' in curated
# Check that hot_func is preserved
assert '# [HOT]' in curated
assert 'print("this should also be preserved")' in curated
# Check that normal_func is stripped but docstring is preserved
assert '"""Normal doc."""' in curated
assert 'print("this should be stripped")' not in curated
assert '...' in curated
# Check that core_method is preserved
assert 'print("method preserved", x)' in curated
curated = parser.get_curated_view(code)
# Check that core_func is preserved
assert 'print("this should be preserved")' in curated
assert 'return True' in curated
# Check that hot_func is preserved
assert '# [HOT]' in curated
assert 'print("this should also be preserved")' in curated
# Check that normal_func is stripped but docstring is preserved
assert '"""Normal doc."""' in curated
assert 'print("this should be stripped")' not in curated
assert '...' in curated
# Check that core_method is preserved
assert 'print("method preserved", x)' in curated

View File

@@ -2,8 +2,8 @@ import pytest
from file_cache import ASTParser
def test_ast_parser_get_curated_view():
parser = ASTParser("python")
code = '''
parser = ASTParser("python")
code = '''
@core_logic
def core_func():
"""Core logic doc."""
@@ -25,20 +25,16 @@ class MyClass:
def core_method(self):
print("method preserved")
'''
curated = parser.get_curated_view(code)
# Check that core_func is preserved
assert 'print("this should be preserved")' in curated
assert 'return True' in curated
# Check that hot_func is preserved
assert '# [HOT]' in curated
assert 'print("this should also be preserved")' in curated
# Check that normal_func is stripped but docstring is preserved
assert '"""Normal doc."""' in curated
assert 'print("this should be stripped")' not in curated
assert '...' in curated
# Check that core_method is preserved
assert 'print("method preserved")' in curated
curated = parser.get_curated_view(code)
# Check that core_func is preserved
assert 'print("this should be preserved")' in curated
assert 'return True' in curated
# Check that hot_func is preserved
assert '# [HOT]' in curated
assert 'print("this should also be preserved")' in curated
# Check that normal_func is stripped but docstring is preserved
assert '"""Normal doc."""' in curated
assert 'print("this should be stripped")' not in curated
assert '...' in curated
# Check that core_method is preserved
assert 'print("method preserved")' in curated

View File

@@ -3,45 +3,40 @@ import pytest
from events import AsyncEventQueue
def test_async_event_queue_put_get():
"""Verify that an event can be asynchronously put and retrieved from the queue."""
async def run_test():
queue = AsyncEventQueue()
event_name = "test_event"
payload = {"data": "hello"}
await queue.put(event_name, payload)
ret_name, ret_payload = await queue.get()
assert ret_name == event_name
assert ret_payload == payload
"""Verify that an event can be asynchronously put and retrieved from the queue."""
asyncio.run(run_test())
async def run_test():
queue = AsyncEventQueue()
event_name = "test_event"
payload = {"data": "hello"}
await queue.put(event_name, payload)
ret_name, ret_payload = await queue.get()
assert ret_name == event_name
assert ret_payload == payload
asyncio.run(run_test())
def test_async_event_queue_multiple():
"""Verify that multiple events can be asynchronously put and retrieved in order."""
async def run_test():
queue = AsyncEventQueue()
await queue.put("event1", 1)
await queue.put("event2", 2)
name1, val1 = await queue.get()
name2, val2 = await queue.get()
assert name1 == "event1"
assert val1 == 1
assert name2 == "event2"
assert val2 == 2
"""Verify that multiple events can be asynchronously put and retrieved in order."""
asyncio.run(run_test())
async def run_test():
queue = AsyncEventQueue()
await queue.put("event1", 1)
await queue.put("event2", 2)
name1, val1 = await queue.get()
name2, val2 = await queue.get()
assert name1 == "event1"
assert val1 == 1
assert name2 == "event2"
assert val2 == 2
asyncio.run(run_test())
def test_async_event_queue_none_payload():
"""Verify that an event with None payload works correctly."""
async def run_test():
queue = AsyncEventQueue()
await queue.put("no_payload")
name, payload = await queue.get()
assert name == "no_payload"
assert payload is None
"""Verify that an event with None payload works correctly."""
asyncio.run(run_test())
async def run_test():
queue = AsyncEventQueue()
await queue.put("no_payload")
name, payload = await queue.get()
assert name == "no_payload"
assert payload is None
asyncio.run(run_test())

View File

@@ -5,72 +5,60 @@ from log_registry import LogRegistry
@pytest.fixture
def registry_setup(tmp_path):
registry_path = tmp_path / "log_registry.toml"
logs_dir = tmp_path / "logs"
logs_dir.mkdir()
registry = LogRegistry(str(registry_path))
return registry, logs_dir
registry_path = tmp_path / "log_registry.toml"
logs_dir = tmp_path / "logs"
logs_dir.mkdir()
registry = LogRegistry(str(registry_path))
return registry, logs_dir
def test_auto_whitelist_keywords(registry_setup):
registry, logs_dir = registry_setup
session_id = "test_kw"
session_dir = logs_dir / session_id
session_dir.mkdir()
# Create comms.log with ERROR
comms_log = session_dir / "comms.log"
comms_log.write_text("Some message\nAN ERROR OCCURRED\nMore text")
registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
assert registry.is_session_whitelisted(session_id)
assert "ERROR" in registry.data[session_id]["metadata"]["reason"]
registry, logs_dir = registry_setup
session_id = "test_kw"
session_dir = logs_dir / session_id
session_dir.mkdir()
# Create comms.log with ERROR
comms_log = session_dir / "comms.log"
comms_log.write_text("Some message\nAN ERROR OCCURRED\nMore text")
registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
assert registry.is_session_whitelisted(session_id)
assert "ERROR" in registry.data[session_id]["metadata"]["reason"]
def test_auto_whitelist_message_count(registry_setup):
registry, logs_dir = registry_setup
session_id = "test_msg_count"
session_dir = logs_dir / session_id
session_dir.mkdir()
# Create comms.log with > 10 lines
comms_log = session_dir / "comms.log"
comms_log.write_text("\n".join(["msg"] * 15))
registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
assert registry.is_session_whitelisted(session_id)
assert registry.data[session_id]["metadata"]["message_count"] == 15
registry, logs_dir = registry_setup
session_id = "test_msg_count"
session_dir = logs_dir / session_id
session_dir.mkdir()
# Create comms.log with > 10 lines
comms_log = session_dir / "comms.log"
comms_log.write_text("\n".join(["msg"] * 15))
registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
assert registry.is_session_whitelisted(session_id)
assert registry.data[session_id]["metadata"]["message_count"] == 15
def test_auto_whitelist_large_size(registry_setup):
registry, logs_dir = registry_setup
session_id = "test_large"
session_dir = logs_dir / session_id
session_dir.mkdir()
# Create large file (> 50KB)
large_file = session_dir / "large.log"
large_file.write_text("x" * 60000)
registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
assert registry.is_session_whitelisted(session_id)
assert "Large session size" in registry.data[session_id]["metadata"]["reason"]
registry, logs_dir = registry_setup
session_id = "test_large"
session_dir = logs_dir / session_id
session_dir.mkdir()
# Create large file (> 50KB)
large_file = session_dir / "large.log"
large_file.write_text("x" * 60000)
registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
assert registry.is_session_whitelisted(session_id)
assert "Large session size" in registry.data[session_id]["metadata"]["reason"]
def test_no_auto_whitelist_insignificant(registry_setup):
registry, logs_dir = registry_setup
session_id = "test_insignificant"
session_dir = logs_dir / session_id
session_dir.mkdir()
# Small file, few lines, no keywords
comms_log = session_dir / "comms.log"
comms_log.write_text("hello\nworld")
registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
assert not registry.is_session_whitelisted(session_id)
assert registry.data[session_id]["metadata"]["message_count"] == 2
registry, logs_dir = registry_setup
session_id = "test_insignificant"
session_dir = logs_dir / session_id
session_dir.mkdir()
# Small file, few lines, no keywords
comms_log = session_dir / "comms.log"
comms_log.write_text("hello\nworld")
registry.register_session(session_id, str(session_dir), datetime.now())
registry.update_auto_whitelist_status(session_id)
assert not registry.is_session_whitelisted(session_id)
assert registry.data[session_id]["metadata"]["message_count"] == 2

View File

@@ -12,64 +12,55 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from scripts.cli_tool_bridge import main
class TestCliToolBridge(unittest.TestCase):
def setUp(self):
os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
self.tool_call = {
'tool_name': 'read_file',
'tool_input': {'path': 'test.txt'}
}
def setUp(self):
os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
self.tool_call = {
'tool_name': 'read_file',
'tool_input': {'path': 'test.txt'}
}
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_allow_decision(self, mock_request, mock_stdout, mock_stdin):
# 1. Mock stdin with a JSON string tool call
mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0)
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_allow_decision(self, mock_request, mock_stdout, mock_stdin):
# 1. Mock stdin with a JSON string tool call
mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0)
# 2. Mock ApiHookClient to return approved
mock_request.return_value = {'approved': True}
# Run main
main()
# 3. Capture stdout and assert allow
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'allow')
# 2. Mock ApiHookClient to return approved
mock_request.return_value = {'approved': True}
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_deny_decision(self, mock_request, mock_stdout, mock_stdin):
# Mock stdin
mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0)
# 4. Mock ApiHookClient to return denied
mock_request.return_value = {'approved': False}
main()
# Assert deny
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'deny')
# Run main
main()
# 3. Capture stdout and assert allow
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'allow')
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_deny_decision(self, mock_request, mock_stdout, mock_stdin):
# Mock stdin
mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0)
# 4. Mock ApiHookClient to return denied
mock_request.return_value = {'approved': False}
main()
# Assert deny
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'deny')
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_unreachable_hook_server(self, mock_request, mock_stdout, mock_stdin):
# Mock stdin
mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0)
# 5. Test case where hook server is unreachable (exception)
mock_request.side_effect = Exception("Connection refused")
main()
# Assert deny on error
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'deny')
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_unreachable_hook_server(self, mock_request, mock_stdout, mock_stdin):
# Mock stdin
mock_stdin.write(json.dumps(self.tool_call))
mock_stdin.seek(0)
# 5. Test case where hook server is unreachable (exception)
mock_request.side_effect = Exception("Connection refused")
main()
# Assert deny on error
output = json.loads(mock_stdout.getvalue().strip())
self.assertEqual(output.get('decision'), 'deny')
if __name__ == '__main__':
unittest.main()
unittest.main()

View File

@@ -12,42 +12,37 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from scripts.cli_tool_bridge import main
class TestCliToolBridgeMapping(unittest.TestCase):
def setUp(self):
os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
def setUp(self):
os.environ['GEMINI_CLI_HOOK_CONTEXT'] = 'manual_slop'
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_mapping_from_api_format(self, mock_request, mock_stdout, mock_stdin):
"""
@patch('sys.stdin', new_callable=io.StringIO)
@patch('sys.stdout', new_callable=io.StringIO)
@patch('api_hook_client.ApiHookClient.request_confirmation')
def test_mapping_from_api_format(self, mock_request, mock_stdout, mock_stdin):
"""
Verify that bridge correctly maps 'id', 'name', 'input' (Gemini API format)
into tool_name and tool_input for the hook client.
"""
api_tool_call = {
'id': 'call123',
'name': 'read_file',
'input': {'path': 'test.txt'}
}
# 1. Mock stdin with the API format JSON
mock_stdin.write(json.dumps(api_tool_call))
mock_stdin.seek(0)
# 2. Mock ApiHookClient to return approved
mock_request.return_value = {'approved': True}
# Run main
main()
# 3. Verify that request_confirmation was called with mapped values
# If it's not mapped, it will likely be called with None or fail
mock_request.assert_called_once_with('read_file', {'path': 'test.txt'})
# 4. Capture stdout and assert allow
output_str = mock_stdout.getvalue().strip()
self.assertTrue(output_str, "Stdout should not be empty")
output = json.loads(output_str)
self.assertEqual(output.get('decision'), 'allow')
api_tool_call = {
'id': 'call123',
'name': 'read_file',
'input': {'path': 'test.txt'}
}
# 1. Mock stdin with the API format JSON
mock_stdin.write(json.dumps(api_tool_call))
mock_stdin.seek(0)
# 2. Mock ApiHookClient to return approved
mock_request.return_value = {'approved': True}
# Run main
main()
# 3. Verify that request_confirmation was called with mapped values
# If it's not mapped, it will likely be called with None or fail
mock_request.assert_called_once_with('read_file', {'path': 'test.txt'})
# 4. Capture stdout and assert allow
output_str = mock_stdout.getvalue().strip()
self.assertTrue(output_str, "Stdout should not be empty")
output = json.loads(output_str)
self.assertEqual(output.get('decision'), 'allow')
if __name__ == '__main__':
unittest.main()
unittest.main()

View File

@@ -13,61 +13,55 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from api_hook_client import ApiHookClient
def simulate_conductor_phase_completion(client: ApiHookClient):
"""
"""
Simulates the Conductor agent's logic for phase completion using ApiHookClient.
"""
results = {
"verification_successful": False,
"verification_message": ""
}
try:
status = client.get_status()
if status.get('status') == 'ok':
results["verification_successful"] = True
results["verification_message"] = "Automated verification completed successfully."
else:
results["verification_successful"] = False
results["verification_message"] = f"Automated verification failed: {status}"
except Exception as e:
results["verification_successful"] = False
results["verification_message"] = f"Automated verification failed: {e}"
return results
results = {
"verification_successful": False,
"verification_message": ""
}
try:
status = client.get_status()
if status.get('status') == 'ok':
results["verification_successful"] = True
results["verification_message"] = "Automated verification completed successfully."
else:
results["verification_successful"] = False
results["verification_message"] = f"Automated verification failed: {status}"
except Exception as e:
results["verification_successful"] = False
results["verification_message"] = f"Automated verification failed: {e}"
return results
def test_conductor_integrates_api_hook_client_for_verification(live_gui):
"""
"""
Verify that Conductor's simulated phase completion logic properly integrates
and uses the ApiHookClient for verification against the live GUI.
"""
client = ApiHookClient()
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is True
assert "successfully" in results["verification_message"]
client = ApiHookClient()
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is True
assert "successfully" in results["verification_message"]
def test_conductor_handles_api_hook_failure(live_gui):
"""
"""
Verify Conductor handles a simulated API hook verification failure.
We patch the client's get_status to simulate failure even with live GUI.
"""
client = ApiHookClient()
with patch.object(ApiHookClient, 'get_status') as mock_get_status:
mock_get_status.return_value = {'status': 'failed', 'error': 'Something went wrong'}
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is False
assert "failed" in results["verification_message"]
client = ApiHookClient()
with patch.object(ApiHookClient, 'get_status') as mock_get_status:
mock_get_status.return_value = {'status': 'failed', 'error': 'Something went wrong'}
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is False
assert "failed" in results["verification_message"]
def test_conductor_handles_api_hook_connection_error():
"""
"""
Verify Conductor handles a simulated API hook connection error (server down).
"""
client = ApiHookClient(base_url="http://127.0.0.1:9998", max_retries=0)
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is False
# Check for expected error substrings from ApiHookClient
msg = results["verification_message"]
assert any(term in msg for term in ["Could not connect", "timed out", "Could not reach"])
client = ApiHookClient(base_url="http://127.0.0.1:9998", max_retries=0)
results = simulate_conductor_phase_completion(client)
assert results["verification_successful"] is False
# Check for expected error substrings from ApiHookClient
msg = results["verification_message"]
assert any(term in msg for term in ["Could not connect", "timed out", "Could not reach"])

View File

@@ -7,284 +7,234 @@ import ai_client
# which will be implemented in the next phase of TDD.
def test_conductor_engine_initialization():
"""
"""
Test that ConductorEngine can be initialized with a Track.
"""
track = Track(id="test_track", description="Test Track")
from multi_agent_conductor import ConductorEngine
engine = ConductorEngine(track=track)
assert engine.track == track
track = Track(id="test_track", description="Test Track")
from multi_agent_conductor import ConductorEngine
engine = ConductorEngine(track=track)
assert engine.track == track
@pytest.mark.asyncio
async def test_conductor_engine_run_linear_executes_tickets_in_order(monkeypatch):
"""
"""
Test that run_linear iterates through executable tickets and calls the worker lifecycle.
"""
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker2", depends_on=["T1"])
track = Track(id="track1", description="Track 1", tickets=[ticket1, ticket2])
from multi_agent_conductor import ConductorEngine
engine = ConductorEngine(track=track)
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# We mock run_worker_lifecycle as it is expected to be in the same module
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved
def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete()
return "Success"
mock_lifecycle.side_effect = side_effect
await engine.run_linear()
# Track.get_executable_tickets() should be called repeatedly until all are done
# T1 should run first, then T2.
assert mock_lifecycle.call_count == 2
assert ticket1.status == "completed"
assert ticket2.status == "completed"
# Verify sequence: T1 before T2
calls = mock_lifecycle.call_args_list
assert calls[0][0][0].id == "T1"
assert calls[1][0][0].id == "T2"
ticket1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
ticket2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker2", depends_on=["T1"])
track = Track(id="track1", description="Track 1", tickets=[ticket1, ticket2])
from multi_agent_conductor import ConductorEngine
engine = ConductorEngine(track=track)
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# We mock run_worker_lifecycle as it is expected to be in the same module
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
# Mocking lifecycle to mark ticket as complete so dependencies can be resolved
def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete()
return "Success"
mock_lifecycle.side_effect = side_effect
await engine.run_linear()
# Track.get_executable_tickets() should be called repeatedly until all are done
# T1 should run first, then T2.
assert mock_lifecycle.call_count == 2
assert ticket1.status == "completed"
assert ticket2.status == "completed"
# Verify sequence: T1 before T2
calls = mock_lifecycle.call_args_list
assert calls[0][0][0].id == "T1"
assert calls[1][0][0].id == "T2"
@pytest.mark.asyncio
async def test_run_worker_lifecycle_calls_ai_client_send(monkeypatch):
"""
"""
Test that run_worker_lifecycle triggers the AI client and updates ticket status on success.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
mock_send.return_value = "Task complete. I have updated the file."
result = run_worker_lifecycle(ticket, context)
assert result == "Task complete. I have updated the file."
assert ticket.status == "completed"
mock_send.assert_called_once()
# Check if description was passed to send()
args, kwargs = mock_send.call_args
# user_message is passed as a keyword argument
assert ticket.description in kwargs["user_message"]
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
mock_send.return_value = "Task complete. I have updated the file."
result = run_worker_lifecycle(ticket, context)
assert result == "Task complete. I have updated the file."
assert ticket.status == "completed"
mock_send.assert_called_once()
# Check if description was passed to send()
args, kwargs = mock_send.call_args
# user_message is passed as a keyword argument
assert ticket.description in kwargs["user_message"]
@pytest.mark.asyncio
async def test_run_worker_lifecycle_context_injection(monkeypatch):
"""
"""
Test that run_worker_lifecycle can take a context_files list and injects AST views into the prompt.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
context_files = ["primary.py", "secondary.py"]
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# We mock ASTParser which is expected to be imported in multi_agent_conductor
with patch("multi_agent_conductor.ASTParser") as mock_ast_parser_class, \
patch("builtins.open", new_callable=MagicMock) as mock_open:
# Setup open mock to return different content for different files
file_contents = {
"primary.py": "def primary(): pass",
"secondary.py": "def secondary(): pass"
}
def mock_open_side_effect(file, *args, **kwargs):
content = file_contents.get(file, "")
mock_file = MagicMock()
mock_file.read.return_value = content
mock_file.__enter__.return_value = mock_file
return mock_file
mock_open.side_effect = mock_open_side_effect
# Setup ASTParser mock
mock_ast_parser = mock_ast_parser_class.return_value
mock_ast_parser.get_curated_view.return_value = "CURATED VIEW"
mock_ast_parser.get_skeleton.return_value = "SKELETON VIEW"
mock_send.return_value = "Success"
run_worker_lifecycle(ticket, context, context_files=context_files)
# Verify ASTParser calls:
# First file (primary) should get curated view, others (secondary) get skeleton
mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass")
mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass")
# Verify user_message contains the views
_, kwargs = mock_send.call_args
user_message = kwargs["user_message"]
assert "CURATED VIEW" in user_message
assert "SKELETON VIEW" in user_message
assert "primary.py" in user_message
assert "secondary.py" in user_message
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
context_files = ["primary.py", "secondary.py"]
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# We mock ASTParser which is expected to be imported in multi_agent_conductor
with patch("multi_agent_conductor.ASTParser") as mock_ast_parser_class, \
patch("builtins.open", new_callable=MagicMock) as mock_open:
# Setup open mock to return different content for different files
file_contents = {
"primary.py": "def primary(): pass",
"secondary.py": "def secondary(): pass"
}
def mock_open_side_effect(file, *args, **kwargs):
content = file_contents.get(file, "")
mock_file = MagicMock()
mock_file.read.return_value = content
mock_file.__enter__.return_value = mock_file
return mock_file
mock_open.side_effect = mock_open_side_effect
# Setup ASTParser mock
mock_ast_parser = mock_ast_parser_class.return_value
mock_ast_parser.get_curated_view.return_value = "CURATED VIEW"
mock_ast_parser.get_skeleton.return_value = "SKELETON VIEW"
mock_send.return_value = "Success"
run_worker_lifecycle(ticket, context, context_files=context_files)
# Verify ASTParser calls:
# First file (primary) should get curated view, others (secondary) get skeleton
mock_ast_parser.get_curated_view.assert_called_once_with("def primary(): pass")
mock_ast_parser.get_skeleton.assert_called_once_with("def secondary(): pass")
# Verify user_message contains the views
_, kwargs = mock_send.call_args
user_message = kwargs["user_message"]
assert "CURATED VIEW" in user_message
assert "SKELETON VIEW" in user_message
assert "primary.py" in user_message
assert "secondary.py" in user_message
@pytest.mark.asyncio
async def test_run_worker_lifecycle_handles_blocked_response(monkeypatch):
"""
"""
Test that run_worker_lifecycle marks the ticket as blocked if the AI indicates it cannot proceed.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# Simulate a response indicating a block
mock_send.return_value = "I am BLOCKED because I don't have enough information."
run_worker_lifecycle(ticket, context)
assert ticket.status == "blocked"
assert "BLOCKED" in ticket.blocked_reason
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1")
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# Simulate a response indicating a block
mock_send.return_value = "I am BLOCKED because I don't have enough information."
run_worker_lifecycle(ticket, context)
assert ticket.status == "blocked"
assert "BLOCKED" in ticket.blocked_reason
@pytest.mark.asyncio
async def test_run_worker_lifecycle_step_mode_confirmation(monkeypatch):
"""
"""
Test that run_worker_lifecycle passes confirm_execution to ai_client.send when step_mode is True.
Verify that if confirm_execution is called (simulated by mocking ai_client.send to call its callback),
the flow works as expected.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
# We simulate ai_client.send by making it call the pre_tool_callback it received
def mock_send_side_effect(md_content, user_message, **kwargs):
callback = kwargs.get("pre_tool_callback")
if callback:
# Simulate calling it with some payload
callback('{"tool": "read_file", "args": {"path": "test.txt"}}')
return "Success"
mock_send.side_effect = mock_send_side_effect
mock_confirm.return_value = True
mock_event_queue = MagicMock()
run_worker_lifecycle(ticket, context, event_queue=mock_event_queue)
# Verify confirm_execution was called
mock_confirm.assert_called_once()
assert ticket.status == "completed"
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
# We simulate ai_client.send by making it call the pre_tool_callback it received
def mock_send_side_effect(md_content, user_message, **kwargs):
callback = kwargs.get("pre_tool_callback")
if callback:
# Simulate calling it with some payload
callback('{"tool": "read_file", "args": {"path": "test.txt"}}')
return "Success"
mock_send.side_effect = mock_send_side_effect
mock_confirm.return_value = True
mock_event_queue = MagicMock()
run_worker_lifecycle(ticket, context, event_queue=mock_event_queue)
# Verify confirm_execution was called
mock_confirm.assert_called_once()
assert ticket.status == "completed"
@pytest.mark.asyncio
async def test_run_worker_lifecycle_step_mode_rejection(monkeypatch):
"""
"""
Verify that if confirm_execution returns False, the logic (in ai_client, which we simulate here)
would prevent execution. In run_worker_lifecycle, we just check if it's passed.
"""
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
mock_confirm.return_value = False
mock_send.return_value = "Task failed because tool execution was rejected."
run_worker_lifecycle(ticket, context)
# Verify it was passed to send
args, kwargs = mock_send.call_args
assert kwargs["pre_tool_callback"] is not None
# Since we've already tested ai_client's implementation of pre_tool_callback (mentally or via other tests),
# here we just verify the wiring.
ticket = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker1", step_mode=True)
context = WorkerContext(ticket_id="T1", model_name="test-model", messages=[])
from multi_agent_conductor import run_worker_lifecycle
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
with patch("multi_agent_conductor.confirm_execution") as mock_confirm:
mock_confirm.return_value = False
mock_send.return_value = "Task failed because tool execution was rejected."
run_worker_lifecycle(ticket, context)
# Verify it was passed to send
args, kwargs = mock_send.call_args
assert kwargs["pre_tool_callback"] is not None
# Since we've already tested ai_client's implementation of pre_tool_callback (mentally or via other tests),
# here we just verify the wiring.
@pytest.mark.asyncio
async def test_conductor_engine_dynamic_parsing_and_execution(monkeypatch):
"""
"""
Test that parse_json_tickets correctly populates the track and run_linear executes them in dependency order.
"""
import json
from multi_agent_conductor import ConductorEngine
track = Track(id="dynamic_track", description="Dynamic Track")
engine = ConductorEngine(track=track)
tickets_json = json.dumps([
{
"id": "T1",
"description": "Initial task",
"status": "todo",
"assigned_to": "worker1",
"depends_on": []
},
{
"id": "T2",
"description": "Dependent task",
"status": "todo",
"assigned_to": "worker2",
"depends_on": ["T1"]
},
{
"id": "T3",
"description": "Another initial task",
"status": "todo",
"assigned_to": "worker3",
"depends_on": []
}
])
engine.parse_json_tickets(tickets_json)
assert len(engine.track.tickets) == 3
assert engine.track.tickets[0].id == "T1"
assert engine.track.tickets[1].id == "T2"
assert engine.track.tickets[2].id == "T3"
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# Mock run_worker_lifecycle to mark tickets as complete
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete()
return "Success"
mock_lifecycle.side_effect = side_effect
await engine.run_linear()
assert mock_lifecycle.call_count == 3
# Verify dependency order: T1 must be called before T2
calls = [call[0][0].id for call in mock_lifecycle.call_args_list]
t1_idx = calls.index("T1")
t2_idx = calls.index("T2")
assert t1_idx < t2_idx
# T3 can be anywhere relative to T1 and T2, but T1 < T2 is mandatory
assert "T3" in calls
import json
from multi_agent_conductor import ConductorEngine
track = Track(id="dynamic_track", description="Dynamic Track")
engine = ConductorEngine(track=track)
tickets_json = json.dumps([
{
"id": "T1",
"description": "Initial task",
"status": "todo",
"assigned_to": "worker1",
"depends_on": []
},
{
"id": "T2",
"description": "Dependent task",
"status": "todo",
"assigned_to": "worker2",
"depends_on": ["T1"]
},
{
"id": "T3",
"description": "Another initial task",
"status": "todo",
"assigned_to": "worker3",
"depends_on": []
}
])
engine.parse_json_tickets(tickets_json)
assert len(engine.track.tickets) == 3
assert engine.track.tickets[0].id == "T1"
assert engine.track.tickets[1].id == "T2"
assert engine.track.tickets[2].id == "T3"
# Mock ai_client.send using monkeypatch
mock_send = MagicMock()
monkeypatch.setattr(ai_client, 'send', mock_send)
# Mock run_worker_lifecycle to mark tickets as complete
with patch("multi_agent_conductor.run_worker_lifecycle") as mock_lifecycle:
def side_effect(ticket, context, *args, **kwargs):
ticket.mark_complete()
return "Success"
mock_lifecycle.side_effect = side_effect
await engine.run_linear()
assert mock_lifecycle.call_count == 3
# Verify dependency order: T1 must be called before T2
calls = [call[0][0].id for call in mock_lifecycle.call_args_list]
t1_idx = calls.index("T1")
t2_idx = calls.index("T2")
assert t1_idx < t2_idx
# T3 can be anywhere relative to T1 and T2, but T1 < T2 is mandatory
assert "T3" in calls

View File

@@ -4,112 +4,106 @@ import json
import conductor_tech_lead
class TestConductorTechLead(unittest.TestCase):
@patch('ai_client.send')
@patch('ai_client.set_provider')
@patch('ai_client.reset_session')
def test_generate_tickets_success(self, mock_reset_session, mock_set_provider, mock_send):
# Setup mock response
mock_tickets = [
{
"id": "ticket_1",
"type": "Ticket",
"goal": "Test goal",
"target_file": "test.py",
"depends_on": [],
"context_requirements": []
}
]
mock_send.return_value = "```json\n" + json.dumps(mock_tickets) + "\n```"
track_brief = "Test track brief"
module_skeletons = "Test skeletons"
# Call the function
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
@patch('ai_client.send')
@patch('ai_client.set_provider')
@patch('ai_client.reset_session')
def test_generate_tickets_success(self, mock_reset_session, mock_set_provider, mock_send):
# Setup mock response
mock_tickets = [
{
"id": "ticket_1",
"type": "Ticket",
"goal": "Test goal",
"target_file": "test.py",
"depends_on": [],
"context_requirements": []
}
]
mock_send.return_value = "```json\n" + json.dumps(mock_tickets) + "\n```"
track_brief = "Test track brief"
module_skeletons = "Test skeletons"
# Call the function
tickets = conductor_tech_lead.generate_tickets(track_brief, module_skeletons)
# Verify set_provider was called
mock_set_provider.assert_called_with('gemini', 'gemini-2.5-flash-lite')
mock_reset_session.assert_called_once()
# Verify send was called
mock_send.assert_called_once()
args, kwargs = mock_send.call_args
self.assertEqual(kwargs['md_content'], "")
self.assertIn(track_brief, kwargs['user_message'])
self.assertIn(module_skeletons, kwargs['user_message'])
# Verify tickets were parsed correctly
self.assertEqual(tickets, mock_tickets)
# Verify set_provider was called
mock_set_provider.assert_called_with('gemini', 'gemini-2.5-flash-lite')
mock_reset_session.assert_called_once()
# Verify send was called
mock_send.assert_called_once()
args, kwargs = mock_send.call_args
self.assertEqual(kwargs['md_content'], "")
self.assertIn(track_brief, kwargs['user_message'])
self.assertIn(module_skeletons, kwargs['user_message'])
# Verify tickets were parsed correctly
self.assertEqual(tickets, mock_tickets)
@patch('ai_client.send')
@patch('ai_client.set_provider')
@patch('ai_client.reset_session')
def test_generate_tickets_parse_error(self, mock_reset_session, mock_set_provider, mock_send):
# Setup mock invalid response
mock_send.return_value = "Invalid JSON"
# Call the function
tickets = conductor_tech_lead.generate_tickets("brief", "skeletons")
# Verify it returns an empty list on parse error
self.assertEqual(tickets, [])
@patch('ai_client.send')
@patch('ai_client.set_provider')
@patch('ai_client.reset_session')
def test_generate_tickets_parse_error(self, mock_reset_session, mock_set_provider, mock_send):
# Setup mock invalid response
mock_send.return_value = "Invalid JSON"
# Call the function
tickets = conductor_tech_lead.generate_tickets("brief", "skeletons")
# Verify it returns an empty list on parse error
self.assertEqual(tickets, [])
class TestTopologicalSort(unittest.TestCase):
def test_topological_sort_empty(self):
tickets = []
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
self.assertEqual(sorted_tickets, [])
def test_topological_sort_empty(self):
tickets = []
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
self.assertEqual(sorted_tickets, [])
def test_topological_sort_linear(self):
tickets = [
{"id": "t2", "depends_on": ["t1"]},
{"id": "t1", "depends_on": []},
{"id": "t3", "depends_on": ["t2"]},
]
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
ids = [t["id"] for t in sorted_tickets]
self.assertEqual(ids, ["t1", "t2", "t3"])
def test_topological_sort_linear(self):
tickets = [
{"id": "t2", "depends_on": ["t1"]},
{"id": "t1", "depends_on": []},
{"id": "t3", "depends_on": ["t2"]},
]
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
ids = [t["id"] for t in sorted_tickets]
self.assertEqual(ids, ["t1", "t2", "t3"])
def test_topological_sort_complex(self):
# t1
# | \
# t2 t3
# | /
# t4
tickets = [
{"id": "t4", "depends_on": ["t2", "t3"]},
{"id": "t3", "depends_on": ["t1"]},
{"id": "t2", "depends_on": ["t1"]},
{"id": "t1", "depends_on": []},
]
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
ids = [t["id"] for t in sorted_tickets]
# Possible valid orders: [t1, t2, t3, t4] or [t1, t3, t2, t4]
self.assertEqual(ids[0], "t1")
self.assertEqual(ids[-1], "t4")
self.assertSetEqual(set(ids[1:3]), {"t2", "t3"})
def test_topological_sort_complex(self):
# t1
# | \
# t2 t3
# | /
# t4
tickets = [
{"id": "t4", "depends_on": ["t2", "t3"]},
{"id": "t3", "depends_on": ["t1"]},
{"id": "t2", "depends_on": ["t1"]},
{"id": "t1", "depends_on": []},
]
sorted_tickets = conductor_tech_lead.topological_sort(tickets)
ids = [t["id"] for t in sorted_tickets]
# Possible valid orders: [t1, t2, t3, t4] or [t1, t3, t2, t4]
self.assertEqual(ids[0], "t1")
self.assertEqual(ids[-1], "t4")
self.assertSetEqual(set(ids[1:3]), {"t2", "t3"})
def test_topological_sort_cycle(self):
tickets = [
{"id": "t1", "depends_on": ["t2"]},
{"id": "t2", "depends_on": ["t1"]},
]
with self.assertRaises(ValueError) as cm:
conductor_tech_lead.topological_sort(tickets)
self.assertIn("Circular dependency detected", str(cm.exception))
def test_topological_sort_cycle(self):
tickets = [
{"id": "t1", "depends_on": ["t2"]},
{"id": "t2", "depends_on": ["t1"]},
]
with self.assertRaises(ValueError) as cm:
conductor_tech_lead.topological_sort(tickets)
self.assertIn("Circular dependency detected", str(cm.exception))
def test_topological_sort_missing_dependency(self):
    """A dependency on an id absent from the ticket list raises ValueError.

    Only dependencies within the set being sorted are considered. Because
    'missing' is never supplied, t1 could never be satisfied, so the sort is
    expected to reject the input instead of treating the dependency as an
    external one that is already met.
    """
    tickets = [
        {"id": "t1", "depends_on": ["missing"]},
    ]
    with self.assertRaises(ValueError):
        conductor_tech_lead.topological_sort(tickets)
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()

View File

@@ -3,82 +3,72 @@ from models import Ticket
from dag_engine import TrackDAG
def test_get_ready_tasks_linear():
    """In the chain T1 <- T2 <- T3 with T1 completed, only T2 is ready."""
    t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
    t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
    t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T2"])
    dag = TrackDAG([t1, t2, t3])
    ready = dag.get_ready_tasks()
    # T3 is still blocked because T2 has not been completed.
    assert len(ready) == 1
    assert ready[0].id == "T2"
def test_get_ready_tasks_branching():
    """Two tickets that both depend on a completed T1 are ready together."""
    t1 = Ticket(id="T1", description="Task 1", status="completed", assigned_to="worker")
    t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
    t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
    dag = TrackDAG([t1, t2, t3])
    ready = dag.get_ready_tasks()
    assert len(ready) == 2
    # Compare as a set: the order of ready tasks is not part of this check.
    ready_ids = {t.id for t in ready}
    assert ready_ids == {"T2", "T3"}
def test_has_cycle_no_cycle():
    """A simple two-ticket chain is reported as acyclic."""
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
    t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
    dag = TrackDAG([t1, t2])
    assert not dag.has_cycle()
def test_has_cycle_direct_cycle():
    """Two tickets that depend on each other form a detectable cycle."""
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
    t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
    dag = TrackDAG([t1, t2])
    assert dag.has_cycle()
def test_has_cycle_indirect_cycle():
    """A three-ticket loop (T1 -> T2 -> T3 -> T1) is detected as a cycle."""
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", depends_on=["T2"])
    t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T3"])
    t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
    dag = TrackDAG([t1, t2, t3])
    assert dag.has_cycle()
def test_has_cycle_complex_no_cycle():
    """A diamond of dependencies shares a node but contains no cycle."""
    # Arrows mean "depends on": T1 -> T2, T1 -> T3, T2 -> T4, T3 -> T4.
    t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
    t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T4"])
    t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T4"])
    t4 = Ticket(id="T4", description="T4", status="todo", assigned_to="worker")
    dag = TrackDAG([t1, t2, t3, t4])
    # T4 is reachable along two paths; that must not be mistaken for a cycle.
    assert not dag.has_cycle()
def test_get_ready_tasks_multiple_deps():
    """A ticket with two dependencies is ready only when both are completed."""
    t1 = Ticket(id="T1", description="T1", status="completed", assigned_to="worker")
    t2 = Ticket(id="T2", description="T2", status="completed", assigned_to="worker")
    t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T1", "T2"])
    dag = TrackDAG([t1, t2, t3])
    assert [t.id for t in dag.get_ready_tasks()] == ["T3"]
    # Re-open T2: it becomes the ready task itself and T3 is blocked again.
    t2.status = "todo"
    assert [t.id for t in dag.get_ready_tasks()] == ["T2"]
def test_topological_sort():
    """A linear chain sorts into dependency order, returned as ticket ids."""
    t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker")
    t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
    t3 = Ticket(id="T3", description="T3", status="todo", assigned_to="worker", depends_on=["T2"])
    dag = TrackDAG([t1, t2, t3])
    sort = dag.topological_sort()
    assert sort == ["T1", "T2", "T3"]
def test_topological_sort_cycle():
    """Sorting a cyclic graph raises ValueError naming the dependency cycle."""
    t1 = Ticket(id="T1", description="T1", status="todo", assigned_to="worker", depends_on=["T2"])
    t2 = Ticket(id="T2", description="T2", status="todo", assigned_to="worker", depends_on=["T1"])
    dag = TrackDAG([t1, t2])
    with pytest.raises(ValueError, match="Dependency cycle detected"):
        dag.topological_sort()

View File

@@ -12,54 +12,51 @@ import ai_client
import project_manager
def test_credentials_error_mentions_deepseek(monkeypatch):
    """
    Verify that the error message shown when credentials.toml is missing
    includes deepseek instructions.
    """
    # Point SLOP_CREDENTIALS at a file that does not exist.
    monkeypatch.setenv("SLOP_CREDENTIALS", "non_existent_credentials_file.toml")
    with pytest.raises(FileNotFoundError) as excinfo:
        ai_client._load_credentials()
    # excinfo.value is only available after the raises-context has exited.
    err_msg = str(excinfo.value)
    assert "[deepseek]" in err_msg
    assert "api_key" in err_msg
def test_default_project_includes_reasoning_role():
    """
    Verify that 'Reasoning' is included in the default discussion roles
    to support DeepSeek-R1 reasoning traces.
    """
    proj = project_manager.default_project("test")
    roles = proj["discussion"]["roles"]
    assert "Reasoning" in roles
def test_gui_providers_list():
    """
    Check if 'deepseek' is in the GUI's provider list.
    """
    # Imported inside the test so that loading gui_2 happens only when it runs.
    import gui_2
    assert "deepseek" in gui_2.PROVIDERS
def test_deepseek_model_listing():
    """
    Verify that list_models for deepseek returns expected models.
    """
    models = ai_client.list_models("deepseek")
    assert "deepseek-chat" in models
    assert "deepseek-reasoner" in models
def test_gui_provider_list_via_hooks(live_gui):
    """
    Verify 'deepseek' is present in the GUI provider list using API hooks.

    `live_gui` is requested but never referenced in the body; presumably it
    is a fixture that starts the GUI (defined outside this view - confirm).
    """
    from api_hook_client import ApiHookClient
    import time
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)
    # Attempt to set provider to deepseek to verify it's an allowed value.
    client.set_value('current_provider', 'deepseek')
    # Short pause before reading the value back.
    time.sleep(0.5)
    assert client.get_value('current_provider') == 'deepseek'

View File

@@ -3,137 +3,124 @@ from unittest.mock import patch, MagicMock
import ai_client
def test_deepseek_model_selection():
    """
    Verifies that ai_client.set_provider('deepseek', 'deepseek-chat') correctly updates the internal state.
    """
    ai_client.set_provider("deepseek", "deepseek-chat")
    assert ai_client._provider == "deepseek"
    assert ai_client._model == "deepseek-chat"
def test_deepseek_completion_logic():
    """
    Verifies that ai_client.send() correctly calls the DeepSeek API and returns content.
    """
    ai_client.set_provider("deepseek", "deepseek-chat")
    with patch("requests.post") as mock_post:
        # Canned chat-completion payload returned by the patched HTTP call.
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "choices": [{
                "message": {"role": "assistant", "content": "DeepSeek Response"},
                "finish_reason": "stop"
            }],
            "usage": {"prompt_tokens": 10, "completion_tokens": 5}
        }
        mock_post.return_value = mock_response
        result = ai_client.send(md_content="Context", user_message="Hello", base_dir=".")
        assert result == "DeepSeek Response"
        assert mock_post.called
def test_deepseek_reasoning_logic():
    """
    Verifies that reasoning_content is captured and wrapped in <thinking> tags.
    """
    ai_client.set_provider("deepseek", "deepseek-reasoner")
    with patch("requests.post") as mock_post:
        mock_response = MagicMock()
        mock_response.status_code = 200
        # The message carries both a final answer and a reasoning trace.
        mock_response.json.return_value = {
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": "Final Answer",
                    "reasoning_content": "Chain of thought"
                },
                "finish_reason": "stop"
            }],
            "usage": {"prompt_tokens": 10, "completion_tokens": 20}
        }
        mock_post.return_value = mock_response
        result = ai_client.send(md_content="Context", user_message="Reasoning test", base_dir=".")
        assert "<thinking>\nChain of thought\n</thinking>" in result
        assert "Final Answer" in result
def test_deepseek_tool_calling():
    """
    Verifies that DeepSeek provider correctly identifies and executes tool calls.
    """
    ai_client.set_provider("deepseek", "deepseek-chat")
    with patch("requests.post") as mock_post, \
            patch("mcp_client.dispatch") as mock_dispatch:
        # 1. Mock first response with a tool call
        mock_resp1 = MagicMock()
        mock_resp1.status_code = 200
        mock_resp1.json.return_value = {
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": "Let me read that file.",
                    "tool_calls": [{
                        "id": "call_123",
                        "type": "function",
                        "function": {
                            "name": "read_file",
                            "arguments": '{"path": "test.txt"}'
                        }
                    }]
                },
                "finish_reason": "tool_calls"
            }],
            "usage": {"prompt_tokens": 50, "completion_tokens": 10}
        }
        # 2. Mock second response (final answer)
        mock_resp2 = MagicMock()
        mock_resp2.status_code = 200
        mock_resp2.json.return_value = {
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": "File content is: Hello World"
                },
                "finish_reason": "stop"
            }],
            "usage": {"prompt_tokens": 100, "completion_tokens": 20}
        }
        # Successive HTTP calls return the tool-call turn, then the final turn.
        mock_post.side_effect = [mock_resp1, mock_resp2]
        mock_dispatch.return_value = "Hello World"
        result = ai_client.send(md_content="Context", user_message="Read test.txt", base_dir=".")
        assert "File content is: Hello World" in result
        # The tool must be dispatched with the name and the decoded JSON arguments.
        assert mock_dispatch.called
        assert mock_dispatch.call_args[0][0] == "read_file"
        assert mock_dispatch.call_args[0][1] == {"path": "test.txt"}
def test_deepseek_streaming():
    """
    Verifies that DeepSeek provider correctly aggregates streaming chunks.
    """
    ai_client.set_provider("deepseek", "deepseek-chat")
    with patch("requests.post") as mock_post:
        # Mock a streaming response
        mock_response = MagicMock()
        mock_response.status_code = 200
        # Simulate OpenAI-style server-sent events (SSE) for streaming
        # Each line starts with 'data: ' and contains a JSON object
        chunks = [
            'data: {"choices": [{"delta": {"role": "assistant", "content": "Hello"}, "index": 0, "finish_reason": null}]}',
            'data: {"choices": [{"delta": {"content": " World"}, "index": 0, "finish_reason": null}]}',
            'data: {"choices": [{"delta": {}, "index": 0, "finish_reason": "stop"}]}',
            'data: [DONE]'
        ]
        # iter_lines is mocked to hand back the events as UTF-8 encoded bytes.
        mock_response.iter_lines.return_value = [c.encode('utf-8') for c in chunks]
        mock_post.return_value = mock_response
        result = ai_client.send(md_content="Context", user_message="Stream test", base_dir=".", stream=True)
        assert result == "Hello World"

View File

@@ -3,121 +3,99 @@ from models import Ticket
from dag_engine import TrackDAG, ExecutionEngine
def test_execution_engine_basic_flow():
    """Walk a diamond-shaped DAG through the engine one completion at a time."""
    # Setup tickets with dependencies
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
    t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
    t3 = Ticket(id="T3", description="Task 3", status="todo", assigned_to="worker", depends_on=["T1"])
    t4 = Ticket(id="T4", description="Task 4", status="todo", assigned_to="worker", depends_on=["T2", "T3"])
    dag = TrackDAG([t1, t2, t3, t4])
    engine = ExecutionEngine(dag)
    # Tick 1: Only T1 should be ready
    ready = engine.tick()
    assert len(ready) == 1
    assert ready[0].id == "T1"
    # Complete T1
    engine.update_task_status("T1", "completed")
    # Tick 2: T2 and T3 should be ready
    ready = engine.tick()
    assert len(ready) == 2
    ids = {t.id for t in ready}
    assert ids == {"T2", "T3"}
    # Complete T2
    engine.update_task_status("T2", "completed")
    # Tick 3: Only T3 should be ready (T4 depends on T2 AND T3)
    ready = engine.tick()
    assert len(ready) == 1
    assert ready[0].id == "T3"
    # Complete T3
    engine.update_task_status("T3", "completed")
    # Tick 4: T4 should be ready
    ready = engine.tick()
    assert len(ready) == 1
    assert ready[0].id == "T4"
    # Complete T4
    engine.update_task_status("T4", "completed")
    # Tick 5: Nothing ready
    ready = engine.tick()
    assert len(ready) == 0
def test_execution_engine_update_nonexistent_task():
    """Updating the status of an unknown ticket id must not raise."""
    dag = TrackDAG([])
    engine = ExecutionEngine(dag)
    # Should not raise error, or handle gracefully
    engine.update_task_status("NONEXISTENT", "completed")
def test_execution_engine_status_persistence():
    """A status update is written to the Ticket object and respected by tick()."""
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
    dag = TrackDAG([t1])
    engine = ExecutionEngine(dag)
    engine.update_task_status("T1", "in_progress")
    # The engine mutates the very ticket instance that was handed to the DAG.
    assert t1.status == "in_progress"
    ready = engine.tick()
    assert len(ready) == 0  # Only 'todo' tasks should be returned by tick() if they are ready
def test_execution_engine_auto_queue():
    """With auto_queue=True, ready tickets are moved to 'in_progress' by tick()."""
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
    t2 = Ticket(id="T2", description="Task 2", status="todo", assigned_to="worker", depends_on=["T1"])
    dag = TrackDAG([t1, t2])
    engine = ExecutionEngine(dag, auto_queue=True)
    # Tick 1: T1 is ready and should be automatically marked as 'in_progress'
    ready = engine.tick()
    assert len(ready) == 1
    assert ready[0].id == "T1"
    assert t1.status == "in_progress"
    # Tick 2: T1 is in_progress, so T2 is NOT ready yet (T1 must be 'completed')
    ready = engine.tick()
    assert len(ready) == 0
    assert t2.status == "todo"
    # Complete T1
    engine.update_task_status("T1", "completed")
    # Tick 3: T2 is now ready and should be automatically marked as 'in_progress'
    ready = engine.tick()
    assert len(ready) == 1
    assert ready[0].id == "T2"
    assert t2.status == "in_progress"
def test_execution_engine_step_mode():
    """A step_mode ticket is reported as ready but waits for manual approval."""
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker", step_mode=True)
    dag = TrackDAG([t1])
    engine = ExecutionEngine(dag, auto_queue=True)
    # Tick 1: T1 is ready, but step_mode=True, so it should NOT be automatically marked as 'in_progress'
    ready = engine.tick()
    assert len(ready) == 1
    assert ready[0].id == "T1"
    assert t1.status == "todo"
    # Manual approval
    engine.approve_task("T1")
    assert t1.status == "in_progress"
    # Tick 2: T1 is already in_progress, should not be returned by tick() (it's not 'ready'/todo)
    ready = engine.tick()
    assert len(ready) == 0
def test_execution_engine_approve_task():
    """approve_task() moves a ticket to 'in_progress' even without auto_queue."""
    t1 = Ticket(id="T1", description="Task 1", status="todo", assigned_to="worker")
    dag = TrackDAG([t1])
    engine = ExecutionEngine(dag, auto_queue=False)
    # Should be able to approve even if auto_queue is False
    engine.approve_task("T1")
    assert t1.status == "in_progress"

View File

@@ -14,44 +14,40 @@ from simulation.sim_execution import ExecutionSimulation
@pytest.mark.integration
def test_context_sim_live(live_gui):
    """Run the Context & Chat simulation against a live GUI."""
    # live_gui is requested but not referenced in the body.
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)
    sim = ContextSimulation(client)
    sim.setup("LiveContextSim")
    sim.run()
    sim.teardown()
@pytest.mark.integration
def test_ai_settings_sim_live(live_gui):
    """Run the AI Settings simulation against a live GUI."""
    # live_gui is requested but not referenced in the body.
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)
    sim = AISettingsSimulation(client)
    sim.setup("LiveAISettingsSim")
    sim.run()
    sim.teardown()
@pytest.mark.integration
def test_tools_sim_live(live_gui):
    """Run the Tools & Search simulation against a live GUI."""
    # live_gui is requested but not referenced in the body.
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)
    sim = ToolsSimulation(client)
    sim.setup("LiveToolsSim")
    sim.run()
    sim.teardown()
@pytest.mark.integration
def test_execution_sim_live(live_gui):
    """Run the Execution & Modals simulation against a live GUI."""
    # live_gui is requested but not referenced in the body.
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)
    sim = ExecutionSimulation(client)
    sim.setup("LiveExecutionSim")
    sim.run()
    sim.teardown()

View File

@@ -12,119 +12,105 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from gemini_cli_adapter import GeminiCliAdapter
class TestGeminiCliAdapter(unittest.TestCase):
def setUp(self):
    """Create a fresh adapter, configured with the 'gemini' binary, per test."""
    self.adapter = GeminiCliAdapter(binary_path="gemini")
@patch('subprocess.Popen')
def test_send_starts_subprocess_with_correct_args(self, mock_popen):
    """
    Verify that send(message) correctly starts the subprocess with
    --output-format stream-json and the provided message via stdin using communicate.
    """
    # Setup mock process with a minimal valid JSONL termination
    process_mock = MagicMock()
    stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
    process_mock.communicate.return_value = (stdout_content, "")
    process_mock.poll.return_value = 0
    process_mock.wait.return_value = 0
    mock_popen.return_value = process_mock
    message = "Hello Gemini CLI"
    self.adapter.send(message)
    # Verify subprocess.Popen call
    mock_popen.assert_called_once()
    args, kwargs = mock_popen.call_args
    cmd = args[0]
    # Check mandatory CLI components
    self.assertIn("gemini", cmd)
    self.assertIn("--output-format", cmd)
    self.assertIn("stream-json", cmd)
    # Message should NOT be in cmd now
    self.assertNotIn(message, cmd)
    # Verify message was sent via communicate
    process_mock.communicate.assert_called_once_with(input=message)
    # Check process configuration
    self.assertEqual(kwargs.get('stdout'), subprocess.PIPE)
    self.assertEqual(kwargs.get('stdin'), subprocess.PIPE)
    self.assertEqual(kwargs.get('text'), True)
@patch('subprocess.Popen')
def test_send_parses_jsonl_output(self, mock_popen):
    """
    Verify that it correctly parses multiple JSONL 'message' events
    and returns the combined text.
    """
    jsonl_output = [
        json.dumps({"type": "message", "role": "model", "text": "The quick brown "}),
        json.dumps({"type": "message", "role": "model", "text": "fox jumps."}),
        json.dumps({"type": "result", "usage": {"prompt_tokens": 5, "candidates_tokens": 5}})
    ]
    # One JSON document per line, newline-terminated.
    stdout_content = "\n".join(jsonl_output) + "\n"
    process_mock = MagicMock()
    process_mock.communicate.return_value = (stdout_content, "")
    process_mock.poll.return_value = 0
    process_mock.wait.return_value = 0
    mock_popen.return_value = process_mock
    result = self.adapter.send("test message")
    self.assertEqual(result["text"], "The quick brown fox jumps.")
    self.assertEqual(result["tool_calls"], [])
@patch('subprocess.Popen')
def test_send_handles_tool_use_events(self, mock_popen):
    """
    Verify that it correctly handles 'tool_use' events in the stream
    by continuing to read until the final 'result' event.
    """
    jsonl_output = [
        json.dumps({"type": "message", "role": "assistant", "text": "Calling tool..."}),
        json.dumps({"type": "tool_use", "name": "read_file", "args": {"path": "test.txt"}}),
        json.dumps({"type": "message", "role": "assistant", "text": "\nFile read successfully."}),
        json.dumps({"type": "result", "usage": {}})
    ]
    stdout_content = "\n".join(jsonl_output) + "\n"
    process_mock = MagicMock()
    process_mock.communicate.return_value = (stdout_content, "")
    process_mock.poll.return_value = 0
    process_mock.wait.return_value = 0
    mock_popen.return_value = process_mock
    result = self.adapter.send("read test.txt")
    # Result should contain the combined text from all 'message' events
    self.assertEqual(result["text"], "Calling tool...\nFile read successfully.")
    self.assertEqual(len(result["tool_calls"]), 1)
    self.assertEqual(result["tool_calls"][0]["name"], "read_file")
@patch('subprocess.Popen')
def test_send_captures_usage_metadata(self, mock_popen):
    """
    The 'usage' payload of the 'result' event must be stored on the
    adapter as ``last_usage``.
    """
    expected_usage = {"total_tokens": 42}
    events = (
        {"type": "message", "text": "Finalizing"},
        {"type": "result", "usage": expected_usage},
    )
    fake_stdout = "".join(json.dumps(event) + "\n" for event in events)

    fake_process = MagicMock()
    fake_process.communicate.return_value = (fake_stdout, "")
    fake_process.poll.return_value = 0
    fake_process.wait.return_value = 0
    mock_popen.return_value = fake_process

    self.adapter.send("usage test")

    # The adapter instance itself keeps the usage of the last call.
    self.assertEqual(self.adapter.last_usage, expected_usage)
if __name__ == '__main__':
unittest.main()
unittest.main()

View File

@@ -9,168 +9,143 @@ import os
# Ensure the project root is in sys.path to resolve imports correctly
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if project_root not in sys.path:
sys.path.append(project_root)
# Import the class to be tested
sys.path.append(project_root)
# Import the class to be tested
from gemini_cli_adapter import GeminiCliAdapter
class TestGeminiCliAdapterParity(unittest.TestCase):
    """
    Checks how GeminiCliAdapter builds its command line and stdin payload.

    Every test patches ``subprocess.Popen`` so no real CLI process is started.
    """

    def setUp(self):
        """Set up a fresh adapter instance and reset session state for each test."""
        # Patch session_logger to prevent file operations during tests.
        self.session_logger_patcher = patch('gemini_cli_adapter.session_logger')
        self.mock_session_logger = self.session_logger_patcher.start()
        # Register the stop right away: unittest skips tearDown when setUp
        # raises, but cleanups registered before the failure still run, so
        # the patch can never leak into other tests.
        self.addCleanup(self.session_logger_patcher.stop)
        self.adapter = GeminiCliAdapter(binary_path="gemini")
        self.adapter.session_id = None
        self.adapter.last_usage = None
        self.adapter.last_latency = 0.0

    @staticmethod
    def _install_result_only_process(mock_popen):
        """
        Make the patched Popen return a process mock whose stdout holds a
        single JSONL 'result' event, and return that process mock.
        """
        process_mock = MagicMock()
        mock_stdout_content = json.dumps({"type": "result", "usage": {}}) + "\n"
        process_mock.communicate.return_value = (mock_stdout_content, "")
        process_mock.returncode = 0
        mock_popen.return_value = process_mock
        return process_mock

    @patch('subprocess.Popen')
    def test_count_tokens_uses_estimation(self, mock_popen):
        """
        Test that count_tokens uses character-based estimation.
        """
        contents_to_count = ["This is the first line.", "This is the second line."]
        expected_chars = len("\n".join(contents_to_count))
        expected_tokens = expected_chars // 4
        token_count = self.adapter.count_tokens(contents=contents_to_count)
        self.assertEqual(token_count, expected_tokens)
        # Verify that NO subprocess was started for counting
        mock_popen.assert_not_called()

    @patch('subprocess.Popen')
    def test_send_with_safety_settings_no_flags_added(self, mock_popen):
        """
        Test that the send method does NOT add --safety flags when safety_settings are provided,
        as this functionality is no longer supported via CLI flags.
        """
        process_mock = self._install_result_only_process(mock_popen)
        message_content = "User's prompt here."
        safety_settings = [
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH"},
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
        ]
        self.adapter.send(message=message_content, safety_settings=safety_settings)
        # First positional argument of the Popen call is the command.
        command = mock_popen.call_args[0][0]
        # Verify that no --safety flags were added to the command
        self.assertNotIn("--safety", command)
        # Verify that the message was passed correctly via stdin
        process_mock.communicate.assert_called_once_with(input=message_content)

    @patch('subprocess.Popen')
    def test_send_without_safety_settings_no_flags(self, mock_popen):
        """
        Test that when safety_settings is None or an empty list, no --safety flags are added.
        """
        self._install_result_only_process(mock_popen)
        message_content = "Another prompt."
        self.adapter.send(message=message_content, safety_settings=None)
        args_none, _ = mock_popen.call_args
        self.assertNotIn("--safety", args_none[0])
        # reset_mock() clears the recorded calls but keeps return_value, so
        # the second send still receives the same process mock.
        mock_popen.reset_mock()
        self.adapter.send(message=message_content, safety_settings=[])
        args_empty, _ = mock_popen.call_args
        self.assertNotIn("--safety", args_empty[0])

    @patch('subprocess.Popen')
    def test_send_with_system_instruction_prepended_to_stdin(self, mock_popen):
        """
        Test that the send method prepends the system instruction to the prompt
        sent via stdin, and does NOT add a --system flag to the command.
        """
        process_mock = self._install_result_only_process(mock_popen)
        message_content = "User's prompt here."
        system_instruction_text = "Some instruction"
        expected_input = f"{system_instruction_text}\n\n{message_content}"
        self.adapter.send(message=message_content, system_instruction=system_instruction_text)
        command = mock_popen.call_args[0][0]
        # Verify that the system instruction was prepended to the input sent to communicate
        process_mock.communicate.assert_called_once_with(input=expected_input)
        # Verify that no --system flag was added to the command
        self.assertNotIn("--system", command)

    @patch('subprocess.Popen')
    def test_send_with_model_parameter(self, mock_popen):
        """
        Test that the send method correctly adds the -m <model> flag when a model is specified.
        """
        process_mock = self._install_result_only_process(mock_popen)
        message_content = "User's prompt here."
        model_name = "gemini-1.5-flash"
        expected_command_part = f'-m "{model_name}"'
        self.adapter.send(message=message_content, model=model_name)
        command = mock_popen.call_args[0][0]
        # Verify that the -m <model> flag was added to the command
        self.assertIn(expected_command_part, command)
        # Verify that the message was passed correctly via stdin
        process_mock.communicate.assert_called_once_with(input=message_content)

    @patch('subprocess.Popen')
    def test_send_kills_process_on_communicate_exception(self, mock_popen):
        """
        Test that if subprocess.Popen().communicate() raises an exception,
        GeminiCliAdapter.send() kills the process and re-raises the exception.
        """
        mock_process = MagicMock()
        mock_popen.return_value = mock_process
        # Define an exception to simulate
        simulated_exception = RuntimeError("Simulated communicate error")
        mock_process.communicate.side_effect = simulated_exception
        message_content = "User message"
        # Assert that the exception is raised and process is killed
        with self.assertRaises(RuntimeError) as cm:
            self.adapter.send(message=message_content)
        # Verify that the process's kill method was called
        mock_process.kill.assert_called_once()
        # Verify that the correct exception was re-raised
        self.assertIs(cm.exception, simulated_exception)
if __name__ == '__main__':
unittest.main()
unittest.main()

View File

@@ -7,66 +7,57 @@ import json
from api_hook_client import ApiHookClient
def test_gemini_cli_context_bleed_prevention(live_gui):
    """
    Test that the GeminiCliAdapter correctly filters out echoed 'user' messages
    and only shows assistant content in the GUI history.

    A throwaway mock CLI script is written to tests/mock_context_bleed.py. It
    is removed in a ``finally`` block so that a failing assertion (or a failing
    GUI call) does not leave the generated file behind.
    """
    client = ApiHookClient("http://127.0.0.1:8999")
    client.click("btn_reset")
    client.set_value("auto_add_history", True)
    # Create a specialized mock for context bleed
    bleed_mock = os.path.abspath("tests/mock_context_bleed.py")
    with open(bleed_mock, "w") as f:
        f.write('''import sys, json
print(json.dumps({"type": "init", "session_id": "bleed-test"}), flush=True)
print(json.dumps({"type": "message", "role": "user", "content": "I am echoing you"}), flush=True)
print(json.dumps({"type": "message", "role": "assistant", "content": "Actual AI Response"}), flush=True)
print(json.dumps({"type": "result", "stats": {"total_tokens": 10}}), flush=True)
''')
    try:
        cli_cmd = f'"{sys.executable}" "{bleed_mock}"'
        client.set_value("current_provider", "gemini_cli")
        client.set_value("gcli_path", cli_cmd)
        client.set_value("ai_input", "Test context bleed")
        client.click("btn_gen_send")
        # Wait for completion
        time.sleep(3)
        session = client.get_session()
        entries = session.get("session", {}).get("entries", [])
        # Verify: We expect exactly one AI entry, and it must NOT contain the echoed user message
        ai_entries = [e for e in entries if e.get("role") == "AI"]
        assert len(ai_entries) == 1
        assert ai_entries[0].get("content") == "Actual AI Response"
        assert "echoing you" not in ai_entries[0].get("content")
    finally:
        # Runs on success and on failure alike; the file is known to exist
        # because the write above completed before the try block was entered.
        os.remove(bleed_mock)
def test_gemini_cli_parameter_resilience(live_gui):
"""
"""
Test that mcp_client correctly handles 'file_path' and 'dir_path' aliases
sent by the AI instead of 'path'.
"""
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop")
# Create a mock that uses dir_path for list_directory
alias_mock = os.path.abspath("tests/mock_alias_tool.py")
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
# Avoid backslashes in f-string expression part
if sys.platform == "win32":
bridge_path_str = bridge_path.replace("\\", "/")
else:
bridge_path_str = bridge_path
with open(alias_tool_content := "tests/mock_alias_tool.py", "w") as f:
f.write(f'''import sys, json, os, subprocess
client = ApiHookClient("http://127.0.0.1:8999")
client.click("btn_reset")
client.set_value("auto_add_history", True)
client.select_list_item("proj_files", "manual_slop")
# Create a mock that uses dir_path for list_directory
alias_mock = os.path.abspath("tests/mock_alias_tool.py")
bridge_path = os.path.abspath("scripts/cli_tool_bridge.py")
# Avoid backslashes in f-string expression part
if sys.platform == "win32":
bridge_path_str = bridge_path.replace("\\", "/")
else:
bridge_path_str = bridge_path
with open(alias_tool_content := "tests/mock_alias_tool.py", "w") as f:
f.write(f'''import sys, json, os, subprocess
prompt = sys.stdin.read()
if '"role": "tool"' in prompt:
print(json.dumps({{"type": "message", "role": "assistant", "content": "Tool worked!"}}), flush=True)
@@ -88,83 +79,71 @@ else:
}}), flush=True)
print(json.dumps({{"type": "result", "stats": {{"total_tokens": 10}}}}), flush=True)
''')
cli_cmd = f'"{sys.executable}" "{alias_mock}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
client.set_value("ai_input", "Test parameter aliases")
client.click("btn_gen_send")
# Handle approval
timeout = 15
start_time = time.time()
approved = False
while time.time() - start_time < timeout:
for ev in client.get_events():
if ev.get("type") == "ask_received":
requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": ev.get("request_id"), "response": {"approved": True}})
approved = True
if approved: break
time.sleep(0.5)
assert approved, "Tool approval event never received"
# Verify tool result in history
time.sleep(2)
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
# Check for "Tool worked!" which implies the tool execution was successful
found = any("Tool worked!" in e.get("content", "") for e in entries)
assert found, "Tool result indicating success not found in history"
os.remove(alias_mock)
cli_cmd = f'"{sys.executable}" "{alias_mock}"'
client.set_value("current_provider", "gemini_cli")
client.set_value("gcli_path", cli_cmd)
client.set_value("ai_input", "Test parameter aliases")
client.click("btn_gen_send")
# Handle approval
timeout = 15
start_time = time.time()
approved = False
while time.time() - start_time < timeout:
for ev in client.get_events():
if ev.get("type") == "ask_received":
requests.post("http://127.0.0.1:8999/api/ask/respond",
json={"request_id": ev.get("request_id"), "response": {"approved": True}})
approved = True
if approved: break
time.sleep(0.5)
assert approved, "Tool approval event never received"
# Verify tool result in history
time.sleep(2)
session = client.get_session()
entries = session.get("session", {}).get("entries", [])
# Check for "Tool worked!" which implies the tool execution was successful
found = any("Tool worked!" in e.get("content", "") for e in entries)
assert found, "Tool result indicating success not found in history"
os.remove(alias_mock)
def test_gemini_cli_loop_termination(live_gui):
    """
    Test that multi-round tool calling correctly terminates and preserves
    payload (session context) between rounds.

    Tool requests are approved as they arrive; the test passes once the
    final second-round message appears in the session history.
    """
    api = ApiHookClient("http://127.0.0.1:8999")
    api.click("btn_reset")
    api.set_value("auto_add_history", True)
    api.select_list_item("proj_files", "manual_slop")

    # The stock mock_gemini_cli.py is already designed for 2 rounds.
    script_path = os.path.abspath("tests/mock_gemini_cli.py")
    launch_cmd = f'"{sys.executable}" "{script_path}"'
    api.set_value("current_provider", "gemini_cli")
    api.set_value("gcli_path", launch_cmd)
    api.set_value("ai_input", "Perform multi-round tool test")
    api.click("btn_gen_send")

    # Approve every 'ask_received' event of a poll (the mock does one tool
    # call); stop polling after the first poll that approved something.
    approval_window = 20
    began = time.time()
    approved = False
    while time.time() - began < approval_window:
        for event in api.get_events():
            if event.get("type") != "ask_received":
                continue
            requests.post(
                "http://127.0.0.1:8999/api/ask/respond",
                json={"request_id": event.get("request_id"), "response": {"approved": True}},
            )
            approved = True
        if approved:
            break
        time.sleep(0.5)

    # Wait for the second round and its final answer.
    found_final = False
    began = time.time()
    while time.time() - began < 15:
        history = api.get_session().get("session", {}).get("entries", [])
        found_final = any(
            "processed the tool results" in item.get("content", "") for item in history
        )
        if found_final:
            break
        time.sleep(1)
    assert found_final, "Final message after multi-round tool loop not found"

View File

@@ -6,136 +6,116 @@ import requests
from api_hook_client import ApiHookClient
def test_gemini_cli_full_integration(live_gui):
    """
    Integration test for the Gemini CLI provider and tool bridge.

    Approves 'ask_received' events from the bridge as well as glob and
    script confirmation requests, and stops as soon as the mock CLI's final
    message shows up in the session history.
    """
    api = ApiHookClient("http://127.0.0.1:8999")

    # 0. Reset session and enable history
    api.click("btn_reset")
    api.set_value("auto_add_history", True)
    # Switch to manual_slop project explicitly
    api.select_list_item("proj_files", "manual_slop")

    # 1. Point the GUI at the mock CLI (preferred for CI/testing).
    script_path = os.path.abspath("tests/mock_gemini_cli.py")
    cli_cmd = f'"{sys.executable}" "{script_path}"'
    print("[TEST] Setting current_provider to gemini_cli")
    api.set_value("current_provider", "gemini_cli")
    print(f"[TEST] Setting gcli_path to {cli_cmd}")
    api.set_value("gcli_path", cli_cmd)
    # Verify settings
    assert api.get_value("current_provider") == "gemini_cli"
    # Clear events
    api.get_events()

    # 2. Trigger a message in the GUI
    print("[TEST] Sending user message...")
    api.set_value("ai_input", "Please read test.txt")
    api.click("btn_gen_send")

    # 3. Monitor for approval events
    print("[TEST] Waiting for approval events...")
    approvable = ("ask_received", "glob_approval_required", "script_confirmation_required")
    final_markers = ("Hello from mock!", "processed the tool results")
    budget = 45
    began = time.time()
    approved_count = 0
    while time.time() - began < budget:
        # A falsy events value (None or empty) means nothing to handle.
        for event in api.get_events() or ():
            kind = event.get("type")
            ident = event.get("request_id") or event.get("action_id")
            print(f"[TEST] Received event: {kind} (ID: {ident})")
            if kind not in approvable:
                continue
            print(f"[TEST] Approving {kind} {ident}")
            if kind == "script_confirmation_required":
                reply = requests.post(
                    f"http://127.0.0.1:8999/api/confirm/{ident}",
                    json={"approved": True},
                )
            else:
                reply = requests.post(
                    "http://127.0.0.1:8999/api/ask/respond",
                    json={"request_id": ident, "response": {"approved": True}},
                )
            assert reply.status_code == 200
            approved_count += 1

        # Check if we got a final response in history
        history = api.get_session().get("session", {}).get("entries", [])
        found_final = any(
            any(marker in item.get("content", "") for marker in final_markers)
            for item in history
        )
        if found_final:
            print("[TEST] Success! Found final message in history.")
            break
        time.sleep(1.0)

    assert approved_count > 0, "No approval events were processed"
    assert found_final, "Final message from mock CLI was not found in the GUI history"
def test_gemini_cli_rejection_and_history(live_gui):
    """
    Integration test for the Gemini CLI provider: Rejection flow and history.

    The first 'ask_received' event is answered with approved=False; the
    test then waits for the denial message to appear in the session history.
    """
    api = ApiHookClient("http://127.0.0.1:8999")

    # 0. Reset session
    api.click("btn_reset")
    api.set_value("auto_add_history", True)
    api.select_list_item("proj_files", "manual_slop")
    script_path = os.path.abspath("tests/mock_gemini_cli.py")
    launch_cmd = f'"{sys.executable}" "{script_path}"'
    api.set_value("current_provider", "gemini_cli")
    api.set_value("gcli_path", launch_cmd)

    # 2. Trigger a message
    print("[TEST] Sending user message (to be denied)...")
    api.set_value("ai_input", "Deny me")
    api.click("btn_gen_send")

    # 3. Wait for event and reject
    budget = 20
    began = time.time()
    denied = False
    while time.time() - began < budget:
        for event in api.get_events():
            kind = event.get("type")
            ident = event.get("request_id")
            print(f"[TEST] Received event: {kind}")
            if kind != "ask_received":
                continue
            print(f"[TEST] Denying request {ident}")
            requests.post(
                "http://127.0.0.1:8999/api/ask/respond",
                json={"request_id": ident, "response": {"approved": False}},
            )
            denied = True
            # Only the first request is denied; later events are ignored.
            break
        if denied:
            break
        time.sleep(0.5)
    assert denied, "No ask_received event to deny"

    # 4. Verify rejection in history
    print("[TEST] Waiting for rejection in history...")
    rejection_found = False
    began = time.time()
    while time.time() - began < 20:
        history = api.get_session().get("session", {}).get("entries", [])
        rejection_found = any(
            "Tool execution was denied" in item.get("content", "") for item in history
        )
        if rejection_found:
            break
        time.sleep(1.0)
    assert rejection_found, "Rejection message not found in history"

View File

@@ -10,43 +10,38 @@ import ai_client
@pytest.fixture(autouse=True)
def setup_ai_client():
    """
    Before every test: reset the ai_client session, select the gemini_cli
    provider and replace the module's callbacks with inert stand-ins.
    """
    def _fake_confirm_and_run(script, base_dir):
        # Stand-in for script execution; always returns the same canned result.
        return "Mocked execution"

    def _discard_comms_entry(entry):
        return None

    def _discard_tool_log(script, result):
        return None

    ai_client.reset_session()
    ai_client.set_provider("gemini_cli", "gemini-2.5-flash")
    ai_client.confirm_and_run_callback = _fake_confirm_and_run
    ai_client.comms_log_callback = _discard_comms_entry
    ai_client.tool_log_callback = _discard_tool_log
    yield
@patch('ai_client.GeminiCliAdapter')
@patch('ai_client._get_combined_system_prompt')
def test_send_invokes_adapter_send(mock_prompt, mock_adapter_class):
    """Check the payload and system instruction that ai_client.send hands to the adapter.

    The user message is expected to be prefixed with the discussion history,
    and the context is expected to be appended to the system prompt inside a
    <context> element.
    """
    mock_prompt.return_value = "Mocked Prompt"
    mock_instance = mock_adapter_class.return_value
    mock_instance.send.return_value = {"text": "Done", "tool_calls": []}
    mock_instance.last_usage = {"input_tokens": 10}
    mock_instance.last_latency = 0.1
    mock_instance.session_id = None

    ai_client.send("context", "message", discussion_history="hist")

    expected_payload = "[DISCUSSION HISTORY]\n\nhist\n\n---\n\nmessage"
    assert mock_instance.send.called
    args, kwargs = mock_instance.send.call_args
    assert args[0] == expected_payload
    assert kwargs['system_instruction'] == "Mocked Prompt\n\n<context>\ncontext\n</context>"
@patch('ai_client.GeminiCliAdapter')
def test_get_history_bleed_stats(mock_adapter_class):
    """Check that get_history_bleed_stats reports the adapter's last input token count."""
    mock_instance = mock_adapter_class.return_value
    mock_instance.send.return_value = {"text": "txt", "tool_calls": []}
    mock_instance.last_usage = {"input_tokens": 1500}
    mock_instance.last_latency = 0.5
    mock_instance.session_id = "sess"

    # Initialize by sending a message so the adapter has usage data to report.
    ai_client.send("context", "msg")

    stats = ai_client.get_history_bleed_stats()
    assert stats["provider"] == "gemini_cli"
    assert stats["current"] == 1500

View File

@@ -10,41 +10,34 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from ai_client import get_gemini_cache_stats, reset_session
def test_get_gemini_cache_stats_with_mock_client():
    """
    Test that get_gemini_cache_stats correctly processes cache lists
    from a mocked client instance.
    """
    # Ensure a clean state before the test by resetting the session.
    reset_session()

    # 1. Create a mock for the cache object that the client will return.
    mock_cache = MagicMock()
    mock_cache.name = "cachedContents/test-cache"
    mock_cache.display_name = "Test Cache"
    mock_cache.model = "models/gemini-1.5-pro-001"
    mock_cache.size_bytes = 1024

    # 2. Create a mock client whose `caches.list` returns our mock cache.
    mock_client_instance = MagicMock()
    mock_client_instance.caches.list.return_value = [mock_cache]

    # 3. Patch the Client constructor to return our mock instance.
    #    This intercepts the `_ensure_gemini_client` call inside the function.
    with patch('google.genai.Client', return_value=mock_client_instance) as mock_client_constructor:
        # 4. Call the function under test.
        stats = get_gemini_cache_stats()

        # 5. The client must have been constructed and asked for its caches.
        mock_client_constructor.assert_called_once()
        mock_client_instance.caches.list.assert_called_once()

        # The returned stats dictionary should describe the single mock cache.
        assert "cache_count" in stats
        assert "total_size_bytes" in stats
        assert stats["cache_count"] == 1
        assert stats["total_size_bytes"] == 1024

View File

@@ -6,43 +6,40 @@ from events import EventEmitter
@pytest.fixture
def app_instance():
    """
    Fixture that yields the gui_2.App class (not an instance) for testing.

    Config loading/saving, the project manager, the session logger, the
    immapp run loop and several App start-up methods are patched for the
    duration of the test, so constructing App inside a test does not open a
    window or block. An EventEmitter is installed on ai_client.events if
    none is present.
    """
    if not hasattr(ai_client, 'events') or ai_client.events is None:
        ai_client.events = EventEmitter()
    with (
        patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
        patch('gui_2.save_config'),
        patch('gui_2.project_manager'),
        patch('gui_2.session_logger'),
        patch('gui_2.immapp.run'),
        patch.object(App, '_load_active_project'),
        patch.object(App, '_fetch_models'),
        patch.object(App, '_load_fonts'),
        patch.object(App, '_post_init'),
    ):
        yield App
def test_app_subscribes_to_events(app_instance):
    """
    Check that App.__init__ subscribes the necessary event handlers to the
    ai_client.events emitter.

    Every registered handler must be a bound method of the App instance
    that was just constructed.
    """
    with patch.object(ai_client.events, 'on') as mock_on:
        app = app_instance()
        mock_on.assert_called()

        registrations = mock_on.call_args_list
        event_names = [registration.args[0] for registration in registrations]
        assert "request_start" in event_names
        assert "response_received" in event_names
        assert "tool_execution" in event_names

        for registration in registrations:
            handler = registration.args[1]
            assert hasattr(handler, '__self__')
            assert handler.__self__ is app

View File

@@ -4,45 +4,43 @@ from gui_2 import App
@pytest.fixture
def app_instance():
    """Yield a gui_2.App built with config, project, logging and run-loop hooks patched out.

    The patches remain active for the duration of the test that uses the
    fixture.
    """
    with (
        patch('gui_2.load_config', return_value={'gui': {'show_windows': {}}}),
        patch('gui_2.save_config'),
        patch('gui_2.project_manager'),
        patch('gui_2.session_logger'),
        patch('gui_2.immapp.run'),
        patch.object(App, '_load_active_project'),
        patch.object(App, '_fetch_models'),
        patch.object(App, '_load_fonts'),
        patch.object(App, '_post_init'),
    ):
        yield App()
def test_gui2_hubs_exist_in_show_windows(app_instance):
    """
    Verifies that the new consolidated Hub windows are defined in the App's show_windows.
    This ensures they will be available in the 'Windows' menu.
    """
    expected_hubs = [
        "Context Hub",
        "AI Settings",
        "Discussion Hub",
        "Operations Hub",
        "Files & Media",
        "Theme",
    ]
    for hub in expected_hubs:
        assert hub in app_instance.show_windows, f"Expected hub window '{hub}' not found in show_windows"
def test_gui2_old_windows_removed_from_show_windows(app_instance):
    """
    Verifies that the old fragmented windows are removed from show_windows.
    """
    old_windows = [
        "Projects", "Files", "Screenshots",
        "Provider", "System Prompts",
        "Message", "Response", "Tool Calls", "Comms History",
    ]
    for old_win in old_windows:
        assert old_win not in app_instance.show_windows, f"Old window '{old_win}' should have been removed from show_windows"

View File

@@ -6,74 +6,65 @@ from events import EventEmitter
@pytest.fixture
def app_instance():
    """Yield a gui_2.App instance constructed with its start-up side effects patched out.

    An EventEmitter is installed on ai_client.events first if none is
    present. The patches stay active while the test runs.
    """
    if not hasattr(ai_client, 'events') or ai_client.events is None:
        ai_client.events = EventEmitter()
    with (
        patch('gui_2.load_config', return_value={'ai': {}, 'projects': {}}),
        patch('gui_2.save_config'),
        patch('gui_2.project_manager'),
        patch('gui_2.session_logger'),
        patch('gui_2.immapp.run'),
        patch.object(App, '_load_active_project'),
        patch.object(App, '_fetch_models'),
        patch.object(App, '_load_fonts'),
        patch.object(App, '_post_init'),
    ):
        yield App()
def test_mcp_tool_call_is_dispatched(app_instance):
    """
    Verify that when the AI returns a tool call for an MCP function,
    ai_client dispatches it to mcp_client.

    The Gemini chat is mocked to answer first with a `read_file` function
    call and then with a plain final answer.
    """
    # 1. Define the mock tool call from the AI.
    mock_fc = MagicMock()
    mock_fc.name = "read_file"
    mock_fc.args = {"file_path": "test.txt"}

    # 2. Construct the mock AI response (Gemini format) carrying that call.
    mock_part = MagicMock()
    mock_part.text = ""
    mock_part.function_call = mock_fc
    mock_candidate = MagicMock()
    mock_candidate.content.parts = [mock_part]
    mock_candidate.finish_reason.name = "TOOL_CALLING"
    mock_response_with_tool = MagicMock()
    mock_response_with_tool.candidates = [mock_candidate]

    class DummyUsage:
        # Plain ints rather than MagicMock attributes for the usage counters.
        prompt_token_count = 100
        candidates_token_count = 10
        cached_content_token_count = 0

    mock_response_with_tool.usage_metadata = DummyUsage()

    # 3. Create a mock for the final AI response after the tool call.
    mock_response_final = MagicMock()
    mock_response_final.text = "Final answer"
    mock_response_final.candidates = []
    mock_response_final.usage_metadata = DummyUsage()

    # 4. Patch the necessary components.
    with (
        patch("ai_client._ensure_gemini_client"),
        patch("ai_client._gemini_client") as mock_client,
        patch('mcp_client.dispatch', return_value="file content") as mock_dispatch,
    ):
        mock_chat = mock_client.chats.create.return_value
        mock_chat.send_message.side_effect = [mock_response_with_tool, mock_response_final]
        ai_client.set_provider("gemini", "mock-model")

        # 5. Call the send function.
        ai_client.send(
            md_content="some context",
            user_message="read the file",
            base_dir=".",
            file_items=[],
            discussion_history="",
        )

        # 6. Assert that the MCP dispatch function was called.
        mock_dispatch.assert_called_once_with("read_file", {"file_path": "test.txt"})

View File

@@ -15,70 +15,62 @@ TEST_CALLBACK_FILE = Path("temp_callback_output.txt")
@pytest.fixture(scope="function", autouse=True)
def cleanup_callback_file():
    """Ensures the test callback file is cleaned up before and after each test."""
    def _remove_callback_file():
        # missing_ok avoids the check-then-delete race of exists() + unlink().
        TEST_CALLBACK_FILE.unlink(missing_ok=True)

    _remove_callback_file()
    yield
    _remove_callback_file()
def test_gui2_set_value_hook_works(live_gui):
    """
    Tests that the 'set_value' GUI hook is correctly implemented.

    A unique value is posted for the 'ai_input' item and then read back
    through the get_value hook.
    """
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)

    test_value = f"New value set by test: {uuid.uuid4()}"
    gui_data = {'action': 'set_value', 'item': 'ai_input', 'value': test_value}
    response = client.post_gui(gui_data)
    assert response == {'status': 'queued'}

    # The request is only queued, so pause before reading the value back.
    time.sleep(0.5)
    current_value = client.get_value('ai_input')
    assert current_value == test_value
def test_gui2_click_hook_works(live_gui):
    """
    Tests that the 'click' GUI hook for the 'Reset' button is implemented.
    """
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)

    # First, set some state that 'Reset' would clear.
    test_value = "This text should be cleared by the reset button."
    client.set_value('ai_input', test_value)
    time.sleep(0.5)
    assert client.get_value('ai_input') == test_value

    # Now, trigger the click and pause before checking the result.
    client.click('btn_reset')
    time.sleep(0.5)

    # Verify it was reset.
    assert client.get_value('ai_input') == ""
def test_gui2_custom_callback_hook_works(live_gui):
    """
    Tests that the 'custom_callback' GUI hook is correctly implemented.

    The callback named '_test_callback_func_write_to_file' is expected to
    write its argument to TEST_CALLBACK_FILE, which is then read back.
    """
    client = ApiHookClient()
    assert client.wait_for_server(timeout=10)

    test_data = f"Callback executed: {uuid.uuid4()}"
    gui_data = {
        'action': 'custom_callback',
        'callback': '_test_callback_func_write_to_file',
        'args': [test_data],
    }
    response = client.post_gui(gui_data)
    assert response == {'status': 'queued'}

    time.sleep(1)  # Give gui_2.py time to process its task queue

    # Assert that the file WAS created and contains the correct data.
    assert TEST_CALLBACK_FILE.exists(), "Custom callback was NOT executed, or file path is wrong!"
    content = TEST_CALLBACK_FILE.read_text()
    assert content == test_data, "Callback executed, but file content is incorrect."

View File

@@ -12,78 +12,66 @@ from api_hook_client import ApiHookClient
# Averages recorded by test_performance_benchmarking, keyed by GUI script name,
# and read back by test_performance_parity.
_shared_metrics: dict[str, dict[str, float]] = {}
def test_performance_benchmarking(live_gui):
    """
    Collects performance metrics for the current GUI script (parameterized as gui.py and gui_2.py).

    Samples are polled for five seconds, averaged, stored in _shared_metrics
    under the script name, and checked against minimum thresholds when any
    positive FPS sample was seen.
    """
    _process, gui_script = live_gui
    client = ApiHookClient()

    # Wait for app to stabilize and render some frames.
    time.sleep(3.0)

    # Collect metrics over 5 seconds.
    fps_values = []
    cpu_values = []
    frame_time_values = []
    start_time = time.time()
    while time.time() - start_time < 5:
        try:
            perf_data = client.get_performance()
            metrics = perf_data.get('performance', {})
            if metrics:
                fps = metrics.get('fps', 0.0)
                cpu = metrics.get('cpu_percent', 0.0)
                ft = metrics.get('last_frame_time_ms', 0.0)
                # In some CI environments without a display, metrics might be 0.
                # We only record positive ones to avoid skewing averages if hooks are failing.
                if fps > 0:
                    fps_values.append(fps)
                    cpu_values.append(cpu)
                    frame_time_values.append(ft)
            time.sleep(0.1)
        except Exception:
            # Best effort: stop sampling on the first failure and keep what we have.
            break

    avg_fps = sum(fps_values) / len(fps_values) if fps_values else 0
    avg_cpu = sum(cpu_values) / len(cpu_values) if cpu_values else 0
    avg_ft = sum(frame_time_values) / len(frame_time_values) if frame_time_values else 0
    _shared_metrics[gui_script] = {
        "avg_fps": avg_fps,
        "avg_cpu": avg_cpu,
        "avg_ft": avg_ft,
    }
    print(f"\n[Test] Results for {gui_script}: FPS={avg_fps:.2f}, CPU={avg_cpu:.2f}%, FT={avg_ft:.2f}ms")

    # Absolute minimum requirements, enforced only when real samples were collected.
    if avg_fps > 0:
        assert avg_fps >= 30, f"{gui_script} FPS {avg_fps:.2f} is below 30 FPS threshold"
        assert avg_ft <= 33.3, f"{gui_script} Frame time {avg_ft:.2f}ms is above 33.3ms threshold"
def test_performance_parity():
    """
    Compare the metrics collected in the parameterized test_performance_benchmarking.

    The test is skipped unless metrics for both "gui_legacy.py" and
    "gui_2.py" are present in _shared_metrics. Differences are expressed
    relative to the legacy GUI's averages; a zero legacy average yields a
    difference of 0.
    """
    # Skip whenever either script is missing. Counting entries is not enough:
    # two unrelated keys would let the lookups below raise KeyError.
    required_scripts = ("gui_legacy.py", "gui_2.py")
    if any(script not in _shared_metrics for script in required_scripts):
        pytest.skip("Metrics for both GUIs not yet collected.")

    gui_m = _shared_metrics["gui_legacy.py"]
    gui2_m = _shared_metrics["gui_2.py"]

    fps_diff_pct = abs(gui_m["avg_fps"] - gui2_m["avg_fps"]) / gui_m["avg_fps"] if gui_m["avg_fps"] > 0 else 0
    cpu_diff_pct = abs(gui_m["avg_cpu"] - gui2_m["avg_cpu"]) / gui_m["avg_cpu"] if gui_m["avg_cpu"] > 0 else 0

    print("\n--- Performance Parity Results ---")
    print(f"FPS Diff: {fps_diff_pct*100:.2f}%")
    print(f"CPU Diff: {cpu_diff_pct*100:.2f}%")

    # FPS parity: 15% leeway is asserted for now (the stated target is 5%).
    # CPU is given far more leeway: up to a 300% relative difference.
    assert fps_diff_pct <= 0.15, f"FPS difference {fps_diff_pct*100:.2f}% exceeds 15% threshold"
    assert cpu_diff_pct <= 3.0, f"CPU difference {cpu_diff_pct*100:.2f}% exceeds 300% threshold"

View File

@@ -6,75 +6,70 @@ from events import UserRequestEvent
@pytest.fixture
def mock_gui():
    """Return a gui_2.App constructed with config, project and AI start-up calls patched.

    The fixture returns rather than yields, so the patches are only in
    effect while App() is being constructed, not while the test body runs.
    """
    with (
        patch('gui_2.load_config', return_value={
            "ai": {"provider": "gemini", "model": "model-1"},
            "projects": {"paths": [], "active": ""},
            "gui": {"show_windows": {}},
        }),
        patch('gui_2.project_manager.load_project', return_value={}),
        patch('gui_2.project_manager.migrate_from_legacy_config', return_value={}),
        patch('gui_2.project_manager.save_project'),
        patch('gui_2.session_logger.open_session'),
        patch('gui_2.App._init_ai_and_hooks'),
        patch('gui_2.App._fetch_models'),
    ):
        return App()
def test_handle_generate_send_pushes_event(mock_gui):
    """Check that _handle_generate_send queues a "user_request" event built from the UI state."""
    # Mock _do_generate to return sample data.
    mock_gui._do_generate = MagicMock(return_value=(
        "full_md", "path", [], "stable_md", "disc_text"
    ))
    mock_gui.ui_ai_input = "test prompt"
    mock_gui.ui_files_base_dir = "."
    # Mock event_queue.put so the call can be inspected.
    mock_gui.event_queue.put = MagicMock()

    # asyncio.run_coroutine_threadsafe is patched so nothing is scheduled on a loop.
    with patch('asyncio.run_coroutine_threadsafe') as mock_run:
        mock_gui._handle_generate_send()

        # Verify run_coroutine_threadsafe was called.
        assert mock_run.called

        # The first argument to run_coroutine_threadsafe is whatever
        # event_queue.put() returned, so inspect the call to put itself.
        mock_gui.event_queue.put.assert_called_once()
        args, _kwargs = mock_gui.event_queue.put.call_args
        assert args[0] == "user_request"

        event = args[1]
        assert isinstance(event, UserRequestEvent)
        assert event.prompt == "test prompt"
        assert event.stable_md == "stable_md"
        assert event.disc_text == "disc_text"
        assert event.base_dir == "."
def test_user_request_event_payload():
    """Check that UserRequestEvent.to_dict exposes every constructor field under its own name."""
    payload = UserRequestEvent(
        prompt="hello",
        stable_md="md",
        file_items=[],
        disc_text="disc",
        base_dir=".",
    )
    d = payload.to_dict()
    assert d["prompt"] == "hello"
    assert d["stable_md"] == "md"
    assert d["file_items"] == []
    assert d["disc_text"] == "disc"
    assert d["base_dir"] == "."
@pytest.mark.asyncio
async def test_async_event_queue():
    """Check that an event put on AsyncEventQueue comes back from get() as (name, payload)."""
    from events import AsyncEventQueue

    q = AsyncEventQueue()
    await q.put("test_event", {"data": 123})
    name, payload = await q.get()
    assert name == "test_event"
    assert payload["data"] == 123

View File

@@ -13,53 +13,48 @@ from gui_legacy import App
@pytest.fixture
def app_instance():
    """Yield a gui_legacy.App built inside a DearPyGui context with viewport and rebuild calls patched.

    The DearPyGui context is destroyed in a ``finally`` clause so it is
    released even if constructing App raises.
    """
    dpg.create_context()
    try:
        with (
            patch('dearpygui.dearpygui.create_viewport'),
            patch('dearpygui.dearpygui.setup_dearpygui'),
            patch('dearpygui.dearpygui.show_viewport'),
            patch('dearpygui.dearpygui.start_dearpygui'),
            patch('gui_legacy.load_config', return_value={}),
            patch.object(App, '_rebuild_files_list'),
            patch.object(App, '_rebuild_shots_list'),
            patch.object(App, '_rebuild_disc_list'),
            patch.object(App, '_rebuild_disc_roles_list'),
            patch.object(App, '_rebuild_discussion_selector'),
            patch.object(App, '_refresh_project_widgets'),
        ):
            app = App()
            yield app
    finally:
        dpg.destroy_context()
def test_diagnostics_panel_initialization(app_instance):
    """Check that the Diagnostics window is registered and frame-time history holds 100 entries."""
    assert "Diagnostics" in app_instance.window_info
    assert app_instance.window_info["Diagnostics"] == "win_diagnostics"
    assert "frame_time" in app_instance.perf_history
    assert len(app_instance.perf_history["frame_time"]) == 100
def test_diagnostics_panel_updates(app_instance):
    """Check that _update_performance_diagnostics writes the monitor's metrics to the UI.

    The performance monitor and the DearPyGui calls are mocked. The test
    then checks the formatted text values passed to set_value and the last
    entry of the frame-time history.
    """
    # Mock dependencies.
    mock_metrics = {
        'last_frame_time_ms': 10.0,
        'fps': 100.0,
        'cpu_percent': 50.0,
        'input_lag_ms': 5.0,
    }
    app_instance.perf_monitor.get_metrics = MagicMock(return_value=mock_metrics)

    with (
        patch('dearpygui.dearpygui.is_item_shown', return_value=True),
        patch('dearpygui.dearpygui.set_value') as mock_set_value,
        patch('dearpygui.dearpygui.configure_item'),
        patch('dearpygui.dearpygui.does_item_exist', return_value=True),
        # We also need to mock ai_client stats.
        patch('ai_client.get_history_bleed_stats', return_value={}),
    ):
        app_instance._update_performance_diagnostics()

        # Verify UI updates.
        mock_set_value.assert_any_call("perf_fps_text", "100.0")
        mock_set_value.assert_any_call("perf_frame_text", "10.0ms")
        mock_set_value.assert_any_call("perf_cpu_text", "50.0%")
        mock_set_value.assert_any_call("perf_lag_text", "5.0ms")

        # Verify history update.
        assert app_instance.perf_history["frame_time"][-1] == 10.0

Some files were not shown because too many files have changed in this diff Show More