This fixes an issue where config.toml was erroneously saved to the current working directory (e.g. project dir) rather than the global manual slop directory.
366 lines
15 KiB
Python
# aggregate.py
|
|
from __future__ import annotations
|
|
"""
|
|
Note(Gemini):
|
|
This module orchestrates the construction of the final Markdown context string.
|
|
Instead of sending every file to the AI raw (which blows up tokens), this uses a pipeline:
|
|
1. Resolve paths (handles globs and absolute paths).
|
|
2. Build file items (raw content).
|
|
3. If 'summary_only' is true (which is the default behavior now), it pipes the files through
|
|
summarize.py to generate a compacted view.
|
|
|
|
This is essential for keeping prompt tokens low while giving the AI enough structural info
|
|
to use the MCP tools to fetch only what it needs.
|
|
"""
|
|
import tomllib
|
|
import re
|
|
import glob
|
|
from pathlib import Path, PureWindowsPath
|
|
from typing import Any, cast
|
|
from src import summarize
|
|
from src import project_manager
|
|
from src.file_cache import ASTParser
|
|
|
|
def find_next_increment(output_dir: Path, namespace: str) -> int:
    """Return the next unused numeric suffix for ``<namespace>_<NNN>.md`` files.

    Scans *output_dir* for files matching the namespace pattern and returns
    one more than the highest number found (1 when none exist).
    """
    numbered = re.compile(rf"^{re.escape(namespace)}_(\d+)\.md$")
    highest = 0
    for candidate in output_dir.iterdir():
        if not candidate.is_file():
            continue
        hit = numbered.match(candidate.name)
        if hit:
            highest = max(highest, int(hit.group(1)))
    return highest + 1
|
|
|
|
def is_absolute_with_drive(entry: str) -> bool:
    """True when *entry* carries a Windows drive prefix (e.g. ``C:\\dir\\file``)."""
    try:
        return bool(PureWindowsPath(entry).drive)
    except Exception:
        # Unparseable input is simply treated as "no drive".
        return False
|
|
|
|
def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
    """Expand a config *entry* into a sorted list of concrete file paths.

    Entries may be plain paths or globs; relative entries are anchored at
    *base_dir*, drive-absolute (Windows-style) entries are used as-is.
    History TOML files are always filtered out.
    """
    absolute = is_absolute_with_drive(entry)
    if "*" in entry:
        pattern = str(Path(entry) if absolute else base_dir / entry)
        candidates = [Path(hit) for hit in glob.glob(pattern, recursive=True) if Path(hit).is_file()]
    else:
        candidates = [Path(entry) if absolute else (base_dir / entry).resolve()]

    def _blacklisted(p: Path) -> bool:
        # History files must never leak into the aggregated context.
        lowered = p.name.lower()
        return lowered == "history.toml" or lowered.endswith("_history.toml")

    return sorted(p for p in candidates if not _blacklisted(p))
|
|
|
|
def build_discussion_section(history: list[Any]) -> str:
    """Render discussion history as numbered markdown excerpts.

    Accepts both the legacy ``list[str]`` format and the newer ``list[dict]``
    entries carrying ``role``/``content`` keys.
    """
    excerpts: list[str] = []
    for idx, item in enumerate(history, start=1):
        if isinstance(item, dict):
            body = f"{item.get('role', 'Unknown')}: {item.get('content', '').strip()}"
        else:
            body = str(item).strip()
        excerpts.append(f"### Discussion Excerpt {idx}\n\n{body}")
    return "\n\n---\n\n".join(excerpts)
|
|
|
|
def build_files_section(base_dir: Path, files: list[str | dict[str, Any]]) -> str:
    """Inline the full text of each configured file as fenced markdown blocks.

    Entries may be plain strings or dicts with a ``path`` key; unreadable or
    unmatched entries produce inline ERROR blocks rather than raising.
    """
    rendered: list[str] = []
    for raw in files:
        entry = cast(str, raw.get("path", "")) if isinstance(raw, dict) else raw
        if not entry or not isinstance(entry, str):
            continue
        matched = resolve_paths(base_dir, entry)
        if not matched:
            rendered.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```")
            continue
        for path in matched:
            fence_lang = path.suffix.lstrip(".") or "text"
            try:
                body = path.read_text(encoding="utf-8")
            except FileNotFoundError:
                body = f"ERROR: file not found: {path}"
            except Exception as exc:
                body = f"ERROR: {exc}"
            # Glob entries are labelled with the concrete path they expanded to.
            label = str(path) if "*" in entry else entry
            rendered.append(f"### `{label}`\n\n```{fence_lang}\n{body}\n```")
    return "\n\n---\n\n".join(rendered)
|
|
|
|
def build_screenshots_section(base_dir: Path, screenshots: list[str]) -> str:
    """Build a markdown section embedding each screenshot as an image link.

    Entries may be plain paths or globs (resolved relative to *base_dir*);
    unmatched or missing files produce inline error notes instead of images.
    """
    sections = []
    for entry in screenshots:
        if not entry or not isinstance(entry, str):
            continue
        paths = resolve_paths(base_dir, entry)
        if not paths:
            sections.append(f"### `{entry}`\n\n_ERROR: no files matched: {entry}_")
            continue
        for path in paths:
            original = entry if "*" not in entry else str(path)
            if not path.exists():
                sections.append(f"### `{original}`\n\n_ERROR: file not found: {path}_")
                continue
            # FIX: this previously emitted a truncated "})" fragment instead of
            # a markdown image reference, producing broken output.
            sections.append(f"### `{original}`\n\n![{original}]({path})")
    return "\n\n---\n\n".join(sections)
|
|
|
|
def build_file_items(base_dir: Path, files: list[str | dict[str, Any]]) -> list[dict[str, Any]]:
    """Describe each configured file as a dict, for consumers (e.g. ai_client)
    that upload individual files rather than inlining everything as markdown.

    Each dict has:
        path           : Path (resolved absolute path, or None when unmatched)
        entry          : str (original config entry string)
        content        : str (file text, or error string)
        error          : bool
        mtime          : float (last modification time; 0.0 on error)
        tier           : int | None (optional tier for context management)
        auto_aggregate : bool
        force_full     : bool
    """
    results: list[dict[str, Any]] = []
    for raw in files:
        # Config entries come in three flavours: dicts, attribute objects,
        # and bare path strings.
        if isinstance(raw, dict):
            entry = cast(str, raw.get("path", ""))
            tier = raw.get("tier")
            auto_aggregate = raw.get("auto_aggregate", True)
            force_full = raw.get("force_full", False)
        elif hasattr(raw, "path"):
            entry = raw.path
            tier = getattr(raw, "tier", None)
            auto_aggregate = getattr(raw, "auto_aggregate", True)
            force_full = getattr(raw, "force_full", False)
        else:
            entry, tier, auto_aggregate, force_full = raw, None, True, False
        if not entry or not isinstance(entry, str):
            continue

        def _item(path, content, error, mtime):
            # One-line helper so every branch emits an identically-shaped dict.
            return {"path": path, "entry": entry, "content": content,
                    "error": error, "mtime": mtime, "tier": tier,
                    "auto_aggregate": auto_aggregate, "force_full": force_full}

        matched = resolve_paths(base_dir, entry)
        if not matched:
            results.append(_item(None, f"ERROR: no files matched: {entry}", True, 0.0))
            continue
        for path in matched:
            try:
                text = path.read_text(encoding="utf-8")
                results.append(_item(path, text, False, path.stat().st_mtime))
            except FileNotFoundError:
                results.append(_item(path, f"ERROR: file not found: {path}", True, 0.0))
            except Exception as exc:
                results.append(_item(path, f"ERROR: {exc}", True, 0.0))
    return results
|
|
|
|
def build_summary_section(base_dir: Path, files: list[str | dict[str, Any]]) -> str:
    """Build a compact summary section via summarize.py — one short block per
    file. Used as the initial <context> block instead of full file contents."""
    return summarize.build_summary_markdown(build_file_items(base_dir, files))
|
|
|
|
def _build_files_section_from_items(file_items: list[dict[str, Any]]) -> str:
|
|
"""Build the files markdown section from pre-read file items (avoids double I/O)."""
|
|
sections = []
|
|
for item in file_items:
|
|
if not item.get("auto_aggregate", True):
|
|
continue
|
|
path = item.get("path")
|
|
entry = cast(str, item.get("entry", "unknown"))
|
|
content = cast(str, item.get("content", ""))
|
|
if path is None:
|
|
sections.append(f"### `{entry}`\n\n```text\n{content}\n```")
|
|
continue
|
|
p = cast(Path, path)
|
|
suffix = p.suffix.lstrip(".") if hasattr(p, "suffix") else "text"
|
|
lang = suffix if suffix else "text"
|
|
original = entry if "*" not in entry else str(p)
|
|
sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
|
|
return "\n\n---\n\n".join(sections)
|
|
|
|
def build_markdown_from_items(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
    """Assemble context markdown from pre-read file items (no re-reading).

    Section order matters for prompt caching: files and screenshots form a
    static prefix, while discussion history — which changes every turn —
    always goes last.
    """
    chunks: list[str] = []
    if file_items:
        if summary_only:
            chunks.append("## Files (Summary)\n\n" + summarize.build_summary_markdown(file_items))
        else:
            chunks.append("## Files\n\n" + _build_files_section_from_items(file_items))
    if screenshots:
        chunks.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
    if history:
        chunks.append("## Discussion History\n\n" + build_discussion_section(history))
    return "\n\n---\n\n".join(chunks)
|
|
|
|
def build_markdown_no_history(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
    """Markdown for files + screenshots only — the stable, cache-friendly prefix."""
    return build_markdown_from_items(
        file_items,
        screenshot_base_dir,
        screenshots,
        history=[],
        summary_only=summary_only,
    )
|
|
|
|
def build_discussion_text(history: list[str]) -> str:
    """Return the discussion-history section text, or "" when history is empty."""
    return "## Discussion History\n\n" + build_discussion_section(history) if history else ""
|
|
|
|
def build_tier1_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
    """
    Tier 1 Context: Strategic/Orchestration.

    Core conductor files, items tagged tier=1, and force_full items are
    included verbatim; every other file is compacted via summarize.
    """
    core_names = {"product.md", "tech-stack.md", "workflow.md", "tracks.md"}
    parts: list[str] = []
    if file_items:
        rendered: list[str] = []
        for item in file_items:
            if not item.get("auto_aggregate", True):
                continue
            path = item.get("path")
            is_real_path = bool(path) and isinstance(path, Path)
            name = path.name if is_real_path else ""
            heading = cast(str, item.get("entry")) or str(path)
            if name in core_names or item.get("tier") == 1 or item.get("force_full"):
                # Full inclusion, fenced with the file's extension.
                fence = path.suffix.lstrip(".") if is_real_path and path.suffix else "text"
                rendered.append(f"### `{heading}`\n\n```{fence}\n{item.get('content', '')}\n```")
            elif is_real_path:
                # Everything else gets a compact summary.
                rendered.append(f"### `{heading}`\n\n" +
                                summarize.summarise_file(path, cast(str, item.get("content", ""))))
        parts.append("## Files (Tier 1 - Mixed)\n\n" + "\n\n---\n\n".join(rendered))
    if screenshots:
        parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
    if history:
        parts.append("## Discussion History\n\n" + build_discussion_section(history))
    return "\n\n---\n\n".join(parts)
|
|
|
|
def build_tier2_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
    """
    Tier 2 Context: Architectural/Tech Lead.

    Every file appears in full — identical to the standard markdown build.
    """
    return build_markdown_from_items(
        file_items, screenshot_base_dir, screenshots, history, summary_only=False
    )
|
|
|
|
def build_tier3_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], focus_files: list[str]) -> str:
    """
    Tier 3 Context: Execution/Worker.

    Files named in *focus_files* (plus tier=3 / force_full items) appear in
    full; other Python files are reduced to AST skeletons, and everything
    else to summaries.
    """
    parts: list[str] = []
    if file_items:
        rendered: list[str] = []
        for item in file_items:
            if not item.get("auto_aggregate", True):
                continue
            path = cast(Path, item.get("path"))
            entry = cast(str, item.get("entry", ""))
            path_str = str(path) if path else ""
            heading = entry or path_str
            # A file is "in focus" when a focus entry matches its config
            # string, its basename, or a substring of its full path.
            in_focus = any(
                focus == entry
                or (path and focus == path.name)
                or (path_str and focus in path_str)
                for focus in focus_files
            )
            if in_focus or item.get("tier") == 3 or item.get("force_full"):
                fence = path.suffix.lstrip(".") if path and path.suffix else "text"
                rendered.append(f"### `{heading}`\n\n```{fence}\n{item.get('content', '')}\n```")
                continue
            content = cast(str, item.get("content", ""))
            if path and path.suffix == ".py" and not item.get("error"):
                try:
                    skeleton = ASTParser("python").get_skeleton(content)
                    rendered.append(f"### `{heading}` (AST Skeleton)\n\n```python\n{skeleton}\n```")
                except Exception:
                    # AST parsing can fail on malformed sources; degrade to a summary.
                    rendered.append(f"### `{heading}`\n\n" + summarize.summarise_file(path, content))
            elif path:
                rendered.append(f"### `{heading}`\n\n" + summarize.summarise_file(path, content))
        parts.append("## Files (Tier 3 - Focused)\n\n" + "\n\n---\n\n".join(rendered))
    if screenshots:
        parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
    if history:
        parts.append("## Discussion History\n\n" + build_discussion_section(history))
    return "\n\n---\n\n".join(parts)
|
|
|
|
def build_markdown(base_dir: Path, files: list[str | dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
|
|
parts = []
|
|
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
|
|
if files:
|
|
if summary_only:
|
|
parts.append("## Files (Summary)\n\n" + build_summary_section(base_dir, files))
|
|
else:
|
|
parts.append("## Files\n\n" + build_files_section(base_dir, files))
|
|
if screenshots:
|
|
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
|
|
# DYNAMIC SUFFIX: History changes every turn, must go last
|
|
if history:
|
|
parts.append("## Discussion History\n\n" + build_discussion_section(history))
|
|
return "\n\n---\n\n".join(parts)
|
|
|
|
def run(config: dict[str, Any]) -> tuple[str, Path, list[dict[str, Any]]]:
    """Aggregate the configured context into a new numbered markdown file.

    Returns ``(markdown, output_file, file_items)`` so callers can reuse the
    pre-read file items without touching the disk again.
    """
    # Project name wins; fall back to the legacy output namespace.
    namespace = config.get("project", {}).get("name") or \
        config.get("output", {}).get("namespace", "project")

    output_dir = Path(config["output"]["output_dir"])
    base_dir = Path(config["files"]["base_dir"])
    files = config["files"].get("paths", [])
    screenshot_base_dir = Path(config.get("screenshots", {}).get("base_dir", "."))
    screenshots = config.get("screenshots", {}).get("paths", [])
    history = config.get("discussion", {}).get("history", [])

    output_dir.mkdir(parents=True, exist_ok=True)
    serial = find_next_increment(output_dir, namespace)
    output_file = output_dir / f"{namespace}_{serial:03d}.md"

    # Read every file exactly once; markdown and the returned items are both
    # derived from the same in-memory snapshot (avoids double I/O).
    file_items = build_file_items(base_dir, files)
    summary_only = config.get("project", {}).get("summary_only", False)
    markdown = build_markdown_from_items(
        file_items, screenshot_base_dir, screenshots, history,
        summary_only=summary_only,
    )
    output_file.write_text(markdown, encoding="utf-8")
    return markdown, output_file, file_items
|
|
|
|
def main() -> None:
    """CLI entry point: aggregate context for the globally-active project."""
    # Resolve the global config via src.paths so it is read from the global
    # manual-slop directory rather than the current working directory.
    from src.paths import get_config_path

    config_path = get_config_path()
    if not config_path.exists():
        print(f"{config_path} not found.")
        return

    with open(config_path, "rb") as f:
        global_cfg = tomllib.load(f)

    active_path = global_cfg.get("projects", {}).get("active")
    if not active_path:
        print(f"No active project found in {config_path}.")
        return

    # project_manager handles history segregation; flat_config adapts the
    # project structure to what run() expects.
    proj = project_manager.load_project(active_path)
    config = project_manager.flat_config(proj)
    markdown, output_file, _ = run(config)
    print(f"Written: {output_file}")
|
|
|
|
# Script entry point: only run when executed directly, not on import.
if __name__ == "__main__":
    main()
|