# manual_slop/aggregate.py

# aggregate.py
"""
Note(Gemini):
This module orchestrates the construction of the final Markdown context string.
Instead of sending every file to the AI raw (which blows up tokens), this uses a pipeline:
1. Resolve paths (handles globs and absolute paths).
2. Build file items (raw content).
3. If 'summary_only' is true, it pipes the files through summarize.py to generate a
   compacted view. (The builders in this module default 'summary_only' to False, so
   callers must opt in explicitly.)

This is essential for keeping prompt tokens low while giving the AI enough structural info
to use the MCP tools to fetch only what it needs.
"""
import tomllib
import re
import glob
from pathlib import Path, PureWindowsPath
import summarize

def find_next_increment(output_dir: Path, namespace: str) -> int:
    """
    Return the next free sequence number for `<namespace>_<N>.md` files.

    Looks at the regular files directly inside `output_dir`, takes the highest
    numeric suffix among names of the form `<namespace>_<digits>.md`, and returns
    that value plus one. Returns 1 when no such file exists.
    """
    name_re = re.compile(rf"^{re.escape(namespace)}_(\d+)\.md$")
    # Directories are skipped even if their name happens to fit the pattern.
    hits = (
        name_re.match(candidate.name)
        for candidate in output_dir.iterdir()
        if candidate.is_file()
    )
    highest = max((int(hit.group(1)) for hit in hits if hit), default=0)
    return highest + 1

def is_absolute_with_drive(entry: str) -> bool:
    """
    Report whether `entry` has a non-empty drive when parsed as a Windows path.

    The string is interpreted with `PureWindowsPath`, so the answer does not depend
    on the platform the code runs on. Any exception raised while parsing (for
    example a non-string argument) yields False.
    """
    try:
        drive = PureWindowsPath(entry).drive
    except Exception:
        return False
    return bool(drive)

def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
    """
    Expand one config entry into a list of filesystem paths.

    Entries containing `*` are treated as glob patterns (recursive `**` enabled)
    and expand to the sorted list of matching regular files, which may be empty.
    Any other entry yields exactly one path, whether or not it exists: an entry
    with a drive is returned as given, otherwise it is joined to `base_dir` and
    resolved.
    """
    drive_qualified = is_absolute_with_drive(entry)

    if "*" not in entry:
        single = Path(entry) if drive_qualified else (base_dir / entry).resolve()
        return [single]

    # Drive-qualified patterns are globbed as given; others are anchored at base_dir.
    pattern = Path(entry) if drive_qualified else base_dir / entry
    found = []
    for hit in glob.glob(str(pattern), recursive=True):
        candidate = Path(hit)
        if candidate.is_file():
            found.append(candidate)
    found.sort()
    return found

def build_discussion_section(history: list[str]) -> str:
    """
    Render discussion excerpts as numbered Markdown subsections.

    Each entry is stripped of surrounding whitespace and placed under a
    "### Discussion Excerpt N" heading (N counts from 1); subsections are separated
    by horizontal rules. An empty history yields an empty string.
    """
    divider = "\n\n---\n\n"
    return divider.join(
        f"### Discussion Excerpt {number}\n\n{excerpt.strip()}"
        for number, excerpt in enumerate(history, start=1)
    )

def _code_fence_for(content: str) -> str:
    """Return a backtick fence strictly longer than any backtick run in `content`."""
    longest_run = max((len(run.group()) for run in re.finditer(r"`+", content)), default=0)
    return "`" * max(3, longest_run + 1)

def build_files_section(base_dir: Path, files: list[str]) -> str:
    """
    Render the full contents of every configured file as fenced Markdown blocks.

    Each entry in `files` is expanded with resolve_paths. Every resulting path gets
    a "### `name`" heading followed by a fenced code block tagged with the file's
    extension ("text" when there is none). The heading shows the original entry,
    or the concrete path when the entry was a glob pattern. Read failures and
    entries that match nothing are reported inline as "ERROR: ..." text rather
    than raised. Blocks are separated by horizontal rules; an empty `files` list
    yields an empty string.
    """
    sections = []
    for entry in files:
        paths = resolve_paths(base_dir, entry)
        if not paths:
            sections.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```")
            continue
        for path in paths:
            lang = path.suffix.lstrip(".") or "text"
            try:
                content = path.read_text(encoding="utf-8")
            except FileNotFoundError:
                content = f"ERROR: file not found: {path}"
            except Exception as e:
                content = f"ERROR: {e}"
            original = entry if "*" not in entry else str(path)
            # A file that itself contains ``` (e.g. a Markdown file) would close a
            # fixed three-backtick fence early, so size the fence to the content.
            fence = _code_fence_for(content)
            sections.append(f"### `{original}`\n\n{fence}{lang}\n{content}\n{fence}")
    return "\n\n---\n\n".join(sections)

def build_screenshots_section(base_dir: Path, screenshots: list[str]) -> str:
    """
    Render screenshot entries as Markdown image links.

    Each entry is expanded with resolve_paths. Every resulting path gets a
    "### `name`" heading (the original entry, or the concrete path for glob
    entries) followed by an image link using the path in POSIX form. Entries that
    match nothing and paths that do not exist are reported inline as italic
    "ERROR: ..." lines. Blocks are separated by horizontal rules.
    """
    blocks = []
    for entry in screenshots:
        resolved = resolve_paths(base_dir, entry)
        if not resolved:
            blocks.append(f"### `{entry}`\n\n_ERROR: no files matched: {entry}_")
            continue
        from_glob = "*" in entry
        for image in resolved:
            title = str(image) if from_glob else entry
            if image.exists():
                body = f"![{image.name}]({image.as_posix()})"
            else:
                body = f"_ERROR: file not found: {image}_"
            blocks.append(f"### `{title}`\n\n{body}")
    return "\n\n---\n\n".join(blocks)


def build_file_items(base_dir: Path, files: list[str]) -> list[dict]:
    """
    Collect one record per resolved file, for use by ai_client when it wants to
    upload individual files rather than inline everything as markdown.

    Each record is a dict with:
        path     : Path | None  (path from resolve_paths; None when the entry matched nothing)
        entry    : str          (original config entry string)
        content  : str          (file text decoded as UTF-8, or an "ERROR: ..." message)
        error    : bool         (True when content holds an error message)
    """
    records = []
    for entry in files:
        resolved = resolve_paths(base_dir, entry)
        if not resolved:
            records.append({
                "path": None,
                "entry": entry,
                "content": f"ERROR: no files matched: {entry}",
                "error": True,
            })
            continue
        for path in resolved:
            failed = True
            try:
                text = path.read_text(encoding="utf-8")
            except FileNotFoundError:
                text = f"ERROR: file not found: {path}"
            except Exception as exc:
                text = f"ERROR: {exc}"
            else:
                failed = False
            records.append({"path": path, "entry": entry, "content": text, "error": failed})
    return records

def build_summary_section(base_dir: Path, files: list[str]) -> str:
    """
    Produce the compact per-file summary Markdown for `files`.

    Loads every entry through build_file_items and hands the resulting records to
    summarize.build_summary_markdown, returning its result unchanged. Used as the
    initial <context> block instead of full file contents.
    """
    return summarize.build_summary_markdown(build_file_items(base_dir, files))

def build_static_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
    """
    Build the static (cacheable) portion of the context: files + screenshots.

    The files part comes first and is either the summary view (`summary_only`
    true) or the full file contents; the screenshots part follows. A part is
    omitted when its input list is empty. Parts are separated by a horizontal
    rule; the result is an empty string when both lists are empty.
    """
    chunks = []
    if files:
        if summary_only:
            heading, body = "## Files (Summary)\n\n", build_summary_section(base_dir, files)
        else:
            heading, body = "## Files\n\n", build_files_section(base_dir, files)
        chunks.append(heading + body)
    if screenshots:
        gallery = build_screenshots_section(screenshot_base_dir, screenshots)
        chunks.append("## Screenshots\n\n" + gallery)
    return "\n\n---\n\n".join(chunks)

def build_dynamic_markdown(history: list[str]) -> str:
    """
    Build the dynamic (changes every turn) portion: discussion history.

    Returns an empty string when `history` is empty; otherwise the rendered
    discussion excerpts under a "## Discussion History" heading.
    """
    if not history:
        return ""
    return "## Discussion History\n\n" + build_discussion_section(history)

def build_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
    """
    Build the complete context document: files, then screenshots, then history.

    Each part is included only when its input list is non-empty, and parts are
    separated by a horizontal rule. With `summary_only` true the files part is the
    compact summary view instead of the full contents.
    """
    separator = "\n\n---\n\n"
    chunks = []

    # Static prefix: files and screenshots go first to maximize cache hits.
    if files:
        files_md = (
            "## Files (Summary)\n\n" + build_summary_section(base_dir, files)
            if summary_only
            else "## Files\n\n" + build_files_section(base_dir, files)
        )
        chunks.append(files_md)
    if screenshots:
        gallery = build_screenshots_section(screenshot_base_dir, screenshots)
        chunks.append("## Screenshots\n\n" + gallery)

    # Dynamic suffix: history changes every turn, so it must go last.
    if history:
        chunks.append("## Discussion History\n\n" + build_discussion_section(history))

    return separator.join(chunks)

def run(config: dict) -> tuple[str, str, Path, list[dict]]:
    """
    Build the context Markdown described by `config`, archive it, and return the pieces.

    Config keys read:
        project.name         : output file prefix; falls back to output.namespace,
                               then to "project"
        output.output_dir    : required; directory for the archived file (created if missing)
        files.base_dir       : required; base directory for relative file entries
        files.paths          : optional list of file entries (default: [])
        screenshots.base_dir : optional (default: ".")
        screenshots.paths    : optional list of screenshot entries (default: [])
        discussion.history   : optional list of discussion excerpts (default: [])

    The combined document is written to `<output_dir>/<namespace>_<NNN>.md`, where
    NNN is the next free sequence number, zero-padded to three digits.

    Returns:
        (static_md, dynamic_md, output_file, file_items): the files+screenshots
        Markdown, the discussion Markdown, the path of the archived file, and the
        per-file records from build_file_items.

    Raises:
        KeyError: if `output.output_dir` or `files.base_dir` is missing from `config`.
    """
    namespace = config.get("project", {}).get("name")
    if not namespace:
        namespace = config.get("output", {}).get("namespace", "project")
    output_dir = Path(config["output"]["output_dir"])
    base_dir = Path(config["files"]["base_dir"])
    files = config["files"].get("paths", [])
    screenshot_base_dir = Path(config.get("screenshots", {}).get("base_dir", "."))
    screenshots = config.get("screenshots", {}).get("paths", [])
    history = config.get("discussion", {}).get("history", [])

    output_dir.mkdir(parents=True, exist_ok=True)
    increment = find_next_increment(output_dir, namespace)
    output_file = output_dir / f"{namespace}_{increment:03d}.md"
    # Build static (files+screenshots) and dynamic (discussion) portions separately for better caching
    static_md = build_static_markdown(base_dir, files, screenshot_base_dir, screenshots, summary_only=False)
    dynamic_md = build_dynamic_markdown(history)
    # Write combined markdown to disk for archival; the separator is only used
    # when both portions are non-empty.
    markdown = f"{static_md}\n\n---\n\n{dynamic_md}" if static_md and dynamic_md else static_md or dynamic_md
    output_file.write_text(markdown, encoding="utf-8")
    file_items = build_file_items(base_dir, files)
    return static_md, dynamic_md, output_file, file_items

def main():
    """
    Command-line entry point.

    Loads `config.toml` from the current working directory, runs the aggregation,
    and prints the path of the archived Markdown file.
    """
    # tomllib requires the file to be opened in binary mode.
    with open("config.toml", "rb") as f:
        config = tomllib.load(f)
    # Only the output path is needed here; the Markdown strings and file items are discarded.
    _, _, output_file, _ = run(config)
    print(f"Written: {output_file}")

# Script entry point: run the aggregation only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()