# manual_slop/aggregate.py

# aggregate.py
import tomllib
import re
import glob
from pathlib import Path, PureWindowsPath
import summarize

def find_next_increment(output_dir: Path, namespace: str) -> int:
    """
    Return the next free sequence number for ``<namespace>_<digits>.md`` files.

    Only regular files directly inside output_dir are considered, and only
    when their whole name is the namespace, an underscore, one or more digits
    and ``.md``. The result is the highest number found plus one, or 1 when
    nothing matches.
    """
    name_re = re.compile(rf"^{re.escape(namespace)}_(\d+)\.md$")
    numbers = [
        int(hit.group(1))
        for child in output_dir.iterdir()
        if child.is_file() and (hit := name_re.match(child.name))
    ]
    return max(numbers, default=0) + 1

def is_absolute_with_drive(entry: str) -> bool:
    """
    Report whether entry, parsed as a Windows path, carries a drive component.

    Only the drive is inspected, so a drive-relative entry such as
    ``C:name.txt`` also counts. Any error while parsing (for example a
    non-string entry) is treated as "no drive".
    """
    try:
        drive = PureWindowsPath(entry).drive
    except Exception:
        return False
    return bool(drive)

def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
    """
    Expand one configured path entry into a list of concrete paths.

    Entries that carry a Windows drive are used as given; every other entry
    is joined onto base_dir.

    An entry containing ``*`` is expanded with a recursive glob; only matches
    that are existing files are kept, returned in sorted order (possibly an
    empty list). Any other entry yields exactly one path, and its existence
    is not checked here: drive entries are returned verbatim, the rest are
    resolved after joining with base_dir.
    """
    drive_given = is_absolute_with_drive(entry)
    if "*" not in entry:
        target = Path(entry) if drive_given else (base_dir / entry).resolve()
        return [target]

    search_root = Path(entry) if drive_given else base_dir / entry
    found = []
    for hit in glob.glob(str(search_root), recursive=True):
        candidate = Path(hit)
        if candidate.is_file():
            found.append(candidate)
    found.sort()
    return found

def build_discussion_section(history: list[str]) -> str:
    """
    Render the discussion history as numbered markdown excerpts.

    Each entry is stripped of surrounding whitespace and placed under a
    ``### Discussion Excerpt N`` heading (N starts at 1); excerpts are
    separated by a horizontal rule. An empty history gives an empty string.
    """
    return "\n\n---\n\n".join(
        f"### Discussion Excerpt {number}\n\n{text.strip()}"
        for number, text in enumerate(history, start=1)
    )

def _code_fence(content: str) -> str:
    """Return a backtick fence that is longer than any backtick run in content."""
    longest_run = max((len(run) for run in re.findall(r"`+", content)), default=0)
    # Three backticks is the usual fence; grow it only when the content would
    # otherwise terminate the block early (e.g. an embedded markdown file).
    return "`" * max(3, longest_run + 1)

def build_files_section(base_dir: Path, files: list[str]) -> str:
    """
    Render the full contents of the configured files as markdown.

    Each entry is expanded with resolve_paths. Every resulting file gets a
    ``###`` heading (the original entry, or the matched path for wildcard
    entries) followed by a fenced code block tagged with the file suffix
    ("text" when there is none). The fence is lengthened when the file itself
    contains runs of three or more backticks so the block cannot be closed
    prematurely.

    Failures are reported inline instead of raised: an entry that matches
    nothing, a missing file, or any other read error produces an ``ERROR:``
    message in place of the content. Sections are separated by a horizontal
    rule; an empty files list gives an empty string.
    """
    sections = []
    for entry in files:
        paths = resolve_paths(base_dir, entry)
        if not paths:
            sections.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```")
            continue
        for path in paths:
            lang = path.suffix.lstrip(".") or "text"
            try:
                content = path.read_text(encoding="utf-8")
            except FileNotFoundError:
                content = f"ERROR: file not found: {path}"
            except Exception as e:
                # Best effort: keep aggregating and surface the problem in the output.
                content = f"ERROR: {e}"
            original = entry if "*" not in entry else str(path)
            fence = _code_fence(content)
            sections.append(f"### `{original}`\n\n{fence}{lang}\n{content}\n{fence}")
    return "\n\n---\n\n".join(sections)

def build_screenshots_section(base_dir: Path, screenshots: list[str]) -> str:
    """
    Render the configured screenshots as markdown image links.

    Each entry is expanded with resolve_paths. Every resulting path gets a
    ``###`` heading (the original entry, or the matched path for wildcard
    entries) followed by an image link using the POSIX form of the path.
    Entries that match nothing and paths that do not exist are reported as
    italic ``ERROR:`` lines instead. Sections are separated by a horizontal
    rule; an empty screenshots list gives an empty string.
    """
    blocks = []
    for entry in screenshots:
        resolved = resolve_paths(base_dir, entry)
        if not resolved:
            blocks.append(f"### `{entry}`\n\n_ERROR: no files matched: {entry}_")
            continue
        is_pattern = "*" in entry
        for image in resolved:
            heading = str(image) if is_pattern else entry
            if image.exists():
                body = f"![{image.name}]({image.as_posix()})"
            else:
                body = f"_ERROR: file not found: {image}_"
            blocks.append(f"### `{heading}`\n\n{body}")
    return "\n\n---\n\n".join(blocks)


def build_file_items(base_dir: Path, files: list[str]) -> list[dict]:
    """
    Describe every configured file as a dict, one per resolved path.

    Entries are expanded with resolve_paths. An entry that matches nothing
    contributes a single error item.

    Each dict has:
        path     : Path | None  (as returned by resolve_paths; None when the
                                 entry matched nothing)
        entry    : str          (original config entry string)
        content  : str          (file text read as UTF-8, or an ERROR message)
        error    : bool         (True when content is an error message)
    """
    collected = []
    for entry in files:
        candidates = resolve_paths(base_dir, entry)
        if not candidates:
            collected.append({"path": None, "entry": entry, "content": f"ERROR: no files matched: {entry}", "error": True})
            continue
        for candidate in candidates:
            failed = True
            try:
                text = candidate.read_text(encoding="utf-8")
            except FileNotFoundError:
                text = f"ERROR: file not found: {candidate}"
            except Exception as exc:
                text = f"ERROR: {exc}"
            else:
                failed = False
            collected.append({"path": candidate, "entry": entry, "content": text, "error": failed})
    return collected

def build_summary_section(base_dir: Path, files: list[str]) -> str:
    """
    Build the summarised variant of the files section.

    Collects the per-file items with build_file_items and hands them to
    summarize.build_summary_markdown, whose result is returned unchanged.
    """
    return summarize.build_summary_markdown(build_file_items(base_dir, files))

def build_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
    """
    Assemble the complete markdown document.

    Up to three top-level sections are emitted, in this order and only when
    their input is non-empty: discussion history, files, screenshots. The
    files section is the summary variant when summary_only is true, otherwise
    the full-content variant. Sections are separated by a horizontal rule;
    with no input at all the result is an empty string.
    """
    chunks = []
    if history:
        chunks.append("## Discussion History\n\n" + build_discussion_section(history))
    if files:
        if summary_only:
            heading, body = "## Files (Summary)\n\n", build_summary_section(base_dir, files)
        else:
            heading, body = "## Files\n\n", build_files_section(base_dir, files)
        chunks.append(heading + body)
    if screenshots:
        chunks.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
    return "\n\n---\n\n".join(chunks)

def run(config: dict) -> tuple[str, Path, list[dict]]:
    """
    Build the aggregate markdown for config and write it to a new numbered file.

    Values read from config:
        project.name          : file-name prefix; when missing or empty,
                                output.namespace is used (default "project")
        output.output_dir     : directory for the generated file (required)
        files.base_dir        : base directory for file entries (required)
        files.paths           : file entries (default [])
        screenshots.base_dir  : base directory for screenshots (default ".")
        screenshots.paths     : screenshot entries (default [])
        discussion.history    : discussion excerpts (default [])

    The output directory is created if needed and the document is written as
    ``<namespace>_<NNN>.md`` using the next free number. The markdown is
    always built with the summarised files section.

    Returns:
        (markdown, output_file, file_items) - the generated text, the path it
        was written to, and the per-file dicts from build_file_items.

    Raises:
        KeyError: when a required config key is missing.
    """
    namespace = config.get("project", {}).get("name")
    if not namespace:
        namespace = config.get("output", {}).get("namespace", "project")
    output_dir = Path(config["output"]["output_dir"])
    base_dir = Path(config["files"]["base_dir"])
    files = config["files"].get("paths", [])
    screenshot_cfg = config.get("screenshots", {})
    screenshot_base_dir = Path(screenshot_cfg.get("base_dir", "."))
    screenshots = screenshot_cfg.get("paths", [])
    history = config.get("discussion", {}).get("history", [])

    output_dir.mkdir(parents=True, exist_ok=True)
    increment = find_next_increment(output_dir, namespace)
    output_file = output_dir / f"{namespace}_{increment:03d}.md"
    markdown = build_markdown(base_dir, files, screenshot_base_dir, screenshots, history,
                              summary_only=True)
    output_file.write_text(markdown, encoding="utf-8")
    # NOTE: the summary path inside build_markdown already goes through
    # build_file_items, so the files are read a second time here.
    file_items = build_file_items(base_dir, files)
    return markdown, output_file, file_items

def main() -> None:
    """
    Command-line entry point.

    Loads ``config.toml`` from the current working directory, runs the
    aggregation and prints the path of the file that was written.
    """
    # tomllib only accepts a binary file object.
    with open("config.toml", "rb") as f:
        config = tomllib.load(f)
    _markdown, output_file, _file_items = run(config)
    print(f"Written: {output_file}")

# Run the aggregation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()