# aggregate.py
|
|
import tomllib
|
|
import re
|
|
import glob
|
|
from pathlib import Path, PureWindowsPath
|
|
|
|
def find_next_increment(output_dir: Path, namespace: str) -> int:
    """Return one more than the highest ``<namespace>_<NNN>.md`` number in *output_dir*.

    Starts at 1 when no matching file exists yet.
    """
    name_re = re.compile(rf"^{re.escape(namespace)}_(\d+)\.md$")
    numbers = [
        int(m.group(1))
        for candidate in output_dir.iterdir()
        if candidate.is_file() and (m := name_re.match(candidate.name))
    ]
    return max(numbers, default=0) + 1
|
|
|
|
def is_absolute_with_drive(entry: str) -> bool:
    """True when *entry* carries a Windows drive (e.g. ``C:\\...`` or a UNC share)."""
    try:
        return bool(PureWindowsPath(entry).drive)
    except Exception:
        # A string PureWindowsPath cannot parse is treated as drive-less.
        return False
|
|
|
|
def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
    """Expand a config *entry* (literal path, drive-absolute path, or glob) to file paths.

    Literal entries are anchored at *base_dir* unless they already name a drive;
    glob entries are expanded recursively and only matching files are kept.
    """
    absolute = is_absolute_with_drive(entry)
    if "*" not in entry:
        # Plain path: drive-qualified entries stand alone, others resolve under base_dir.
        return [Path(entry)] if absolute else [(base_dir / entry).resolve()]
    pattern = str(Path(entry) if absolute else base_dir / entry)
    hits = (Path(hit) for hit in glob.glob(pattern, recursive=True))
    return sorted(hit for hit in hits if hit.is_file())
|
|
|
|
def build_discussion_section(history: list[str]) -> str:
    """Render each pasted excerpt as a numbered markdown section, rule-separated."""
    rendered = [
        f"### Discussion Excerpt {num}\n\n{text.strip()}"
        for num, text in enumerate(history, start=1)
    ]
    return "\n\n---\n\n".join(rendered)
|
|
|
|
def build_files_section(base_dir: Path, files: list[str]) -> str:
    """Render each configured file entry as a fenced markdown code block.

    Entries that match nothing, or files that cannot be read, produce inline
    ERROR blocks instead of raising.
    """
    rendered: list[str] = []
    for entry in files:
        matched = resolve_paths(base_dir, entry)
        if not matched:
            rendered.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```")
            continue
        for path in matched:
            ext = path.suffix.lstrip(".")
            fence_lang = ext or "text"
            try:
                body = path.read_text(encoding="utf-8")
            except FileNotFoundError:
                body = f"ERROR: file not found: {path}"
            except Exception as exc:
                body = f"ERROR: {exc}"
            # Glob entries label each match by its concrete path; literal
            # entries keep the original config string as the heading.
            heading = str(path) if "*" in entry else entry
            rendered.append(f"### `{heading}`\n\n```{fence_lang}\n{body}\n```")
    return "\n\n---\n\n".join(rendered)
|
|
|
|
def build_screenshots_section(base_dir: Path, screenshots: list[str]) -> str:
    """Render each screenshot entry as a markdown image reference.

    Entries may be literal paths or globs, resolved against *base_dir*.
    Missing matches or missing files produce inline error markers instead
    of raising.
    """
    sections = []
    for entry in screenshots:
        paths = resolve_paths(base_dir, entry)
        if not paths:
            sections.append(f"### `{entry}`\n\n_ERROR: no files matched: {entry}_")
            continue
        for path in paths:
            original = entry if "*" not in entry else str(path)
            if not path.exists():
                sections.append(f"### `{original}`\n\n_ERROR: file not found: {path}_")
                continue
            # BUG FIX: this line previously emitted the literal text "})" —
            # the markdown image syntax had been mangled. Emit a proper
            # image reference pointing at the resolved path.
            sections.append(f"### `{original}`\n\n![{original}]({path})")
    return "\n\n---\n\n".join(sections)
|
|
|
|
|
|
def build_file_items(base_dir: Path, files: list[str]) -> list[dict]:
    """Describe each configured file for callers that upload files individually.

    Each returned dict carries:
        path    : resolved ``Path`` (``None`` when the entry matched nothing)
        entry   : the original config entry string
        content : file text, or an error message
        error   : True when ``content`` holds an error message
    """
    described: list[dict] = []
    for entry in files:
        matched = resolve_paths(base_dir, entry)
        if not matched:
            described.append({
                "path": None,
                "entry": entry,
                "content": f"ERROR: no files matched: {entry}",
                "error": True,
            })
            continue
        for path in matched:
            failed = True
            try:
                text = path.read_text(encoding="utf-8")
                failed = False
            except FileNotFoundError:
                text = f"ERROR: file not found: {path}"
            except Exception as exc:
                text = f"ERROR: {exc}"
            described.append({"path": path, "entry": entry, "content": text, "error": failed})
    return described
|
|
def build_markdown(base_dir: Path, files: list[str], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
    """Assemble the aggregate markdown document from its optional sections.

    Sections are included only when their source list is non-empty, and are
    joined by horizontal rules.
    """
    labeled: list[tuple[str, str]] = []
    if history:
        labeled.append(("## Discussion History", build_discussion_section(history)))
    if files:
        labeled.append(("## Files", build_files_section(base_dir, files)))
    if screenshots:
        labeled.append(("## Screenshots", build_screenshots_section(screenshot_base_dir, screenshots)))
    return "\n\n---\n\n".join(f"{title}\n\n{body}" for title, body in labeled)
|
|
|
|
def run(config: dict) -> tuple[str, Path, list[dict]]:
    """Aggregate configured files, screenshots and discussion into a markdown report.

    Reads the [output], [files], [screenshots] and [discussion] sections of
    *config*, writes the report under the namespaced incrementing filename,
    and returns ``(markdown, output_file, file_items)``.

    BUG FIX: the return annotation previously declared ``tuple[str, Path]``
    while the function actually returns three values.
    """
    namespace = config["output"]["namespace"]
    output_dir = Path(config["output"]["output_dir"])
    base_dir = Path(config["files"]["base_dir"])
    files = config["files"].get("paths", [])
    # Screenshots and discussion are optional config sections.
    screenshot_base_dir = Path(config.get("screenshots", {}).get("base_dir", "."))
    screenshots = config.get("screenshots", {}).get("paths", [])
    history = config.get("discussion", {}).get("history", [])

    output_dir.mkdir(parents=True, exist_ok=True)
    increment = find_next_increment(output_dir, namespace)
    # Zero-pad so lexicographic and numeric ordering of report names agree.
    output_file = output_dir / f"{namespace}_{increment:03d}.md"

    markdown = build_markdown(base_dir, files, screenshot_base_dir, screenshots, history)
    output_file.write_text(markdown, encoding="utf-8")
    file_items = build_file_items(base_dir, files)
    return markdown, output_file, file_items
|
|
|
|
def main():
    """CLI entry point: read ./config.toml and write the aggregated report.

    BUG FIX: ``run()`` returns three values; the original two-target unpack
    raised ``ValueError`` on every invocation. The redundant function-local
    ``import tomllib`` is also removed — the module is already imported at
    file level.
    """
    # config.toml is TOML, so it must be opened in binary mode for tomllib.
    with open("config.toml", "rb") as f:
        config = tomllib.load(f)
    markdown, output_file, _file_items = run(config)
    print(f"Written: {output_file}")
|
|
|
|
# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
|
|
|
|