# aggregate.py
|
|
import tomllib
|
|
import re
|
|
import glob
|
|
from pathlib import Path, PureWindowsPath
|
|
|
|
def find_next_increment(output_dir: Path, namespace: str) -> int:
    """Return the next unused sequence number for ``<namespace>_<N>.md``.

    Looks at the regular files directly inside *output_dir* whose names have
    the form ``<namespace>_<digits>.md`` and returns one more than the largest
    number found. Returns 1 when no such file exists.
    """
    name_re = re.compile(rf"^{re.escape(namespace)}_(\d+)\.md$")
    taken = (
        int(hit.group(1))
        for child in output_dir.iterdir()
        # Directories that happen to match the naming scheme are ignored.
        if child.is_file() and (hit := name_re.match(child.name))
    )
    return max(taken, default=0) + 1
|
|
|
|
def is_absolute_with_drive(entry: str) -> bool:
    """Return True when *entry* carries a Windows-style drive prefix.

    The entry is parsed with ``PureWindowsPath`` regardless of the host OS.
    Only the presence of a drive component is checked, so drive letters
    (``C:/dir/file``), UNC shares (``//server/share/file``) and drive-relative
    entries (``C:file``) all return True.

    Input that is not a string or path-like object (for example ``None`` from
    a malformed config) returns False instead of raising.
    """
    try:
        return PureWindowsPath(entry).drive != ""
    except TypeError:
        # PureWindowsPath rejects non-str/PathLike arguments with TypeError;
        # anything else would be a genuine bug and should surface.
        return False
|
|
|
|
def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
    """Expand one configured entry into the paths it refers to.

    An entry containing ``*`` is treated as a glob pattern (``**`` recurses)
    and yields the sorted list of matching regular files, which may be empty.
    Any other entry yields exactly one path without checking that it exists.

    Entries with a Windows drive are used as written; all others are taken
    relative to *base_dir* (and, for non-glob entries, resolved to an
    absolute path).
    """
    drive_qualified = is_absolute_with_drive(entry)

    if "*" not in entry:
        # Literal entry: a single path, existence is the caller's concern.
        if drive_qualified:
            return [Path(entry)]
        return [(base_dir / entry).resolve()]

    pattern = Path(entry) if drive_qualified else base_dir / entry
    found = []
    for hit in glob.glob(str(pattern), recursive=True):
        hit_path = Path(hit)
        # Globs can match directories; only regular files are aggregated.
        if hit_path.is_file():
            found.append(hit_path)
    found.sort()
    return found
|
|
|
|
def build_markdown(base_dir: Path, files: list[str]) -> str:
    """Render the configured entries as Markdown sections of fenced code.

    Each entry in *files* is expanded with ``resolve_paths``. Every resulting
    file becomes one section: a ``###`` heading followed by a fenced code
    block holding the file's UTF-8 text. The heading shows the entry as
    written, or the matched path when the entry is a ``*`` pattern. The fence
    language tag is the file extension, or ``text`` when there is none.

    Failures never abort the run: an entry matching nothing, a missing file,
    or an unreadable/undecodable file each produce a section whose body is an
    ``ERROR: ...`` line.

    Returns the sections joined by horizontal rules (an empty string when
    *files* is empty).
    """
    sections = []
    for entry in files:
        paths = resolve_paths(base_dir, entry)
        if not paths:
            sections.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```")
            continue
        for path in paths:
            lang = path.suffix.lstrip(".") or "text"
            try:
                content = path.read_text(encoding="utf-8")
            except FileNotFoundError:
                content = f"ERROR: file not found: {path}"
            except (OSError, ValueError) as exc:
                # OSError: permissions, directories, I/O failures.
                # ValueError: includes UnicodeDecodeError for non-UTF-8 files.
                content = f"ERROR: {exc}"
            label = str(path) if "*" in entry else entry
            # A fixed ``` fence would be closed early by content that itself
            # contains a fence (e.g. an aggregated Markdown file).
            fence = _fence_for(content)
            sections.append(f"### `{label}`\n\n{fence}{lang}\n{content}\n{fence}")
    return "\n\n---\n\n".join(sections)


def _fence_for(content: str) -> str:
    """Return a backtick fence longer than any backtick run in *content*."""
    longest_run = max((len(run) for run in re.findall(r"`+", content)), default=0)
    return "`" * max(3, longest_run + 1)
|
|
|
|
def main():
    """Aggregate the files named in ``config.toml`` into one Markdown file.

    Reads ``config.toml`` from the current working directory, using the keys
    ``output.namespace``, ``output.output_dir``, ``files.base_dir`` and the
    optional list ``files.paths``. The output directory is created if needed,
    and the result is written to ``<namespace>_<NNN>.md`` inside it, where
    ``NNN`` is the next free zero-padded sequence number. Prints the path of
    the file that was written.
    """
    with Path("config.toml").open("rb") as handle:
        config = tomllib.load(handle)

    output_cfg = config["output"]
    namespace = output_cfg["namespace"]
    output_dir = Path(output_cfg["output_dir"])

    files_cfg = config["files"]
    base_dir = Path(files_cfg["base_dir"])
    files = files_cfg.get("paths", [])

    # The directory must exist before it can be scanned for earlier outputs.
    output_dir.mkdir(parents=True, exist_ok=True)

    sequence = find_next_increment(output_dir, namespace)
    target = output_dir / f"{namespace}_{sequence:03d}.md"

    target.write_text(build_markdown(base_dir, files), encoding="utf-8")
    print(f"Written: {target}")
|
|
|
|
# Run the aggregation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|