checkpoint: massive refactor

This commit is contained in:
2026-02-28 09:06:45 -05:00
parent f2512c30e9
commit d36632c21a
149 changed files with 16255 additions and 17722 deletions

View File

@@ -15,98 +15,94 @@ import tomllib
import re
import glob
from pathlib import Path, PureWindowsPath
from typing import Any
import summarize
import project_manager
from file_cache import ASTParser
def find_next_increment(output_dir: Path, namespace: str) -> int:
pattern = re.compile(rf"^{re.escape(namespace)}_(\d+)\.md$")
max_num = 0
for f in output_dir.iterdir():
if f.is_file():
match = pattern.match(f.name)
if match:
max_num = max(max_num, int(match.group(1)))
return max_num + 1
pattern = re.compile(rf"^{re.escape(namespace)}_(\d+)\.md$")
max_num = 0
for f in output_dir.iterdir():
if f.is_file():
match = pattern.match(f.name)
if match:
max_num = max(max_num, int(match.group(1)))
return max_num + 1
def is_absolute_with_drive(entry: str) -> bool:
try:
p = PureWindowsPath(entry)
return p.drive != ""
except Exception:
return False
try:
p = PureWindowsPath(entry)
return p.drive != ""
except Exception:
return False
def resolve_paths(base_dir: Path, entry: str) -> list[Path]:
has_drive = is_absolute_with_drive(entry)
is_wildcard = "*" in entry
matches = []
if is_wildcard:
root = Path(entry) if has_drive else base_dir / entry
matches = [Path(p) for p in glob.glob(str(root), recursive=True) if Path(p).is_file()]
else:
p = Path(entry) if has_drive else (base_dir / entry).resolve()
matches = [p]
# Blacklist filter
filtered = []
for p in matches:
name = p.name.lower()
if name == "history.toml" or name.endswith("_history.toml"):
continue
filtered.append(p)
return sorted(filtered)
has_drive = is_absolute_with_drive(entry)
is_wildcard = "*" in entry
matches = []
if is_wildcard:
root = Path(entry) if has_drive else base_dir / entry
matches = [Path(p) for p in glob.glob(str(root), recursive=True) if Path(p).is_file()]
else:
p = Path(entry) if has_drive else (base_dir / entry).resolve()
matches = [p]
# Blacklist filter
filtered = []
for p in matches:
name = p.name.lower()
if name == "history.toml" or name.endswith("_history.toml"):
continue
filtered.append(p)
return sorted(filtered)
def build_discussion_section(history: list[str]) -> str:
sections = []
for i, paste in enumerate(history, start=1):
sections.append(f"### Discussion Excerpt {i}\n\n{paste.strip()}")
return "\n\n---\n\n".join(sections)
sections = []
for i, paste in enumerate(history, start=1):
sections.append(f"### Discussion Excerpt {i}\n\n{paste.strip()}")
return "\n\n---\n\n".join(sections)
def build_files_section(base_dir: Path, files: list[str | dict]) -> str:
sections = []
for entry_raw in files:
if isinstance(entry_raw, dict):
entry = entry_raw.get("path")
else:
entry = entry_raw
paths = resolve_paths(base_dir, entry)
if not paths:
sections.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```")
continue
for path in paths:
suffix = path.suffix.lstrip(".")
lang = suffix if suffix else "text"
try:
content = path.read_text(encoding="utf-8")
except FileNotFoundError:
content = f"ERROR: file not found: {path}"
except Exception as e:
content = f"ERROR: {e}"
original = entry if "*" not in entry else str(path)
sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
return "\n\n---\n\n".join(sections)
def build_files_section(base_dir: Path, files: list[str | dict[str, Any]]) -> str:
sections = []
for entry_raw in files:
if isinstance(entry_raw, dict):
entry = entry_raw.get("path")
else:
entry = entry_raw
paths = resolve_paths(base_dir, entry)
if not paths:
sections.append(f"### `{entry}`\n\n```text\nERROR: no files matched: {entry}\n```")
continue
for path in paths:
suffix = path.suffix.lstrip(".")
lang = suffix if suffix else "text"
try:
content = path.read_text(encoding="utf-8")
except FileNotFoundError:
content = f"ERROR: file not found: {path}"
except Exception as e:
content = f"ERROR: {e}"
original = entry if "*" not in entry else str(path)
sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
return "\n\n---\n\n".join(sections)
def build_screenshots_section(base_dir: Path, screenshots: list[str]) -> str:
sections = []
for entry in screenshots:
paths = resolve_paths(base_dir, entry)
if not paths:
sections.append(f"### `{entry}`\n\n_ERROR: no files matched: {entry}_")
continue
for path in paths:
original = entry if "*" not in entry else str(path)
if not path.exists():
sections.append(f"### `{original}`\n\n_ERROR: file not found: {path}_")
continue
sections.append(f"### `{original}`\n\n![{path.name}]({path.as_posix()})")
return "\n\n---\n\n".join(sections)
sections = []
for entry in screenshots:
paths = resolve_paths(base_dir, entry)
if not paths:
sections.append(f"### `{entry}`\n\n_ERROR: no files matched: {entry}_")
continue
for path in paths:
original = entry if "*" not in entry else str(path)
if not path.exists():
sections.append(f"### `{original}`\n\n_ERROR: file not found: {path}_")
continue
sections.append(f"### `{original}`\n\n![{path.name}]({path.as_posix()})")
return "\n\n---\n\n".join(sections)
def build_file_items(base_dir: Path, files: list[str | dict]) -> list[dict]:
"""
def build_file_items(base_dir: Path, files: list[str | dict[str, Any]]) -> list[dict[str, Any]]:
"""
Return a list of dicts describing each file, for use by ai_client when it
wants to upload individual files rather than inline everything as markdown.
@@ -118,240 +114,213 @@ def build_file_items(base_dir: Path, files: list[str | dict]) -> list[dict]:
mtime : float (last modification time, for skip-if-unchanged optimization)
tier : int | None (optional tier for context management)
"""
items = []
for entry_raw in files:
if isinstance(entry_raw, dict):
entry = entry_raw.get("path")
tier = entry_raw.get("tier")
else:
entry = entry_raw
tier = None
items = []
for entry_raw in files:
if isinstance(entry_raw, dict):
entry = entry_raw.get("path")
tier = entry_raw.get("tier")
else:
entry = entry_raw
tier = None
paths = resolve_paths(base_dir, entry)
if not paths:
items.append({"path": None, "entry": entry, "content": f"ERROR: no files matched: {entry}", "error": True, "mtime": 0.0, "tier": tier})
continue
for path in paths:
try:
content = path.read_text(encoding="utf-8")
mtime = path.stat().st_mtime
error = False
except FileNotFoundError:
content = f"ERROR: file not found: {path}"
mtime = 0.0
error = True
except Exception as e:
content = f"ERROR: {e}"
mtime = 0.0
error = True
items.append({"path": path, "entry": entry, "content": content, "error": error, "mtime": mtime, "tier": tier})
return items
paths = resolve_paths(base_dir, entry)
if not paths:
items.append({"path": None, "entry": entry, "content": f"ERROR: no files matched: {entry}", "error": True, "mtime": 0.0, "tier": tier})
continue
for path in paths:
try:
content = path.read_text(encoding="utf-8")
mtime = path.stat().st_mtime
error = False
except FileNotFoundError:
content = f"ERROR: file not found: {path}"
mtime = 0.0
error = True
except Exception as e:
content = f"ERROR: {e}"
mtime = 0.0
error = True
items.append({"path": path, "entry": entry, "content": content, "error": error, "mtime": mtime, "tier": tier})
return items
def build_summary_section(base_dir: Path, files: list[str | dict]) -> str:
"""
def build_summary_section(base_dir: Path, files: list[str | dict[str, Any]]) -> str:
"""
Build a compact summary section using summarize.py — one short block per file.
Used as the initial <context> block instead of full file contents.
"""
items = build_file_items(base_dir, files)
return summarize.build_summary_markdown(items)
items = build_file_items(base_dir, files)
return summarize.build_summary_markdown(items)
def _build_files_section_from_items(file_items: list[dict]) -> str:
"""Build the files markdown section from pre-read file items (avoids double I/O)."""
sections = []
for item in file_items:
path = item.get("path")
entry = item.get("entry", "unknown")
content = item.get("content", "")
if path is None:
sections.append(f"### `{entry}`\n\n```text\n{content}\n```")
continue
suffix = path.suffix.lstrip(".") if hasattr(path, "suffix") else "text"
lang = suffix if suffix else "text"
original = entry if "*" not in entry else str(path)
sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
return "\n\n---\n\n".join(sections)
def _build_files_section_from_items(file_items: list[dict[str, Any]]) -> str:
"""Build the files markdown section from pre-read file items (avoids double I/O)."""
sections = []
for item in file_items:
path = item.get("path")
entry = item.get("entry", "unknown")
content = item.get("content", "")
if path is None:
sections.append(f"### `{entry}`\n\n```text\n{content}\n```")
continue
suffix = path.suffix.lstrip(".") if hasattr(path, "suffix") else "text"
lang = suffix if suffix else "text"
original = entry if "*" not in entry else str(path)
sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
return "\n\n---\n\n".join(sections)
def build_markdown_from_items(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
"""Build markdown from pre-read file items instead of re-reading from disk."""
parts = []
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
if file_items:
if summary_only:
parts.append("## Files (Summary)\n\n" + summarize.build_summary_markdown(file_items))
else:
parts.append("## Files\n\n" + _build_files_section_from_items(file_items))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
# DYNAMIC SUFFIX: History changes every turn, must go last
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def build_markdown_from_items(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
"""Build markdown from pre-read file items instead of re-reading from disk."""
parts = []
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
if file_items:
if summary_only:
parts.append("## Files (Summary)\n\n" + summarize.build_summary_markdown(file_items))
else:
parts.append("## Files\n\n" + _build_files_section_from_items(file_items))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
# DYNAMIC SUFFIX: History changes every turn, must go last
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def build_markdown_no_history(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
"""Build markdown with only files + screenshots (no history). Used for stable caching."""
return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history=[], summary_only=summary_only)
def build_markdown_no_history(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
"""Build markdown with only files + screenshots (no history). Used for stable caching."""
return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history=[], summary_only=summary_only)
def build_discussion_text(history: list[str]) -> str:
"""Build just the discussion history section text. Returns empty string if no history."""
if not history:
return ""
return "## Discussion History\n\n" + build_discussion_section(history)
"""Build just the discussion history section text. Returns empty string if no history."""
if not history:
return ""
return "## Discussion History\n\n" + build_discussion_section(history)
def build_tier1_context(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
"""
def build_tier1_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
"""
Tier 1 Context: Strategic/Orchestration.
Full content for core conductor files and files with tier=1, summaries for others.
"""
core_files = {"product.md", "tech-stack.md", "workflow.md", "tracks.md"}
parts = []
# Files section
if file_items:
sections = []
for item in file_items:
path = item.get("path")
name = path.name if path else ""
if name in core_files or item.get("tier") == 1:
# Include in full
sections.append("### `" + (item.get("entry") or str(path)) + "`\n\n" +
f"```{path.suffix.lstrip('.') if path.suffix else 'text'}\n{item.get('content', '')}\n```")
else:
# Summarize
sections.append("### `" + (item.get("entry") or str(path)) + "`\n\n" +
summarize.summarise_file(path, item.get("content", "")))
parts.append("## Files (Tier 1 - Mixed)\n\n" + "\n\n---\n\n".join(sections))
core_files = {"product.md", "tech-stack.md", "workflow.md", "tracks.md"}
parts = []
# Files section
if file_items:
sections = []
for item in file_items:
path = item.get("path")
name = path.name if path else ""
if name in core_files or item.get("tier") == 1:
# Include in full
sections.append("### `" + (item.get("entry") or str(path)) + "`\n\n" +
f"```{path.suffix.lstrip('.') if path.suffix else 'text'}\n{item.get('content', '')}\n```")
else:
# Summarize
sections.append("### `" + (item.get("entry") or str(path)) + "`\n\n" +
summarize.summarise_file(path, item.get("content", "")))
parts.append("## Files (Tier 1 - Mixed)\n\n" + "\n\n---\n\n".join(sections))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def build_tier2_context(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
"""
def build_tier2_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str]) -> str:
"""
Tier 2 Context: Architectural/Tech Lead.
Full content for all files (standard behavior).
"""
return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history, summary_only=False)
return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history, summary_only=False)
def build_tier3_context(file_items: list[dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], focus_files: list[str]) -> str:
"""
def build_tier3_context(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], focus_files: list[str]) -> str:
"""
Tier 3 Context: Execution/Worker.
Full content for focus_files and files with tier=3, summaries/skeletons for others.
"""
parts = []
if file_items:
sections = []
for item in file_items:
path = item.get("path")
entry = item.get("entry", "")
path_str = str(path) if path else ""
# Check if this file is in focus_files (by name or path)
is_focus = False
for focus in focus_files:
if focus == entry or (path and focus == path.name) or focus in path_str:
is_focus = True
break
if is_focus or item.get("tier") == 3:
sections.append("### `" + (entry or path_str) + "`\n\n" +
f"```{path.suffix.lstrip('.') if path and path.suffix else 'text'}\n{item.get('content', '')}\n```")
else:
content = item.get("content", "")
if path and path.suffix == ".py" and not item.get("error"):
try:
parser = ASTParser("python")
skeleton = parser.get_skeleton(content)
sections.append(f"### `{entry or path_str}` (AST Skeleton)\n\n```python\n{skeleton}\n```")
except Exception as e:
# Fallback to summary if AST parsing fails
sections.append(f"### `{entry or path_str}`\n\n" + summarize.summarise_file(path, content))
else:
sections.append(f"### `{entry or path_str}`\n\n" + summarize.summarise_file(path, content))
parts.append("## Files (Tier 3 - Focused)\n\n" + "\n\n---\n\n".join(sections))
parts = []
if file_items:
sections = []
for item in file_items:
path = item.get("path")
entry = item.get("entry", "")
path_str = str(path) if path else ""
# Check if this file is in focus_files (by name or path)
is_focus = False
for focus in focus_files:
if focus == entry or (path and focus == path.name) or focus in path_str:
is_focus = True
break
if is_focus or item.get("tier") == 3:
sections.append("### `" + (entry or path_str) + "`\n\n" +
f"```{path.suffix.lstrip('.') if path and path.suffix else 'text'}\n{item.get('content', '')}\n```")
else:
content = item.get("content", "")
if path and path.suffix == ".py" and not item.get("error"):
try:
parser = ASTParser("python")
skeleton = parser.get_skeleton(content)
sections.append(f"### `{entry or path_str}` (AST Skeleton)\n\n```python\n{skeleton}\n```")
except Exception as e:
# Fallback to summary if AST parsing fails
sections.append(f"### `{entry or path_str}`\n\n" + summarize.summarise_file(path, content))
else:
sections.append(f"### `{entry or path_str}`\n\n" + summarize.summarise_file(path, content))
parts.append("## Files (Tier 3 - Focused)\n\n" + "\n\n---\n\n".join(sections))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def build_markdown(base_dir: Path, files: list[str | dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
parts = []
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
if files:
if summary_only:
parts.append("## Files (Summary)\n\n" + build_summary_section(base_dir, files))
else:
parts.append("## Files\n\n" + build_files_section(base_dir, files))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
# DYNAMIC SUFFIX: History changes every turn, must go last
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def run(config: dict[str, Any]) -> tuple[str, Path, list[dict[str, Any]]]:
namespace = config.get("project", {}).get("name")
if not namespace:
namespace = config.get("output", {}).get("namespace", "project")
output_dir = Path(config["output"]["output_dir"])
base_dir = Path(config["files"]["base_dir"])
files = config["files"].get("paths", [])
screenshot_base_dir = Path(config.get("screenshots", {}).get("base_dir", "."))
screenshots = config.get("screenshots", {}).get("paths", [])
history = config.get("discussion", {}).get("history", [])
output_dir.mkdir(parents=True, exist_ok=True)
increment = find_next_increment(output_dir, namespace)
output_file = output_dir / f"{namespace}_{increment:03d}.md"
# Build file items once, then construct markdown from them (avoids double I/O)
file_items = build_file_items(base_dir, files)
summary_only = config.get("project", {}).get("summary_only", False)
markdown = build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history,
summary_only=summary_only)
output_file.write_text(markdown, encoding="utf-8")
return markdown, output_file, file_items
def build_markdown(base_dir: Path, files: list[str | dict], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
parts = []
# STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
if files:
if summary_only:
parts.append("## Files (Summary)\n\n" + build_summary_section(base_dir, files))
else:
parts.append("## Files\n\n" + build_files_section(base_dir, files))
if screenshots:
parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
# DYNAMIC SUFFIX: History changes every turn, must go last
if history:
parts.append("## Discussion History\n\n" + build_discussion_section(history))
return "\n\n---\n\n".join(parts)
def run(config: dict) -> tuple[str, Path, list[dict]]:
namespace = config.get("project", {}).get("name")
if not namespace:
namespace = config.get("output", {}).get("namespace", "project")
output_dir = Path(config["output"]["output_dir"])
base_dir = Path(config["files"]["base_dir"])
files = config["files"].get("paths", [])
screenshot_base_dir = Path(config.get("screenshots", {}).get("base_dir", "."))
screenshots = config.get("screenshots", {}).get("paths", [])
history = config.get("discussion", {}).get("history", [])
output_dir.mkdir(parents=True, exist_ok=True)
increment = find_next_increment(output_dir, namespace)
output_file = output_dir / f"{namespace}_{increment:03d}.md"
# Build file items once, then construct markdown from them (avoids double I/O)
file_items = build_file_items(base_dir, files)
summary_only = config.get("project", {}).get("summary_only", False)
markdown = build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history,
summary_only=summary_only)
output_file.write_text(markdown, encoding="utf-8")
return markdown, output_file, file_items
def main():
# Load global config to find active project
config_path = Path("config.toml")
if not config_path.exists():
print("config.toml not found.")
return
with open(config_path, "rb") as f:
global_cfg = tomllib.load(f)
active_path = global_cfg.get("projects", {}).get("active")
if not active_path:
print("No active project found in config.toml.")
return
# Use project_manager to load project (handles history segregation)
proj = project_manager.load_project(active_path)
# Use flat_config to make it compatible with aggregate.run()
config = project_manager.flat_config(proj)
markdown, output_file, _ = run(config)
print(f"Written: {output_file}")
def main() -> None:
# Load global config to find active project
config_path = Path("config.toml")
if not config_path.exists():
print("config.toml not found.")
return
with open(config_path, "rb") as f:
global_cfg = tomllib.load(f)
active_path = global_cfg.get("projects", {}).get("active")
if not active_path:
print("No active project found in config.toml.")
return
# Use project_manager to load project (handles history segregation)
proj = project_manager.load_project(active_path)
# Use flat_config to make it compatible with aggregate.run()
config = project_manager.flat_config(proj)
markdown, output_file, _ = run(config)
print(f"Written: {output_file}")
if __name__ == "__main__":
main()
main()