# summarize.py
"""
Local symbolic summariser with an optional AI-assisted pass.

For each file, a structural outline is extracted locally:
  .py   : imports, top-level constants, classes (with methods), top-level functions
  .toml : table names + keys that appear before the first table header
  .md   : headings (h1-h3), ignoring fenced code blocks
  other : line count + either a C-style declaration outline (taken from the
          first 200 lines) or the non-blank lines among the first 10 as a preview

`summarise_file` then hands that outline to
`ai_client.run_subagent_summarization`; when that call raises or reports an
error, the locally built outline is returned on its own.

Returns a compact markdown string per file, suitable for use as a low-token
context block that replaces full file contents in the initial send.
"""
import ast
import re
from pathlib import Path
from typing import Callable, Any

from src import ai_client
from src.summary_cache import SummaryCache, get_file_hash

_summary_cache = SummaryCache()

# A TOML table header ([name] or [[name]]) must be alone on its line apart
# from trailing whitespace / a comment.  Anchoring the end keeps lines of a
# multi-line array such as `["a", "b"],` from being reported as tables.
_TOML_TABLE_RE = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}\s*(?:#.*)?$")
# TOML bare keys may contain ASCII letters, digits, `_` and `-`, and may be
# indented.  Quoted and dotted keys are intentionally not recognised here.
_TOML_KEY_RE = re.compile(r"^\s*([A-Za-z0-9_-]+)\s*=")

_MD_HEADING_RE = re.compile(r"^(#{1,3})\s+(.+)")
_MD_FENCES = ("```", "~~~")

_C_DECL_RE = re.compile(
    r'^\s*(class|struct|namespace|enum|template|void|int|float|double|char|bool'
    r'|virtual|static|inline|extern|#define|#include)\b'
)
_C_COMMENT_PREFIXES = ("//", "/*", "*")

# Suffixes reported to the AI summariser as source code.
_CODE_SUFFIXES = frozenset(
    {".py", ".ps1", ".js", ".ts", ".cpp", ".c", ".h", ".cs", ".go", ".rs", ".lua"}
)


# ------------------------------------------------------------------ per-type extractors

def _summarise_python(path: Path, content: str) -> str:
    """
    Outline a Python source file using the `ast` module.

    Reports, in this order: the line count, the sorted set of imported
    top-level package names (collected from the whole tree, so nested imports
    count too), upper-case module-level names, each top-level class with its
    directly defined methods, and the top-level functions.  A file that does
    not parse yields the line count followed by the parse error.
    """
    line_count = len(content.splitlines())
    parts = [f"**Python** — {line_count} lines"]
    try:
        # A leading BOM would otherwise make ast.parse reject the source.
        tree = ast.parse(content.lstrip(chr(0xFEFF)), filename=str(path))
    except SyntaxError as e:
        parts.append(f"_Parse error: {e}_")
        return "\n".join(parts)

    imports = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            imports.update(alias.name.split(".")[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.module:
            # `from . import x` has no module name and is skipped.
            imports.add(node.module.split(".")[0])
    if imports:
        parts.append(f"imports: {', '.join(sorted(imports))}")

    # One pass over the module body; the three groups are emitted separately
    # below so the output order stays constants -> classes -> functions.
    constants = []
    class_lines = []
    top_fns = []
    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.Assign):
            constants.extend(
                t.id for t in node.targets
                if isinstance(t, ast.Name) and t.id.isupper()
            )
        elif isinstance(node, ast.AnnAssign):
            if isinstance(node.target, ast.Name) and node.target.id.isupper():
                constants.append(node.target.id)
        elif isinstance(node, ast.ClassDef):
            methods = [
                n.name for n in ast.iter_child_nodes(node)
                if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))
            ]
            if methods:
                class_lines.append(f"class {node.name}: {', '.join(methods)}")
            else:
                class_lines.append(f"class {node.name}")
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            top_fns.append(node.name)

    if constants:
        parts.append(f"constants: {', '.join(constants)}")
    parts.extend(class_lines)
    if top_fns:
        parts.append(f"functions: {', '.join(top_fns)}")
    return "\n".join(parts)


def _summarise_toml(path: Path, content: str) -> str:
    """
    Outline a TOML file with line-based regexes (the file is not parsed).

    Reports the line count, every table / array-of-tables header name, and the
    bare keys assigned before the first table header.
    """
    lines = content.splitlines()
    parts = [f"**TOML** — {len(lines)} lines"]

    tables = []
    top_keys = []
    for line in lines:
        header = _TOML_TABLE_RE.match(line)
        if header:
            tables.append(header.group(1).strip())
            continue
        # Keys only count as top-level until the first table header is seen.
        if not tables:
            key = _TOML_KEY_RE.match(line)
            if key:
                top_keys.append(key.group(1))

    if tables:
        parts.append(f"tables: {', '.join(tables)}")
    if top_keys:
        parts.append(f"top-level keys: {', '.join(top_keys)}")
    return "\n".join(parts)


def _summarise_markdown(path: Path, content: str) -> str:
    """
    Outline a Markdown file as its h1-h3 ATX headings, nested by level.

    Lines inside fenced code blocks (``` or ~~~) are ignored so that shell or
    Python comments in examples are not mistaken for headings.
    """
    lines = content.splitlines()
    parts = [f"**Markdown** — {len(lines)} lines"]

    headings = []
    open_fence = None  # the three-character marker of the fence we are inside
    for line in lines:
        stripped = line.lstrip()
        if open_fence is not None:
            if stripped.startswith(open_fence):
                open_fence = None
            continue
        if stripped.startswith(_MD_FENCES):
            open_fence = stripped[:3]
            continue
        m = _MD_HEADING_RE.match(line)
        if m:
            level = len(m.group(1))
            text = m.group(2).strip()
            indent = " " * (level - 1)
            headings.append(f"{indent}{text}")

    if headings:
        parts.append("headings:\n" + "\n".join(f" {h}" for h in headings))
    return "\n".join(parts)


def _summarise_generic(path: Path, content: str) -> str:
    """
    Outline any other text file.

    Scans the first 200 lines for C-style declaration keywords and lists up to
    15 matching lines; when none match, falls back to the non-blank lines among
    the first 10 as a fenced preview.  The header label is the upper-cased file
    suffix, or "TEXT" when the path has none.
    """
    lines = content.splitlines()
    suffix = path.suffix.lstrip(".").upper() or "TEXT"
    parts = [f"**{suffix}** — {len(lines)} lines"]

    # Heuristic for C-style languages
    important_lines = []
    for line in lines[:200]:
        trimmed = line.strip()
        if not trimmed or trimmed.startswith(_C_COMMENT_PREFIXES):
            continue
        if _C_DECL_RE.match(line):
            important_lines.append(trimmed)
            if len(important_lines) >= 15:
                break

    if important_lines:
        parts.append("Key elements / Outline:\n- " + "\n- ".join(important_lines))
    else:
        preview = [l for l in lines[:10] if l.strip()]
        if preview:
            parts.append("preview:\n```\n" + "\n".join(preview) + "\n```")
    return "\n".join(parts)


# Suffix -> extractor.  Suffixes not listed here also use _summarise_generic.
_SUMMARISERS: dict[str, Callable[[Path, str], str]] = {
    ".py": _summarise_python,
    ".toml": _summarise_toml,
    ".md": _summarise_markdown,
    ".ini": _summarise_generic,
    ".txt": _summarise_generic,
    ".c": _summarise_generic,
    ".h": _summarise_generic,
    ".cpp": _summarise_generic,
    ".hpp": _summarise_generic,
    ".ps1": _summarise_generic,
}


def summarise_file(path: Path, content: str) -> str:
    """
    Return a compact markdown summary string for a single file.
    `content` is the already-read file text (or an error string).

    A truthy cached summary for the same path and content hash is returned
    as-is.  Otherwise the suffix-specific outline is built, the first 10000
    characters plus that outline are offered to
    `ai_client.run_subagent_summarization`, and the result is cached.  Any
    failure of the AI step falls back to the outline alone; a failure of the
    extractor or the cache write is reported as a `_Summariser error: ..._`
    string instead of being raised.
    [C: tests/test_subagent_summarization.py:test_summarise_file_integration]
    """
    content_hash = get_file_hash(content)
    cached = _summary_cache.get_summary(str(path), content_hash)
    if cached:
        return cached

    suffix = path.suffix.lower() if hasattr(path, "suffix") else ""
    fn = _SUMMARISERS.get(suffix, _summarise_generic)
    try:
        heuristic_outline = fn(path, content)
        summary = heuristic_outline

        # Smart AI Summarization
        try:
            smart_summary = ai_client.run_subagent_summarization(
                file_path=str(path),
                content=content[:10000],
                is_code=suffix in _CODE_SUFFIXES,
                outline=heuristic_outline,
            )
            if smart_summary and not smart_summary.startswith("ERROR:"):
                summary = f"{smart_summary}\n\n**Outline:**\n{heuristic_outline}"
        except Exception:
            # Deliberate best-effort: the AI pass is an enhancement, so any
            # failure degrades to the locally built outline.
            summary = heuristic_outline

        # NOTE(review): the fallback outline is cached too, so a transient AI
        # failure is not retried for unchanged content — confirm this is intended.
        _summary_cache.set_summary(str(path), content_hash, summary)
        return summary
    except Exception as e:
        return f"_Summariser error: {e}_"


def summarise_items(file_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """
    Given a list of file_item dicts (as returned by aggregate.build_file_items),
    return a parallel list of dicts with an added `summary` key.

    Items flagged with a truthy `error`, or lacking a `path`, get the summary
    "_Error reading file_".  The input dicts are not mutated.
    """
    result = []
    for item in file_items:
        path = item.get("path")
        if item.get("error", False) or path is None:
            summary = "_Error reading file_"
        else:
            content = item.get("content")
            if content is None:
                # An explicit `content: None` would otherwise reach the hashing
                # step in summarise_file; treat it as an empty file instead.
                content = ""
            p = path if isinstance(path, Path) else Path(path)
            summary = summarise_file(p, content)
        result.append({**item, "summary": summary})
    return result


def build_summary_markdown(file_items: list[dict[str, Any]]) -> str:
    """
    Build a compact markdown string of file summaries, suitable for the
    initial block instead of full file contents.

    Each file becomes a `### ` section titled with its `path` (or `entry`, or
    "unknown"); sections are separated by horizontal rules.
    """
    sections = []
    for item in summarise_items(file_items):
        path = item.get("path") or item.get("entry", "unknown")
        summary = item.get("summary", "")
        sections.append(f"### `{path}`\n\n{summary}")
    return "\n\n---\n\n".join(sections)