wip

summarize.py (new file, 200 lines)
# summarize.py
"""
Local symbolic summariser — no AI calls, no network.

For each file, extracts structural information:
  .py   : imports, classes (with methods), top-level functions, global constants
  .toml : top-level table headers + top-level keys
  .md   : headings (h1-h3)
  other : line count + first 8 lines as preview

Returns a compact markdown string per file, suitable for use as a low-token
context block that replaces full file contents in the initial <context> send.
"""
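
# Illustrative sketch of the kind of block this module emits for a Python
# file. The module shown is hypothetical; it only makes the format concrete:
#
#   **Python** — 120 lines
#   imports: ast, pathlib, re
#   constants: MAX_PREVIEW
#   class Summariser: __init__, run
#   functions: main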

import ast
import re
from pathlib import Path


# ------------------------------------------------------------------ per-type extractors

def _summarise_python(path: Path, content: str) -> str:
    lines = content.splitlines()
    line_count = len(lines)
    parts = [f"**Python** — {line_count} lines"]

    try:
        # Strip a leading UTF-8 BOM so ast.parse does not reject the source.
        tree = ast.parse(content.lstrip(chr(0xFEFF)), filename=str(path))
    except SyntaxError as e:
        parts.append(f"_Parse error: {e}_")
        return "\n".join(parts)

    # Imports (ast.walk also picks up imports nested inside functions/classes)
    imports = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                imports.append(alias.name.split(".")[0])
        elif isinstance(node, ast.ImportFrom):
            if node.module:
                imports.append(node.module.split(".")[0])
    if imports:
        unique_imports = sorted(set(imports))
        parts.append(f"imports: {', '.join(unique_imports)}")

    # Top-level constants (ALL_CAPS assignments)
    constants = []
    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.Assign):
            for t in node.targets:
                if isinstance(t, ast.Name) and t.id.isupper():
                    constants.append(t.id)
        elif isinstance(node, ast.AnnAssign):
            if isinstance(node.target, ast.Name) and node.target.id.isupper():
                constants.append(node.target.id)
    if constants:
        parts.append(f"constants: {', '.join(constants)}")

    # Classes + their methods
    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.ClassDef):
            methods = [
                n.name for n in ast.iter_child_nodes(node)
                if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))
            ]
            if methods:
                parts.append(f"class {node.name}: {', '.join(methods)}")
            else:
                parts.append(f"class {node.name}")

    # Top-level functions
    top_fns = [
        node.name for node in ast.iter_child_nodes(tree)
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]
    if top_fns:
        parts.append(f"functions: {', '.join(top_fns)}")

    return "\n".join(parts)


def _summarise_toml(path: Path, content: str) -> str:
    lines = content.splitlines()
    line_count = len(lines)
    parts = [f"**TOML** — {line_count} lines"]

    # Extract top-level table headers [key] and [[key]]
    table_pat = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}")
    tables = []
    for line in lines:
        m = table_pat.match(line)
        if m:
            tables.append(m.group(1).strip())
    if tables:
        parts.append(f"tables: {', '.join(tables)}")

    # Top-level key = value pairs. Only lines seen before the first table
    # header count: once any [table] opens, everything after belongs to it.
    kv_pat = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
    in_table = False
    top_keys = []
    for line in lines:
        if table_pat.match(line):
            in_table = True
            continue
        if not in_table:
            m = kv_pat.match(line)
            if m:
                top_keys.append(m.group(1))
    if top_keys:
        parts.append(f"top-level keys: {', '.join(top_keys)}")

    return "\n".join(parts)

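
# For reference, a hypothetical TOML file beginning with
#
#   name = "demo"
#   [tool.poetry]
#   [[tool.poetry.packages]]
#
# would be summarised as (a sketch, not captured from a real run):
#   tables: tool.poetry, tool.poetry.packages
#   top-level keys: name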

def _summarise_markdown(path: Path, content: str) -> str:
    lines = content.splitlines()
    line_count = len(lines)
    parts = [f"**Markdown** — {line_count} lines"]

    headings = []
    for line in lines:
        m = re.match(r"^(#{1,3})\s+(.+)", line)
        if m:
            level = len(m.group(1))
            text = m.group(2).strip()
            indent = " " * (level - 1)
            headings.append(f"{indent}{text}")
    if headings:
        parts.append("headings:\n" + "\n".join(f"  {h}" for h in headings))

    return "\n".join(parts)


def _summarise_generic(path: Path, content: str) -> str:
    lines = content.splitlines()
    line_count = len(lines)
    suffix = path.suffix.lstrip(".").upper() or "TEXT"
    parts = [f"**{suffix}** — {line_count} lines"]
    preview = lines[:8]
    if preview:
        parts.append("preview:\n```\n" + "\n".join(preview) + "\n```")
    return "\n".join(parts)


# ------------------------------------------------------------------ dispatch

# Suffixes not listed here fall back to the generic preview summariser
# (see summarise_file below).
_SUMMARISERS = {
    ".py": _summarise_python,
    ".toml": _summarise_toml,
    ".md": _summarise_markdown,
    ".ini": _summarise_generic,
    ".txt": _summarise_generic,
    ".ps1": _summarise_generic,
}


def summarise_file(path: Path, content: str) -> str:
    """
    Return a compact markdown summary string for a single file.
    `content` is the already-read file text (or an error string).
    """
    suffix = path.suffix.lower() if hasattr(path, "suffix") else ""
    fn = _SUMMARISERS.get(suffix, _summarise_generic)
    try:
        return fn(path, content)
    except Exception as e:
        return f"_Summariser error: {e}_"

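
# Example usage (a sketch; the file name is hypothetical):
#
#   text = Path("app.py").read_text(encoding="utf-8")
#   print(summarise_file(Path("app.py"), text))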

def summarise_items(file_items: list[dict]) -> list[dict]:
    """
    Given a list of file_item dicts (as returned by aggregate.build_file_items),
    return a parallel list of dicts with an added `summary` key.
    """
    result = []
    for item in file_items:
        path = item.get("path")
        content = item.get("content", "")
        error = item.get("error", False)
        if error or path is None:
            summary = "_Error reading file_"
        else:
            p = Path(path) if not isinstance(path, Path) else path
            summary = summarise_file(p, content)
        result.append({**item, "summary": summary})
    return result

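
# Sketch of the expected item shape (inferred from the lookups above; the
# dicts come from aggregate.build_file_items, which lives outside this file):
#
#   {"path": "src/app.py", "content": "<file text>", "error": False}
#
# Each returned dict keeps the original keys and gains a "summary" string.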

def build_summary_markdown(file_items: list[dict]) -> str:
    """
    Build a compact markdown string of file summaries, suitable for the
    initial <context> block instead of full file contents.
    """
    summarised = summarise_items(file_items)
    parts = []
    for item in summarised:
        path = item.get("path") or item.get("entry", "unknown")
        summary = item.get("summary", "")
        parts.append(f"### `{path}`\n\n{summary}")
    return "\n\n---\n\n".join(parts)