Organization pass; still need to review a bunch of files

2026-06-06 00:21:36 -04:00
parent f8b0a1243d
commit 053f5d867a
18 changed files with 658 additions and 706 deletions
@@ -1,23 +1,12 @@
 # summarize.py
 """
-Note(Gemini):
-Local heuristic summariser. Doesn't use any AI or network. 
-Uses Python's AST to reliably pull out classes, methods, and functions. 
-Regex is used for TOML and Markdown. 
-
-The rationale here is simple: giving the AI the *structure* of a codebase is 90% 
-as good as giving it the full source, but costs 1% of the tokens. 
-If it needs the full source of a file after reading the summary, it can just call read_file.
-"""
-# summarize.py
-"""
 Local symbolic summariser — no AI calls, no network.

 For each file, extracts structural information:
-  .py   : imports, classes (with methods), top-level functions, global constants
-  .toml : top-level table keys + array lengths
-  .md   : headings (h1-h3)
-  other : line count + first 8 lines as preview
+ .py   : imports, classes (with methods), top-level functions, global constants
+ .toml : top-level table keys + array lengths
+ .md   : headings (h1-h3)
+ other : line count + first 8 lines as preview

 Returns a compact markdown string per file, suitable for use as a low-token
 context block that replaces full file contents in the initial <context> send.
@@ -28,6 +17,8 @@ import re
 from pathlib import Path
 from typing  import Callable, Any

+from src import ai_client
+
 from src.summary_cache import SummaryCache, get_file_hash


@@ -37,9 +28,9 @@ _summary_cache = SummaryCache()
 # ------------------------------------------------------------------ per-type extractors

 def _summarise_python(path: Path, content: str) -> str:
- lines = content.splitlines()
+ lines      = content.splitlines()
 line_count = len(lines)
- parts = [f"**Python** — {line_count} lines"]
+ parts      = [f"**Python** — {line_count} lines"]
 try:
  tree = ast.parse(content.lstrip(chr(0xFEFF)), filename=str(path))
 except SyntaxError as e:
@@ -73,31 +64,28 @@ def _summarise_python(path: Path, content: str) -> str:
    n.name for n in ast.iter_child_nodes(node)
    if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))
   ]
-   if methods:
-    parts.append(f"class {node.name}: {', '.join(methods)}")
-   else:
-    parts.append(f"class {node.name}")
+   if methods: parts.append(f"class {node.name}: {', '.join(methods)}")
+   else:       parts.append(f"class {node.name}")
 top_fns = [
  node.name for node in ast.iter_child_nodes(tree)
  if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
 ]
- if top_fns:
-  parts.append(f"functions: {', '.join(top_fns)}")
+ if top_fns: parts.append(f"functions: {', '.join(top_fns)}")
 return "\n".join(parts)

 def _summarise_toml(path: Path, content: str) -> str:
- lines = content.splitlines()
+ lines      = content.splitlines()
 line_count = len(lines)
- parts = [f"**TOML** — {line_count} lines"]
- table_pat = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}")
- tables = []
+ parts      = [f"**TOML** — {line_count} lines"]
+ table_pat  = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}")
+ tables     = []
 for line in lines:
  m = table_pat.match(line)
  if m:
   tables.append(m.group(1).strip())
 if tables:
  parts.append(f"tables: {', '.join(tables)}")
- kv_pat = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
+ kv_pat   = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
 in_table = False
 top_keys = []
 for line in lines:
@@ -113,15 +101,15 @@ def _summarise_toml(path: Path, content: str) -> str:
 return "\n".join(parts)

 def _summarise_markdown(path: Path, content: str) -> str:
- lines = content.splitlines()
+ lines      = content.splitlines()
 line_count = len(lines)
- parts = [f"**Markdown** — {line_count} lines"]
- headings = []
+ parts      = [f"**Markdown** — {line_count} lines"]
+ headings   = []
 for line in lines:
  m = re.match(r"^(#{1,3})\s+(.+)", line)
  if m:
-   level = len(m.group(1))
-   text = m.group(2).strip()
+   level  = len(m.group(1))
+   text   = m.group(2).strip()
   indent = "  " * (level - 1)
   headings.append(f"{indent}{text}")
 if headings:
@@ -129,10 +117,10 @@ def _summarise_markdown(path: Path, content: str) -> str:
 return "\n".join(parts)

 def _summarise_generic(path: Path, content: str) -> str:
- lines = content.splitlines()
+ lines      = content.splitlines()
 line_count = len(lines)
- suffix = path.suffix.lstrip(".").upper() or "TEXT"
- parts = [f"**{suffix}** — {line_count} lines"]
+ suffix     = path.suffix.lstrip(".").upper() or "TEXT"
+ parts      = [f"**{suffix}** — {line_count} lines"]
 
 # Heuristic for C-style languages
 important_lines = []
@@ -168,24 +156,20 @@ _SUMMARISERS: dict[str, Callable[[Path, str], str]] = {

 def summarise_file(path: Path, content: str) -> str:
 """
- 
-  
-   Return a compact markdown summary string for a single file.
-   `content` is the already-read file text (or an error string).
-  [C: tests/test_subagent_summarization.py:test_summarise_file_integration]
+ Return a compact markdown summary string for a single file.
+ `content` is the already-read file text (or an error string).
+ [C: tests/test_subagent_summarization.py:test_summarise_file_integration]
 """
 content_hash = get_file_hash(content)
- cached = _summary_cache.get_summary(str(path), content_hash)
- if cached:
-  return cached
+ cached       = _summary_cache.get_summary(str(path), content_hash)
+ if cached: return cached
 suffix = path.suffix.lower() if hasattr(path, "suffix") else ""
- fn = _SUMMARISERS.get(suffix, _summarise_generic)
+ fn     = _SUMMARISERS.get(suffix, _summarise_generic)
 try:
  heuristic_outline = fn(path, content)
  # Smart AI Summarization
  is_code = suffix in [".py", ".ps1", ".js", ".ts", ".cpp", ".c", ".h", ".cs", ".go", ".rs", ".lua"]
  try:
-   from src import ai_client
   smart_summary = ai_client.run_subagent_summarization(
    file_path=str(path),
    content=content[:10000],
@@ -205,31 +189,31 @@ def summarise_file(path: Path, content: str) -> str:

 def summarise_items(file_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
 """
-  Given a list of file_item dicts (as returned by aggregate.build_file_items),
-  return a parallel list of dicts with an added `summary` key.
+ Given a list of file_item dicts (as returned by aggregate.build_file_items),
+ return a parallel list of dicts with an added `summary` key.
 """
 result = []
 for item in file_items:
-  path = item.get("path")
+  path    = item.get("path")
  content = item.get("content", "")
-  error = item.get("error", False)
+  error   = item.get("error", False)
  if error or path is None:
   summary = "_Error reading file_"
  else:
-   p = Path(path) if not isinstance(path, Path) else path
+   p       = Path(path) if not isinstance(path, Path) else path
   summary = summarise_file(p, content)
  result.append({**item, "summary": summary})
 return result

 def build_summary_markdown(file_items: list[dict[str, Any]]) -> str:
 """
-  Build a compact markdown string of file summaries, suitable for the
-  initial <context> block instead of full file contents.
+ Build a compact markdown string of file summaries, suitable for the
+ initial <context> block instead of full file contents.
 """
 summarised = summarise_items(file_items)
- parts = []
+ parts      = []
 for item in summarised:
-  path = item.get("path") or item.get("entry", "unknown")
+  path    = item.get("path") or item.get("entry", "unknown")
  summary = item.get("summary", "")
  parts.append(f"### `{path}`\n\n{summary}")
- return "\n\n---\n\n".join(parts)
+ return "\n\n---\n\n".join(parts)