Private
Public Access
0
0

some organization pass, still need to review a bunch

This commit is contained in:
2026-06-06 00:21:36 -04:00
parent f8b0a1243d
commit 053f5d867a
18 changed files with 658 additions and 706 deletions
+40 -56
View File
@@ -1,23 +1,12 @@
# summarize.py
"""
Note(Gemini):
Local heuristic summariser. Doesn't use any AI or network.
Uses Python's AST to reliably pull out classes, methods, and functions.
Regex is used for TOML and Markdown.
The rationale here is simple: giving the AI the *structure* of a codebase is 90%
as good as giving it the full source, but costs 1% of the tokens.
If it needs the full source of a file after reading the summary, it can just call read_file.
"""
# summarize.py
"""
Local symbolic summariser — no AI calls, no network.
For each file, extracts structural information:
.py : imports, classes (with methods), top-level functions, global constants
.toml : top-level table keys + array lengths
.md : headings (h1-h3)
other : line count + first 8 lines as preview
.py : imports, classes (with methods), top-level functions, global constants
.toml : top-level table keys + array lengths
.md : headings (h1-h3)
other : line count + first 8 lines as preview
Returns a compact markdown string per file, suitable for use as a low-token
context block that replaces full file contents in the initial <context> send.
@@ -28,6 +17,8 @@ import re
from pathlib import Path
from typing import Callable, Any
from src import ai_client
from src.summary_cache import SummaryCache, get_file_hash
@@ -37,9 +28,9 @@ _summary_cache = SummaryCache()
# ------------------------------------------------------------------ per-type extractors
def _summarise_python(path: Path, content: str) -> str:
lines = content.splitlines()
lines = content.splitlines()
line_count = len(lines)
parts = [f"**Python** — {line_count} lines"]
parts = [f"**Python** — {line_count} lines"]
try:
tree = ast.parse(content.lstrip(chr(0xFEFF)), filename=str(path))
except SyntaxError as e:
@@ -73,31 +64,28 @@ def _summarise_python(path: Path, content: str) -> str:
n.name for n in ast.iter_child_nodes(node)
if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))
]
if methods:
parts.append(f"class {node.name}: {', '.join(methods)}")
else:
parts.append(f"class {node.name}")
if methods: parts.append(f"class {node.name}: {', '.join(methods)}")
else: parts.append(f"class {node.name}")
top_fns = [
node.name for node in ast.iter_child_nodes(tree)
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
]
if top_fns:
parts.append(f"functions: {', '.join(top_fns)}")
if top_fns: parts.append(f"functions: {', '.join(top_fns)}")
return "\n".join(parts)
def _summarise_toml(path: Path, content: str) -> str:
lines = content.splitlines()
lines = content.splitlines()
line_count = len(lines)
parts = [f"**TOML** — {line_count} lines"]
table_pat = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}")
tables = []
parts = [f"**TOML** — {line_count} lines"]
table_pat = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}")
tables = []
for line in lines:
m = table_pat.match(line)
if m:
tables.append(m.group(1).strip())
if tables:
parts.append(f"tables: {', '.join(tables)}")
kv_pat = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
kv_pat = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
in_table = False
top_keys = []
for line in lines:
@@ -113,15 +101,15 @@ def _summarise_toml(path: Path, content: str) -> str:
return "\n".join(parts)
def _summarise_markdown(path: Path, content: str) -> str:
lines = content.splitlines()
lines = content.splitlines()
line_count = len(lines)
parts = [f"**Markdown** — {line_count} lines"]
headings = []
parts = [f"**Markdown** — {line_count} lines"]
headings = []
for line in lines:
m = re.match(r"^(#{1,3})\s+(.+)", line)
if m:
level = len(m.group(1))
text = m.group(2).strip()
level = len(m.group(1))
text = m.group(2).strip()
indent = " " * (level - 1)
headings.append(f"{indent}{text}")
if headings:
@@ -129,10 +117,10 @@ def _summarise_markdown(path: Path, content: str) -> str:
return "\n".join(parts)
def _summarise_generic(path: Path, content: str) -> str:
lines = content.splitlines()
lines = content.splitlines()
line_count = len(lines)
suffix = path.suffix.lstrip(".").upper() or "TEXT"
parts = [f"**{suffix}** — {line_count} lines"]
suffix = path.suffix.lstrip(".").upper() or "TEXT"
parts = [f"**{suffix}** — {line_count} lines"]
# Heuristic for C-style languages
important_lines = []
@@ -168,24 +156,20 @@ _SUMMARISERS: dict[str, Callable[[Path, str], str]] = {
def summarise_file(path: Path, content: str) -> str:
"""
Return a compact markdown summary string for a single file.
`content` is the already-read file text (or an error string).
[C: tests/test_subagent_summarization.py:test_summarise_file_integration]
Return a compact markdown summary string for a single file.
`content` is the already-read file text (or an error string).
[C: tests/test_subagent_summarization.py:test_summarise_file_integration]
"""
content_hash = get_file_hash(content)
cached = _summary_cache.get_summary(str(path), content_hash)
if cached:
return cached
cached = _summary_cache.get_summary(str(path), content_hash)
if cached: return cached
suffix = path.suffix.lower() if hasattr(path, "suffix") else ""
fn = _SUMMARISERS.get(suffix, _summarise_generic)
fn = _SUMMARISERS.get(suffix, _summarise_generic)
try:
heuristic_outline = fn(path, content)
# Smart AI Summarization
is_code = suffix in [".py", ".ps1", ".js", ".ts", ".cpp", ".c", ".h", ".cs", ".go", ".rs", ".lua"]
try:
from src import ai_client
smart_summary = ai_client.run_subagent_summarization(
file_path=str(path),
content=content[:10000],
@@ -205,31 +189,31 @@ def summarise_file(path: Path, content: str) -> str:
def summarise_items(file_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""
Given a list of file_item dicts (as returned by aggregate.build_file_items),
return a parallel list of dicts with an added `summary` key.
Given a list of file_item dicts (as returned by aggregate.build_file_items),
return a parallel list of dicts with an added `summary` key.
"""
result = []
for item in file_items:
path = item.get("path")
path = item.get("path")
content = item.get("content", "")
error = item.get("error", False)
error = item.get("error", False)
if error or path is None:
summary = "_Error reading file_"
else:
p = Path(path) if not isinstance(path, Path) else path
p = Path(path) if not isinstance(path, Path) else path
summary = summarise_file(p, content)
result.append({**item, "summary": summary})
return result
def build_summary_markdown(file_items: list[dict[str, Any]]) -> str:
"""
Build a compact markdown string of file summaries, suitable for the
initial <context> block instead of full file contents.
Build a compact markdown string of file summaries, suitable for the
initial <context> block instead of full file contents.
"""
summarised = summarise_items(file_items)
parts = []
parts = []
for item in summarised:
path = item.get("path") or item.get("entry", "unknown")
path = item.get("path") or item.get("entry", "unknown")
summary = item.get("summary", "")
parts.append(f"### `{path}`\n\n{summary}")
return "\n\n---\n\n".join(parts)
return "\n\n---\n\n".join(parts)