some organization pass, still need to review a bunch
This commit is contained in:
+40
-56
@@ -1,23 +1,12 @@
|
||||
# summarize.py
|
||||
"""
|
||||
Note(Gemini):
|
||||
Local heuristic summariser. Doesn't use any AI or network.
|
||||
Uses Python's AST to reliably pull out classes, methods, and functions.
|
||||
Regex is used for TOML and Markdown.
|
||||
|
||||
The rationale here is simple: giving the AI the *structure* of a codebase is 90%
|
||||
as good as giving it the full source, but costs 1% of the tokens.
|
||||
If it needs the full source of a file after reading the summary, it can just call read_file.
|
||||
"""
|
||||
# summarize.py
|
||||
"""
|
||||
Local symbolic summariser — no AI calls, no network.
|
||||
|
||||
For each file, extracts structural information:
|
||||
.py : imports, classes (with methods), top-level functions, global constants
|
||||
.toml : top-level table keys + array lengths
|
||||
.md : headings (h1-h3)
|
||||
other : line count + first 8 lines as preview
|
||||
.py : imports, classes (with methods), top-level functions, global constants
|
||||
.toml : top-level table keys + array lengths
|
||||
.md : headings (h1-h3)
|
||||
other : line count + first 8 lines as preview
|
||||
|
||||
Returns a compact markdown string per file, suitable for use as a low-token
|
||||
context block that replaces full file contents in the initial <context> send.
|
||||
@@ -28,6 +17,8 @@ import re
|
||||
from pathlib import Path
|
||||
from typing import Callable, Any
|
||||
|
||||
from src import ai_client
|
||||
|
||||
from src.summary_cache import SummaryCache, get_file_hash
|
||||
|
||||
|
||||
@@ -37,9 +28,9 @@ _summary_cache = SummaryCache()
|
||||
# ------------------------------------------------------------------ per-type extractors
|
||||
|
||||
def _summarise_python(path: Path, content: str) -> str:
|
||||
lines = content.splitlines()
|
||||
lines = content.splitlines()
|
||||
line_count = len(lines)
|
||||
parts = [f"**Python** — {line_count} lines"]
|
||||
parts = [f"**Python** — {line_count} lines"]
|
||||
try:
|
||||
tree = ast.parse(content.lstrip(chr(0xFEFF)), filename=str(path))
|
||||
except SyntaxError as e:
|
||||
@@ -73,31 +64,28 @@ def _summarise_python(path: Path, content: str) -> str:
|
||||
n.name for n in ast.iter_child_nodes(node)
|
||||
if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))
|
||||
]
|
||||
if methods:
|
||||
parts.append(f"class {node.name}: {', '.join(methods)}")
|
||||
else:
|
||||
parts.append(f"class {node.name}")
|
||||
if methods: parts.append(f"class {node.name}: {', '.join(methods)}")
|
||||
else: parts.append(f"class {node.name}")
|
||||
top_fns = [
|
||||
node.name for node in ast.iter_child_nodes(tree)
|
||||
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
|
||||
]
|
||||
if top_fns:
|
||||
parts.append(f"functions: {', '.join(top_fns)}")
|
||||
if top_fns: parts.append(f"functions: {', '.join(top_fns)}")
|
||||
return "\n".join(parts)
|
||||
|
||||
def _summarise_toml(path: Path, content: str) -> str:
|
||||
lines = content.splitlines()
|
||||
lines = content.splitlines()
|
||||
line_count = len(lines)
|
||||
parts = [f"**TOML** — {line_count} lines"]
|
||||
table_pat = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}")
|
||||
tables = []
|
||||
parts = [f"**TOML** — {line_count} lines"]
|
||||
table_pat = re.compile(r"^\s*\[{1,2}([^\[\]]+)\]{1,2}")
|
||||
tables = []
|
||||
for line in lines:
|
||||
m = table_pat.match(line)
|
||||
if m:
|
||||
tables.append(m.group(1).strip())
|
||||
if tables:
|
||||
parts.append(f"tables: {', '.join(tables)}")
|
||||
kv_pat = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
|
||||
kv_pat = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\s*=")
|
||||
in_table = False
|
||||
top_keys = []
|
||||
for line in lines:
|
||||
@@ -113,15 +101,15 @@ def _summarise_toml(path: Path, content: str) -> str:
|
||||
return "\n".join(parts)
|
||||
|
||||
def _summarise_markdown(path: Path, content: str) -> str:
|
||||
lines = content.splitlines()
|
||||
lines = content.splitlines()
|
||||
line_count = len(lines)
|
||||
parts = [f"**Markdown** — {line_count} lines"]
|
||||
headings = []
|
||||
parts = [f"**Markdown** — {line_count} lines"]
|
||||
headings = []
|
||||
for line in lines:
|
||||
m = re.match(r"^(#{1,3})\s+(.+)", line)
|
||||
if m:
|
||||
level = len(m.group(1))
|
||||
text = m.group(2).strip()
|
||||
level = len(m.group(1))
|
||||
text = m.group(2).strip()
|
||||
indent = " " * (level - 1)
|
||||
headings.append(f"{indent}{text}")
|
||||
if headings:
|
||||
@@ -129,10 +117,10 @@ def _summarise_markdown(path: Path, content: str) -> str:
|
||||
return "\n".join(parts)
|
||||
|
||||
def _summarise_generic(path: Path, content: str) -> str:
|
||||
lines = content.splitlines()
|
||||
lines = content.splitlines()
|
||||
line_count = len(lines)
|
||||
suffix = path.suffix.lstrip(".").upper() or "TEXT"
|
||||
parts = [f"**{suffix}** — {line_count} lines"]
|
||||
suffix = path.suffix.lstrip(".").upper() or "TEXT"
|
||||
parts = [f"**{suffix}** — {line_count} lines"]
|
||||
|
||||
# Heuristic for C-style languages
|
||||
important_lines = []
|
||||
@@ -168,24 +156,20 @@ _SUMMARISERS: dict[str, Callable[[Path, str], str]] = {
|
||||
|
||||
def summarise_file(path: Path, content: str) -> str:
|
||||
"""
|
||||
|
||||
|
||||
Return a compact markdown summary string for a single file.
|
||||
`content` is the already-read file text (or an error string).
|
||||
[C: tests/test_subagent_summarization.py:test_summarise_file_integration]
|
||||
Return a compact markdown summary string for a single file.
|
||||
`content` is the already-read file text (or an error string).
|
||||
[C: tests/test_subagent_summarization.py:test_summarise_file_integration]
|
||||
"""
|
||||
content_hash = get_file_hash(content)
|
||||
cached = _summary_cache.get_summary(str(path), content_hash)
|
||||
if cached:
|
||||
return cached
|
||||
cached = _summary_cache.get_summary(str(path), content_hash)
|
||||
if cached: return cached
|
||||
suffix = path.suffix.lower() if hasattr(path, "suffix") else ""
|
||||
fn = _SUMMARISERS.get(suffix, _summarise_generic)
|
||||
fn = _SUMMARISERS.get(suffix, _summarise_generic)
|
||||
try:
|
||||
heuristic_outline = fn(path, content)
|
||||
# Smart AI Summarization
|
||||
is_code = suffix in [".py", ".ps1", ".js", ".ts", ".cpp", ".c", ".h", ".cs", ".go", ".rs", ".lua"]
|
||||
try:
|
||||
from src import ai_client
|
||||
smart_summary = ai_client.run_subagent_summarization(
|
||||
file_path=str(path),
|
||||
content=content[:10000],
|
||||
@@ -205,31 +189,31 @@ def summarise_file(path: Path, content: str) -> str:
|
||||
|
||||
def summarise_items(file_items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """
    Attach a `summary` entry to every file item.

    Given a list of file_item dicts (as returned by aggregate.build_file_items),
    return a parallel list of new dicts, each a shallow copy of the input
    item with an added `summary` key. The input dicts are not mutated.

    Items flagged with a truthy `error`, or lacking a `path`, receive the
    placeholder summary "_Error reading file_" and are never passed to
    `summarise_file`.
    """
    result: list[dict[str, Any]] = []
    for item in file_items:
        path = item.get("path")
        if item.get("error", False) or path is None:
            summary = "_Error reading file_"
        else:
            # `summarise_file` hashes the content before its own error
            # handling kicks in, so make sure it always receives a string
            # even when the item carries an explicit `content: None`.
            content = item.get("content") or ""
            p = path if isinstance(path, Path) else Path(path)
            summary = summarise_file(p, content)
        result.append({**item, "summary": summary})
    return result
|
||||
|
||||
def build_summary_markdown(file_items: list[dict[str, Any]]) -> str:
    """
    Render the file summaries as one compact markdown string.

    Each item is summarised via `summarise_items` and emitted as a section
    headed by its `path` (falling back to its `entry`, then "unknown").
    Sections are separated by a horizontal rule. The result is suitable for
    the initial <context> block instead of full file contents.
    """
    sections = [
        f"### `{entry.get('path') or entry.get('entry', 'unknown')}`\n\n{entry.get('summary', '')}"
        for entry in summarise_items(file_items)
    ]
    return "\n\n---\n\n".join(sections)
|
||||
|
||||
Reference in New Issue
Block a user