783e5fd9fe
- src/code_path_audit_ssdl.py: 9 functions translating per-aggregate findings into SSDL primitives (compute_effective_codepaths, count_branches_in_function, detect_nil_check_pattern, compute_field_access_efficiency, suggest_defusing_technique, render_ssdl_sketch/rollup, render_organization_deductions). - src/code_path_audit.py:render_rollups() now emits ssdl_analysis.md + organization_deductions.md alongside the existing 8 rollups. - src/code_path_audit_render.py:render_full_markdown() adds SSDL sketch section per profile (effective codepaths + defusing recommendations). Real findings (Metadata aggregate): - 35 consumers, 251 total branches, 1.13e18 effective codepaths - 6 nil-check functions (candidates for [N] sentinel) - 130 field-access sites, 0% typed (candidates for immediate-mode cache) - Verdict: needs restructuring Audit output grew 2136 -> 2415 lines. All 131 tests pass. Meta-audit clean (0 violations).
329 lines
13 KiB
Python
329 lines
13 KiB
Python
"""Enriched markdown renderers for code_path_audit v2.
|
|
|
|
Provides per-profile detail: call graph, field access breakdown,
|
|
struct shape, frequency per function, and concrete optimization
|
|
candidates. Designed for 2k+ line audit reports.
|
|
"""
|
|
from __future__ import annotations
|
|
from collections import Counter
|
|
from src.code_path_audit import (
|
|
AggregateProfile,
|
|
FunctionRef,
|
|
)
|
|
from src.code_path_audit_ssdl import render_ssdl_sketch
|
|
|
|
|
|
# Attribute names of the cross-audit buckets, in the order their rows are emitted.
_CROSS_AUDIT_BUCKETS = (
    "weak_types",
    "exception_handling",
    "optional_in_baseline",
    "config_io_ownership",
    "import_graph",
)

# Column cap for the field access matrix; extra fields are summarised in a footer.
_MATRIX_MAX_FIELDS = 20

# Per-row cap on `field=count` pairs listed in the evidence appendix.
_APPENDIX_MAX_FIELDS = 10


def _header_and_pipeline_lines(profile: AggregateProfile) -> list[str]:
    """Render the title block and the "Pipeline summary" bullet list."""
    cost = profile.decomposition_cost
    return [
        f"# Aggregate Profile: {profile.name}",
        "",
        f"**Aggregate kind:** {profile.aggregate_kind}",
        f"**Memory dim:** {profile.memory_dim}",
        f"**Is candidate:** {profile.is_candidate}",
        "",
        "## Pipeline summary",
        "",
        f"- Producers: {len(profile.producers)}",
        f"- Consumers: {len(profile.consumers)}",
        f"- Distinct producer fqnames: {len({ref.fqname for ref in profile.producers})}",
        f"- Distinct consumer fqnames: {len({ref.fqname for ref in profile.consumers})}",
        f"- Access pattern (aggregate): {profile.access_pattern}",
        f"- Frequency (aggregate): {profile.frequency}",
        f"- Decomposition direction: {cost.recommended_direction}",
        f"- Struct field count (estimated): {cost.struct_field_count}",
        "",
    ]


def _function_refs_lines(
    title: str,
    role: str,
    refs: list[FunctionRef] | tuple[FunctionRef, ...],
) -> list[str]:
    """Render a "## <title> (N)" section listing *refs* grouped by their file.

    *role* is the singular noun ("producer"/"consumer") used in each per-file
    sub-heading; an "s" is appended unless the file holds exactly one function.
    """
    out = [f"## {title} ({len(refs)})", ""]
    if not refs:
        out.extend(["_(none)_", ""])
        return out

    by_file: dict[str, list[FunctionRef]] = {}
    for ref in refs:
        by_file.setdefault(ref.file, []).append(ref)

    for file in sorted(by_file):
        funcs = by_file[file]
        plural = "s" if len(funcs) != 1 else ""
        out.append(f"### `{file}` ({len(funcs)} {role}{plural})")
        out.append("")
        out.extend(f"- `{ref.fqname}` (line {ref.line})" for ref in funcs)
        out.append("")
    return out


def _field_access_matrix_lines(profile: AggregateProfile) -> list[str]:
    """Render the consumer x field table (at most ``_MATRIX_MAX_FIELDS`` columns)."""
    out = ["## Field access matrix", ""]
    evidence = profile.access_pattern_evidence

    all_fields: set[str] = set()
    for ev in evidence:
        all_fields.update(ev.field_accesses.keys())

    # Covers both "no evidence at all" and "evidence without any field access".
    if not all_fields:
        out.extend(["_(no field accesses detected)_", ""])
        return out

    sorted_fields = sorted(all_fields)
    shown = sorted_fields[:_MATRIX_MAX_FIELDS]
    out.append("| consumer | " + " | ".join(shown) + " |")
    out.append("|---|" + "|".join(["---"] * len(shown)) + "|")
    for ev in evidence:
        # Only the last dotted component of the fqname is used as the row label.
        name = ev.function.fqname.rsplit(".", 1)[-1]
        cells = []
        for field_name in shown:
            count = ev.field_accesses.get(field_name, 0)
            cells.append(str(count) if count > 0 else ".")
        out.append(f"| `{name}` | " + " | ".join(cells) + " |")

    hidden = len(sorted_fields) - len(shown)
    if hidden > 0:
        out.append("")
        out.append(f"_... {hidden} more fields_")
    out.append("")
    return out


def _access_pattern_lines(profile: AggregateProfile) -> list[str]:
    """Render the dominant access pattern plus the per-function distribution."""
    evidence = profile.access_pattern_evidence
    out = [
        "## Access pattern",
        "",
        f"**Dominant pattern:** {profile.access_pattern}",
        f"**Evidence count:** {len(evidence)}",
    ]
    if evidence:
        pattern_counts: Counter[str] = Counter(ev.pattern for ev in evidence)
        out.extend(["", "**Per-function pattern distribution:**", ""])
        for pat, count in pattern_counts.most_common():
            pct = count / len(evidence) * 100
            out.append(f"- `{pat}`: {count} functions ({pct:.0f}%)")
    out.append("")
    return out


def _frequency_lines(profile: AggregateProfile) -> list[str]:
    """Render the dominant frequency plus the per-function distribution."""
    evidence = profile.frequency_evidence
    out = [
        "## Frequency",
        "",
        f"**Dominant frequency:** {profile.frequency}",
        f"**Evidence count:** {len(evidence)}",
    ]
    if evidence:
        freq_counts: Counter[str] = Counter(ev.frequency for ev in evidence)
        out.extend(["", "**Per-function frequency distribution:**", ""])
        for freq, count in freq_counts.most_common():
            out.append(f"- `{freq}`: {count} functions")
    out.append("")
    return out


def _coverage_lines(profile: AggregateProfile) -> list[str]:
    """Render the "Result coverage" and "Type alias coverage" metric tables."""
    result = profile.result_coverage
    alias = profile.type_alias_coverage
    return [
        "## Result coverage",
        "",
        f"**Summary:** {result.summary}",
        "",
        "| metric | value |",
        "|---|---|",
        f"| total producers | {result.total_producers} |",
        f"| result producers | {result.result_producers} |",
        f"| total consumers | {result.total_consumers} |",
        f"| result consumers | {result.result_consumers} |",
        "",
        "## Type alias coverage",
        "",
        f"**Summary:** {alias.summary}",
        "",
        "| metric | value |",
        "|---|---|",
        f"| total field-access sites | {alias.total_sites} |",
        f"| typed sites (canonical field) | {alias.typed_sites} |",
        f"| untyped sites (wildcard) | {alias.untyped_sites} |",
        "",
    ]


def _cross_audit_lines(profile: AggregateProfile) -> list[str]:
    """Render one table row per cross-audit finding, bucket by bucket."""
    out = ["## Cross-audit findings", ""]
    findings = profile.cross_audit_findings
    buckets = [(bucket, getattr(findings, bucket)) for bucket in _CROSS_AUDIT_BUCKETS]

    if sum(len(items) for _, items in buckets) == 0:
        out.append("_(no cross-audit findings mapped to this aggregate)_")
    else:
        out.append("| bucket | audit script | site count | example file | example line | note |")
        out.append("|---|---|---|---|---|---|")
        for bucket, items in buckets:
            for finding in items:
                out.append(
                    f"| {bucket} | `{finding.audit_script}` | {finding.site_count} "
                    f"| `{finding.example_file}` | {finding.example_line} | {finding.note} |"
                )
    out.append("")
    return out


def _decomposition_cost_lines(profile: AggregateProfile) -> list[str]:
    """Render the decomposition cost estimates and the recommended direction."""
    dc = profile.decomposition_cost
    return [
        "## Decomposition cost",
        "",
        f"**Current cost estimate:** {dc.current_cost_estimate} us/turn",
        f"**Componentize savings:** {dc.componentize_savings} us/turn",
        f"**Unify savings:** {dc.unify_savings} us/turn",
        f"**Recommended direction:** {dc.recommended_direction}",
        f"**Rationale:** {dc.recommended_rationale}",
        f"**Struct field count (estimated):** {dc.struct_field_count}",
        f"**Struct frozen:** {dc.struct_frozen}",
        "",
    ]


def _struct_shape_lines(profile: AggregateProfile) -> list[str]:
    """Render the per-field usage table shown under "Struct shape"."""
    out = ["## Struct shape (inferred from producer returns)", ""]
    if profile.producers:
        # NOTE(review): each evidence entry contributes 1 per field it mentions,
        # so the "access count" column is really "number of evidence entries
        # touching the field", not the summed access count. Kept as-is.
        field_usage: Counter[str] = Counter()
        for ev in profile.access_pattern_evidence:
            field_usage.update(ev.field_accesses.keys())

        if field_usage:
            out.append("| field | access count | access pattern |")
            out.append("|---|---|---|")
            for field_name, count in field_usage.most_common():
                # Counts built this way are always >= 1, so only two labels are
                # reachable (the former "dead" label could never be produced).
                pattern = "hot" if count >= 3 else "used"
                out.append(f"| `{field_name}` | {count} | {pattern} |")
        else:
            out.append("_(no field access data; cannot infer shape)_")
    else:
        out.append("_(no producers; cannot infer shape)_")
    out.append("")
    return out


def _optimization_candidate_lines(profile: AggregateProfile) -> list[str]:
    """Render one sub-section per optimization candidate."""
    out = ["## Optimization candidates", ""]
    if not profile.optimization_candidates:
        out.extend(["_(no optimization candidates generated)_", ""])
        return out

    for cand in profile.optimization_candidates:
        out.append(f"### {cand.direction.upper()}: {cand.candidate}")
        out.append("")
        out.append(f"- **Effort:** {cand.effort}")
        out.append(f"- **Priority:** {cand.priority}")
        out.append(f"- **Estimated savings:** {cand.estimated_savings_us} us/turn")
        out.append(f"- **Affected files ({len(cand.affected_files)}):**")
        out.extend(f" - `{path}`" for path in cand.affected_files)
        out.append(f"- **Reference:** {cand.cross_ref}")
        out.append("")
    return out


def _evidence_appendix_lines(profile: AggregateProfile) -> list[str]:
    """Render the raw access-pattern and frequency evidence tables."""
    out = ["## Evidence appendix", ""]

    if profile.access_pattern_evidence:
        out.append("### Access pattern evidence")
        out.append("")
        out.append("| function | pattern | field_accesses | confidence |")
        out.append("|---|---|---|---|")
        for ev in profile.access_pattern_evidence:
            pairs = list(ev.field_accesses.items())[:_APPENDIX_MAX_FIELDS]
            fields_str = ", ".join(f"`{k}`={v}" for k, v in pairs)
            extra = len(ev.field_accesses) - _APPENDIX_MAX_FIELDS
            if extra > 0:
                fields_str += f" (+{extra} more)"
            out.append(f"| `{ev.function.fqname}` | `{ev.pattern}` | {fields_str} | {ev.confidence} |")
        out.append("")

    if profile.frequency_evidence:
        out.append("### Frequency evidence")
        out.append("")
        out.append("| function | frequency | source | note |")
        out.append("|---|---|---|---|")
        for ev in profile.frequency_evidence:
            out.append(f"| `{ev.function.fqname}` | `{ev.frequency}` | `{ev.source}` | {ev.note} |")
        out.append("")
    return out


def render_full_markdown(profile: AggregateProfile) -> str:
    """Render the per-aggregate markdown report with full detail.

    Sections, in emission order:
      1. Header (name, kind, memory_dim, is_candidate)
      2. Pipeline summary (producer/consumer counts, dominant pattern/frequency)
      3. Producers (grouped by file)
      4. Consumers (grouped by file)
      5. Field access matrix (consumer x field, first 20 fields)
      6. Access pattern (dominant + per-function distribution)
      7. SSDL sketch (delegated to ``render_ssdl_sketch(profile, "src")``)
      8. Frequency (dominant + per-function distribution)
      9. Result coverage
     10. Type alias coverage
     11. Cross-audit findings (one row per finding, per bucket)
     12. Decomposition cost
     13. Struct shape (per-field usage)
     14. Optimization candidates
     15. Verdict (the decomposition rationale)
     16. Evidence appendix (access-pattern and frequency evidence tables)

    Args:
        profile: The aggregate profile to render.

    Returns:
        The report as a single string with lines joined by ``"\\n"``.
    """
    lines: list[str] = []
    lines.extend(_header_and_pipeline_lines(profile))
    lines.extend(_function_refs_lines("Producers", "producer", profile.producers))
    lines.extend(_function_refs_lines("Consumers", "consumer", profile.consumers))
    lines.extend(_field_access_matrix_lines(profile))
    lines.extend(_access_pattern_lines(profile))

    # SSDL sketch sits between "Access pattern" and "Frequency". It is appended
    # as one element because the helper returns an already-rendered string.
    lines.append(render_ssdl_sketch(profile, "src"))
    lines.append("")

    lines.extend(_frequency_lines(profile))
    lines.extend(_coverage_lines(profile))
    lines.extend(_cross_audit_lines(profile))
    lines.extend(_decomposition_cost_lines(profile))
    lines.extend(_struct_shape_lines(profile))
    lines.extend(_optimization_candidate_lines(profile))

    # Verdict: restates the decomposition rationale on its own.
    lines.extend(["## Verdict", "", f"{profile.decomposition_cost.recommended_rationale}", ""])

    lines.extend(_evidence_appendix_lines(profile))
    return "\n".join(lines)
|
|
|
|
|
|
def render_field_usage_rollup(profiles: tuple[AggregateProfile, ...]) -> str:
    """Render the cross-aggregate field usage rollup as markdown.

    For every profile whose ``is_candidate`` flag is falsy, the per-field
    counts of all its access-pattern evidence entries are summed. The table
    then lists, per aggregate (sorted by name), the ten fields with the
    highest totals; ties keep first-seen order.

    Args:
        profiles: Aggregate profiles to summarise.

    Returns:
        The rollup as a single ``"\\n"``-joined string. The table is omitted
        entirely when no included profile has any field access recorded.
    """
    lines: list[str] = ["# Field Usage Rollup", ""]
    lines.append("Cross-aggregate analysis of which fields are accessed how often across the codebase.")
    lines.append("")

    usage_by_aggregate: dict[str, dict[str, int]] = {}
    for profile in profiles:
        if profile.is_candidate:
            continue
        for ev in profile.access_pattern_evidence:
            # Register the aggregate only once it contributes a field, so that
            # evidence with no field accesses cannot yield a header-only table.
            if not ev.field_accesses:
                continue
            totals = usage_by_aggregate.setdefault(profile.name, {})
            for field_name, count in ev.field_accesses.items():
                totals[field_name] = totals.get(field_name, 0) + count

    if usage_by_aggregate:
        lines.append("| aggregate | field | total accesses |")
        lines.append("|---|---|---|")
        for aggregate in sorted(usage_by_aggregate):
            # reverse=True keeps the sort stable, so equal totals stay in
            # first-seen order.
            ranked = sorted(
                usage_by_aggregate[aggregate].items(),
                key=lambda item: item[1],
                reverse=True,
            )
            for field_name, count in ranked[:10]:
                lines.append(f"| `{aggregate}` | `{field_name}` | {count} |")
        lines.append("")

    return "\n".join(lines)
|
|
|
|
|
|
def render_call_graph_rollup(profiles: tuple[AggregateProfile, ...]) -> str:
    """Render the call graph rollup: producers and consumers of each aggregate.

    Profiles whose ``is_candidate`` flag is truthy are skipped. Every other
    profile gets its own section with one table row per producer followed by
    one row per consumer. Within each role the rows are ordered by file so the
    output matches the "grouped by file" wording of the emitted intro;
    functions sharing a file keep their original relative order.

    Args:
        profiles: Aggregate profiles to summarise.

    Returns:
        The rollup as a single ``"\\n"``-joined string.
    """
    lines: list[str] = ["# Call Graph Rollup", ""]
    lines.append("Functions that are producers or consumers of each aggregate, grouped by file.")
    lines.append("")

    for profile in profiles:
        if profile.is_candidate:
            continue

        lines.append(f"## {profile.name} ({len(profile.producers)} producers + {len(profile.consumers)} consumers)")
        lines.append("")

        if profile.producers or profile.consumers:
            lines.append("| role | fqname | file |")
            lines.append("|---|---|---|")
            for role, refs in (("producer", profile.producers), ("consumer", profile.consumers)):
                # sorted() is stable: rows cluster by file without reshuffling
                # the functions that live in the same file.
                for ref in sorted(refs, key=lambda r: r.file):
                    lines.append(f"| {role} | `{ref.fqname}` | `{ref.file}` |")
        else:
            lines.append("_(no producers or consumers)_")
        lines.append("")

    return "\n".join(lines)