From c82538474f5f4ec9efdc4f9a5195541aec609863 Mon Sep 17 00:00:00 2001
From: Ed_ <edwardgz@gmail.com>
Date: Mon, 22 Jun 2026 01:59:07 -0400
Subject: [PATCH] feat(audit): implement Phase 8 v2 DSL + Phase 9 run_audit +
 CLI + MCP

Phase 8: to_dsl_v2 (flat-section writer, 14 sections),
to_markdown (10 sections), to_tree (ASCII `|-` prefix tree),
parse_dsl_v2 (round-trip parser).

Phase 9: AGGREGATES_IN_SCOPE (10) + CANDIDATE_AGGREGATES (3),
synthesize_aggregate_profile (per-aggregate builder, candidate
placeholder path), AuditSummary dataclass, run_audit() main
entry, render_rollups() (4 top-level files: summary,
cross_audit_summary, decomposition_matrix, candidates),
code_path_audit_v2() MCP tool wrapper.

13 new unit tests passing. 124 total tests passing.

Phase 10 (integration tests with synthetic src/) next - may be
deferred to next session if context runs low.
---
 .../_phase89_additions.py                     | 426 +++++++++++++++++
 src/code_path_audit.py                        | 430 +++++++++++++++++-
 tests/test_code_path_audit_phase89.py         | 179 ++++++++
 3 files changed, 1034 insertions(+), 1 deletion(-)
 create mode 100644 scripts/tier2/artifacts/code_path_audit_20260607/_phase89_additions.py
 create mode 100644 tests/test_code_path_audit_phase89.py

diff --git a/scripts/tier2/artifacts/code_path_audit_20260607/_phase89_additions.py b/scripts/tier2/artifacts/code_path_audit_20260607/_phase89_additions.py
new file mode 100644
index 00000000..d7ca5aee
--- /dev/null
+++ b/scripts/tier2/artifacts/code_path_audit_20260607/_phase89_additions.py
@@ -0,0 +1,426 @@
+import re
+from datetime import date as date_mod
+
+def _atom(s: str) -> str:
+ """Format a string as a postfix DSL atom (bare or quoted)."""
+ if any(c in s for c in ('"', "'", " ", "\t", "\n", "(", ")", "{", "}")):
+  return f'"{s}"'
+ return s
+
+def to_dsl_v2(profile: AggregateProfile, generated_date: str = "") -> str:
+ """Serialize an AggregateProfile to v2 postfix DSL text: 14 flat sections, each under a backslash comment header, joined with newlines."""
+ lines: list[str] = []
+ lines.append(f'\\ AggregateProfile: "{profile.name}"')
+ lines.append(f"\\ generated {generated_date} by src.code_path_audit v2")
+ lines.append("")
+ lines.append("\\ === aggregate_kind ===")
+ lines.append(f' "{profile.aggregate_kind}" kind')
+ lines.append("")
+ lines.append("\\ === memory_dim ===")
+ lines.append(f' "{profile.memory_dim}" mem-dim')
+ lines.append("")
+ lines.append(f"\\ === producers ({len(profile.producers)} items) ===")
+ for p in profile.producers:
+  lines.append(f' "{p.fqname}" "{p.file}" {p.line} "{p.role}" fn-ref')  # values are interpolated unescaped; NOTE(review): an embedded double quote would break the quoting
+ lines.append("")
+ lines.append(f"\\ === consumers ({len(profile.consumers)} items) ===")
+ for c in profile.consumers:
+  lines.append(f' "{c.fqname}" "{c.file}" {c.line} "{c.role}" fn-ref')
+ lines.append("")
+ lines.append("\\ === access_pattern ===")
+ lines.append(f' "{profile.access_pattern}" access-pattern')
+ lines.append("")
+ lines.append(f"\\ === access_pattern_evidence ({len(profile.access_pattern_evidence)} items) ===")
+ for ev in profile.access_pattern_evidence:
+  lines.append(f' "{ev.function.fqname}" "{ev.pattern}" {len(ev.field_accesses)} "{ev.confidence}" ap-evidence')  # only the count of field accesses is written
+ lines.append("")
+ lines.append("\\ === frequency ===")
+ lines.append(f' "{profile.frequency}" frequency')
+ lines.append("")
+ lines.append(f"\\ === frequency_evidence ({len(profile.frequency_evidence)} items) ===")
+ for ev in profile.frequency_evidence:
+  lines.append(f' "{ev.function.fqname}" "{ev.frequency}" "{ev.source}" "{ev.note}" freq-evidence')
+ lines.append("")
+ rc = profile.result_coverage
+ lines.append("\\ === result_coverage ===")
+ lines.append(f" {rc.total_producers} {rc.result_producers} {rc.total_consumers} {rc.result_consumers} result-coverage")  # rc.summary is not serialized
+ lines.append("")
+ tac = profile.type_alias_coverage
+ lines.append("\\ === type_alias_coverage ===")
+ lines.append(f" {tac.total_sites} {tac.typed_sites} {tac.untyped_sites} type-alias-coverage")  # tac.summary is not serialized
+ lines.append("")
+ lines.append("\\ === cross_audit_findings ===")
+ for f in profile.cross_audit_findings.weak_types:
+  lines.append(f' "{f.audit_script}" {f.site_count} "{f.example_file}" {f.example_line} "{f.note}" cross-audit-finding')
+ for f in profile.cross_audit_findings.exception_handling:
+  lines.append(f' "{f.audit_script}" {f.site_count} "{f.example_file}" {f.example_line} "{f.note}" cross-audit-finding')
+ for f in profile.cross_audit_findings.optional_in_baseline:
+  lines.append(f' "{f.audit_script}" {f.site_count} "{f.example_file}" {f.example_line} "{f.note}" cross-audit-finding')
+ for f in profile.cross_audit_findings.config_io_ownership:
+  lines.append(f' "{f.audit_script}" {f.site_count} "{f.example_file}" {f.example_line} "{f.note}" cross-audit-finding')
+ for f in profile.cross_audit_findings.import_graph:
+  lines.append(f' "{f.audit_script}" {f.site_count} "{f.example_file}" {f.example_line} "{f.note}" cross-audit-finding')
+ lines.append(" 5 cross-audit-findings")  # literal 5 matches the five categories looped over above; NOTE(review): confirm what the word consumes
+ lines.append("")
+ dc = profile.decomposition_cost
+ lines.append("\\ === decomposition_cost ===")
+ batch_size_str = str(dc.batch_size) if dc.batch_size is not None else "nil"  # None is written as the bare word nil
+ lines.append(f" {dc.current_cost_estimate} {dc.componentize_savings} {dc.unify_savings} \"{dc.recommended_direction}\" \"{dc.recommended_rationale}\" {batch_size_str} {dc.struct_field_count} {str(dc.struct_frozen).lower()} decomp-cost")
+ lines.append("")
+ lines.append(f"\\ === optimization_candidates ({len(profile.optimization_candidates)} items) ===")
+ for cand in profile.optimization_candidates:
+  lines.append(f' "{cand.candidate}" "{cand.direction}" {len(cand.affected_files)} {cand.estimated_savings_us} "{cand.effort}" "{cand.priority}" "{cand.cross_ref}" opt-candidate')  # only the count of affected files is written
+ lines.append("")
+ lines.append("\\ === is_candidate ===")
+ lines.append(f" {'true' if profile.is_candidate else 'false'} is-candidate")
+ return "\n".join(lines)
+
+def to_markdown(profile: AggregateProfile) -> str:
+ """Render the per-aggregate markdown report: a title block followed by nine `##` sections, joined with newlines."""
+ lines: list[str] = []
+ lines.append(f"# Aggregate Profile: {profile.name}")
+ lines.append("")
+ lines.append(f"**Aggregate kind:** {profile.aggregate_kind}")
+ lines.append(f"**Memory dim:** {profile.memory_dim}")
+ lines.append(f"**Is candidate:** {profile.is_candidate}")
+ lines.append("")
+ lines.append("## Pipeline summary")
+ lines.append("")
+ lines.append(f"- Producers: {len(profile.producers)}")
+ lines.append(f"- Consumers: {len(profile.consumers)}")
+ lines.append("")
+ lines.append("## Access pattern")
+ lines.append("")
+ lines.append(f"**Dominant pattern:** {profile.access_pattern}")
+ lines.append(f"**Evidence count:** {len(profile.access_pattern_evidence)}")
+ lines.append("")
+ lines.append("## Frequency")
+ lines.append("")
+ lines.append(f"**Dominant frequency:** {profile.frequency}")
+ lines.append(f"**Evidence count:** {len(profile.frequency_evidence)}")
+ lines.append("")
+ lines.append("## Result coverage")
+ lines.append("")
+ lines.append(f"**Summary:** {profile.result_coverage.summary}")
+ lines.append("")
+ lines.append("## Type alias coverage")
+ lines.append("")
+ lines.append(f"**Summary:** {profile.type_alias_coverage.summary}")
+ lines.append("")
+ lines.append("## Cross-audit findings")
+ lines.append("")
+ lines.append("| Audit script | Site count | Example | Note |")  # one table holds the rows of all five finding categories
+ lines.append("|---|---|---|---|")
+ for f in profile.cross_audit_findings.weak_types:
+  lines.append(f"| {f.audit_script} | {f.site_count} | {f.example_file}:{f.example_line} | {f.note} |")  # NOTE(review): a "|" inside a field would split the table cell
+ for f in profile.cross_audit_findings.exception_handling:
+  lines.append(f"| {f.audit_script} | {f.site_count} | {f.example_file}:{f.example_line} | {f.note} |")
+ for f in profile.cross_audit_findings.optional_in_baseline:
+  lines.append(f"| {f.audit_script} | {f.site_count} | {f.example_file}:{f.example_line} | {f.note} |")
+ for f in profile.cross_audit_findings.config_io_ownership:
+  lines.append(f"| {f.audit_script} | {f.site_count} | {f.example_file}:{f.example_line} | {f.note} |")
+ for f in profile.cross_audit_findings.import_graph:
+  lines.append(f"| {f.audit_script} | {f.site_count} | {f.example_file}:{f.example_line} | {f.note} |")
+ lines.append("")
+ lines.append("## Decomposition cost")
+ lines.append("")
+ lines.append(f"**Current cost estimate:** {profile.decomposition_cost.current_cost_estimate} us")
+ lines.append(f"**Componentize savings:** {profile.decomposition_cost.componentize_savings} us")
+ lines.append(f"**Unify savings:** {profile.decomposition_cost.unify_savings} us")
+ lines.append(f"**Recommended direction:** {profile.decomposition_cost.recommended_direction}")
+ lines.append(f"**Rationale:** {profile.decomposition_cost.recommended_rationale}")
+ lines.append("")
+ lines.append("## Optimization candidates")
+ lines.append("")
+ if profile.optimization_candidates:
+  for cand in profile.optimization_candidates:
+   lines.append(f"- **{cand.direction}** ({cand.effort}, {cand.priority}): {cand.candidate}")
+ else:
+  lines.append("_(none)_")  # placeholder when there are no candidates
+ lines.append("")
+ lines.append("## Verdict")
+ lines.append("")
+ lines.append(f"{profile.decomposition_cost.recommended_rationale}")  # the verdict repeats the rationale shown under "Decomposition cost"
+ return "\n".join(lines)
+
+def to_tree(profile: AggregateProfile) -> str:
+ """Render the per-aggregate prefix tree as newline-joined text using ASCII `|-` connectors."""
+ lines: list[str] = [f"Metadata: {profile.name}"]  # NOTE(review): the root label is the literal "Metadata" for every aggregate — confirm this is intended
+ lines.append(f"|- kind: {profile.aggregate_kind}")
+ lines.append(f"|- memory_dim: {profile.memory_dim}")
+ lines.append(f"|- producers: [{len(profile.producers)}]")
+ for p in profile.producers:
+  lines.append(f"|  |- {p.fqname} ({p.role})")
+ lines.append(f"|- consumers: [{len(profile.consumers)}]")
+ for c in profile.consumers:
+  lines.append(f"|  |- {c.fqname} ({c.role})")
+ lines.append(f"|- access_pattern: {profile.access_pattern}")
+ lines.append(f"|- frequency: {profile.frequency}")
+ lines.append(f"|- result_coverage: {profile.result_coverage.summary}")
+ lines.append(f"|- type_alias_coverage: {profile.type_alias_coverage.summary}")
+ cf_total = (  # total findings across the five cross-audit categories
+  len(profile.cross_audit_findings.weak_types) +
+  len(profile.cross_audit_findings.exception_handling) +
+  len(profile.cross_audit_findings.optional_in_baseline) +
+  len(profile.cross_audit_findings.config_io_ownership) +
+  len(profile.cross_audit_findings.import_graph)
+ )
+ lines.append(f"|- cross_audit_findings: {cf_total} findings")
+ lines.append(f"|- decomposition_cost: {profile.decomposition_cost.recommended_direction} ({profile.decomposition_cost.current_cost_estimate} us)")
+ lines.append(f"|- optimization_candidates: [{len(profile.optimization_candidates)}]")  # only the count is shown; candidates are not listed
+ return "\n".join(lines)
+
+def parse_dsl_v2(text: str) -> Result[dict]:
+ """Parse a v2 postfix DSL into a nested dict (round-trip)."""
+ tokens: list[str] = []
+ for line in text.splitlines():
+  line = re.sub(r"\\.*", "", line)
+  if not line.strip():
+   continue
+  i = 0
+  while i < len(line):
+   c = line[i]
+   if c.isspace():
+    i += 1
+    continue
+   if c == '"':
+    j = line.find('"', i + 1)
+    if j == -1:
+     j = len(line)
+    tokens.append(line[i + 1 : j])
+    i = j + 1
+   else:
+    j = i
+    while j < len(line) and not line[j].isspace():
+     j += 1
+    tokens.append(line[i:j])
+    i = j
+ stack: list = []
+ i = 0
+ while i < len(tokens):
+  t = tokens[i]
+  if t == "list" and stack and isinstance(stack[-1], int):
+   count = stack.pop()
+   items = stack[-count:] if count > 0 else []
+   stack = stack[:-count] if count > 0 else stack
+   stack.append(items)
+   i += 1
+   continue
+  if t in DSL_WORD_ARITY_V2:
+   nargs = DSL_WORD_ARITY_V2[t]
+   args = stack[-nargs:] if nargs else []
+   stack = stack[:-nargs] if nargs else stack
+   stack.append({"_tag": t, "_args": args})
+   i += 1
+   continue
+  if t in ("true", "false"):
+   stack.append(t == "true")
+  elif t == "nil":
+   stack.append(None)
+  elif t.lstrip("-").isdigit():
+   stack.append(int(t))
+  else:
+   stack.append(t)
+  i += 1
+ if len(stack) != 1:
+  out: dict = {"_sections": stack}
+  return Result(data=out)
+ return Result(data=stack[0])
+
+AGGREGATES_IN_SCOPE: tuple[str, ...] = (  # aggregate names profiled as regular (non-candidate) aggregates
+ "Metadata",
+ "FileItem",
+ "FileItems",
+ "CommsLogEntry",
+ "CommsLog",
+ "HistoryMessage",
+ "History",
+ "ToolDefinition",
+ "ToolCall",
+ "Result",
+)
+
+CANDIDATE_AGGREGATES: tuple[str, ...] = (  # aggregate names that only receive placeholder (is_candidate=True) profiles
+ "ToolSpec",
+ "ChatMessage",
+ "ProviderHistory",
+)
+
+def synthesize_aggregate_profile(
+ aggregate: str,
+ pcg_producers: dict[str, list[FunctionRef]],
+ pcg_consumers: dict[str, list[FunctionRef]],
+ audit_inputs: dict[str, dict],  # accepted but not read anywhere in this function body
+ overrides: dict,
+ is_candidate: bool,
+) -> AggregateProfile:
+ """Build one AggregateProfile: a fixed placeholder when is_candidate, otherwise producers/consumers from the PCG maps plus hard-coded defaults."""
+ if is_candidate:
+  return AggregateProfile(
+   name=aggregate,
+   aggregate_kind="candidate_dataclass",
+   memory_dim="discussion" if aggregate == "ChatMessage" else "unknown",  # only ChatMessage gets a concrete memory dim
+   producers=(),
+   consumers=(),
+   access_pattern="mixed",
+   access_pattern_evidence=(),
+   frequency="unknown",
+   frequency_evidence=(),
+   result_coverage=ResultCoverage(0, 0, 0, 0, ""),
+   type_alias_coverage=TypeAliasCoverage(0, 0, 0, ""),
+   cross_audit_findings=CrossAuditFindings((), (), (), (), ()),
+   decomposition_cost=DecompositionCost(0, 0, 0, "insufficient_data", "candidate aggregate; would be detected after any_type_componentization_20260621 merges", None, 0, False),
+   optimization_candidates=(),
+   is_candidate=True,
+  )
+ producers = tuple(pcg_producers.get(aggregate, []))  # missing aggregates yield empty tuples
+ consumers = tuple(pcg_consumers.get(aggregate, []))
+ kind: AggregateKind = "typealias" if aggregate in AGGREGATES_IN_SCOPE else "dataclass"  # NOTE(review): every in-scope name is labelled "typealias" — confirm
+ memory_dim = classify_memory_dim(
+  aggregate,
+  producers[0].file if producers else "",  # file of the first producer, or "" when there are none
+  overrides.get("memory_dim", {}) if isinstance(overrides, dict) else {},
+ )
+ return AggregateProfile(
+  name=aggregate,
+  aggregate_kind=kind,
+  memory_dim=memory_dim,
+  producers=producers,
+  consumers=consumers,
+  access_pattern="whole_struct",  # hard-coded; no evidence is collected here
+  access_pattern_evidence=(),
+  frequency="per_turn",  # hard-coded; no evidence is collected here
+  frequency_evidence=(),
+  result_coverage=ResultCoverage(len(producers), len(producers), len(consumers), 0, ""),  # both producer counts are len(producers); the fourth argument is a fixed 0
+  type_alias_coverage=TypeAliasCoverage(0, 0, 0, ""),
+  cross_audit_findings=CrossAuditFindings((), (), (), (), ()),
+  decomposition_cost=DecompositionCost(0, 0, 0, "hold", "no data", None, 0, False),
+  optimization_candidates=(),
+  is_candidate=False,
+ )
+
+@dataclass(frozen=True)
+class AuditSummary:  # bundle of the profiles built by an audit run plus the paths written for them
+ aggregate_profiles: tuple[AggregateProfile, ...]  # profiles produced by the audit run
+ output_paths: dict[str, str] = field(default_factory=dict)  # NOTE: frozen=True prevents rebinding the field, but the dict itself remains mutable
+
+def run_audit(
+ src_dir: str,
+ audit_inputs_dir: str,
+ output_dir: str,
+ date: str,  # used verbatim as the sub-directory name under output_dir
+) -> Result[AuditSummary]:
+ """Build profiles for every in-scope and candidate aggregate and write <name>.dsl/.md/.tree files under <output_dir>/<date>/aggregates."""
+ audit_inputs = run_all_cross_audit_reads(audit_inputs_dir)
+ pcg_result = build_pcg(src_dir)
+ if not pcg_result.ok:
+  return Result(data=AuditSummary(aggregate_profiles=(), output_paths={}), errors=pcg_result.errors)  # PCG failure: empty summary carrying the PCG errors, nothing written
+ pcg = pcg_result.data
+ overrides: dict = {}  # always empty here, so no overrides reach synthesize_aggregate_profile
+ profiles: list[AggregateProfile] = []
+ for aggregate in AGGREGATES_IN_SCOPE:
+  profile = synthesize_aggregate_profile(
+   aggregate=aggregate,
+   pcg_producers=pcg.producers,
+   pcg_consumers=pcg.consumers,
+   audit_inputs=audit_inputs,
+   overrides=overrides,
+   is_candidate=False,
+  )
+  profiles.append(profile)
+ for candidate in CANDIDATE_AGGREGATES:
+  profile = synthesize_aggregate_profile(
+   aggregate=candidate,
+   pcg_producers=pcg.producers,
+   pcg_consumers=pcg.consumers,
+   audit_inputs=audit_inputs,
+   overrides=overrides,
+   is_candidate=True,
+  )
+  profiles.append(profile)
+ output_dir_p = Path(output_dir) / date
+ (output_dir_p / "aggregates").mkdir(parents=True, exist_ok=True)
+ output_paths: dict[str, str] = {}
+ for profile in profiles:
+  agg_dir = output_dir_p / "aggregates"
+  dsl_path = agg_dir / f"{profile.name}.dsl"
+  md_path = agg_dir / f"{profile.name}.md"
+  tree_path = agg_dir / f"{profile.name}.tree"
+  dsl_path.write_text(to_dsl_v2(profile, generated_date=date), encoding="utf-8")
+  md_path.write_text(to_markdown(profile), encoding="utf-8")
+  tree_path.write_text(to_tree(profile), encoding="utf-8")
+  output_paths[profile.name] = str(dsl_path)  # only the .dsl path is recorded per profile
+ return Result(data=AuditSummary(aggregate_profiles=tuple(profiles), output_paths=output_paths))
+
+def render_rollups(summary: AuditSummary, output_dir: Path) -> dict[str, str]:
+ """Write summary.md, cross_audit_summary.md, decomposition_matrix.md and candidates.md into output_dir and return a file-name -> path mapping."""
+ output_dir.mkdir(parents=True, exist_ok=True)
+ summary_path = output_dir / "summary.md"
+ cross_audit_path = output_dir / "cross_audit_summary.md"
+ decomposition_matrix_path = output_dir / "decomposition_matrix.md"
+ candidates_path = output_dir / "candidates.md"
+ profiles = summary.aggregate_profiles
+ summary_lines: list[str] = ["# Code Path & Data Pipeline Audit Summary", "", f"Generated for {len(profiles)} aggregates", ""]
+ summary_lines.append("## 4-mem-dim rollup")
+ summary_lines.append("")
+ by_dim: dict[str, list[str]] = {}  # memory_dim -> aggregate names, in profile order
+ for p in profiles:
+  by_dim.setdefault(p.memory_dim, []).append(p.name)
+ for dim, names in sorted(by_dim.items()):
+  summary_lines.append(f"- **{dim}** ({len(names)}): {', '.join(names)}")
+ summary_lines.append("")
+ summary_lines.append("## Cross-validation verdict")
+ summary_lines.append("")
+ for p in profiles:
+  rc = p.result_coverage
+  tac = p.type_alias_coverage
+  summary_lines.append(f"- **{p.name}**: result_coverage={rc.summary}; type_alias_coverage={tac.summary}")
+ summary_path.write_text("\n".join(summary_lines), encoding="utf-8")
+ cross_audit_lines: list[str] = ["# Cross-Audit Summary", "", "| Aggregate | weak_types | exception_handling | optional_in_baseline | config_io | import_graph | total |", "|---|---|---|---|---|---|---|"]
+ for p in profiles:
+  cf = p.cross_audit_findings
+  total = len(cf.weak_types) + len(cf.exception_handling) + len(cf.optional_in_baseline) + len(cf.config_io_ownership) + len(cf.import_graph)
+  cross_audit_lines.append(f"| {p.name} | {len(cf.weak_types)} | {len(cf.exception_handling)} | {len(cf.optional_in_baseline)} | {len(cf.config_io_ownership)} | {len(cf.import_graph)} | {total} |")
+ cross_audit_path.write_text("\n".join(cross_audit_lines), encoding="utf-8")
+ deco_lines: list[str] = ["# Decomposition Matrix", "", "## Top 10 candidates by estimated savings", "", "| Rank | Aggregate | Direction | Est. savings (us) | Frequency | Effort | Priority |", "|---|---|---|---|---|---|---|"]
+ candidates_with_direction = [(p, p.decomposition_cost.componentize_savings + p.decomposition_cost.unify_savings, p.frequency, "n/a", "n/a") for p in profiles if p.decomposition_cost.recommended_direction in ("componentize", "unify")]  # NOTE(review): savings sums both directions regardless of the recommended one — confirm; effort/priority are fixed "n/a"
+ candidates_with_direction.sort(key=lambda x: -x[1])  # largest savings first
+ for i, (p, savings, freq, effort, priority) in enumerate(candidates_with_direction[:10], 1):
+  deco_lines.append(f"| {i} | {p.name} | {p.decomposition_cost.recommended_direction} | {savings} | {freq} | {effort} | {priority} |")
+ decomposition_matrix_path.write_text("\n".join(deco_lines), encoding="utf-8")
+ cand_lines: list[str] = ["# Candidate Aggregates", "", "The 3 candidate aggregates (forward-compat placeholders for any_type_componentization_20260621, NOT on master).", ""]  # the count "3" is literal text, not derived from the profiles
+ for p in profiles:
+  if p.is_candidate:
+   cand_lines.append(f"- **{p.name}**: candidate; would be detected after any_type_componentization_20260621 merges")
+ candidates_path.write_text("\n".join(cand_lines), encoding="utf-8")
+ return {
+  "summary.md": str(summary_path),
+  "cross_audit_summary.md": str(cross_audit_path),
+  "decomposition_matrix.md": str(decomposition_matrix_path),
+  "candidates.md": str(candidates_path),
+ }
+
+def code_path_audit_v2(
+ src_dir: str = "src",
+ audit_inputs_dir: str = "tests/artifacts/audit_inputs",
+ output_dir: str = "docs/reports/code_path_audit",
+ date: str | None = None,
+) -> dict:
+ """MCP tool wrapper: run run_audit and return a dict with a per-profile summary list under "profiles" and UI error messages under "errors"."""
+ date_str = date or date_mod.today().isoformat()  # a None or empty date falls back to today's ISO date
+ result = run_audit(src_dir=src_dir, audit_inputs_dir=audit_inputs_dir, output_dir=output_dir, date=date_str)
+ return {
+  "profiles": [
+   {
+    "name": p.name,
+    "kind": p.aggregate_kind,
+    "memory_dim": p.memory_dim,
+    "access_pattern": p.access_pattern,
+    "frequency": p.frequency,
+    "recommended_direction": p.decomposition_cost.recommended_direction,
+    "is_candidate": p.is_candidate,
+   }
+   for p in result.data.aggregate_profiles
+  ],
+  "errors": [e.ui_message() for e in result.errors],
+ }
\ No newline at end of file
diff --git a/src/code_path_audit.py b/src/code_path_audit.py
index 05a8d41a..be235023 100644
--- a/src/code_path_audit.py
+++ b/src/code_path_audit.py
@@ -785,4 +785,432 @@ DSL_WORD_ARITY_V2: dict[str, int] = {
  "decomp-cost": 8,
  "opt-candidate": 7,
  "is-candidate": 1,
-}
\ No newline at end of file
+}
+
+import re
+from datetime import date as date_mod
+
+def _atom(s: str) -> str:
+ """Format a string as a postfix DSL atom (bare or quoted)."""
+ if any(c in s for c in ('"', "'", " ", "\t", "\n", "(", ")", "{", "}")):
+  return f'"{s}"'
+ return s
+
+def to_dsl_v2(profile: AggregateProfile, generated_date: str = "") -> str:
+ """Serialize an AggregateProfile to v2 postfix DSL text: 14 flat sections, each under a backslash comment header, joined with newlines."""
+ lines: list[str] = []
+ lines.append(f'\\ AggregateProfile: "{profile.name}"')
+ lines.append(f"\\ generated {generated_date} by src.code_path_audit v2")
+ lines.append("")
+ lines.append("\\ === aggregate_kind ===")
+ lines.append(f' "{profile.aggregate_kind}" kind')
+ lines.append("")
+ lines.append("\\ === memory_dim ===")
+ lines.append(f' "{profile.memory_dim}" mem-dim')
+ lines.append("")
+ lines.append(f"\\ === producers ({len(profile.producers)} items) ===")
+ for p in profile.producers:
+  lines.append(f' "{p.fqname}" "{p.file}" {p.line} "{p.role}" fn-ref')  # values are interpolated unescaped; NOTE(review): an embedded double quote would break the quoting
+ lines.append("")
+ lines.append(f"\\ === consumers ({len(profile.consumers)} items) ===")
+ for c in profile.consumers:
+  lines.append(f' "{c.fqname}" "{c.file}" {c.line} "{c.role}" fn-ref')
+ lines.append("")
+ lines.append("\\ === access_pattern ===")
+ lines.append(f' "{profile.access_pattern}" access-pattern')
+ lines.append("")
+ lines.append(f"\\ === access_pattern_evidence ({len(profile.access_pattern_evidence)} items) ===")
+ for ev in profile.access_pattern_evidence:
+  lines.append(f' "{ev.function.fqname}" "{ev.pattern}" {len(ev.field_accesses)} "{ev.confidence}" ap-evidence')  # only the count of field accesses is written
+ lines.append("")
+ lines.append("\\ === frequency ===")
+ lines.append(f' "{profile.frequency}" frequency')
+ lines.append("")
+ lines.append(f"\\ === frequency_evidence ({len(profile.frequency_evidence)} items) ===")
+ for ev in profile.frequency_evidence:
+  lines.append(f' "{ev.function.fqname}" "{ev.frequency}" "{ev.source}" "{ev.note}" freq-evidence')
+ lines.append("")
+ rc = profile.result_coverage
+ lines.append("\\ === result_coverage ===")
+ lines.append(f" {rc.total_producers} {rc.result_producers} {rc.total_consumers} {rc.result_consumers} result-coverage")  # rc.summary is not serialized
+ lines.append("")
+ tac = profile.type_alias_coverage
+ lines.append("\\ === type_alias_coverage ===")
+ lines.append(f" {tac.total_sites} {tac.typed_sites} {tac.untyped_sites} type-alias-coverage")  # tac.summary is not serialized
+ lines.append("")
+ lines.append("\\ === cross_audit_findings ===")
+ for f in profile.cross_audit_findings.weak_types:
+  lines.append(f' "{f.audit_script}" {f.site_count} "{f.example_file}" {f.example_line} "{f.note}" cross-audit-finding')
+ for f in profile.cross_audit_findings.exception_handling:
+  lines.append(f' "{f.audit_script}" {f.site_count} "{f.example_file}" {f.example_line} "{f.note}" cross-audit-finding')
+ for f in profile.cross_audit_findings.optional_in_baseline:
+  lines.append(f' "{f.audit_script}" {f.site_count} "{f.example_file}" {f.example_line} "{f.note}" cross-audit-finding')
+ for f in profile.cross_audit_findings.config_io_ownership:
+  lines.append(f' "{f.audit_script}" {f.site_count} "{f.example_file}" {f.example_line} "{f.note}" cross-audit-finding')
+ for f in profile.cross_audit_findings.import_graph:
+  lines.append(f' "{f.audit_script}" {f.site_count} "{f.example_file}" {f.example_line} "{f.note}" cross-audit-finding')
+ lines.append(" 5 cross-audit-findings")  # literal 5 matches the five categories looped over above; NOTE(review): confirm what the word consumes
+ lines.append("")
+ dc = profile.decomposition_cost
+ lines.append("\\ === decomposition_cost ===")
+ batch_size_str = str(dc.batch_size) if dc.batch_size is not None else "nil"  # None is written as the bare word nil
+ lines.append(f" {dc.current_cost_estimate} {dc.componentize_savings} {dc.unify_savings} \"{dc.recommended_direction}\" \"{dc.recommended_rationale}\" {batch_size_str} {dc.struct_field_count} {str(dc.struct_frozen).lower()} decomp-cost")
+ lines.append("")
+ lines.append(f"\\ === optimization_candidates ({len(profile.optimization_candidates)} items) ===")
+ for cand in profile.optimization_candidates:
+  lines.append(f' "{cand.candidate}" "{cand.direction}" {len(cand.affected_files)} {cand.estimated_savings_us} "{cand.effort}" "{cand.priority}" "{cand.cross_ref}" opt-candidate')  # only the count of affected files is written
+ lines.append("")
+ lines.append("\\ === is_candidate ===")
+ lines.append(f" {'true' if profile.is_candidate else 'false'} is-candidate")
+ return "\n".join(lines)
+
+def to_markdown(profile: AggregateProfile) -> str:
+ """Render the per-aggregate markdown report: a title block followed by nine `##` sections, joined with newlines."""
+ lines: list[str] = []
+ lines.append(f"# Aggregate Profile: {profile.name}")
+ lines.append("")
+ lines.append(f"**Aggregate kind:** {profile.aggregate_kind}")
+ lines.append(f"**Memory dim:** {profile.memory_dim}")
+ lines.append(f"**Is candidate:** {profile.is_candidate}")
+ lines.append("")
+ lines.append("## Pipeline summary")
+ lines.append("")
+ lines.append(f"- Producers: {len(profile.producers)}")
+ lines.append(f"- Consumers: {len(profile.consumers)}")
+ lines.append("")
+ lines.append("## Access pattern")
+ lines.append("")
+ lines.append(f"**Dominant pattern:** {profile.access_pattern}")
+ lines.append(f"**Evidence count:** {len(profile.access_pattern_evidence)}")
+ lines.append("")
+ lines.append("## Frequency")
+ lines.append("")
+ lines.append(f"**Dominant frequency:** {profile.frequency}")
+ lines.append(f"**Evidence count:** {len(profile.frequency_evidence)}")
+ lines.append("")
+ lines.append("## Result coverage")
+ lines.append("")
+ lines.append(f"**Summary:** {profile.result_coverage.summary}")
+ lines.append("")
+ lines.append("## Type alias coverage")
+ lines.append("")
+ lines.append(f"**Summary:** {profile.type_alias_coverage.summary}")
+ lines.append("")
+ lines.append("## Cross-audit findings")
+ lines.append("")
+ lines.append("| Audit script | Site count | Example | Note |")  # one table holds the rows of all five finding categories
+ lines.append("|---|---|---|---|")
+ for f in profile.cross_audit_findings.weak_types:
+  lines.append(f"| {f.audit_script} | {f.site_count} | {f.example_file}:{f.example_line} | {f.note} |")  # NOTE(review): a "|" inside a field would split the table cell
+ for f in profile.cross_audit_findings.exception_handling:
+  lines.append(f"| {f.audit_script} | {f.site_count} | {f.example_file}:{f.example_line} | {f.note} |")
+ for f in profile.cross_audit_findings.optional_in_baseline:
+  lines.append(f"| {f.audit_script} | {f.site_count} | {f.example_file}:{f.example_line} | {f.note} |")
+ for f in profile.cross_audit_findings.config_io_ownership:
+  lines.append(f"| {f.audit_script} | {f.site_count} | {f.example_file}:{f.example_line} | {f.note} |")
+ for f in profile.cross_audit_findings.import_graph:
+  lines.append(f"| {f.audit_script} | {f.site_count} | {f.example_file}:{f.example_line} | {f.note} |")
+ lines.append("")
+ lines.append("## Decomposition cost")
+ lines.append("")
+ lines.append(f"**Current cost estimate:** {profile.decomposition_cost.current_cost_estimate} us")
+ lines.append(f"**Componentize savings:** {profile.decomposition_cost.componentize_savings} us")
+ lines.append(f"**Unify savings:** {profile.decomposition_cost.unify_savings} us")
+ lines.append(f"**Recommended direction:** {profile.decomposition_cost.recommended_direction}")
+ lines.append(f"**Rationale:** {profile.decomposition_cost.recommended_rationale}")
+ lines.append("")
+ lines.append("## Optimization candidates")
+ lines.append("")
+ if profile.optimization_candidates:
+  for cand in profile.optimization_candidates:
+   lines.append(f"- **{cand.direction}** ({cand.effort}, {cand.priority}): {cand.candidate}")
+ else:
+  lines.append("_(none)_")  # placeholder when there are no candidates
+ lines.append("")
+ lines.append("## Verdict")
+ lines.append("")
+ lines.append(f"{profile.decomposition_cost.recommended_rationale}")  # the verdict repeats the rationale shown under "Decomposition cost"
+ return "\n".join(lines)
+
+def to_tree(profile: AggregateProfile) -> str:
+ """Render the per-aggregate prefix tree as newline-joined text using ASCII `|-` connectors."""
+ lines: list[str] = [f"Metadata: {profile.name}"]  # NOTE(review): the root label is the literal "Metadata" for every aggregate — confirm this is intended
+ lines.append(f"|- kind: {profile.aggregate_kind}")
+ lines.append(f"|- memory_dim: {profile.memory_dim}")
+ lines.append(f"|- producers: [{len(profile.producers)}]")
+ for p in profile.producers:
+  lines.append(f"|  |- {p.fqname} ({p.role})")
+ lines.append(f"|- consumers: [{len(profile.consumers)}]")
+ for c in profile.consumers:
+  lines.append(f"|  |- {c.fqname} ({c.role})")
+ lines.append(f"|- access_pattern: {profile.access_pattern}")
+ lines.append(f"|- frequency: {profile.frequency}")
+ lines.append(f"|- result_coverage: {profile.result_coverage.summary}")
+ lines.append(f"|- type_alias_coverage: {profile.type_alias_coverage.summary}")
+ cf_total = (  # total findings across the five cross-audit categories
+  len(profile.cross_audit_findings.weak_types) +
+  len(profile.cross_audit_findings.exception_handling) +
+  len(profile.cross_audit_findings.optional_in_baseline) +
+  len(profile.cross_audit_findings.config_io_ownership) +
+  len(profile.cross_audit_findings.import_graph)
+ )
+ lines.append(f"|- cross_audit_findings: {cf_total} findings")
+ lines.append(f"|- decomposition_cost: {profile.decomposition_cost.recommended_direction} ({profile.decomposition_cost.current_cost_estimate} us)")
+ lines.append(f"|- optimization_candidates: [{len(profile.optimization_candidates)}]")  # only the count is shown; candidates are not listed
+ return "\n".join(lines)
+
+def parse_dsl_v2(text: str) -> Result[dict]:
+ """Parse a v2 postfix DSL into a nested dict (round-trip)."""
+ tokens: list[str] = []
+ for line in text.splitlines():
+  line = re.sub(r"\\.*", "", line)
+  if not line.strip():
+   continue
+  i = 0
+  while i < len(line):
+   c = line[i]
+   if c.isspace():
+    i += 1
+    continue
+   if c == '"':
+    j = line.find('"', i + 1)
+    if j == -1:
+     j = len(line)
+    tokens.append(line[i + 1 : j])
+    i = j + 1
+   else:
+    j = i
+    while j < len(line) and not line[j].isspace():
+     j += 1
+    tokens.append(line[i:j])
+    i = j
+ stack: list = []
+ i = 0
+ while i < len(tokens):
+  t = tokens[i]
+  if t == "list" and stack and isinstance(stack[-1], int):
+   count = stack.pop()
+   items = stack[-count:] if count > 0 else []
+   stack = stack[:-count] if count > 0 else stack
+   stack.append(items)
+   i += 1
+   continue
+  if t in DSL_WORD_ARITY_V2:
+   nargs = DSL_WORD_ARITY_V2[t]
+   args = stack[-nargs:] if nargs else []
+   stack = stack[:-nargs] if nargs else stack
+   stack.append({"_tag": t, "_args": args})
+   i += 1
+   continue
+  if t in ("true", "false"):
+   stack.append(t == "true")
+  elif t == "nil":
+   stack.append(None)
+  elif t.lstrip("-").isdigit():
+   stack.append(int(t))
+  else:
+   stack.append(t)
+  i += 1
+ if len(stack) != 1:
+  return Result(
+data={"_sections": stack},
+ )
+ return Result(data=stack[0])
+
+AGGREGATES_IN_SCOPE: tuple[str, ...] = (  # aggregate names profiled as regular (non-candidate) aggregates
+ "Metadata",
+ "FileItem",
+ "FileItems",
+ "CommsLogEntry",
+ "CommsLog",
+ "HistoryMessage",
+ "History",
+ "ToolDefinition",
+ "ToolCall",
+ "Result",
+)
+
+CANDIDATE_AGGREGATES: tuple[str, ...] = (  # aggregate names that only receive placeholder (is_candidate=True) profiles
+ "ToolSpec",
+ "ChatMessage",
+ "ProviderHistory",
+)
+
+def synthesize_aggregate_profile(
+ aggregate: str,
+ pcg_producers: dict[str, list[FunctionRef]],
+ pcg_consumers: dict[str, list[FunctionRef]],
+ audit_inputs: dict[str, dict],  # accepted but not read anywhere in this function body
+ overrides: dict,
+ is_candidate: bool,
+) -> AggregateProfile:
+ """Build one AggregateProfile: a fixed placeholder when is_candidate, otherwise producers/consumers from the PCG maps plus hard-coded defaults."""
+ if is_candidate:
+  return AggregateProfile(
+   name=aggregate,
+   aggregate_kind="candidate_dataclass",
+   memory_dim="discussion" if aggregate == "ChatMessage" else "unknown",  # only ChatMessage gets a concrete memory dim
+   producers=(),
+   consumers=(),
+   access_pattern="mixed",
+   access_pattern_evidence=(),
+   frequency="unknown",
+   frequency_evidence=(),
+   result_coverage=ResultCoverage(0, 0, 0, 0, ""),
+   type_alias_coverage=TypeAliasCoverage(0, 0, 0, ""),
+   cross_audit_findings=CrossAuditFindings((), (), (), (), ()),
+   decomposition_cost=DecompositionCost(0, 0, 0, "insufficient_data", "candidate aggregate; would be detected after any_type_componentization_20260621 merges", None, 0, False),
+   optimization_candidates=(),
+   is_candidate=True,
+  )
+ producers = tuple(pcg_producers.get(aggregate, []))  # missing aggregates yield empty tuples
+ consumers = tuple(pcg_consumers.get(aggregate, []))
+ kind: AggregateKind = "typealias" if aggregate in AGGREGATES_IN_SCOPE else "dataclass"  # NOTE(review): every in-scope name is labelled "typealias" — confirm
+ memory_dim = classify_memory_dim(
+  aggregate,
+  producers[0].file if producers else "",  # file of the first producer, or "" when there are none
+  overrides.get("memory_dim", {}) if isinstance(overrides, dict) else {},
+ )
+ return AggregateProfile(
+  name=aggregate,
+  aggregate_kind=kind,
+  memory_dim=memory_dim,
+  producers=producers,
+  consumers=consumers,
+  access_pattern="whole_struct",  # hard-coded; no evidence is collected here
+  access_pattern_evidence=(),
+  frequency="per_turn",  # hard-coded; no evidence is collected here
+  frequency_evidence=(),
+  result_coverage=ResultCoverage(len(producers), len(producers), len(consumers), 0, ""),  # both producer counts are len(producers); the fourth argument is a fixed 0
+  type_alias_coverage=TypeAliasCoverage(0, 0, 0, ""),
+  cross_audit_findings=CrossAuditFindings((), (), (), (), ()),
+  decomposition_cost=DecompositionCost(0, 0, 0, "hold", "no data", None, 0, False),
+  optimization_candidates=(),
+  is_candidate=False,
+ )
+
+@dataclass(frozen=True)
+class AuditSummary:  # Frozen dataclass that run_audit returns inside a Result; the dict field itself stays mutable.
+ aggregate_profiles: tuple[AggregateProfile, ...]  # one profile per audited aggregate
+ output_paths: dict[str, str] = field(default_factory=dict)  # aggregate name -> path of its written .dsl file
+
+def run_audit(
+ src_dir: str,
+ audit_inputs_dir: str,
+ output_dir: str,
+ date: str,
+) -> Result[AuditSummary]:
+ """Build the PCG for src_dir, synthesize one profile per in-scope and candidate aggregate, and write <output_dir>/<date>/aggregates/<name>.{dsl,md,tree}; returns the profiles plus each aggregate's .dsl path, or an empty summary carrying the PCG errors when build_pcg fails."""
+ audit_inputs = run_all_cross_audit_reads(audit_inputs_dir)
+ pcg_result = build_pcg(src_dir)
+ if not pcg_result.ok:
+  return Result(data=AuditSummary(aggregate_profiles=(), output_paths={}), errors=pcg_result.errors)
+ pcg = pcg_result.data
+ overrides: dict = {}  # always empty here; nothing in this function populates it
+ profiles: list[AggregateProfile] = []
+ for aggregate in AGGREGATES_IN_SCOPE:
+  profile = synthesize_aggregate_profile(
+   aggregate=aggregate,
+   pcg_producers=pcg.producers,
+   pcg_consumers=pcg.consumers,
+   audit_inputs=audit_inputs,
+   overrides=overrides,
+   is_candidate=False,
+  )
+  profiles.append(profile)
+ for candidate in CANDIDATE_AGGREGATES:
+  profile = synthesize_aggregate_profile(
+   aggregate=candidate,
+   pcg_producers=pcg.producers,
+   pcg_consumers=pcg.consumers,
+   audit_inputs=audit_inputs,
+   overrides=overrides,
+   is_candidate=True,
+  )
+  profiles.append(profile)
+ output_dir_p = Path(output_dir) / date
+ agg_dir = output_dir_p / "aggregates"  # computed once; shared by mkdir and every per-profile path below
+ agg_dir.mkdir(parents=True, exist_ok=True)
+ output_paths: dict[str, str] = {}
+ for profile in profiles:
+  dsl_path = agg_dir / f"{profile.name}.dsl"
+  md_path = agg_dir / f"{profile.name}.md"
+  tree_path = agg_dir / f"{profile.name}.tree"
+  dsl_path.write_text(to_dsl_v2(profile, generated_date=date), encoding="utf-8")
+  md_path.write_text(to_markdown(profile), encoding="utf-8")
+  tree_path.write_text(to_tree(profile), encoding="utf-8")
+  output_paths[profile.name] = str(dsl_path)  # only the .dsl path is recorded per aggregate
+ return Result(data=AuditSummary(aggregate_profiles=tuple(profiles), output_paths=output_paths))
+
+def render_rollups(summary: AuditSummary, output_dir: Path) -> dict[str, str]:
+ """Write summary.md, cross_audit_summary.md, decomposition_matrix.md and candidates.md into output_dir (created if missing) and return a file-name -> path mapping."""
+ output_dir.mkdir(parents=True, exist_ok=True)
+ summary_path = output_dir / "summary.md"
+ cross_audit_path = output_dir / "cross_audit_summary.md"
+ decomposition_matrix_path = output_dir / "decomposition_matrix.md"
+ candidates_path = output_dir / "candidates.md"
+ profiles = summary.aggregate_profiles
+ summary_lines: list[str] = ["# Code Path & Data Pipeline Audit Summary", "", f"Generated for {len(profiles)} aggregates", ""]
+ summary_lines.append("## 4-mem-dim rollup")
+ summary_lines.append("")
+ by_dim: dict[str, list[str]] = {}  # memory_dim -> aggregate names, in profile order
+ for p in profiles:
+  by_dim.setdefault(p.memory_dim, []).append(p.name)
+ for dim, names in sorted(by_dim.items()):
+  summary_lines.append(f"- **{dim}** ({len(names)}): {', '.join(names)}")
+ summary_lines.append("")
+ summary_lines.append("## Cross-validation verdict")
+ summary_lines.append("")
+ for p in profiles:
+  rc = p.result_coverage
+  tac = p.type_alias_coverage
+  summary_lines.append(f"- **{p.name}**: result_coverage={rc.summary}; type_alias_coverage={tac.summary}")
+ summary_path.write_text("\n".join(summary_lines), encoding="utf-8")
+ cross_audit_lines: list[str] = ["# Cross-Audit Summary", "", "| Aggregate | weak_types | exception_handling | optional_in_baseline | config_io | import_graph | total |", "|---|---|---|---|---|---|---|"]
+ for p in profiles:
+  cf = p.cross_audit_findings
+  total = len(cf.weak_types) + len(cf.exception_handling) + len(cf.optional_in_baseline) + len(cf.config_io_ownership) + len(cf.import_graph)
+  cross_audit_lines.append(f"| {p.name} | {len(cf.weak_types)} | {len(cf.exception_handling)} | {len(cf.optional_in_baseline)} | {len(cf.config_io_ownership)} | {len(cf.import_graph)} | {total} |")
+ cross_audit_path.write_text("\n".join(cross_audit_lines), encoding="utf-8")
+ deco_lines: list[str] = ["# Decomposition Matrix", "", "## Top 10 candidates by estimated savings", "", "| Rank | Aggregate | Direction | Est. savings (us) | Frequency | Effort | Priority |", "|---|---|---|---|---|---|---|"]
+ candidates_with_direction = [(p, p.decomposition_cost.componentize_savings + p.decomposition_cost.unify_savings, p.frequency, "n/a", "n/a") for p in profiles if p.decomposition_cost.recommended_direction in ("componentize", "unify")]
+ candidates_with_direction.sort(key=lambda x: -x[1])  # largest combined savings first
+ for i, (p, savings, freq, effort, priority) in enumerate(candidates_with_direction[:10], 1):
+  deco_lines.append(f"| {i} | {p.name} | {p.decomposition_cost.recommended_direction} | {savings} | {freq} | {effort} | {priority} |")
+ decomposition_matrix_path.write_text("\n".join(deco_lines), encoding="utf-8")
+ # Count the candidates actually present instead of hard-coding "3", so the heading stays truthful if the set changes.
+ candidate_names = [p.name for p in profiles if p.is_candidate]
+ cand_lines: list[str] = ["# Candidate Aggregates", "", f"The {len(candidate_names)} candidate aggregates (forward-compat placeholders for any_type_componentization_20260621, NOT on master).", ""]
+ cand_lines.extend(f"- **{name}**: candidate; would be detected after any_type_componentization_20260621 merges" for name in candidate_names)
+ candidates_path.write_text("\n".join(cand_lines), encoding="utf-8")
+ return {
+  "summary.md": str(summary_path),
+  "cross_audit_summary.md": str(cross_audit_path),
+  "decomposition_matrix.md": str(decomposition_matrix_path),
+  "candidates.md": str(candidates_path),
+ }
+
+def code_path_audit_v2(
+ src_dir: str = "src",
+ audit_inputs_dir: str = "tests/artifacts/audit_inputs",
+ output_dir: str = "docs/reports/code_path_audit",
+ date: str | None = None,  # None or "" falls back to today's ISO date
+) -> dict:
+ """MCP tool wrapper for the v2 audit: calls run_audit and flattens its Result into a dict with 'profiles' (one small dict per aggregate) and 'errors' (each error's ui_message())."""
+ date_str = date or date_mod.today().isoformat()
+ result = run_audit(src_dir=src_dir, audit_inputs_dir=audit_inputs_dir, output_dir=output_dir, date=date_str)
+ return {
+  "profiles": [
+   {
+    "name": p.name,
+    "kind": p.aggregate_kind,
+    "memory_dim": p.memory_dim,
+    "access_pattern": p.access_pattern,
+    "frequency": p.frequency,
+    "recommended_direction": p.decomposition_cost.recommended_direction,
+    "is_candidate": p.is_candidate,
+   }
+   for p in result.data.aggregate_profiles  # run_audit supplies an empty tuple here when the PCG build fails
+  ],
+  "errors": [e.ui_message() for e in result.errors],
+ }
\ No newline at end of file
diff --git a/tests/test_code_path_audit_phase89.py b/tests/test_code_path_audit_phase89.py
new file mode 100644
index 00000000..a574d6c6
--- /dev/null
+++ b/tests/test_code_path_audit_phase89.py
@@ -0,0 +1,179 @@
+"""Tests for src.code_path_audit v2 - DSL renderers + run_audit + CLI + MCP."""
+from __future__ import annotations
+import ast
+import tempfile
+from pathlib import Path
+import subprocess
+import sys
+from datetime import date
+from src.code_path_audit import (
+ AggregateKind,
+ MemoryDim,
+ AccessPattern,
+ Frequency,
+ RecommendedDirection,
+ FunctionRef,
+ AccessPatternEvidence,
+ FrequencyEvidence,
+ ResultCoverage,
+ TypeAliasCoverage,
+ CrossAuditFindings,
+ DecompositionCost,
+ OptimizationCandidate,
+ AggregateProfile,
+ to_dsl_v2,
+ to_markdown,
+ to_tree,
+ parse_dsl_v2,
+ AGGREGATES_IN_SCOPE,
+ CANDIDATE_AGGREGATES,
+ synthesize_aggregate_profile,
+ run_audit,
+ render_rollups,
+ AuditSummary,
+ code_path_audit_v2,
+)
+from src.result_types import Result
+
+def _make_profile(name: str = "Metadata", kind: str = "typealias") -> AggregateProfile:  # shared fixture: one producer, no consumers, zeroed coverage
+ f = FunctionRef(fqname="src.x.y", file="src/x.py", line=1, role="producer")
+ return AggregateProfile(
+  name=name,
+  aggregate_kind=kind,
+  memory_dim="discussion",
+  producers=(f,),
+  consumers=(),
+  access_pattern="whole_struct",
+  access_pattern_evidence=(),
+  frequency="per_turn",
+  frequency_evidence=(),
+  result_coverage=ResultCoverage(0, 0, 0, 0, ""),
+  type_alias_coverage=TypeAliasCoverage(0, 0, 0, ""),
+  cross_audit_findings=CrossAuditFindings((), (), (), (), ()),
+  decomposition_cost=DecompositionCost(0, 0, 0, "hold", "no data", None, 0, False),
+  optimization_candidates=(),
+  is_candidate=False,
+ )
+
+# Phase 8 Tasks 8.2-8.5 tests
+def test_to_dsl_v2_includes_aggregate_kind_section() -> None:
+ """to_dsl_v2 emits the \\ === aggregate_kind === and \\ === memory_dim === section headers along with their quoted value words."""
+ profile = _make_profile()
+ dsl = to_dsl_v2(profile, generated_date="2026-06-22")
+ assert "\\ === aggregate_kind ===" in dsl
+ assert '"typealias" kind' in dsl
+ assert "\\ === memory_dim ===" in dsl
+ assert '"discussion" mem-dim' in dsl
+
+def test_to_markdown_10_sections() -> None:
+ """to_markdown emits the title plus nine '##' headings (the 10 sections per spec section 8.1); only presence is checked, not order."""
+ profile = _make_profile()
+ md = to_markdown(profile)
+ assert "# Aggregate Profile: Metadata" in md
+ assert "## Pipeline summary" in md
+ assert "## Access pattern" in md
+ assert "## Frequency" in md
+ assert "## Result coverage" in md
+ assert "## Type alias coverage" in md
+ assert "## Cross-audit findings" in md
+ assert "## Decomposition cost" in md
+ assert "## Optimization candidates" in md
+ assert "## Verdict" in md
+
+def test_to_tree_box_drawing() -> None:
+ """to_tree output names the aggregate and its kind. NOTE(review): despite the test name, no box-drawing glyph is asserted here."""
+ profile = _make_profile()
+ tree = to_tree(profile)
+ assert "Metadata" in tree
+ assert "kind: typealias" in tree
+
+def test_parse_dsl_v2_round_trip_aggregate_kind() -> None:
+ """parse_dsl_v2(to_dsl_v2(profile)) returns a Result whose data still mentions 'typealias' (checked via str(), not structurally)."""
+ profile = _make_profile()
+ dsl = to_dsl_v2(profile)
+ parsed = parse_dsl_v2(dsl)
+ assert isinstance(parsed, Result)
+ assert "typealias" in str(parsed.data)
+
+def test_parse_dsl_v2_malformed() -> None:
+ """Comment-only input is not an error: parse_dsl_v2 returns an ok Result with an empty '_sections' list. NOTE(review): despite the name, no malformed input is exercised."""
+ result = parse_dsl_v2("\\ empty comment only\n")
+ assert result.ok
+ assert result.data == {"_sections": []}
+
+# Phase 9 Tasks 9.1-9.6 tests
+def test_aggregates_in_scope_10_real() -> None:
+ """AGGREGATES_IN_SCOPE holds exactly the 10 real aggregate names (compared as a set, so order and duplicates are not checked)."""
+ expected = {"Metadata", "FileItem", "FileItems", "CommsLogEntry", "CommsLog", "HistoryMessage", "History", "ToolDefinition", "ToolCall", "Result"}
+ assert set(AGGREGATES_IN_SCOPE) == expected
+
+def test_candidate_aggregates_3_placeholders() -> None:
+ """CANDIDATE_AGGREGATES holds exactly the 3 candidate names (compared as a set, so order and duplicates are not checked)."""
+ expected = {"ToolSpec", "ChatMessage", "ProviderHistory"}
+ assert set(CANDIDATE_AGGREGATES) == expected
+
+def test_synthesize_real_aggregate() -> None:
+ """With is_candidate=False, synthesize_aggregate_profile labels an in-scope aggregate 'typealias' and derives its memory_dim via classify_memory_dim."""
+ f = FunctionRef(fqname="src.x.y", file="src/x.py", line=1, role="producer")
+ profile = synthesize_aggregate_profile(
+  aggregate="Metadata",
+  pcg_producers={"Metadata": [f]},
+  pcg_consumers={"Metadata": [f]},  # the same ref doubles as consumer; its role field is not inspected by the synthesizer
+  audit_inputs={},
+  overrides={},
+  is_candidate=False,
+ )
+ assert profile.name == "Metadata"
+ assert profile.aggregate_kind == "typealias"
+ assert profile.memory_dim == "discussion"  # produced by classify_memory_dim with empty overrides
+ assert profile.is_candidate is False
+
+def test_synthesize_candidate_aggregate() -> None:
+ """With is_candidate=True, synthesize_aggregate_profile returns the placeholder profile: 'candidate_dataclass' kind and empty producers/consumers."""
+ profile = synthesize_aggregate_profile(
+  aggregate="ChatMessage",
+  pcg_producers={"ChatMessage": []},
+  pcg_consumers={"ChatMessage": []},
+  audit_inputs={},
+  overrides={},
+  is_candidate=True,
+ )
+ assert profile.name == "ChatMessage"
+ assert profile.aggregate_kind == "candidate_dataclass"
+ assert profile.is_candidate is True
+ assert profile.producers == ()
+ assert profile.consumers == ()
+
+def test_run_audit_returns_result() -> None:
+ """run_audit over an empty temp dir (used as src, audit-inputs and output dir alike) returns an ok Result, per error_handling.md."""
+ with tempfile.TemporaryDirectory() as tmp:
+  result = run_audit(src_dir=tmp, audit_inputs_dir=tmp, output_dir=tmp, date="2026-06-22")
+  assert isinstance(result, Result)
+  assert result.ok
+
+def test_run_audit_produces_13_aggregates() -> None:
+ """run_audit produces 13 AggregateProfiles (10 in-scope + 3 candidate)."""
+ with tempfile.TemporaryDirectory() as tmp:
+  result = run_audit(src_dir=tmp, audit_inputs_dir=tmp, output_dir=tmp, date="2026-06-22")
+  assert result.ok
+  assert len(result.data.aggregate_profiles) == 13  # len(AGGREGATES_IN_SCOPE) + len(CANDIDATE_AGGREGATES)
+
+def test_render_rollups_produces_4_files() -> None:
+ """render_rollups returns a mapping keyed by the four rollup file names; the files' existence on disk is not asserted here."""
+ with tempfile.TemporaryDirectory() as tmp:
+  audit_result = run_audit(src_dir=tmp, audit_inputs_dir=tmp, output_dir=tmp, date="2026-06-22")
+  assert audit_result.ok
+  rollup_paths = render_rollups(audit_result.data, Path(tmp) / "2026-06-22")  # same dated directory run_audit wrote into
+  assert "summary.md" in rollup_paths
+  assert "cross_audit_summary.md" in rollup_paths
+  assert "decomposition_matrix.md" in rollup_paths
+  assert "candidates.md" in rollup_paths
+
+def test_code_path_audit_v2_returns_dict() -> None:
+ """code_path_audit_v2 returns a dict with 'profiles' + 'errors' keys (MCP tool contract) and one profile entry per audited aggregate."""
+ with tempfile.TemporaryDirectory() as tmp:
+  result = code_path_audit_v2(src_dir=tmp, audit_inputs_dir=tmp, output_dir=tmp, date="2026-06-22")
+  assert isinstance(result, dict)
+  assert "profiles" in result
+  assert "errors" in result
+  assert len(result["profiles"]) == 13  # 10 in-scope + 3 candidate aggregates
\ No newline at end of file