From 36645f7f3e5e347b9797fc748c9deea68d680cf7 Mon Sep 17 00:00:00 2001
From: Ed_ <edwardgz@gmail.com>
Date: Mon, 4 May 2026 05:10:59 -0400
Subject: [PATCH] feat(aggregation): Implement tier-level aggregation strategy
 tied to Personas

---
 src/aggregate.py                 | 19 ++++++----
 src/app_controller.py            |  9 +++--
 src/models.py                    |  4 +++
 src/multi_agent_conductor.py     | 20 +++++++----
 tests/test_tiered_aggregation.py | 62 ++++++++++++++++++++++++++++++++
 5 files changed, 99 insertions(+), 15 deletions(-)
 create mode 100644 tests/test_tiered_aggregation.py

diff --git a/src/aggregate.py b/src/aggregate.py
index 6a24b47..3cd0f10 100644
--- a/src/aggregate.py
+++ b/src/aggregate.py
@@ -197,15 +197,20 @@ def _build_files_section_from_items(file_items: list[dict[str, Any]]) -> str:
   sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```")
  return "\n\n---\n\n".join(sections)
 
-def build_markdown_from_items(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str:
+def build_markdown_from_items(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False, aggregation_strategy: str = "auto") -> str:
  """Build markdown from pre-read file items instead of re-reading from disk."""
  parts = []
  # STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits
  if file_items:
-  if summary_only:
+  if aggregation_strategy == "summarize":
    parts.append("## Files (Summary)\n\n" + summarize.build_summary_markdown(file_items))
-  else:
+  elif aggregation_strategy == "full":
    parts.append("## Files\n\n" + _build_files_section_from_items(file_items))
+  else: # auto
+   if summary_only:
+    parts.append("## Files (Summary)\n\n" + summarize.build_summary_markdown(file_items))
+   else:
+    parts.append("## Files\n\n" + _build_files_section_from_items(file_items))
  if screenshots:
   parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots))
   # DYNAMIC SUFFIX: History changes every turn, must go last
@@ -213,9 +218,9 @@ def build_markdown_from_items(file_items: list[dict[str, Any]], screenshot_base_
   parts.append("## Discussion History\n\n" + build_discussion_section(history))
  return "\n\n---\n\n".join(parts)
 
-def build_markdown_no_history(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str:
+def build_markdown_no_history(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False, aggregation_strategy: str = "auto") -> str:
  """Build markdown with only files + screenshots (no history). Used for stable caching."""
- return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history=[], summary_only=summary_only)
+ return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history=[], summary_only=summary_only, aggregation_strategy=aggregation_strategy)
 
 def build_discussion_text(history: list[str]) -> str:
  """Build just the discussion history section text. Returns empty string if no history."""
@@ -319,7 +324,7 @@ def build_markdown(base_dir: Path, files: list[str | dict[str, Any]], screenshot
   parts.append("## Discussion History\n\n" + build_discussion_section(history))
  return "\n\n---\n\n".join(parts)
 
-def run(config: dict[str, Any]) -> tuple[str, Path, list[dict[str, Any]]]:
+def run(config: dict[str, Any], aggregation_strategy: str = "auto") -> tuple[str, Path, list[dict[str, Any]]]:
  namespace = config.get("project", {}).get("name")
  if not namespace:
   namespace = config.get("output", {}).get("namespace", "project")
@@ -336,7 +341,7 @@ def run(config: dict[str, Any]) -> tuple[str, Path, list[dict[str, Any]]]:
  file_items = build_file_items(base_dir, files)
  summary_only = config.get("project", {}).get("summary_only", False)
  markdown = build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history,
-  summary_only=summary_only)
+  summary_only=summary_only, aggregation_strategy=aggregation_strategy)
  output_file.write_text(markdown, encoding="utf-8")
  return markdown, output_file, file_items
 
diff --git a/src/app_controller.py b/src/app_controller.py
index a96fa53..078012f 100644
--- a/src/app_controller.py
+++ b/src/app_controller.py
@@ -148,6 +148,7 @@ class AppController:
   self.project_paths: List[str] = []
   self.active_discussion: str = "main"
   self.disc_entries: List[Dict[str, Any]] = []
+  self.ui_active_persona: str = ""
   self.disc_roles: List[str] = []
   self.files: List[str] = []
   self.screenshots: List[str] = []
@@ -2550,12 +2551,16 @@ class AppController:
   models.save_config(self.config)
   track_id = self.active_track.id if self.active_track else None
   flat = project_manager.flat_config(self.project, self.active_discussion, track_id=track_id)
-  full_md, path, file_items = aggregate.run(flat)
+  
+  persona = self.personas.get(self.ui_active_persona)
+  strategy = persona.aggregation_strategy if persona else "auto"
+  
+  full_md, path, file_items = aggregate.run(flat, aggregation_strategy=strategy)
   # Build stable markdown (no history) for Gemini caching
   screenshot_base_dir = Path(flat.get("screenshots", {}).get("base_dir", "."))
   screenshots = flat.get("screenshots", {}).get("paths", [])
   summary_only = flat.get("project", {}).get("summary_only", False)
-  stable_md = aggregate.build_markdown_no_history(file_items, screenshot_base_dir, screenshots, summary_only=summary_only)
+  stable_md = aggregate.build_markdown_no_history(file_items, screenshot_base_dir, screenshots, summary_only=summary_only, aggregation_strategy=strategy)
   # Build discussion history text separately
   history = flat.get("discussion", {}).get("history", [])
   discussion_text = aggregate.build_discussion_text(history)
diff --git a/src/models.py b/src/models.py
index 8f3e980..a05bf26 100644
--- a/src/models.py
+++ b/src/models.py
@@ -470,6 +470,7 @@ class Persona:
  tool_preset: Optional[str] = None
  bias_profile: Optional[str] = None
  context_preset: Optional[str] = None
+ aggregation_strategy: Optional[str] = None
 
  @property
  def provider(self) -> Optional[str]:
@@ -514,6 +515,8 @@ class Persona:
    res["bias_profile"] = self.bias_profile
   if self.context_preset is not None:
    res["context_preset"] = self.context_preset
+  if self.aggregation_strategy is not None:
+   res["aggregation_strategy"] = self.aggregation_strategy
   return res
 
  @classmethod
@@ -548,6 +551,7 @@ class Persona:
    tool_preset=data.get("tool_preset"),
    bias_profile=data.get("bias_profile"),
    context_preset=data.get("context_preset"),
+   aggregation_strategy=data.get("aggregation_strategy"),
   )
 @dataclass
 class MCPServerConfig:
diff --git a/src/multi_agent_conductor.py b/src/multi_agent_conductor.py
index 29a6bff..88c3c26 100644
--- a/src/multi_agent_conductor.py
+++ b/src/multi_agent_conductor.py
@@ -28,6 +28,7 @@ See Also:
  - src/models.py for Ticket, Track, WorkerContext
 """
 from src import ai_client
+from src import summarize
 import json
 import threading
 import time
@@ -401,6 +402,7 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
  # Apply Persona if specified
  preferred_models = []
  persona_tool_preset = None
+ persona = None
  if context.persona_id:
   from src.personas import PersonaManager
   from src import paths
@@ -443,6 +445,7 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
 
  if context_files:
   parser = ASTParser(language="python")
+  strategy = getattr(persona, "aggregation_strategy", "auto") if persona else "auto"
   for i, file_path in enumerate(context_files):
    try:
     Path(file_path)
@@ -452,12 +455,17 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files:
     
     tokens_before += _count_tokens(content)
     
-    if i == 0:
-     view = parser.get_curated_view(content, path=file_path)
-    elif ticket.target_file and Path(file_path).resolve() == Path(ticket.target_file).resolve() and ticket.target_symbols:
-     view = parser.get_targeted_view(content, ticket.target_symbols, path=file_path)
-    else:
-     view = parser.get_skeleton(content, path=file_path)
+    if strategy == "summarize":
+     view = summarize.summarise_file(Path(file_path), content)
+    elif strategy == "full":
+     view = content
+    else: # auto or skeleton
+     if i == 0:
+      view = parser.get_curated_view(content, path=file_path)
+     elif ticket.target_file and Path(file_path).resolve() == Path(ticket.target_file).resolve() and ticket.target_symbols:
+      view = parser.get_targeted_view(content, ticket.target_symbols, path=file_path)
+     else:
+      view = parser.get_skeleton(content, path=file_path)
     
     tokens_after += _count_tokens(view)
     context_injection += f"\nFile: {file_path}\n{view}\n"
diff --git a/tests/test_tiered_aggregation.py b/tests/test_tiered_aggregation.py
new file mode 100644
index 0000000..c23ac60
--- /dev/null
+++ b/tests/test_tiered_aggregation.py
@@ -0,0 +1,62 @@
+import pytest
+from src.models import Persona, Ticket, WorkerContext
+from src import multi_agent_conductor
+from src import app_controller
+from src import aggregate
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+def test_persona_aggregation_strategy():
+    p = Persona(name="test_persona", aggregation_strategy="summarize")
+    assert p.aggregation_strategy == "summarize"
+    
+    d = p.to_dict()
+    assert d.get("aggregation_strategy") == "summarize"
+    
+    p2 = Persona.from_dict("test2", d)
+    assert p2.aggregation_strategy == "summarize"
+
+@patch("src.aggregate.build_markdown_from_items")
+def test_app_controller_do_generate_uses_persona_strategy(mock_build):
+    mock_build.return_value = "fake_md"
+    app = app_controller.AppController()
+    app.personas = {}
+    p = Persona(name="p1", aggregation_strategy="full")
+    app.personas["p1"] = p
+    app.ui_active_persona = "p1"
+    
+    with patch("src.project_manager.flat_config", return_value={"project": {"name": "test"}, "output": {"output_dir": "."}, "files": {"paths": [], "base_dir": "."}}):
+        with patch("src.aggregate.build_file_items", return_value=[]):
+            with patch("pathlib.Path.mkdir"):
+                with patch("pathlib.Path.write_text"):
+                    with patch.object(app, "_flush_to_project"):
+                        with patch.object(app, "_flush_to_config"):
+                            with patch("src.models.save_config"):
+                                full_md, path, file_items, stable_md, disc = app._do_generate()
+    
+    # Verify aggregate.run and build_markdown_no_history received aggregation_strategy="full"
+    # Actually mock_build captures the inner calls of build_markdown_no_history
+    call_kwargs = mock_build.call_args[1]
+    assert call_kwargs.get("aggregation_strategy") == "full"
+
+@patch("src.summarize.summarise_file")
+@patch("src.multi_agent_conductor.ai_client.send")
+def test_run_worker_lifecycle_uses_strategy(mock_send, mock_summarise, tmp_path):
+    mock_send.return_value = "fake response"
+    mock_summarise.return_value = "fake summary"
+    
+    test_file = tmp_path / "test.py"
+    test_file.write_text("def test():\n    pass")
+    
+    ticket = Ticket(id="1", description="test")
+    context = WorkerContext(ticket_id="1", model_name="test", persona_id="test_persona")
+    
+    p = Persona(name="test_persona", aggregation_strategy="summarize")
+    
+    with patch("src.personas.PersonaManager.load_all", return_value={"test_persona": p}):
+        multi_agent_conductor.run_worker_lifecycle(
+            ticket, context, context_files=[str(test_file)]
+        )
+    
+    # Should have called summarise_file because of the "summarize" strategy
+    mock_summarise.assert_called_once()