From 36645f7f3e5e347b9797fc748c9deea68d680cf7 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Mon, 4 May 2026 05:10:59 -0400 Subject: [PATCH] feat(aggregation): Implement tier-level aggregation strategy tied to Personas --- src/aggregate.py | 19 ++++++---- src/app_controller.py | 9 +++-- src/models.py | 4 +++ src/multi_agent_conductor.py | 20 +++++++---- tests/test_tiered_aggregation.py | 62 ++++++++++++++++++++++++++++++++ 5 files changed, 99 insertions(+), 15 deletions(-) create mode 100644 tests/test_tiered_aggregation.py diff --git a/src/aggregate.py b/src/aggregate.py index 6a24b47..3cd0f10 100644 --- a/src/aggregate.py +++ b/src/aggregate.py @@ -197,15 +197,20 @@ def _build_files_section_from_items(file_items: list[dict[str, Any]]) -> str: sections.append(f"### `{original}`\n\n```{lang}\n{content}\n```") return "\n\n---\n\n".join(sections) -def build_markdown_from_items(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False) -> str: +def build_markdown_from_items(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], history: list[str], summary_only: bool = False, aggregation_strategy: str = "auto") -> str: """Build markdown from pre-read file items instead of re-reading from disk.""" parts = [] # STATIC PREFIX: Files and Screenshots must go first to maximize Cache Hits if file_items: - if summary_only: + if aggregation_strategy == "summarize": parts.append("## Files (Summary)\n\n" + summarize.build_summary_markdown(file_items)) - else: + elif aggregation_strategy == "full": parts.append("## Files\n\n" + _build_files_section_from_items(file_items)) + else: # auto + if summary_only: + parts.append("## Files (Summary)\n\n" + summarize.build_summary_markdown(file_items)) + else: + parts.append("## Files\n\n" + _build_files_section_from_items(file_items)) if screenshots: parts.append("## Screenshots\n\n" + build_screenshots_section(screenshot_base_dir, screenshots)) # DYNAMIC SUFFIX: History changes every turn, must go last @@ -213,9 +218,9 @@ def build_markdown_from_items(file_items: list[dict[str, Any]], screenshot_base_ parts.append("## Discussion History\n\n" + build_discussion_section(history)) return "\n\n---\n\n".join(parts) -def build_markdown_no_history(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False) -> str: +def build_markdown_no_history(file_items: list[dict[str, Any]], screenshot_base_dir: Path, screenshots: list[str], summary_only: bool = False, aggregation_strategy: str = "auto") -> str: """Build markdown with only files + screenshots (no history). Used for stable caching.""" - return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history=[], summary_only=summary_only) + return build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history=[], summary_only=summary_only, aggregation_strategy=aggregation_strategy) def build_discussion_text(history: list[str]) -> str: """Build just the discussion history section text. Returns empty string if no history.""" @@ -319,7 +324,7 @@ def build_markdown(base_dir: Path, files: list[str | dict[str, Any]], screenshot parts.append("## Discussion History\n\n" + build_discussion_section(history)) return "\n\n---\n\n".join(parts) -def run(config: dict[str, Any]) -> tuple[str, Path, list[dict[str, Any]]]: +def run(config: dict[str, Any], aggregation_strategy: str = "auto") -> tuple[str, Path, list[dict[str, Any]]]: namespace = config.get("project", {}).get("name") if not namespace: namespace = config.get("output", {}).get("namespace", "project") @@ -336,7 +341,7 @@ def run(config: dict[str, Any]) -> tuple[str, Path, list[dict[str, Any]]]: file_items = build_file_items(base_dir, files) summary_only = config.get("project", {}).get("summary_only", False) markdown = build_markdown_from_items(file_items, screenshot_base_dir, screenshots, history, - summary_only=summary_only) + summary_only=summary_only, aggregation_strategy=aggregation_strategy) output_file.write_text(markdown, encoding="utf-8") return markdown, output_file, file_items diff --git a/src/app_controller.py b/src/app_controller.py index a96fa53..078012f 100644 --- a/src/app_controller.py +++ b/src/app_controller.py @@ -148,6 +148,7 @@ class AppController: self.project_paths: List[str] = [] self.active_discussion: str = "main" self.disc_entries: List[Dict[str, Any]] = [] + self.ui_active_persona: str = "" self.disc_roles: List[str] = [] self.files: List[str] = [] self.screenshots: List[str] = [] @@ -2550,12 +2551,16 @@ class AppController: models.save_config(self.config) track_id = self.active_track.id if self.active_track else None flat = project_manager.flat_config(self.project, self.active_discussion, track_id=track_id) - full_md, path, file_items = aggregate.run(flat) + + persona = self.personas.get(self.ui_active_persona) + strategy = persona.aggregation_strategy if persona else "auto" + + full_md, path, file_items = aggregate.run(flat, aggregation_strategy=strategy) # Build stable markdown (no history) for Gemini caching screenshot_base_dir = Path(flat.get("screenshots", {}).get("base_dir", ".")) screenshots = flat.get("screenshots", {}).get("paths", []) summary_only = flat.get("project", {}).get("summary_only", False) - stable_md = aggregate.build_markdown_no_history(file_items, screenshot_base_dir, screenshots, summary_only=summary_only) + stable_md = aggregate.build_markdown_no_history(file_items, screenshot_base_dir, screenshots, summary_only=summary_only, aggregation_strategy=strategy) # Build discussion history text separately history = flat.get("discussion", {}).get("history", []) discussion_text = aggregate.build_discussion_text(history) diff --git a/src/models.py b/src/models.py index 8f3e980..a05bf26 100644 --- a/src/models.py +++ b/src/models.py @@ -470,6 +470,7 @@ class Persona: tool_preset: Optional[str] = None bias_profile: Optional[str] = None context_preset: Optional[str] = None + aggregation_strategy: Optional[str] = None @property def provider(self) -> Optional[str]: @@ -514,6 +515,8 @@ class Persona: res["bias_profile"] = self.bias_profile if self.context_preset is not None: res["context_preset"] = self.context_preset + if self.aggregation_strategy is not None: + res["aggregation_strategy"] = self.aggregation_strategy return res @classmethod @@ -548,6 +551,7 @@ class Persona: tool_preset=data.get("tool_preset"), bias_profile=data.get("bias_profile"), context_preset=data.get("context_preset"), + aggregation_strategy=data.get("aggregation_strategy"), ) @dataclass class MCPServerConfig: diff --git a/src/multi_agent_conductor.py b/src/multi_agent_conductor.py index 29a6bff..88c3c26 100644 --- a/src/multi_agent_conductor.py +++ b/src/multi_agent_conductor.py @@ -28,6 +28,7 @@ See Also: - src/models.py for Ticket, Track, WorkerContext """ from src import ai_client +from src import summarize import json import threading import time @@ -401,6 +402,7 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files: # Apply Persona if specified preferred_models = [] persona_tool_preset = None + persona = None if context.persona_id: from src.personas import PersonaManager from src import paths @@ -443,6 +445,7 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files: if context_files: parser = ASTParser(language="python") + strategy = getattr(persona, "aggregation_strategy", "auto") if persona else "auto" for i, file_path in enumerate(context_files): try: Path(file_path) @@ -452,12 +455,17 @@ def run_worker_lifecycle(ticket: Ticket, context: WorkerContext, context_files: tokens_before += _count_tokens(content) - if i == 0: - view = parser.get_curated_view(content, path=file_path) - elif ticket.target_file and Path(file_path).resolve() == Path(ticket.target_file).resolve() and ticket.target_symbols: - view = parser.get_targeted_view(content, ticket.target_symbols, path=file_path) - else: - view = parser.get_skeleton(content, path=file_path) + if strategy == "summarize": + view = summarize.summarise_file(Path(file_path), content) + elif strategy == "full": + view = content + else: # auto or skeleton + if i == 0: + view = parser.get_curated_view(content, path=file_path) + elif ticket.target_file and Path(file_path).resolve() == Path(ticket.target_file).resolve() and ticket.target_symbols: + view = parser.get_targeted_view(content, ticket.target_symbols, path=file_path) + else: + view = parser.get_skeleton(content, path=file_path) tokens_after += _count_tokens(view) context_injection += f"\nFile: {file_path}\n{view}\n" diff --git a/tests/test_tiered_aggregation.py b/tests/test_tiered_aggregation.py new file mode 100644 index 0000000..c23ac60 --- /dev/null +++ b/tests/test_tiered_aggregation.py @@ -0,0 +1,62 @@ +import pytest +from src.models import Persona, Ticket, WorkerContext +from src import multi_agent_conductor +from src import app_controller +from src import aggregate +from pathlib import Path +from unittest.mock import MagicMock, patch + +def test_persona_aggregation_strategy(): + p = Persona(name="test_persona", aggregation_strategy="summarize") + assert p.aggregation_strategy == "summarize" + + d = p.to_dict() + assert d.get("aggregation_strategy") == "summarize" + + p2 = Persona.from_dict("test2", d) + assert p2.aggregation_strategy == "summarize" + +@patch("src.aggregate.build_markdown_from_items") +def test_app_controller_do_generate_uses_persona_strategy(mock_build): + mock_build.return_value = "fake_md" + app = app_controller.AppController() + app.personas = {} + p = Persona(name="p1", aggregation_strategy="full") + app.personas["p1"] = p + app.ui_active_persona = "p1" + + with patch("src.project_manager.flat_config", return_value={"project": {"name": "test"}, "output": {"output_dir": "."}, "files": {"paths": [], "base_dir": "."}}): + with patch("src.aggregate.build_file_items", return_value=[]): + with patch("pathlib.Path.mkdir"): + with patch("pathlib.Path.write_text"): + with patch.object(app, "_flush_to_project"): + with patch.object(app, "_flush_to_config"): + with patch("src.models.save_config"): + full_md, path, file_items, stable_md, disc = app._do_generate() + + # Verify aggregate.run and build_markdown_no_history received aggregation_strategy="full" + # Actually mock_build captures the inner calls of build_markdown_no_history + call_kwargs = mock_build.call_args[1] + assert call_kwargs.get("aggregation_strategy") == "full" + +@patch("src.summarize.summarise_file") +@patch("src.multi_agent_conductor.ai_client.send") +def test_run_worker_lifecycle_uses_strategy(mock_send, mock_summarise, tmp_path): + mock_send.return_value = "fake response" + mock_summarise.return_value = "fake summary" + + test_file = tmp_path / "test.py" + test_file.write_text("def test():\n pass") + + ticket = Ticket(id="1", description="test") + context = WorkerContext(ticket_id="1", model_name="test", persona_id="test_persona") + + p = Persona(name="test_persona", aggregation_strategy="summarize") + + with patch("src.personas.PersonaManager.load_all", return_value={"test_persona": p}): + multi_agent_conductor.run_worker_lifecycle( + ticket, context, context_files=[str(test_file)] + ) + + # Should have called summarise_file because of the "summarize" strategy + mock_summarise.assert_called_once()