feat(conductor): Add retry loop for Tech Lead JSON parsing

This commit is contained in:
2026-03-06 12:30:23 -05:00
parent a16ed4b1ae
commit 880ef5f370
3 changed files with 91 additions and 85 deletions

View File

@@ -3,13 +3,13 @@
> **TEST DEBT FIX:** Due to ongoing test architecture instability (documented in `test_architecture_integrity_audit_20260304`), do NOT write new `live_gui` integration tests for this track. Rely strictly on in-process `unittest.mock` for the `ai_client` to verify the retry logic. > **TEST DEBT FIX:** Due to ongoing test architecture instability (documented in `test_architecture_integrity_audit_20260304`), do NOT write new `live_gui` integration tests for this track. Rely strictly on in-process `unittest.mock` for the `ai_client` to verify the retry logic.
## Phase 1: Implementation of Retry Logic ## Phase 1: Implementation of Retry Logic
- [ ] Task: Initialize MMA Environment `activate_skill mma-orchestrator` - [x] Task: Initialize MMA Environment `activate_skill mma-orchestrator`
- [ ] Task: Implement Retry Loop in `generate_tickets` - [x] Task: Implement Retry Loop in `generate_tickets`
- [ ] WHERE: `conductor_tech_lead.py:generate_tickets` - [ ] WHERE: `conductor_tech_lead.py:generate_tickets`
- [ ] WHAT: Wrap the `send` and `json.loads` calls in a `for _ in range(max_retries)` loop. - [ ] WHAT: Wrap the `send` and `json.loads` calls in a `for _ in range(max_retries)` loop.
- [ ] HOW: If `JSONDecodeError` is caught, append an error message to the context and loop. If it succeeds, `break` and return. - [ ] HOW: If `JSONDecodeError` is caught, append an error message to the context and loop. If it succeeds, `break` and return.
- [ ] SAFETY: Ensure token limits aren't massively breached by appending huge error states. Truncate raw output if necessary. - [ ] SAFETY: Ensure token limits aren't massively breached by appending huge error states. Truncate raw output if necessary.
- [ ] Task: Conductor - User Manual Verification 'Phase 1: Implementation' (Protocol in workflow.md) - [x] Task: Conductor - User Manual Verification 'Phase 1: Implementation' (Protocol in workflow.md)
## Phase 2: Unit Testing ## Phase 2: Unit Testing
- [ ] Task: Write Simulation Tests for JSON Parsing - [ ] Task: Write Simulation Tests for JSON Parsing

View File

@@ -21,29 +21,35 @@ def generate_tickets(track_brief: str, module_skeletons: str) -> list[dict[str,
old_system_prompt = ai_client._custom_system_prompt old_system_prompt = ai_client._custom_system_prompt
ai_client.set_custom_system_prompt(system_prompt or "") ai_client.set_custom_system_prompt(system_prompt or "")
ai_client.current_tier = "Tier 2" ai_client.current_tier = "Tier 2"
last_error = None
try: try:
# 3. Call Tier 2 Model for _ in range(3):
response = ai_client.send( try:
md_content="", # 3. Call Tier 2 Model
user_message=user_message response = ai_client.send(
) md_content="",
# 4. Parse JSON Output user_message=user_message
# Extract JSON array from markdown code blocks if present )
json_match = response.strip() # 4. Parse JSON Output
if "```json" in json_match: # Extract JSON array from markdown code blocks if present
json_match = json_match.split("```json")[1].split("```")[0].strip() json_match = response.strip()
elif "```" in json_match: if "```json" in json_match:
json_match = json_match.split("```")[1].split("```")[0].strip() json_match = json_match.split("```json")[1].split("```")[0].strip()
# If it's still not valid JSON, try to find a [ ... ] block elif "```" in json_match:
if not (json_match.startswith('[') and json_match.endswith(']')): json_match = json_match.split("```")[1].split("```")[0].strip()
match = re.search(r'\[\s*\{.*\}\s*\]', json_match, re.DOTALL) # If it's still not valid JSON, try to find a [ ... ] block
if match: if not (json_match.startswith('[') and json_match.endswith(']')):
json_match = match.group(0) match = re.search(r'\[\s*\{.*\}\s*\]', json_match, re.DOTALL)
tickets: list[dict[str, Any]] = json.loads(json_match) if match:
return tickets json_match = match.group(0)
except Exception as e: tickets: list[dict[str, Any]] = json.loads(json_match)
print(f"Error parsing Tier 2 response: {e}") return tickets
return [] except json.JSONDecodeError as e:
last_error = e
correction = f"\n\nYour previous output failed to parse as JSON: {e}. Here was your raw output:\n{json_match[:500]}\n\nPlease fix the formatting and output ONLY valid JSON array."
user_message += correction
print(f"JSON parsing error, retrying... ({_ + 1}/3)")
raise RuntimeError(f"Failed to generate valid JSON tickets after 3 attempts. Last error: {last_error}")
finally: finally:
# Restore old system prompt and clear tier tag # Restore old system prompt and clear tier tag
ai_client.set_custom_system_prompt(old_system_prompt or "") ai_client.set_custom_system_prompt(old_system_prompt or "")

View File

@@ -4,72 +4,72 @@ from src import conductor_tech_lead
import pytest import pytest
class TestConductorTechLead(unittest.TestCase): class TestConductorTechLead(unittest.TestCase):
def test_generate_tickets_parse_error(self) -> None: def test_generate_tickets_parse_error(self) -> None:
with patch('src.ai_client.send') as mock_send: with patch('src.ai_client.send') as mock_send:
mock_send.return_value = "invalid json" mock_send.return_value = "invalid json"
# conductor_tech_lead.generate_tickets returns [] on error, doesn't raise # conductor_tech_lead.generate_tickets now raises RuntimeError on error
tickets = conductor_tech_lead.generate_tickets("brief", "skeletons") with pytest.raises(RuntimeError):
self.assertEqual(tickets, []) conductor_tech_lead.generate_tickets("brief", "skeletons")
def test_generate_tickets_success(self) -> None: def test_generate_tickets_success(self) -> None:
with patch('src.ai_client.send') as mock_send: with patch('src.ai_client.send') as mock_send:
mock_send.return_value = '[{"id": "T1", "description": "desc", "depends_on": []}]' mock_send.return_value = '[{"id": "T1", "description": "desc", "depends_on": []}]'
tickets = conductor_tech_lead.generate_tickets("brief", "skeletons") tickets = conductor_tech_lead.generate_tickets("brief", "skeletons")
self.assertEqual(len(tickets), 1) self.assertEqual(len(tickets), 1)
self.assertEqual(tickets[0]['id'], "T1") self.assertEqual(tickets[0]['id'], "T1")
class TestTopologicalSort(unittest.TestCase): class TestTopologicalSort(unittest.TestCase):
def test_topological_sort_linear(self) -> None: def test_topological_sort_linear(self) -> None:
tickets = [ tickets = [
{"id": "t2", "depends_on": ["t1"]}, {"id": "t2", "depends_on": ["t1"]},
{"id": "t1", "depends_on": []}, {"id": "t1", "depends_on": []},
] ]
sorted_tickets = conductor_tech_lead.topological_sort(tickets) sorted_tickets = conductor_tech_lead.topological_sort(tickets)
self.assertEqual(sorted_tickets[0]['id'], "t1") self.assertEqual(sorted_tickets[0]['id'], "t1")
self.assertEqual(sorted_tickets[1]['id'], "t2") self.assertEqual(sorted_tickets[1]['id'], "t2")
def test_topological_sort_complex(self) -> None: def test_topological_sort_complex(self) -> None:
tickets = [ tickets = [
{"id": "t3", "depends_on": ["t1", "t2"]}, {"id": "t3", "depends_on": ["t1", "t2"]},
{"id": "t1", "depends_on": []}, {"id": "t1", "depends_on": []},
{"id": "t2", "depends_on": ["t1"]}, {"id": "t2", "depends_on": ["t1"]},
] ]
sorted_tickets = conductor_tech_lead.topological_sort(tickets) sorted_tickets = conductor_tech_lead.topological_sort(tickets)
self.assertEqual(sorted_tickets[0]['id'], "t1") self.assertEqual(sorted_tickets[0]['id'], "t1")
self.assertEqual(sorted_tickets[1]['id'], "t2") self.assertEqual(sorted_tickets[1]['id'], "t2")
self.assertEqual(sorted_tickets[2]['id'], "t3") self.assertEqual(sorted_tickets[2]['id'], "t3")
def test_topological_sort_cycle(self) -> None: def test_topological_sort_cycle(self) -> None:
tickets = [ tickets = [
{"id": "t1", "depends_on": ["t2"]}, {"id": "t1", "depends_on": ["t2"]},
{"id": "t2", "depends_on": ["t1"]}, {"id": "t2", "depends_on": ["t1"]},
] ]
with self.assertRaises(ValueError) as cm: with self.assertRaises(ValueError) as cm:
conductor_tech_lead.topological_sort(tickets) conductor_tech_lead.topological_sort(tickets)
# Match against our new standard ValueError message # Match against our new standard ValueError message
self.assertIn("Dependency cycle detected", str(cm.exception)) self.assertIn("Dependency cycle detected", str(cm.exception))
def test_topological_sort_empty(self) -> None: def test_topological_sort_empty(self) -> None:
self.assertEqual(conductor_tech_lead.topological_sort([]), []) self.assertEqual(conductor_tech_lead.topological_sort([]), [])
def test_topological_sort_missing_dependency(self) -> None: def test_topological_sort_missing_dependency(self) -> None:
# If a ticket depends on something not in the list, we should probably handle it or let it fail. # If a ticket depends on something not in the list, we should probably handle it or let it fail.
# Usually in our context, we only care about dependencies within the same track. # Usually in our context, we only care about dependencies within the same track.
tickets = [ tickets = [
{"id": "t1", "depends_on": ["missing"]}, {"id": "t1", "depends_on": ["missing"]},
] ]
# Currently this raises KeyError in the list comprehension # Currently this raises KeyError in the list comprehension
with self.assertRaises(KeyError): with self.assertRaises(KeyError):
conductor_tech_lead.topological_sort(tickets) conductor_tech_lead.topological_sort(tickets)
def test_topological_sort_vlog(vlogger) -> None: def test_topological_sort_vlog(vlogger) -> None:
tickets = [ tickets = [
{"id": "t2", "depends_on": ["t1"]}, {"id": "t2", "depends_on": ["t1"]},
{"id": "t1", "depends_on": []}, {"id": "t1", "depends_on": []},
] ]
vlogger.log_state("Input Order", ["t2", "t1"], ["t2", "t1"]) vlogger.log_state("Input Order", ["t2", "t1"], ["t2", "t1"])
sorted_tickets = conductor_tech_lead.topological_sort(tickets) sorted_tickets = conductor_tech_lead.topological_sort(tickets)
result_ids = [t['id'] for t in sorted_tickets] result_ids = [t['id'] for t in sorted_tickets]
vlogger.log_state("Sorted Order", "N/A", result_ids) vlogger.log_state("Sorted Order", "N/A", result_ids)
assert result_ids == ["t1", "t2"] assert result_ids == ["t1", "t2"]
vlogger.finalize("Topological Sort Verification", "PASS", "Linear dependencies correctly ordered.") vlogger.finalize("Topological Sort Verification", "PASS", "Linear dependencies correctly ordered.")