From 44a1d76dc74e55a556f475d8d430d1c70e23c885 Mon Sep 17 00:00:00 2001 From: Ed_ Date: Fri, 13 Mar 2026 21:55:29 -0400 Subject: [PATCH] feat(thinking): Phase 1 complete - parser, model, tests --- .../thinking_trace_handling_20260313/plan.md | 10 +-- tests/test_thinking_trace.py | 85 ++++++------------- 2 files changed, 30 insertions(+), 65 deletions(-) diff --git a/conductor/tracks/thinking_trace_handling_20260313/plan.md b/conductor/tracks/thinking_trace_handling_20260313/plan.md index acd63df..7a90ca6 100644 --- a/conductor/tracks/thinking_trace_handling_20260313/plan.md +++ b/conductor/tracks/thinking_trace_handling_20260313/plan.md @@ -1,13 +1,11 @@ # Implementation Plan: Rich Thinking Trace Handling ## Phase 1: Core Parsing & Model Update -- [~] Task: Audit `src/models.py` and `src/project_manager.py` to identify current message serialization schemas. -- [ ] Task: Write Tests: Verify that raw AI responses with `<thinking>`, `<thought>`, and `Thinking:` markers are correctly parsed into segmented data structures (Thinking vs. Response). -- [ ] Task: Implement: Add `ThinkingSegment` model and update `ChatMessage` schema in `src/models.py` to support optional thinking traces. -- [ ] Task: Implement: Update parsing logic in `src/ai_client.py` or a dedicated utility to extract segments from raw provider responses. +- [x] Task: Audit `src/models.py` and `src/project_manager.py` to identify current message serialization schemas. +- [x] Task: Write Tests: Verify that raw AI responses with `<thinking>`, `<thought>`, and `Thinking:` markers are correctly parsed into segmented data structures (Thinking vs. Response). +- [x] Task: Implement: Add `ThinkingSegment` model and update `ChatMessage` schema in `src/models.py` to support optional thinking traces. +- [x] Task: Implement: Update parsing logic in `src/ai_client.py` or a dedicated utility to extract segments from raw provider responses. 
- [ ] Task: Conductor - User Manual Verification 'Phase 1: Core Parsing & Model Update' (Protocol in workflow.md) - -## Phase 2: Persistence & History Integration - [ ] Task: Write Tests: Verify that `ProjectManager` correctly serializes and deserializes messages with thinking segments to/from TOML history files. - [ ] Task: Implement: Update `src/project_manager.py` to handle the new `ChatMessage` schema during session save/load. - [ ] Task: Implement: Ensure `src/aggregate.py` or relevant context builders include thinking traces in the "Discussion History" sent back to the AI. diff --git a/tests/test_thinking_trace.py b/tests/test_thinking_trace.py index fcceb2b..2712d37 100644 --- a/tests/test_thinking_trace.py +++ b/tests/test_thinking_trace.py @@ -1,92 +1,59 @@ -from dataclasses import dataclass -from typing import Optional -import re - - -@dataclass -class ThinkingSegment: - content: str - marker_type: str - - -def parse_thinking_trace(raw_response: str) -> tuple[Optional[ThinkingSegment], str]: - if not raw_response: - return None, raw_response - - patterns = [ - (r"\s*(.*?)\s*", "xml"), - (r"\s*(.*?)\s*", "xml"), - (r"^Thinking:\s*\n(.+?)(?:\n\n|\n?$)", "text", re.MULTILINE), - (r"^thinking:\s*\n(.+?)(?:\n\n|\n?$)", "text", re.MULTILINE), - ] - - for i, pattern_info in enumerate(patterns): - pattern = pattern_info[0] - flags = pattern_info[2] if len(pattern_info) > 2 else re.DOTALL - match = re.search(pattern, raw_response, flags) - if match: - thinking_content = match.group(1).strip() - remaining = raw_response[: match.start()] + raw_response[match.end() :] - remaining = remaining.strip() - return ThinkingSegment( - content=thinking_content, marker_type=pattern_info[1] - ), remaining - - return None, raw_response +from src.thinking_parser import parse_thinking_trace def test_parse_xml_thinking_tag(): raw = "\nLet me analyze this problem step by step.\n\nHere is the answer." 
- thinking, response = parse_thinking_trace(raw) - assert thinking is not None - assert thinking.content == "Let me analyze this problem step by step." - assert thinking.marker_type == "xml" + segments, response = parse_thinking_trace(raw) + assert len(segments) == 1 + assert segments[0].content == "Let me analyze this problem step by step." + assert segments[0].marker == "thinking" assert response == "Here is the answer." def test_parse_xml_thought_tag(): raw = "This is my reasoning process\nFinal response here." - thinking, response = parse_thinking_trace(raw) - assert thinking is not None - assert thinking.content == "This is my reasoning process" - assert thinking.marker_type == "xml" + segments, response = parse_thinking_trace(raw) + assert len(segments) == 1 + assert segments[0].content == "This is my reasoning process" + assert segments[0].marker == "thought" assert response == "Final response here." def test_parse_text_thinking_prefix(): raw = "Thinking:\nThis is a text-based thinking trace.\n\nNow for the actual response." - thinking, response = parse_thinking_trace(raw) - assert thinking is not None - assert thinking.content == "This is a text-based thinking trace." - assert thinking.marker_type == "text" + segments, response = parse_thinking_trace(raw) + assert len(segments) == 1 + assert segments[0].content == "This is a text-based thinking trace." + assert segments[0].marker == "Thinking:" assert response == "Now for the actual response." def test_parse_no_thinking(): raw = "This is a normal response without any thinking markers." 
- thinking, response = parse_thinking_trace(raw) - assert thinking is None + segments, response = parse_thinking_trace(raw) + assert len(segments) == 0 assert response == raw def test_parse_empty_response(): - thinking, response = parse_thinking_trace("") - assert thinking is None + segments, response = parse_thinking_trace("") + assert len(segments) == 0 assert response == "" -def test_parse_multiple_markers_prefers_first(): +def test_parse_multiple_markers(): raw = "First thinking\nSecond thought\nResponse" - thinking, response = parse_thinking_trace(raw) - assert thinking is not None - assert thinking.content == "First thinking" + segments, response = parse_thinking_trace(raw) + assert len(segments) == 2 + assert segments[0].content == "First thinking" + assert segments[1].content == "Second thought" def test_parse_thinking_with_empty_response(): raw = "Just thinking, no response" - thinking, response = parse_thinking_trace(raw) - assert thinking is not None - assert thinking.content == "Just thinking, no response" + segments, response = parse_thinking_trace(raw) + assert len(segments) == 1 + assert segments[0].content == "Just thinking, no response" assert response == "" @@ -96,6 +63,6 @@ if __name__ == "__main__": test_parse_text_thinking_prefix() test_parse_no_thinking() test_parse_empty_response() - test_parse_multiple_markers_prefers_first() + test_parse_multiple_markers() test_parse_thinking_with_empty_response() print("All thinking trace tests passed!")