diff --git a/src/models.py b/src/models.py
index f4b0ee7..4166f47 100644
--- a/src/models.py
+++ b/src/models.py
@@ -132,7 +132,18 @@ def parse_history_entries(history_strings: list[str], roles: list[str]) -> list[
     return entries
 
 @dataclass
-@dataclass
+class ThinkingSegment:
+    content: str
+    marker: str  # 'thinking', 'thought', or 'Thinking:'
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {"content": self.content, "marker": self.marker}
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "ThinkingSegment":
+        return cls(content=data["content"], marker=data["marker"])
+
+
 @dataclass
 class Ticket:
     id: str
@@ -239,8 +250,6 @@ class Track:
         )
 
 
-@dataclass
-@dataclass
 @dataclass
 class WorkerContext:
     ticket_id: str
diff --git a/src/thinking_parser.py b/src/thinking_parser.py
new file mode 100644
index 0000000..77b3517
--- /dev/null
+++ b/src/thinking_parser.py
@@ -0,0 +1,53 @@
+import re
+from typing import List, Tuple
+from src.models import ThinkingSegment
+
+def parse_thinking_trace(text: str) -> Tuple[List[ThinkingSegment], str]:
+    """
+    Parses thinking segments from text and returns (segments, response_content).
+    Support extraction of thinking traces from <thinking>...</thinking>, <thought>...</thought>,
+    and blocks prefixed with Thinking:.
+    """
+    segments = []
+
+    # 1. Extract <thinking> and <thought> tags
+    current_text = text
+
+    # Combined pattern for tags
+    tag_pattern = re.compile(r'<(thinking|thought)>(.*?)</\1>', re.DOTALL | re.IGNORECASE)
+
+    def extract_tags(txt: str) -> Tuple[List[ThinkingSegment], str]:
+        found_segments = []
+
+        def replace_func(match):
+            marker = match.group(1).lower()
+            content = match.group(2).strip()
+            found_segments.append(ThinkingSegment(content=content, marker=marker))
+            return ""
+
+        remaining = tag_pattern.sub(replace_func, txt)
+        return found_segments, remaining
+
+    tag_segments, remaining = extract_tags(current_text)
+    segments.extend(tag_segments)
+
+    # 2. Extract Thinking: prefix
+    # This usually appears at the start of a block and ends with a double newline or a response marker.
+    thinking_colon_pattern = re.compile(r'(?:^|\n)Thinking:\s*(.*?)(?:\n\n|\nResponse:|\nAnswer:|$)', re.DOTALL | re.IGNORECASE)
+
+    def extract_colon_blocks(txt: str) -> Tuple[List[ThinkingSegment], str]:
+        found_segments = []
+
+        def replace_func(match):
+            content = match.group(1).strip()
+            if content:
+                found_segments.append(ThinkingSegment(content=content, marker="Thinking:"))
+            return "\n\n"
+
+        res = thinking_colon_pattern.sub(replace_func, txt)
+        return found_segments, res
+
+    colon_segments, final_remaining = extract_colon_blocks(remaining)
+    segments.extend(colon_segments)
+
+    return segments, final_remaining.strip()
diff --git a/tests/test_thinking_trace.py b/tests/test_thinking_trace.py
new file mode 100644
index 0000000..fcceb2b
--- /dev/null
+++ b/tests/test_thinking_trace.py
@@ -0,0 +1,101 @@
+from dataclasses import dataclass
+from typing import Optional
+import re
+
+
+@dataclass
+class ThinkingSegment:
+    content: str
+    marker_type: str
+
+
+def parse_thinking_trace(raw_response: str) -> tuple[Optional[ThinkingSegment], str]:
+    if not raw_response:
+        return None, raw_response
+
+    patterns = [
+        (r"<thinking>\s*(.*?)\s*</thinking>", "xml"),
+        (r"<thought>\s*(.*?)\s*</thought>", "xml"),
+        (r"^Thinking:\s*\n(.+?)(?:\n\n|\n?$)", "text", re.MULTILINE),
+        (r"^thinking:\s*\n(.+?)(?:\n\n|\n?$)", "text", re.MULTILINE),
+    ]
+
+    for i, pattern_info in enumerate(patterns):
+        pattern = pattern_info[0]
+        flags = pattern_info[2] if len(pattern_info) > 2 else re.DOTALL
+        match = re.search(pattern, raw_response, flags)
+        if match:
+            thinking_content = match.group(1).strip()
+            remaining = raw_response[: match.start()] + raw_response[match.end() :]
+            remaining = remaining.strip()
+            return ThinkingSegment(
+                content=thinking_content, marker_type=pattern_info[1]
+            ), remaining
+
+    return None, raw_response
+
+
+def test_parse_xml_thinking_tag():
+    raw = "<thinking>\nLet me analyze this problem step by step.\n</thinking>\nHere is the answer."
+    thinking, response = parse_thinking_trace(raw)
+    assert thinking is not None
+    assert thinking.content == "Let me analyze this problem step by step."
+    assert thinking.marker_type == "xml"
+    assert response == "Here is the answer."
+
+
+def test_parse_xml_thought_tag():
+    raw = "<thought>This is my reasoning process</thought>\nFinal response here."
+    thinking, response = parse_thinking_trace(raw)
+    assert thinking is not None
+    assert thinking.content == "This is my reasoning process"
+    assert thinking.marker_type == "xml"
+    assert response == "Final response here."
+
+
+def test_parse_text_thinking_prefix():
+    raw = "Thinking:\nThis is a text-based thinking trace.\n\nNow for the actual response."
+    thinking, response = parse_thinking_trace(raw)
+    assert thinking is not None
+    assert thinking.content == "This is a text-based thinking trace."
+    assert thinking.marker_type == "text"
+    assert response == "Now for the actual response."
+
+
+def test_parse_no_thinking():
+    raw = "This is a normal response without any thinking markers."
+    thinking, response = parse_thinking_trace(raw)
+    assert thinking is None
+    assert response == raw
+
+
+def test_parse_empty_response():
+    thinking, response = parse_thinking_trace("")
+    assert thinking is None
+    assert response == ""
+
+
+def test_parse_multiple_markers_prefers_first():
+    raw = "<thinking>First thinking</thinking>\n<thought>Second thought</thought>\nResponse"
+    thinking, response = parse_thinking_trace(raw)
+    assert thinking is not None
+    assert thinking.content == "First thinking"
+
+
+def test_parse_thinking_with_empty_response():
+    raw = "<thinking>Just thinking, no response</thinking>"
+    thinking, response = parse_thinking_trace(raw)
+    assert thinking is not None
+    assert thinking.content == "Just thinking, no response"
+    assert response == ""
+
+
+if __name__ == "__main__":
+    test_parse_xml_thinking_tag()
+    test_parse_xml_thought_tag()
+    test_parse_text_thinking_prefix()
+    test_parse_no_thinking()
+    test_parse_empty_response()
+    test_parse_multiple_markers_prefers_first()
+    test_parse_thinking_with_empty_response()
+    print("All thinking trace tests passed!")