test: Add thinking trace parsing tests
This commit is contained in:
@@ -132,7 +132,18 @@ def parse_history_entries(history_strings: list[str], roles: list[str]) -> list[
|
||||
return entries
|
||||
|
||||
@dataclass
class ThinkingSegment:
    """One block of "thinking" text together with the marker that introduced it.

    Instances round-trip through ``to_dict`` and ``from_dict``.
    """

    content: str  # text of the thinking block
    marker: str  # 'thinking', 'thought', or 'Thinking:'

    def to_dict(self) -> Dict[str, Any]:
        """Return the segment as a plain dict with ``content`` and ``marker`` keys."""
        return {"content": self.content, "marker": self.marker}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ThinkingSegment":
        """Build a segment from a dict shaped like the output of ``to_dict``.

        Raises:
            KeyError: if ``content`` or ``marker`` is missing from ``data``.
        """
        return cls(content=data["content"], marker=data["marker"])
|
||||
|
||||
|
||||
@dataclass
|
||||
class Ticket:
|
||||
id: str
|
||||
@@ -239,8 +250,6 @@ class Track:
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass
|
||||
@dataclass
|
||||
class WorkerContext:
|
||||
ticket_id: str
|
||||
|
||||
53
src/thinking_parser.py
Normal file
53
src/thinking_parser.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import re
|
||||
from typing import List, Tuple
|
||||
from src.models import ThinkingSegment
|
||||
|
||||
# <thinking>...</thinking> or <thought>...</thought>. The backreference makes the
# closing tag repeat the opening tag's name (compared case-insensitively).
_TAG_PATTERN = re.compile(r'<(thinking|thought)>(.*?)</\1>', re.DOTALL | re.IGNORECASE)

# A block that starts with "Thinking:" at the beginning of the text or of a line
# and runs until a blank line, a "Response:"/"Answer:" line, or the end of text.
_COLON_PATTERN = re.compile(
    r'(?:^|\n)Thinking:\s*(.*?)(?:\n\n|\nResponse:|\nAnswer:|$)',
    re.DOTALL | re.IGNORECASE,
)


def parse_thinking_trace(text: str) -> "Tuple[List[ThinkingSegment], str]":
    """Split thinking segments out of ``text``.

    Two kinds of markers are recognised, in two passes:

    1. ``<thinking>...</thinking>`` and ``<thought>...</thought>`` tags.
    2. Blocks prefixed with ``Thinking:``.

    Args:
        text: Raw model output. Falsy input (``""`` or ``None``) is treated as
            containing nothing.

    Returns:
        ``(segments, response_content)``. ``segments`` lists every tag segment
        first, then every ``Thinking:`` segment, so the order is per pass rather
        than strict document order. ``response_content`` is the text left after
        the matched blocks are removed, with surrounding whitespace stripped.
    """
    if not text:
        # Nothing to scan; also avoids passing None to re.sub.
        return [], ""

    segments: List[ThinkingSegment] = []

    def _take_tag(match):
        # The tag name, lowercased, becomes the marker; the block itself is dropped.
        segments.append(
            ThinkingSegment(content=match.group(2).strip(), marker=match.group(1).lower())
        )
        return ""

    def _take_colon_block(match):
        body = match.group(1).strip()
        if body:
            segments.append(ThinkingSegment(content=body, marker="Thinking:"))
        # The match consumed its terminator (including a "Response:"/"Answer:"
        # label), so put a paragraph break back to keep neighbours separated.
        return "\n\n"

    without_tags = _TAG_PATTERN.sub(_take_tag, text)
    response = _COLON_PATTERN.sub(_take_colon_block, without_tags)
    return segments, response.strip()
|
||||
101
tests/test_thinking_trace.py
Normal file
101
tests/test_thinking_trace.py
Normal file
@@ -0,0 +1,101 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
import re
|
||||
|
||||
|
||||
@dataclass
class ThinkingSegment:
    """Thinking text extracted from a raw response, plus the kind of marker found."""

    content: str  # the thinking text, stripped of surrounding whitespace
    marker_type: str  # "xml" for tag markers, "text" for the "Thinking:" prefix


# (regex, marker_type, flags), tried in this order.
# The text patterns need DOTALL so a multi-line block is captured whole, and \Z
# rather than $ because under MULTILINE "$" matches at the end of *every* line.
_THINKING_PATTERNS = (
    (r"<thinking>\s*(.*?)\s*</thinking>", "xml", re.DOTALL),
    (r"<thought>\s*(.*?)\s*</thought>", "xml", re.DOTALL),
    (r"^Thinking:\s*\n(.+?)(?:\n\n|\n?\Z)", "text", re.MULTILINE | re.DOTALL),
    (r"^thinking:\s*\n(.+?)(?:\n\n|\n?\Z)", "text", re.MULTILINE | re.DOTALL),
)


def parse_thinking_trace(raw_response: str) -> tuple[Optional[ThinkingSegment], str]:
    """Extract at most one thinking segment from ``raw_response``.

    Patterns are tried in table order and the first pattern that matches wins,
    even if a later pattern would match earlier in the text.

    Args:
        raw_response: Raw model output; falsy values are returned unchanged.

    Returns:
        ``(segment, remaining)``. ``segment`` is ``None`` when no marker is
        found, in which case ``remaining`` is ``raw_response`` untouched.
        Otherwise ``remaining`` is the text with the matched block cut out and
        surrounding whitespace stripped.
    """
    if not raw_response:
        return None, raw_response

    for pattern, marker_type, flags in _THINKING_PATTERNS:
        match = re.search(pattern, raw_response, flags)
        if match is None:
            continue
        remaining = raw_response[: match.start()] + raw_response[match.end() :]
        segment = ThinkingSegment(
            content=match.group(1).strip(), marker_type=marker_type
        )
        return segment, remaining.strip()

    return None, raw_response
|
||||
|
||||
|
||||
def test_parse_xml_thinking_tag():
    """A <thinking> block is extracted as an "xml" segment and cut from the response."""
    segment, answer = parse_thinking_trace(
        "<thinking>\nLet me analyze this problem step by step.\n</thinking>\nHere is the answer."
    )
    assert segment is not None
    assert segment.content == "Let me analyze this problem step by step."
    assert segment.marker_type == "xml"
    assert answer == "Here is the answer."
|
||||
|
||||
|
||||
def test_parse_xml_thought_tag():
    """A <thought> block is extracted as an "xml" segment and cut from the response."""
    segment, answer = parse_thinking_trace(
        "<thought>This is my reasoning process</thought>\nFinal response here."
    )
    assert segment is not None
    assert segment.content == "This is my reasoning process"
    assert segment.marker_type == "xml"
    assert answer == "Final response here."
|
||||
|
||||
|
||||
def test_parse_text_thinking_prefix():
    """A "Thinking:" block ended by a blank line is extracted as a "text" segment."""
    segment, answer = parse_thinking_trace(
        "Thinking:\nThis is a text-based thinking trace.\n\nNow for the actual response."
    )
    assert segment is not None
    assert segment.content == "This is a text-based thinking trace."
    assert segment.marker_type == "text"
    assert answer == "Now for the actual response."
|
||||
|
||||
|
||||
def test_parse_no_thinking():
    """Text without any marker yields no segment and comes back unchanged."""
    plain = "This is a normal response without any thinking markers."
    segment, answer = parse_thinking_trace(plain)
    assert segment is None
    assert answer == plain
|
||||
|
||||
|
||||
def test_parse_empty_response():
    """An empty string yields no segment and an empty response."""
    segment, answer = parse_thinking_trace("")
    assert segment is None
    assert answer == ""
|
||||
|
||||
|
||||
def test_parse_multiple_markers_prefers_first():
    """With both a <thinking> and a <thought> block, the <thinking> one is returned."""
    segment, _answer = parse_thinking_trace(
        "<thinking>First thinking</thinking>\n<thought>Second thought</thought>\nResponse"
    )
    assert segment is not None
    assert segment.content == "First thinking"
|
||||
|
||||
|
||||
def test_parse_thinking_with_empty_response():
    """Input consisting solely of a thinking block leaves an empty response."""
    segment, answer = parse_thinking_trace(
        "<thinking>Just thinking, no response</thinking>"
    )
    assert segment is not None
    assert segment.content == "Just thinking, no response"
    assert answer == ""
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this file directly: execute each test in definition order
    # and report success only if none of them raised.
    for run_test in (
        test_parse_xml_thinking_tag,
        test_parse_xml_thought_tag,
        test_parse_text_thinking_prefix,
        test_parse_no_thinking,
        test_parse_empty_response,
        test_parse_multiple_markers_prefers_first,
        test_parse_thinking_with_empty_response,
    ):
        run_test()
    print("All thinking trace tests passed!")
|
||||
Reference in New Issue
Block a user