test: Add thinking trace parsing tests
This commit is contained in:
53
src/thinking_parser.py
Normal file
53
src/thinking_parser.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import re
|
||||
from typing import List, Tuple
|
||||
from src.models import ThinkingSegment
|
||||
|
||||
def parse_thinking_trace(text: str) -> Tuple[List[ThinkingSegment], str]:
|
||||
"""
|
||||
Parses thinking segments from text and returns (segments, response_content).
|
||||
Support extraction of thinking traces from <thinking>...</thinking>, <thought>...</thought>,
|
||||
and blocks prefixed with Thinking:.
|
||||
"""
|
||||
segments = []
|
||||
|
||||
# 1. Extract <thinking> and <thought> tags
|
||||
current_text = text
|
||||
|
||||
# Combined pattern for tags
|
||||
tag_pattern = re.compile(r'<(thinking|thought)>(.*?)</\1>', re.DOTALL | re.IGNORECASE)
|
||||
|
||||
def extract_tags(txt: str) -> Tuple[List[ThinkingSegment], str]:
|
||||
found_segments = []
|
||||
|
||||
def replace_func(match):
|
||||
marker = match.group(1).lower()
|
||||
content = match.group(2).strip()
|
||||
found_segments.append(ThinkingSegment(content=content, marker=marker))
|
||||
return ""
|
||||
|
||||
remaining = tag_pattern.sub(replace_func, txt)
|
||||
return found_segments, remaining
|
||||
|
||||
tag_segments, remaining = extract_tags(current_text)
|
||||
segments.extend(tag_segments)
|
||||
|
||||
# 2. Extract Thinking: prefix
|
||||
# This usually appears at the start of a block and ends with a double newline or a response marker.
|
||||
thinking_colon_pattern = re.compile(r'(?:^|\n)Thinking:\s*(.*?)(?:\n\n|\nResponse:|\nAnswer:|$)', re.DOTALL | re.IGNORECASE)
|
||||
|
||||
def extract_colon_blocks(txt: str) -> Tuple[List[ThinkingSegment], str]:
|
||||
found_segments = []
|
||||
|
||||
def replace_func(match):
|
||||
content = match.group(1).strip()
|
||||
if content:
|
||||
found_segments.append(ThinkingSegment(content=content, marker="Thinking:"))
|
||||
return "\n\n"
|
||||
|
||||
res = thinking_colon_pattern.sub(replace_func, txt)
|
||||
return found_segments, res
|
||||
|
||||
colon_segments, final_remaining = extract_colon_blocks(remaining)
|
||||
segments.extend(colon_segments)
|
||||
|
||||
return segments, final_remaining.strip()
|
||||
Reference in New Issue
Block a user