import re
from typing import List, Tuple

from src.models import ThinkingSegment

# Matches <thinking>...</thinking> and <thought>...</thought>, case-insensitively.
# The closing tag (back-referencing the opening name) is mandatory: a lazy
# ``(.*?)`` with nothing after it always matches the empty string, which would
# yield empty segments and leave the thinking text in the response.
_TAG_PATTERN = re.compile(
    r'<(thinking|thought)>(.*?)</\1\s*>',
    re.DOTALL | re.IGNORECASE,
)

# Matches a block introduced by "Thinking:" at the start of the text or right
# after a newline. The block ends at a blank line, a "Response:" / "Answer:"
# marker on its own line, or the end of the text.
#
# For the blank-line terminator only the first newline is consumed and the
# second is asserted with a lookahead, so that a following "Thinking:" block can
# still match its leading "\n". Group 2 records which terminator was hit.
_COLON_PATTERN = re.compile(
    r'(?:^|\n)Thinking:\s*(.*?)(\n(?=\n)|\nResponse:|\nAnswer:|$)',
    re.DOTALL | re.IGNORECASE,
)


def parse_thinking_trace(text: str) -> Tuple[List[ThinkingSegment], str]:
    """Split *text* into thinking segments and the remaining response content.

    Two kinds of thinking traces are recognised:

    * ``<thinking>...</thinking>`` and ``<thought>...</thought>`` tag pairs
      (case-insensitive). Each pair becomes a segment whose ``marker`` is the
      lower-cased tag name. Tags are removed from the text entirely.
    * Blocks prefixed with ``Thinking:`` that run until a blank line, a
      ``Response:`` / ``Answer:`` line marker, or the end of the text. Each
      non-empty block becomes a segment with ``marker="Thinking:"``. The
      terminating ``Response:`` / ``Answer:`` marker is removed together with
      the block.

    Tag segments are listed before ``Thinking:`` segments, each group in order
    of appearance.

    Args:
        text: Raw model output that may contain thinking traces.

    Returns:
        A ``(segments, response_content)`` tuple, where ``response_content`` is
        *text* with all recognised thinking traces removed and surrounding
        whitespace stripped.
    """
    segments: List[ThinkingSegment] = []

    def _take_tag(match: re.Match) -> str:
        # Record the tag body and drop the whole tag pair from the text.
        segments.append(
            ThinkingSegment(
                content=match.group(2).strip(),
                marker=match.group(1).lower(),
            )
        )
        return ""

    def _take_colon_block(match: re.Match) -> str:
        content = match.group(1).strip()
        if content:
            segments.append(ThinkingSegment(content=content, marker="Thinking:"))
        # Leave a paragraph break where the block was. When the terminator was
        # a blank line, one of its two newlines is still in the text (it was
        # only looked ahead at), so emit a single newline to total two.
        return "\n" if match.group(2) == "\n" else "\n\n"

    # 1. Extract <thinking> and <thought> tags.
    remaining = _TAG_PATTERN.sub(_take_tag, text)

    # 2. Extract "Thinking:"-prefixed blocks from what is left.
    remaining = _COLON_PATTERN.sub(_take_colon_block, remaining)

    return segments, remaining.strip()