# manual_slop/tests/test_thinking_trace.py

from dataclasses import dataclass
from typing import Optional
import re


@dataclass
class ThinkingSegment:
    """A reasoning trace extracted from a raw model response.

    Attributes:
        content: The trace text with surrounding whitespace stripped.
        marker_type: How the trace was delimited: ``"xml"`` for
            ``<thinking>``/``<thought>`` tags, ``"text"`` for a
            ``Thinking:``/``thinking:`` prefix line.
    """

    content: str
    marker_type: str


# (compiled pattern, marker type) pairs, tried in this priority order.
# The text patterns need DOTALL so a trace may span several lines, and use
# ``\Z`` rather than ``$`` because under MULTILINE ``$`` matches at every
# line end, which would cut the trace off after its first line.
_THINKING_PATTERNS = (
    (re.compile(r"<thinking>\s*(.*?)\s*</thinking>", re.DOTALL), "xml"),
    (re.compile(r"<thought>\s*(.*?)\s*</thought>", re.DOTALL), "xml"),
    (
        re.compile(
            r"^Thinking:\s*\n(.+?)(?:\n\n|\n?\Z)", re.MULTILINE | re.DOTALL
        ),
        "text",
    ),
    (
        re.compile(
            r"^thinking:\s*\n(.+?)(?:\n\n|\n?\Z)", re.MULTILINE | re.DOTALL
        ),
        "text",
    ),
)


def parse_thinking_trace(raw_response: str) -> tuple[Optional[ThinkingSegment], str]:
    """Split a raw response into its thinking trace and the remaining text.

    The patterns are tried in a fixed order (``<thinking>``, ``<thought>``,
    ``Thinking:``, ``thinking:``); the first pattern that matches anywhere in
    the text wins, and only that one occurrence is removed.

    A text-style trace starts on the line after ``Thinking:`` and runs until
    the first blank line or the end of the string, so it may span multiple
    lines.

    Args:
        raw_response: The full response text, possibly containing a trace.

    Returns:
        A ``(segment, remaining)`` tuple. ``segment`` is ``None`` when the
        input is empty or no marker is found, in which case ``remaining`` is
        the input unchanged. Otherwise ``remaining`` is the input with the
        matched trace cut out and surrounding whitespace stripped.
    """
    if not raw_response:
        return None, raw_response

    for pattern, marker_type in _THINKING_PATTERNS:
        match = pattern.search(raw_response)
        if match is None:
            continue
        # Splice the matched span out and keep whatever surrounds it.
        remaining = raw_response[: match.start()] + raw_response[match.end():]
        segment = ThinkingSegment(
            content=match.group(1).strip(), marker_type=marker_type
        )
        return segment, remaining.strip()

    return None, raw_response


def test_parse_xml_thinking_tag():
    """A ``<thinking>`` block is extracted and the trailing text is the response."""
    segment, rest = parse_thinking_trace(
        "<thinking>\nLet me analyze this problem step by step.\n</thinking>\nHere is the answer."
    )
    assert segment is not None
    assert (segment.content, segment.marker_type) == (
        "Let me analyze this problem step by step.",
        "xml",
    )
    assert rest == "Here is the answer."


def test_parse_xml_thought_tag():
    """A ``<thought>`` block is handled the same way as ``<thinking>``."""
    segment, rest = parse_thinking_trace(
        "<thought>This is my reasoning process</thought>\nFinal response here."
    )
    assert segment is not None
    assert (segment.content, segment.marker_type) == (
        "This is my reasoning process",
        "xml",
    )
    assert rest == "Final response here."


def test_parse_text_thinking_prefix():
    """A ``Thinking:`` line followed by a paragraph is parsed as a text trace."""
    segment, rest = parse_thinking_trace(
        "Thinking:\nThis is a text-based thinking trace.\n\nNow for the actual response."
    )
    assert segment is not None
    assert (segment.content, segment.marker_type) == (
        "This is a text-based thinking trace.",
        "text",
    )
    assert rest == "Now for the actual response."


def test_parse_no_thinking():
    """Text without any marker yields no segment and comes back unchanged."""
    plain = "This is a normal response without any thinking markers."
    segment, rest = parse_thinking_trace(plain)
    assert segment is None
    assert rest == plain


def test_parse_empty_response():
    """An empty string yields no segment and an empty response."""
    segment, rest = parse_thinking_trace("")
    assert (segment, rest) == (None, "")


def test_parse_multiple_markers_prefers_first():
    """With two markers present, only the first is extracted.

    The second marker must stay in the response text untouched; previously
    the response was unpacked but never checked.
    """
    raw = "<thinking>First thinking</thinking>\n<thought>Second thought</thought>\nResponse"
    thinking, response = parse_thinking_trace(raw)
    assert thinking is not None
    assert thinking.content == "First thinking"
    assert thinking.marker_type == "xml"
    assert response == "<thought>Second thought</thought>\nResponse"


def test_parse_thinking_with_empty_response():
    """A response that is only a thinking block leaves an empty remainder."""
    segment, rest = parse_thinking_trace(
        "<thinking>Just thinking, no response</thinking>"
    )
    assert segment is not None
    assert segment.content == "Just thinking, no response"
    assert rest == ""


if __name__ == "__main__":
    # Run every test in declaration order when executed as a script;
    # any failing assertion aborts before the success message is printed.
    for check in (
        test_parse_xml_thinking_tag,
        test_parse_xml_thought_tag,
        test_parse_text_thinking_prefix,
        test_parse_no_thinking,
        test_parse_empty_response,
        test_parse_multiple_markers_prefers_first,
        test_parse_thinking_with_empty_response,
    ):
        check()
    print("All thinking trace tests passed!")