# manual_slop/src/fuzzy_anchor.py

import hashlib
import re
from typing import Optional, Tuple

class FuzzyAnchor:
    """Record a line range of a text and re-locate it after the text changes.

    ``create_slice`` captures the 1-based line range, a hash of its content
    and a few non-blank, whitespace-stripped context lines at each edge.
    ``resolve_slice`` first checks whether the recorded range still holds the
    same content and otherwise searches for the context lines.
    """

    # Number of non-blank lines captured at each edge of a slice.
    _CONTEXT_LINES = 3

    @staticmethod
    def get_context(lines: list[str], index: int, count: int, direction: int) -> list[str]:
        """Collect up to ``count`` non-blank, stripped lines starting at ``index``.

        Args:
            lines: The text split into lines.
            index: 0-based position to start from (inclusive).
            count: Maximum number of lines to collect.
            direction: Step applied to the position after each line, e.g.
                ``1`` to walk forward or ``-1`` to walk backward.

        Returns:
            The collected lines in the order they were visited; fewer than
            ``count`` when the walk runs off either end of ``lines``.

        Raises:
            ValueError: If ``direction`` is 0, since the walk could never
                advance.
        """
        if direction == 0:
            raise ValueError("direction must be non-zero")
        context: list[str] = []
        curr = index
        while len(context) < count and 0 <= curr < len(lines):
            line = lines[curr].strip()
            if line:
                context.append(line)
            curr += direction
        return context

    @staticmethod
    def _hash_lines(lines: list[str]) -> str:
        """Return the MD5 hex digest of ``lines`` joined with newlines."""
        joined = "\n".join(lines)
        # MD5 is only a change detector here, not a security primitive.
        return hashlib.md5(joined.encode(), usedforsecurity=False).hexdigest()

    @classmethod
    def create_slice(cls, text: str, start_line: int, end_line: int) -> dict:
        """Describe the lines ``start_line``..``end_line`` of ``text``.

        Args:
            text: The full text.
            start_line: First line of the slice, 1-based.
            end_line: Last line of the slice, 1-based and inclusive.

        Returns:
            A dict with the keys ``start_line`` and ``end_line`` (stored as
            given), ``start_context`` (non-blank stripped lines walking
            forward from the slice start), ``end_context`` (non-blank stripped
            lines leading up to the slice end, in file order) and
            ``content_hash`` (MD5 hex digest of the slice content).
        """
        lines = text.splitlines()
        # Clamp to the text so out-of-range requests still produce a slice.
        s_idx = max(0, start_line - 1)
        e_idx = min(len(lines), end_line)

        # The backward walk yields lines bottom-up; flip to file order.
        end_context = cls.get_context(lines, e_idx - 1, cls._CONTEXT_LINES, -1)[::-1]

        return {
            "start_line": start_line,
            "end_line": end_line,
            "start_context": cls.get_context(lines, s_idx, cls._CONTEXT_LINES, 1),
            "end_context": end_context,
            "content_hash": cls._hash_lines(lines[s_idx:e_idx]),
        }

    @classmethod
    def resolve_slice(cls, text: str, slice_data: dict) -> Optional[Tuple[int, int]]:
        """Find the slice described by ``slice_data`` inside ``text``.

        Args:
            text: The (possibly edited) text to search.
            slice_data: A dict as produced by ``create_slice``.

        Returns:
            ``(start_line, end_line)``, 1-based and inclusive, or ``None``
            when the slice cannot be located. When the start context occurs
            more than once, the first occurrence in file order is used.
        """
        lines = text.splitlines()

        # 1. Exact match: the recorded range still holds the same content.
        start_line = slice_data["start_line"]
        end_line = slice_data["end_line"]
        s_idx = start_line - 1
        if 0 <= s_idx < len(lines) and end_line <= len(lines):
            if cls._hash_lines(lines[s_idx:end_line]) == slice_data["content_hash"]:
                return (start_line, end_line)

        # 2. Fuzzy match on the recorded context lines.
        start_ctx = list(slice_data["start_context"])
        end_ctx = list(slice_data["end_context"])
        if not start_ctx or not end_ctx:
            return None

        # get_context() skips blank lines, so the context has to be compared
        # against the non-blank lines only; positions[k] maps the k-th
        # non-blank line back to its index in ``lines``.
        positions: list[int] = []
        stripped: list[str] = []
        for idx, raw in enumerate(lines):
            content = raw.strip()
            if content:
                positions.append(idx)
                stripped.append(content)

        n_start = len(start_ctx)
        start_pos = next(
            (
                p
                for p in range(len(stripped) - n_start + 1)
                if stripped[p:p + n_start] == start_ctx
            ),
            None,
        )
        if start_pos is None:
            return None
        best_s = positions[start_pos]

        # The end context finishes on the slice's last non-blank line, which
        # must not lie before the start; its earlier lines may.
        n_end = len(end_ctx)
        for q in range(max(start_pos, n_end - 1), len(stripped)):
            if stripped[q - n_end + 1:q + 1] == end_ctx:
                return (best_s + 1, positions[q] + 1)

        return None