83 lines
2.9 KiB
Python
83 lines
2.9 KiB
Python
import hashlib
|
|
import re
|
|
from typing import Optional, Tuple
|
|
|
|
class FuzzyAnchor:
    """Anchor a 1-based line range of a text so it can be found again later.

    ``create_slice`` records the range together with a content hash and a
    few non-blank context lines taken at each end of the range.
    ``resolve_slice`` first checks whether the range is still at its
    recorded position (hash comparison) and otherwise searches the text for
    the recorded context lines.
    """

    @staticmethod
    def get_context(lines: list[str], index: int, count: int, direction: int) -> list[str]:
        """Collect up to ``count`` stripped, non-blank lines starting at ``index``.

        Args:
            lines: The text split into lines.
            index: 0-based index of the first line to inspect.
            count: Maximum number of non-blank lines to collect.
            direction: Step applied to the index after each line
                (``1`` walks forward, ``-1`` walks backward).

        Returns:
            The collected lines in the order they were visited. Fewer than
            ``count`` lines are returned when the walk leaves the text, and
            an empty list when ``index`` is out of range.

        Raises:
            ValueError: If ``direction`` is 0, because the walk could never
                advance.
        """
        if direction == 0:
            raise ValueError("direction must be non-zero")

        context = []
        curr = index
        while len(context) < count and 0 <= curr < len(lines):
            line = lines[curr].strip()
            if line:
                context.append(line)
            curr += direction
        return context

    @staticmethod
    def _digest(text: str) -> str:
        """Return the hex MD5 digest used to fingerprint slice content."""
        # The hash only detects content changes; it is not a security
        # measure, so flag it as such for restricted (e.g. FIPS) builds.
        return hashlib.md5(text.encode(), usedforsecurity=False).hexdigest()

    @staticmethod
    def _find_run(texts: list[str], pattern: list[str], first: int = 0) -> int:
        """Return the first position >= ``first`` where ``pattern`` occurs
        as a contiguous run inside ``texts``, or -1 when it does not occur."""
        width = len(pattern)
        for pos in range(max(first, 0), len(texts) - width + 1):
            if texts[pos:pos + width] == pattern:
                return pos
        return -1

    @classmethod
    def create_slice(cls, text: str, start_line: int, end_line: int) -> dict:
        """Build the anchor record for lines ``start_line``..``end_line``.

        ``start_line`` and ``end_line`` are 1-based and inclusive. They are
        stored exactly as given; the hash and the context lines are computed
        from the range clamped to the bounds of ``text``.

        Returns:
            A dict with the keys ``start_line``, ``end_line``,
            ``start_context`` (up to three non-blank lines walking forward
            from the first line of the range), ``end_context`` (up to three
            non-blank lines ending at the last line of the range, in
            document order) and ``content_hash`` (hex MD5 of the range).
        """
        lines = text.splitlines()
        s_idx = max(0, start_line - 1)
        e_idx = min(len(lines), end_line)
        slice_text = "\n".join(lines[s_idx:e_idx])

        # The backward walk yields bottom-up order; flip it so the stored
        # end context reads top-down like the document.
        end_context = cls.get_context(lines, e_idx - 1, 3, -1)[::-1]

        return {
            "start_line": start_line,
            "end_line": end_line,
            "start_context": cls.get_context(lines, s_idx, 3, 1),
            "end_context": end_context,
            "content_hash": cls._digest(slice_text),
        }

    @classmethod
    def resolve_slice(cls, text: str, slice_data: dict) -> Optional[Tuple[int, int]]:
        """Locate a previously anchored slice inside ``text``.

        Args:
            text: The current text to search.
            slice_data: A record as produced by ``create_slice``.

        Returns:
            ``(start_line, end_line)``, 1-based and inclusive, or ``None``
            when the slice cannot be located. When the recorded position
            still hashes to the recorded content, the recorded line numbers
            are returned unchanged. Otherwise the result spans from the
            first line of the start context to the last line of the end
            context, so it begins and ends on non-blank lines.
        """
        lines = text.splitlines()

        # 1. Exact match: the content is still at the recorded position.
        start_line = slice_data["start_line"]
        end_line = slice_data["end_line"]
        s_idx = start_line - 1
        e_idx = end_line
        if 0 <= s_idx < len(lines) and e_idx <= len(lines):
            current_text = "\n".join(lines[s_idx:e_idx])
            if cls._digest(current_text) == slice_data["content_hash"]:
                return (start_line, end_line)

        # 2. Fuzzy match on the recorded context lines.
        start_ctx = list(slice_data["start_context"])
        end_ctx = list(slice_data["end_context"])
        if not start_ctx or not end_ctx:
            return None

        # Context was captured with blank lines skipped, so it has to be
        # matched against the non-blank lines only; ``positions`` maps each
        # non-blank entry back to its 0-based index in ``lines``.
        positions = []
        stripped = []
        for idx, raw in enumerate(lines):
            content = raw.strip()
            if content:
                positions.append(idx)
                stripped.append(content)

        start_pos = cls._find_run(stripped, start_ctx)
        if start_pos == -1:
            return None

        # The end context must END at or after the start line. It may begin
        # before it, because a short slice shares lines between both
        # contexts (or reaches back past the slice start).
        earliest = start_pos - len(end_ctx) + 1
        end_pos = cls._find_run(stripped, end_ctx, earliest)
        if end_pos == -1:
            return None

        best_s = positions[start_pos]
        best_e = positions[end_pos + len(end_ctx) - 1] + 1
        return (best_s + 1, best_e)
|