feat(diff): Add diff parser for unified diff format

- Create src/diff_viewer.py with parse_diff function
- Parse unified diff into DiffFile and DiffHunk dataclasses
- Extract file paths, hunk headers, and line changes
- Add unit tests for diff parser
This commit is contained in:
2026-03-07 00:12:06 -05:00
parent da58f46e89
commit 130001c0ba
2 changed files with 212 additions and 0 deletions

129
src/diff_viewer.py Normal file
View File

@@ -0,0 +1,129 @@
from typing import List, Dict, Optional
from dataclasses import dataclass
@dataclass
class DiffHunk:
    """A single hunk (one ``@@ ... @@`` section) of a unified diff.

    Instances are plain mutable records: the parser creates a hunk with an
    empty ``lines`` list and appends to it as it reads the diff.
    """

    # The hunk header line, stored exactly as it appeared in the diff.
    header: str
    # Lines collected for this hunk, stored verbatim (prefix character included).
    lines: List[str]
    # First line number of the hunk on the old side (the ``-start`` value).
    old_start: int
    # Number of old-side lines announced by the header (the ``-start,count`` value).
    old_count: int
    # First line number of the hunk on the new side (the ``+start`` value).
    new_start: int
    # Number of new-side lines announced by the header (the ``+start,count`` value).
    new_count: int
@dataclass
class DiffFile:
    """The changes a unified diff records for one file.

    The record is mutable on purpose: the parser creates it when it sees the
    ``---`` line and fills in ``new_path`` and ``hunks`` afterwards.
    """

    # Path taken from the ``---`` line.
    old_path: str
    # Path taken from the ``+++`` line.
    new_path: str
    # Hunks belonging to this file, in the order they were read.
    hunks: List[DiffHunk]
def parse_diff_header(line: str) -> tuple[Optional[str], Optional[str], Optional[tuple[int, int, int, int]]]:
    """Extract the file path from a ``--- `` or ``+++ `` header line.

    Args:
        line: One line of a unified diff.

    Returns:
        A 3-tuple ``(old_path, new_path, hunk_range)``:

        * ``(path, None, None)`` for a ``--- `` line,
        * ``(None, path, None)`` for a ``+++ `` line,
        * ``(None, None, None)`` for any other line.

        The third element is always ``None``; this function never parses
        hunk ranges.

    The leading ``a/`` (old side) or ``b/`` (new side) prefix is removed.
    Anything from the first tab onwards is dropped as well: ``diff -u``
    writes a timestamp after a tab, and git appends a bare tab after names
    that contain spaces, so that text is not part of the path.
    """
    if line.startswith("--- "):
        is_old_side = True
    elif line.startswith("+++ "):
        is_old_side = False
    else:
        return None, None, None

    # Cut trailing tab-separated metadata and a stray CR from CRLF input.
    path = line[4:].partition("\t")[0].rstrip("\r\n")

    prefix = "a/" if is_old_side else "b/"
    if path.startswith(prefix):
        path = path[len(prefix):]

    if is_old_side:
        return path, None, None
    return None, path, None
def _parse_hunk_range(text: str) -> Optional[tuple[int, int]]:
    """Parse ``start`` or ``start,count`` into ``(start, count)``, or None if malformed."""
    start, comma, count = text.partition(",")
    if not comma:
        # The unified format omits the count when it is exactly 1.
        count = "1"
    if not all(field.isascii() and field.isdigit() for field in (start, count)):
        return None
    return int(start), int(count)


def parse_hunk_header(line: str) -> Optional[tuple[int, int, int, int]]:
    """Parse a unified-diff hunk header such as ``@@ -1,3 +1,4 @@ context``.

    Args:
        line: One line of a unified diff.

    Returns:
        ``(old_start, old_count, new_start, new_count)`` when ``line`` is a
        well-formed hunk header, otherwise ``None``. A missing count (for
        example ``-5`` instead of ``-5,1``) is read as 1.

    Malformed headers (too few fields, missing ``-``/``+`` sigils,
    non-numeric ranges) yield ``None`` rather than raising, so callers can
    fall back to a placeholder hunk.
    """
    if not line.startswith("@@"):
        return None

    parts = line.split()
    # "@@", the old range and the new range are all required.
    if len(parts) < 3:
        return None

    old_part, new_part = parts[1], parts[2]
    if not (old_part.startswith("-") and new_part.startswith("+")):
        return None

    old_range = _parse_hunk_range(old_part[1:])
    new_range = _parse_hunk_range(new_part[1:])
    if old_range is None or new_range is None:
        return None

    return (*old_range, *new_range)
def _starts_file_section(lines: List[str], index: int) -> bool:
    """Return True if ``lines[index:index + 3]`` is a ``---`` / ``+++`` / ``@@`` run."""
    window = lines[index:index + 3]
    return (
        len(window) == 3
        and window[0].startswith("--- ")
        and window[1].startswith("+++ ")
        and window[2].startswith("@@")
    )


def parse_diff(diff_text: str) -> List[DiffFile]:
    """Parse unified-diff text into a list of ``DiffFile`` records.

    Args:
        diff_text: The diff as a single string with ``\\n`` line endings.

    Returns:
        One ``DiffFile`` per ``--- `` file header, in input order, each
        holding its hunks in input order. Empty or whitespace-only input
        yields an empty list.

    Parsing rules:

    * ``--- `` starts a new file, ``+++ `` sets its new path and ``@@``
      starts a hunk. ``+++ `` and ``@@`` lines seen before any ``--- ``
      line are ignored.
    * While the current hunk's header still promises lines, lines shaped
      like hunk content (first character space, ``+``, ``-``, ``\\`` or an
      empty line) are taken as content. This keeps a removed ``-- text``
      line, which appears as ``--- text``, from being misread as a file
      header. The one exception is an unmistakable ``---`` / ``+++`` /
      ``@@`` run, which always starts a new file so that an over-declared
      count cannot swallow the next file.
    * Once the promised lines are consumed, or when the hunk header could
      not be parsed (ranges recorded as 0), content-shaped lines are still
      collected, but any other line (for example ``diff --git ...`` or
      ``index ...``) closes the hunk instead of being stored in it.
    * A final newline terminates the text; it does not add an empty line
      to the last hunk.
    """
    if not diff_text or not diff_text.strip():
        return []

    lines = diff_text.split("\n")
    if lines[-1] == "":
        # "a\n".split("\n") gives ["a", ""]; the last item is not a diff line.
        lines.pop()

    # First characters a hunk content line may have; "" covers context lines
    # whose single leading space was stripped by some tool.
    body_markers = ("", " ", "+", "-", "\\")

    files: List[DiffFile] = []
    current_file: Optional[DiffFile] = None
    current_hunk: Optional[DiffHunk] = None
    # Lines the open hunk's header still promises for each side.
    old_left = 0
    new_left = 0

    for index, line in enumerate(lines):
        marker = line[:1]

        if (
            current_hunk is not None
            and (old_left > 0 or new_left > 0)
            and marker in body_markers
            and not _starts_file_section(lines, index)
        ):
            current_hunk.lines.append(line)
            if marker == "+":
                new_left -= 1
            elif marker == "-":
                old_left -= 1
            elif marker != "\\":
                # Context lines exist on both sides; "\ No newline..." on neither.
                old_left -= 1
                new_left -= 1
            continue

        if line.startswith("--- "):
            if current_file is not None:
                if current_hunk is not None:
                    current_file.hunks.append(current_hunk)
                    current_hunk = None
                files.append(current_file)
            old_path, _, _ = parse_diff_header(line)
            current_file = DiffFile(old_path=old_path or "", new_path="", hunks=[])
        elif line.startswith("+++ ") and current_file is not None:
            _, new_path, _ = parse_diff_header(line)
            current_file.new_path = new_path or ""
        elif line.startswith("@@") and current_file is not None:
            if current_hunk is not None:
                current_file.hunks.append(current_hunk)
            try:
                hunk_info = parse_hunk_header(line)
            except (ValueError, IndexError):
                # One malformed header must not abort parsing of the whole diff.
                hunk_info = None
            if hunk_info is None:
                hunk_info = (0, 0, 0, 0)
            old_start, old_count, new_start, new_count = hunk_info
            current_hunk = DiffHunk(
                header=line,
                lines=[],
                old_start=old_start,
                old_count=old_count,
                new_start=new_start,
                new_count=new_count,
            )
            old_left, new_left = old_count, new_count
        elif current_hunk is not None and current_file is not None:
            if marker in body_markers:
                # Counts unknown or under-declared: stay lenient and keep the line.
                current_hunk.lines.append(line)
            else:
                # Metadata of the next file section: the hunk is over.
                current_file.hunks.append(current_hunk)
                current_hunk = None

    if current_file is not None:
        if current_hunk is not None:
            current_file.hunks.append(current_hunk)
        files.append(current_file)

    return files
def format_diff_for_display(diff_files: List[DiffFile]) -> str:
    """Render parsed diff files as indented plain text.

    Args:
        diff_files: Parsed files, e.g. the result of ``parse_diff``.

    Returns:
        A newline-joined string with one ``File: <path>`` line per file,
        followed by each hunk header and each hunk line, both prefixed with
        a single space. An empty input list yields an empty string.

    The path shown is ``old_path``, except for newly added files: their old
    side is ``/dev/null``, so ``new_path`` is shown instead when it is set.
    """
    output: List[str] = []
    for df in diff_files:
        is_added_file = df.old_path == "/dev/null" and bool(df.new_path)
        label = df.new_path if is_added_file else df.old_path
        output.append(f"File: {label}")
        for hunk in df.hunks:
            output.append(f" {hunk.header}")
            output.extend(f" {line}" for line in hunk.lines)
    return "\n".join(output)