feat(diff): Add diff parser for unified diff format

- Create src/diff_viewer.py with parse_diff function
- Parse unified diff into DiffFile and DiffHunk dataclasses
- Extract file paths, hunk headers, and line changes
- Add unit tests for diff parser
This commit is contained in:
2026-03-07 00:12:06 -05:00
parent da58f46e89
commit 130001c0ba
2 changed files with 212 additions and 0 deletions

129
src/diff_viewer.py Normal file
View File

@@ -0,0 +1,129 @@
from typing import List, Dict, Optional
from dataclasses import dataclass
@dataclass
class DiffHunk:
header: str
lines: List[str]
old_start: int
old_count: int
new_start: int
new_count: int
@dataclass
class DiffFile:
old_path: str
new_path: str
hunks: List[DiffHunk]
def parse_diff_header(line: str) -> tuple[Optional[str], Optional[str], Optional[tuple[int, int, int, int]]]:
if not line.startswith(("--- ", "+++ ")):
return None, None, None
if line.startswith("--- "):
path = line[4:]
if path.startswith("a/"):
path = path[2:]
return path, None, None
elif line.startswith("+++ "):
path = line[4:]
if path.startswith("b/"):
path = path[2:]
return None, path, None
return None, None, None
def parse_hunk_header(line: str) -> Optional[tuple[int, int, int, int]]:
if not line.startswith("@@"):
return None
parts = line.split()
if len(parts) < 2:
return None
old_part = parts[1][1:]
new_part = parts[2][1:]
old_parts = old_part.split(",")
new_parts = new_part.split(",")
old_start = int(old_parts[0])
old_count = int(old_parts[1]) if len(old_parts) > 1 else 1
new_start = int(new_parts[0])
new_count = int(new_parts[1]) if len(new_parts) > 1 else 1
return (old_start, old_count, new_start, new_count)
def parse_diff(diff_text: str) -> List[DiffFile]:
if not diff_text or not diff_text.strip():
return []
files: List[DiffFile] = []
current_file: Optional[DiffFile] = None
current_hunk: Optional[DiffHunk] = None
for line in diff_text.split("\n"):
if line.startswith("--- "):
if current_file:
if current_hunk:
current_file.hunks.append(current_hunk)
current_hunk = None
files.append(current_file)
path = line[4:]
if path.startswith("a/"):
path = path[2:]
current_file = DiffFile(old_path=path, new_path="", hunks=[])
elif line.startswith("+++ ") and current_file:
path = line[4:]
if path.startswith("b/"):
path = path[2:]
current_file.new_path = path
elif line.startswith("@@") and current_file:
if current_hunk:
current_file.hunks.append(current_hunk)
hunk_info = parse_hunk_header(line)
if hunk_info:
old_start, old_count, new_start, new_count = hunk_info
current_hunk = DiffHunk(
header=line,
lines=[],
old_start=old_start,
old_count=old_count,
new_start=new_start,
new_count=new_count
)
else:
current_hunk = DiffHunk(
header=line,
lines=[],
old_start=0,
old_count=0,
new_start=0,
new_count=0
)
elif current_hunk is not None:
current_hunk.lines.append(line)
elif line and not line.startswith("diff ") and not line.startswith("index "):
pass
if current_file:
if current_hunk:
current_file.hunks.append(current_hunk)
files.append(current_file)
return files
def format_diff_for_display(diff_files: List[DiffFile]) -> str:
output = []
for df in diff_files:
output.append(f"File: {df.old_path}")
for hunk in df.hunks:
output.append(f" {hunk.header}")
for line in hunk.lines:
output.append(f" {line}")
return "\n".join(output)

83
tests/test_diff_viewer.py Normal file
View File

@@ -0,0 +1,83 @@
import pytest
from src.diff_viewer import parse_diff, DiffFile, DiffHunk, parse_hunk_header
def test_parse_diff_empty() -> None:
result = parse_diff("")
assert result == []
def test_parse_diff_none() -> None:
result = parse_diff(None) # type: ignore
assert result == []
def test_parse_simple_diff() -> None:
diff_text = """--- a/src/test.py
+++ b/src/test.py
@@ -1 +1 @@
-old
+new"""
result = parse_diff(diff_text)
assert len(result) == 1
assert result[0].old_path == "src/test.py"
assert result[0].new_path == "src/test.py"
assert len(result[0].hunks) == 1
assert result[0].hunks[0].header == "@@ -1 +1 @@"
def test_parse_diff_with_context() -> None:
diff_text = """--- a/src/example.py
+++ b/src/example.py
@@ -10,5 +10,6 @@
def existing_function():
pass
- old_line
+ old_line
+ new_line
more_code"""
result = parse_diff(diff_text)
assert len(result) == 1
assert result[0].old_path == "src/example.py"
assert len(result[0].hunks) == 1
hunk = result[0].hunks[0]
assert hunk.old_start == 10
assert hunk.old_count == 5
assert hunk.new_start == 10
assert hunk.new_count == 6
assert "- old_line" in hunk.lines
assert "+ new_line" in hunk.lines
def test_parse_multiple_files() -> None:
diff_text = """--- a/file1.py
+++ b/file1.py
@@ -1 +1 @@
-a
+b
--- a/file2.py
+++ b/file2.py
@@ -1 +1 @@
-c
+d"""
result = parse_diff(diff_text)
assert len(result) == 2
assert result[0].old_path == "file1.py"
assert result[1].old_path == "file2.py"
def test_parse_hunk_header() -> None:
result = parse_hunk_header("@@ -10,5 +10,6 @@")
assert result == (10, 5, 10, 6)
result = parse_hunk_header("@@ -1 +1 @@")
assert result == (1, 1, 1, 1)
def test_diff_line_classification() -> None:
diff_text = """--- a/test.py
+++ b/test.py
@@ -1,3 +1,4 @@
context line
-removed line
+removed line
+added line
another context"""
result = parse_diff(diff_text)
hunk = result[0].hunks[0]
assert any(line.startswith("-") for line in hunk.lines)
assert any(line.startswith("+") for line in hunk.lines)
assert any(line.startswith(" ") or not line.startswith(("-", "+")) for line in hunk.lines)