feat(diff): Add diff parser for unified diff format
- Create src/diff_viewer.py with parse_diff function
- Parse unified diff into DiffFile and DiffHunk dataclasses
- Extract file paths, hunk headers, and line changes
- Add unit tests for diff parser
This commit is contained in:
129
src/diff_viewer.py
Normal file
129
src/diff_viewer.py
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
from typing import List, Dict, Optional
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
@dataclass
class DiffHunk:
    """One hunk of a unified diff.

    Stores the raw ``@@`` header line, the body lines collected after it,
    and the four numbers read from the header's old and new ranges.
    """

    header: str  # the hunk header line exactly as it appeared in the diff
    lines: List[str]  # body lines that followed the header, prefix included
    old_start: int  # start value of the header's "-" range
    old_count: int  # count value of the header's "-" range
    new_start: int  # start value of the header's "+" range
    new_count: int  # count value of the header's "+" range
|
||||||
|
|
||||||
|
@dataclass
class DiffFile:
    """All changes a unified diff records for a single file."""

    old_path: str  # path taken from the "--- " header line
    new_path: str  # path taken from the "+++ " header line
    hunks: List[DiffHunk]  # hunks belonging to this file, in diff order
|
||||||
|
|
||||||
|
def parse_diff_header(line: str) -> tuple[Optional[str], Optional[str], Optional[tuple[int, int, int, int]]]:
    """Parse a ``--- `` or ``+++ `` file header line of a unified diff.

    Args:
        line: One line of diff text, without its trailing newline.

    Returns:
        A 3-tuple ``(old_path, new_path, None)``. A ``--- `` line fills
        ``old_path`` (with a leading ``a/`` removed); a ``+++ `` line fills
        ``new_path`` (with a leading ``b/`` removed). Any other line yields
        ``(None, None, None)``. The third slot is always ``None``; it is kept
        so the return shape stays the same for existing callers.
    """
    if line.startswith("--- "):
        is_old_side, vcs_prefix = True, "a/"
    elif line.startswith("+++ "):
        is_old_side, vcs_prefix = False, "b/"
    else:
        return None, None, None

    # GNU diff writes "<name><TAB><timestamp>" on these lines, and some tools
    # emit a bare trailing tab; only the part before the first tab is the path.
    path = line[4:].split("\t", 1)[0].removeprefix(vcs_prefix)

    if is_old_side:
        return path, None, None
    return None, path, None
|
||||||
|
|
||||||
|
def parse_hunk_header(line: str) -> Optional[tuple[int, int, int, int]]:
    """Read the line ranges out of a unified-diff hunk header.

    A header has the form
    ``@@ -old_start[,old_count] +new_start[,new_count] @@`` and may be
    followed by free text. A range written without a count has a count of 1.

    Args:
        line: One line of diff text, without its trailing newline.

    Returns:
        ``(old_start, old_count, new_start, new_count)``, or ``None`` when
        the line is not a hunk header or its ranges cannot be read.
    """
    if not line.startswith("@@"):
        return None

    fields = line.split()
    # The "@@" marker plus both range fields must be present.
    if len(fields) < 3:
        return None

    old_range, new_range = fields[1], fields[2]
    # Rejects anything that is not "-<old> +<new>", e.g. combined-diff
    # headers ("@@@ -a -b +c @@@"), whose second range is not the new side.
    if not (old_range.startswith("-") and new_range.startswith("+")):
        return None

    numbers = []
    for spec in (old_range[1:], new_range[1:]):
        start, comma, count = spec.partition(",")
        if not start.isdecimal() or (comma and not count.isdecimal()):
            return None
        numbers.append(int(start))
        numbers.append(int(count) if comma else 1)

    old_start, old_count, new_start, new_count = numbers
    return (old_start, old_count, new_start, new_count)
|
||||||
|
|
||||||
|
def parse_diff(diff_text: str) -> List[DiffFile]:
    """Parse unified diff text into a list of ``DiffFile`` objects.

    Recognised lines:

    * ``--- <path>`` starts a new file and sets its ``old_path``.
    * ``+++ <path>`` (directly after the ``---`` line) sets ``new_path``.
    * ``@@ ... @@`` starts a new hunk in the current file. A header whose
      ranges cannot be read still opens a hunk, with all ranges set to 0.
    * ``diff ...`` ends the current file, so the metadata lines that precede
      the next file's ``---`` header are not stored as hunk content.
    * Every other line seen while a hunk is open is stored, unmodified, in
      that hunk's ``lines``. Lines outside any hunk are ignored.

    A removed line whose text begins with ``-- `` shows up as ``--- ...`` and
    an added line whose text begins with ``++ `` shows up as ``+++ ...``.
    Inside a hunk these are kept as hunk content instead of being mistaken
    for file headers (see ``_is_removed_line`` for the exact rule).

    Args:
        diff_text: The diff text. ``None``, empty and whitespace-only input
            all produce an empty list.

    Returns:
        One ``DiffFile`` per ``---`` header, in the order they appear.
    """
    if not diff_text or not diff_text.strip():
        return []

    files: List[DiffFile] = []
    current_file: Optional[DiffFile] = None
    current_hunk: Optional[DiffHunk] = None
    # Number of old-side / new-side body lines the open hunk's header still
    # promises; both stay 0 when the header could not be read.
    old_left = 0
    new_left = 0

    lines = diff_text.split("\n")
    # Text ending in a newline yields a trailing "" that is not a diff line.
    if lines[-1] == "":
        lines.pop()

    for idx, line in enumerate(lines):
        in_hunk = current_hunk is not None

        if line.startswith("--- ") and not (
            in_hunk and _is_removed_line(lines, idx, old_left, new_left)
        ):
            _close_file(files, current_file, current_hunk)
            old_path, _, _ = parse_diff_header(line)
            current_file = DiffFile(old_path=old_path or "", new_path="", hunks=[])
            current_hunk = None

        elif line.startswith("+++ ") and current_file is not None and not in_hunk:
            # A real "+++" header always follows a "---" header, which has
            # just closed any open hunk; inside a hunk it is an added line.
            _, new_path, _ = parse_diff_header(line)
            current_file.new_path = new_path or ""

        elif line.startswith("@@") and current_file is not None:
            if current_hunk is not None:
                current_file.hunks.append(current_hunk)
            # A malformed header must not abort the whole parse; fall back
            # to zeroed ranges and keep collecting the hunk's body.
            try:
                ranges = parse_hunk_header(line)
            except (IndexError, ValueError):
                ranges = None
            old_start, old_count, new_start, new_count = ranges or (0, 0, 0, 0)
            current_hunk = DiffHunk(
                header=line,
                lines=[],
                old_start=old_start,
                old_count=old_count,
                new_start=new_start,
                new_count=new_count,
            )
            old_left, new_left = old_count, new_count

        elif line.startswith("diff "):
            # Hunk body lines start with " ", "+", "-" or "\", so this is the
            # command/metadata line that introduces the next file.
            _close_file(files, current_file, current_hunk)
            current_file = None
            current_hunk = None

        elif current_hunk is not None:
            current_hunk.lines.append(line)
            if line.startswith("-"):
                old_left = max(old_left - 1, 0)
            elif line.startswith("+"):
                new_left = max(new_left - 1, 0)
            elif not line.startswith("\\"):
                # Context lines count on both sides; the
                # "\ No newline at end of file" marker counts on neither.
                old_left = max(old_left - 1, 0)
                new_left = max(new_left - 1, 0)

    _close_file(files, current_file, current_hunk)
    return files


def _close_file(
    files: List[DiffFile],
    current_file: Optional[DiffFile],
    current_hunk: Optional[DiffHunk],
) -> None:
    """Attach the open hunk (if any) to the open file and store the file."""
    if current_file is None:
        return
    if current_hunk is not None:
        current_file.hunks.append(current_hunk)
    files.append(current_file)


def _is_removed_line(lines: List[str], idx: int, old_left: int, new_left: int) -> bool:
    """Tell whether the ``--- `` line at ``idx``, seen inside a hunk, is hunk content.

    It is content only while the hunk header still promises old-side lines.
    If the next two lines look like the rest of a file header (``+++ `` then
    ``@@``), it is content only when the hunk needs exactly one more removed
    and one more added line; otherwise the header's counts are taken to be
    wrong and the line is treated as the start of a new file.
    """
    if old_left <= 0:
        return False
    header_follows = (
        idx + 2 < len(lines)
        and lines[idx + 1].startswith("+++ ")
        and lines[idx + 2].startswith("@@")
    )
    return not header_follows or (old_left, new_left) == (1, 1)
|
||||||
|
|
||||||
|
def format_diff_for_display(diff_files: List[DiffFile]) -> str:
    """Render parsed diff files as newline-joined text.

    For each file this emits a ``File: <old_path>`` line, then each hunk's
    header followed by that hunk's body lines, each prefixed with a space.

    Args:
        diff_files: Parsed files to render.

    Returns:
        The rendered text; an empty string when ``diff_files`` is empty.
    """
    rendered = []
    for entry in diff_files:
        rendered.append(f"File: {entry.old_path}")
        for section in entry.hunks:
            rendered.append(f" {section.header}")
            rendered.extend(f" {body_line}" for body_line in section.lines)
    return "\n".join(rendered)
|
||||||
83
tests/test_diff_viewer.py
Normal file
83
tests/test_diff_viewer.py
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
import pytest
|
||||||
|
from src.diff_viewer import parse_diff, DiffFile, DiffHunk, parse_hunk_header
|
||||||
|
|
||||||
|
def test_parse_diff_empty() -> None:
    """An empty string parses to an empty list of files."""
    assert parse_diff("") == []
|
||||||
|
|
||||||
|
def test_parse_diff_none() -> None:
    """``None`` is tolerated and parses to an empty list of files."""
    assert parse_diff(None) == []  # type: ignore
|
||||||
|
|
||||||
|
def test_parse_simple_diff() -> None:
    """A one-file, one-hunk diff yields both paths and the hunk header."""
    diff_text = "\n".join(
        [
            "--- a/src/test.py",
            "+++ b/src/test.py",
            "@@ -1 +1 @@",
            "-old",
            "+new",
        ]
    )

    files = parse_diff(diff_text)

    assert len(files) == 1
    parsed = files[0]
    assert parsed.old_path == "src/test.py"
    assert parsed.new_path == "src/test.py"
    assert len(parsed.hunks) == 1
    assert parsed.hunks[0].header == "@@ -1 +1 @@"
|
||||||
|
|
||||||
|
def test_parse_diff_with_context() -> None:
    """Hunk ranges and body lines are captured for a hunk with context."""
    diff_text = "\n".join(
        [
            "--- a/src/example.py",
            "+++ b/src/example.py",
            "@@ -10,5 +10,6 @@",
            "def existing_function():",
            "pass",
            "- old_line",
            "+ old_line",
            "+ new_line",
            "more_code",
        ]
    )

    files = parse_diff(diff_text)

    assert len(files) == 1
    assert files[0].old_path == "src/example.py"
    assert len(files[0].hunks) == 1

    first_hunk = files[0].hunks[0]
    assert first_hunk.old_start == 10
    assert first_hunk.old_count == 5
    assert first_hunk.new_start == 10
    assert first_hunk.new_count == 6
    assert "- old_line" in first_hunk.lines
    assert "+ new_line" in first_hunk.lines
|
||||||
|
|
||||||
|
def test_parse_multiple_files() -> None:
    """Two consecutive file sections produce two files, in order."""
    diff_text = "\n".join(
        [
            "--- a/file1.py",
            "+++ b/file1.py",
            "@@ -1 +1 @@",
            "-a",
            "+b",
            "--- a/file2.py",
            "+++ b/file2.py",
            "@@ -1 +1 @@",
            "-c",
            "+d",
        ]
    )

    files = parse_diff(diff_text)

    assert len(files) == 2
    assert files[0].old_path == "file1.py"
    assert files[1].old_path == "file2.py"
|
||||||
|
|
||||||
|
def test_parse_hunk_header() -> None:
    """Ranges are read with explicit counts and with the implied count of 1."""
    with_counts = parse_hunk_header("@@ -10,5 +10,6 @@")
    assert with_counts == (10, 5, 10, 6)

    without_counts = parse_hunk_header("@@ -1 +1 @@")
    assert without_counts == (1, 1, 1, 1)
|
||||||
|
|
||||||
|
def test_diff_line_classification() -> None:
    """A parsed hunk keeps removed, added and context lines."""
    diff_text = "\n".join(
        [
            "--- a/test.py",
            "+++ b/test.py",
            "@@ -1,3 +1,4 @@",
            "context line",
            "-removed line",
            "+removed line",
            "+added line",
            "another context",
        ]
    )

    body = parse_diff(diff_text)[0].hunks[0].lines

    assert any(entry.startswith("-") for entry in body)
    assert any(entry.startswith("+") for entry in body)
    assert any(
        entry.startswith(" ") or not entry.startswith(("-", "+"))
        for entry in body
    )
|
||||||
Reference in New Issue
Block a user