"""Parse unified diffs to extract edit locations."""
import os
import re
from typing import Dict, List, Tuple
from ..core import ByteInterval, merge
from ..core.fileio import line_to_byte
[docs]
def parse_diff(diff_text: str, repo_dir: str) -> Dict[str, List[ByteInterval]]:
"""Extract edited byte ranges per file from unified diff."""
edits = _parse_hunks(diff_text)
result = {}
for file_path, line_ranges in edits.items():
abs_path = os.path.join(repo_dir, file_path)
if not os.path.exists(abs_path):
continue
intervals = []
for start, end, _ in line_ranges:
span = line_to_byte(abs_path, start, end)
if span:
intervals.append(span)
if intervals:
result[file_path] = merge(intervals)
return result
def parse_diff_lines(diff_text: str, deletions_only: bool = False) -> Dict[str, List[Tuple[int, int]]]:
"""Extract edited line ranges per file from unified diff.
Args:
deletions_only: If True, only extract deleted lines (for EditLoc).
If False, extract added/modified lines (for repair evaluation).
Returns {file: [(start_line, end_line)]} where lines are inclusive.
"""
hunks = _parse_hunks(diff_text, deletions_only=deletions_only)
result = {}
for file_path, line_ranges in hunks.items():
line_intervals = []
for start, end, _ in line_ranges:
line_intervals.append((start, end))
if line_intervals:
result[file_path] = _merge_line_intervals(line_intervals)
return result
def _merge_line_intervals(intervals: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
"""Merge overlapping or adjacent line intervals."""
if not intervals:
return []
sorted_intervals = sorted(intervals)
merged = [sorted_intervals[0]]
for current in sorted_intervals[1:]:
last = merged[-1]
if current[0] <= last[1] + 1:
merged[-1] = (last[0], max(last[1], current[1]))
else:
merged.append(current)
return merged
def _parse_hunks(diff_text: str, deletions_only: bool = False) -> Dict[str, List[Tuple[int, int, str]]]:
"""Parse diff hunks. Returns {file: [(start_line, end_line, change_type)]}.
Args:
deletions_only: If True, extract deleted lines (OLD file line numbers).
If False, extract added/modified lines (NEW file line numbers).
"""
result = {}
current_file = None
lines = diff_text.split('\n')
hunk_re = re.compile(r'^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@')
for i, line in enumerate(lines):
if line.startswith('+++'):
match = re.match(r'^\+\+\+\s+b/(.+)', line)
if match:
current_file = match.group(1)
elif line.startswith('@@') and current_file:
match = hunk_re.match(line)
if match:
old_start = int(match.group(1))
new_start = int(match.group(3))
if deletions_only:
# Extract deleted lines (OLD file coordinates)
j = i + 1
old_line = old_start
edit_start = None
while j < len(lines) and not lines[j].startswith(('@@', 'diff ', '---')):
if lines[j].startswith('-') and not lines[j].startswith('---'):
# Deleted line in OLD file
if edit_start is None:
edit_start = old_line
old_line += 1
elif lines[j].startswith('+') and not lines[j].startswith('+++'):
# Added line - skip for deletions_only
pass
elif lines[j].startswith(' '):
# Context line - ends current deletion block
if edit_start is not None:
result.setdefault(current_file, []).append(
(edit_start, old_line - 1, 'del'))
edit_start = None
old_line += 1
j += 1
# Handle deletion block at end of hunk
if edit_start is not None:
result.setdefault(current_file, []).append(
(edit_start, old_line - 1, 'del'))
else:
# Extract added/modified lines (NEW file coordinates)
j = i + 1
new_line = new_start
edit_start = None
while j < len(lines) and not lines[j].startswith(('@@', 'diff ', '---')):
if lines[j].startswith('+') and not lines[j].startswith('+++'):
# Added or modified line in new file
if edit_start is None:
edit_start = new_line
new_line += 1
elif lines[j].startswith('-') and not lines[j].startswith('---'):
# Deleted line - skip for additions
pass
elif lines[j].startswith(' '):
# Context line - ends current edit block
if edit_start is not None:
result.setdefault(current_file, []).append(
(edit_start, new_line - 1, 'add'))
edit_start = None
new_line += 1
j += 1
# Handle edit block at end of hunk
if edit_start is not None:
result.setdefault(current_file, []).append(
(edit_start, new_line - 1, 'add'))
return result