Commit 5adcc24a authored by Vladislav Yarmak, committed by GitHub

Merge pull request #49 from Snawoot/1337-diff-improvements

1337-diff improvements
parents 0fa7df74 6e5d608b
...@@ -22,6 +22,11 @@ class LengthMismatchException(ByteDiffException): ...@@ -22,6 +22,11 @@ class LengthMismatchException(ByteDiffException):
pass pass
class DiffLimitException(ByteDiffException):
""" Throwed when difference limit hit """
pass
def check_positive_int(value): def check_positive_int(value):
value = int(value) value = int(value)
if value <= 0: if value <= 0:
...@@ -64,31 +69,43 @@ def feed_chunks(f, chunk_size=4096): ...@@ -64,31 +69,43 @@ def feed_chunks(f, chunk_size=4096):
yield buf yield buf
def zip_files_bytes(*files): def zip_files_bytes(left, right):
""" Iterate over two files, returning pair of bytes. """ Iterate over two files, returning pair of bytes.
Throw LengthMismatch if file sizes is uneven. """ Throw LengthMismatch if file sizes is uneven. """
class EndMarker(object): class EndMarker(object):
pass pass
end_marker = EndMarker() end_marker = EndMarker()
iterators = (itertools.chain.from_iterable(feed_chunks(f)) for f in files) left_iter = itertools.chain.from_iterable(
for tup in itertools.zip_longest(*iterators, fillvalue=end_marker): feed_chunks(left))
if any(v is end_marker for v in tup): right_iter = itertools.chain.from_iterable(
feed_chunks(right))
for a, b in itertools.zip_longest(left_iter,
right_iter,
fillvalue=end_marker):
if a is end_marker or b is end_marker:
raise LengthMismatchException("Length of input files inequal.") raise LengthMismatchException("Length of input files inequal.")
yield tup yield a, b
def diff(left, right): def diff(left, right, limit=None):
for offset, (a, b) in enumerate(zip_files_bytes(left, right)): offset = 0
diff_count = 0
for a, b in zip_files_bytes(left, right):
if a != b: if a != b:
diff_count += 1
if limit is not None and diff_count > limit:
raise DiffLimitException()
yield offset, a, b yield offset, a, b
offset += 1
def compose_diff_file(orig, patched, output, header, offset_adjustment=True): def compose_diff_file(orig, patched, output, header, *,
limit=None, offset_adjustment=True):
output.write(HEADER_FORMAT % (header.encode('latin-1'),)) output.write(HEADER_FORMAT % (header.encode('latin-1'),))
for offset, a, b in diff(orig, patched): adj = OFFSET_ADJUSTMENT if offset_adjustment else 0
o = offset + OFFSET_ADJUSTMENT if offset_adjustment else offset for offset, a, b in diff(orig, patched, limit):
output.write(LINE_FORMAT % (o, a, b)) output.write(LINE_FORMAT % (offset + adj, a, b))
def main(): def main():
...@@ -109,10 +126,14 @@ def main(): ...@@ -109,10 +126,14 @@ def main():
open(args.patched_file, 'rb') as patched,\ open(args.patched_file, 'rb') as patched,\
open(output_filename, 'wb') as output: open(output_filename, 'wb') as output:
try: try:
compose_diff_file(orig, patched, output, header_filename) compose_diff_file(orig, patched, output, header_filename,
limit=args.limit)
except LengthMismatchException: except LengthMismatchException:
print("Input files have inequal length. Aborting...", print("Input files have inequal length. Aborting...",
file=sys.stderr) file=sys.stderr)
except DiffLimitException:
print("Differences limit hit. Aborting...",
file=sys.stderr)
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment