ihsm/paper/diffinator.py
2021-09-29 13:51:22 +02:00

143 lines
5.3 KiB
Python

#!/usr/bin/env python3
import re
import sys
import subprocess
import string
import click
# Markers emitted by ``git diff --word-diff --color=always``: an addition is
# wrapped as ESC[32m{+...+}ESC[m and a deletion as ESC[31m[-...-]ESC[m.
_ADDITION_PATTERN = '\033\\[32m\\{\\+([^\033]*?)\\+\\}\033\\[m'
_DELETION_PATTERN = '\033\\[31m\\[-([^\033]*?)\\-]\033\\[m'
_ADDITION_RE = re.compile(_ADDITION_PATTERN)
_DELETION_RE = re.compile(_DELETION_PATTERN)
# A deletion immediately followed by an addition, i.e. a replacement.
_REPLACEMENT_RE = re.compile(_DELETION_PATTERN + _ADDITION_PATTERN)
# Any colour escape sequence still left in a line after marker handling.
_CSI_RE = re.compile('\033\\[.*?m')
# Opening of a BibTeX entry such as ``@article{key,``; group 1 is the key.
_BIBTEX_ENTRY_RE = re.compile('@.*?{(.*?),')
# A "%" preceded by an even number of backslashes (zero included) starts a
# TeX comment; an odd number means the percent sign itself is escaped.
_TEX_COMMENT_RE = re.compile(r'(?<!\\)(?:\\\\)*(%)')

# Changes shorter than this many characters, or containing fewer than this
# many spaces, are applied without being highlighted.
_MIN_HIGHLIGHT_CHARS = 12
_MIN_HIGHLIGHT_SPACES = 3


def _count_lines(path):
    """Return the number of lines in *path*.

    The file is read in binary mode so that counting never fails on bytes
    that are not valid in the locale's default encoding.
    """
    with open(path, 'rb') as f:
        return sum(1 for _ in f)


def _git_word_diff(path, revision):
    """Return the coloured word diff of *path* against *revision* as text.

    The context size exceeds the file's line count so that the whole file
    ends up in the diff output rather than only the changed regions.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    context = _count_lines(path) + 1
    proc = subprocess.run(
        [
            'git',
            # Pin the colours: the marker regexes above only recognise plain
            # red/green, which a user's color.diff.* settings could change.
            '-c', 'color.diff.old=red',
            '-c', 'color.diff.new=green',
            'diff', f'-U{context}', '--word-diff', '--color=always',
            revision, '--', path,
        ],
        check=True, capture_output=True)
    return proc.stdout.decode()


def _strip_tex_comment(text):
    """Return *text* up to, but excluding, its first unescaped ``%``.

    An escaped percent sign (backslash followed by ``%``) is ordinary text
    and is kept, so a change such as "50\\% faster" is not cut short.
    """
    comment = _TEX_COMMENT_RE.search(text)
    return text if comment is None else text[:comment.start(1)]


def _is_minor(change):
    """Return True if a lone addition or deletion is too small to highlight."""
    return (len(change) < _MIN_HIGHLIGHT_CHARS
            or change.count(' ') < _MIN_HIGHLIGHT_SPACES
            or '{' in change
            or '}' in change)


def _same_ascii_letters(old, new):
    """Return True if *old* and *new* hold the same multiset of ASCII letters."""
    letters = string.ascii_letters
    return (sorted(c for c in old if c in letters)
            == sorted(c for c in new if c in letters))


def _render_replacement(match):
    """Return the LaTeX for a deletion directly followed by an addition.

    The new text is used as-is, without highlighting, when both sides are
    short, when both sides contain few spaces, when either side contains a
    brace, or when only non-letter characters differ. Otherwise the old text
    is shown in ``diffred`` followed by the new text in ``diffgreen``.
    """
    old, new = (_strip_tex_comment(text) for text in match.groups())

    if len(old) < _MIN_HIGHLIGHT_CHARS and len(new) < _MIN_HIGHLIGHT_CHARS:
        return new
    if (old.count(' ') < _MIN_HIGHLIGHT_SPACES
            and new.count(' ') < _MIN_HIGHLIGHT_SPACES):
        return new
    # NOTE(review): presumably braces are skipped because colouring a
    # fragment with unbalanced grouping could break the LaTeX -- confirm.
    if any(brace in text for text in (old, new) for brace in '{}'):
        return new
    if _same_ascii_letters(old, new):
        return new

    return (r' \color{diffred}' + old
            + r' \color{diffgreen}' + new
            + r' \color{black}')


def _render_addition(match):
    """Return the LaTeX for a lone addition: plain if minor, else green."""
    change = _strip_tex_comment(match.group(1))
    if _is_minor(change):
        return change
    return r' \color{diffgreen}' + change + r' \color{black}'


def _render_deletion(match):
    """Return the LaTeX for a lone deletion: dropped if minor, else red."""
    change = _strip_tex_comment(match.group(1))
    if _is_minor(change):
        return ''
    return r' \color{diffred}' + change + r' \color{black}'


def _changed_bib_entries(bib_diff):
    """Return ``(added, removed)`` lists of BibTeX keys found in *bib_diff*.

    Only diff lines that consist entirely of one addition or deletion marker
    wrapping a BibTeX entry opening are counted.
    """
    added, removed = [], []
    for raw_line in bib_diff.splitlines():
        stripped = raw_line.strip()
        for marker_re, keys in ((_ADDITION_RE, added), (_DELETION_RE, removed)):
            if (marker := marker_re.fullmatch(stripped)):
                if (entry := _BIBTEX_ENTRY_RE.fullmatch(marker.group(1))):
                    keys.append(entry.group(1))
    return added, removed


def _bibliography_setup(added, removed):
    """Return the biblatex commands that colour added and removed entries."""
    setup = '\\DeclareBibliographyCategory{diff_new_entry}\n'
    setup += '\\DeclareBibliographyCategory{diff_deleted_entry}\n'
    setup += '\\AtEveryBibitem{\\ifcategory{diff_new_entry}{\\color{diffgreen}}{\\ifcategory{diff_deleted_entry}{\\color{diffred}}{\\color{black}}}}\n'
    if added:
        setup += '\\addtocategory{diff_new_entry}{' + ','.join(added) + '}\n'
    if removed:
        setup += '\\addtocategory{diff_deleted_entry}{' + ','.join(removed) + '}\n'
    return setup


@click.command()
@click.argument('texfile')
@click.argument('bibliography')
@click.argument('revision')
def generate_git_tex_diff(texfile, bibliography, revision):
    """Print TEXFILE with its changes since REVISION highlighted in colour.

    TEXFILE and BIBLIOGRAPHY are each word-diffed against REVISION using git.
    Changes before the document begins are applied silently. From there on,
    larger additions are printed in green and larger deletions in red, while
    small changes are applied without highlighting. Colour definitions and
    bibliography categories marking the added and removed BIBLIOGRAPHY
    entries are printed just ahead of the line that begins the document.
    """
    tex_diff = _git_word_diff(texfile, revision)
    bib_diff = _git_word_diff(bibliography, revision)

    added_entries, removed_entries = _changed_bib_entries(bib_diff)
    bibliography_categories = _bibliography_setup(added_entries, removed_entries)

    content_started = False
    document_started = False
    for line in tex_diff.splitlines():
        if not content_started:
            # Skip everything up to and including the first hunk header.
            content_started = '@@' in line
            continue

        line = line.rstrip()

        if document_started:
            # Replacements must be handled first; afterwards only lone
            # additions and deletions are left.
            line = _REPLACEMENT_RE.sub(_render_replacement, line)
            line = _ADDITION_RE.sub(_render_addition, line)
            line = _DELETION_RE.sub(_render_deletion, line)
        else:
            if '\\begin{document}' in line:
                document_started = True
                # Printed before this line itself, so they land in the preamble.
                print(bibliography_categories)
                print('\\definecolor{diffgreen}{HTML}{1e8449}')
                print('\\definecolor{diffred}{HTML}{cb4335}')
            # Up to and including this line, changes are applied silently.
            line = _ADDITION_RE.sub(r'\1', line)
            line = _DELETION_RE.sub('', line)

        print(_CSI_RE.sub('', line))
# Run the click command only when this file is executed as a script; click
# takes the texfile, bibliography and revision arguments from the command line.
if __name__ == '__main__':
    generate_git_tex_diff()