import re

# ---------------------------------------------------------------------------
# constants.py -- appended after the existing UNICODE table
# ---------------------------------------------------------------------------

# Legacy LaTeX escapes for German umlauts and the sharp s, mapped to the
# literal character each one should be replaced with.
UTF8_REPLACEMENTS = {
    r'\"o': 'ö',
    r'\"a': 'ä',
    r'\"u': 'ü',
    r'\"O': 'Ö',
    r'\"A': 'Ä',
    r'\"U': 'Ü',
    r'\ss': 'ß',
}

# ---------------------------------------------------------------------------
# latex_legacy_converter.py -- new helper, placed after the imports
# ---------------------------------------------------------------------------

# A key that is a backslash followed only by letters (e.g. ``\ss``) is a
# control *word*; a longer run of letters would be a different command.
_CONTROL_WORD = re.compile(r'\\[A-Za-z]+')


def migrate_old_utf8_escape(line: str) -> str:
    """Replace legacy LaTeX character escapes with their literal characters.

    Every occurrence of each key of ``UTF8_REPLACEMENTS`` in *line* is
    replaced by the mapped character, e.g. ``sch\\"on`` becomes ``schön``.

    Args:
        line: A single line of LaTeX source.

    Returns:
        The line with all known escapes replaced. Text that contains none
        of the escapes (including the empty string) is returned unchanged.
    """
    for old, new in UTF8_REPLACEMENTS.items():
        # Escape once per key so the backslash is matched literally.
        # (Re-escaping on every pass doubles the backslashes each time and
        # stops after the first hit.)
        pattern = re.escape(old)
        if _CONTROL_WORD.fullmatch(old):
            # Do not touch longer control words that merely start with this
            # key, such as ``\sslash``; only letters extend a control word.
            pattern += r'(?![A-Za-z])'
        # A callable replacement keeps ``new`` free of template processing.
        line = re.sub(pattern, lambda _match, new=new: new, line)
    return line


# ---------------------------------------------------------------------------
# latex_legacy_converter.py -- hook inside optimize_line(), directly after
# the existing ``line = replace_unicode(line)`` statement:
#
#     line = migrate_old_utf8_escape(line)
# ---------------------------------------------------------------------------