From f4c5d4c4ae8308982b70e78b86a4ed9a7a87b9d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maximilian=20Ke=C3=9Fler?=
Date: Thu, 31 Mar 2022 19:05:54 +0200
Subject: [PATCH] =?UTF-8?q?UTF8=20replacements=20hinzugef=C3=BCgt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 constants.py              | 10 ++++++++++
 latex_legacy_converter.py | 12 ++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/constants.py b/constants.py
index 81f0623..8dcbf48 100644
--- a/constants.py
+++ b/constants.py
@@ -38,3 +38,13 @@ UNICODE = {
     '⇒': r'\implies',
     '⇐': r'\impliedby'
 }
+
+UTF8_REPLACEMENTS = {
+    r'\"o': 'ö',
+    r'\"a': 'ä',
+    r'\"u': 'ü',
+    r'\"O': 'Ö',
+    r'\"A': 'Ä',
+    r'\"U': 'Ü',
+    r'\ss': 'ß',
+}
diff --git a/latex_legacy_converter.py b/latex_legacy_converter.py
index 34c0e50..0b1dfbc 100644
--- a/latex_legacy_converter.py
+++ b/latex_legacy_converter.py
@@ -2,6 +2,17 @@ from constants import *
 import re
 from pathlib import Path
 
+def migrate_old_utf8_escape(line: str) -> str:
+    """Replace legacy LaTeX umlaut/eszett escapes by their UTF-8 characters.
+
+    Every occurrence of each key of UTF8_REPLACEMENTS in ``line`` is replaced
+    by the mapped value; the keys are literal text, not regular expressions.
+    """
+    for old, new in UTF8_REPLACEMENTS.items():
+        # str.replace substitutes all occurrences in a single pass.
+        line = line.replace(old, new)
+    return line
+
 
 def _migrate_lazy_math_alphabet(line: str, letters: str, short: str, alph: str):
     for letter in letters:
@@ -59,6 +70,7 @@ def optimize_line(line: str) -> str:
     line = migrate_lazy_math_alphabets(line)
     line = migrate_deprecated(line)
     line = replace_unicode(line)
+    line = migrate_old_utf8_escape(line)
     return line
 
 