normalize_chars.py
import sys
from pathlib import Path

# Mapping of characters to normalize. Start with quotes; extend as needed.
CHAR_MAP = {
    "\u2018": "'",  # left single quotation mark
    "\u2019": "'",  # right single quotation mark
    "\u201c": '"',  # left double quotation mark
    "\u201d": '"',  # right double quotation mark
}


def fix_file(path: Path) -> bool:
    """Replace every ``CHAR_MAP`` key found in *path* with its mapped value.

    The file is processed as raw bytes decoded as UTF-8, so nothing other
    than the mapped characters is altered. In particular, existing line
    endings (``\\n``, ``\\r\\n``, ``\\r``) are written back exactly as found.

    Args:
        path: File to normalize in place.

    Returns:
        True if the file was rewritten, False if it needed no change or
        could not be decoded as UTF-8.
    """
    raw = path.read_bytes()
    try:
        text = raw.decode("utf-8")
    except UnicodeDecodeError:
        # Non-UTF8 (likely binary) - skip
        return False

    new_text = text
    for bad, good in CHAR_MAP.items():
        new_text = new_text.replace(bad, good)

    if new_text == text:
        return False

    # Write bytes rather than text: text-mode I/O would translate newlines
    # and silently change the line endings of every touched file.
    path.write_bytes(new_text.encode("utf-8"))
    return True


def main(argv: list[str]) -> int:
    """Normalize each file named in *argv* and report how many changed.

    Arguments that do not refer to an existing regular file are ignored.

    Args:
        argv: Paths to process (without the program name).

    Returns:
        Always 0.
    """
    changed = 0
    for arg in argv:
        p = Path(arg)
        if p.is_file() and fix_file(p):
            changed += 1
    if changed:
        print(f"Normalized characters in {changed} file(s).")
    return 0


if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))