/ dev / normalize_chars.py
normalize_chars.py
 1  import sys
 2  from pathlib import Path
 3  
 4  # Mapping of characters to normalize. Start with quotes; extend as needed.
 5  CHAR_MAP = {
 6      "\u2018": "'",  # left single quotation mark
 7      "\u2019": "'",  # right single quotation mark
 8      "\u201c": '"',  # left double quotation mark
 9      "\u201d": '"',  # right double quotation mark
10  }
11  
12  
13  def fix_file(path: Path) -> bool:
14      try:
15          text = path.read_text(encoding="utf-8")
16      except UnicodeDecodeError:
17          # Non-UTF8 (likely binary) — skip
18          return False
19  
20      new_text = text
21      for bad, good in CHAR_MAP.items():
22          new_text = new_text.replace(bad, good)
23  
24      if new_text != text:
25          path.write_text(new_text, encoding="utf-8")
26          return True
27      return False
28  
29  
def main(argv: list[str]) -> int:
    """Normalize every regular file named in *argv* and report how many changed.

    Arguments that do not name an existing regular file are skipped without
    comment. A summary line is printed only when at least one file was
    rewritten.

    Args:
        argv: Paths to process (the command-line arguments, without the
            program name).

    Returns:
        Always 0.
    """
    candidates = (Path(raw) for raw in argv)
    # ``and`` short-circuits, so fix_file only runs for paths that are files.
    changed = sum(
        1 for candidate in candidates if candidate.is_file() and fix_file(candidate)
    )

    if changed:
        print(f"Normalized characters in {changed} file(s).")
    return 0
40  
41  
# Script entry point: run main() on the command-line arguments (program name
# excluded) and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))