Cradicle Explorer

util.py
 1  import re
 2  import unicodedata
 3  
 4  invalid_rendering = ["🕵️", "☝"]
 5  
 6  def strip_modifiers(text):
 7      def process_characters(text):
 8          result = []
 9          i = 0
10          while i < len(text):
11              char = text[i]
12              category = unicodedata.category(char)
13              
14              if category.startswith(('L', 'N', 'P', 'S')):
15                  result.append(char)
16                  i += 1
17              elif category.startswith(('M', 'Sk', 'Cf')) or char in '\u200d\u200c':
18                  i += 1
19              else:
20                  result.append(char)
21                  i += 1
22                  
23          return ''.join(result)
24  
25      for char in invalid_rendering:
26          text = text.replace(char, " ")
27      
28      stripped = process_characters(text)
29      stripped = re.sub(r'[\uFE00-\uFE0F]', '', stripped)
30      stripped = re.sub(r'[\U000E0100-\U000E01EF]', '', stripped, flags=re.UNICODE)
31      stripped = re.sub(r'[\U0001F3FB-\U0001F3FF]', '', stripped, flags=re.UNICODE)
32      stripped = re.sub(r'[\u200D\u200C]', '', stripped)
33      stripped = re.sub(r'\r\n?', '\n', stripped)
34      
35      return stripped