/ src / evidently / legacy / features / word_count_feature.py
word_count_feature.py
 1  import re
 2  from typing import Any
 3  from typing import ClassVar
 4  from typing import Optional
 5  
 6  import numpy as np
 7  
 8  from evidently.legacy.core import ColumnType
 9  from evidently.legacy.features.generated_features import ApplyColumnGeneratedFeature
10  
11  
class WordCount(ApplyColumnGeneratedFeature):
    """Feature that counts the words in a text value of ``column_name``.

    A word is a whitespace-delimited token that still contains at least one
    ASCII letter after every character that is neither an ASCII letter nor
    whitespace has been deleted. Consequences of that definition:

    * punctuation and digits never separate words: ``"hello, world!"`` is
      2 words, ``"it's"`` is 1 word and ``"abc 123"`` is 1 word;
    * any whitespace (space, tab, newline, ...) separates words:
      ``"one\\ntwo"`` is 2 words;
    * non-ASCII letters are deleted like punctuation, so text written
      entirely in a non-Latin script counts as 0 words.

    The feature type is declared as numerical.
    """

    class Config:
        type_alias = "evidently:feature:WordCount"

    __feature_type__: ClassVar = ColumnType.Numerical
    # Runs of characters to delete before splitting. ``\s`` (rather than a
    # literal space) keeps tabs and newlines in place; deleting them would glue
    # the neighbouring words together and undercount multi-line text.
    _reg: ClassVar[re.Pattern] = re.compile(r"[^a-zA-Z\s]+")
    display_name_template: ClassVar = "Word Count for {column_name}"
    column_name: str

    def __init__(self, column_name: str, display_name: Optional[str] = None):
        """Create the feature.

        Args:
            column_name: Name of the column whose values are counted.
            display_name: Optional custom display name for the feature.
        """
        # display_name is assigned before the base initialiser is called;
        # this order is kept exactly as in the original implementation.
        self.display_name = display_name
        super().__init__(column_name=column_name)

    def apply(self, value: Any) -> int:
        """Return the number of words in a single value.

        Args:
            value: The value to count words in. ``None`` and float ``NaN``
                are treated as missing and yield ``0``; any other value is
                passed to the regular expression as-is and is therefore
                expected to be a ``str``.

        Returns:
            The number of whitespace-separated tokens left after stripping
            non-letter, non-whitespace characters.

        Raises:
            TypeError: If ``value`` is neither missing nor a string (raised
                by ``re.Pattern.sub``).
        """
        if value is None or (isinstance(value, float) and np.isnan(value)):
            return 0
        letters_and_whitespace = self._reg.sub("", value)
        # str.split() with no argument splits on runs of any whitespace and
        # drops empty tokens, so leading/trailing/repeated whitespace is safe.
        return len(letters_and_whitespace.split())