word_count_feature.py
import re
from typing import Any
from typing import ClassVar
from typing import Optional

import numpy as np

from evidently.legacy.core import ColumnType
from evidently.legacy.features.generated_features import ApplyColumnGeneratedFeature


class WordCount(ApplyColumnGeneratedFeature):
    """Numerical feature that counts the words in a text value.

    A word is a whitespace-delimited run of ASCII letters. Every character
    that is neither an ASCII letter nor whitespace (digits, punctuation,
    non-ASCII letters, ...) is deleted before the text is split, so a token
    made only of such characters contributes nothing to the count.
    """

    class Config:
        type_alias = "evidently:feature:WordCount"

    __feature_type__: ClassVar = ColumnType.Numerical
    # Matches runs of characters to delete before splitting. Whitespace of
    # every kind (not only the plain space) is kept: removing tabs or
    # newlines here would glue neighbouring words together
    # ("hello\nworld" -> "helloworld") and under-count them.
    _reg: ClassVar[re.Pattern] = re.compile(r"[^a-zA-Z\s]+")
    display_name_template: ClassVar = "Word Count for {column_name}"
    column_name: str

    def __init__(self, column_name: str, display_name: Optional[str] = None):
        """Create the feature for ``column_name``.

        Args:
            column_name: Name of the column whose values are counted.
            display_name: Optional display name stored on the instance.
        """
        # NOTE(review): display_name is assigned before the base initializer
        # runs, exactly as in the original; presumably the base class relies
        # on this ordering -- confirm before changing it.
        self.display_name = display_name
        super().__init__(column_name=column_name)

    def apply(self, value: Any) -> int:
        """Return the number of words in ``value``.

        Args:
            value: The text to inspect. ``None`` and float NaN are treated
                as missing.

        Returns:
            ``0`` for a missing value; otherwise the number of
            whitespace-separated tokens left after deleting every character
            that is not an ASCII letter or whitespace.
        """
        if value is None or (isinstance(value, float) and np.isnan(value)):
            return 0
        letters_and_whitespace = self._reg.sub("", value)
        # str.split() without arguments splits on any whitespace run and
        # ignores leading/trailing whitespace, so empty tokens never count.
        return len(letters_and_whitespace.split())