/ src / evidently / legacy / features / contains_link_feature.py
contains_link_feature.py
 1  from typing import Any
 2  from typing import ClassVar
 3  from typing import Optional
 4  from urllib.parse import urlparse
 5  
 6  import numpy as np
 7  
 8  from evidently.legacy.core import ColumnType
 9  from evidently.legacy.features.generated_features import ApplyColumnGeneratedFeature
10  
11  
class ContainsLink(ApplyColumnGeneratedFeature):
    """Generated feature flagging whether a text value contains a link.

    A value is considered to contain a link when at least one of its
    whitespace-separated tokens parses, via ``urllib.parse.urlparse``, into
    a URL that has both a scheme and a network location.
    """

    class Config:
        type_alias = "evidently:feature:ContainsLink"

    __feature_type__: ClassVar = ColumnType.Categorical
    display_name_template: ClassVar = "{column_name} contains link"

    def __init__(self, column_name: str, display_name: Optional[str] = None):
        """Create the feature for ``column_name``.

        Args:
            column_name: Name of the column the feature is applied to;
                forwarded to the parent initializer.
            display_name: Optional display name stored on the instance.
        """
        self.display_name = display_name
        super().__init__(column_name=column_name)

    def apply(self, value: Any):
        """Tell whether a single value contains a link.

        Args:
            value: The value to inspect. Anything that is not ``None`` or a
                float NaN is converted with ``str()`` before inspection.

        Returns:
            ``0`` when ``value`` is ``None`` or a float NaN, ``True`` when
            at least one whitespace-separated token has both a URL scheme
            and a network location, ``False`` otherwise.
        """
        if value is None or (isinstance(value, float) and np.isnan(value)):
            # 0 (rather than False) is kept for missing values so existing
            # outputs of this feature are unchanged.
            return 0

        # Inspect each whitespace-separated token on its own.
        for word in str(value).split():
            try:
                parsed = urlparse(word)
            except ValueError:
                # urlparse rejects some malformed tokens (for example an
                # unbalanced "[" in the host part, as in "http://[bad").
                # Such a token is not a valid link, and it must not abort
                # the check for the remaining tokens.
                continue
            if parsed.scheme and parsed.netloc:
                return True
        return False