contains_link_feature.py
from typing import Any
from typing import ClassVar
from typing import Optional
from urllib.parse import urlparse

import numpy as np

from evidently.legacy.core import ColumnType
from evidently.legacy.features.generated_features import ApplyColumnGeneratedFeature


def _is_url(token: str) -> bool:
    """Return True if *token* parses as a URL with both a scheme and a netloc.

    ``urlparse`` raises ``ValueError`` for some malformed inputs (for example
    a netloc with unbalanced square brackets such as ``http://[abc``). Such
    tokens are treated as "not a URL" rather than letting the error propagate.
    """
    try:
        parsed = urlparse(token)
    except ValueError:
        return False
    return bool(parsed.scheme and parsed.netloc)


class ContainsLink(ApplyColumnGeneratedFeature):
    """Generated feature that flags whether a text value contains a link.

    A value is considered to contain a link when at least one of its
    whitespace-separated tokens parses (via ``urllib.parse.urlparse``) with
    both a scheme and a network location, e.g. ``https://example.com``.
    """

    class Config:
        type_alias = "evidently:feature:ContainsLink"

    __feature_type__: ClassVar = ColumnType.Categorical
    display_name_template: ClassVar = "{column_name} contains link"

    def __init__(self, column_name: str, display_name: Optional[str] = None):
        """Create the feature for ``column_name``.

        Args:
            column_name: Name of the column this feature is computed from.
            display_name: Optional explicit display name; stored on the
                instance before delegating to the base-class initializer.
        """
        self.display_name = display_name
        super().__init__(column_name=column_name)

    def apply(self, value: Any):
        """Return whether ``value`` contains at least one URL-like token.

        Args:
            value: A single value; anything other than ``None``/float NaN is
                converted with ``str()`` before inspection.
                NOTE(review): presumably invoked once per cell of the source
                column by the base class -- confirm against
                ``ApplyColumnGeneratedFeature``.

        Returns:
            ``0`` for missing values (``None`` or float NaN), ``True`` if any
            whitespace-separated token is a URL with scheme and netloc,
            otherwise ``False``.
        """
        if value is None or (isinstance(value, float) and np.isnan(value)):
            # NOTE: historically returns the int 0 (not False) for missing
            # values; kept as-is so existing outputs do not change.
            return 0

        # Malformed tokens that make urlparse raise are simply not links.
        return any(_is_url(word) for word in str(value).split())