_custom_descriptors.py
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from typing import Union

from evidently._pydantic_compat import PrivateAttr
from evidently.core.datasets import AnyDescriptorTest
from evidently.core.datasets import Dataset
from evidently.core.datasets import DatasetColumn
from evidently.core.datasets import Descriptor
from evidently.legacy.options.base import Options

CustomColumnCallable = Callable[[DatasetColumn], DatasetColumn]


def _callable_identifier(func: Callable[..., object]) -> str:
    """Return a ``module.name`` identifier string for *func*.

    Plain functions and classes expose ``__module__`` and ``__name__`` directly.
    Other callables (for example ``functools.partial`` objects or instances of
    classes defining ``__call__``) may lack ``__name__``; for those the
    attributes of the callable's type are used instead of raising
    ``AttributeError``.
    """
    func_type = type(func)
    module = getattr(func, "__module__", func_type.__module__)
    name = getattr(func, "__name__", func_type.__name__)
    return f"{module}.{name}"


class CustomColumnDescriptor(Descriptor):
    """Descriptor that applies a custom function to a single column.

    The callable passed to the constructor is kept in the private ``_func``
    attribute; the public ``func`` field only holds its string identifier.
    When the descriptor is constructed with a string ``func``, no callable is
    resolved and ``generate_data`` raises ``ValueError``.
    """

    column_name: str
    """Name of the column to process."""
    func: str
    """String identifier of the function (``module.name`` when built from a callable)."""
    _func: Optional[CustomColumnCallable] = PrivateAttr(None)
    """Callable applied to the column, or ``None`` when only a name was given."""

    def __init__(
        self,
        column_name: str,
        func: Union[str, CustomColumnCallable],
        alias: Optional[str] = None,
        tests: Optional[List[AnyDescriptorTest]] = None,
    ):
        """Create the descriptor.

        Args:
            column_name: Name of the dataset column passed to ``func``.
            func: Callable taking and returning a ``DatasetColumn``, or a string
                identifier (in which case no callable is attached).
            alias: Name for the descriptor; defaults to
                ``custom_column_descriptor:<func identifier>``.
            tests: Optional descriptor tests forwarded to the base class.
        """
        self.column_name = column_name
        if callable(func):
            self._func = func
            # Only the string identifier is stored in the public field.
            func = _callable_identifier(func)
        else:
            self._func = None
        self.func = func
        super().__init__(alias=alias or f"custom_column_descriptor:{func}", tests=tests)

    def generate_data(self, dataset: Dataset, options: Options) -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
        """Apply the custom function to the configured column.

        Raises:
            ValueError: If the descriptor holds no callable (it was created
                from a string ``func``).
        """
        if self._func is None:
            raise ValueError("CustomColumnDescriptor is not configured with callable func")
        column_data = dataset.column(self.column_name)
        return self._func(column_data)

    def list_input_columns(self) -> Optional[List[str]]:
        """Return the list of required input column names."""
        return [self.column_name]


CustomDescriptorCallable = Callable[[Dataset], Union[DatasetColumn, Dict[str, DatasetColumn]]]


class CustomDescriptor(Descriptor):
    """Descriptor that applies a custom function to the entire dataset.

    The callable passed to the constructor is kept in the private ``_func``
    attribute; the public ``func`` field only holds its string identifier.
    When the descriptor is constructed with a string ``func``, no callable is
    resolved and ``generate_data`` raises ``ValueError``.
    """

    func: str
    """String identifier of the function (``module.name`` when built from a callable)."""
    _func: Optional[CustomDescriptorCallable] = PrivateAttr(None)
    """Callable applied to the dataset, or ``None`` when only a name was given."""

    def __init__(
        self,
        func: Union[str, CustomDescriptorCallable],
        alias: Optional[str] = None,
        tests: Optional[List[AnyDescriptorTest]] = None,
    ):
        """Create the descriptor.

        Args:
            func: Callable taking a ``Dataset`` and returning one column or a
                mapping of column name to column, or a string identifier (in
                which case no callable is attached).
            alias: Name for the descriptor; defaults to
                ``custom_descriptor:<func identifier>``.
            tests: Optional descriptor tests forwarded to the base class.
        """
        if callable(func):
            self._func = func
            # Only the string identifier is stored in the public field.
            func = _callable_identifier(func)
        else:
            self._func = None
        self.func = func
        super().__init__(alias=alias or f"custom_descriptor:{func}", tests=tests)

    def generate_data(self, dataset: Dataset, options: Options) -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
        """Apply the custom function to the dataset.

        Raises:
            ValueError: If the descriptor holds no callable (it was created
                from a string ``func``).
        """
        if self._func is None:
            raise ValueError("CustomDescriptor is not configured with callable func")
        return self._func(dataset)