/ src / evidently / descriptors / _custom_descriptors.py
_custom_descriptors.py
 1  from typing import Callable
 2  from typing import Dict
 3  from typing import List
 4  from typing import Optional
 5  from typing import Union
 6  
 7  from evidently._pydantic_compat import PrivateAttr
 8  from evidently.core.datasets import AnyDescriptorTest
 9  from evidently.core.datasets import Dataset
10  from evidently.core.datasets import DatasetColumn
11  from evidently.core.datasets import Descriptor
12  from evidently.legacy.options.base import Options
13  
# Signature of the user function accepted by CustomColumnDescriptor:
# it receives one DatasetColumn and returns a DatasetColumn.
CustomColumnCallable = Callable[[DatasetColumn], DatasetColumn]
15  
16  
class CustomColumnDescriptor(Descriptor):
    """Descriptor that applies a custom function to a single column.

    The callable itself is kept only in the private ``_func`` attribute; the
    public ``func`` field holds a string identifier for it.
    """

    column_name: str
    """Name of the column to process."""
    func: str
    """String identifier of the function: ``module.name`` when built from a callable, else the string passed in."""
    _func: Optional[CustomColumnCallable] = PrivateAttr(None)
    """The callable to apply, or ``None`` when only a string identifier was supplied."""

    def __init__(
        self,
        column_name: str,
        func: Union[str, CustomColumnCallable],
        alias: Optional[str] = None,
        tests: Optional[List[AnyDescriptorTest]] = None,
    ):
        """Create the descriptor.

        Args:
            column_name: Name of the column passed to ``func``.
            func: Callable that takes a ``DatasetColumn`` and returns a ``DatasetColumn``,
                or a string identifier. When a string is given no callable is attached
                and ``generate_data`` raises ``ValueError``.
            alias: Descriptor alias; defaults to ``custom_column_descriptor:<func identifier>``.
            tests: Optional descriptor tests, forwarded to the base initializer.
        """
        self.column_name = column_name
        if callable(func):
            self._func = func
            # Not every callable has ``__name__`` (e.g. ``functools.partial`` objects or
            # instances that define ``__call__``); fall back to the type name instead of
            # failing with AttributeError.
            func_name = getattr(func, "__name__", type(func).__name__)
            func = f"{func.__module__}.{func_name}"
        else:
            self._func = None
        # From here on ``func`` is always the string identifier.
        self.func = func
        super().__init__(alias=alias or f"custom_column_descriptor:{func}", tests=tests)

    def generate_data(self, dataset: Dataset, options: Options) -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
        """Apply the custom function to the configured column.

        Args:
            dataset: Dataset from which ``column_name`` is read.
            options: Not used by this descriptor.

        Returns:
            Whatever the custom function returns for the column.

        Raises:
            ValueError: If no callable is attached (the descriptor was built from a string).
        """
        if self._func is None:
            raise ValueError("CustomColumnDescriptor is not configured with callable func")
        column_data = dataset.column(self.column_name)
        return self._func(column_data)

    def list_input_columns(self) -> Optional[List[str]]:
        """Return the single input column this descriptor reads."""
        return [self.column_name]
53  
54  
# Signature of the user function accepted by CustomDescriptor: it receives the
# whole Dataset and returns either one DatasetColumn or a dict of str -> DatasetColumn.
CustomDescriptorCallable = Callable[[Dataset], Union[DatasetColumn, Dict[str, DatasetColumn]]]
56  
57  
class CustomDescriptor(Descriptor):
    """Descriptor that applies a custom function to the entire dataset.

    The callable itself is kept only in the private ``_func`` attribute; the
    public ``func`` field holds a string identifier for it.
    """

    func: str
    """String identifier of the function: ``module.name`` when built from a callable, else the string passed in."""
    _func: Optional[CustomDescriptorCallable] = PrivateAttr(None)
    """The callable to apply, or ``None`` when only a string identifier was supplied."""

    def __init__(
        self,
        func: Union[str, CustomDescriptorCallable],
        alias: Optional[str] = None,
        tests: Optional[List[AnyDescriptorTest]] = None,
    ):
        """Create the descriptor.

        Args:
            func: Callable that takes a ``Dataset`` and returns a ``DatasetColumn`` or a
                dict of ``DatasetColumn`` values, or a string identifier. When a string
                is given no callable is attached and ``generate_data`` raises ``ValueError``.
            alias: Descriptor alias; defaults to ``custom_descriptor:<func identifier>``.
            tests: Optional descriptor tests, forwarded to the base initializer.
        """
        if callable(func):
            self._func = func
            # Not every callable has ``__name__`` (e.g. ``functools.partial`` objects or
            # instances that define ``__call__``); fall back to the type name instead of
            # failing with AttributeError.
            func_name = getattr(func, "__name__", type(func).__name__)
            func = f"{func.__module__}.{func_name}"
        else:
            self._func = None
        # From here on ``func`` is always the string identifier.
        self.func = func
        super().__init__(alias=alias or f"custom_descriptor:{func}", tests=tests)

    def generate_data(self, dataset: Dataset, options: Options) -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
        """Apply the custom function to the dataset.

        Args:
            dataset: Dataset handed to the custom function as-is.
            options: Not used by this descriptor.

        Returns:
            Whatever the custom function returns for the dataset.

        Raises:
            ValueError: If no callable is attached (the descriptor was built from a string).
        """
        if self._func is None:
            raise ValueError("CustomDescriptor is not configured with callable func")
        return self._func(dataset)