generators.py
import abc
from typing import Dict
from typing import Generic
from typing import List
from typing import Optional
from typing import Type
from typing import TypeVar
from typing import Union

from evidently.legacy.core import ColumnType
from evidently.legacy.utils.data_preprocessing import DataDefinition

TObject = TypeVar("TObject")


class BaseGenerator(Generic[TObject]):
    """Base class for tests and metrics generator creation

    To create a new generator:
        - inherit a class from the base class
        - implement `generate` method and return a list of generated objects from it

    A Suite or a Report will call the method and add generated tests to its list instead of the generator object.

    You can use `data_definition` parameter in `generate` for getting data structure meta info like columns list.

    For example:
        if you want to create a test generator for 50, 90, 99 quantiles tests
        for all numeric columns with default condition, by reference quantiles
    >>> class TestQuantiles(BaseTestGenerator):
    ...    def generate(self, data_definition: DataDefinition) -> List[TestColumnQuantile]:
    ...        return [
    ...            TestColumnQuantile(column_name=column.column_name, quantile=quantile)
    ...            for quantile in (0.5, 0.9, 0.99)
    ...            for column in data_definition.get_columns(ColumnType.Numerical, features_only=True)
    ...        ]

    Do not forget set correct test type for `generate` return value
    """

    @abc.abstractmethod
    def generate(self, data_definition: DataDefinition) -> List[TObject]:
        raise NotImplementedError()


def make_generator_by_columns(
    base_class: Type[TObject],
    columns: Optional[Union[str, list]] = None,
    parameters: Optional[Dict] = None,
    skip_id_column: bool = False,
) -> BaseGenerator[TObject]:
    """Create a test generator for a columns list with a test class.

    Base class is specified with `base_class` parameter.
    If the test have no "column_name" parameter - TypeError will be raised.

    Columns list can be defined with parameter `columns`.
    If it is a list - just use it as a list of the columns.
    If `columns` is a string, it can be one of values:
    - "all" - make tests for all columns, including target/prediction columns
      (the id column is excluded)
    - "num" - for numeric features
    - "cat" - for category features
    - "text" - for text features
    - "features" - for all features, not target/prediction columns.
    None value is the same as "all".
    If `columns` is string, and it is not one of the values, ValueError will be raised.

    `parameters` is used for specifying other parameters for each object, it is the same for all generated objects.
    The passed dict is only read: every generated object receives its own keyword dict,
    so the caller's `parameters` is never modified. A "column_name" key in `parameters`
    is overridden by the column currently being generated.

    NOTE: `skip_id_column` is accepted for interface compatibility but is not consulted
    by this implementation.

    Returns:
        A generator object whose `generate(data_definition)` builds one `base_class`
        instance per selected column.

    Raises (from `generate`, not from this factory):
        ValueError: `columns` is neither a list, None, nor one of the known selectors.
        TypeError: `base_class` cannot be constructed from `parameters` plus "column_name".
    """
    # Keep a reference (not a snapshot) so `generate` sees the same dict contents
    # it always did; the dict itself is never written to.
    parameters_for_generation: Dict = {} if parameters is None else parameters

    class ColumnsGenerator(BaseGenerator):
        def generate(self, data_definition: DataDefinition) -> List[TObject]:
            result = []

            if isinstance(columns, list):
                columns_for_generation = columns

            elif columns == "all" or columns is None:
                columns_for_generation = [
                    column.column_name
                    for column in data_definition.get_columns()
                    if column != data_definition.get_id_column()
                ]

            elif columns == "cat":
                columns_for_generation = [
                    column.column_name
                    for column in data_definition.get_columns(ColumnType.Categorical, features_only=True)
                ]

            elif columns == "num":
                columns_for_generation = [
                    column.column_name
                    for column in data_definition.get_columns(ColumnType.Numerical, features_only=True)
                ]

            elif columns == "text":
                columns_for_generation = [
                    column.column_name for column in data_definition.get_columns(ColumnType.Text, features_only=True)
                ]

            elif columns == "features":
                columns_for_generation = [
                    column.column_name for column in data_definition.get_columns(features_only=True)
                ]

            else:
                raise ValueError("Incorrect parameter 'columns' for test generator")

            for column_name in columns_for_generation:
                # Build a fresh kwargs dict per column instead of writing "column_name"
                # into the shared dict, which used to leak into the caller's `parameters`.
                object_parameters = {**parameters_for_generation, "column_name": column_name}
                # ignore possible parameters incompatibility
                # we cannot guarantee that a base class has column_name parameter
                # if it has not, type error will be raised
                try:
                    result.append(base_class(**object_parameters))  # type: ignore

                except TypeError as error:
                    raise TypeError(f"Cannot generate {base_class.__name__}. Error: {error}") from error

            return result

    return ColumnsGenerator()