/ src / evidently / legacy / utils / generators.py
generators.py
  1  import abc
  2  from typing import Dict
  3  from typing import Generic
  4  from typing import List
  5  from typing import Optional
  6  from typing import Type
  7  from typing import TypeVar
  8  from typing import Union
  9  
 10  from evidently.legacy.core import ColumnType
 11  from evidently.legacy.utils.data_preprocessing import DataDefinition
 12  
 13  TObject = TypeVar("TObject")
 14  
 15  
 16  class BaseGenerator(Generic[TObject]):
 17      """Base class for tests and metrics generator creation
 18  
 19      To create a new generator:
 20          - inherit a class from the base class
 21          - implement `generate_tests` method and return a list of test objects from it
 22  
 23      A Suite or a Report will call the method and add generated tests to its list instead of the generator object.
 24  
 25      You can use `columns_info` parameter in `generate` for getting data structure meta info like columns list.
 26  
 27      For example:
 28          if you want to create a test generator for 50, 90, 99 quantiles tests
 29          for all numeric columns with default condition, by reference quantiles
 30      >>> class TestQuantiles(BaseTestGenerator):
 31      ...    def generate(self, data_definition: DataDefinition) -> List[TestValueQuantile]:
 32      ...        return [
 33      ...            TestColumnQuantile(column_name=name, quantile=quantile)
 34      ...            for quantile in (0.5, 0.9, 0.99)
 35      ...            for name in data_definition.list_columns(ColumnType.Numerical, features_only=True)
 36      ...        ]
 37  
 38      Do not forget set correct test type for `generate` return value
 39      """
 40  
 41      @abc.abstractmethod
 42      def generate(self, data_definition: DataDefinition) -> List[TObject]:
 43          raise NotImplementedError()
 44  
 45  
 46  def make_generator_by_columns(
 47      base_class: Type[TObject],
 48      columns: Optional[Union[str, list]] = None,
 49      parameters: Optional[Dict] = None,
 50      skip_id_column: bool = False,
 51  ) -> BaseGenerator[TObject]:
 52      """Create a test generator for a columns list with a test class.
 53  
 54      Base class is specified with `base_class` parameter.
 55      If the test have no "column_name" parameter - TypeError will be raised.
 56  
 57      Columns list can be defined with parameter `columns`.
 58      If it is a list - just use it as a list of the columns.
 59      If `columns` is a string, it can be one of values:
 60      - "all" - make tests for all columns, including target/prediction columns
 61      - "num" - for numeric features
 62      - "cat" - for category features
 63      - "text" - for text features
 64      - "features" - for all features, not target/prediction columns.
 65      None value is the same as "all".
 66      If `columns` is string, and it is not one of the values, ValueError will be raised.
 67  
 68      `parameters` is used for specifying other parameters for each object, it is the same for all generated objects.
 69      """
 70      if parameters is None:
 71          parameters_for_generation: Dict = {}
 72  
 73      else:
 74          parameters_for_generation = parameters
 75  
 76      class ColumnsGenerator(BaseGenerator):
 77          def generate(self, data_definition: DataDefinition) -> List[TObject]:
 78              nonlocal parameters_for_generation
 79              result = []
 80  
 81              if isinstance(columns, list):
 82                  columns_for_generation = columns
 83  
 84              elif columns == "all" or columns is None:
 85                  columns_for_generation = [
 86                      column.column_name
 87                      for column in data_definition.get_columns()
 88                      if column != data_definition.get_id_column()
 89                  ]
 90  
 91              elif columns == "cat":
 92                  columns_for_generation = [
 93                      column.column_name
 94                      for column in data_definition.get_columns(ColumnType.Categorical, features_only=True)
 95                  ]
 96  
 97              elif columns == "num":
 98                  columns_for_generation = [
 99                      column.column_name
100                      for column in data_definition.get_columns(ColumnType.Numerical, features_only=True)
101                  ]
102  
103              elif columns == "text":
104                  columns_for_generation = [
105                      column.column_name for column in data_definition.get_columns(ColumnType.Text, features_only=True)
106                  ]
107  
108              elif columns == "features":
109                  columns_for_generation = [
110                      column.column_name for column in data_definition.get_columns(features_only=True)
111                  ]
112  
113              else:
114                  raise ValueError("Incorrect parameter 'columns' for test generator")
115  
116              for column_name in columns_for_generation:
117                  parameters_for_generation["column_name"] = column_name
118                  # ignore possible parameters incompatibility
119                  # we cannot guarantee that a base class has column_name parameter
120                  # if it has not, type error will ve raised
121                  try:
122                      result.append(base_class(**parameters_for_generation))  # type: ignore
123  
124                  except TypeError as error:
125                      raise TypeError(f"Cannot generate {base_class.__name__}. Error: {error}")
126  
127              return result
128  
129      return ColumnsGenerator()