/ src / evidently / descriptors / _generate_descriptors.py
_generate_descriptors.py
  1  import inspect
  2  from itertools import chain
  3  from pathlib import Path
  4  from typing import Any
  5  from typing import Dict
  6  from typing import List
  7  from typing import Optional
  8  from typing import Tuple
  9  from typing import Type
 10  from typing import Union
 11  
 12  from evidently._pydantic_compat import import_string
 13  from evidently.core.datasets import Descriptor
 14  from evidently.core.datasets import DescriptorTest
 15  from evidently.core.datasets import FeatureDescriptor
 16  from evidently.core.tests import GenericTest
 17  from evidently.legacy.core import ColumnType
 18  from evidently.legacy.descriptors.llm_judges import BaseLLMEval
 19  from evidently.legacy.features.custom_feature import CustomFeature
 20  from evidently.legacy.features.custom_feature import CustomPairColumnFeature
 21  from evidently.legacy.features.custom_feature import CustomSingleColumnFeature
 22  from evidently.legacy.features.generated_features import GeneratedFeatures
 23  from evidently.legacy.features.llm_judge import BaseLLMPromptTemplate
 24  from evidently.legacy.features.llm_judge import Uncertainty
 25  from evidently.legacy.features.text_length_feature import TextLength
 26  from evidently.pydantic_utils import TYPE_ALIASES
 27  
# File name of the generated module; main() writes it into the same directory as this script
# and also derives the module name for the printed import statement from it.
SOURCE_FILE = "generated_descriptors.py"

# Substring -> replacement pairs applied (via str.replace, in this order) to the source of every
# generated function, shortening fully-qualified type names that appear in rendered annotations.
REPLACES = {
    "pandas.core.frame.DataFrame": "DataFrame",
    "evidently.utils.data_preprocessing.DataDefinition": "DataDefinition",
    "pandas.core.series.Series": "Series",
    "evidently.features.llm_judge.Uncertainty": "Uncertainty",
    "evidently.legacy.features.llm_judge.Uncertainty": "Uncertainty",
}

# Overrides for generated function names, looked up by the create_* functions.
# NOTE(review): the keys are snake_case, but the create_* functions currently look up the raw
# class name (their snake_case conversion is commented out), so these entries presumably never
# match -- confirm before relying on them.
NAME_MAPPING = {
    "open_a_i_feature": "openai_feature",
    "is_valid_j_s_o_n": "is_valid_json",
    "is_valid_s_q_l": "is_valid_sql",
}

# Feature classes for which main() does not generate a descriptor function.
SKIP_CLASSES = {CustomFeature, CustomPairColumnFeature, CustomSingleColumnFeature, TextLength}
 45  
 46  
 47  def _get_type_name(tp: Type):
 48      if tp.__module__.startswith("typing"):
 49          return str(tp).replace("typing.", "")
 50      return tp.__name__
 51      # return str(tp)
 52  
 53  
 54  def _get_value_str(value):
 55      if isinstance(value, str):
 56          return f'"{value}"'
 57      return str(value)
 58  
 59  
 60  def get_args_kwargs(feature_class: Type[GeneratedFeatures]) -> Tuple[Dict[str, str], Dict[str, Tuple[str, str]]]:
 61      if feature_class.__dict__.get("__init__") is None:
 62          # get from fields
 63          args = {
 64              key: _get_type_name(field.annotation) for key, field in feature_class.__fields__.items() if field.required
 65          }
 66          kwargs = {
 67              key: (_get_type_name(field.annotation), _get_value_str(field.default))
 68              for key, field in feature_class.__fields__.items()
 69              if not field.required and key != "type"
 70          }
 71          return args, kwargs
 72      # get from constructor
 73      sig = inspect.getfullargspec(feature_class.__init__)
 74  
 75      defaults = sig.defaults or tuple()
 76      args = {a: _get_type_name(sig.annotations.get(a, Any)) for a in sig.args[1 : -len(defaults)]}
 77      kwargs = {
 78          a: (_get_type_name(sig.annotations.get(a, Any)), _get_value_str(d))
 79          for a, d in zip(sig.args[-len(defaults) :], defaults)
 80      }
 81      kwonlydefaults = sig.kwonlydefaults or {}
 82      args.update({k: _get_type_name(sig.annotations.get(k, Any)) for k in sig.kwonlyargs if k not in kwonlydefaults})
 83      kwargs.update(
 84          {
 85              k: (_get_type_name(sig.annotations.get(k, Any)), _get_value_str(kwonlydefaults[k]))
 86              for k in sig.kwonlyargs
 87              if k in kwonlydefaults
 88          }
 89      )
 90      return args, kwargs
 91  
 92  
 93  def create_descriptor_function(feature_class: Type[GeneratedFeatures]):
 94      class_name = feature_class.__name__
 95      name = class_name
 96      # name = cmpx.lower() + re.sub(r"(?<!^)(?=[A-Z])", "_", class_name[len(cmpx) :]).lower()
 97      name = NAME_MAPPING.get(name, name)
 98      if name.endswith("Feature"):
 99          name = name[: -len("Feature")]
100  
101      args, kwargs = get_args_kwargs(feature_class)
102      display_name_required = "display_name" in args
103      has_display_name = display_name_required or "display_name" in kwargs
104      if not display_name_required:
105          kwargs["alias"] = ("Optional[str]", "None")
106          kwargs.pop("display_name", None)
107      else:
108          args["alias"] = "str"
109          args.pop("display_name")
110      kwargs["tests"] = ('Optional[List[Union["DescriptorTest", "GenericTest"]]]', "None")
111      args_str = ", ".join(f"{a}: {t}" for a, t in args.items())
112      if len(kwargs) > 0:
113          kwargs_str = ", " + ", ".join(f"{a}: {t} = {d}" for a, (t, d) in kwargs.items())
114      else:
115          kwargs_str = ""
116  
117      class_args = ", ".join(f"{k}={k}" for k in chain(args, kwargs) if k not in ("alias", "tests"))
118      if has_display_name:
119          class_args += ", display_name=alias"
120      res = f"""
121  def {name}({args_str}{kwargs_str}):
122          from {feature_class.__module__} import {feature_class.__name__} as {feature_class.__name__}V1
123          feature = {class_name}V1({class_args})
124          return FeatureDescriptor(feature=feature, alias=alias, tests=tests)
125  """
126      for substr, repl in REPLACES.items():
127          res = res.replace(substr, repl)
128      return res, name
129  
130  
def create_llm_descriptor_functions(feature_class: Type[BaseLLMEval]):
    """Render the source of a factory function for the LLM eval class ``feature_class``.

    The generated function takes ``column_name`` first, followed by the constructor parameters
    of ``feature_class`` (as reported by ``get_args_kwargs``) with ``display_name`` replaced by
    an optional ``alias``, plus an optional ``tests`` parameter. Its body imports the class under
    a ``V1``-suffixed alias, instantiates it, calls ``.feature(column_name)`` on the instance and
    returns ``FeatureDescriptor(feature=feature, alias=alias, tests=tests)``.

    Args:
        feature_class: The LLM eval class to generate a function for.

    Returns:
        A pair ``(source, name)``: the generated function source and the function's name
        (the class name, after ``NAME_MAPPING`` lookup, without a trailing ``"Feature"``).
    """
    class_name = feature_class.__name__
    name = NAME_MAPPING.get(class_name, class_name)
    if name.endswith("Feature"):
        name = name[: -len("Feature")]

    args, kwargs = get_args_kwargs(feature_class)  # type: ignore[arg-type]
    kwargs["alias"] = ("Optional[str]", "None")
    kwargs["tests"] = ('Optional[List[Union["DescriptorTest", "GenericTest"]]]', "None")
    # ``display_name`` is exposed to callers as ``alias``; remember whether the class accepts it.
    has_display_name = kwargs.pop("display_name", None) is not None

    params = ["column_name: str"]
    params.extend(f"{a}: {t}" for a, t in args.items())
    params.extend(f"{a}: {t} = {d}" for a, (t, d) in kwargs.items())
    signature = ", ".join(params)

    # Build the constructor call from a list so that a class with no other constructor
    # arguments does not produce a call starting with a stray comma (``Cls(, display_name=alias)``).
    ctor_args = [f"{k}={k}" for k in chain(args, kwargs) if k not in ("alias", "tests")]
    if has_display_name:
        ctor_args.append("display_name=alias")
    class_args = ", ".join(ctor_args)

    res = f"""
def {name}({signature}):
    from {feature_class.__module__} import {feature_class.__name__} as {feature_class.__name__}V1
    feature = {class_name}V1({class_args}).feature(column_name)
    return FeatureDescriptor(feature=feature, alias=alias, tests=tests)
    """
    # Shorten fully-qualified type names that ended up in the rendered annotations.
    for substr, repl in REPLACES.items():
        res = res.replace(substr, repl)
    return res, name
162  
163  
def load_all_subtypes(base_class):
    """Import every class path registered in ``TYPE_ALIASES`` under a subclass of ``base_class``.

    Args:
        base_class: Entries whose registered base is a class and a subclass of this one are imported.

    Raises:
        ImportError: If one of the class paths cannot be imported; the message names the path
            and the original error is chained as the cause.
    """
    # Collect all matching paths before importing anything, so the registry is fully
    # iterated before the first import runs.
    matching_paths = []
    for (registered_base, _alias), path in TYPE_ALIASES.items():
        if not isinstance(registered_base, type):
            continue
        if issubclass(registered_base, base_class):
            matching_paths.append(path)

    for path in matching_paths:
        try:
            import_string(path)
        except ImportError as err:
            raise ImportError(f"Cannot import type {path}") from err
173  
174  
def main():
    """Generate the descriptor module and print the matching import / ``__all__`` snippet.

    Steps:
      1. Import every class path registered in ``TYPE_ALIASES`` directly under ``GeneratedFeatures``.
      2. Render one function per non-abstract, non-skipped ``GeneratedFeatures`` subtype and one
         per ``BaseLLMEval`` subtype, each group sorted by class name.
      3. Write the import header and the rendered functions to ``SOURCE_FILE`` next to this script.
      4. Print to stdout an import statement for the generated functions and an ``__all__`` list
         that additionally contains the non-abstract ``Descriptor`` subtypes (except
         ``FeatureDescriptor``).
    """
    # ``sorted`` already returns a new list, so the registry is not iterated while importing.
    for (base_class, _), classpath in sorted(TYPE_ALIASES.items(), key=lambda x: x[0][1]):
        if base_class is GeneratedFeatures:
            import_string(classpath)
    subtypes__ = GeneratedFeatures.__subtypes__()

    srcs = []
    fnames = []
    # Names imported at the top of the generated module.
    imports: List[Type] = [
        FeatureDescriptor,
        ColumnType,
        BaseLLMPromptTemplate,
        Any,
        List,
        Optional,
        Dict,
        Uncertainty,
        Type,
        DescriptorTest,
        GenericTest,
        Union,
    ]
    for feature_class in sorted(subtypes__, key=lambda x: x.__name__):
        if inspect.isabstract(feature_class):
            continue
        if feature_class in SKIP_CLASSES:
            continue
        src, fname = create_descriptor_function(feature_class)
        fnames.append(fname)
        srcs.append(src)
    for llm_feature_class in sorted(BaseLLMEval.__subtypes__(), key=lambda x: x.__name__):
        src, fname = create_llm_descriptor_functions(llm_feature_class)
        fnames.append(fname)
        srcs.append(src)
    # Explicit encoding: the generated source must not depend on the platform's default
    # encoding (rendered default values may contain non-ASCII text).
    with open(Path(__file__).parent / SOURCE_FILE, "w", encoding="utf-8") as f:
        f.write("\n".join(f"from {t.__module__} import {t.__name__}" for t in imports) + "\n\n")
        f.write("\n\n".join(srcs))

    load_all_subtypes(Descriptor)
    # The import line lists only the generated functions; the Descriptor subtypes are added
    # afterwards, so they appear in ``__all__`` only.
    print(f"from .{SOURCE_FILE[:-3]} import ({', '.join(fnames)})")
    print("__all__ = [")
    fnames.extend(
        [
            sc.__name__
            for sc in Descriptor.__subtypes__()
            if sc not in (FeatureDescriptor,) and not inspect.isabstract(sc)
        ]
    )
    print("\n".join(f'"{fname}",' for fname in sorted(fnames)))
    print("]")
226  
227  
# Run the generator only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()