_generate_descriptors.py
import inspect
from itertools import chain
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple
from typing import Type
from typing import Union

from evidently._pydantic_compat import import_string
from evidently.core.datasets import Descriptor
from evidently.core.datasets import DescriptorTest
from evidently.core.datasets import FeatureDescriptor
from evidently.core.tests import GenericTest
from evidently.legacy.core import ColumnType
from evidently.legacy.descriptors.llm_judges import BaseLLMEval
from evidently.legacy.features.custom_feature import CustomFeature
from evidently.legacy.features.custom_feature import CustomPairColumnFeature
from evidently.legacy.features.custom_feature import CustomSingleColumnFeature
from evidently.legacy.features.generated_features import GeneratedFeatures
from evidently.legacy.features.llm_judge import BaseLLMPromptTemplate
from evidently.legacy.features.llm_judge import Uncertainty
from evidently.legacy.features.text_length_feature import TextLength
from evidently.pydantic_utils import TYPE_ALIASES

# File name of the generated module; main() writes it next to this script.
SOURCE_FILE = "generated_descriptors.py"

# Substring -> replacement pairs applied, in this order, to every generated
# function source (fully qualified names are shortened to the bare name).
REPLACES = {
    "pandas.core.frame.DataFrame": "DataFrame",
    "evidently.utils.data_preprocessing.DataDefinition": "DataDefinition",
    "pandas.core.series.Series": "Series",
    "evidently.features.llm_judge.Uncertainty": "Uncertainty",
    "evidently.legacy.features.llm_judge.Uncertainty": "Uncertainty",
}

# Overrides consulted when deriving the name of a generated function.
NAME_MAPPING = {
    "open_a_i_feature": "openai_feature",
    "is_valid_j_s_o_n": "is_valid_json",
    "is_valid_s_q_l": "is_valid_sql",
}

# Feature classes for which main() does not generate a wrapper function.
SKIP_CLASSES = {CustomFeature, CustomPairColumnFeature, CustomSingleColumnFeature, TextLength}


def _get_type_name(tp: Type):
    """Return the text used to spell the annotation ``tp`` in generated code.

    Objects whose ``__module__`` starts with ``"typing"`` are rendered with
    ``str(tp)`` with every ``"typing."`` occurrence removed; anything else is
    rendered as its ``__name__``.
    """
    module_name = tp.__module__
    if not module_name.startswith("typing"):
        return tp.__name__
    return str(tp).replace("typing.", "")
def _get_value_str(value):
    """Render a default value as source text for a generated signature.

    Strings become double-quoted literals with quotes, backslashes and control
    characters escaped, so defaults containing such characters still yield
    valid generated code. Every other value is rendered with ``str``.
    """
    if isinstance(value, str):
        # Function-scope import keeps the module's import block untouched.
        # JSON string escapes are all valid Python string escapes.
        import json

        # f"{value}" first, so str subclasses are formatted exactly as the
        # plain '"{value}"' interpolation used to format them.
        return json.dumps(f"{value}", ensure_ascii=False)
    return str(value)


def _split_positional_args(arg_names, defaults):
    """Split constructor positional parameters into required and defaulted.

    Args:
        arg_names: all positional parameter names, including the leading ``self``.
        defaults: default values; they belong to the trailing parameters.

    Returns:
        ``(required_names, [(name, default), ...])`` with ``self`` removed.
    """
    # Explicit index instead of negative slicing: with no defaults,
    # ``arg_names[1:-0]`` is ``arg_names[1:0]`` (empty) and would drop every
    # required parameter.
    first_default = len(arg_names) - len(defaults)
    required = list(arg_names[1:first_default])
    optional = list(zip(arg_names[first_default:], defaults))
    return required, optional


def get_args_kwargs(
    feature_class: "Type[GeneratedFeatures]",
) -> Tuple[Dict[str, str], Dict[str, Tuple[str, str]]]:
    """Collect the constructor parameters of ``feature_class`` as source text.

    If the class does not define its own ``__init__``, parameters are read from
    ``feature_class.__fields__`` (the ``type`` field is skipped among optional
    ones); otherwise they are read from the ``__init__`` signature.

    Returns:
        ``(args, kwargs)`` where ``args`` maps each required parameter to its
        type name and ``kwargs`` maps each optional parameter to a
        ``(type name, default value source)`` pair.
    """
    if feature_class.__dict__.get("__init__") is None:
        fields = feature_class.__fields__
        args = {key: _get_type_name(field.annotation) for key, field in fields.items() if field.required}
        kwargs = {
            key: (_get_type_name(field.annotation), _get_value_str(field.default))
            for key, field in fields.items()
            if not field.required and key != "type"
        }
        return args, kwargs

    sig = inspect.getfullargspec(feature_class.__init__)
    annotations = sig.annotations
    required, optional = _split_positional_args(sig.args, sig.defaults or ())
    args = {a: _get_type_name(annotations.get(a, Any)) for a in required}
    kwargs = {a: (_get_type_name(annotations.get(a, Any)), _get_value_str(d)) for a, d in optional}

    # Keyword-only parameters: required ones join args, defaulted ones kwargs.
    kwonlydefaults = sig.kwonlydefaults or {}
    for k in sig.kwonlyargs:
        type_name = _get_type_name(annotations.get(k, Any))
        if k in kwonlydefaults:
            kwargs[k] = (type_name, _get_value_str(kwonlydefaults[k]))
        else:
            args[k] = type_name
    return args, kwargs


def _descriptor_name(class_name):
    """Derive the generated function name from a feature class name."""
    # NOTE(review): NAME_MAPPING keys are snake_case while the lookup uses the
    # class name as-is, so the mapping looks like a leftover from an earlier
    # snake_case naming scheme - confirm before relying on it.
    name = NAME_MAPPING.get(class_name, class_name)
    if name.endswith("Feature"):
        name = name[: -len("Feature")]
    return name


def _render_parameters(args, kwargs):
    """Render required and optional parameters as one comma-separated list."""
    # Joining a single list avoids a dangling leading comma when there are no
    # required parameters.
    params = [f"{a}: {t}" for a, t in args.items()]
    params.extend(f"{a}: {t} = {d}" for a, (t, d) in kwargs.items())
    return ", ".join(params)


def _render_call_args(args, kwargs, has_display_name):
    """Render the ``name=name`` arguments forwarded to the feature class."""
    # ``alias`` and ``tests`` belong to the descriptor, not to the feature.
    parts = [f"{k}={k}" for k in chain(args, kwargs) if k not in ("alias", "tests")]
    if has_display_name:
        parts.append("display_name=alias")
    return ", ".join(parts)


def _apply_replaces(src):
    """Apply every REPLACES substitution, in order, to generated source."""
    for substr, repl in REPLACES.items():
        src = src.replace(substr, repl)
    return src


def create_descriptor_function(feature_class: "Type[GeneratedFeatures]"):
    """Generate the source of a function wrapping ``feature_class``.

    The generated function takes the feature's constructor parameters, with
    ``display_name`` replaced by ``alias`` and an extra optional ``tests``
    parameter, and returns a ``FeatureDescriptor`` built around the feature.

    Returns:
        ``(source, function_name)``.
    """
    class_name = feature_class.__name__
    name = _descriptor_name(class_name)

    args, kwargs = get_args_kwargs(feature_class)
    display_name_required = "display_name" in args
    has_display_name = display_name_required or "display_name" in kwargs
    if display_name_required:
        # A required display_name becomes a required alias.
        args["alias"] = "str"
        args.pop("display_name")
    else:
        kwargs["alias"] = ("Optional[str]", "None")
        kwargs.pop("display_name", None)
    kwargs["tests"] = ('Optional[List[Union["DescriptorTest", "GenericTest"]]]', "None")

    params = _render_parameters(args, kwargs)
    class_args = _render_call_args(args, kwargs, has_display_name)
    res = f"""
def {name}({params}):
    from {feature_class.__module__} import {class_name} as {class_name}V1
    feature = {class_name}V1({class_args})
    return FeatureDescriptor(feature=feature, alias=alias, tests=tests)
"""
    return _apply_replaces(res), name


def create_llm_descriptor_functions(feature_class: "Type[BaseLLMEval]"):
    """Generate the source of a function wrapping an LLM eval class.

    Same as ``create_descriptor_function`` except that the generated function
    takes a leading ``column_name`` parameter and builds the feature with
    ``<Class>(...).feature(column_name)``.

    Returns:
        ``(source, function_name)``.
    """
    class_name = feature_class.__name__
    name = _descriptor_name(class_name)

    args, kwargs = get_args_kwargs(feature_class)  # type: ignore[arg-type]
    kwargs["alias"] = ("Optional[str]", "None")
    kwargs["tests"] = ('Optional[List[Union["DescriptorTest", "GenericTest"]]]', "None")
    has_display_name = kwargs.pop("display_name", None) is not None

    params = _render_parameters(args, kwargs)
    class_args = _render_call_args(args, kwargs, has_display_name)
    res = f"""
def {name}(column_name: str, {params}):
    from {feature_class.__module__} import {class_name} as {class_name}V1
    feature = {class_name}V1({class_args}).feature(column_name)
    return FeatureDescriptor(feature=feature, alias=alias, tests=tests)
"""
    return _apply_replaces(res), name


def load_all_subtypes(base_class):
    """Import every class path in TYPE_ALIASES registered for a subclass of ``base_class``.

    Raises:
        ImportError: if a class path cannot be imported; the message names the
            offending path and the original error is chained.
    """
    # The paths are collected into a list before anything is imported, so the
    # import loop does not iterate TYPE_ALIASES itself.
    classpaths = [
        cp for (base, _), cp in TYPE_ALIASES.items() if isinstance(base, type) and issubclass(base, base_class)
    ]
    for cp in classpaths:
        try:
            import_string(cp)
        except ImportError as e:
            raise ImportError(f"Cannot import type {cp}") from e


def main():
    """Regenerate SOURCE_FILE and print the matching import / ``__all__`` snippet.

    Imports every class path registered for ``GeneratedFeatures``, writes one
    wrapper function per concrete feature class (skipping SKIP_CLASSES) and per
    ``BaseLLMEval`` subtype into SOURCE_FILE next to this script, then prints an
    import statement for the generated functions and an ``__all__`` list.
    """
    # sorted() already returns a new list, so the registry is not iterated
    # while the imports run.
    for (base_class, _), classpath in sorted(TYPE_ALIASES.items(), key=lambda x: x[0][1]):
        if base_class is GeneratedFeatures:
            import_string(classpath)
    subtypes__ = GeneratedFeatures.__subtypes__()

    srcs = []
    fnames = []
    # Names imported at the top of the generated module.
    imports: List[Type] = [
        FeatureDescriptor,
        ColumnType,
        BaseLLMPromptTemplate,
        Any,
        List,
        Optional,
        Dict,
        Uncertainty,
        Type,
        DescriptorTest,
        GenericTest,
        Union,
    ]
    for feature_class in sorted(subtypes__, key=lambda x: x.__name__):
        if inspect.isabstract(feature_class) or feature_class in SKIP_CLASSES:
            continue
        src, fname = create_descriptor_function(feature_class)
        fnames.append(fname)
        srcs.append(src)
    for llm_feature_class in sorted(BaseLLMEval.__subtypes__(), key=lambda x: x.__name__):
        src, fname = create_llm_descriptor_functions(llm_feature_class)
        fnames.append(fname)
        srcs.append(src)

    # Explicit UTF-8: the output is Python source, whose default encoding is
    # UTF-8 regardless of the platform locale.
    with open(Path(__file__).parent / SOURCE_FILE, "w", encoding="utf-8") as f:
        f.write("\n".join(f"from {t.__module__} import {t.__name__}" for t in imports) + "\n\n")
        f.write("\n\n".join(srcs))

    load_all_subtypes(Descriptor)
    # The import line lists only the generated functions; the Descriptor
    # subclasses are added afterwards, so they appear in __all__ only.
    print(f"from .{SOURCE_FILE[:-3]} import ({', '.join(fnames)})")
    print("__all__ = [")
    fnames.extend(
        [
            sc.__name__
            for sc in Descriptor.__subtypes__()
            if sc not in (FeatureDescriptor,) and not inspect.isabstract(sc)
        ]
    )
    print("\n".join(f'"{fname}",' for fname in sorted(fnames)))
    print("]")


if __name__ == "__main__":
    main()