# adult.py
"""Demo project based on the Adult dataset that showcases TestSuite dashboard panels."""

import os
import pathlib
from datetime import datetime
from datetime import timedelta

import pandas as pd
from sklearn import datasets

from evidently.legacy.pipeline.column_mapping import ColumnMapping
from evidently.legacy.renderers.html_widgets import WidgetSize
from evidently.legacy.test_preset import DataDriftTestPreset
from evidently.legacy.test_suite import TestSuite
from evidently.legacy.ui.dashboards import DashboardPanelTestSuite
from evidently.legacy.ui.dashboards import ReportFilter
from evidently.legacy.ui.dashboards import TestFilter
from evidently.legacy.ui.dashboards import TestSuitePanelType
from evidently.legacy.ui.demo_projects.base import DemoProject
from evidently.legacy.ui.workspace import WorkspaceBase


def create_data():
    """Load the Adult dataset and split it by education level.

    When the ``EVIDENTLY_TEST_ENVIRONMENT`` environment variable equals ``"1"``
    the data is read from a parquet file addressed relative to this module;
    in every other case it is fetched from OpenML (``adult``, version 2).

    Returns:
        A ``(current, reference, column_mapping)`` tuple. ``current`` holds the
        rows whose ``education`` is "Some-college", "HS-grad" or "Bachelors",
        ``reference`` holds all remaining rows, and ``column_mapping`` is a
        default-constructed ``ColumnMapping``.
    """
    if os.environ.get("EVIDENTLY_TEST_ENVIRONMENT", "0") == "1":
        parquet_path = pathlib.Path(__file__).parent.joinpath("../../../../../test_data/adults.parquet")
        adult = pd.read_parquet(parquet_path)
    else:
        adult = datasets.fetch_openml(name="adult", version=2, as_frame="auto").frame

    # One boolean mask drives both halves of the split.
    in_current = adult.education.isin(["Some-college", "HS-grad", "Bachelors"])
    reference = adult[~in_current]
    current = adult[in_current]
    return current, reference, ColumnMapping()


def create_test_suite(i: int, data):
    """Run the data drift test preset on the ``i``-th 1000-row batch of current data.

    Args:
        i: Zero-based batch index. It selects rows ``1000 * i`` up to (not
            including) ``1000 * (i + 1)`` of the current frame and shifts the
            suite timestamp to ``i + 1`` days after 2023-01-29.
        data: The ``(current, reference, column_mapping)`` tuple produced by
            ``create_data``.

    Returns:
        The executed ``TestSuite``, with ``metadata["batch_size"]`` set to
        ``"daily"``.
    """
    current, reference, column_mapping = data

    first_row = 1000 * i
    batch = current.iloc[first_row : first_row + 1000, :]

    suite = TestSuite(
        tests=[DataDriftTestPreset()],
        timestamp=datetime(2023, 1, 29) + timedelta(days=i + 1),
    )
    suite.metadata["batch_size"] = "daily"
    suite.run(reference_data=reference, current_data=batch, column_mapping=column_mapping)
    return suite


def _key_feature_filters():
    """Return fresh ``TestColumnDrift`` filters for the two highlighted columns."""
    return [
        TestFilter(test_id="TestColumnDrift", test_args={"column_name.name": column})
        for column in ("hours-per-week", "capital-gain")
    ]


def _test_suite_panel(title: str, *, key_features_only: bool, detailed: bool) -> DashboardPanelTestSuite:
    """Build one ``WidgetSize.HALF`` test suite panel aggregated with ``time_agg="1D"``.

    ``test_filters`` and ``panel_type`` are passed only when requested, so the
    panel class's own defaults apply in the other cases.
    """
    optional = {}
    if key_features_only:
        optional["test_filters"] = _key_feature_filters()
    if detailed:
        optional["panel_type"] = TestSuitePanelType.DETAILED

    return DashboardPanelTestSuite(
        title=title,
        filter=ReportFilter(metadata_values={}, tag_values=[], include_test_suites=True),
        size=WidgetSize.HALF,
        time_agg="1D",
        **optional,
    )


def create_project(workspace: WorkspaceBase, name: str):
    """Create the demo project in ``workspace`` and populate its dashboard.

    Four panels are added in order: key-feature column drift and all tests,
    first without an explicit panel type, then with
    ``TestSuitePanelType.DETAILED``.

    Args:
        workspace: Workspace in which the project is created.
        name: Name passed to ``workspace.create_project``.

    Returns:
        The project, after ``project.save()`` has been called.
    """
    project = workspace.create_project(name)
    project.description = "A toy demo project using Adult dataset. Showcases TestSuite panels"

    # (title, restrict to key-feature drift tests, use the detailed panel type)
    layout = (
        ("Column Drift tests for key features: aggregated", True, False),
        ("All tests: aggregated", False, False),
        ("Column Drift tests for key features: detailed", True, True),
        ("All tests: detailed", False, True),
    )
    for title, key_features_only, detailed in layout:
        panel = _test_suite_panel(title, key_features_only=key_features_only, detailed=detailed)
        project.dashboard.add_panel(panel)

    project.save()
    return project


# Only the test suite factory is supplied; the snapshot and report factories are left unset.
adult_demo_project = DemoProject(
    name="Demo project - Adult",
    create_data=create_data,
    create_snapshot=None,
    create_report=None,
    create_project=create_project,
    create_test_suite=create_test_suite,
    count=19,
)

if __name__ == "__main__":
    # create_demo_project("http://localhost:8080")
    adult_demo_project.create("workspace")