test_sql_datasets.py
import datetime

import pandas as pd
import pytest
import pytest_asyncio

from evidently.core.datasets import Dataset
from evidently.legacy.core import new_id
from evidently.ui.service.datasets.data_source import FileDataSource
from evidently.ui.service.datasets.metadata import DatasetMetadata
from evidently.ui.service.datasets.metadata import DatasetOrigin
from evidently.ui.service.storage.sql.dataset import SQLDatasetMetadataStorage


@pytest.fixture
def dataset_metadata_storage(sqlite_engine):
    """Return a ``SQLDatasetMetadataStorage`` built on the ``sqlite_engine`` fixture."""
    return SQLDatasetMetadataStorage(sqlite_engine)


@pytest_asyncio.fixture
async def setup_user_and_project(metadata_storage, test_user, test_project_id):
    """Add the test project to ``metadata_storage`` on behalf of ``test_user``.

    Tests request this fixture only for its side effect; it yields no value.
    """
    from evidently.ui.service.base import Project

    project = Project(
        id=test_project_id,
        name="Test Project",
        description="Test",
        # NOTE(review): naive local timestamp; confirm whether the storage expects UTC.
        created_at=datetime.datetime.now(),
    )
    await metadata_storage.add_project(project, test_user, org_id=None)


@pytest.fixture
def sample_dataframe():
    """Return a small 3-row, 2-column dataframe (``col1`` ints, ``col2`` strings)."""
    return pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})


@pytest.fixture
def sample_dataset_metadata(test_project_id, test_user, sample_dataframe):
    """Build a non-draft, file-origin ``DatasetMetadata`` describing ``sample_dataframe``.

    Row/column counts and the column list are derived from the dataframe; the
    data definition comes from ``Dataset.from_pandas``. ``size_bytes`` is a
    fixed placeholder value, not the real size of the dataframe.
    """
    df = sample_dataframe
    data_def = Dataset.from_pandas(df).data_definition
    return DatasetMetadata(
        id=new_id(),
        project_id=test_project_id,
        author_id=test_user.id,
        name="test_dataset",
        description="Test dataset",
        data_definition=data_def,
        source=FileDataSource(project_id=test_project_id, filename="test_file.parquet"),
        size_bytes=100,
        row_count=len(df),
        column_count=len(df.columns),
        all_columns=list(df.columns),
        is_draft=False,
        draft_params=None,
        origin=DatasetOrigin.file,
        metadata={},
        tags=[],
    )


@pytest.mark.asyncio
async def test_add_dataset_metadata(
    dataset_metadata_storage, setup_user_and_project, test_user, test_project_id, sample_dataset_metadata
):
    """Adding dataset metadata returns the id of the metadata that was passed in."""
    dataset_id = await dataset_metadata_storage.add_dataset_metadata(
        test_user.id, test_project_id, sample_dataset_metadata
    )
    assert dataset_id == sample_dataset_metadata.id


@pytest.mark.asyncio
async def test_get_dataset_metadata(
    dataset_metadata_storage, setup_user_and_project, test_user, test_project_id, sample_dataset_metadata
):
    """Stored metadata can be fetched by id with its identifying fields intact."""
    await dataset_metadata_storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)
    retrieved = await dataset_metadata_storage.get_dataset_metadata(sample_dataset_metadata.id)
    assert retrieved is not None
    assert retrieved.id == sample_dataset_metadata.id
    assert retrieved.name == sample_dataset_metadata.name
    assert retrieved.description == sample_dataset_metadata.description
    assert retrieved.project_id == test_project_id


@pytest.mark.asyncio
async def test_update_dataset_metadata(
    dataset_metadata_storage, setup_user_and_project, test_user, test_project_id, sample_dataset_metadata
):
    """Updating name and description is visible on the next read."""
    await dataset_metadata_storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)
    sample_dataset_metadata.name = "updated_name"
    sample_dataset_metadata.description = "updated_description"
    await dataset_metadata_storage.update_dataset_metadata(sample_dataset_metadata.id, sample_dataset_metadata)
    retrieved = await dataset_metadata_storage.get_dataset_metadata(sample_dataset_metadata.id)
    # Guard first so a missing row fails as a clear assertion, not an AttributeError.
    assert retrieved is not None
    assert retrieved.name == "updated_name"
    assert retrieved.description == "updated_description"


@pytest.mark.asyncio
async def test_list_datasets_metadata(
    dataset_metadata_storage, setup_user_and_project, test_user, test_project_id, sample_dataset_metadata
):
    """Listing a project's datasets with all optional arguments as None returns the stored one."""
    await dataset_metadata_storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)
    datasets = await dataset_metadata_storage.list_datasets_metadata(test_project_id, None, None, None)
    assert len(datasets) == 1
    assert datasets[0].id == sample_dataset_metadata.id


@pytest.mark.asyncio
async def test_mark_dataset_deleted(
    dataset_metadata_storage, setup_user_and_project, test_user, test_project_id, sample_dataset_metadata
):
    """After ``mark_dataset_deleted`` the dataset is no longer returned by id lookup."""
    await dataset_metadata_storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)
    await dataset_metadata_storage.mark_dataset_deleted(sample_dataset_metadata.id)
    retrieved = await dataset_metadata_storage.get_dataset_metadata(sample_dataset_metadata.id)
    assert retrieved is None


@pytest.mark.asyncio
async def test_datasets_count(
    dataset_metadata_storage, setup_user_and_project, test_user, test_project_id, sample_dataset_metadata
):
    """The project's dataset count is 1 after a single dataset is added."""
    await dataset_metadata_storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)
    count = await dataset_metadata_storage.datasets_count(test_project_id)
    assert count == 1


@pytest.mark.asyncio
async def test_list_datasets_filter_by_origin(
    dataset_metadata_storage, setup_user_and_project, test_user, test_project_id, sample_dataset_metadata
):
    """Listing with an origin list returns only datasets of a matching origin.

    The stored dataset has ``origin=DatasetOrigin.file``, so it must appear when
    filtering by ``file`` and be absent when filtering by ``dataset``.
    """
    await dataset_metadata_storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)
    datasets = await dataset_metadata_storage.list_datasets_metadata(test_project_id, None, [DatasetOrigin.file], None)
    assert len(datasets) == 1
    datasets = await dataset_metadata_storage.list_datasets_metadata(
        test_project_id, None, [DatasetOrigin.dataset], None
    )
    assert len(datasets) == 0


@pytest.mark.asyncio
async def test_list_datasets_filter_by_draft(
    dataset_metadata_storage, setup_user_and_project, test_user, test_project_id, sample_dataset_metadata
):
    """Check that the draft-status argument of ``list_datasets_metadata`` filters results.

    The ``sample_dataset_metadata`` fixture is created with ``is_draft=False``,
    so the stored dataset should be listed when asking for non-drafts (``False``)
    and be absent when asking for drafts (``True``).
    ``setup_user_and_project`` is requested only for its side effect.
    """
    storage = dataset_metadata_storage
    await storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)

    # Non-draft listing: the single stored dataset is expected.
    non_draft_listing = await storage.list_datasets_metadata(test_project_id, None, None, False)
    assert len(non_draft_listing) == 1

    # Draft listing: nothing was stored as a draft.
    draft_listing = await storage.list_datasets_metadata(test_project_id, None, None, True)
    assert len(draft_listing) == 0