/ tests / future / test_ui / test_sql_datasets.py
test_sql_datasets.py
  1  import datetime
  2  
  3  import pandas as pd
  4  import pytest
  5  import pytest_asyncio
  6  
  7  from evidently.core.datasets import Dataset
  8  from evidently.legacy.core import new_id
  9  from evidently.ui.service.datasets.data_source import FileDataSource
 10  from evidently.ui.service.datasets.metadata import DatasetMetadata
 11  from evidently.ui.service.datasets.metadata import DatasetOrigin
 12  from evidently.ui.service.storage.sql.dataset import SQLDatasetMetadataStorage
 13  
 14  
 15  @pytest.fixture
 16  def dataset_metadata_storage(sqlite_engine):
 17      """Create SQL dataset metadata storage instance."""
 18      return SQLDatasetMetadataStorage(sqlite_engine)
 19  
 20  
 21  @pytest_asyncio.fixture
 22  async def setup_user_and_project(metadata_storage, test_user, test_project_id):
 23      """Set up test user and project in database."""
 24      from evidently.ui.service.base import Project
 25  
 26      project = Project(
 27          id=test_project_id,
 28          name="Test Project",
 29          description="Test",
 30          created_at=datetime.datetime.now(),
 31      )
 32      await metadata_storage.add_project(project, test_user, org_id=None)
 33  
 34  
 35  @pytest.fixture
 36  def sample_dataframe():
 37      """Create a sample dataframe for testing."""
 38      return pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
 39  
 40  
 41  @pytest.fixture
 42  def sample_dataset_metadata(test_project_id, test_user, sample_dataframe):
 43      """Create a sample dataset metadata."""
 44      df = sample_dataframe
 45      data_def = Dataset.from_pandas(df).data_definition
 46      return DatasetMetadata(
 47          id=new_id(),
 48          project_id=test_project_id,
 49          author_id=test_user.id,
 50          name="test_dataset",
 51          description="Test dataset",
 52          data_definition=data_def,
 53          source=FileDataSource(project_id=test_project_id, filename="test_file.parquet"),
 54          size_bytes=100,
 55          row_count=len(df),
 56          column_count=len(df.columns),
 57          all_columns=list(df.columns),
 58          is_draft=False,
 59          draft_params=None,
 60          origin=DatasetOrigin.file,
 61          metadata={},
 62          tags=[],
 63      )
 64  
 65  
 66  @pytest.mark.asyncio
 67  async def test_add_dataset_metadata(
 68      dataset_metadata_storage, setup_user_and_project, test_user, test_project_id, sample_dataset_metadata
 69  ):
 70      """Test adding dataset metadata."""
 71      dataset_id = await dataset_metadata_storage.add_dataset_metadata(
 72          test_user.id, test_project_id, sample_dataset_metadata
 73      )
 74      assert dataset_id == sample_dataset_metadata.id
 75  
 76  
 77  @pytest.mark.asyncio
 78  async def test_get_dataset_metadata(
 79      dataset_metadata_storage, setup_user_and_project, test_user, test_project_id, sample_dataset_metadata
 80  ):
 81      """Test retrieving dataset metadata."""
 82      await dataset_metadata_storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)
 83      retrieved = await dataset_metadata_storage.get_dataset_metadata(sample_dataset_metadata.id)
 84      assert retrieved is not None
 85      assert retrieved.id == sample_dataset_metadata.id
 86      assert retrieved.name == sample_dataset_metadata.name
 87      assert retrieved.description == sample_dataset_metadata.description
 88      assert retrieved.project_id == test_project_id
 89  
 90  
 91  @pytest.mark.asyncio
 92  async def test_update_dataset_metadata(
 93      dataset_metadata_storage, setup_user_and_project, test_user, test_project_id, sample_dataset_metadata
 94  ):
 95      """Test updating dataset metadata."""
 96      await dataset_metadata_storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)
 97      sample_dataset_metadata.name = "updated_name"
 98      sample_dataset_metadata.description = "updated_description"
 99      await dataset_metadata_storage.update_dataset_metadata(sample_dataset_metadata.id, sample_dataset_metadata)
100      retrieved = await dataset_metadata_storage.get_dataset_metadata(sample_dataset_metadata.id)
101      assert retrieved.name == "updated_name"
102      assert retrieved.description == "updated_description"
103  
104  
@pytest.mark.asyncio
async def test_list_datasets_metadata(
    dataset_metadata_storage,
    setup_user_and_project,
    test_user,
    test_project_id,
    sample_dataset_metadata,
):
    """Check that listing a project's datasets returns exactly the one that was added.

    The three arguments after the project id are all passed as ``None``.
    """
    storage = dataset_metadata_storage
    await storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)

    listed = await storage.list_datasets_metadata(test_project_id, None, None, None)

    assert len(listed) == 1
    assert listed[0].id == sample_dataset_metadata.id
114  
115  
@pytest.mark.asyncio
async def test_mark_dataset_deleted(
    dataset_metadata_storage,
    setup_user_and_project,
    test_user,
    test_project_id,
    sample_dataset_metadata,
):
    """Check that a dataset marked as deleted is no longer returned by ``get_dataset_metadata``."""
    storage = dataset_metadata_storage
    target_id = sample_dataset_metadata.id

    await storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)
    await storage.mark_dataset_deleted(target_id)

    after_delete = await storage.get_dataset_metadata(target_id)
    assert after_delete is None
125  
126  
@pytest.mark.asyncio
async def test_datasets_count(
    dataset_metadata_storage,
    setup_user_and_project,
    test_user,
    test_project_id,
    sample_dataset_metadata,
):
    """Check that ``datasets_count`` reports one dataset after a single add."""
    storage = dataset_metadata_storage
    await storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)

    total = await storage.datasets_count(test_project_id)
    assert total == 1
135  
136  
@pytest.mark.asyncio
async def test_list_datasets_filter_by_origin(
    dataset_metadata_storage,
    setup_user_and_project,
    test_user,
    test_project_id,
    sample_dataset_metadata,
):
    """Check that listing with an origin list only returns datasets of a matching origin.

    The stored dataset has origin ``DatasetOrigin.file``, so it is expected when that
    origin is requested and absent when ``DatasetOrigin.dataset`` is requested.
    """
    storage = dataset_metadata_storage
    await storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)

    # (requested origin, number of datasets expected back), checked in this order.
    expectations = (
        (DatasetOrigin.file, 1),
        (DatasetOrigin.dataset, 0),
    )
    for requested_origin, expected_total in expectations:
        matched = await storage.list_datasets_metadata(test_project_id, None, [requested_origin], None)
        assert len(matched) == expected_total
149  
150  
@pytest.mark.asyncio
async def test_list_datasets_filter_by_draft(
    dataset_metadata_storage,
    setup_user_and_project,
    test_user,
    test_project_id,
    sample_dataset_metadata,
):
    """Check that listing with a draft flag only returns datasets with that draft status.

    The stored dataset has ``is_draft=False``, so it is expected for ``False`` and
    absent for ``True``.
    """
    storage = dataset_metadata_storage
    await storage.add_dataset_metadata(test_user.id, test_project_id, sample_dataset_metadata)

    # (draft flag passed to the listing call, number of datasets expected back).
    expectations = (
        (False, 1),
        (True, 0),
    )
    for draft_flag, expected_total in expectations:
        matched = await storage.list_datasets_metadata(test_project_id, None, None, draft_flag)
        assert len(matched) == expected_total