/ src / analysis / data / data_file_handler.py
data_file_handler.py
 1  # Python Imports
 2  import logging
 3  from pathlib import Path
 4  from typing import List, Optional
 5  
 6  import pandas as pd
 7  from result import Err, Ok, Result
 8  
 9  # Project Imports
10  from src.analysis.data.data_handler import DataHandler
11  from src.analysis.utils import file_utils
12  
13  logger = logging.getLogger(__name__)
14  
15  
16  class DataFileHandler(DataHandler):
17  
18      def __init__(self, ignore_columns: Optional[List] = None, include_files: Optional[List] = None):
19          super().__init__(ignore_columns)
20          self._include_files = include_files
21  
22      def concat_dataframes_from_folders_as_mean(self, folders: List, points: int):
23          for folder in folders:
24              folder_path = Path(folder)
25              folder_df = pd.DataFrame()
26              match file_utils.get_files_from_folder_path(folder_path, self._include_files):
27                  case Ok(data_files_names):
28                      folder_df = self._concat_files_as_mean(
29                          folder_df, data_files_names, folder_path, points
30                      )
31                      folder_df["class"] = f"{folder_path.parent.name}/{folder_path.name}"
32                      self._dataframe = pd.concat([self._dataframe, folder_df])
33                  case Err(error):
34                      logger.error(error)
35  
36      def _concat_files_as_mean(
37          self, target_df: pd.DataFrame, data_files_path: List, location: Path, points: int
38      ) -> pd.DataFrame:
39          for file_path in data_files_path:
40              match self._concat_data_as_mean_from_file(target_df, location / file_path, points):
41                  case Ok(result_df):
42                      logger.info(f"{file_path} added")
43                      target_df = result_df
44                  case Err(msg):
45                      logger.error(msg)
46  
47          return target_df
48  
49      def _concat_data_as_mean_from_file(
50          self, target_df: pd.DataFrame, file_path: Path, points: int
51      ) -> Result[pd.DataFrame, str]:
52          if not file_path.exists():
53              return Err(f"{file_path} cannot be dumped to memory.")
54  
55          logger.info(f"Reading {file_path} with {points} datapoints")
56          file_df = pd.read_csv(file_path, parse_dates=["Time"], index_col="Time", nrows=points)
57          if len(file_df) < points:
58              logger.warning(f"Not enough datapoints in {file_path}")
59  
60          target_df = self.concat_data_as_mean(target_df, file_df, file_path.name)
61  
62          return Ok(target_df)