/ src / python / txtai / workflow / task / export.py
export.py
 1  """
 2  ExportTask module
 3  """
 4  
 5  import datetime
 6  import os
 7  
 8  # Conditional import
 9  try:
10      import pandas as pd
11  
12      PANDAS = True
13  except ImportError:
14      PANDAS = False
15  
16  from .base import Task
17  
18  
class ExportTask(Task):
    """
    Workflow task that writes the outputs of the parent task call to a file through Pandas.
    """

    def register(self, output=None, timestamp=None):
        """
        Stores the export settings on this task. Fails fast when Pandas could not be imported.

        Args:
            output: path of the file to write
            timestamp: when truthy, a UTC timestamp is inserted in front of the file extension

        Raises:
            ImportError: if the optional Pandas dependency is not installed
        """

        if not PANDAS:
            raise ImportError('ExportTask is not available - install "workflow" extra to enable')

        # pylint: disable=W0201
        self.output, self.timestamp = output, timestamp

    def __call__(self, elements, executor=None):
        """
        Calls the parent task with elements, writes the results to the configured output file
        and returns the results unchanged.

        Args:
            elements: elements passed through to the parent task call
            executor: executor passed through to the parent task call

        Returns:
            results of the parent task call
        """

        # Delegate the actual work to the parent task
        results = super().__call__(elements, executor)

        # Split configured path into root and extension, the extension selects the file format
        root, suffix = os.path.splitext(self.output)
        excel = suffix.lower() == ".xlsx"

        if self.timestamp:
            # Target path becomes <root>.<UTC timestamp><extension>
            stamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
            path = ".".join((root, stamp + suffix))
        else:
            # No timestamp requested, write to the configured path as-is
            path = self.output

        # Excel for .xlsx, CSV for everything else
        frame = pd.DataFrame(results)
        writer = frame.to_excel if excel else frame.to_csv
        writer(path, index=False)

        # Hand results back to the caller
        return results