Cradicle Explorer

/ test / python / testpipeline / testdata / testtabular.py
testtabular.py
  1  """
  2  Tabular module tests
  3  """
  4  
  5  import unittest
  6  
  7  from txtai.pipeline import Tabular
  8  
  9  # pylint: disable=C0411
 10  from utils import Utils
 11  
 12  
 13  class TestTabular(unittest.TestCase):
 14      """
 15      Tabular tests.
 16      """
 17  
 18      @classmethod
 19      def setUpClass(cls):
 20          """
 21          Create single tabular instance
 22          """
 23  
 24          cls.tabular = Tabular("id", ["text"])
 25  
 26      def testContent(self):
 27          """
 28          Test parsing additional content
 29          """
 30  
 31          tabular = Tabular("id", ["text"], True)
 32  
 33          row = {"id": 0, "text": "This is a test", "flag": 1}
 34  
 35          # When content is enabled, both (uid, text, tags) and (uid, data, tags) rows are generated
 36          # given that data doesn't necessarily include the text to index
 37          rows = tabular([row])
 38          uid, data, _ = rows[1]
 39  
 40          # Data should contain the entire input row
 41          self.assertEqual(uid, 0)
 42          self.assertEqual(data, row)
 43  
 44          # Only select flag field
 45          tabular.content = ["flag"]
 46          rows = tabular([row])
 47          uid, data, _ = rows[1]
 48  
 49          # Data should only contain a single field, flag
 50          self.assertEqual(uid, 0)
 51          self.assertTrue(list(data.keys()) == ["flag"])
 52          self.assertEqual(data["flag"], 1)
 53  
 54      def testCSV(self):
 55          """
 56          Test parsing a CSV file
 57          """
 58  
 59          rows = self.tabular([Utils.PATH + "/tabular.csv"])
 60          uid, text, _ = rows[0][0]
 61  
 62          self.assertEqual(uid, 0)
 63          self.assertEqual(text, "The first sentence")
 64  
 65      def testDict(self):
 66          """
 67          Test parsing a dict
 68          """
 69  
 70          rows = self.tabular([{"id": 0, "text": "This is a test"}])
 71          uid, text, _ = rows[0]
 72  
 73          self.assertEqual(uid, 0)
 74          self.assertEqual(text, "This is a test")
 75  
 76      def testList(self):
 77          """
 78          Test parsing a list
 79          """
 80  
 81          rows = self.tabular([[{"id": 0, "text": "This is a test"}]])
 82          uid, text, _ = rows[0][0]
 83  
 84          self.assertEqual(uid, 0)
 85          self.assertEqual(text, "This is a test")
 86  
 87      def testMissingColumns(self):
 88          """
 89          Test rows with uneven or missing columns
 90          """
 91  
 92          tabular = Tabular("id", ["text"], True)
 93  
 94          rows = tabular([{"id": 0, "text": "This is a test", "metadata": "meta"}, {"id": 1, "text": "This is a test"}])
 95  
 96          # When content is enabled both (id, text, tag) and (id, data, tag) tuples are generated given that
 97          # data doesn't necessarily include the text to index
 98          _, data, _ = rows[3]
 99  
100          self.assertIsNone(data["metadata"])
101  
102      def testNoColumns(self):
103          """
104          Test creating text without specifying columns
105          """
106  
107          tabular = Tabular("id")
108          rows = tabular([{"id": 0, "text": "This is a test", "summary": "Describes text in more detail"}])
109          uid, text, _ = rows[0]
110  
111          self.assertEqual(uid, 0)
112          self.assertEqual(text, "This is a test. Describes text in more detail")