testtabular.py
"""
Tabular module tests
"""

import unittest

from txtai.pipeline import Tabular

# pylint: disable=C0411
from utils import Utils


class TestTabular(unittest.TestCase):
    """
    Tabular tests.
    """

    @classmethod
    def setUpClass(cls):
        """
        Create single tabular instance
        """

        # Shared instance: "id" is the id column, "text" is the only text column
        cls.tabular = Tabular("id", ["text"])

    def testContent(self):
        """
        Test parsing additional content
        """

        tabular = Tabular("id", ["text"], True)

        row = {"id": 0, "text": "This is a test", "flag": 1}

        # When content is enabled, both (uid, text, tags) and (uid, data, tags) rows are generated
        # given that data doesn't necessarily include the text to index
        rows = tabular([row])

        # Index 1 is expected to be the (uid, data, tags) tuple that follows the text tuple
        uid, data, _ = rows[1]

        # Data should contain the entire input row
        self.assertEqual(uid, 0)
        self.assertEqual(data, row)

        # Only select flag field
        tabular.content = ["flag"]
        rows = tabular([row])
        uid, data, _ = rows[1]

        # Data should only contain a single field, flag
        # assertEqual (rather than assertTrue on a comparison) reports both values on failure
        self.assertEqual(uid, 0)
        self.assertEqual(list(data.keys()), ["flag"])
        self.assertEqual(data["flag"], 1)

    def testCSV(self):
        """
        Test parsing a CSV file
        """

        # A file path input yields a nested result: one list of rows per input file
        rows = self.tabular([Utils.PATH + "/tabular.csv"])
        uid, text, _ = rows[0][0]

        self.assertEqual(uid, 0)
        self.assertEqual(text, "The first sentence")

    def testDict(self):
        """
        Test parsing a dict
        """

        # A dict input yields a flat result: one tuple per dict
        rows = self.tabular([{"id": 0, "text": "This is a test"}])
        uid, text, _ = rows[0]

        self.assertEqual(uid, 0)
        self.assertEqual(text, "This is a test")

    def testList(self):
        """
        Test parsing a list
        """

        # A list-of-dicts input yields a nested result: one list of rows per input list
        rows = self.tabular([[{"id": 0, "text": "This is a test"}]])
        uid, text, _ = rows[0][0]

        self.assertEqual(uid, 0)
        self.assertEqual(text, "This is a test")

    def testMissingColumns(self):
        """
        Test rows with uneven or missing columns
        """

        tabular = Tabular("id", ["text"], True)

        rows = tabular([{"id": 0, "text": "This is a test", "metadata": "meta"}, {"id": 1, "text": "This is a test"}])

        # When content is enabled both (id, text, tag) and (id, data, tag) tuples are generated given that
        # data doesn't necessarily include the text to index
        # Index 3 is expected to be the data tuple for the second input row, which has no "metadata" value
        _, data, _ = rows[3]

        # The missing column should be present in data with a None value
        self.assertIsNone(data["metadata"])

    def testNoColumns(self):
        """
        Test creating text without specifying columns
        """

        # No text columns given: text is expected to be built from all non-id fields
        tabular = Tabular("id")
        rows = tabular([{"id": 0, "text": "This is a test", "summary": "Describes text in more detail"}])
        uid, text, _ = rows[0]

        self.assertEqual(uid, 0)
        self.assertEqual(text, "This is a test. Describes text in more detail")