/ test / python / testdatabase / testsql.py
testsql.py
  1  """
  2  SQL module tests
  3  """
  4  
  5  import unittest
  6  
  7  from txtai.database import DatabaseFactory, SQL, SQLError
  8  
  9  
 10  class TestSQL(unittest.TestCase):
 11      """
 12      Test SQL parsing and generation.
 13      """
 14  
 15      @classmethod
 16      def setUpClass(cls):
 17          """
 18          Initialize test data.
 19          """
 20  
 21          # Create SQL parser for SQLite
 22          cls.db = DatabaseFactory.create({"content": True})
 23          cls.db.initialize()
 24  
 25          cls.sql = SQL(cls.db)
 26  
 27      def testAlias(self):
 28          """
 29          Test alias clauses
 30          """
 31  
 32          self.assertSql("select", "select a as a1 from txtai", "json_extract(data, '$.a') as a1")
 33          self.assertSql("select", "select a 'a1' from txtai", "json_extract(data, '$.a') 'a1'")
 34          self.assertSql("select", 'select a "a1" from txtai', "json_extract(data, '$.a') \"a1\"")
 35          self.assertSql("select", "select a a1 from txtai", "json_extract(data, '$.a') a1")
 36          self.assertSql(
 37              "select",
 38              "select a, b as b1, c, d + 1 as 'd1' from txtai",
 39              "json_extract(data, '$.a') as \"a\", json_extract(data, '$.b') as b1, "
 40              + "json_extract(data, '$.c') as \"c\", json_extract(data, '$.d') + 1 as 'd1'",
 41          )
 42          self.assertSql("select", "select id as myid from txtai", "s.id as myid")
 43          self.assertSql("select", "select length(a) t from txtai", "length(json_extract(data, '$.a')) t")
 44  
 45          self.assertSql("where", "select id as myid from txtai where myid != 3 and a != 1", "myid != 3 and json_extract(data, '$.a') != 1")
 46          self.assertSql("where", "select txt T from txtai where t LIKE '%abc%'", "t LIKE '%abc%'")
 47          self.assertSql("where", "select txt 'T' from txtai where t LIKE '%abc%'", "t LIKE '%abc%'")
 48          self.assertSql("where", "select txt \"T\" from txtai where t LIKE '%abc%'", "t LIKE '%abc%'")
 49          self.assertSql("where", "select txt as T from txtai where t LIKE '%abc%'", "t LIKE '%abc%'")
 50          self.assertSql("where", "select txt as 'T' from txtai where t LIKE '%abc%'", "t LIKE '%abc%'")
 51          self.assertSql("where", "select txt as \"T\" from txtai where t LIKE '%abc%'", "t LIKE '%abc%'")
 52  
 53          self.assertSql("groupby", "select id as myid, count(*) from txtai group by myid, a", "myid, json_extract(data, '$.a')")
 54          self.assertSql("orderby", "select id as myid from txtai order by myid, a", "myid, json_extract(data, '$.a')")
 55  
 56      def testBadSQL(self):
 57          """
 58          Test invalid SQL
 59          """
 60  
 61          with self.assertRaises(SQLError):
 62              self.db.search("select * from txtai where order by")
 63  
 64          with self.assertRaises(SQLError):
 65              self.db.search("select * from txtai where groupby order by")
 66  
 67          with self.assertRaises(SQLError):
 68              self.db.search("select * from txtai where a(1)")
 69  
 70          with self.assertRaises(SQLError):
 71              self.db.search("select a b c from txtai where id match id")
 72  
 73      def testBracket(self):
 74          """
 75          Test bracket expressions
 76          """
 77  
 78          self.assertSql("select", "select [a] from txtai", "json_extract(data, '$.a') as \"a\"")
 79          self.assertSql("select", "select [a] ab from txtai", "json_extract(data, '$.a') ab")
 80          self.assertSql("select", "select [abc] from txtai", "json_extract(data, '$.abc') as \"abc\"")
 81          self.assertSql("select", "select [id], text, score from txtai", "s.id, text, score")
 82          self.assertSql("select", "select [ab cd], text, score from txtai", "json_extract(data, '$.ab cd') as \"ab cd\", text, score")
 83          self.assertSql("select", "select [a[0]] from txtai", "json_extract(data, '$.a[0]') as \"a[0]\"")
 84          self.assertSql("select", "select [a[0].ab] from txtai", "json_extract(data, '$.a[0].ab') as \"a[0].ab\"")
 85          self.assertSql("select", "select [a[0].c[0]] from txtai", "json_extract(data, '$.a[0].c[0]') as \"a[0].c[0]\"")
 86          self.assertSql("select", "select avg([a]) from txtai", "avg(json_extract(data, '$.a')) as \"avg([a])\"")
 87  
 88          # Test single quote escaping in bracket expressions
 89          self.assertSql("select", "select [field'] from txtai", "json_extract(data, '$.field''') as \"field'\"")
 90  
 91          self.assertSql("where", "select * from txtai where [a b] < 1 or a > 1", "json_extract(data, '$.a b') < 1 or json_extract(data, '$.a') > 1")
 92          self.assertSql("where", "select [a[0].c[0]] a from txtai where a < 1", "a < 1")
 93          self.assertSql("groupby", "select * from txtai group by [a]", "json_extract(data, '$.a')")
 94          self.assertSql("orderby", "select * from txtai where order by [a]", "json_extract(data, '$.a')")
 95  
 96      def testDistinct(self):
 97          """
 98          Test distinct expressions
 99          """
100  
101          # Attributes
102          self.assertSql("select", "select distinct id from txtai", "distinct s.id")
103          self.assertSql("select", "select distinct id as myid from txtai", "distinct s.id as myid")
104          self.assertSql("select", "select distinct a from txtai", "distinct json_extract(data, '$.a') as \"a\"")
105          self.assertSql("select", "select distinct a.b from txtai", "distinct json_extract(data, '$.a.b') as \"a.b\"")
106  
107          # Bracket expression
108          self.assertSql("select", "select distinct [ab cd] from txtai", "distinct json_extract(data, '$.ab cd') as \"distinct[ab cd]\"")
109  
110          # Function expression
111          self.assertSql("select", "select distinct(id) from txtai", 'distinct(s.id) as "distinct(id)"')
112          self.assertSql("select", "select count(distinct id) from txtai", 'count(distinct s.id) as "count(distinct id)"')
113          self.assertSql("select", "select count(distinct a) from txtai", "count(distinct json_extract(data, '$.a')) as \"count(distinct a)\"")
114          self.assertSql("select", "select count(distinct avg(id)) from txtai", 'count(distinct avg(s.id)) as "count(distinct avg(id))"')
115          self.assertSql(
116              "select", "select count(distinct avg(a)) from txtai", "count(distinct avg(json_extract(data, '$.a'))) as \"count(distinct avg(a))\""
117          )
118  
119          # Compound expression
120          self.assertSql("select", "select distinct a/1 from txtai", "distinct json_extract(data, '$.a') / 1 as \"a / 1\"")
121          self.assertSql("select", "select distinct(a/1) from txtai", "distinct(json_extract(data, '$.a') / 1) as \"distinct(a / 1)\"")
122  
123      def testGroupby(self):
124          """
125          Test group by clauses
126          """
127  
128          prefix = "select count(*), flag from txtai "
129  
130          self.assertSql("groupby", prefix + "group by text", "text")
131          self.assertSql("groupby", prefix + "group by distinct(a)", "distinct(json_extract(data, '$.a'))")
132          self.assertSql("groupby", prefix + "where a > 1 group by text", "text")
133  
134      def testHaving(self):
135          """
136          Test having clauses
137          """
138  
139          prefix = "select count(*), flag from txtai "
140  
141          self.assertSql("having", prefix + "group by text having count(*) > 1", "count(*) > 1")
142          self.assertSql("having", prefix + "where flag = 1 group by text having count(*) > 1", "count(*) > 1")
143  
144      def testIsSQL(self):
145          """
146          Test SQL detection method.
147          """
148  
149          self.assertTrue(self.sql.issql("select text from txtai where id = 1"))
150          self.assertFalse(self.sql.issql(1234))
151  
152      def testLimit(self):
153          """
154          Test limit clauses
155          """
156  
157          prefix = "select count(*) from txtai "
158  
159          self.assertSql("limit", prefix + "limit 100", "100")
160  
161      def testOffset(self):
162          """
163          Test offset clauses
164          """
165  
166          prefix = "select count(*) from txtai "
167  
168          self.assertSql("offset", prefix + "limit 100 offset 50", "50")
169          self.assertSql("offset", prefix + "offset 50", "50")
170  
171      def testOrderby(self):
172          """
173          Test order by clauses
174          """
175  
176          prefix = "select * from txtai "
177  
178          self.assertSql("orderby", prefix + "order by id", "s.id")
179          self.assertSql("orderby", prefix + "order by id, text", "s.id, text")
180          self.assertSql("orderby", prefix + "order by id asc", "s.id asc")
181          self.assertSql("orderby", prefix + "order by id desc", "s.id desc")
182          self.assertSql("orderby", prefix + "order by id asc, text desc", "s.id asc, text desc")
183  
184      def testSelectBasic(self):
185          """
186          Test basic select clauses
187          """
188  
189          self.assertSql("select", "select id, indexid, tags from txtai", "s.id, s.indexid, s.tags")
190          self.assertSql("select", "select id, indexid, flag from txtai", "s.id, s.indexid, json_extract(data, '$.flag') as \"flag\"")
191          self.assertSql("select", "select id, indexid, a.b.c from txtai", "s.id, s.indexid, json_extract(data, '$.a.b.c') as \"a.b.c\"")
192          self.assertSql("select", "select 'id', [id], (id) from txtai", "'id', s.id, (s.id)")
193          self.assertSql("select", "select * from txtai", "*")
194  
195      def testSelectCompound(self):
196          """
197          Test compound select clauses
198          """
199  
200          self.assertSql("select", "select a + 1 from txtai", "json_extract(data, '$.a') + 1 as \"a + 1\"")
201          self.assertSql("select", "select 1 * a from txtai", "1 * json_extract(data, '$.a') as \"1 * a\"")
202          self.assertSql("select", "select a/1 from txtai", "json_extract(data, '$.a') / 1 as \"a / 1\"")
203          self.assertSql("select", "select avg(a-b) from txtai", "avg(json_extract(data, '$.a') - json_extract(data, '$.b')) as \"avg(a - b)\"")
204          self.assertSql("select", "select distinct(text) from txtai", "distinct(text)")
205          self.assertSql("select", "select id, score, (a/2)*3 from txtai", "s.id, score, (json_extract(data, '$.a') / 2) * 3 as \"(a / 2) * 3\"")
206          self.assertSql("select", "select id, score, (a/2*3) from txtai", "s.id, score, (json_extract(data, '$.a') / 2 * 3) as \"(a / 2 * 3)\"")
207          self.assertSql(
208              "select",
209              "select func(func2(indexid + 1), a) from txtai",
210              "func(func2(s.indexid + 1), json_extract(data, '$.a')) as \"func(func2(indexid + 1), a)\"",
211          )
212          self.assertSql("select", "select func(func2(indexid + 1), a) a from txtai", "func(func2(s.indexid + 1), json_extract(data, '$.a')) a")
213          self.assertSql("select", "select 'prefix' || id from txtai", "'prefix' || s.id as \"'prefix' || id\"")
214          self.assertSql("select", "select 'prefix' || id id from txtai", "'prefix' || s.id id")
215          self.assertSql("select", "select 'prefix' || a a from txtai", "'prefix' || json_extract(data, '$.a') a")
216  
217      def testSimilar(self):
218          """
219          Test similar functions
220          """
221  
222          prefix = "select * from txtai "
223  
224          self.assertSql("where", prefix + "where similar('abc')", "__SIMILAR__0")
225          self.assertSql("similar", prefix + "where similar('abc')", [["abc"]])
226  
227          self.assertSql("where", prefix + "where similar('abc') AND id = 1", "__SIMILAR__0 AND s.id = 1")
228          self.assertSql("similar", prefix + "where similar('abc')", [["abc"]])
229  
230          self.assertSql("where", prefix + "where similar('abc') and similar('def')", "__SIMILAR__0 and __SIMILAR__1")
231          self.assertSql("similar", prefix + "where similar('abc') and similar('def')", [["abc"], ["def"]])
232  
233          self.assertSql("where", prefix + "where similar('abc', 1000)", "__SIMILAR__0")
234          self.assertSql("similar", prefix + "where similar('abc', 1000)", [["abc", "1000"]])
235  
236          self.assertSql("where", prefix + "where similar('abc', 1000) and similar('def', 10)", "__SIMILAR__0 and __SIMILAR__1")
237          self.assertSql("similar", prefix + "where similar('abc', 1000) and similar('def', 10)", [["abc", "1000"], ["def", "10"]])
238  
239          self.assertSql("where", prefix + "where coalesce(similar('abc'), similar('abc'))", "coalesce(__SIMILAR__0, __SIMILAR__1)")
240          self.assertSql("similar", prefix + "where coalesce(similar('abc'), similar('abc'))", [["abc"], ["abc"]])
241  
242      def testUpper(self):
243          """
244          Test SQL statements are case insensitive.
245          """
246  
247          self.assertSql("groupby", "SELECT * FROM TXTAI WHERE a = 1 GROUP BY id", "s.id")
248          self.assertSql("orderby", "SELECT * FROM TXTAI WHERE a = 1 ORDER BY id", "s.id")
249  
250      def testWhereBasic(self):
251          """
252          Test basic where clauses
253          """
254  
255          prefix = "select * from txtai "
256  
257          self.assertSql("where", prefix + "where a = b", "json_extract(data, '$.a') = json_extract(data, '$.b')")
258          self.assertSql("where", prefix + "where abc = def", "json_extract(data, '$.abc') = json_extract(data, '$.def')")
259          self.assertSql("where", prefix + "where a = b.value", "json_extract(data, '$.a') = json_extract(data, '$.b.value')")
260          self.assertSql("where", prefix + "where a = 1", "json_extract(data, '$.a') = 1")
261          self.assertSql("where", prefix + "WHERE 1 = a", "1 = json_extract(data, '$.a')")
262          self.assertSql("where", prefix + "WHERE a LIKE 'abc'", "json_extract(data, '$.a') LIKE 'abc'")
263          self.assertSql("where", prefix + "WHERE a NOT LIKE 'abc'", "json_extract(data, '$.a') NOT LIKE 'abc'")
264          self.assertSql("where", prefix + "WHERE a IN (1, 2, 3, b)", "json_extract(data, '$.a') IN (1, 2, 3, json_extract(data, '$.b'))")
265          self.assertSql("where", prefix + "WHERE a is not null", "json_extract(data, '$.a') is not null")
266          self.assertSql("where", prefix + "WHERE score >= 0.15", "score >= 0.15")
267  
268      def testWhereCompound(self):
269          """
270          Test compound where clauses
271          """
272  
273          prefix = "select * from txtai "
274  
275          self.assertSql("where", prefix + "where a > (b + 1)", "json_extract(data, '$.a') > (json_extract(data, '$.b') + 1)")
276          self.assertSql("where", prefix + "where a > func('abc')", "json_extract(data, '$.a') > func('abc')")
277          self.assertSql(
278              "where", prefix + "where (id = 1 or id = 2) and a like 'abc'", "(s.id = 1 or s.id = 2) and json_extract(data, '$.a') like 'abc'"
279          )
280          self.assertSql(
281              "where",
282              prefix + "where a > f(d(b, c, 1),1)",
283              "json_extract(data, '$.a') > f(d(json_extract(data, '$.b'), json_extract(data, '$.c'), 1), 1)",
284          )
285          self.assertSql("where", prefix + "where (id = 1 AND id = 2) OR indexid = 3", "(s.id = 1 AND s.id = 2) OR s.indexid = 3")
286          self.assertSql("where", prefix + "where f(id) = b(id)", "f(s.id) = b(s.id)")
287          self.assertSql("where", prefix + "WHERE f(id)", "f(s.id)")
288  
289      def assertSql(self, clause, query, expected):
290          """
291          Helper method to assert a query clause is as expected.
292  
293          Args:
294              clause: clause to select
295              query: input query
296              expected: expected transformed query value
297          """
298  
299          self.assertEqual(self.sql(query)[clause], expected)