/ tests / test_date_filter.py
test_date_filter.py
  1  import re
  2  from datetime import datetime
  3  
  4  import pytest
  5  
  6  from khoj.search_filter.date_filter import DateFilter
  7  
  8  
  9  @pytest.mark.filterwarnings("ignore:The localize method is no longer necessary.")
 10  def test_extract_date_range():
 11      assert DateFilter().extract_date_range('head dt>"1984-01-04" dt<"1984-01-07" tail') == [
 12          datetime(1984, 1, 5, 0, 0, 0).timestamp(),
 13          datetime(1984, 1, 7, 0, 0, 0).timestamp(),
 14      ]
 15      assert DateFilter().extract_date_range('head dt<="1984-01-01"') == [None, datetime(1984, 1, 2, 0, 0, 0).timestamp()]
 16      assert DateFilter().extract_date_range('head dt>="1984-01-01"') == [datetime(1984, 1, 1, 0, 0, 0).timestamp(), None]
 17      assert DateFilter().extract_date_range('head dt:"1984-01-01"') == [
 18          datetime(1984, 1, 1, 0, 0, 0).timestamp(),
 19          datetime(1984, 1, 2, 0, 0, 0).timestamp(),
 20      ]
 21      assert DateFilter().extract_date_range('head dt="1984-01-01"') == [
 22          datetime(1984, 1, 1, 0, 0, 0).timestamp(),
 23          datetime(1984, 1, 2, 0, 0, 0).timestamp(),
 24      ]
 25  
 26      # Unparseable date filter specified in query
 27      assert DateFilter().extract_date_range('head dt:"Summer of 69" tail') == []
 28  
 29      # No date filter specified in query
 30      assert DateFilter().extract_date_range("head tail") == []
 31  
 32      # Non intersecting date ranges
 33      assert DateFilter().extract_date_range('head dt>"1984-01-01" dt<"1984-01-01" tail') == []
 34  
 35  
 36  @pytest.mark.filterwarnings("ignore:The localize method is no longer necessary.")
 37  def test_parse():
 38      test_now = datetime(1984, 4, 1, 21, 21, 21)
 39  
 40      # day variations
 41      assert DateFilter().parse("today", relative_base=test_now) == (
 42          datetime(1984, 4, 1, 0, 0, 0),
 43          datetime(1984, 4, 2, 0, 0, 0),
 44      )
 45      assert DateFilter().parse("tomorrow", relative_base=test_now) == (
 46          datetime(1984, 4, 2, 0, 0, 0),
 47          datetime(1984, 4, 3, 0, 0, 0),
 48      )
 49      assert DateFilter().parse("yesterday", relative_base=test_now) == (
 50          datetime(1984, 3, 31, 0, 0, 0),
 51          datetime(1984, 4, 1, 0, 0, 0),
 52      )
 53      assert DateFilter().parse("5 days ago", relative_base=test_now) == (
 54          datetime(1984, 3, 27, 0, 0, 0),
 55          datetime(1984, 3, 28, 0, 0, 0),
 56      )
 57  
 58      # week variations
 59      assert DateFilter().parse("last week", relative_base=test_now) == (
 60          datetime(1984, 3, 18, 0, 0, 0),
 61          datetime(1984, 3, 25, 0, 0, 0),
 62      )
 63      assert DateFilter().parse("2 weeks ago", relative_base=test_now) == (
 64          datetime(1984, 3, 11, 0, 0, 0),
 65          datetime(1984, 3, 18, 0, 0, 0),
 66      )
 67  
 68      # month variations
 69      assert DateFilter().parse("next month", relative_base=test_now) == (
 70          datetime(1984, 5, 1, 0, 0, 0),
 71          datetime(1984, 6, 1, 0, 0, 0),
 72      )
 73      assert DateFilter().parse("2 months ago", relative_base=test_now) == (
 74          datetime(1984, 2, 1, 0, 0, 0),
 75          datetime(1984, 3, 1, 0, 0, 0),
 76      )
 77  
 78      # year variations
 79      assert DateFilter().parse("this year", relative_base=test_now) == (
 80          datetime(1984, 1, 1, 0, 0, 0),
 81          datetime(1985, 1, 1, 0, 0, 0),
 82      )
 83      assert DateFilter().parse("20 years later", relative_base=test_now) == (
 84          datetime(2004, 1, 1, 0, 0, 0),
 85          datetime(2005, 1, 1, 0, 0, 0),
 86      )
 87  
 88      # specific month/date variation
 89      assert DateFilter().parse("in august", relative_base=test_now) == (
 90          datetime(1983, 8, 1, 0, 0, 0),
 91          datetime(1983, 8, 2, 0, 0, 0),
 92      )
 93      assert DateFilter().parse("on 1983-08-01", relative_base=test_now) == (
 94          datetime(1983, 8, 1, 0, 0, 0),
 95          datetime(1983, 8, 2, 0, 0, 0),
 96      )
 97  
 98  
 99  def test_date_filter_regex():
100      dtrange_match = re.findall(DateFilter().date_regex, 'multi word head dt>"today" dt:"1984-01-01"')
101      assert dtrange_match == [(">", "today"), (":", "1984-01-01")]
102  
103      dtrange_match = re.findall(DateFilter().date_regex, 'head dt>"today" dt:"1984-01-01" multi word tail')
104      assert dtrange_match == [(">", "today"), (":", "1984-01-01")]
105  
106      dtrange_match = re.findall(DateFilter().date_regex, 'multi word head dt>="today" dt="1984-01-01"')
107      assert dtrange_match == [(">=", "today"), ("=", "1984-01-01")]
108  
109      dtrange_match = re.findall(DateFilter().date_regex, 'dt<"multi word date" multi word tail')
110      assert dtrange_match == [("<", "multi word date")]
111  
112      dtrange_match = re.findall(DateFilter().date_regex, 'head dt<="multi word date"')
113      assert dtrange_match == [("<=", "multi word date")]
114  
115      dtrange_match = re.findall(DateFilter().date_regex, "head tail")
116      assert dtrange_match == []
117  
118  
def test_get_date_filter_terms():
    """Check the filter terms ``get_filter_terms`` reports for a query.

    The expected terms keep the operator and date text of each ``dt`` filter,
    with the date text wrapped in single quotes instead of the query's double quotes.
    """
    cases = [
        ('multi word head dt>"today" dt:"1984-01-01"', ["dt>'today'", "dt:'1984-01-01'"]),
        ('head dt>"today" dt:"1984-01-01" multi word tail', ["dt>'today'", "dt:'1984-01-01'"]),
        ('multi word head dt>="today" dt="1984-01-01"', ["dt>='today'", "dt='1984-01-01'"]),
        ('dt<"multi word date" multi word tail', ["dt<'multi word date'"]),
        ('head dt<="multi word date"', ["dt<='multi word date'"]),
        # A query without any date filter yields no terms
        ("head tail", []),
    ]

    for query, expected_terms in cases:
        terms = DateFilter().get_filter_terms(query)
        assert terms == expected_terms, query
137  
138  
def test_date_extraction():
    """Check which structured dates ``extract_dates`` pulls out of free text.

    Covers empty and date-free input, relative dates (expected to be ignored),
    several numeric date layouts, and org-mode style CLOCK logbook lines.
    """
    april_1 = datetime(1984, 4, 1, 0, 0, 0)
    march_31 = datetime(1984, 3, 31, 0, 0, 0)

    # (text, exact expected result, assertion message)
    exact_cases = [
        ("", [], "Expected to handle empty string"),
        ("head tail", [], "Expected to handle no dates"),
        ("head CREATED: today tail", [], "Expected relative date to be ignored"),
        (
            "head CREATED: today SCHEDULED: 1984-04-01 tail",
            [april_1],
            "Expected only Y-m-d structured date to be extracted",
        ),
        ("head SCHEDULED: 01-04-1984 tail", [april_1], "Expected d-m-Y structured date to be extracted"),
        (
            "head CREATED: today SCHEDULED: 1984/04/01 tail",
            [april_1],
            "Expected only Y/m/d structured date to be extracted",
        ),
        ("head SCHEDULED: 01/04/1984 tail", [april_1], "Expected d/m/Y structured date to be extracted"),
        ("head DEADLINE: 01.04.1984 tail", [april_1], "Expected d.m.Y structured date to be extracted"),
        (
            "CLOCK: [1984-04-01 Sun 09:50]--[1984-04-01 Sun 10:10] => 24:20",
            [april_1],
            "Expected single deduplicated date extracted from logbook entry",
        ),
    ]
    for text, expected_dates, failure_message in exact_cases:
        assert DateFilter().extract_dates(text) == expected_dates, failure_message

    # Two distinct days in one logbook entry: only membership is checked, not order.
    found = DateFilter().extract_dates("CLOCK: [1984/03/31 mer 09:50]--[1984/04/01 mer 10:10] => 24:20")
    assert all(wanted in found for wanted in (april_1, march_31)), (
        "Expected multiple different dates extracted from logbook entry"
    )
174  
175  
def test_natual_date_extraction():
    """Check that ``extract_dates`` recognises dates written with month names.

    Full dates (day, month name, year) only need to appear among the results;
    partial dates (month name and year) are expected to yield exactly the first
    day of that month.
    """
    # (text, date that must be present in the result, assertion message)
    full_date_cases = [
        ("head 1 April 1984 tail", datetime(1984, 4, 1), "Expected natural date to be extracted"),
        ("head 1st April 1984 tail", datetime(1984, 4, 1), "Expected natural date to be extracted"),
        ("head 2nd Apr 1984 tail", datetime(1984, 4, 2), "Expected natural date with short month to be extracted"),
        ("head 4th Apr 1984 tail", datetime(1984, 4, 4), "Expected natural date to be extracted"),
        (
            "head 11th april 1984 tail",
            datetime(1984, 4, 11),
            "Expected natural date with lowercase month to be extracted",
        ),
        ("head 23rd april 84 tail", datetime(1984, 4, 23), "Expected natural date with 2-digit year to be extracted"),
        ("head 31st march 84 tail", datetime(1984, 3, 31), "Expected natural date with 2-digit year to be extracted"),
    ]
    for text, wanted_date, failure_message in full_date_cases:
        assert wanted_date in DateFilter().extract_dates(text), failure_message

    # (text, assertion message) - each must yield exactly [1984-04-01]
    partial_date_cases = [
        ("head April 1984 tail", "Expected partial natural date to be extracted"),
        ("head Apr 1984 tail", "Expected partial natural date with short month to be extracted"),
        ("head apr 1984 tail", "Expected partial natural date with lowercase month to be extracted"),
        ("head apr 84 tail", "Expected partial natural date with 2-digit year to be extracted"),
    ]
    for text, failure_message in partial_date_cases:
        assert DateFilter().extract_dates(text) == [datetime(1984, 4, 1, 0, 0, 0)], failure_message
215          "Expected partial natural date with 2-digit year to be extracted"
216      )