# test_date_filter.py
"""Tests for ``DateFilter``.

Covers date-range extraction from ``dt`` query filters, parsing of natural
language date phrases, the raw date filter regex, filter-term extraction, and
extraction of structured / natural dates from free text.
"""
import re
from datetime import datetime

import pytest

from khoj.search_filter.date_filter import DateFilter


@pytest.mark.filterwarnings("ignore:The localize method is no longer necessary.")
def test_extract_date_range():
    """Check the timestamp range ``extract_date_range`` derives from ``dt`` filters.

    The expected value is a two-item ``[start, end]`` list of POSIX timestamps
    (``None`` for an open side), or an empty list when no usable range exists.
    """
    # Strict bounds: the expected start is the day after the `dt>` date,
    # while the expected end is the `dt<` date itself.
    assert DateFilter().extract_date_range('head dt>"1984-01-04" dt<"1984-01-07" tail') == [
        datetime(1984, 1, 5, 0, 0, 0).timestamp(),
        datetime(1984, 1, 7, 0, 0, 0).timestamp(),
    ]
    # Inclusive bounds leave the other side of the range open (None).
    assert DateFilter().extract_date_range('head dt<="1984-01-01"') == [None, datetime(1984, 1, 2, 0, 0, 0).timestamp()]
    assert DateFilter().extract_date_range('head dt>="1984-01-01"') == [datetime(1984, 1, 1, 0, 0, 0).timestamp(), None]
    # `dt:` and `dt=` are both expected to select the single given day.
    assert DateFilter().extract_date_range('head dt:"1984-01-01"') == [
        datetime(1984, 1, 1, 0, 0, 0).timestamp(),
        datetime(1984, 1, 2, 0, 0, 0).timestamp(),
    ]
    assert DateFilter().extract_date_range('head dt="1984-01-01"') == [
        datetime(1984, 1, 1, 0, 0, 0).timestamp(),
        datetime(1984, 1, 2, 0, 0, 0).timestamp(),
    ]

    # Unparseable date filter specified in query
    assert DateFilter().extract_date_range('head dt:"Summer of 69" tail') == []

    # No date filter specified in query
    assert DateFilter().extract_date_range("head tail") == []

    # Non intersecting date ranges
    assert DateFilter().extract_date_range('head dt>"1984-01-01" dt<"1984-01-01" tail') == []


@pytest.mark.filterwarnings("ignore:The localize method is no longer necessary.")
def test_parse():
    """Check ``parse`` maps natural language phrases to ``(start, end)`` datetimes.

    Every phrase is resolved relative to a fixed ``relative_base`` so the
    expected ranges do not depend on the day the test is run.
    """
    test_now = datetime(1984, 4, 1, 21, 21, 21)

    # day variations
    assert DateFilter().parse("today", relative_base=test_now) == (
        datetime(1984, 4, 1, 0, 0, 0),
        datetime(1984, 4, 2, 0, 0, 0),
    )
    assert DateFilter().parse("tomorrow", relative_base=test_now) == (
        datetime(1984, 4, 2, 0, 0, 0),
        datetime(1984, 4, 3, 0, 0, 0),
    )
    assert DateFilter().parse("yesterday", relative_base=test_now) == (
        datetime(1984, 3, 31, 0, 0, 0),
        datetime(1984, 4, 1, 0, 0, 0),
    )
    assert DateFilter().parse("5 days ago", relative_base=test_now) == (
        datetime(1984, 3, 27, 0, 0, 0),
        datetime(1984, 3, 28, 0, 0, 0),
    )

    # week variations
    assert DateFilter().parse("last week", relative_base=test_now) == (
        datetime(1984, 3, 18, 0, 0, 0),
        datetime(1984, 3, 25, 0, 0, 0),
    )
    assert DateFilter().parse("2 weeks ago", relative_base=test_now) == (
        datetime(1984, 3, 11, 0, 0, 0),
        datetime(1984, 3, 18, 0, 0, 0),
    )

    # month variations
    assert DateFilter().parse("next month", relative_base=test_now) == (
        datetime(1984, 5, 1, 0, 0, 0),
        datetime(1984, 6, 1, 0, 0, 0),
    )
    assert DateFilter().parse("2 months ago", relative_base=test_now) == (
        datetime(1984, 2, 1, 0, 0, 0),
        datetime(1984, 3, 1, 0, 0, 0),
    )

    # year variations
    assert DateFilter().parse("this year", relative_base=test_now) == (
        datetime(1984, 1, 1, 0, 0, 0),
        datetime(1985, 1, 1, 0, 0, 0),
    )
    assert DateFilter().parse("20 years later", relative_base=test_now) == (
        datetime(2004, 1, 1, 0, 0, 0),
        datetime(2005, 1, 1, 0, 0, 0),
    )

    # specific month/date variation
    # A bare month name is expected to resolve to a one-day range on the first
    # of that month in the year before the relative base (1983 here).
    assert DateFilter().parse("in august", relative_base=test_now) == (
        datetime(1983, 8, 1, 0, 0, 0),
        datetime(1983, 8, 2, 0, 0, 0),
    )
    assert DateFilter().parse("on 1983-08-01", relative_base=test_now) == (
        datetime(1983, 8, 1, 0, 0, 0),
        datetime(1983, 8, 2, 0, 0, 0),
    )


def test_date_filter_regex():
    """Check ``date_regex`` captures ``(operator, date text)`` pairs from a query.

    Exercises filters at the head, middle and tail of the query, every
    operator used in these tests, and multi-word quoted date text.
    """
    dtrange_match = re.findall(DateFilter().date_regex, 'multi word head dt>"today" dt:"1984-01-01"')
    assert dtrange_match == [(">", "today"), (":", "1984-01-01")]

    dtrange_match = re.findall(DateFilter().date_regex, 'head dt>"today" dt:"1984-01-01" multi word tail')
    assert dtrange_match == [(">", "today"), (":", "1984-01-01")]

    dtrange_match = re.findall(DateFilter().date_regex, 'multi word head dt>="today" dt="1984-01-01"')
    assert dtrange_match == [(">=", "today"), ("=", "1984-01-01")]

    dtrange_match = re.findall(DateFilter().date_regex, 'dt<"multi word date" multi word tail')
    assert dtrange_match == [("<", "multi word date")]

    dtrange_match = re.findall(DateFilter().date_regex, 'head dt<="multi word date"')
    assert dtrange_match == [("<=", "multi word date")]

    # A query without any date filter yields no matches.
    dtrange_match = re.findall(DateFilter().date_regex, "head tail")
    assert dtrange_match == []


def test_get_date_filter_terms():
    """Check ``get_filter_terms`` returns each date filter found in the query.

    The expected terms keep the operator and date text but use single quotes
    around the date, whereas the queries use double quotes.
    """
    dtrange_match = DateFilter().get_filter_terms('multi word head dt>"today" dt:"1984-01-01"')
    assert dtrange_match == ["dt>'today'", "dt:'1984-01-01'"]

    dtrange_match = DateFilter().get_filter_terms('head dt>"today" dt:"1984-01-01" multi word tail')
    assert dtrange_match == ["dt>'today'", "dt:'1984-01-01'"]

    dtrange_match = DateFilter().get_filter_terms('multi word head dt>="today" dt="1984-01-01"')
    assert dtrange_match == ["dt>='today'", "dt='1984-01-01'"]

    dtrange_match = DateFilter().get_filter_terms('dt<"multi word date" multi word tail')
    assert dtrange_match == ["dt<'multi word date'"]

    dtrange_match = DateFilter().get_filter_terms('head dt<="multi word date"')
    assert dtrange_match == ["dt<='multi word date'"]

    # A query without any date filter yields no terms.
    dtrange_match = DateFilter().get_filter_terms("head tail")
    assert dtrange_match == []


def test_date_extraction():
    """Check ``extract_dates`` finds structured dates embedded in text.

    Covers empty / date-free input, relative dates (expected to be ignored),
    several separators and field orders, and logbook-style clock entries.
    """
    extracted_dates = DateFilter().extract_dates("")
    assert extracted_dates == [], "Expected to handle empty string"

    extracted_dates = DateFilter().extract_dates("head tail")
    assert extracted_dates == [], "Expected to handle no dates"

    extracted_dates = DateFilter().extract_dates("head CREATED: today tail")
    assert extracted_dates == [], "Expected relative date to be ignored"

    extracted_dates = DateFilter().extract_dates("head CREATED: today SCHEDULED: 1984-04-01 tail")
    assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], "Expected only Y-m-d structured date to be extracted"

    extracted_dates = DateFilter().extract_dates("head SCHEDULED: 01-04-1984 tail")
    assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], "Expected d-m-Y structured date to be extracted"

    extracted_dates = DateFilter().extract_dates("head CREATED: today SCHEDULED: 1984/04/01 tail")
    assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], "Expected only Y/m/d structured date to be extracted"

    extracted_dates = DateFilter().extract_dates("head SCHEDULED: 01/04/1984 tail")
    assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], "Expected d/m/Y structured date to be extracted"

    extracted_dates = DateFilter().extract_dates("head DEADLINE: 01.04.1984 tail")
    assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], "Expected d.m.Y structured date to be extracted"

    # The same day appears twice in the entry but is expected only once.
    extracted_dates = DateFilter().extract_dates("CLOCK: [1984-04-01 Sun 09:50]--[1984-04-01 Sun 10:10] => 24:20")
    assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], (
        "Expected single deduplicated date extracted from logbook entry"
    )

    # Membership check only: the order of the extracted dates is not asserted.
    extracted_dates = DateFilter().extract_dates("CLOCK: [1984/03/31 mer 09:50]--[1984/04/01 mer 10:10] => 24:20")
    expected_dates = [datetime(1984, 4, 1, 0, 0, 0), datetime(1984, 3, 31, 0, 0, 0)]
    assert all(dt in extracted_dates for dt in expected_dates), (
        "Expected multiple different dates extracted from logbook entry"
    )


# NOTE(review): "natual" is a typo for "natural"; the name is kept as-is so the
# existing test ID stays stable.
def test_natual_date_extraction():
    """Check ``extract_dates`` finds natural language dates embedded in text.

    Full dates (day, month, year) are checked by membership; partial dates
    (month and year only) are expected to resolve to exactly the first day of
    that month.
    """
    extracted_dates = DateFilter().extract_dates("head 1 April 1984 tail")
    assert datetime(1984, 4, 1, 0, 0, 0) in extracted_dates, "Expected natural date to be extracted"

    extracted_dates = DateFilter().extract_dates("head 1st April 1984 tail")
    assert datetime(1984, 4, 1, 0, 0, 0) in extracted_dates, "Expected natural date to be extracted"

    extracted_dates = DateFilter().extract_dates("head 2nd Apr 1984 tail")
    assert datetime(1984, 4, 2, 0, 0, 0) in extracted_dates, "Expected natural date with short month to be extracted"

    extracted_dates = DateFilter().extract_dates("head 4th Apr 1984 tail")
    assert datetime(1984, 4, 4, 0, 0, 0) in extracted_dates, "Expected natural date to be extracted"

    extracted_dates = DateFilter().extract_dates("head 11th april 1984 tail")
    assert datetime(1984, 4, 11, 0, 0, 0) in extracted_dates, (
        "Expected natural date with lowercase month to be extracted"
    )

    extracted_dates = DateFilter().extract_dates("head 23rd april 84 tail")
    assert datetime(1984, 4, 23, 0, 0, 0) in extracted_dates, "Expected natural date with 2-digit year to be extracted"

    extracted_dates = DateFilter().extract_dates("head 31st march 84 tail")
    assert datetime(1984, 3, 31, 0, 0, 0) in extracted_dates, "Expected natural date with 2-digit year to be extracted"

    extracted_dates = DateFilter().extract_dates("head April 1984 tail")
    assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], "Expected partial natural date to be extracted"

    extracted_dates = DateFilter().extract_dates("head Apr 1984 tail")
    assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], (
        "Expected partial natural date with short month to be extracted"
    )

    extracted_dates = DateFilter().extract_dates("head apr 1984 tail")
    assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], (
        "Expected partial natural date with lowercase month to be extracted"
    )

    extracted_dates = DateFilter().extract_dates("head apr 84 tail")
    assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], (
        "Expected partial natural date with 2-digit year to be extracted"
    )