# test_loader.py
import copy
import json
from io import StringIO
from itertools import zip_longest
from unittest import TestCase

from json_stream import load
from json_stream.base import (
    TransientAccessException,
    PersistentStreamingJSONObject,
    TransientStreamingJSONList,
    TransientStreamingJSONObject,
    PersistentStreamingJSONList,
)


class TestLoader(TestCase):
    """Tests for ``json_stream.load`` in persistent and transient modes.

    As asserted by the tests below: data loaded with ``persistent=True`` can
    be read repeatedly and out of order, while data loaded with
    ``persistent=False`` raises ``TransientAccessException`` when asked for
    something the stream has already moved past.
    """

    def test_load_empty_object(self):
        """An empty object matches ``{}`` in both modes."""
        obj = {}
        for persistent in (True, False):
            with self.subTest(persistent=persistent):
                self._test_object(obj, persistent=persistent)

    def test_load_object(self):
        """A flat object matches the equivalent dict in both modes."""
        obj = {"a": 1, "b": None, "c": True}
        for persistent in (True, False):
            with self.subTest(persistent=persistent):
                self._test_object(obj, persistent=persistent)

    def test_load_object_get_persistent(self):
        """Persistent objects support key lookup in any order."""
        json_text = '{"a": 1, "b": null, "c": true}'

        # Access in order
        data = load(StringIO(json_text), persistent=True)
        self.assertEqual(data['a'], 1)
        self.assertIsNone(data['b'])
        self.assertEqual(data['c'], True)
        with self.assertRaises(KeyError):
            _ = data['d']

        # Access out of order
        data = load(StringIO(json_text), persistent=True)
        self.assertIsNone(data['b'])
        self.assertEqual(data['a'], 1)
        self.assertEqual(data['c'], True)
        with self.assertRaises(KeyError):
            _ = data['d']

        # Access with key error first order
        data = load(StringIO(json_text), persistent=True)
        with self.assertRaises(KeyError):
            _ = data['d']
        self.assertEqual(data['a'], 1)
        self.assertIsNone(data['b'])
        self.assertEqual(data['c'], True)

    def test_load_object_get_transient(self):
        """Transient objects only support key lookup in stream order."""
        json_text = '{"a": 1, "b": null, "c": true}'

        # Access in order
        data = load(StringIO(json_text), persistent=False)
        self.assertEqual(data['a'], 1)
        self.assertIsNone(data['b'])
        self.assertEqual(data['c'], True)
        with self.assertRaises(TransientAccessException):
            _ = data['d']

        # Access out of order
        data = load(StringIO(json_text), persistent=False)
        self.assertIsNone(data['b'])
        with self.assertRaises(TransientAccessException):
            _ = data['a']
        with self.assertRaises(TransientAccessException):
            _ = data['c']  # stream was exhausted in search for 'a'
        with self.assertRaises(TransientAccessException):
            _ = data['d']  # don't know if this was a key error or was in the past

        # Access with key error first order
        data = load(StringIO(json_text), persistent=False)
        with self.assertRaises(KeyError):
            _ = data['d']
        with self.assertRaises(TransientAccessException):
            _ = data['a']  # stream was exhausted in search for 'd'

    def test_load_empty_list(self):
        """An empty list matches ``[]`` in both modes."""
        obj = []
        for persistent in (True, False):
            with self.subTest(persistent=persistent):
                self._test_list(obj, persistent=persistent)

    def test_load_list(self):
        """A flat list matches the equivalent Python list in both modes."""
        obj = [1, True, ""]
        for persistent in (True, False):
            with self.subTest(persistent=persistent):
                self._test_list(obj, persistent=persistent)

    def test_load_list_get_persistent(self):
        """Persistent lists support index lookup in any order."""
        json_text = '[1, true, ""]'

        # Access in order
        data = load(StringIO(json_text), persistent=True)
        self.assertEqual(data[0], 1)
        self.assertTrue(data[1])
        self.assertEqual(data[2], "")
        with self.assertRaises(IndexError):
            _ = data[3]

        # Access out of order: read index 1 first, then go back to index 0.
        # (Previously this section repeated the in-order sequence, so going
        # back to an earlier index was never exercised for persistent lists.)
        data = load(StringIO(json_text), persistent=True)
        self.assertTrue(data[1])
        self.assertEqual(data[0], 1)
        self.assertEqual(data[2], "")
        with self.assertRaises(IndexError):
            _ = data[3]

    def test_load_list_get_transient(self):
        """Transient lists cannot go back to an index already passed."""
        json_text = '[1, true, ""]'

        # Access in order
        data = load(StringIO(json_text), persistent=False)
        self.assertEqual(data[0], 1)
        self.assertTrue(data[1])
        self.assertEqual(data[2], "")
        with self.assertRaises(IndexError):
            _ = data[3]

        # Access out of order
        data = load(StringIO(json_text), persistent=False)
        self.assertTrue(data[1])
        with self.assertRaises(TransientAccessException):
            _ = data[0]
        self.assertEqual(data[2], "")
        with self.assertRaises(IndexError):
            _ = data[3]

    def test_load_nested_persistent(self):
        """Containers nested in persistent data are persistent as well."""
        json_text = '{"count": 3, "results": ["a", "b", {}]}'
        data = load(StringIO(json_text), persistent=True)
        self.assertIsInstance(data, PersistentStreamingJSONObject)
        results = data['results']
        self.assertIsInstance(results, PersistentStreamingJSONList)
        self.assertEqual(results[0], 'a')
        self.assertEqual(results[1], 'b')
        self.assertIsInstance(results[2], PersistentStreamingJSONObject)
        self.assertEqual(len(results), 3)
        self.assertEqual(len(results[2]), 0)
        self.assertEqual(len(data), 2)
        # "count" precedes "results" in the document; it is read last here.
        self.assertEqual(data["count"], 3)

    def test_load_nested_transient(self):
        """Containers nested in transient data are transient as well."""
        json_text = '{"count": 3, "results": ["a", "b", "c"]}'
        data = load(StringIO(json_text), persistent=False)
        self.assertIsInstance(data, TransientStreamingJSONObject)
        results = data['results']
        self.assertIsInstance(results, TransientStreamingJSONList)
        self.assertEqual(list(results), ['a', 'b', 'c'])

    def test_load_nested_transient_first_list_item_object(self):
        """Iteration continues after consuming a nested transient object."""
        json_text = '[{"a": 4}, "b", "c"]'
        data = load(StringIO(json_text), persistent=False)
        self.assertIsInstance(data, TransientStreamingJSONList)
        items = iter(data)
        item = next(items)
        self.assertIsInstance(item, TransientStreamingJSONObject)
        self.assertDictEqual({"a": 4}, dict(item.items()))
        self.assertEqual(list(items), ['b', 'c'])

    def test_load_nested_transient_first_list_item_list(self):
        """Iteration continues after consuming a nested transient list."""
        json_text = '[["a"], "b", "c"]'
        data = load(StringIO(json_text), persistent=False)
        self.assertIsInstance(data, TransientStreamingJSONList)
        items = iter(data)
        item = next(items)
        self.assertIsInstance(item, TransientStreamingJSONList)
        self.assertListEqual(["a"], list(item))
        self.assertEqual(list(items), ['b', 'c'])

    def test_not_copiable(self):
        """``copy.copy`` and ``copy.deepcopy`` both raise ``copy.Error``."""
        json_text = '[["a"], "b", "c"]'
        # NOTE(review): "json_steam" looks like a typo, but presumably mirrors
        # the message raised by the library -- confirm before changing it.
        message = "^Copying json_steam objects leads to a bad time$"
        with self.assertRaisesRegex(copy.Error, message):
            copy.copy(load(StringIO(json_text)))
        with self.assertRaisesRegex(copy.Error, message):
            copy.deepcopy(load(StringIO(json_text)))

    def test_transient_to_persistent(self):
        """``.persistent()`` on a transient list yields persistent items."""
        json_text = '{"results": [{"x": 1, "y": 3}, {"y": 4, "x": 2}]}'
        xs = iter((1, 2))
        ys = iter((3, 4))

        data = load(StringIO(json_text))  # data is a transient dict-like object
        self.assertIsInstance(data, TransientStreamingJSONObject)

        results = data['results']
        self.assertIsInstance(results, TransientStreamingJSONList)

        # iterate transient list, but produce persistent items
        for result in results.persistent():
            # result is a persistent dict-like object
            self.assertIsInstance(result, PersistentStreamingJSONObject)
            x = next(xs)
            y = next(ys)
            self.assertEqual(result['x'], x)
            self.assertEqual(result['y'], y)  # would error on second result without .persistent()
            self.assertEqual(result['x'], x)  # would error without .persistent()

    def test_persistent_to_transient(self):
        """``.transient()`` on persistent data makes nested containers transient."""
        json_text = """{"a": 1, "x": ["long", "list", "I", "don't", "want", "in", "memory"], "b": 2}"""
        data = load(StringIO(json_text), persistent=True).transient()
        # The top-level object itself is still reported as persistent.
        self.assertIsInstance(data, PersistentStreamingJSONObject)

        self.assertEqual(data["a"], 1)
        long_list = data["x"]
        self.assertIsInstance(long_list, TransientStreamingJSONList)
        self.assertEqual(data["b"], 2)
        self.assertEqual(data["b"], 2)  # would error if data was transient
        with self.assertRaisesRegex(TransientAccessException, "Index 0 already passed in this stream"):
            _ = long_list[0]  # cannot access transient list

    def _test_object(self, obj, persistent):
        """Check that ``obj`` loaded from JSON behaves like the dict ``obj``.

        A fresh stream is built for every assertion because, as checked at
        the end of this helper, transient data can only be iterated once.

        :param obj: dict to serialise and load back.
        :param persistent: mode passed through to ``load``.
        """
        self.assertListEqual(list(self._to_data(obj, persistent)), list(obj))
        self.assertListEqual(list(self._to_data(obj, persistent).keys()), list(obj.keys()))
        self.assertListEqual(list(self._to_data(obj, persistent).values()), list(obj.values()))
        self.assertListEqual(list(self._to_data(obj, persistent).items()), list(obj.items()))
        if persistent:
            self.assertEqual(len(self._to_data(obj, persistent)), len(obj))
        # zip_longest pads with None, so a length mismatch fails the comparison
        for key, expected_key in zip_longest(self._to_data(obj, persistent), obj):
            self.assertEqual(key, expected_key)

        if not persistent:
            data = self._to_data(obj, persistent)
            iter(data)  # iterates first time
            with self.assertRaises(TransientAccessException):
                iter(data)  # can't get second iterator
            with self.assertRaises(TransientAccessException):
                data.keys()  # can't get keys
            with self.assertRaises(TransientAccessException):
                data.values()  # can't get values
            with self.assertRaises(TransientAccessException):
                data.items()  # can't get items

    def _test_list(self, obj, persistent):
        """Check that ``obj`` loaded from JSON behaves like the list ``obj``.

        :param obj: list to serialise and load back.
        :param persistent: mode passed through to ``load``.
        """
        self.assertListEqual(list(self._to_data(obj, persistent)), list(obj))
        if persistent:
            self.assertEqual(len(self._to_data(obj, persistent)), len(obj))
        # zip_longest pads with None, so a length mismatch fails the comparison
        for item, expected_item in zip_longest(self._to_data(obj, persistent), obj):
            self.assertEqual(item, expected_item)

        if not persistent:
            data = self._to_data(obj, persistent)
            iter(data)  # iterates first time
            with self.assertRaises(TransientAccessException):
                iter(data)  # can't get second iterator

    def _to_data(self, obj, persistent):
        """Serialise ``obj`` to JSON text and load it back through ``load``.

        :param obj: JSON-serialisable value.
        :param persistent: mode passed through to ``load``.
        :return: whatever ``load`` returns for a fresh in-memory stream.
        """
        return load(StringIO(json.dumps(obj)), persistent=persistent)