/ src / json_stream / tests / test_loader.py
test_loader.py
  1  import copy
  2  import json
  3  from io import StringIO
  4  from itertools import zip_longest
  5  from unittest import TestCase
  6  
  7  from json_stream import load
  8  from json_stream.base import (
  9      TransientAccessException,
 10      PersistentStreamingJSONObject,
 11      TransientStreamingJSONList,
 12      TransientStreamingJSONObject,
 13      PersistentStreamingJSONList,
 14  )
 15  
 16  
 17  class TestLoader(TestCase):
 18      def test_load_empty_object(self):
 19          obj = {}
 20          self._test_object(obj, persistent=True)
 21          self._test_object(obj, persistent=False)
 22  
 23      def test_load_object(self):
 24          obj = {"a": 1, "b": None, "c": True}
 25          self._test_object(obj, persistent=True)
 26          self._test_object(obj, persistent=False)
 27  
 28      def test_load_object_get_persistent(self):
 29          json = '{"a": 1, "b": null, "c": true}'
 30  
 31          # Access in order
 32          data = load(StringIO(json), persistent=True)
 33          self.assertEqual(data['a'], 1)
 34          self.assertEqual(data['b'], None)
 35          self.assertEqual(data['c'], True)
 36          with self.assertRaises(KeyError):
 37              _ = data['d']
 38  
 39          # Access out of order
 40          data = load(StringIO(json), persistent=True)
 41          self.assertEqual(data['b'], None)
 42          self.assertEqual(data['a'], 1)
 43          self.assertEqual(data['c'], True)
 44          with self.assertRaises(KeyError):
 45              _ = data['d']
 46  
 47          # Access with key error first order
 48          data = load(StringIO(json), persistent=True)
 49          with self.assertRaises(KeyError):
 50              _ = data['d']
 51          self.assertEqual(data['a'], 1)
 52          self.assertEqual(data['b'], None)
 53          self.assertEqual(data['c'], True)
 54  
 55      def test_load_object_get_transient(self):
 56          json = '{"a": 1, "b": null, "c": true}'
 57  
 58          # Access in order
 59          data = load(StringIO(json), persistent=False)
 60          self.assertEqual(data['a'], 1)
 61          self.assertEqual(data['b'], None)
 62          self.assertEqual(data['c'], True)
 63          with self.assertRaises(TransientAccessException):
 64              _ = data['d']
 65  
 66          # Access out of order
 67          data = load(StringIO(json), persistent=False)
 68          self.assertEqual(data['b'], None)
 69          with self.assertRaises(TransientAccessException):
 70              _ = data['a']
 71          with self.assertRaises(TransientAccessException):
 72              _ = data['c']  # stream was exhausted in search for 'a'
 73          with self.assertRaises(TransientAccessException):
 74              _ = data['d']  # don't know if this was a key error or was in the past
 75  
 76          # Access with key error first order
 77          data = load(StringIO(json), persistent=False)
 78          with self.assertRaises(KeyError):
 79              _ = data['d']
 80          with self.assertRaises(TransientAccessException):
 81              _ = data['a']  # stream was exhausted in search for 'd'
 82  
 83      def test_load_empty_list(self):
 84          obj = []
 85          self._test_list(obj, persistent=True)
 86          self._test_list(obj, persistent=False)
 87  
 88      def test_load_list(self):
 89          obj = [1, True, ""]
 90          self._test_list(obj, persistent=True)
 91          self._test_list(obj, persistent=False)
 92  
 93      def test_load_list_get_persistent(self):
 94          json = '[1, true, ""]'
 95  
 96          # Access in order
 97          data = load(StringIO(json), persistent=True)
 98          self.assertEqual(data[0], 1)
 99          self.assertTrue(data[1])
100          self.assertEqual(data[2], "")
101          with self.assertRaises(IndexError):
102              _ = data[3]
103  
104          # Access out of order
105          data = load(StringIO(json), persistent=True)
106          self.assertEqual(data[0], 1)
107          self.assertTrue(data[1])
108          self.assertEqual(data[2], "")
109          with self.assertRaises(IndexError):
110              _ = data[3]
111  
112      def test_load_list_get_transient(self):
113          json = '[1, true, ""]'
114  
115          # Access in order
116          data = load(StringIO(json), persistent=False)
117          self.assertEqual(data[0], 1)
118          self.assertTrue(data[1])
119          self.assertEqual(data[2], "")
120          with self.assertRaises(IndexError):
121              _ = data[3]
122  
123          # Access out of order
124          data = load(StringIO(json), persistent=False)
125          self.assertTrue(data[1])
126          with self.assertRaises(TransientAccessException):
127              _ = data[0]
128          self.assertEqual(data[2], "")
129          with self.assertRaises(IndexError):
130              _ = data[3]
131  
132      def test_load_nested_persistent(self):
133          json = '{"count": 3, "results": ["a", "b", {}]}'
134          data = load(StringIO(json), persistent=True)
135          self.assertIsInstance(data, PersistentStreamingJSONObject)
136          results = data['results']
137          self.assertIsInstance(results, PersistentStreamingJSONList)
138          self.assertEqual(results[0], 'a')
139          self.assertEqual(results[1], 'b')
140          self.assertIsInstance(results[2], PersistentStreamingJSONObject)
141          self.assertEqual(len(results), 3)
142          self.assertEqual(len(results[2]), 0)
143          self.assertEqual(len(data), 2)
144          self.assertEqual(data["count"], 3)
145  
146      def test_load_nested_transient(self):
147          json = '{"count": 3, "results": ["a", "b", "c"]}'
148          data = load(StringIO(json), persistent=False)
149          self.assertIsInstance(data, TransientStreamingJSONObject)
150          results = data['results']
151          self.assertIsInstance(results, TransientStreamingJSONList)
152          self.assertEqual(list(results), ['a', 'b', 'c'])
153  
154      def test_load_nested_transient_first_list_item_object(self):
155          json = '[{"a": 4}, "b", "c"]'
156          data = load(StringIO(json), persistent=False)
157          self.assertIsInstance(data, TransientStreamingJSONList)
158          items = iter(data)
159          item = next(items)
160          self.assertIsInstance(item, TransientStreamingJSONObject)
161          self.assertDictEqual({"a": 4}, dict(item.items()))
162          self.assertEqual(list(items), ['b', 'c'])
163  
164      def test_load_nested_transient_first_list_item_list(self):
165          json = '[["a"], "b", "c"]'
166          data = load(StringIO(json), persistent=False)
167          self.assertIsInstance(data, TransientStreamingJSONList)
168          items = iter(data)
169          item = next(items)
170          self.assertIsInstance(item, TransientStreamingJSONList)
171          self.assertListEqual(["a"], list(item))
172          self.assertEqual(list(items), ['b', 'c'])
173  
174      def test_not_copiable(self):
175          json = '[["a"], "b", "c"]'
176          with self.assertRaisesRegex(copy.Error, "^Copying json_steam objects leads to a bad time$"):
177              copy.copy(load(StringIO(json)))
178          with self.assertRaisesRegex(copy.Error, "^Copying json_steam objects leads to a bad time$"):
179              copy.deepcopy(load(StringIO(json)))
180  
181      def test_transient_to_persistent(self):
182          json = '{"results": [{"x": 1, "y": 3}, {"y": 4, "x": 2}]}'
183          xs = iter((1, 2))
184          ys = iter((3, 4))
185  
186          data = load(StringIO(json))  # data is a transient dict-like object
187          self.assertIsInstance(data, TransientStreamingJSONObject)
188  
189          results = data['results']
190          self.assertIsInstance(results, TransientStreamingJSONList)
191  
192          # iterate transient list, but produce persistent items
193          for result in results.persistent():
194              # result is a persistent dict-like object
195              self.assertIsInstance(result, PersistentStreamingJSONObject)
196              x = next(xs)
197              y = next(ys)
198              self.assertEqual(result['x'], x)
199              self.assertEqual(result['y'], y)  # would error on second result without .persistent()
200              self.assertEqual(result['x'], x)  # would error without .persistent()
201  
202      def test_persistent_to_transient(self):
203          json = """{"a": 1, "x": ["long", "list", "I", "don't", "want", "in", "memory"], "b": 2}"""
204          data = load(StringIO(json), persistent=True).transient()
205          self.assertIsInstance(data, PersistentStreamingJSONObject)
206  
207          self.assertEqual(data["a"], 1)
208          l = data["x"]
209          self.assertIsInstance(l, TransientStreamingJSONList)
210          self.assertEqual(data["b"], 2)
211          self.assertEqual(data["b"], 2)  # would error if data was transient
212          with self.assertRaisesRegex(TransientAccessException, "Index 0 already passed in this stream"):
213              _ = l[0]  # cannot access transient list
214  
215      def _test_object(self, obj, persistent):
216          self.assertListEqual(list(self._to_data(obj, persistent)), list(obj))
217          self.assertListEqual(list(self._to_data(obj, persistent).keys()), list(obj.keys()))
218          self.assertListEqual(list(self._to_data(obj, persistent).values()), list(obj.values()))
219          self.assertListEqual(list(self._to_data(obj, persistent).items()), list(obj.items()))
220          if persistent:
221              self.assertEqual(len(self._to_data(obj, persistent)), len(obj))
222          for k, expected_k in zip_longest(self._to_data(obj, persistent), obj):
223              self.assertEqual(k, expected_k)
224  
225          if not persistent:
226              data = self._to_data(obj, persistent)
227              iter(data)  # iterates first time
228              with self.assertRaises(TransientAccessException):
229                  iter(data)  # can't get second iterator
230              with self.assertRaises(TransientAccessException):
231                  data.keys()  # can't get keys
232              with self.assertRaises(TransientAccessException):
233                  data.values()  # can't get keys
234              with self.assertRaises(TransientAccessException):
235                  data.items()  # can't get keys
236  
237      def _test_list(self, obj, persistent):
238          self.assertListEqual(list(self._to_data(obj, persistent)), list(obj))
239          if persistent:
240              self.assertEqual(len(self._to_data(obj, persistent)), len(obj))
241          for k, expected_k in zip_longest(self._to_data(obj, persistent), obj):
242              self.assertEqual(k, expected_k)
243  
244          if not persistent:
245              data = self._to_data(obj, persistent)
246              iter(data)  # iterates first time
247              with self.assertRaises(TransientAccessException):
248                  iter(data)  # can't get second iterator
249  
250      def _to_data(self, obj, persistent):
251          return load(StringIO(json.dumps(obj)), persistent)