/ lib / pandas / _libs / testing.pyx
testing.pyx
  1  import cmath
  2  import math
  3  
  4  import numpy as np
  5  
  6  from numpy cimport import_array
  7  
  8  import_array()
  9  
 10  from pandas._libs.util cimport (
 11      is_array,
 12      is_complex_object,
 13      is_real_number_object,
 14  )
 15  
 16  from pandas.core.dtypes.common import is_dtype_equal
 17  from pandas.core.dtypes.missing import (
 18      array_equivalent,
 19      isna,
 20  )
 21  
 22  
 23  cdef bint isiterable(obj):
 24      return hasattr(obj, '__iter__')
 25  
 26  
 27  cdef bint has_length(obj):
 28      return hasattr(obj, '__len__')
 29  
 30  
 31  cdef bint is_dictlike(obj):
 32      return hasattr(obj, 'keys') and hasattr(obj, '__getitem__')
 33  
 34  
 35  cpdef assert_dict_equal(a, b, bint compare_keys=True):
 36      assert is_dictlike(a) and is_dictlike(b), (
 37          "Cannot compare dict objects, one or both is not dict-like"
 38      )
 39  
 40      a_keys = frozenset(a.keys())
 41      b_keys = frozenset(b.keys())
 42  
 43      if compare_keys:
 44          assert a_keys == b_keys
 45  
 46      for k in a_keys:
 47          assert_almost_equal(a[k], b[k])
 48  
 49      return True
 50  
 51  
 52  cpdef assert_almost_equal(a, b,
 53                            rtol=1.e-5, atol=1.e-8,
 54                            bint check_dtype=True,
 55                            obj=None, lobj=None, robj=None, index_values=None):
 56      """
 57      Check that left and right objects are almost equal.
 58  
 59      Parameters
 60      ----------
 61      a : object
 62      b : object
 63      rtol : float, default 1e-5
 64          Relative tolerance.
 65  
 66          .. versionadded:: 1.1.0
 67      atol : float, default 1e-8
 68          Absolute tolerance.
 69  
 70          .. versionadded:: 1.1.0
 71      check_dtype: bool, default True
 72          check dtype if both a and b are np.ndarray.
 73      obj : str, default None
 74          Specify object name being compared, internally used to show
 75          appropriate assertion message.
 76      lobj : str, default None
 77          Specify left object name being compared, internally used to show
 78          appropriate assertion message.
 79      robj : str, default None
 80          Specify right object name being compared, internally used to show
 81          appropriate assertion message.
 82      index_values : ndarray, default None
 83          Specify shared index values of objects being compared, internally used
 84          to show appropriate assertion message.
 85  
 86          .. versionadded:: 1.1.0
 87  
 88      """
 89      cdef:
 90          double diff = 0.0
 91          Py_ssize_t i, na, nb
 92          double fa, fb
 93          bint is_unequal = False, a_is_ndarray, b_is_ndarray
 94  
 95      if lobj is None:
 96          lobj = a
 97      if robj is None:
 98          robj = b
 99  
100      if isinstance(a, dict) or isinstance(b, dict):
101          return assert_dict_equal(a, b)
102  
103      if isinstance(a, str) or isinstance(b, str):
104          assert a == b, f"{a} != {b}"
105          return True
106  
107      a_is_ndarray = is_array(a)
108      b_is_ndarray = is_array(b)
109  
110      if obj is None:
111          if a_is_ndarray or b_is_ndarray:
112              obj = 'numpy array'
113          else:
114              obj = 'Iterable'
115  
116      if isiterable(a):
117  
118          if not isiterable(b):
119              from pandas._testing import assert_class_equal
120  
121              # classes can't be the same, to raise error
122              assert_class_equal(a, b, obj=obj)
123  
124          assert has_length(a) and has_length(b), (
125              f"Can't compare objects without length, one or both is invalid: ({a}, {b})"
126          )
127  
128          if a_is_ndarray and b_is_ndarray:
129              na, nb = a.size, b.size
130              if a.shape != b.shape:
131                  from pandas._testing import raise_assert_detail
132                  raise_assert_detail(
133                      obj, f'{obj} shapes are different', a.shape, b.shape)
134  
135              if check_dtype and not is_dtype_equal(a.dtype, b.dtype):
136                  from pandas._testing import assert_attr_equal
137                  assert_attr_equal('dtype', a, b, obj=obj)
138  
139              if array_equivalent(a, b, strict_nan=True):
140                  return True
141  
142          else:
143              na, nb = len(a), len(b)
144  
145          if na != nb:
146              from pandas._testing import raise_assert_detail
147  
148              # if we have a small diff set, print it
149              if abs(na - nb) < 10:
150                  r = list(set(a) ^ set(b))
151              else:
152                  r = None
153  
154              raise_assert_detail(obj, f"{obj} length are different", na, nb, r)
155  
156          for i in range(len(a)):
157              try:
158                  assert_almost_equal(a[i], b[i], rtol=rtol, atol=atol)
159              except AssertionError:
160                  is_unequal = True
161                  diff += 1
162  
163          if is_unequal:
164              from pandas._testing import raise_assert_detail
165              msg = (f"{obj} values are different "
166                     f"({np.round(diff * 100.0 / na, 5)} %)")
167              raise_assert_detail(obj, msg, lobj, robj, index_values=index_values)
168  
169          return True
170  
171      elif isiterable(b):
172          from pandas._testing import assert_class_equal
173  
174          # classes can't be the same, to raise error
175          assert_class_equal(a, b, obj=obj)
176  
177      if isna(a) and isna(b):
178          # TODO: Should require same-dtype NA?
179          # nan / None comparison
180          return True
181  
182      if isna(a) and not isna(b) or not isna(a) and isna(b):
183          # boolean value of pd.NA is ambigous
184          raise AssertionError(f"{a} != {b}")
185  
186      if a == b:
187          # object comparison
188          return True
189  
190      if is_real_number_object(a) and is_real_number_object(b):
191          if array_equivalent(a, b, strict_nan=True):
192              # inf comparison
193              return True
194  
195          fa, fb = a, b
196  
197          if not math.isclose(fa, fb, rel_tol=rtol, abs_tol=atol):
198              assert False, (f"expected {fb:.5f} but got {fa:.5f}, "
199                             f"with rtol={rtol}, atol={atol}")
200          return True
201  
202      if is_complex_object(a) and is_complex_object(b):
203          if array_equivalent(a, b, strict_nan=True):
204              # inf comparison
205              return True
206  
207          if not cmath.isclose(a, b, rel_tol=rtol, abs_tol=atol):
208              assert False, (f"expected {b:.5f} but got {a:.5f}, "
209                             f"with rtol={rtol}, atol={atol}")
210          return True
211  
212      raise AssertionError(f"{a} != {b}")