# pandas/_libs/lib.pyx
   1  from collections import abc
   2  from decimal import Decimal
   3  from enum import Enum
   4  from typing import Literal
   5  import warnings
   6  
   7  cimport cython
   8  from cpython.datetime cimport (
   9      PyDate_Check,
  10      PyDateTime_Check,
  11      PyDelta_Check,
  12      PyTime_Check,
  13      import_datetime,
  14  )
  15  from cpython.iterator cimport PyIter_Check
  16  from cpython.number cimport PyNumber_Check
  17  from cpython.object cimport (
  18      Py_EQ,
  19      PyObject_RichCompareBool,
  20      PyTypeObject,
  21  )
  22  from cpython.ref cimport Py_INCREF
  23  from cpython.sequence cimport PySequence_Check
  24  from cpython.tuple cimport (
  25      PyTuple_New,
  26      PyTuple_SET_ITEM,
  27  )
  28  from cython cimport (
  29      Py_ssize_t,
  30      floating,
  31  )
  32  
  33  from pandas.util._exceptions import find_stack_level
  34  
  35  import_datetime()
  36  
  37  import numpy as np
  38  
  39  cimport numpy as cnp
  40  from numpy cimport (
  41      NPY_OBJECT,
  42      PyArray_Check,
  43      PyArray_GETITEM,
  44      PyArray_ITER_DATA,
  45      PyArray_ITER_NEXT,
  46      PyArray_IterNew,
  47      complex128_t,
  48      flatiter,
  49      float32_t,
  50      float64_t,
  51      int64_t,
  52      intp_t,
  53      ndarray,
  54      uint8_t,
  55      uint64_t,
  56  )
  57  
  58  cnp.import_array()
  59  
  60  cdef extern from "Python.h":
  61      # Note: importing extern-style allows us to declare these as nogil
  62      # functions, whereas `from cpython cimport` does not.
  63      bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil
  64  
  65  cdef extern from "numpy/arrayobject.h":
  66      # cython's numpy.dtype specification is incorrect, which leads to
  67      # errors in issubclass(self.dtype.type, np.bool_), so we directly
  68      # include the correct version
  69      # https://github.com/cython/cython/issues/2022
  70  
  71      ctypedef class numpy.dtype [object PyArray_Descr]:
  72          # Use PyDataType_* macros when possible, however there are no macros
  73          # for accessing some of the fields, so some are defined. Please
  74          # ask on cython-dev if you need more.
  75          cdef:
  76              int type_num
  77              int itemsize "elsize"
  78              char byteorder
  79              object fields
  80              tuple names
  81  
  82      PyTypeObject PySignedIntegerArrType_Type
  83      PyTypeObject PyUnsignedIntegerArrType_Type
  84  
  85  cdef extern from "numpy/ndarrayobject.h":
  86      bint PyArray_CheckScalar(obj) nogil
  87  
  88  
  89  cdef extern from "src/parse_helper.h":
  90      int floatify(object, float64_t *result, int *maybe_int) except -1
  91  
  92  from pandas._libs cimport util
  93  from pandas._libs.util cimport (
  94      INT64_MAX,
  95      INT64_MIN,
  96      UINT64_MAX,
  97      is_nan,
  98  )
  99  
 100  from pandas._libs.tslib import array_to_datetime
 101  from pandas._libs.tslibs import (
 102      OutOfBoundsDatetime,
 103      OutOfBoundsTimedelta,
 104  )
 105  from pandas._libs.tslibs.period import Period
 106  
 107  from pandas._libs.missing cimport (
 108      C_NA,
 109      checknull,
 110      is_matching_na,
 111      is_null_datetime64,
 112      is_null_timedelta64,
 113  )
 114  from pandas._libs.tslibs.conversion cimport convert_to_tsobject
 115  from pandas._libs.tslibs.nattype cimport (
 116      NPY_NAT,
 117      c_NaT as NaT,
 118      checknull_with_nat,
 119  )
 120  from pandas._libs.tslibs.offsets cimport is_offset_object
 121  from pandas._libs.tslibs.period cimport is_period_object
 122  from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
 123  from pandas._libs.tslibs.timezones cimport tz_compare
 124  
 125  # constants that will be compared to potentially arbitrarily large
 126  # python int
 127  cdef:
 128      object oINT64_MAX = <int64_t>INT64_MAX
 129      object oINT64_MIN = <int64_t>INT64_MIN
 130      object oUINT64_MAX = <uint64_t>UINT64_MAX
 131  
 132      float64_t NaN = <float64_t>np.NaN
 133  
 134  # python-visible
 135  i8max = <int64_t>INT64_MAX
 136  u8max = <uint64_t>UINT64_MAX
 137  
 138  
 139  @cython.wraparound(False)
 140  @cython.boundscheck(False)
 141  def memory_usage_of_objects(arr: object[:]) -> int64_t:
 142      """
 143      Return the memory usage of an object array in bytes.
 144  
 145      Does not include the actual bytes of the pointers
 146      """
 147      i: Py_ssize_t
 148      n: Py_ssize_t
 149      size: int64_t
 150  
 151      size = 0
 152      n = len(arr)
 153      for i in range(n):
 154          size += arr[i].__sizeof__()
 155      return size
 156  
 157  
 158  # ----------------------------------------------------------------------
 159  
 160  
def is_scalar(val: object) -> bool:
    """
    Return True if given object is scalar.

    Parameters
    ----------
    val : object
        This includes:

        - numpy array scalar (e.g. np.int64)
        - Python builtin numerics
        - Python builtin byte arrays and strings
        - None
        - datetime.datetime
        - datetime.timedelta
        - Period
        - decimal.Decimal
        - Interval
        - DateOffset
        - Fraction
        - Number.

    Returns
    -------
    bool
        Return True if given object is scalar.

    Examples
    --------
    >>> import datetime
    >>> dt = datetime.datetime(2018, 10, 3)
    >>> pd.api.types.is_scalar(dt)
    True

    >>> pd.api.types.is_scalar([2, 3])
    False

    >>> pd.api.types.is_scalar({0: 1, 2: 3})
    False

    >>> pd.api.types.is_scalar((0, 2))
    False

    pandas supports PEP 3141 numbers:

    >>> from fractions import Fraction
    >>> pd.api.types.is_scalar(Fraction(3, 5))
    True
    """

    # Start with C-optimized checks; the order of the three stages below
    # matters (common scalars fast-path first, then sequence exclusion,
    # then the slower Python-level checks).
    if (cnp.PyArray_IsAnyScalar(val)
            # PyArray_IsAnyScalar is always False for bytearrays on Py3
            or PyDate_Check(val)
            or PyDelta_Check(val)
            or PyTime_Check(val)
            # We differ from numpy, which claims that None is not scalar;
            # see np.isscalar
            or val is C_NA
            or val is None):
        return True

    # Next use C-optimized checks to exclude common non-scalars before falling
    #  back to non-optimized checks.
    if PySequence_Check(val):
        # e.g. list, tuple
        # includes np.ndarray, Series which PyNumber_Check can return True for
        return False

    # Note: PyNumber_Check check includes Decimal, Fraction, numbers.Number
    return (PyNumber_Check(val)
            or is_period_object(val)
            or is_interval(val)
            or is_offset_object(val))
 235  
 236  
cdef inline int64_t get_itemsize(object val):
    """
    Get the itemsize of a NumPy scalar, -1 if not a NumPy scalar.

    Parameters
    ----------
    val : object

    Returns
    -------
    itemsize : int64_t
        Size in bytes of ``val``'s dtype when ``val`` is a NumPy scalar,
        otherwise -1.  (Previous docstring incorrectly described the
        return as ``is_ndarray : bool``.)
    """
    if PyArray_CheckScalar(val):
        # PyArray_DescrFromScalar returns the dtype descriptor of the scalar.
        return cnp.PyArray_DescrFromScalar(val).itemsize
    else:
        return -1
 253  
 254  
 255  def is_iterator(obj: object) -> bool:
 256      """
 257      Check if the object is an iterator.
 258  
 259      This is intended for generators, not list-like objects.
 260  
 261      Parameters
 262      ----------
 263      obj : The object to check
 264  
 265      Returns
 266      -------
 267      is_iter : bool
 268          Whether `obj` is an iterator.
 269  
 270      Examples
 271      --------
 272      >>> import datetime
 273      >>> is_iterator((x for x in []))
 274      True
 275      >>> is_iterator([1, 2, 3])
 276      False
 277      >>> is_iterator(datetime.datetime(2017, 1, 1))
 278      False
 279      >>> is_iterator("foo")
 280      False
 281      >>> is_iterator(1)
 282      False
 283      """
 284      return PyIter_Check(obj)
 285  
 286  
 287  def item_from_zerodim(val: object) -> object:
 288      """
 289      If the value is a zerodim array, return the item it contains.
 290  
 291      Parameters
 292      ----------
 293      val : object
 294  
 295      Returns
 296      -------
 297      object
 298  
 299      Examples
 300      --------
 301      >>> item_from_zerodim(1)
 302      1
 303      >>> item_from_zerodim('foobar')
 304      'foobar'
 305      >>> item_from_zerodim(np.array(1))
 306      1
 307      >>> item_from_zerodim(np.array([1]))
 308      array([1])
 309      """
 310      if cnp.PyArray_IsZeroDim(val):
 311          return cnp.PyArray_ToScalar(cnp.PyArray_DATA(val), val)
 312      return val
 313  
 314  
 315  @cython.wraparound(False)
 316  @cython.boundscheck(False)
 317  def fast_unique_multiple(list arrays, sort: bool = True):
 318      """
 319      Generate a list of unique values from a list of arrays.
 320  
 321      Parameters
 322      ----------
 323      list : array-like
 324          List of array-like objects.
 325      sort : bool
 326          Whether or not to sort the resulting unique list.
 327  
 328      Returns
 329      -------
 330      list of unique values
 331      """
 332      cdef:
 333          ndarray[object] buf
 334          Py_ssize_t k = len(arrays)
 335          Py_ssize_t i, j, n
 336          list uniques = []
 337          dict table = {}
 338          object val, stub = 0
 339  
 340      for i in range(k):
 341          buf = arrays[i]
 342          n = len(buf)
 343          for j in range(n):
 344              val = buf[j]
 345              if val not in table:
 346                  table[val] = stub
 347                  uniques.append(val)
 348  
 349      if sort is None:
 350          try:
 351              uniques.sort()
 352          except TypeError:
 353              warnings.warn(
 354                  "The values in the array are unorderable. "
 355                  "Pass `sort=False` to suppress this warning.",
 356                  RuntimeWarning,
 357                  stacklevel=find_stack_level(),
 358              )
 359              pass
 360  
 361      return uniques
 362  
 363  
 364  @cython.wraparound(False)
 365  @cython.boundscheck(False)
 366  def fast_unique_multiple_list(lists: list, sort: bool | None = True) -> list:
 367      cdef:
 368          list buf
 369          Py_ssize_t k = len(lists)
 370          Py_ssize_t i, j, n
 371          list uniques = []
 372          dict table = {}
 373          object val, stub = 0
 374  
 375      for i in range(k):
 376          buf = lists[i]
 377          n = len(buf)
 378          for j in range(n):
 379              val = buf[j]
 380              if val not in table:
 381                  table[val] = stub
 382                  uniques.append(val)
 383      if sort:
 384          try:
 385              uniques.sort()
 386          except TypeError:
 387              pass
 388  
 389      return uniques
 390  
 391  
 392  @cython.wraparound(False)
 393  @cython.boundscheck(False)
 394  def fast_unique_multiple_list_gen(object gen, bint sort=True) -> list:
 395      """
 396      Generate a list of unique values from a generator of lists.
 397  
 398      Parameters
 399      ----------
 400      gen : generator object
 401          Generator of lists from which the unique list is created.
 402      sort : bool
 403          Whether or not to sort the resulting unique list.
 404  
 405      Returns
 406      -------
 407      list of unique values
 408      """
 409      cdef:
 410          list buf
 411          Py_ssize_t j, n
 412          list uniques = []
 413          dict table = {}
 414          object val, stub = 0
 415  
 416      for buf in gen:
 417          n = len(buf)
 418          for j in range(n):
 419              val = buf[j]
 420              if val not in table:
 421                  table[val] = stub
 422                  uniques.append(val)
 423      if sort:
 424          try:
 425              uniques.sort()
 426          except TypeError:
 427              pass
 428  
 429      return uniques
 430  
 431  
 432  @cython.wraparound(False)
 433  @cython.boundscheck(False)
 434  def dicts_to_array(dicts: list, columns: list):
 435      cdef:
 436          Py_ssize_t i, j, k, n
 437          ndarray[object, ndim=2] result
 438          dict row
 439          object col, onan = np.nan
 440  
 441      k = len(columns)
 442      n = len(dicts)
 443  
 444      result = np.empty((n, k), dtype='O')
 445  
 446      for i in range(n):
 447          row = dicts[i]
 448          for j in range(k):
 449              col = columns[j]
 450              if col in row:
 451                  result[i, j] = row[col]
 452              else:
 453                  result[i, j] = onan
 454  
 455      return result
 456  
 457  
def fast_zip(list ndarrays) -> ndarray[object]:
    """
    For zipping multiple ndarrays into an ndarray of tuples.

    Raises
    ------
    ValueError
        If the arrays are not all the same length as the first one.
    """
    cdef:
        Py_ssize_t i, j, k, n
        ndarray[object, ndim=1] result
        flatiter it
        object val, tup

    k = len(ndarrays)
    n = len(ndarrays[0])

    result = np.empty(n, dtype=object)

    # initialize tuples on first pass
    arr = ndarrays[0]
    it = <flatiter>PyArray_IterNew(arr)
    for i in range(n):
        val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it))
        tup = PyTuple_New(k)

        # PyTuple_SET_ITEM steals a reference to val, so the Py_INCREF
        # below balances the books for the reference GETITEM returned.
        PyTuple_SET_ITEM(tup, 0, val)
        Py_INCREF(val)
        result[i] = tup
        PyArray_ITER_NEXT(it)

    # fill remaining tuple slots, one source array at a time
    for j in range(1, k):
        arr = ndarrays[j]
        it = <flatiter>PyArray_IterNew(arr)
        if len(arr) != n:
            raise ValueError("all arrays must be same length")

        for i in range(n):
            val = PyArray_GETITEM(arr, PyArray_ITER_DATA(it))
            PyTuple_SET_ITEM(result[i], j, val)
            Py_INCREF(val)
            PyArray_ITER_NEXT(it)

    return result
 498  
 499  
 500  def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray:
 501      """
 502      Reverse indexing operation.
 503  
 504      Given `indexer`, make `indexer_inv` of it, such that::
 505  
 506          indexer_inv[indexer[x]] = x
 507  
 508      Parameters
 509      ----------
 510      indexer : np.ndarray[np.intp]
 511      length : int
 512  
 513      Returns
 514      -------
 515      np.ndarray[np.intp]
 516  
 517      Notes
 518      -----
 519      If indexer is not unique, only first occurrence is accounted.
 520      """
 521      cdef:
 522          Py_ssize_t i, n = len(indexer)
 523          ndarray[intp_t, ndim=1] rev_indexer
 524          intp_t idx
 525  
 526      rev_indexer = np.empty(length, dtype=np.intp)
 527      rev_indexer[:] = -1
 528      for i in range(n):
 529          idx = indexer[i]
 530          if idx != -1:
 531              rev_indexer[idx] = i
 532  
 533      return rev_indexer
 534  
 535  
 536  @cython.wraparound(False)
 537  @cython.boundscheck(False)
 538  # Can add const once https://github.com/cython/cython/issues/1772 resolved
 539  def has_infs(floating[:] arr) -> bool:
 540      cdef:
 541          Py_ssize_t i, n = len(arr)
 542          floating inf, neginf, val
 543          bint ret = False
 544  
 545      inf = np.inf
 546      neginf = -inf
 547      with nogil:
 548          for i in range(n):
 549              val = arr[i]
 550              if val == inf or val == neginf:
 551                  ret = True
 552                  break
 553      return ret
 554  
 555  
def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len):
    """
    Convert an indexer array to an equivalent slice when possible.

    If ``indices`` is an in-bounds, evenly spaced sequence, return the
    equivalent ``slice``; otherwise return ``indices`` unchanged.

    Parameters
    ----------
    indices : np.ndarray[intp]
    max_len : int
        Length of the object being indexed; any value outside
        ``[0, max_len)`` disqualifies the conversion.

    Returns
    -------
    slice or np.ndarray[intp]
    """
    cdef:
        Py_ssize_t i, n = len(indices)
        intp_t k, vstart, vlast, v

    if n == 0:
        return slice(0, 0)

    vstart = indices[0]
    if vstart < 0 or max_len <= vstart:
        return indices

    if n == 1:
        return slice(vstart, <intp_t>(vstart + 1))

    vlast = indices[n - 1]
    if vlast < 0 or max_len <= vlast:
        return indices

    # candidate step, inferred from the first two entries
    k = indices[1] - indices[0]
    if k == 0:
        return indices
    else:
        for i in range(2, n):
            v = indices[i]
            if v - indices[i - 1] != k:
                # spacing not constant -> cannot express as a slice
                return indices

        if k > 0:
            return slice(vstart, <intp_t>(vlast + 1), k)
        else:
            if vlast == 0:
                # a stop of -1 would wrap around; None reaches index 0
                return slice(vstart, None, k)
            else:
                return slice(vstart, <intp_t>(vlast - 1), k)
 591  
 592  
@cython.wraparound(False)
@cython.boundscheck(False)
def maybe_booleans_to_slice(ndarray[uint8_t, ndim=1] mask):
    """
    Convert a boolean mask to an equivalent slice when possible.

    If the True entries of ``mask`` form one contiguous run, return the
    corresponding ``slice``; otherwise return the mask viewed as
    ``np.bool_``.
    """
    cdef:
        Py_ssize_t i, n = len(mask)
        Py_ssize_t start = 0, end = 0
        bint started = False, finished = False

    for i in range(n):
        if mask[i]:
            if finished:
                # second run of True values: not representable as a slice
                return mask.view(np.bool_)
            if not started:
                started = True
                start = i
        else:
            if finished:
                continue

            if started:
                end = i
                finished = True

    if not started:
        # no True values at all
        return slice(0, 0)
    if not finished:
        # the True run extends to the end of the mask
        return slice(start, None)
    else:
        return slice(start, end)
 622  
 623  
@cython.wraparound(False)
@cython.boundscheck(False)
def array_equivalent_object(left: object[:], right: object[:]) -> bool:
    """
    Perform an element by element comparison on 1-d object arrays
    taking into account nan positions.
    """
    cdef:
        Py_ssize_t i, n = left.shape[0]
        object x, y

    for i in range(n):
        x = left[i]
        y = right[i]

        # we are either not equal or both nan
        # I think None == None will be true here
        try:
            if PyArray_Check(x) and PyArray_Check(y):
                # nested ndarrays: compare recursively
                if not array_equivalent_object(x, y):
                    return False
            elif (x is C_NA) ^ (y is C_NA):
                # pd.NA on exactly one side can never match
                return False
            elif not (
                PyObject_RichCompareBool(x, y, Py_EQ)
                or is_matching_na(x, y, nan_matches_none=True)
            ):
                return False
        except ValueError:
            # Avoid raising ValueError when comparing Numpy arrays to other types
            if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y):
                # Only compare scalars to scalars and non-scalars to non-scalars
                return False
            elif (not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y))
                  and not (isinstance(x, type(y)) or isinstance(y, type(x)))):
                # Check if non-scalars have the same type
                return False
            # genuinely ambiguous comparison: propagate the ValueError
            raise
    return True
 663  
 664  
ctypedef fused ndarr_object:
    ndarray[object, ndim=1]
    ndarray[object, ndim=2]

# TODO: get rid of this in StringArray and modify
#  and go through ensure_string_array instead
@cython.wraparound(False)
@cython.boundscheck(False)
def convert_nans_to_NA(ndarr_object arr) -> ndarray:
    """
    Helper for StringArray that converts null values that
    are not pd.NA(e.g. np.nan, None) to pd.NA. Assumes elements
    have already been validated as null.
    """
    cdef:
        Py_ssize_t i, m, n
        object val
        ndarr_object result
    # NOTE(review): np.asarray on an already-object-dtype ndarray returns
    # the same array, so the writes below then mutate the caller's array
    # in place — presumably intended for the StringArray use case; confirm.
    result = np.asarray(arr, dtype="object")
    if arr.ndim == 2:
        m, n = arr.shape[0], arr.shape[1]
        for i in range(m):
            for j in range(n):
                val = arr[i, j]
                # anything that is not a str is assumed to be a null value
                if not isinstance(val, str):
                    result[i, j] = <object>C_NA
    else:
        n = len(arr)
        for i in range(n):
            val = arr[i]
            if not isinstance(val, str):
                result[i] = <object>C_NA
    return result
 698  
 699  
 700  @cython.wraparound(False)
 701  @cython.boundscheck(False)
 702  cpdef ndarray[object] ensure_string_array(
 703          arr,
 704          object na_value=np.nan,
 705          bint convert_na_value=True,
 706          bint copy=True,
 707          bint skipna=True,
 708  ):
 709      """
 710      Returns a new numpy array with object dtype and only strings and na values.
 711  
 712      Parameters
 713      ----------
 714      arr : array-like
 715          The values to be converted to str, if needed.
 716      na_value : Any, default np.nan
 717          The value to use for na. For example, np.nan or pd.NA.
 718      convert_na_value : bool, default True
 719          If False, existing na values will be used unchanged in the new array.
 720      copy : bool, default True
 721          Whether to ensure that a new array is returned.
 722      skipna : bool, default True
 723          Whether or not to coerce nulls to their stringified form
 724          (e.g. if False, NaN becomes 'nan').
 725  
 726      Returns
 727      -------
 728      np.ndarray[object]
 729          An array with the input array's elements casted to str or nan-like.
 730      """
 731      cdef:
 732          Py_ssize_t i = 0, n = len(arr)
 733  
 734      if hasattr(arr, "to_numpy"):
 735  
 736          if hasattr(arr, "dtype") and arr.dtype.kind in ["m", "M"]:
 737              # dtype check to exclude DataFrame
 738              # GH#41409 TODO: not a great place for this
 739              out = arr.astype(str).astype(object)
 740              out[arr.isna()] = na_value
 741              return out
 742  
 743          arr = arr.to_numpy()
 744      elif not util.is_array(arr):
 745          arr = np.array(arr, dtype="object")
 746  
 747      result = np.asarray(arr, dtype="object")
 748  
 749      if copy and result is arr:
 750          result = result.copy()
 751  
 752      for i in range(n):
 753          val = arr[i]
 754  
 755          if isinstance(val, str):
 756              continue
 757  
 758          if not checknull(val):
 759              if not util.is_float_object(val):
 760                  # f"{val}" is faster than str(val)
 761                  result[i] = f"{val}"
 762              else:
 763                  # f"{val}" is not always equivalent to str(val) for floats
 764                  result[i] = str(val)
 765          else:
 766              if convert_na_value:
 767                  val = na_value
 768              if skipna:
 769                  result[i] = val
 770              else:
 771                  result[i] = f"{val}"
 772  
 773      return result
 774  
 775  
 776  def is_all_arraylike(obj: list) -> bool:
 777      """
 778      Should we treat these as levels of a MultiIndex, as opposed to Index items?
 779      """
 780      cdef:
 781          Py_ssize_t i, n = len(obj)
 782          object val
 783          bint all_arrays = True
 784  
 785      for i in range(n):
 786          val = obj[i]
 787          if not (isinstance(val, list) or
 788                  util.is_array(val) or hasattr(val, '_data')):
 789              # TODO: EA?
 790              # exclude tuples, frozensets as they may be contained in an Index
 791              all_arrays = False
 792              break
 793  
 794      return all_arrays
 795  
 796  
 797  # ------------------------------------------------------------------------------
 798  # Groupby-related functions
 799  
 800  # TODO: could do even better if we know something about the data. eg, index has
 801  # 1-min data, binner has 5-min data, then bins are just strides in index. This
 802  # is a general, O(max(len(values), len(binner))) method.
@cython.boundscheck(False)
@cython.wraparound(False)
def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner,
                       object closed='left', bint hasnans=False):
    """
    Int64 (datetime64) version of generic python version in ``groupby.py``.

    Parameters
    ----------
    values : ndarray[int64]
        Values to bin; the single linear scan below assumes they are
        sorted ascending (TODO confirm with callers).
    binner : const int64_t[:]
        Bin edges; produces ``len(binner) - 1`` bins.
    closed : {'left', 'right'}, default 'left'
        Which side of each bin interval is closed.
    hasnans : bool, default False
        If True, NPY_NAT entries are stripped first and accounted for
        by an extra leading bin.

    Returns
    -------
    ndarray[int64]
        For each bin, the (cumulative) end position within ``values``.

    Raises
    ------
    ValueError
        If either input is empty, or ``values`` fall outside ``binner``.
    """
    cdef:
        Py_ssize_t lenidx, lenbin, i, j, bc, vc
        ndarray[int64_t, ndim=1] bins
        int64_t l_bin, r_bin, nat_count
        bint right_closed = closed == 'right'

    nat_count = 0
    if hasnans:
        # drop NaT entries; they are re-accounted for at the end
        mask = values == NPY_NAT
        nat_count = np.sum(mask)
        values = values[~mask]

    lenidx = len(values)
    lenbin = len(binner)

    if lenidx <= 0 or lenbin <= 0:
        raise ValueError("Invalid length for values or for binner")

    # check binner fits data
    if values[0] < binner[0]:
        raise ValueError("Values falls before first bin")

    if values[lenidx - 1] > binner[lenbin - 1]:
        raise ValueError("Values falls after last bin")

    bins = np.empty(lenbin - 1, dtype=np.int64)

    j = 0  # index into values
    bc = 0  # bin count

    # linear scan
    if right_closed:
        for i in range(0, lenbin - 1):
            r_bin = binner[i + 1]
            # count values in current bin, advance to next bin
            while j < lenidx and values[j] <= r_bin:
                j += 1
            bins[bc] = j
            bc += 1
    else:
        for i in range(0, lenbin - 1):
            r_bin = binner[i + 1]
            # count values in current bin, advance to next bin
            while j < lenidx and values[j] < r_bin:
                j += 1
            bins[bc] = j
            bc += 1

    if nat_count > 0:
        # shift bins by the number of NaT
        bins = bins + nat_count
        bins = np.insert(bins, 0, nat_count)

    return bins
 864  
 865  
 866  @cython.boundscheck(False)
 867  @cython.wraparound(False)
 868  def get_level_sorter(
 869      ndarray[int64_t, ndim=1] codes, const intp_t[:] starts
 870  ) -> ndarray:
 871      """
 872      Argsort for a single level of a multi-index, keeping the order of higher
 873      levels unchanged. `starts` points to starts of same-key indices w.r.t
 874      to leading levels; equivalent to:
 875          np.hstack([codes[starts[i]:starts[i+1]].argsort(kind='mergesort')
 876              + starts[i] for i in range(len(starts) - 1)])
 877  
 878      Parameters
 879      ----------
 880      codes : np.ndarray[int64_t, ndim=1]
 881      starts : np.ndarray[intp, ndim=1]
 882  
 883      Returns
 884      -------
 885      np.ndarray[np.int, ndim=1]
 886      """
 887      cdef:
 888          Py_ssize_t i, l, r
 889          ndarray[intp_t, ndim=1] out = cnp.PyArray_EMPTY(1, codes.shape, cnp.NPY_INTP, 0)
 890  
 891      for i in range(len(starts) - 1):
 892          l, r = starts[i], starts[i + 1]
 893          out[l:r] = l + codes[l:r].argsort(kind='mergesort')
 894  
 895      return out
 896  
 897  
 898  @cython.boundscheck(False)
 899  @cython.wraparound(False)
 900  def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
 901                     const intp_t[:] labels,
 902                     Py_ssize_t max_bin,
 903                     int axis):
 904      cdef:
 905          Py_ssize_t i, j, k, n
 906          ndarray[int64_t, ndim=2] counts
 907  
 908      assert (axis == 0 or axis == 1)
 909      n, k = (<object>mask).shape
 910  
 911      if axis == 0:
 912          counts = np.zeros((max_bin, k), dtype='i8')
 913          with nogil:
 914              for i in range(n):
 915                  for j in range(k):
 916                      if mask[i, j]:
 917                          counts[labels[i], j] += 1
 918  
 919      else:  # axis == 1
 920          counts = np.zeros((n, max_bin), dtype='i8')
 921          with nogil:
 922              for i in range(n):
 923                  for j in range(k):
 924                      if mask[i, j]:
 925                          counts[i, labels[j]] += 1
 926  
 927      return counts
 928  
 929  
 930  @cython.wraparound(False)
 931  @cython.boundscheck(False)
 932  def generate_slices(const intp_t[:] labels, Py_ssize_t ngroups):
 933      cdef:
 934          Py_ssize_t i, group_size, n, start
 935          intp_t lab
 936          int64_t[::1] starts, ends
 937  
 938      n = len(labels)
 939  
 940      starts = np.zeros(ngroups, dtype=np.int64)
 941      ends = np.zeros(ngroups, dtype=np.int64)
 942  
 943      start = 0
 944      group_size = 0
 945      with nogil:
 946          for i in range(n):
 947              lab = labels[i]
 948              if lab < 0:
 949                  start += 1
 950              else:
 951                  group_size += 1
 952                  if i == n - 1 or lab != labels[i + 1]:
 953                      starts[lab] = start
 954                      ends[lab] = start + group_size
 955                      start += group_size
 956                      group_size = 0
 957  
 958      return np.asarray(starts), np.asarray(ends)
 959  
 960  
def indices_fast(ndarray[intp_t, ndim=1] index, const int64_t[:] labels, list keys,
                 list sorted_labels) -> dict:
    """
    Parameters
    ----------
    index : ndarray[intp]
    labels : ndarray[int64]
    keys : list
    sorted_labels : list[ndarray[int64]]

    Returns
    -------
    dict
        Maps each group key (a tuple of per-level key values, or the bare
        value when there is a single key) to the slice of ``index``
        covering that group's run of labels.
    """
    cdef:
        Py_ssize_t i, j, k, lab, cur, start, n = len(labels)
        dict result = {}
        object tup

    k = len(keys)

    # Start at the first non-null entry
    j = 0
    for j in range(0, n):
        if labels[j] != -1:
            break
    else:
        # for-else: loop never broke, i.e. every label is null -> no groups
        return result
    cur = labels[j]
    start = j

    for i in range(j+1, n):
        lab = labels[i]

        if lab != cur:
            if lab != -1:
                if k == 1:
                    # When k = 1 we do not want to return a tuple as key
                    tup = keys[0][sorted_labels[0][i - 1]]
                else:
                    tup = PyTuple_New(k)
                    for j in range(k):
                        val = keys[j][sorted_labels[j][i - 1]]
                        # PyTuple_SET_ITEM steals a reference, so the
                        # Py_INCREF keeps ``val`` correctly counted.
                        PyTuple_SET_ITEM(tup, j, val)
                        Py_INCREF(val)
                result[tup] = index[start:i]
            start = i
        cur = lab

    # flush the final group
    if k == 1:
        # When k = 1 we do not want to return a tuple as key
        tup = keys[0][sorted_labels[0][n - 1]]
    else:
        tup = PyTuple_New(k)
        for j in range(k):
            val = keys[j][sorted_labels[j][n - 1]]
            PyTuple_SET_ITEM(tup, j, val)
            Py_INCREF(val)
    result[tup] = index[start:]

    return result
1018  
1019  
1020  # core.common import for fast inference checks
1021  
def is_float(obj: object) -> bool:
    """
    Check whether the given object is a float.

    Returns
    -------
    bool
        True when `obj` is a float, False otherwise.
    """
    return bool(util.is_float_object(obj))
1031  
1032  
def is_integer(obj: object) -> bool:
    """
    Check whether the given object is an integer.

    Returns
    -------
    bool
        True when `obj` is an integer, False otherwise.
    """
    return bool(util.is_integer_object(obj))
1042  
1043  
def is_bool(obj: object) -> bool:
    """
    Check whether the given object is a boolean.

    Returns
    -------
    bool
        True when `obj` is a boolean, False otherwise.
    """
    return bool(util.is_bool_object(obj))
1053  
1054  
def is_complex(obj: object) -> bool:
    """
    Check whether the given object is complex.

    Returns
    -------
    bool
        True when `obj` is complex, False otherwise.
    """
    return bool(util.is_complex_object(obj))
1064  
1065  
cpdef bint is_decimal(object obj):
    # True for decimal.Decimal instances (and subclasses).
    return isinstance(obj, Decimal)
1068  
1069  
cpdef bint is_interval(object obj):
    # Duck-typed check on the `_typ` attribute.  The attribute name itself
    # serves as the getattr default sentinel: objects without `_typ` get
    # '_typ', which can never equal 'interval', so they return False.
    return getattr(obj, '_typ', '_typ') == 'interval'
1072  
1073  
def is_period(val: object) -> bool:
    """
    Check whether the given object is a Period.

    Returns
    -------
    bool
        True when `val` is a Period, False otherwise.
    """
    return bool(is_period_object(val))
1083  
1084  
def is_list_like(obj: object, allow_sets: bool = True) -> bool:
    """
    Check if the object is list-like.

    List-like objects include, for example, Python lists, tuples, sets,
    NumPy arrays, and pandas Series.  Strings, datetime objects, and
    zero-dimensional (scalar-like) arrays are not considered list-like.

    Parameters
    ----------
    obj : object
        Object to check.
    allow_sets : bool, default True
        If this parameter is False, sets will not be considered list-like.

    Returns
    -------
    bool
        Whether `obj` has list-like properties.

    Examples
    --------
    >>> import datetime
    >>> is_list_like([1, 2, 3])
    True
    >>> is_list_like({1, 2, 3})
    True
    >>> is_list_like(datetime.datetime(2017, 1, 1))
    False
    >>> is_list_like("foo")
    False
    >>> is_list_like(1)
    False
    >>> is_list_like(np.array([2]))
    True
    >>> is_list_like(np.array(2))
    False
    """
    # All of the actual logic lives in the C-level helper.
    return c_is_list_like(obj, allow_sets)
1125  
1126  
cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1:
    """C-level implementation backing is_list_like."""
    # Performance fast paths for the most common inputs.
    if util.is_array(obj):
        # ndarray: list-like unless zero-dimensional (effectively a scalar)
        return not cnp.PyArray_IsZeroDim(obj)
    if isinstance(obj, list):
        return True

    # Generic path, as a series of guard clauses.
    # Must be iterable (equiv: `isinstance(obj, abc.Iterable)`) and not a type.
    if getattr(obj, "__iter__", None) is None or isinstance(obj, type):
        return False
    # We do not count strings/unicode/bytes as list-like.
    if isinstance(obj, (str, bytes)):
        return False
    # Exclude zero-dimensional duck-arrays, effectively scalars.
    if hasattr(obj, "ndim") and obj.ndim == 0:
        return False
    # Exclude sets if allow_sets is False.
    if allow_sets is False and isinstance(obj, abc.Set):
        return False
    return True
1145  
1146  
# Mapping used by _try_infer_map: dtype names, dtype.kind characters, and
# scalar types -> the label strings returned by infer_dtype.
_TYPE_MAP = {
    "categorical": "categorical",
    "category": "categorical",
    "int8": "integer",
    "int16": "integer",
    "int32": "integer",
    "int64": "integer",
    "i": "integer",
    "uint8": "integer",
    "uint16": "integer",
    "uint32": "integer",
    "uint64": "integer",
    "u": "integer",
    "float32": "floating",
    "float64": "floating",
    "f": "floating",
    "complex64": "complex",
    "complex128": "complex",
    "c": "complex",
    "string": "string",
    str: "string",
    "S": "bytes",
    "U": "string",
    "bool": "boolean",
    "b": "boolean",
    "datetime64[ns]": "datetime64",
    "M": "datetime64",
    "timedelta64[ns]": "timedelta64",
    "m": "timedelta64",
    "interval": "interval",
    Period: "period",
}
1179  
# These dtypes only exist on certain platforms / NumPy builds; register
# them in _TYPE_MAP only when the running NumPy provides them.  Using
# hasattr instead of a bare attribute-access statement inside try/except
# AttributeError expresses the availability check directly.
if hasattr(np, "float128"):
    _TYPE_MAP["float128"] = "floating"
if hasattr(np, "complex256"):
    _TYPE_MAP["complex256"] = "complex"
if hasattr(np, "float16"):
    _TYPE_MAP["float16"] = "floating"
1196  
1197  
@cython.internal
cdef class Seen:
    """
    Class for keeping track of the types of elements
    encountered when trying to perform type conversions.
    """

    cdef:
        bint int_             # seen_int
        bint nat_             # seen nat
        bint bool_            # seen_bool
        bint null_            # seen_null
        bint nan_             # seen_np.nan
        bint uint_            # seen_uint (unsigned integer)
        bint sint_            # seen_sint (signed integer)
        bint float_           # seen_float
        bint object_          # seen_object
        bint complex_         # seen_complex
        bint datetime_        # seen_datetime
        bint coerce_numeric   # coerce data to numeric
        bint timedelta_       # seen_timedelta
        bint datetimetz_      # seen_datetimetz
        bint period_          # seen_period
        bint interval_        # seen_interval

    def __cinit__(self, bint coerce_numeric=False):
        """
        Initialize a Seen instance.

        Parameters
        ----------
        coerce_numeric : bool, default False
            Whether or not to force conversion to a numeric data type if
            initial methods to convert to numeric fail.
        """
        # All "seen" flags start False; they are flipped as values are
        # encountered during a conversion pass.
        self.int_ = False
        self.nat_ = False
        self.bool_ = False
        self.null_ = False
        self.nan_ = False
        self.uint_ = False
        self.sint_ = False
        self.float_ = False
        self.object_ = False
        self.complex_ = False
        self.datetime_ = False
        self.timedelta_ = False
        self.datetimetz_ = False
        self.period_ = False
        self.interval_ = False
        self.coerce_numeric = coerce_numeric

    cdef inline bint check_uint64_conflict(self) except -1:
        """
        Check whether we can safely convert a uint64 array to a numeric dtype.

        There are two cases when conversion to numeric dtype with a uint64
        array is not safe (and will therefore not be performed)

        1) A NaN element is encountered.

           uint64 cannot be safely cast to float64 due to truncation issues
           at the extreme ends of the range.

        2) A negative number is encountered.

           There is no numerical dtype that can hold both negative numbers
           and numbers greater than INT64_MAX. Hence, at least one number
           will be improperly cast if we convert to a numeric dtype.

        Returns
        -------
        bool
            Whether or not we should return the original input array to avoid
            data truncation.

        Raises
        ------
        ValueError
            uint64 elements were detected, and at least one of the
            two conflict cases was also detected. However, we are
            trying to force conversion to a numeric dtype.
        """
        return (self.uint_ and (self.null_ or self.sint_)
                and not self.coerce_numeric)

    cdef inline saw_null(self):
        """
        Set flags indicating that a null value was encountered.
        """
        # A null is representable as a float NaN, hence float_ as well.
        self.null_ = True
        self.float_ = True

    cdef saw_int(self, object val):
        """
        Set flags indicating that an integer value was encountered.

        In addition to setting a flag that an integer was seen, we
        also set two flags depending on the type of integer seen:

        1) sint_ : a signed numpy integer type or a negative (signed) number in the
                   range of [-2**63, 0) was encountered
        2) uint_ : an unsigned numpy integer type or a positive number in the range of
                   [2**63, 2**64) was encountered

        Parameters
        ----------
        val : Python int
            Value with which to set the flags.
        """
        self.int_ = True
        self.sint_ = (
            self.sint_
            or (oINT64_MIN <= val < 0)
            # Cython equivalent of `isinstance(val, np.signedinteger)`
            or PyObject_TypeCheck(val, &PySignedIntegerArrType_Type)
        )
        self.uint_ = (
            self.uint_
            or (oINT64_MAX < val <= oUINT64_MAX)
            # Cython equivalent of `isinstance(val, np.unsignedinteger)`
            or PyObject_TypeCheck(val, &PyUnsignedIntegerArrType_Type)
        )

    @property
    def numeric_(self):
        # True once any numeric (complex, float, or int) value has been seen.
        return self.complex_ or self.float_ or self.int_

    @property
    def is_bool(self):
        # Boolean interpretation is only possible if no datetime, numeric,
        # timedelta, or NaT values have been seen.
        return not (self.datetime_ or self.numeric_ or self.timedelta_
                    or self.nat_)

    @property
    def is_float_or_complex(self):
        # Float/complex interpretation is only possible if no bool,
        # datetime, timedelta, or NaT values have been seen.
        return not (self.bool_ or self.datetime_ or self.timedelta_
                    or self.nat_)
1335  
1336  
cdef object _try_infer_map(object dtype):
    """
    Look up `dtype` in _TYPE_MAP via several of its attributes.

    Returns the mapped label string, or None when no attribute matches.
    """
    cdef:
        object candidate
        str attr
    for attr in ["name", "kind", "base", "type"]:
        candidate = getattr(dtype, attr, None)
        if candidate in _TYPE_MAP:
            return _TYPE_MAP[candidate]
    return None
1349  
1350  
def infer_dtype(value: object, skipna: bool = True) -> str:
    """
    Return a string label of the type of a scalar or list-like of values.

    Parameters
    ----------
    value : scalar, list, ndarray, or pandas type
    skipna : bool, default True
        Ignore NaN values when inferring the type.

    Returns
    -------
    str
        Describing the common type of the input data.
    Results can include:

    - string
    - bytes
    - floating
    - integer
    - mixed-integer
    - mixed-integer-float
    - decimal
    - complex
    - categorical
    - boolean
    - datetime64
    - datetime
    - date
    - timedelta64
    - timedelta
    - time
    - period
    - mixed
    - unknown-array

    Raises
    ------
    TypeError
        If ndarray-like but cannot infer the dtype

    Notes
    -----
    - 'mixed' is the catchall for anything that is not otherwise
      specialized
    - 'mixed-integer-float' are floats and integers
    - 'mixed-integer' are integers mixed with non-integers
    - 'unknown-array' is the catchall for something that *is* an array (has
      a dtype attribute), but has a dtype unknown to pandas (e.g. external
      extension array)

    Examples
    --------
    >>> import datetime
    >>> infer_dtype(['foo', 'bar'])
    'string'

    >>> infer_dtype(['a', np.nan, 'b'], skipna=True)
    'string'

    >>> infer_dtype(['a', np.nan, 'b'], skipna=False)
    'mixed'

    >>> infer_dtype([b'foo', b'bar'])
    'bytes'

    >>> infer_dtype([1, 2, 3])
    'integer'

    >>> infer_dtype([1, 2, 3.5])
    'mixed-integer-float'

    >>> infer_dtype([1.0, 2.0, 3.5])
    'floating'

    >>> infer_dtype(['a', 1])
    'mixed-integer'

    >>> infer_dtype([Decimal(1), Decimal(2.0)])
    'decimal'

    >>> infer_dtype([True, False])
    'boolean'

    >>> infer_dtype([True, False, np.nan])
    'boolean'

    >>> infer_dtype([pd.Timestamp('20130101')])
    'datetime'

    >>> infer_dtype([datetime.date(2013, 1, 1)])
    'date'

    >>> infer_dtype([np.datetime64('2013-01-01')])
    'datetime64'

    >>> infer_dtype([datetime.timedelta(0, 1, 1)])
    'timedelta'

    >>> infer_dtype(pd.Series(list('aabc')).astype('category'))
    'categorical'
    """
    cdef:
        Py_ssize_t i, n
        object val
        ndarray values
        bint seen_pdnat = False
        bint seen_val = False
        flatiter it

    # Normalize the input into an ndarray `values`, returning early when
    # the dtype alone already determines the answer.
    if util.is_array(value):
        values = value
    elif hasattr(value, "inferred_type") and skipna is False:
        # Index, use the cached attribute if possible, populate the cache otherwise
        return value.inferred_type
    elif hasattr(value, "dtype"):
        # this will handle ndarray-like
        # e.g. categoricals
        dtype = value.dtype
        if not cnp.PyArray_DescrCheck(dtype):
            # i.e. not isinstance(dtype, np.dtype)
            inferred = _try_infer_map(value.dtype)
            if inferred is not None:
                return inferred
            return "unknown-array"

        # Unwrap Series/Index
        values = np.asarray(value)

    else:
        if not isinstance(value, list):
            value = list(value)

        from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
        values = construct_1d_object_array_from_listlike(value)

    val = _try_infer_map(values.dtype)
    if val is not None:
        # Anything other than object-dtype should return here.
        return val

    if values.descr.type_num != NPY_OBJECT:
        # i.e. values.dtype != np.object
        # This should not be reached
        values = values.astype(object)

    n = cnp.PyArray_SIZE(values)
    if n == 0:
        return "empty"

    # Iterate until we find our first valid value. We will use this
    #  value to decide which of the is_foo_array functions to call.
    it = PyArray_IterNew(values)
    for i in range(n):
        # The PyArray_GETITEM and PyArray_ITER_NEXT are faster
        #  equivalents to `val = values[i]`
        val = PyArray_GETITEM(values, PyArray_ITER_DATA(it))
        PyArray_ITER_NEXT(it)

        # do not use checknull to keep
        # np.datetime64('nat') and np.timedelta64('nat')
        if val is None or util.is_nan(val) or val is C_NA:
            pass
        elif val is NaT:
            seen_pdnat = True
        else:
            seen_val = True
            break

    # if all values are nan/NaT
    if seen_val is False and seen_pdnat is True:
        return "datetime"
        # float/object nan is handled in latter logic
    if seen_val is False and skipna:
        return "empty"

    # Dispatch on the type of the first non-null value found above, then
    # confirm the remaining values agree via the matching is_*_array check.
    if util.is_datetime64_object(val):
        if is_datetime64_array(values, skipna=skipna):
            return "datetime64"

    elif is_timedelta(val):
        if is_timedelta_or_timedelta64_array(values, skipna=skipna):
            return "timedelta"

    elif util.is_integer_object(val):
        # ordering matters here; this check must come after the is_timedelta
        #  check otherwise numpy timedelta64 objects would come through here

        if is_integer_array(values, skipna=skipna):
            return "integer"
        elif is_integer_float_array(values, skipna=skipna):
            if is_integer_na_array(values, skipna=skipna):
                return "integer-na"
            else:
                return "mixed-integer-float"
        return "mixed-integer"

    elif PyDateTime_Check(val):
        if is_datetime_array(values, skipna=skipna):
            return "datetime"
        elif is_date_array(values, skipna=skipna):
            return "date"

    elif PyDate_Check(val):
        if is_date_array(values, skipna=skipna):
            return "date"

    elif PyTime_Check(val):
        if is_time_array(values, skipna=skipna):
            return "time"

    elif is_decimal(val):
        if is_decimal_array(values, skipna=skipna):
            return "decimal"

    elif util.is_complex_object(val):
        if is_complex_array(values):
            return "complex"

    elif util.is_float_object(val):
        if is_float_array(values):
            return "floating"
        elif is_integer_float_array(values, skipna=skipna):
            if is_integer_na_array(values, skipna=skipna):
                return "integer-na"
            else:
                return "mixed-integer-float"

    elif util.is_bool_object(val):
        if is_bool_array(values, skipna=skipna):
            return "boolean"

    elif isinstance(val, str):
        if is_string_array(values, skipna=skipna):
            return "string"

    elif isinstance(val, bytes):
        if is_bytes_array(values, skipna=skipna):
            return "bytes"

    elif is_period_object(val):
        if is_period_array(values, skipna=skipna):
            return "period"

    elif is_interval(val):
        if is_interval_array(values):
            return "interval"

    # Fallback second pass: if any integers are present among the
    # otherwise-unclassified values, report "mixed-integer".
    cnp.PyArray_ITER_RESET(it)
    for i in range(n):
        val = PyArray_GETITEM(values, PyArray_ITER_DATA(it))
        PyArray_ITER_NEXT(it)

        if util.is_integer_object(val):
            return "mixed-integer"

    return "mixed"
1608  
1609  
def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]:
    """
    Infer if we have a datetime or timedelta array.
    - date: we have *only* date and maybe strings, nulls
    - datetime: we have *only* datetimes and maybe strings, nulls
    - timedelta: we have *only* timedeltas and maybe strings, nulls
    - nat: we do not have *any* date, datetimes or timedeltas, but do have
      at least a NaT
    - mixed: other objects (strings, a mix of tz-aware and tz-naive, or
                            actual objects)

    Parameters
    ----------
    arr : ndarray[object]

    Returns
    -------
    str: {datetime, timedelta, date, nat, mixed}
    bool
        Whether any strings were encountered while scanning.
    """
    cdef:
        Py_ssize_t i, n = len(arr)
        bint seen_timedelta = False, seen_date = False, seen_datetime = False
        bint seen_tz_aware = False, seen_tz_naive = False
        bint seen_nat = False, seen_str = False
        bint seen_period = False, seen_interval = False
        list objs = []
        object v

    for i in range(n):
        v = arr[i]
        if isinstance(v, str):
            objs.append(v)
            seen_str = True

            # a small sample of strings is enough for the trial
            # conversion attempted at the end
            if len(objs) == 3:
                break

        elif v is None or util.is_nan(v):
            # nan or None
            pass
        elif v is NaT:
            seen_nat = True
        elif PyDateTime_Check(v):
            # datetime
            seen_datetime = True

            # disambiguate between tz-naive and tz-aware
            if v.tzinfo is None:
                seen_tz_naive = True
            else:
                seen_tz_aware = True

            if seen_tz_naive and seen_tz_aware:
                return "mixed", seen_str
        elif util.is_datetime64_object(v):
            # np.datetime64
            seen_datetime = True
        elif PyDate_Check(v):
            seen_date = True
        elif is_timedelta(v):
            # timedelta, or timedelta64
            seen_timedelta = True
        elif is_period_object(v):
            seen_period = True
            break
        elif is_interval(v):
            seen_interval = True
            break
        else:
            # any other object type -> not datetimelike
            return "mixed", seen_str

    if seen_period:
        # "period" requires that *all* elements are periods
        if is_period_array(arr):
            return "period", seen_str
        return "mixed", seen_str

    if seen_interval:
        # likewise, "interval" requires that all elements are intervals
        if is_interval_array(arr):
            return "interval", seen_str
        return "mixed", seen_str

    if seen_date and not (seen_datetime or seen_timedelta):
        return "date", seen_str
    elif seen_datetime and not seen_timedelta:
        return "datetime", seen_str
    elif seen_timedelta and not seen_datetime:
        return "timedelta", seen_str
    elif seen_nat:
        return "nat", seen_str

    # short-circuit by trying to
    # actually convert these strings
    # this is for performance as we don't need to try
    # convert *every* string array
    if len(objs):
        try:
            # require_iso8601 as in maybe_infer_to_datetimelike
            array_to_datetime(objs, errors="raise", require_iso8601=True)
            return "datetime", seen_str
        except (ValueError, TypeError):
            pass

        # we are *not* going to infer from strings
        # for timedelta as too much ambiguity

    return "mixed", seen_str
1717  
1718  
cdef inline bint is_timedelta(object o):
    # Matches both datetime.timedelta and np.timedelta64 scalars.
    return PyDelta_Check(o) or util.is_timedelta64_object(o)
1721  
1722  
@cython.internal
cdef class Validator:
    """
    Base class for the is_*_array checks.

    `validate` returns True when every element of an object-dtype array
    satisfies `is_value_typed` (with NA values also accepted when
    skipna=True).  Subclasses override is_value_typed, and optionally
    is_array_typed / is_valid_null.
    """

    cdef:
        Py_ssize_t n        # number of elements being validated
        dtype dtype         # dtype of the array being validated
        bint skipna         # whether NA values count as valid

    def __cinit__(self, Py_ssize_t n, dtype dtype=np.dtype(np.object_),
                  bint skipna=False):
        self.n = n
        self.dtype = dtype
        self.skipna = skipna

    cdef bint validate(self, ndarray values) except -1:
        # An empty array never validates.
        if not self.n:
            return False

        if self.is_array_typed():
            # i.e. this ndarray is already of the desired dtype
            return True
        elif self.dtype.type_num == NPY_OBJECT:
            if self.skipna:
                return self._validate_skipna(values)
            else:
                return self._validate(values)
        else:
            return False

    @cython.wraparound(False)
    @cython.boundscheck(False)
    cdef bint _validate(self, ndarray values) except -1:
        # Strict element-wise check: every value must be of the target type.
        cdef:
            Py_ssize_t i
            Py_ssize_t n = values.size
            flatiter it = PyArray_IterNew(values)

        for i in range(n):
            # The PyArray_GETITEM and PyArray_ITER_NEXT are faster
            #  equivalents to `val = values[i]`
            val = PyArray_GETITEM(values, PyArray_ITER_DATA(it))
            PyArray_ITER_NEXT(it)
            if not self.is_valid(val):
                return False

        return True

    @cython.wraparound(False)
    @cython.boundscheck(False)
    cdef bint _validate_skipna(self, ndarray values) except -1:
        # Like _validate, but NA values are also accepted.
        cdef:
            Py_ssize_t i
            Py_ssize_t n = values.size
            flatiter it = PyArray_IterNew(values)

        for i in range(n):
            # The PyArray_GETITEM and PyArray_ITER_NEXT are faster
            #  equivalents to `val = values[i]`
            val = PyArray_GETITEM(values, PyArray_ITER_DATA(it))
            PyArray_ITER_NEXT(it)
            if not self.is_valid_skipna(val):
                return False

        return True

    cdef bint is_valid(self, object value) except -1:
        return self.is_value_typed(value)

    cdef bint is_valid_skipna(self, object value) except -1:
        # NA values are considered valid in skipna mode.
        return self.is_valid(value) or self.is_valid_null(value)

    cdef bint is_value_typed(self, object value) except -1:
        # Subclass responsibility: is `value` of the target type?
        raise NotImplementedError(f"{type(self).__name__} child class "
                                  "must define is_value_typed")

    cdef bint is_valid_null(self, object value) except -1:
        # None, pd.NA, and NaN are the generic NA values.
        return value is None or value is C_NA or util.is_nan(value)

    cdef bint is_array_typed(self) except -1:
        # True when the array's dtype alone already guarantees validity.
        return False
1803  
1804  
@cython.internal
cdef class BoolValidator(Validator):
    # Validator backing is_bool_array: accepts boolean scalars.
    cdef inline bint is_value_typed(self, object value) except -1:
        return util.is_bool_object(value)

    cdef inline bint is_array_typed(self) except -1:
        # A bool-dtype ndarray is trivially all-boolean.
        return issubclass(self.dtype.type, np.bool_)
1812  
1813  
cpdef bint is_bool_array(ndarray values, bint skipna=False):
    """Return True if `values` contains only booleans (NAs allowed if skipna)."""
    cdef:
        BoolValidator checker = BoolValidator(
            len(values), values.dtype, skipna=skipna
        )
    return checker.validate(values)
1820  
1821  
@cython.internal
cdef class IntegerValidator(Validator):
    # Validator backing is_integer_array: accepts integer scalars.
    cdef inline bint is_value_typed(self, object value) except -1:
        return util.is_integer_object(value)

    cdef inline bint is_array_typed(self) except -1:
        # An integer-dtype ndarray is trivially all-integer.
        return issubclass(self.dtype.type, np.integer)
1829  
1830  
# Note: only python-exposed for tests
cpdef bint is_integer_array(ndarray values, bint skipna=True):
    """Return True if `values` contains only integers (NAs allowed if skipna)."""
    cdef:
        IntegerValidator checker = IntegerValidator(
            len(values), values.dtype, skipna=skipna
        )
    return checker.validate(values)
1838  
1839  
@cython.internal
cdef class IntegerNaValidator(Validator):
    # Validator backing is_integer_na_array: accepts integers and float NaNs.
    cdef inline bint is_value_typed(self, object value) except -1:
        return (util.is_integer_object(value)
                or (util.is_nan(value) and util.is_float_object(value)))
1845  
1846  
cdef bint is_integer_na_array(ndarray values, bint skipna=True):
    # True when `values` holds only integers and float NaNs.
    cdef:
        IntegerNaValidator checker = IntegerNaValidator(
            len(values), values.dtype, skipna=skipna
        )
    return checker.validate(values)
1852  
1853  
@cython.internal
cdef class IntegerFloatValidator(Validator):
    # Validator backing is_integer_float_array: accepts ints and floats.
    cdef inline bint is_value_typed(self, object value) except -1:
        return util.is_integer_object(value) or util.is_float_object(value)

    cdef inline bint is_array_typed(self) except -1:
        # An integer-dtype ndarray trivially satisfies the int-or-float check.
        return issubclass(self.dtype.type, np.integer)
1861  
1862  
cdef bint is_integer_float_array(ndarray values, bint skipna=True):
    # True when `values` holds only integers and/or floats.
    cdef:
        IntegerFloatValidator checker = IntegerFloatValidator(
            len(values), values.dtype, skipna=skipna
        )
    return checker.validate(values)
1869  
1870  
@cython.internal
cdef class FloatValidator(Validator):
    # Validator backing is_float_array: accepts float scalars.
    cdef inline bint is_value_typed(self, object value) except -1:
        return util.is_float_object(value)

    cdef inline bint is_array_typed(self) except -1:
        # A float-dtype ndarray is trivially all-float.
        return issubclass(self.dtype.type, np.floating)
1878  
1879  
# Note: only python-exposed for tests
cpdef bint is_float_array(ndarray values):
    """Return True if `values` contains only floats."""
    cdef:
        FloatValidator checker = FloatValidator(len(values), values.dtype)
    return checker.validate(values)
1885  
1886  
@cython.internal
cdef class ComplexValidator(Validator):
    # Validator backing is_complex_array: accepts complex values and float NaN.
    cdef inline bint is_value_typed(self, object value) except -1:
        return (
            util.is_complex_object(value)
            or (util.is_float_object(value) and is_nan(value))
        )

    cdef inline bint is_array_typed(self) except -1:
        # A complex-dtype ndarray is trivially all-complex.
        return issubclass(self.dtype.type, np.complexfloating)
1897  
1898  
cdef bint is_complex_array(ndarray values):
    # True when every element is complex (or a float NaN).
    cdef:
        ComplexValidator checker = ComplexValidator(len(values), values.dtype)
    return checker.validate(values)
1903  
1904  
@cython.internal
cdef class DecimalValidator(Validator):
    # Validator backing is_decimal_array: accepts decimal.Decimal instances.
    cdef inline bint is_value_typed(self, object value) except -1:
        return is_decimal(value)
1909  
1910  
cdef bint is_decimal_array(ndarray values, bint skipna=False):
    # True when every element is a decimal.Decimal (NAs allowed if skipna).
    cdef:
        DecimalValidator checker = DecimalValidator(
            len(values), values.dtype, skipna=skipna
        )
    return checker.validate(values)
1917  
1918  
@cython.internal
cdef class StringValidator(Validator):
    # Validator backing is_string_array: accepts str instances.
    cdef inline bint is_value_typed(self, object value) except -1:
        return isinstance(value, str)

    cdef inline bint is_array_typed(self) except -1:
        # A str-dtype ndarray is trivially all-string.
        return issubclass(self.dtype.type, np.str_)
1926  
1927  
cpdef bint is_string_array(ndarray values, bint skipna=False):
    """Return True if `values` contains only strings (NAs allowed if skipna)."""
    cdef:
        StringValidator checker = StringValidator(
            len(values), values.dtype, skipna=skipna
        )
    return checker.validate(values)
1934  
1935  
@cython.internal
cdef class BytesValidator(Validator):
    # A value is "typed" iff it is a python bytes instance.
    cdef inline bint is_value_typed(self, object value) except -1:
        return isinstance(value, bytes)

    cdef inline bint is_array_typed(self) except -1:
        # Shortcut: the ndarray's dtype is already a numpy bytes dtype.
        return issubclass(self.dtype.type, np.bytes_)
1943  
1944  
cdef bint is_bytes_array(ndarray values, bint skipna=False):
    """Return True if `values` consists only of bytes elements."""
    cdef:
        BytesValidator checker = BytesValidator(
            len(values),
            values.dtype,
            skipna=skipna,
        )
    return checker.validate(values)
1950  
1951  
@cython.internal
cdef class TemporalValidator(Validator):
    """
    Validator base class for temporal (datetime-/timedelta-like) checks.

    In addition to the base Validator behavior, tracks whether every value
    seen so far was a dtype-agnostic NA (None or NaN); if so, the temporal
    dtype is not inferred (see ``_validate_skipna``).
    """
    cdef:
        # True until a value other than None/NaN is observed.
        bint all_generic_na

    def __cinit__(self, Py_ssize_t n, dtype dtype=np.dtype(np.object_),
                  bint skipna=False):
        self.n = n
        self.dtype = dtype
        self.skipna = skipna
        self.all_generic_na = True

    cdef inline bint is_valid(self, object value) except -1:
        # Valid if it matches the target type or is a dtype-specific null.
        return self.is_value_typed(value) or self.is_valid_null(value)

    cdef bint is_valid_null(self, object value) except -1:
        # Abstract: subclasses must say which nulls count as "typed" nulls
        # (e.g. NaT vs. a plain NaN).
        raise NotImplementedError(f"{type(self).__name__} child class "
                                  "must define is_valid_null")

    cdef inline bint is_valid_skipna(self, object value) except -1:
        cdef:
            bint is_typed_null = self.is_valid_null(value)
            bint is_generic_null = value is None or util.is_nan(value)
        # Record that at least one non-generic-NA value was encountered.
        if not is_generic_null:
            self.all_generic_na = False
        return self.is_value_typed(value) or is_typed_null or is_generic_null

    cdef bint _validate_skipna(self, ndarray values) except -1:
        """
        If we _only_ saw non-dtype-specific NA values, even if they are valid
        for this dtype, we do not infer this dtype.
        """
        return Validator._validate_skipna(self, values) and not self.all_generic_na
1985  
1986  
@cython.internal
cdef class DatetimeValidator(TemporalValidator):
    # A value is "typed" iff it is a python datetime.datetime
    # (PyDateTime_Check includes datetime subclasses).
    cdef bint is_value_typed(self, object value) except -1:
        return PyDateTime_Check(value)

    cdef inline bint is_valid_null(self, object value) except -1:
        # Dtype-specific null for datetimes (helper defined elsewhere in file).
        return is_null_datetime64(value)
1994  
1995  
cpdef bint is_datetime_array(ndarray values, bint skipna=True):
    """Return True if `values` consists only of datetime objects (and NAs)."""
    cdef:
        DatetimeValidator checker = DatetimeValidator(
            len(values), skipna=skipna
        )
    return checker.validate(values)
2001  
2002  
@cython.internal
cdef class Datetime64Validator(DatetimeValidator):
    # Narrower than the parent: only numpy datetime64 scalars count,
    # not python datetime objects.
    cdef inline bint is_value_typed(self, object value) except -1:
        return util.is_datetime64_object(value)
2007  
2008  
# Note: only python-exposed for tests
cpdef bint is_datetime64_array(ndarray values, bint skipna=True):
    """Return True if `values` consists only of datetime64 scalars (and NAs)."""
    cdef:
        Datetime64Validator checker = Datetime64Validator(
            len(values), skipna=skipna
        )
    return checker.validate(values)
2015  
2016  
@cython.internal
cdef class AnyDatetimeValidator(DatetimeValidator):
    # Accepts datetime64 scalars as well as *naive* python datetimes;
    # tz-aware datetimes are deliberately not considered typed here.
    cdef inline bint is_value_typed(self, object value) except -1:
        if util.is_datetime64_object(value):
            return True
        return PyDateTime_Check(value) and value.tzinfo is None
2023  
2024  
cdef bint is_datetime_or_datetime64_array(ndarray values, bint skipna=True):
    """Return True if `values` holds only naive datetimes/datetime64s (and NAs)."""
    cdef:
        AnyDatetimeValidator checker = AnyDatetimeValidator(
            len(values), skipna=skipna
        )
    return checker.validate(values)
2030  
2031  
# Note: only python-exposed for tests
def is_datetime_with_singletz_array(values: ndarray) -> bool:
    """
    Check values have the same tzinfo attribute.
    Doesn't check values are datetime-like types.
    """
    cdef:
        Py_ssize_t i = 0, j, n = len(values)
        object base_val, base_tz, val, tz

    if n == 0:
        return False

    # Get a reference timezone to compare with the rest of the tzs in the array
    for i in range(n):
        base_val = values[i]
        if base_val is not NaT and base_val is not None and not util.is_nan(base_val):
            base_tz = getattr(base_val, 'tzinfo', None)
            break

    # NOTE: `i` deliberately carries over from the loop above, so this scan
    #  restarts at the first non-NA value; comparing that value against its
    #  own tz is a harmless no-op.
    for j in range(i, n):
        # Compare val's timezone with the reference timezone
        # NaT can coexist with tz-aware datetimes, so skip if encountered
        val = values[j]
        if val is not NaT and val is not None and not util.is_nan(val):
            tz = getattr(val, 'tzinfo', None)
            if not tz_compare(base_tz, tz):
                return False

    # Note: we should only be called if a tzaware datetime has been seen,
    #  so base_tz should always be set at this point.
    return True
2064  
2065  
@cython.internal
cdef class TimedeltaValidator(TemporalValidator):
    # A value is "typed" iff it is a python datetime.timedelta
    # (PyDelta_Check includes timedelta subclasses).
    cdef bint is_value_typed(self, object value) except -1:
        return PyDelta_Check(value)

    cdef inline bint is_valid_null(self, object value) except -1:
        # Dtype-specific null for timedeltas (helper defined elsewhere in file).
        return is_null_timedelta64(value)
2073  
2074  
@cython.internal
cdef class AnyTimedeltaValidator(TimedeltaValidator):
    # Broader than the parent: is_timedelta (defined elsewhere in this file)
    # presumably accepts timedelta64 scalars in addition to python timedeltas.
    cdef inline bint is_value_typed(self, object value) except -1:
        return is_timedelta(value)
2079  
2080  
# Note: only python-exposed for tests
cpdef bint is_timedelta_or_timedelta64_array(ndarray values, bint skipna=True):
    """
    Infer with timedeltas and/or nat/none.
    """
    cdef:
        AnyTimedeltaValidator checker = AnyTimedeltaValidator(
            len(values), skipna=skipna
        )
    return checker.validate(values)
2090  
2091  
@cython.internal
cdef class DateValidator(Validator):
    # A value is "typed" iff PyDate_Check passes; note this also accepts
    # datetime.datetime, which subclasses datetime.date.
    cdef inline bint is_value_typed(self, object value) except -1:
        return PyDate_Check(value)
2096  
2097  
# Note: only python-exposed for tests
cpdef bint is_date_array(ndarray values, bint skipna=False):
    """Return True if `values` consists only of date-like objects (PyDate_Check)."""
    cdef:
        DateValidator checker = DateValidator(len(values), skipna=skipna)
    return checker.validate(values)
2103  
2104  
@cython.internal
cdef class TimeValidator(Validator):
    # A value is "typed" iff it is a python datetime.time instance.
    cdef inline bint is_value_typed(self, object value) except -1:
        return PyTime_Check(value)
2109  
2110  
# Note: only python-exposed for tests
cpdef bint is_time_array(ndarray values, bint skipna=False):
    """Return True if `values` consists only of datetime.time objects."""
    cdef:
        TimeValidator checker = TimeValidator(len(values), skipna=skipna)
    return checker.validate(values)
2116  
2117  
# FIXME: actually use skipna
cdef bint is_period_array(ndarray values, bint skipna=True):
    """
    Is this an ndarray of Period objects (or NaT) with a single `freq`?
    """
    # values should be object-dtype, but ndarray[object] assumes 1D, while
    #  this _may_ be 2D.
    cdef:
        Py_ssize_t i, N = values.size
        int dtype_code = -10000  # i.e. c_FreqGroup.FR_UND
        object val
        flatiter it

    if N == 0:
        return False

    it = PyArray_IterNew(values)
    for i in range(N):
        # The PyArray_GETITEM and PyArray_ITER_NEXT are faster
        #  equivalents to `val = values[i]`
        val = PyArray_GETITEM(values, PyArray_ITER_DATA(it))
        PyArray_ITER_NEXT(it)

        if is_period_object(val):
            if dtype_code == -10000:
                # First Period seen: remember its freq's dtype code as the
                # reference for the rest of the array.
                dtype_code = val._dtype._dtype_code
            elif dtype_code != val._dtype._dtype_code:
                # mismatched freqs
                return False
        elif checknull_with_nat(val):
            # NaT-like values are allowed to coexist with Periods.
            pass
        else:
            # Not a Period or NaT-like
            return False

    if dtype_code == -10000:
        # we saw all-NaTs, no actual Periods
        return False
    return True
2157  
2158  
# Note: only python-exposed for tests
cpdef bint is_interval_array(ndarray values):
    """
    Is this an ndarray of Interval (or np.nan) with a single dtype?
    """
    cdef:
        Py_ssize_t i, n = len(values)
        str seen_closed = None
        bint seen_numeric = False
        bint seen_dt64 = False
        bint seen_td64 = False
        object obj

    if n == 0:
        return False

    for i in range(n):
        obj = values[i]

        if is_interval(obj):
            if seen_closed is None:
                # First Interval: record its closedness and the kind of its
                # endpoints (numeric / timedelta / datetime) as the reference.
                seen_closed = obj.closed
                seen_numeric = (
                    util.is_float_object(obj.left)
                    or util.is_integer_object(obj.left)
                )
                seen_td64 = is_timedelta(obj.left)
                seen_dt64 = PyDateTime_Check(obj.left)
            elif obj.closed != seen_closed:
                # mismatched closedness
                return False
            elif seen_numeric:
                if not (
                    util.is_float_object(obj.left)
                    or util.is_integer_object(obj.left)
                ):
                    # i.e. datetime64 or timedelta64
                    return False
            elif seen_td64:
                if not is_timedelta(obj.left):
                    return False
            elif seen_dt64:
                if not PyDateTime_Check(obj.left):
                    return False
            else:
                raise ValueError(obj)
        elif obj is None or util.is_nan(obj):
            # Missing values may coexist with Intervals.
            pass
        else:
            return False

    if seen_closed is None:
        # we saw all-NAs, no actual Intervals
        return False
    return True
2214  
2215  
@cython.boundscheck(False)
@cython.wraparound(False)
def maybe_convert_numeric(
    ndarray[object, ndim=1] values,
    set na_values,
    bint convert_empty=True,
    bint coerce_numeric=False,
    bint convert_to_masked_nullable=False,
) -> tuple[np.ndarray, np.ndarray | None]:
    """
    Convert object array to a numeric array if possible.

    Parameters
    ----------
    values : ndarray[object]
        Array of object elements to convert.
    na_values : set
        Set of values that should be interpreted as NaN.
    convert_empty : bool, default True
        If an empty array-like object is encountered, whether to interpret
        that element as NaN or not. If set to False, a ValueError will be
        raised if such an element is encountered and 'coerce_numeric' is False.
    coerce_numeric : bool, default False
        If initial attempts to convert to numeric have failed, whether to
        force conversion to numeric via alternative methods or by setting the
        element to NaN. Otherwise, an Exception will be raised when such an
        element is encountered.

        This boolean also has an impact on how conversion behaves when a
        numeric array has no suitable numerical dtype to return (i.e. uint64,
        int32, uint8). If set to False, the original object array will be
        returned. Otherwise, a ValueError will be raised.
    convert_to_masked_nullable : bool, default False
        Whether to return a mask for the converted values. This also disables
        upcasting for ints with nulls to float64.

    Returns
    -------
    np.ndarray
        Array of converted object values to numerical ones.

    Optional[np.ndarray]
        If convert_to_masked_nullable is True,
        returns a boolean mask for the converted values, otherwise returns None.
    """
    if len(values) == 0:
        # Empty input: int64 result by convention, no mask.
        return (np.array([], dtype='i8'), None)

    # fastpath for ints - try to convert all based on first value
    cdef:
        object val = values[0]

    if util.is_integer_object(val):
        try:
            # Round-trip through int64 and compare elementwise to make sure
            # no value was truncated/overflowed by the cast.
            maybe_ints = values.astype('i8')
            if (maybe_ints == values).all():
                return (maybe_ints, None)
        except (ValueError, OverflowError, TypeError):
            pass

    # Otherwise, iterate and do full inference.
    cdef:
        int status, maybe_int
        Py_ssize_t i, n = values.size
        Seen seen = Seen(coerce_numeric)
        # Parallel candidate buffers; the narrowest applicable one is
        # selected at the end based on the `seen` flags.
        ndarray[float64_t, ndim=1] floats = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_FLOAT64, 0)
        ndarray[complex128_t, ndim=1] complexes = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_COMPLEX128, 0)
        ndarray[int64_t, ndim=1] ints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_INT64, 0)
        ndarray[uint64_t, ndim=1] uints = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT64, 0)
        ndarray[uint8_t, ndim=1] bools = cnp.PyArray_EMPTY(1, values.shape, cnp.NPY_UINT8, 0)
        # NA-position mask, only returned when convert_to_masked_nullable.
        ndarray[uint8_t, ndim=1] mask = np.zeros(n, dtype="u1")
        float64_t fval
        bint allow_null_in_int = convert_to_masked_nullable

    for i in range(n):
        val = values[i]
        # We only want to disable NaNs showing as float if
        # a) convert_to_masked_nullable = True
        # b) no floats have been seen ( assuming an int shows up later )
        # However, if no ints present (all null array), we need to return floats
        allow_null_in_int = convert_to_masked_nullable and not seen.float_

        if val.__hash__ is not None and val in na_values:
            # Caller-specified NA sentinel (hashability guard: unhashable
            # values cannot be in the set, so skip the `in` check for them).
            if allow_null_in_int:
                seen.null_ = True
                mask[i] = 1
            else:
                if convert_to_masked_nullable:
                    mask[i] = 1
                seen.saw_null()
            floats[i] = complexes[i] = NaN
        elif util.is_float_object(val):
            fval = val
            if fval != fval:
                # NaN check via self-inequality.
                seen.null_ = True
                if allow_null_in_int:
                    mask[i] = 1
                else:
                    if convert_to_masked_nullable:
                        mask[i] = 1
                    seen.float_ = True
            else:
                seen.float_ = True
            floats[i] = complexes[i] = fval
        elif util.is_integer_object(val):
            floats[i] = complexes[i] = val

            # Normalize to a python int so range comparisons below are exact.
            val = int(val)
            seen.saw_int(val)

            if val >= 0:
                if val <= oUINT64_MAX:
                    uints[i] = val
                else:
                    # Too large even for uint64 -> must fall back to float.
                    seen.float_ = True

            if oINT64_MIN <= val <= oINT64_MAX:
                ints[i] = val

            if val < oINT64_MIN or (seen.sint_ and seen.uint_):
                # Below int64 range, or a mix of signed/unsigned that no
                # integer dtype can represent -> float.
                seen.float_ = True

        elif util.is_bool_object(val):
            floats[i] = uints[i] = ints[i] = bools[i] = val
            seen.bool_ = True
        elif val is None or val is C_NA:
            # Generic missing value (None or pd.NA).
            if allow_null_in_int:
                seen.null_ = True
                mask[i] = 1
            else:
                if convert_to_masked_nullable:
                    mask[i] = 1
                seen.saw_null()
            floats[i] = complexes[i] = NaN
        elif hasattr(val, '__len__') and len(val) == 0:
            # Empty string / empty sequence.
            if convert_empty or seen.coerce_numeric:
                seen.saw_null()
                floats[i] = complexes[i] = NaN
            else:
                raise ValueError("Empty string encountered")
        elif util.is_complex_object(val):
            complexes[i] = val
            seen.complex_ = True
        elif is_decimal(val):
            floats[i] = complexes[i] = val
            seen.float_ = True
        else:
            # Fallback: try parsing (e.g. a numeric string) via floatify,
            # which also reports whether the value looks like an integer.
            try:
                status = floatify(val, &fval, &maybe_int)

                if fval in na_values:
                    seen.saw_null()
                    floats[i] = complexes[i] = NaN
                    mask[i] = 1
                else:
                    if fval != fval:
                        seen.null_ = True
                        mask[i] = 1

                    floats[i] = fval

                if maybe_int:
                    as_int = int(val)

                    if as_int in na_values:
                        mask[i] = 1
                        seen.null_ = True
                        if not allow_null_in_int:
                            seen.float_ = True
                    else:
                        seen.saw_int(as_int)

                    if as_int not in na_values:
                        if as_int < oINT64_MIN or as_int > oUINT64_MAX:
                            # Outside every supported integer dtype.
                            if seen.coerce_numeric:
                                seen.float_ = True
                            else:
                                raise ValueError("Integer out of range.")
                        else:
                            if as_int >= 0:
                                uints[i] = as_int

                            if as_int <= oINT64_MAX:
                                ints[i] = as_int

                    # Mixed signed/unsigned ints force a float result.
                    seen.float_ = seen.float_ or (seen.uint_ and seen.sint_)
                else:
                    seen.float_ = True
            except (TypeError, ValueError) as err:
                if not seen.coerce_numeric:
                    # Re-raise with position info for a better error message.
                    raise type(err)(f"{err} at position {i}")

                seen.saw_null()
                floats[i] = NaN

    if seen.check_uint64_conflict():
        # Irreconcilable uint64 conflict: give back the original objects.
        return (values, None)

    # This occurs since we disabled float nulls showing as null in anticipation
    # of seeing ints that were never seen. So then, we return float
    if allow_null_in_int and seen.null_ and not seen.int_:
        seen.float_ = True

    # Pick the result buffer from widest-necessary to narrowest.
    if seen.complex_:
        return (complexes, None)
    elif seen.float_:
        if seen.null_ and convert_to_masked_nullable:
            return (floats, mask.view(np.bool_))
        return (floats, None)
    elif seen.int_:
        if seen.null_ and convert_to_masked_nullable:
            if seen.uint_:
                return (uints, mask.view(np.bool_))
            else:
                return (ints, mask.view(np.bool_))
        if seen.uint_:
            return (uints, None)
        else:
            return (ints, None)
    elif seen.bool_:
        return (bools.view(np.bool_), None)
    elif seen.uint_:
        return (uints, None)
    return (ints, None)
2439  
2440  
2441  @cython.boundscheck(False)
2442  @cython.wraparound(False)
2443  def maybe_convert_objects(ndarray[object] objects,
2444                            *,
2445                            bint try_float=False,
2446                            bint safe=False,
2447                            bint convert_datetime=False,
2448                            bint convert_timedelta=False,
2449                            bint convert_period=False,
2450                            bint convert_interval=False,
2451                            bint convert_to_nullable_integer=False,
2452                            object dtype_if_all_nat=None) -> "ArrayLike":
2453      """
2454      Type inference function-- convert object array to proper dtype
2455  
2456      Parameters
2457      ----------
2458      objects : ndarray[object]
2459          Array of object elements to convert.
2460      try_float : bool, default False
2461          If an array-like object contains only float or NaN values is
2462          encountered, whether to convert and return an array of float dtype.
2463      safe : bool, default False
2464          Whether to upcast numeric type (e.g. int cast to float). If set to
2465          True, no upcasting will be performed.
2466      convert_datetime : bool, default False
2467          If an array-like object contains only datetime values or NaT is
2468          encountered, whether to convert and return an array of M8[ns] dtype.
2469      convert_timedelta : bool, default False
2470          If an array-like object contains only timedelta values or NaT is
2471          encountered, whether to convert and return an array of m8[ns] dtype.
2472      convert_period : bool, default False
2473          If an array-like object contains only (homogeneous-freq) Period values
2474          or NaT, whether to convert and return a PeriodArray.
2475      convert_interval : bool, default False
2476          If an array-like object contains only Interval objects (with matching
2477          dtypes and closedness) or NaN, whether to convert to IntervalArray.
2478      convert_to_nullable_integer : bool, default False
2479          If an array-like object contains only integer values (and NaN) is
2480          encountered, whether to convert and return an IntegerArray.
2481      dtype_if_all_nat : np.dtype, ExtensionDtype, or None, default None
2482          Dtype to cast to if we have all-NaT.
2483  
2484      Returns
2485      -------
2486      np.ndarray or ExtensionArray
2487          Array of converted object values to more specific dtypes if applicable.
2488      """
2489      cdef:
2490          Py_ssize_t i, n, itemsize_max = 0
2491          ndarray[float64_t] floats
2492          ndarray[complex128_t] complexes
2493          ndarray[int64_t] ints
2494          ndarray[uint64_t] uints
2495          ndarray[uint8_t] bools
2496          int64_t[::1]  idatetimes
2497          int64_t[::1] itimedeltas
2498          Seen seen = Seen()
2499          object val
2500          float64_t fval, fnan = np.nan
2501  
2502      n = len(objects)
2503  
2504      floats = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_FLOAT64, 0)
2505      complexes = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_COMPLEX128, 0)
2506      ints = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_INT64, 0)
2507      uints = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_UINT64, 0)
2508      bools = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_UINT8, 0)
2509      mask = np.full(n, False)
2510  
2511      if convert_datetime:
2512          datetimes = np.empty(n, dtype='M8[ns]')
2513          idatetimes = datetimes.view(np.int64)
2514  
2515      if convert_timedelta:
2516          timedeltas = np.empty(n, dtype='m8[ns]')
2517          itimedeltas = timedeltas.view(np.int64)
2518  
2519      for i in range(n):
2520          val = objects[i]
2521          if itemsize_max != -1:
2522              itemsize = get_itemsize(val)
2523              if itemsize > itemsize_max or itemsize == -1:
2524                  itemsize_max = itemsize
2525  
2526          if val is None:
2527              seen.null_ = True
2528              floats[i] = complexes[i] = fnan
2529              mask[i] = True
2530          elif val is NaT:
2531              seen.nat_ = True
2532              if convert_datetime:
2533                  idatetimes[i] = NPY_NAT
2534              if convert_timedelta:
2535                  itimedeltas[i] = NPY_NAT
2536              if not (convert_datetime or convert_timedelta or convert_period):
2537                  seen.object_ = True
2538                  break
2539          elif val is np.nan:
2540              seen.nan_ = True
2541              mask[i] = True
2542              floats[i] = complexes[i] = val
2543          elif util.is_bool_object(val):
2544              seen.bool_ = True
2545              bools[i] = val
2546          elif util.is_float_object(val):
2547              floats[i] = complexes[i] = val
2548              seen.float_ = True
2549          elif is_timedelta(val):
2550              if convert_timedelta:
2551                  seen.timedelta_ = True
2552                  try:
2553                      itimedeltas[i] = convert_to_timedelta64(val, "ns").view("i8")
2554                  except OutOfBoundsTimedelta:
2555                      seen.object_ = True
2556                      break
2557                  break
2558              else:
2559                  seen.object_ = True
2560                  break
2561          elif util.is_integer_object(val):
2562              seen.int_ = True
2563              floats[i] = <float64_t>val
2564              complexes[i] = <double complex>val
2565              if not seen.null_:
2566                  seen.saw_int(val)
2567  
2568                  if ((seen.uint_ and seen.sint_) or
2569                          val > oUINT64_MAX or val < oINT64_MIN):
2570                      seen.object_ = True
2571                      break
2572  
2573                  if seen.uint_:
2574                      uints[i] = val
2575                  elif seen.sint_:
2576                      ints[i] = val
2577                  else:
2578                      uints[i] = val
2579                      ints[i] = val
2580  
2581          elif util.is_complex_object(val):
2582              complexes[i] = val
2583              seen.complex_ = True
2584          elif PyDateTime_Check(val) or util.is_datetime64_object(val):
2585  
2586              # if we have an tz's attached then return the objects
2587              if convert_datetime:
2588                  if getattr(val, 'tzinfo', None) is not None:
2589                      seen.datetimetz_ = True
2590                      break
2591                  else:
2592                      seen.datetime_ = True
2593                      try:
2594                          idatetimes[i] = convert_to_tsobject(
2595                              val, None, None, 0, 0).value
2596                      except OutOfBoundsDatetime:
2597                          seen.object_ = True
2598                          break
2599              else:
2600                  seen.object_ = True
2601                  break
2602          elif is_period_object(val):
2603              if convert_period:
2604                  seen.period_ = True
2605                  break
2606              else:
2607                  seen.object_ = True
2608                  break
2609          elif try_float and not isinstance(val, str):
2610              # this will convert Decimal objects
2611              try:
2612                  floats[i] = float(val)
2613                  complexes[i] = complex(val)
2614                  seen.float_ = True
2615              except (ValueError, TypeError):
2616                  seen.object_ = True
2617                  break
2618          elif is_interval(val):
2619              if convert_interval:
2620                  seen.interval_ = True
2621                  break
2622              else:
2623                  seen.object_ = True
2624                  break
2625          else:
2626              seen.object_ = True
2627              break
2628  
2629      # we try to coerce datetime w/tz but must all have the same tz
2630      if seen.datetimetz_:
2631          if is_datetime_with_singletz_array(objects):
2632              from pandas import DatetimeIndex
2633              dti = DatetimeIndex(objects)
2634  
2635              # unbox to DatetimeArray
2636              return dti._data
2637          seen.object_ = True
2638  
2639      elif seen.datetime_:
2640          if is_datetime_or_datetime64_array(objects):
2641              from pandas import DatetimeIndex
2642  
2643              try:
2644                  dti = DatetimeIndex(objects)
2645              except OutOfBoundsDatetime:
2646                  pass
2647              else:
2648                  # unbox to ndarray[datetime64[ns]]
2649                  return dti._data._ndarray
2650          seen.object_ = True
2651  
2652      elif seen.timedelta_:
2653          if is_timedelta_or_timedelta64_array(objects):
2654              from pandas import TimedeltaIndex
2655  
2656              try:
2657                  tdi = TimedeltaIndex(objects)
2658              except OutOfBoundsTimedelta:
2659                  pass
2660              else:
2661                  # unbox to ndarray[timedelta64[ns]]
2662                  return tdi._data._ndarray
2663          seen.object_ = True
2664  
2665      if seen.period_:
2666          if is_period_array(objects):
2667              from pandas import PeriodIndex
2668              pi = PeriodIndex(objects)
2669  
2670              # unbox to PeriodArray
2671              return pi._data
2672          seen.object_ = True
2673  
2674      if seen.interval_:
2675          if is_interval_array(objects):
2676              from pandas import IntervalIndex
2677              ii = IntervalIndex(objects)
2678  
2679              # unbox to IntervalArray
2680              return ii._data
2681  
2682          seen.object_ = True
2683  
2684      if not seen.object_:
2685          result = None
2686          if not safe:
2687              if seen.null_ or seen.nan_:
2688                  if seen.is_float_or_complex:
2689                      if seen.complex_:
2690                          result = complexes
2691                      elif seen.float_:
2692                          result = floats
2693                      elif seen.int_:
2694                          if convert_to_nullable_integer:
2695                              from pandas.core.arrays import IntegerArray
2696                              result = IntegerArray(ints, mask)
2697                          else:
2698                              result = floats
2699                      elif seen.nan_:
2700                          result = floats
2701              else:
2702                  if not seen.bool_:
2703                      if seen.datetime_:
2704                          if not seen.numeric_ and not seen.timedelta_:
2705                              result = datetimes
2706                      elif seen.timedelta_:
2707                          if not seen.numeric_:
2708                              result = timedeltas
2709                      elif seen.nat_:
2710                          if not seen.numeric_:
2711                              if convert_datetime and convert_timedelta:
2712                                  dtype = dtype_if_all_nat
2713                                  if dtype is not None:
2714                                      # otherwise we keep object dtype
2715                                      result = _infer_all_nats(
2716                                          dtype, datetimes, timedeltas
2717                                      )
2718  
2719                              elif convert_datetime:
2720                                  result = datetimes
2721                              elif convert_timedelta:
2722                                  result = timedeltas
2723                      else:
2724                          if seen.complex_:
2725                              result = complexes
2726                          elif seen.float_:
2727                              result = floats
2728                          elif seen.int_:
2729                              if seen.uint_:
2730                                  result = uints
2731                              else:
2732                                  result = ints
2733                  elif seen.is_bool:
2734                      result = bools.view(np.bool_)
2735  
2736          else:
2737              # don't cast int to float, etc.
2738              if seen.null_:
2739                  if seen.is_float_or_complex:
2740                      if seen.complex_:
2741                          if not seen.int_:
2742                              result = complexes
2743                      elif seen.float_ or seen.nan_:
2744                          if not seen.int_:
2745                              result = floats
2746              else:
2747                  if not seen.bool_:
2748                      if seen.datetime_:
2749                          if not seen.numeric_ and not seen.timedelta_:
2750                              result = datetimes
2751                      elif seen.timedelta_:
2752                          if not seen.numeric_:
2753                              result = timedeltas
2754                      elif seen.nat_:
2755                          if not seen.numeric_:
2756                              if convert_datetime and convert_timedelta:
2757                                  dtype = dtype_if_all_nat
2758                                  if dtype is not None:
2759                                      # otherwise we keep object dtype
2760                                      result = _infer_all_nats(
2761                                          dtype, datetimes, timedeltas
2762                                      )
2763  
2764                              elif convert_datetime:
2765                                  result = datetimes
2766                              elif convert_timedelta:
2767                                  result = timedeltas
2768                      else:
2769                          if seen.complex_:
2770                              if not seen.int_:
2771                                  result = complexes
2772                          elif seen.float_ or seen.nan_:
2773                              if not seen.int_:
2774                                  result = floats
2775                          elif seen.int_:
2776                              if seen.uint_:
2777                                  result = uints
2778                              else:
2779                                  result = ints
2780                  elif seen.is_bool and not seen.nan_:
2781                      result = bools.view(np.bool_)
2782  
2783          if result is uints or result is ints or result is floats or result is complexes:
2784              # cast to the largest itemsize when all values are NumPy scalars
2785              if itemsize_max > 0 and itemsize_max != result.dtype.itemsize:
2786                  result = result.astype(result.dtype.kind + str(itemsize_max))
2787              return result
2788          elif result is not None:
2789              return result
2790  
2791      return objects
2792  
2793  
cdef _infer_all_nats(dtype, ndarray datetimes, ndarray timedeltas):
    """
    Cast an all-NaT collection of values to the requested dtype.

    For numpy M8[ns]/m8[ns] dtypes the matching pre-built ndarray is
    returned; for an ExtensionDtype an all-iNaT array of that type is
    constructed.  Any other np.dtype raises ValueError.
    """
    if not cnp.PyArray_DescrCheck(dtype):
        # ExtensionDtype path: build an all-iNaT int64 array and wrap it
        # in the dtype's array type.
        array_type = dtype.construct_array_type()
        i8vals = cnp.PyArray_EMPTY(1, datetimes.shape, cnp.NPY_INT64, 0)
        i8vals.fill(NPY_NAT)
        return array_type(i8vals, dtype=dtype)

    # np.dtype path: only datetime64[ns]/timedelta64[ns] are supported here
    if dtype == "M8[ns]":
        return datetimes
    if dtype == "m8[ns]":
        return timedeltas
    raise ValueError(dtype)
2813  
2814  
class _NoDefault(Enum):
    """
    Sentinel enum used to signal "no argument was passed" for parameters
    where ``None`` is itself a meaningful value.
    """
    # We make this an Enum
    # 1) because it round-trips through pickle correctly (see GH#40397)
    # 2) because mypy does not understand singletons
    no_default = "NO_DEFAULT"

    def __repr__(self) -> str:
        return "<no_default>"
2823  
2824  
# Note: no_default is exported to the public API in pandas.api.extensions
no_default = _NoDefault.no_default  # Sentinel indicating the default value.
NoDefault = Literal[_NoDefault.no_default]  # Type for annotating params that accept the sentinel.
2828  
2829  
@cython.boundscheck(False)
@cython.wraparound(False)
def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=True,
                   object na_value=no_default, cnp.dtype dtype=np.dtype(object)
                   ) -> np.ndarray:
    """
    Substitute for np.vectorize with pandas-friendly dtype inference.

    Parameters
    ----------
    arr : ndarray
    f : function
    mask : ndarray
        uint8 dtype ndarray indicating values not to apply `f` to.
    convert : bool, default True
        Whether to call `maybe_convert_objects` on the resulting ndarray
    na_value : Any, optional
        The result value to use for masked values. By default, the
        input value is used
    dtype : numpy.dtype
        The numpy dtype to use for the result ndarray.

    Returns
    -------
    np.ndarray
    """
    cdef:
        Py_ssize_t idx, length
        ndarray out
        object item

    length = len(arr)
    out = np.empty(length, dtype=dtype)
    for idx in range(length):
        if not mask[idx]:
            item = f(arr[idx])
            if cnp.PyArray_IsZeroDim(item):
                # unbox 0-dim arrays, GH#690
                item = item.item()
        elif na_value is no_default:
            # masked and no replacement given: keep the input value
            item = arr[idx]
        else:
            item = na_value

        out[idx] = item

    if not convert:
        return out

    return maybe_convert_objects(out,
                                 try_float=False,
                                 convert_datetime=False,
                                 convert_timedelta=False)
2885  
2886  
@cython.boundscheck(False)
@cython.wraparound(False)
def map_infer(
    ndarray arr, object f, bint convert=True, bint ignore_na=False
) -> np.ndarray:
    """
    Substitute for np.vectorize with pandas-friendly dtype inference.

    Parameters
    ----------
    arr : ndarray
    f : function
    convert : bint
        Whether to run `maybe_convert_objects` on the result.
    ignore_na : bint
        If True, NA values will not have f applied

    Returns
    -------
    np.ndarray
    """
    cdef:
        Py_ssize_t pos, count
        ndarray[object] out
        object mapped

    count = len(arr)
    out = cnp.PyArray_EMPTY(1, arr.shape, cnp.NPY_OBJECT, 0)
    for pos in range(count):
        if ignore_na and checknull(arr[pos]):
            # pass NA values through untouched
            out[pos] = arr[pos]
            continue

        mapped = f(arr[pos])
        if cnp.PyArray_IsZeroDim(mapped):
            # unbox 0-dim arrays, GH#690
            mapped = mapped.item()
        out[pos] = mapped

    if not convert:
        return out

    return maybe_convert_objects(out,
                                 try_float=False,
                                 convert_datetime=False,
                                 convert_timedelta=False)
2933  
2934  
def to_object_array(rows: object, min_width: int = 0) -> ndarray:
    """
    Convert a list of lists into a 2-D object array.

    Parameters
    ----------
    rows : 2-d array (N, K)
        List of lists to be converted into an array.
    min_width : int
        Minimum number of columns in the result. Rows shorter than the
        widest row (or `min_width`) are left-justified; the trailing
        cells keep the object-array fill value from ``np.empty``.

    Returns
    -------
    np.ndarray[object, ndim=2]
    """
    cdef:
        Py_ssize_t i, j, n, width
        ndarray[object, ndim=2] result
        list row

    rows = list(rows)
    n = len(rows)

    # result is as wide as the longest row, but never narrower than
    # min_width
    width = min_width
    for i in range(n):
        if len(rows[i]) > width:
            width = len(rows[i])

    result = np.empty((n, width), dtype=object)

    for i in range(n):
        row = list(rows[i])
        for j in range(len(row)):
            result[i, j] = row[j]

    return result
2976  
2977  
def tuples_to_object_array(ndarray[object] tuples):
    """
    Convert a 1-D object array of equal-length tuples into a 2-D
    object array.

    Parameters
    ----------
    tuples : np.ndarray[object]
        Each element is expected to be a tuple of the same length as
        the first element.

    Returns
    -------
    np.ndarray[object, ndim=2]
        Shape ``(len(tuples), len(tuples[0]))``; ``(0, 0)`` for empty
        input.
    """
    cdef:
        Py_ssize_t i, j, n, k
        ndarray[object, ndim=2] result
        tuple tup

    n = len(tuples)
    if n == 0:
        # Guard: len(tuples[0]) below would raise IndexError on
        # empty input.
        return np.empty((0, 0), dtype=object)

    # width is taken from the first tuple
    k = len(tuples[0])
    result = np.empty((n, k), dtype=object)
    for i in range(n):
        tup = tuples[i]
        for j in range(k):
            result[i, j] = tup[j]

    return result
2993  
2994  
def to_object_array_tuples(rows: object) -> np.ndarray:
    """
    Convert a list of tuples into a 2-D object array. Any subclass of
    tuple in `rows` will be casted to tuple.

    Parameters
    ----------
    rows : 2-d array (N, K)
        List of tuples to be converted into an array.

    Returns
    -------
    np.ndarray[object, ndim=2]
    """
    cdef:
        Py_ssize_t i, j, n, width, row_len
        ndarray[object, ndim=2] result
        tuple row

    rows = list(rows)
    n = len(rows)

    # width of the result is the longest row; a null entry occupies a
    # single cell
    width = 0
    for i in range(n):
        row_len = 1 if checknull(rows[i]) else len(rows[i])
        if row_len > width:
            width = row_len

    result = np.empty((n, width), dtype=object)

    try:
        for i in range(n):
            row = rows[i]
            for j in range(len(row)):
                result[i, j] = row[j]
    except TypeError:
        # e.g. "Expected tuple, got list"
        # upcast any subclasses to tuple
        for i in range(n):
            row = (rows[i],) if checknull(rows[i]) else tuple(rows[i])
            for j in range(len(row)):
                result[i, j] = row[j]

    return result
3039  
3040  
@cython.wraparound(False)
@cython.boundscheck(False)
def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> np.ndarray:
    """
    Look up each element of `keys` in `mapping`, with dtype inference.

    Parameters
    ----------
    mapping : dict
    keys : np.ndarray
        Keys to look up.
    default : object, default np.nan
        Fill value used for keys absent from `mapping`.

    Returns
    -------
    np.ndarray
        `maybe_convert_objects` applied to the looked-up values; an
        empty float64 array when `keys` is empty.
    """
    cdef:
        Py_ssize_t i, n = len(keys)
        ndarray[object] output = np.empty(n, dtype='O')

    if n == 0:
        # kludge, for Series
        return np.empty(0, dtype='f8')

    for i in range(n):
        # dict.get: one hashed lookup instead of `in` + __getitem__
        output[i] = mapping.get(keys[i], default)

    return maybe_convert_objects(output)
3061  
3062  
def is_bool_list(obj: list) -> bool:
    """
    Check whether every element of `obj` is a bool or np.bool_ object.

    Short-circuits on the first non-bool element; appreciably faster
    than materializing `np.array(obj)` and testing its dtype kind
    (per the benchmarks that motivated this helper, roughly one to two
    orders of magnitude across list sizes, and near-constant time when
    a non-bool appears early).

    Notes
    -----
    Returns True for an empty list.
    """
    cdef:
        object element

    for element in obj:
        if not util.is_bool_object(element):
            # bail out on the first non-bool
            return False

    # vacuously True for an empty list
    return True
3099  
3100  
cpdef ndarray eq_NA_compat(ndarray[object] arr, object key):
    """
    Check for `arr == key`, treating all values as not-equal to pd.NA.

    key is assumed to have `not isna(key)`
    """
    cdef:
        ndarray[uint8_t, cast=True] out = cnp.PyArray_EMPTY(arr.ndim, arr.shape, cnp.NPY_BOOL, 0)
        Py_ssize_t idx
        object element

    for idx in range(len(arr)):
        element = arr[idx]
        # pd.NA never compares equal, regardless of key
        out[idx] = False if element is C_NA else element == key

    return out
3120  
3121  
def dtypes_all_equal(list types not None) -> bool:
    """
    Fast check that every dtype in `types` equals the first one.

    Faster version of:

        first = types[0]
        all(is_dtype_equal(first, t) for t in types[1:])

    assuming all elements in the list are np.dtype/ExtensionDtype
    objects.

    See timings at https://github.com/pandas-dev/pandas/pull/44594
    """
    reference = types[0]
    for candidate in types[1:]:
        try:
            # keep `not ... ==` (rather than `!=`) to preserve the
            # exact comparison the original relies on
            if not candidate == reference:
                return False
        except (TypeError, AttributeError):
            # comparison itself failed -> treat as unequal
            return False
    return True