# pandas/_libs/groupby.pyx
   1  cimport cython
   2  from cython cimport (
   3      Py_ssize_t,
   4      floating,
   5  )
   6  from libc.stdlib cimport (
   7      free,
   8      malloc,
   9  )
  10  
  11  import numpy as np
  12  
  13  cimport numpy as cnp
  14  from numpy cimport (
  15      complex64_t,
  16      complex128_t,
  17      float32_t,
  18      float64_t,
  19      int8_t,
  20      int16_t,
  21      int32_t,
  22      int64_t,
  23      intp_t,
  24      ndarray,
  25      uint8_t,
  26      uint16_t,
  27      uint32_t,
  28      uint64_t,
  29  )
  30  from numpy.math cimport NAN
  31  
  32  cnp.import_array()
  33  
  34  from pandas._libs cimport util
  35  from pandas._libs.algos cimport (
  36      get_rank_nan_fill_val,
  37      kth_smallest_c,
  38  )
  39  
  40  from pandas._libs.algos import (
  41      ensure_platform_int,
  42      groupsort_indexer,
  43      rank_1d,
  44      take_2d_axis1_float64_float64,
  45  )
  46  
  47  from pandas._libs.dtypes cimport (
  48      numeric_object_t,
  49      numeric_t,
  50  )
  51  from pandas._libs.missing cimport checknull
  52  
  53  
# Sentinel used for NaT when datetime-like data is carried in int64 arrays.
cdef int64_t NPY_NAT = util.get_nat()
# Largest representable int64, kept as a Python-level constant.
_int64_max = np.iinfo(np.int64).max

cdef float64_t NaN = <float64_t>np.NaN

# Interpolation strategies for quantile-style computations
# (presumably consumed by group_quantile elsewhere in this file — not
# visible in this chunk).
cdef enum InterpolationEnumType:
    INTERPOLATION_LINEAR,
    INTERPOLATION_LOWER,
    INTERPOLATION_HIGHER,
    INTERPOLATION_NEAREST,
    INTERPOLATION_MIDPOINT
  65  
  66  
cdef inline float64_t median_linear(float64_t* a, int n) nogil:
    # Median of the first `n` entries of `a`, skipping NaNs.
    # Returns NaN for an empty or all-NaN input.  When NaNs are present,
    # the non-NaN values are copied into a temporary malloc'd buffer which
    # is freed before returning.
    cdef:
        int i, j, na_count = 0
        float64_t result
        float64_t* tmp

    if n == 0:
        return NaN

    # count NAs
    for i in range(n):
        if a[i] != a[i]:  # NaN is the only value not equal to itself
            na_count += 1

    if na_count:
        if na_count == n:
            return NaN

        # Compact the non-NaN values into a fresh buffer.
        # NOTE(review): malloc result is not checked for NULL here.
        tmp = <float64_t*>malloc((n - na_count) * sizeof(float64_t))

        j = 0
        for i in range(n):
            if a[i] == a[i]:
                tmp[j] = a[i]
                j += 1

        a = tmp
        n -= na_count

    if n % 2:
        result = kth_smallest_c(a, n // 2, n)
    else:
        # Even count: average the two middle order statistics.
        result = (kth_smallest_c(a, n // 2, n) +
                  kth_smallest_c(a, n // 2 - 1, n)) / 2

    if na_count:
        # `a` was re-pointed at the malloc'd tmp buffer above.
        free(a)

    return result
 106  
 107  
@cython.boundscheck(False)
@cython.wraparound(False)
def group_median_float64(
    ndarray[float64_t, ndim=2] out,
    ndarray[int64_t] counts,
    ndarray[float64_t, ndim=2] values,
    ndarray[intp_t] labels,
    Py_ssize_t min_count=-1,
) -> None:
    """
    Only aggregates on axis=0

    Parameters
    ----------
    out : np.ndarray[np.float64, ndim=2]
        Array to write group medians into.
    counts : np.ndarray[np.int64]
        Overwritten with the per-group sizes (NA group excluded).
    values : np.ndarray[np.float64, ndim=2]
        Values to take the median of.
    labels : np.ndarray[np.intp]
        Group label for each row of `values`.
    min_count : Py_ssize_t
        Must be -1; only used in sum and prod.

    Notes
    -----
    This method modifies the `out` and `counts` parameters rather than
    returning an object.
    """
    cdef:
        Py_ssize_t i, j, N, K, ngroups, size
        ndarray[intp_t] _counts
        ndarray[float64_t, ndim=2] data
        ndarray[intp_t] indexer
        float64_t* ptr

    assert min_count == -1, "'min_count' only used in sum and prod"

    ngroups = len(counts)
    N, K = (<object>values).shape

    # Sort rows by group label so each group's values are contiguous;
    # _counts[0] holds the size of the NA group.
    indexer, _counts = groupsort_indexer(labels, ngroups)
    counts[:] = _counts[1:]

    # Transposed layout so each column's grouped values are contiguous in
    # memory, walkable with a raw float64 pointer.
    data = np.empty((K, N), dtype=np.float64)
    ptr = <float64_t*>cnp.PyArray_DATA(data)

    take_2d_axis1_float64_float64(values.T, indexer, out=data)

    with nogil:

        for i in range(K):
            # exclude NA group
            ptr += _counts[0]
            for j in range(ngroups):
                size = _counts[j + 1]
                out[j, i] = median_linear(ptr, size)
                ptr += size
 149  
 150  
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumprod_float64(
    float64_t[:, ::1] out,
    const float64_t[:, :] values,
    const intp_t[::1] labels,
    int ngroups,
    bint is_datetimelike,
    bint skipna=True,
) -> None:
    """
    Cumulative product of columns of `values`, in row groups `labels`.

    Parameters
    ----------
    out : np.ndarray[np.float64, ndim=2]
        Array to store cumprod in.
    values : np.ndarray[np.float64, ndim=2]
        Values to take cumprod of.
    labels : np.ndarray[np.intp]
        Labels to group by.
    ngroups : int
        Number of groups, larger than all entries of `labels`.
    is_datetimelike : bool
        Always false, `values` is never datetime-like.
    skipna : bool
        If true, ignore nans in `values`.

    Notes
    -----
    This method modifies the `out` parameter, rather than returning an object.
    """
    cdef:
        Py_ssize_t i, j, N, K, size
        float64_t val
        float64_t[:, ::1] accum
        intp_t lab

    N, K = (<object>values).shape
    # Running product per (group, column); 1 is the multiplicative identity.
    accum = np.ones((ngroups, K), dtype=np.float64)

    with nogil:
        for i in range(N):
            lab = labels[i]

            if lab < 0:
                # Row belongs to no group; its out row is left untouched.
                continue
            for j in range(K):
                val = values[i, j]
                if val == val:
                    accum[lab, j] *= val
                    out[i, j] = accum[lab, j]
                else:
                    out[i, j] = NaN
                    if not skipna:
                        # Poison the accumulator so the rest of this
                        # group/column stays NaN.
                        accum[lab, j] = NaN
 207  
 208  
# Fused type covering the numeric dtypes accepted by the cumsum/prod/ohlc
# style kernels below.
ctypedef fused int64float_t:
    int64_t
    uint64_t
    float32_t
    float64_t
 214  
 215  
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumsum(
    int64float_t[:, ::1] out,
    ndarray[int64float_t, ndim=2] values,
    const intp_t[::1] labels,
    int ngroups,
    bint is_datetimelike,
    bint skipna=True,
    const uint8_t[:, :] mask=None,
    uint8_t[:, ::1] result_mask=None,
) -> None:
    """
    Cumulative sum of columns of `values`, in row groups `labels`.

    Parameters
    ----------
    out : np.ndarray[ndim=2]
        Array to store cumsum in.
    values : np.ndarray[ndim=2]
        Values to take cumsum of.
    labels : np.ndarray[np.intp]
        Labels to group by.
    ngroups : int
        Number of groups, larger than all entries of `labels`.
    is_datetimelike : bool
        True if `values` contains datetime-like entries.
    skipna : bool
        If true, ignore nans in `values`.
    mask: np.ndarray[uint8], optional
        Mask of values
    result_mask: np.ndarray[uint8], optional
        Mask of out array.  Written whenever `mask` is provided, so it must
        be supplied together with `mask`.

    Notes
    -----
    This method modifies the `out` parameter, rather than returning an object.
    """
    cdef:
        Py_ssize_t i, j, N, K, size
        int64float_t val, y, t, na_val
        int64float_t[:, ::1] accum, compensation
        uint8_t[:, ::1] accum_mask
        intp_t lab
        bint isna_entry, isna_prev = False
        bint uses_mask = mask is not None

    N, K = (<object>values).shape

    if uses_mask:
        # Tracks which (group, column) accumulators have been poisoned by
        # an NA when skipna=False.
        accum_mask = np.zeros((ngroups, K), dtype="uint8")

    accum = np.zeros((ngroups, K), dtype=np.asarray(values).dtype)
    # Kahan-summation compensation terms, one per (group, column).
    compensation = np.zeros((ngroups, K), dtype=np.asarray(values).dtype)

    na_val = _get_na_val(<int64float_t>0, is_datetimelike)

    with nogil:
        for i in range(N):
            lab = labels[i]

            if lab < 0:
                continue
            for j in range(K):
                val = values[i, j]

                if uses_mask:
                    isna_entry = mask[i, j]
                else:
                    isna_entry = _treat_as_na(val, is_datetimelike)

                if not skipna:
                    # Once an NA has been seen in this group/column, every
                    # subsequent result is NA.
                    if uses_mask:
                        isna_prev = accum_mask[lab, j]
                    else:
                        isna_prev = _treat_as_na(accum[lab, j], is_datetimelike)

                    if isna_prev:
                        if uses_mask:
                            result_mask[i, j] = True
                            # Be deterministic, out was initialized as empty
                            out[i, j] = 0
                        else:
                            out[i, j] = na_val
                        continue

                if isna_entry:

                    if uses_mask:
                        result_mask[i, j] = True
                        # Be deterministic, out was initialized as empty
                        out[i, j] = 0
                    else:
                        out[i, j] = na_val

                    if not skipna:
                        # Poison the accumulator for the rest of the group.
                        if uses_mask:
                            accum_mask[lab, j] = True
                        else:
                            accum[lab, j] = na_val

                else:
                    # For floats, use Kahan summation to reduce floating-point
                    # error (https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
                    if int64float_t == float32_t or int64float_t == float64_t:
                        y = val - compensation[lab, j]
                        t = accum[lab, j] + y
                        compensation[lab, j] = t - accum[lab, j] - y
                    else:
                        t = val + accum[lab, j]

                    accum[lab, j] = t
                    out[i, j] = t
 330  
@cython.boundscheck(False)
@cython.wraparound(False)
def group_shift_indexer(
    int64_t[::1] out,
    const intp_t[::1] labels,
    int ngroups,
    int periods,
) -> None:
    """
    For each row, compute the index of the row `periods` positions earlier
    within the same group (later, for negative `periods`), or -1 where no
    such row exists.

    Parameters
    ----------
    out : np.ndarray[np.int64]
        Values into which this method will write its results.
    labels : np.ndarray[np.intp]
        Group label for each row; -1 marks a null key and yields -1 in `out`.
    ngroups : int
        Number of groups, larger than all entries of `labels`.
    periods : int
        Shift amount; the sign selects the direction of the shift.

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object.
    """
    cdef:
        Py_ssize_t N, i, j, ii, lab
        int offset = 0, sign
        int64_t idxer, idxer_slot
        # Number of rows seen so far per group.
        int64_t[::1] label_seen = np.zeros(ngroups, dtype=np.int64)
        int64_t[:, ::1] label_indexer

    N, = (<object>labels).shape

    # Normalize to a positive period and iterate from the appropriate end.
    if periods < 0:
        periods = -periods
        offset = N - 1
        sign = -1
    elif periods > 0:
        offset = 0
        sign = 1

    if periods == 0:
        with nogil:
            for i in range(N):
                out[i] = i
    else:
        # array of each previous indexer seen: a circular buffer of the
        # last `periods` row indices per group (indexed modulo `periods`).
        label_indexer = np.zeros((ngroups, periods), dtype=np.int64)
        with nogil:
            for i in range(N):
                # reverse iterator if shifting backwards
                ii = offset + sign * i
                lab = labels[ii]

                # Skip null keys
                if lab == -1:
                    out[ii] = -1
                    continue

                label_seen[lab] += 1

                idxer_slot = label_seen[lab] % periods
                idxer = label_indexer[lab, idxer_slot]

                # Only valid once the group has produced more than `periods`
                # rows; earlier rows have nothing to shift from.
                if label_seen[lab] > periods:
                    out[ii] = idxer
                else:
                    out[ii] = -1

                label_indexer[lab, idxer_slot] = ii
 385  
 386  
@cython.wraparound(False)
@cython.boundscheck(False)
def group_fillna_indexer(
    ndarray[intp_t] out,
    ndarray[intp_t] labels,
    ndarray[intp_t] sorted_labels,
    ndarray[uint8_t] mask,
    str direction,
    int64_t limit,
    bint dropna,
) -> None:
    """
    Indexes how to fill values forwards or backwards within a group.

    Parameters
    ----------
    out : np.ndarray[np.intp]
        Values into which this method will write its results.
    labels : np.ndarray[np.intp]
        Array containing unique label for each group, with its ordering
        matching up to the corresponding record in `values`.
    sorted_labels : np.ndarray[np.intp]
        obtained by `np.argsort(labels, kind="mergesort")`; reversed if
        direction == "bfill"
    mask : np.ndarray[np.uint8]
        Indicating whether a value is na or not.
    direction : {'ffill', 'bfill'}
        Direction for fill to be applied (forwards or backwards, respectively)
    limit : Consecutive values to fill before stopping, or -1 for no limit
    dropna : Flag to indicate if NaN groups should return all NaN values

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object
    """
    cdef:
        Py_ssize_t i, N, idx
        # Index of the most recent non-missing value, or -1 if none usable.
        intp_t curr_fill_idx=-1
        # Consecutive missing values filled since the last valid one.
        int64_t filled_vals = 0

    N = len(out)

    # Make sure all arrays are the same size
    assert N == len(labels) == len(mask)

    with nogil:
        # Walk rows in group-sorted order so each group is contiguous;
        # `direction` is already encoded in the ordering of sorted_labels.
        for i in range(N):
            idx = sorted_labels[i]
            if dropna and labels[idx] == -1:  # nan-group gets nan-values
                curr_fill_idx = -1
            elif mask[idx] == 1:  # is missing
                # Stop filling once we've hit the limit
                if filled_vals >= limit and limit != -1:
                    curr_fill_idx = -1
                filled_vals += 1
            else:  # reset items when not missing
                filled_vals = 0
                curr_fill_idx = idx

            out[idx] = curr_fill_idx

            # If we move to the next group, reset
            # the fill_idx and counter
            if i == N - 1 or labels[idx] != labels[sorted_labels[i + 1]]:
                curr_fill_idx = -1
                filled_vals = 0
 455  
 456  
@cython.boundscheck(False)
@cython.wraparound(False)
def group_any_all(
    int8_t[:, ::1] out,
    const int8_t[:, :] values,
    const intp_t[::1] labels,
    const uint8_t[:, :] mask,
    str val_test,
    bint skipna,
    bint nullable,
) -> None:
    """
    Aggregated boolean values to show truthfulness of group elements. If the
    input is a nullable type (nullable=True), the result will be computed
    using Kleene logic.

    Parameters
    ----------
    out : np.ndarray[np.int8]
        Values into which this method will write its results.
    labels : np.ndarray[np.intp]
        Array containing unique label for each group, with its
        ordering matching up to the corresponding record in `values`
    values : np.ndarray[np.int8]
        Containing the truth value of each element.
    mask : np.ndarray[np.uint8]
        Indicating whether a value is na or not.
    val_test : {'any', 'all'}
        String object dictating whether to use any or all truth testing
    skipna : bool
        Flag to ignore nan values during truth testing
    nullable : bool
        Whether or not the input is a nullable type. If True, the
        result will be computed using Kleene logic

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object.
    The returned values will either be 0, 1 (False or True, respectively), or
    -1 to signify a masked position in the case of a nullable input.
    """
    cdef:
        Py_ssize_t i, j, N = len(labels), K = out.shape[1]
        intp_t lab
        # flag_val is the "absorbing" value: once seen, the group's result
        # is decided (False for 'all', True for 'any').
        int8_t flag_val, val

    if val_test == 'all':
        # Because the 'all' value of an empty iterable in Python is True we can
        # start with an array full of ones and set to zero when a False value
        # is encountered
        flag_val = 0
    elif val_test == 'any':
        # Because the 'any' value of an empty iterable in Python is False we
        # can start with an array full of zeros and set to one only if any
        # value encountered is True
        flag_val = 1
    else:
        raise ValueError("'bool_func' must be either 'any' or 'all'!")

    # Initialize every group to the identity element of the chosen test.
    out[:] = 1 - flag_val

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            for j in range(K):
                # NOTE: the skipna check must precede the nullable check so
                # that skipna=True ignores NAs outright.
                if skipna and mask[i, j]:
                    continue

                if nullable and mask[i, j]:
                    # Set the position as masked if `out[lab] != flag_val`, which
                    # would indicate True/False has not yet been seen for any/all,
                    # so by Kleene logic the result is currently unknown
                    if out[lab, j] != flag_val:
                        out[lab, j] = -1
                    continue

                val = values[i, j]

                # If True and 'any' or False and 'all', the result is
                # already determined
                if val == flag_val:
                    out[lab, j] = flag_val
 542  
 543  
# ----------------------------------------------------------------------
# group_sum, group_prod, group_var, group_mean, group_ohlc
# ----------------------------------------------------------------------

# Dtypes for which a mean is well-defined: floating and complex.
ctypedef fused mean_t:
    float64_t
    float32_t
    complex64_t
    complex128_t

# Dtypes supported by group_sum: everything in mean_t plus integers and
# python objects.
ctypedef fused sum_t:
    mean_t
    int64_t
    uint64_t
    object
 560  
 561  @cython.wraparound(False)
 562  @cython.boundscheck(False)
 563  def group_sum(
 564      sum_t[:, ::1] out,
 565      int64_t[::1] counts,
 566      ndarray[sum_t, ndim=2] values,
 567      const intp_t[::1] labels,
 568      const uint8_t[:, :] mask,
 569      uint8_t[:, ::1] result_mask=None,
 570      Py_ssize_t min_count=0,
 571      bint is_datetimelike=False,
 572  ) -> None:
 573      """
 574      Only aggregates on axis=0 using Kahan summation
 575      """
 576      cdef:
 577          Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
 578          sum_t val, t, y
 579          sum_t[:, ::1] sumx, compensation
 580          int64_t[:, ::1] nobs
 581          Py_ssize_t len_values = len(values), len_labels = len(labels)
 582          bint uses_mask = mask is not None
 583          bint isna_entry
 584  
 585      if len_values != len_labels:
 586          raise ValueError("len(index) != len(labels)")
 587  
 588      nobs = np.zeros((<object>out).shape, dtype=np.int64)
 589      # the below is equivalent to `np.zeros_like(out)` but faster
 590      sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
 591      compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
 592  
 593      N, K = (<object>values).shape
 594  
 595      if sum_t is object:
 596          # NB: this does not use 'compensation' like the non-object track does.
 597          for i in range(N):
 598              lab = labels[i]
 599              if lab < 0:
 600                  continue
 601  
 602              counts[lab] += 1
 603              for j in range(K):
 604                  val = values[i, j]
 605  
 606                  # not nan
 607                  if not checknull(val):
 608                      nobs[lab, j] += 1
 609  
 610                      if nobs[lab, j] == 1:
 611                          # i.e. we haven't added anything yet; avoid TypeError
 612                          #  if e.g. val is a str and sumx[lab, j] is 0
 613                          t = val
 614                      else:
 615                          t = sumx[lab, j] + val
 616                      sumx[lab, j] = t
 617  
 618          for i in range(ncounts):
 619              for j in range(K):
 620                  if nobs[i, j] < min_count:
 621                      out[i, j] = None
 622  
 623                  else:
 624                      out[i, j] = sumx[i, j]
 625      else:
 626          with nogil:
 627              for i in range(N):
 628                  lab = labels[i]
 629                  if lab < 0:
 630                      continue
 631  
 632                  counts[lab] += 1
 633                  for j in range(K):
 634                      val = values[i, j]
 635  
 636                      # not nan
 637                      # With dt64/td64 values, values have been cast to float64
 638                      #  instead if int64 for group_sum, but the logic
 639                      #  is otherwise the same as in _treat_as_na
 640                      if uses_mask:
 641                          isna_entry = mask[i, j]
 642                      elif (sum_t is float32_t or sum_t is float64_t
 643                          or sum_t is complex64_t or sum_t is complex64_t):
 644                          # avoid warnings because of equality comparison
 645                          isna_entry = not val == val
 646                      elif sum_t is int64_t and is_datetimelike and val == NPY_NAT:
 647                          isna_entry = True
 648                      else:
 649                          isna_entry = False
 650  
 651                      if not isna_entry:
 652                          nobs[lab, j] += 1
 653                          y = val - compensation[lab, j]
 654                          t = sumx[lab, j] + y
 655                          compensation[lab, j] = t - sumx[lab, j] - y
 656                          sumx[lab, j] = t
 657  
 658              for i in range(ncounts):
 659                  for j in range(K):
 660                      if nobs[i, j] < min_count:
 661                          # if we are integer dtype, not is_datetimelike, and
 662                          #  not uses_mask, then getting here implies that
 663                          #  counts[i] < min_count, which means we will
 664                          #  be cast to float64 and masked at the end
 665                          #  of WrappedCythonOp._call_cython_op. So we can safely
 666                          #  set a placeholder value in out[i, j].
 667                          if uses_mask:
 668                              result_mask[i, j] = True
 669                          elif (sum_t is float32_t or sum_t is float64_t
 670                              or sum_t is complex64_t or sum_t is complex64_t):
 671                              out[i, j] = NAN
 672                          elif sum_t is int64_t:
 673                              out[i, j] = NPY_NAT
 674                          else:
 675                              # placeholder, see above
 676                              out[i, j] = 0
 677  
 678                      else:
 679                          out[i, j] = sumx[i, j]
 680  
 681  
@cython.wraparound(False)
@cython.boundscheck(False)
def group_prod(
    int64float_t[:, ::1] out,
    int64_t[::1] counts,
    ndarray[int64float_t, ndim=2] values,
    const intp_t[::1] labels,
    const uint8_t[:, ::1] mask,
    uint8_t[:, ::1] result_mask=None,
    Py_ssize_t min_count=0,
) -> None:
    """
    Only aggregates on axis=0

    Parameters
    ----------
    out : np.ndarray[ndim=2]
        Array to write group products into.
    counts : np.ndarray[np.int64]
        Populated with group sizes during the algorithm.
    values : np.ndarray[ndim=2]
        Values to take the product of.
    labels : np.ndarray[np.intp]
        Group label for each row of `values`; negative labels are skipped.
    mask : np.ndarray[np.uint8] or None
        Mask of `values`; nonzero entries are treated as NA.
    result_mask : np.ndarray[np.uint8], optional
        Mask of `out`; written only when `mask` is provided.
    min_count : Py_ssize_t
        Groups with fewer than this many non-NA entries per column get an
        NA (or untouched placeholder) result.

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        int64float_t val, count
        int64float_t[:, ::1] prodx
        int64_t[:, ::1] nobs
        Py_ssize_t len_values = len(values), len_labels = len(labels)
        bint isna_entry, uses_mask = mask is not None

    if len_values != len_labels:
        raise ValueError("len(index) != len(labels)")

    nobs = np.zeros((<object>out).shape, dtype=np.int64)
    # 1 is the multiplicative identity.
    prodx = np.ones((<object>out).shape, dtype=(<object>out).base.dtype)

    N, K = (<object>values).shape

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[i, j]

                if uses_mask:
                    isna_entry = mask[i, j]
                elif int64float_t is float32_t or int64float_t is float64_t:
                    # avoid warnings because of equality comparison
                    isna_entry = not val == val
                else:
                    isna_entry = False

                if not isna_entry:
                    nobs[lab, j] += 1
                    prodx[lab, j] *= val

        for i in range(ncounts):
            for j in range(K):
                if nobs[i, j] < min_count:

                    # else case is not possible
                    if uses_mask:
                        result_mask[i, j] = True
                        # Be deterministic, out was initialized as empty
                        out[i, j] = 0
                    elif int64float_t is float32_t or int64float_t is float64_t:
                        out[i, j] = NAN
                    else:
                        # we only get here when < mincount which gets handled later
                        pass

                else:
                    out[i, j] = prodx[i, j]
 750  
 751  
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
def group_var(
    floating[:, ::1] out,
    int64_t[::1] counts,
    ndarray[floating, ndim=2] values,
    const intp_t[::1] labels,
    Py_ssize_t min_count=-1,
    int64_t ddof=1,
) -> None:
    """
    Per-group variance, computed with Welford's single-pass online
    algorithm.  Only aggregates on axis=0; NaN values are ignored.

    Parameters
    ----------
    out : np.ndarray[floating, ndim=2]
        Array to write per-group variances into.
    counts : np.ndarray[np.int64]
        Populated with group sizes during the algorithm.
    values : np.ndarray[floating, ndim=2]
        Values to take the variance of.
    labels : np.ndarray[np.intp]
        Group label for each row of `values`; negative labels are skipped.
    min_count : Py_ssize_t
        Must be -1; only used in sum and prod.
    ddof : int64
        Delta degrees of freedom; groups with <= ddof observations get NaN.

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        floating val, ct, oldmean
        floating[:, ::1] mean
        int64_t[:, ::1] nobs
        Py_ssize_t len_values = len(values), len_labels = len(labels)

    assert min_count == -1, "'min_count' only used in sum and prod"

    if len_values != len_labels:
        raise ValueError("len(index) != len(labels)")

    nobs = np.zeros((<object>out).shape, dtype=np.int64)
    mean = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)

    N, K = (<object>values).shape

    # out accumulates the sum of squared deviations (M2 in Welford's terms).
    out[:, :] = 0.0

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1

            for j in range(K):
                val = values[i, j]

                # not nan
                if val == val:
                    nobs[lab, j] += 1
                    # Welford update: running mean plus M2 accumulation.
                    oldmean = mean[lab, j]
                    mean[lab, j] += (val - oldmean) / nobs[lab, j]
                    out[lab, j] += (val - mean[lab, j]) * (val - oldmean)

        for i in range(ncounts):
            for j in range(K):
                ct = nobs[i, j]
                if ct <= ddof:
                    # Not enough observations for the requested ddof.
                    out[i, j] = NAN
                else:
                    out[i, j] /= (ct - ddof)
 807  
 808  
@cython.wraparound(False)
@cython.boundscheck(False)
def group_mean(
    mean_t[:, ::1] out,
    int64_t[::1] counts,
    ndarray[mean_t, ndim=2] values,
    const intp_t[::1] labels,
    Py_ssize_t min_count=-1,
    bint is_datetimelike=False,
    const uint8_t[:, ::1] mask=None,
    uint8_t[:, ::1] result_mask=None,
) -> None:
    """
    Compute the mean per label given a label assignment for each value.
    NaN values are ignored.

    Parameters
    ----------
    out : np.ndarray[floating or complex]
        Values into which this method will write its results.
    counts : np.ndarray[int64]
        A zeroed array of the same shape as labels,
        populated by group sizes during algorithm.
    values : np.ndarray[floating or complex]
        2-d array of the values to find the mean of.
    labels : np.ndarray[np.intp]
        Array containing unique label for each group, with its
        ordering matching up to the corresponding record in `values`.
    min_count : Py_ssize_t
        Only used in sum and prod. Always -1.
    is_datetimelike : bool
        True if `values` contains datetime-like entries.
    mask : ndarray[bool, ndim=2], optional
        Not used.
    result_mask : ndarray[bool, ndim=2], optional
        Not used.

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object.
    `counts` is modified to hold group sizes
    """

    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        mean_t val, count, y, t, nan_val
        mean_t[:, ::1] sumx, compensation
        int64_t[:, ::1] nobs
        Py_ssize_t len_values = len(values), len_labels = len(labels)

    assert min_count == -1, "'min_count' only used in sum and prod"

    if len_values != len_labels:
        raise ValueError("len(index) != len(labels)")

    # the below is equivalent to `np.zeros_like(out)` but faster
    nobs = np.zeros((<object>out).shape, dtype=np.int64)
    sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
    # Kahan-summation compensation terms, one per (group, column).
    compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)

    N, K = (<object>values).shape
    nan_val = NPY_NAT if is_datetimelike else NAN

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[i, j]
                # not nan (and, for datetime-like data, not NPY_NAT)
                if val == val and not (is_datetimelike and val == NPY_NAT):
                    nobs[lab, j] += 1
                    # Kahan summation to reduce floating-point error.
                    y = val - compensation[lab, j]
                    t = sumx[lab, j] + y
                    compensation[lab, j] = t - sumx[lab, j] - y
                    sumx[lab, j] = t

        for i in range(ncounts):
            for j in range(K):
                count = nobs[i, j]
                if nobs[i, j] == 0:
                    # Empty group: NaT for datetime-like data, NaN otherwise.
                    out[i, j] = nan_val
                else:
                    out[i, j] = sumx[i, j] / count
 896  
 897  
 898  @cython.wraparound(False)
 899  @cython.boundscheck(False)
 900  def group_ohlc(
 901      int64float_t[:, ::1] out,
 902      int64_t[::1] counts,
 903      ndarray[int64float_t, ndim=2] values,
 904      const intp_t[::1] labels,
 905      Py_ssize_t min_count=-1,
 906      const uint8_t[:, ::1] mask=None,
 907      uint8_t[:, ::1] result_mask=None,
 908  ) -> None:
 909      """
 910      Only aggregates on axis=0
 911      """
 912      cdef:
 913          Py_ssize_t i, j, N, K, lab
 914          int64float_t val
 915          uint8_t[::1] first_element_set
 916          bint isna_entry, uses_mask = not mask is None
 917  
 918      assert min_count == -1, "'min_count' only used in sum and prod"
 919  
 920      if len(labels) == 0:
 921          return
 922  
 923      N, K = (<object>values).shape
 924  
 925      if out.shape[1] != 4:
 926          raise ValueError('Output array must have 4 columns')
 927  
 928      if K > 1:
 929          raise NotImplementedError("Argument 'values' must have only one dimension")
 930  
 931      if int64float_t is float32_t or int64float_t is float64_t:
 932          out[:] = np.nan
 933      else:
 934          out[:] = 0
 935  
 936      first_element_set = np.zeros((<object>counts).shape, dtype=np.uint8)
 937      if uses_mask:
 938          result_mask[:] = True
 939  
 940      with nogil:
 941          for i in range(N):
 942              lab = labels[i]
 943              if lab == -1:
 944                  continue
 945  
 946              counts[lab] += 1
 947              val = values[i, 0]
 948  
 949              if uses_mask:
 950                  isna_entry = mask[i, 0]
 951              elif int64float_t is float32_t or int64float_t is float64_t:
 952                  isna_entry = val != val
 953              else:
 954                  isna_entry = False
 955  
 956              if isna_entry:
 957                  continue
 958  
 959              if not first_element_set[lab]:
 960                  out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val
 961                  first_element_set[lab] = True
 962                  if uses_mask:
 963                      result_mask[lab] = False
 964              else:
 965                  out[lab, 1] = max(out[lab, 1], val)
 966                  out[lab, 2] = min(out[lab, 2], val)
 967                  out[lab, 3] = val
 968  
 969  
@cython.boundscheck(False)
@cython.wraparound(False)
def group_quantile(
    ndarray[float64_t, ndim=2] out,
    ndarray[numeric_t, ndim=1] values,
    ndarray[intp_t] labels,
    ndarray[uint8_t] mask,
    const intp_t[:] sort_indexer,
    const float64_t[:] qs,
    str interpolation,
) -> None:
    """
    Calculate the quantile per group.

    Parameters
    ----------
    out : np.ndarray[np.float64, ndim=2]
        Array of aggregated values that will be written to.
    values : np.ndarray
        Array containing the values to apply the function against.
    labels : ndarray[np.intp]
        Array containing the group label for each row in `values`;
        -1 marks the NA group and is skipped.
    mask : ndarray[np.uint8]
        Nonzero entries mark missing values in `values`.
    sort_indexer : ndarray[np.intp]
        Indices describing sort order by values and labels.
    qs : ndarray[float64_t]
        The quantile values to search for; each must lie in [0, 1].
    interpolation : {'linear', 'lower', 'higher', 'nearest', 'midpoint'}
        How to interpolate when a quantile falls between two values.

    Notes
    -----
    Rather than explicitly returning a value, this function modifies the
    provided `out` parameter.
    """
    cdef:
        Py_ssize_t i, N=len(labels), ngroups, grp_sz, non_na_sz, k, nqs
        Py_ssize_t grp_start=0, idx=0
        intp_t lab
        InterpolationEnumType interp
        float64_t q_val, q_idx, frac, val, next_val
        int64_t[::1] counts, non_na_counts

    assert values.shape[0] == N

    if any(not (0 <= q <= 1) for q in qs):
        wrong = [x for x in qs if not (0 <= x <= 1)][0]
        raise ValueError(
            f"Each 'q' must be between 0 and 1. Got '{wrong}' instead"
        )

    inter_methods = {
        'linear': INTERPOLATION_LINEAR,
        'lower': INTERPOLATION_LOWER,
        'higher': INTERPOLATION_HIGHER,
        'nearest': INTERPOLATION_NEAREST,
        'midpoint': INTERPOLATION_MIDPOINT,
    }
    interp = inter_methods[interpolation]

    nqs = len(qs)
    ngroups = len(out)
    counts = np.zeros(ngroups, dtype=np.int64)
    non_na_counts = np.zeros(ngroups, dtype=np.int64)

    # First figure out the size of every group
    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab == -1:  # NA group label
                continue

            counts[lab] += 1
            if not mask[i]:
                non_na_counts[lab] += 1

    with nogil:
        for i in range(ngroups):
            # Figure out how many group elements there are
            grp_sz = counts[i]
            non_na_sz = non_na_counts[i]

            if non_na_sz == 0:
                # All-NA group -> NaN for every requested quantile
                for k in range(nqs):
                    out[i, k] = NaN
            else:
                for k in range(nqs):
                    q_val = qs[k]

                    # Calculate where to retrieve the desired value
                    # Casting to int will intentionally truncate result
                    idx = grp_start + <int64_t>(q_val * <float64_t>(non_na_sz - 1))

                    val = values[sort_indexer[idx]]
                    # If requested quantile falls evenly on a particular index
                    # then write that index's value out. Otherwise interpolate
                    q_idx = q_val * (non_na_sz - 1)
                    frac = q_idx % 1

                    if frac == 0.0 or interp == INTERPOLATION_LOWER:
                        out[i, k] = val
                    else:
                        next_val = values[sort_indexer[idx + 1]]
                        if interp == INTERPOLATION_LINEAR:
                            out[i, k] = val + (next_val - val) * frac
                        elif interp == INTERPOLATION_HIGHER:
                            out[i, k] = next_val
                        elif interp == INTERPOLATION_MIDPOINT:
                            out[i, k] = (val + next_val) / 2.0
                        elif interp == INTERPOLATION_NEAREST:
                            if frac > .5 or (frac == .5 and q_val > .5):  # Always OK?
                                out[i, k] = next_val
                            else:
                                out[i, k] = val

            # Increment the index reference in sorted_arr for the next group
            grp_start += grp_sz
1085  
1086  
1087  # ----------------------------------------------------------------------
1088  # group_nth, group_last, group_rank
1089  # ----------------------------------------------------------------------
1090  
cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil:
    # Return True if `val` is considered missing ("NA") for its dtype.
    if numeric_object_t is object:
        # Should never be used, but we need to avoid the `val != val` below
        #  or else cython will raise about gil acquisition.
        raise NotImplementedError

    elif numeric_object_t is int64_t:
        # int64 is NA (NaT) only when interpreted as datetime-like
        return is_datetimelike and val == NPY_NAT
    elif numeric_object_t is float32_t or numeric_object_t is float64_t:
        # NaN is the only float value not equal to itself
        return val != val
    else:
        # non-datetimelike integer
        return False
1104  
1105  
cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, bint is_datetimelike):
    """
    Find either the min or the max supported by numeric_object_t; 'val' is a
    placeholder to effectively make numeric_object_t an argument.
    """
    # Passing `not compute_max` requests the dtype minimum when computing a
    #  max (and vice versa), i.e. the identity element for the comparison.
    return get_rank_nan_fill_val(
        not compute_max,
        val=val,
        is_datetimelike=is_datetimelike,
    )
1116  
1117  
cdef numeric_t _get_na_val(numeric_t val, bint is_datetimelike):
    """
    Return the NA sentinel for numeric_t; 'val' is a placeholder to
    effectively make numeric_t an argument.
    """
    cdef:
        numeric_t na_val

    if numeric_t == float32_t or numeric_t == float64_t:
        na_val = NaN
    elif numeric_t is int64_t and is_datetimelike:
        na_val = NPY_NAT
    else:
        # Used in case of masks
        na_val = 0
    return na_val
1130  
1131  
1132  # TODO(cython3): GH#31710 use memorviews once cython 0.30 is released so we can
1133  #  use `const numeric_object_t[:, :] values`
@cython.wraparound(False)
@cython.boundscheck(False)
def group_last(
    numeric_object_t[:, ::1] out,
    int64_t[::1] counts,
    ndarray[numeric_object_t, ndim=2] values,
    const intp_t[::1] labels,
    const uint8_t[:, :] mask,
    uint8_t[:, ::1] result_mask=None,
    Py_ssize_t min_count=-1,
    bint is_datetimelike=False,
) -> None:
    """
    Only aggregates on axis=0: write the last non-NA value of each
    group/column into `out`.

    Parameters
    ----------
    out : np.ndarray[numeric_object_t, ndim=2]
        Array to store result in; modified in-place.
    counts : np.ndarray[int64]
        Input as a zeroed array, populated by group sizes during algorithm.
    values : np.ndarray[numeric_object_t, ndim=2]
        Values from which the last entry per group is taken.
    labels : np.ndarray[np.intp]
        Group label for each row of `values`; negative labels are skipped.
    mask : np.ndarray[bool, ndim=2]
        If not None, nonzero entries mark missing values.
    result_mask : np.ndarray[bool, ndim=2], optional
        If not None, locations in the output that are NA. Modified in-place.
    min_count : Py_ssize_t, default -1
        Minimum number of non-NA values required; clamped to at least 1.
    is_datetimelike : bool, default False
        True if `values` contains datetime-like entries.

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object.
    `counts` is modified to hold group sizes.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        numeric_object_t val
        ndarray[numeric_object_t, ndim=2] resx
        ndarray[int64_t, ndim=2] nobs
        bint uses_mask = mask is not None
        bint isna_entry

    # TODO(cython3):
    # Instead of `labels.shape[0]` use `len(labels)`
    if not len(values) == labels.shape[0]:
        raise AssertionError("len(index) != len(labels)")

    min_count = max(min_count, 1)
    nobs = np.zeros((<object>out).shape, dtype=np.int64)
    if numeric_object_t is object:
        resx = np.empty((<object>out).shape, dtype=object)
    else:
        resx = np.empty_like(out)

    N, K = (<object>values).shape

    if numeric_object_t is object:
        # TODO(cython3): De-duplicate once conditional-nogil is available
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[i, j]

                if uses_mask:
                    isna_entry = mask[i, j]
                else:
                    isna_entry = checknull(val)

                if not isna_entry:
                    # NB: use _treat_as_na here once
                    #  conditional-nogil is available.
                    nobs[lab, j] += 1
                    resx[lab, j] = val

        for i in range(ncounts):
            for j in range(K):
                if nobs[i, j] < min_count:
                    out[i, j] = None
                else:
                    out[i, j] = resx[i, j]
    else:
        with nogil:
            for i in range(N):
                lab = labels[i]
                if lab < 0:
                    continue

                counts[lab] += 1
                for j in range(K):
                    val = values[i, j]

                    if uses_mask:
                        isna_entry = mask[i, j]
                    else:
                        isna_entry = _treat_as_na(val, is_datetimelike)

                    if not isna_entry:
                        nobs[lab, j] += 1
                        resx[lab, j] = val

            for i in range(ncounts):
                for j in range(K):
                    # TODO(cython3): the entire next block can be shared
                    #  across 3 places once conditional-nogil is available
                    if nobs[i, j] < min_count:
                        # if we are integer dtype, not is_datetimelike, and
                        #  not uses_mask, then getting here implies that
                        #  counts[i] < min_count, which means we will
                        #  be cast to float64 and masked at the end
                        #  of WrappedCythonOp._call_cython_op. So we can safely
                        #  set a placeholder value in out[i, j].
                        if uses_mask:
                            result_mask[i, j] = True
                            # set out[i, j] to 0 to be deterministic, as
                            #  it was initialized with np.empty (matches
                            #  group_nth / group_min_max). Also ensures
                            #  we can downcast out if appropriate.
                            out[i, j] = 0
                        elif numeric_object_t is float32_t or numeric_object_t is float64_t:
                            out[i, j] = NAN
                        elif numeric_object_t is int64_t:
                            # Per above, this is a placeholder in
                            #  non-is_datetimelike cases.
                            out[i, j] = NPY_NAT
                        else:
                            # placeholder, see above
                            out[i, j] = 0
                    else:
                        out[i, j] = resx[i, j]
1243  
1244  
1245  # TODO(cython3): GH#31710 use memorviews once cython 0.30 is released so we can
1246  #  use `const numeric_object_t[:, :] values`
@cython.wraparound(False)
@cython.boundscheck(False)
def group_nth(
    numeric_object_t[:, ::1] out,
    int64_t[::1] counts,
    ndarray[numeric_object_t, ndim=2] values,
    const intp_t[::1] labels,
    const uint8_t[:, :] mask,
    uint8_t[:, ::1] result_mask=None,
    int64_t min_count=-1,
    int64_t rank=1,
    bint is_datetimelike=False,
) -> None:
    """
    Only aggregates on axis=0: write the `rank`-th non-NA value of each
    group/column into `out`.

    Parameters
    ----------
    out : np.ndarray[numeric_object_t, ndim=2]
        Array to store result in; modified in-place.
    counts : np.ndarray[int64]
        Input as a zeroed array, populated by group sizes during algorithm.
    values : np.ndarray[numeric_object_t, ndim=2]
        Values from which the nth entry per group is taken.
    labels : np.ndarray[np.intp]
        Group label for each row of `values`; negative labels are skipped.
    mask : np.ndarray[bool, ndim=2]
        If not None, nonzero entries mark missing values.
    result_mask : np.ndarray[bool, ndim=2], optional
        If not None, locations in the output that are NA. Modified in-place.
    min_count : int64_t, default -1
        Minimum number of non-NA values required; clamped to at least 1.
    rank : int64_t, default 1
        1-based position of the non-NA value to take within each group.
    is_datetimelike : bool, default False
        True if `values` contains datetime-like entries.

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object.
    `counts` is modified to hold group sizes.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        numeric_object_t val
        ndarray[numeric_object_t, ndim=2] resx
        ndarray[int64_t, ndim=2] nobs
        bint uses_mask = mask is not None
        bint isna_entry

    # TODO(cython3):
    # Instead of `labels.shape[0]` use `len(labels)`
    if not len(values) == labels.shape[0]:
        raise AssertionError("len(index) != len(labels)")

    min_count = max(min_count, 1)
    nobs = np.zeros((<object>out).shape, dtype=np.int64)
    if numeric_object_t is object:
        resx = np.empty((<object>out).shape, dtype=object)
    else:
        resx = np.empty_like(out)

    N, K = (<object>values).shape

    if numeric_object_t is object:
        # TODO(cython3): De-duplicate once conditional-nogil is available
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[i, j]

                if uses_mask:
                    isna_entry = mask[i, j]
                else:
                    isna_entry = checknull(val)

                if not isna_entry:
                    # NB: use _treat_as_na here once
                    #  conditional-nogil is available.
                    nobs[lab, j] += 1
                    # nobs counts non-NA entries seen so far; the rank-th one
                    #  is the value we keep.
                    if nobs[lab, j] == rank:
                        resx[lab, j] = val

        for i in range(ncounts):
            for j in range(K):
                if nobs[i, j] < min_count:
                    out[i, j] = None
                else:
                    out[i, j] = resx[i, j]

    else:
        with nogil:
            for i in range(N):
                lab = labels[i]
                if lab < 0:
                    continue

                counts[lab] += 1
                for j in range(K):
                    val = values[i, j]

                    if uses_mask:
                        isna_entry = mask[i, j]
                    else:
                        isna_entry = _treat_as_na(val, is_datetimelike)

                    if not isna_entry:
                        nobs[lab, j] += 1
                        if nobs[lab, j] == rank:
                            resx[lab, j] = val

            # TODO: de-dup this whole block with group_last?
            for i in range(ncounts):
                for j in range(K):
                    if nobs[i, j] < min_count:
                        # if we are integer dtype, not is_datetimelike, and
                        #  not uses_mask, then getting here implies that
                        #  counts[i] < min_count, which means we will
                        #  be cast to float64 and masked at the end
                        #  of WrappedCythonOp._call_cython_op. So we can safely
                        #  set a placeholder value in out[i, j].
                        if uses_mask:
                            result_mask[i, j] = True
                            # set out[i, j] to 0 to be deterministic, as
                            #  it was initialized with np.empty. Also ensures
                            #  we can downcast out if appropriate.
                            out[i, j] = 0
                        elif numeric_object_t is float32_t or numeric_object_t is float64_t:
                            out[i, j] = NAN
                        elif numeric_object_t is int64_t:
                            # Per above, this is a placeholder in
                            #  non-is_datetimelike cases.
                            out[i, j] = NPY_NAT
                        else:
                            # placeholder, see above
                            out[i, j] = 0

                    else:
                        out[i, j] = resx[i, j]
1364  
1365  
@cython.boundscheck(False)
@cython.wraparound(False)
def group_rank(
    float64_t[:, ::1] out,
    ndarray[numeric_object_t, ndim=2] values,
    const intp_t[::1] labels,
    int ngroups,
    bint is_datetimelike,
    str ties_method="average",
    bint ascending=True,
    bint pct=False,
    str na_option="keep",
    const uint8_t[:, :] mask=None,
) -> None:
    """
    Provides the rank of values within each group.

    Parameters
    ----------
    out : np.ndarray[np.float64, ndim=2]
        Values to which this method will write its results.
    values : np.ndarray of numeric_object_t values to be ranked
    labels : np.ndarray[np.intp]
        Array containing unique label for each group, with its ordering
        matching up to the corresponding record in `values`
    ngroups : int
        This parameter is not used, is needed to match signatures of other
        groupby functions.
    is_datetimelike : bool
        True if `values` contains datetime-like entries.
    ties_method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
        * average: average rank of group
        * min: lowest rank in group
        * max: highest rank in group
        * first: ranks assigned in order they appear in the array
        * dense: like 'min', but rank always increases by 1 between groups
    ascending : bool, default True
        False for ranks by high (1) to low (N)
    pct : bool, default False
        Compute percentage rank of data within each group
    na_option : {'keep', 'top', 'bottom'}, default 'keep'
        * keep: leave NA values where they are
        * top: smallest rank if ascending
        * bottom: smallest rank if descending
    mask : np.ndarray[bool] or None, default None

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object
    """
    cdef:
        Py_ssize_t i, k, N
        ndarray[float64_t, ndim=1] result
        const uint8_t[:] sub_mask

    # Rank each column independently via rank_1d, which handles the grouping
    #  through `labels`.
    N = values.shape[1]

    for k in range(N):
        if mask is None:
            sub_mask = None
        else:
            sub_mask = mask[:, k]

        result = rank_1d(
            values=values[:, k],
            labels=labels,
            is_datetimelike=is_datetimelike,
            ties_method=ties_method,
            ascending=ascending,
            pct=pct,
            na_option=na_option,
            mask=sub_mask,
        )
        for i in range(len(result)):
            # Rows in the NA group (negative label) are left untouched in out.
            if labels[i] >= 0:
                out[i, k] = result[i]
1443  
1444  
1445  # ----------------------------------------------------------------------
1446  # group_min, group_max
1447  # ----------------------------------------------------------------------
1448  
1449  
@cython.wraparound(False)
@cython.boundscheck(False)
cdef group_min_max(
    numeric_t[:, ::1] out,
    int64_t[::1] counts,
    ndarray[numeric_t, ndim=2] values,
    const intp_t[::1] labels,
    Py_ssize_t min_count=-1,
    bint is_datetimelike=False,
    bint compute_max=True,
    const uint8_t[:, ::1] mask=None,
    uint8_t[:, ::1] result_mask=None,
):
    """
    Compute minimum/maximum  of columns of `values`, in row groups `labels`.

    Parameters
    ----------
    out : np.ndarray[numeric_t, ndim=2]
        Array to store result in.
    counts : np.ndarray[int64]
        Input as a zeroed array, populated by group sizes during algorithm
    values : array
        Values to find column-wise min/max of.
    labels : np.ndarray[np.intp]
        Labels to group by.
    min_count : Py_ssize_t, default -1
        The minimum number of non-NA group elements, NA result if threshold
        is not met
    is_datetimelike : bool
        True if `values` contains datetime-like entries.
    compute_max : bint, default True
        True to compute group-wise max, False to compute min
    mask : ndarray[bool, ndim=2], optional
        If not None, indices represent missing values,
        otherwise the mask will not be used
    result_mask : ndarray[bool, ndim=2], optional
        If not None, these specify locations in the output that are NA.
        Modified in-place.

    Notes
    -----
    This method modifies the `out` parameter, rather than returning an object.
    `counts` is modified to hold group sizes
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ngroups = len(counts)
        numeric_t val
        ndarray[numeric_t, ndim=2] group_min_or_max
        int64_t[:, ::1] nobs
        bint uses_mask = mask is not None
        bint isna_entry

    # TODO(cython3):
    # Instead of `labels.shape[0]` use `len(labels)`
    if not len(values) == labels.shape[0]:
        raise AssertionError("len(index) != len(labels)")

    # A group needs at least one non-NA value to produce a result.
    min_count = max(min_count, 1)
    nobs = np.zeros((<object>out).shape, dtype=np.int64)

    # Seed with an extreme sentinel (via _get_min_or_max) so that any
    #  observed value wins the first comparison.
    group_min_or_max = np.empty_like(out)
    group_min_or_max[:] = _get_min_or_max(<numeric_t>0, compute_max, is_datetimelike)

    N, K = (<object>values).shape

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[i, j]

                if uses_mask:
                    isna_entry = mask[i, j]
                else:
                    isna_entry = _treat_as_na(val, is_datetimelike)

                if not isna_entry:
                    nobs[lab, j] += 1
                    if compute_max:
                        if val > group_min_or_max[lab, j]:
                            group_min_or_max[lab, j] = val
                    else:
                        if val < group_min_or_max[lab, j]:
                            group_min_or_max[lab, j] = val

        for i in range(ngroups):
            for j in range(K):
                if nobs[i, j] < min_count:
                    # if we are integer dtype, not is_datetimelike, and
                    #  not uses_mask, then getting here implies that
                    #  counts[i] < min_count, which means we will
                    #  be cast to float64 and masked at the end
                    #  of WrappedCythonOp._call_cython_op. So we can safely
                    #  set a placeholder value in out[i, j].
                    if uses_mask:
                        result_mask[i, j] = True
                        # set out[i, j] to 0 to be deterministic, as
                        #  it was initialized with np.empty. Also ensures
                        #  we can downcast out if appropriate.
                        out[i, j] = 0
                    elif numeric_t is float32_t or numeric_t is float64_t:
                        out[i, j] = NAN
                    elif numeric_t is int64_t:
                        # Per above, this is a placeholder in
                        #  non-is_datetimelike cases.
                        out[i, j] = NPY_NAT
                    else:
                        # placeholder, see above
                        out[i, j] = 0
                else:
                    out[i, j] = group_min_or_max[i, j]
1566  
1567  
@cython.wraparound(False)
@cython.boundscheck(False)
def group_max(
    numeric_t[:, ::1] out,
    int64_t[::1] counts,
    ndarray[numeric_t, ndim=2] values,
    const intp_t[::1] labels,
    Py_ssize_t min_count=-1,
    bint is_datetimelike=False,
    const uint8_t[:, ::1] mask=None,
    uint8_t[:, ::1] result_mask=None,
) -> None:
    """Compute group-wise column maxima; see group_min_max.__doc__"""
    # Thin public wrapper: all work happens in group_min_max with
    #  compute_max=True.
    group_min_max(
        out,
        counts,
        values,
        labels,
        min_count=min_count,
        is_datetimelike=is_datetimelike,
        compute_max=True,
        mask=mask,
        result_mask=result_mask,
    )
1592  
1593  
@cython.wraparound(False)
@cython.boundscheck(False)
def group_min(
    numeric_t[:, ::1] out,
    int64_t[::1] counts,
    ndarray[numeric_t, ndim=2] values,
    const intp_t[::1] labels,
    Py_ssize_t min_count=-1,
    bint is_datetimelike=False,
    const uint8_t[:, ::1] mask=None,
    uint8_t[:, ::1] result_mask=None,
) -> None:
    """Compute group-wise column minima; see group_min_max.__doc__"""
    # Thin public wrapper: all work happens in group_min_max with
    #  compute_max=False.
    group_min_max(
        out,
        counts,
        values,
        labels,
        min_count=min_count,
        is_datetimelike=is_datetimelike,
        compute_max=False,
        mask=mask,
        result_mask=result_mask,
    )
1618  
1619  
@cython.boundscheck(False)
@cython.wraparound(False)
cdef group_cummin_max(
    numeric_t[:, ::1] out,
    ndarray[numeric_t, ndim=2] values,
    const uint8_t[:, ::1] mask,
    uint8_t[:, ::1] result_mask,
    const intp_t[::1] labels,
    int ngroups,
    bint is_datetimelike,
    bint skipna,
    bint compute_max,
):
    """
    Cumulative minimum/maximum of columns of `values`, in row groups `labels`.

    Parameters
    ----------
    out : np.ndarray[numeric_t, ndim=2]
        Array to store cummin/max in.
    values : np.ndarray[numeric_t, ndim=2]
        Values to take cummin/max of.
    mask : np.ndarray[bool] or None
        If not None, indices represent missing values,
        otherwise the mask will not be used
    result_mask : ndarray[bool, ndim=2], optional
        If not None, these specify locations in the output that are NA.
        Modified in-place.
    labels : np.ndarray[np.intp]
        Labels to group by.
    ngroups : int
        Number of groups, larger than all entries of `labels`.
    is_datetimelike : bool
        True if `values` contains datetime-like entries.
    skipna : bool
        If True, ignore nans in `values`.
    compute_max : bool
        True if cumulative maximum should be computed, False
        if cumulative minimum should be computed

    Notes
    -----
    This method modifies the `out` parameter, rather than returning an object.
    """
    cdef:
        numeric_t[:, ::1] accum
        Py_ssize_t i, j, N, K
        numeric_t val, mval, na_val
        uint8_t[:, ::1] seen_na
        intp_t lab
        bint na_possible
        bint uses_mask = mask is not None
        bint isna_entry

    # Running min/max per (group, column); seeded with an extreme sentinel
    #  so the first observed value always replaces it.
    accum = np.empty((ngroups, (<object>values).shape[1]), dtype=values.dtype)
    accum[:] = _get_min_or_max(<numeric_t>0, compute_max, is_datetimelike)

    na_val = _get_na_val(<numeric_t>0, is_datetimelike)

    if uses_mask:
        na_possible = True
        # Will never be used, just to avoid uninitialized warning
        na_val = 0
    elif numeric_t is float64_t or numeric_t is float32_t:
        na_possible = True
    elif is_datetimelike:
        na_possible = True
    else:
        # Will never be used, just to avoid uninitialized warning
        na_possible = False

    if na_possible:
        # Tracks whether a NA has been seen in each (group, column); once
        #  set (and not skipna), all later entries in that group become NA.
        seen_na = np.zeros((<object>accum).shape, dtype=np.uint8)

    N, K = (<object>values).shape
    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue
            for j in range(K):

                # NA propagation: with skipna=False, one NA poisons the
                #  rest of the group's cumulative results.
                if not skipna and na_possible and seen_na[lab, j]:
                    if uses_mask:
                        result_mask[i, j] = 1
                        # Set to 0 ensures that we are deterministic and can
                        #  downcast if appropriate
                        out[i, j] = 0

                    else:
                        out[i, j] = na_val
                else:
                    val = values[i, j]

                    if uses_mask:
                        isna_entry = mask[i, j]
                    else:
                        isna_entry = _treat_as_na(val, is_datetimelike)

                    if not isna_entry:
                        mval = accum[lab, j]
                        if compute_max:
                            if val > mval:
                                accum[lab, j] = mval = val
                        else:
                            if val < mval:
                                accum[lab, j] = mval = val
                        out[i, j] = mval
                    else:
                        seen_na[lab, j] = 1
                        out[i, j] = val
1731  
1732  
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummin(
    numeric_t[:, ::1] out,
    ndarray[numeric_t, ndim=2] values,
    const intp_t[::1] labels,
    int ngroups,
    bint is_datetimelike,
    const uint8_t[:, ::1] mask=None,
    uint8_t[:, ::1] result_mask=None,
    bint skipna=True,
) -> None:
    """See group_cummin_max.__doc__"""
    # Thin dispatcher: all of the work happens in the shared kernel;
    # compute_max=False selects the running-minimum accumulation.
    group_cummin_max(
        out=out,
        values=values,
        labels=labels,
        ngroups=ngroups,
        is_datetimelike=is_datetimelike,
        mask=mask,
        result_mask=result_mask,
        skipna=skipna,
        compute_max=False,
    )
1757  
1758  
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummax(
    numeric_t[:, ::1] out,
    ndarray[numeric_t, ndim=2] values,
    const intp_t[::1] labels,
    int ngroups,
    bint is_datetimelike,
    const uint8_t[:, ::1] mask=None,
    uint8_t[:, ::1] result_mask=None,
    bint skipna=True,
) -> None:
    """See group_cummin_max.__doc__"""
    # Thin dispatcher: all of the work happens in the shared kernel;
    # compute_max=True selects the running-maximum accumulation.
    group_cummin_max(
        out=out,
        values=values,
        labels=labels,
        ngroups=ngroups,
        is_datetimelike=is_datetimelike,
        mask=mask,
        result_mask=result_mask,
        skipna=skipna,
        compute_max=True,
    )