/ lib / pandas / _libs / reshape.pyx
reshape.pyx
  1  cimport cython
  2  from cython cimport Py_ssize_t
  3  from numpy cimport (
  4      int64_t,
  5      ndarray,
  6      uint8_t,
  7  )
  8  
  9  import numpy as np
 10  
 11  cimport numpy as cnp
 12  
 13  cnp.import_array()
 14  
 15  from pandas._libs.dtypes cimport numeric_object_t
 16  from pandas._libs.lib cimport c_is_list_like
 17  
 18  
 19  @cython.wraparound(False)
 20  @cython.boundscheck(False)
 21  def unstack(numeric_object_t[:, :] values, const uint8_t[:] mask,
 22              Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width,
 23              numeric_object_t[:, :] new_values, uint8_t[:, :] new_mask) -> None:
 24      """
 25      Transform long values to wide new_values.
 26  
 27      Parameters
 28      ----------
 29      values : typed ndarray
 30      mask : np.ndarray[bool]
 31      stride : int
 32      length : int
 33      width : int
 34      new_values : np.ndarray[bool]
 35          result array
 36      new_mask : np.ndarray[bool]
 37          result mask
 38      """
 39      cdef:
 40          Py_ssize_t i, j, w, nulls, s, offset
 41  
 42      if numeric_object_t is not object:
 43          # evaluated at compile-time
 44          with nogil:
 45              for i in range(stride):
 46  
 47                  nulls = 0
 48                  for j in range(length):
 49  
 50                      for w in range(width):
 51  
 52                          offset = j * width + w
 53  
 54                          if mask[offset]:
 55                              s = i * width + w
 56                              new_values[j, s] = values[offset - nulls, i]
 57                              new_mask[j, s] = 1
 58                          else:
 59                              nulls += 1
 60  
 61      else:
 62          # object-dtype, identical to above but we cannot use nogil
 63          for i in range(stride):
 64  
 65              nulls = 0
 66              for j in range(length):
 67  
 68                  for w in range(width):
 69  
 70                      offset = j * width + w
 71  
 72                      if mask[offset]:
 73                          s = i * width + w
 74                          new_values[j, s] = values[offset - nulls, i]
 75                          new_mask[j, s] = 1
 76                      else:
 77                          nulls += 1
 78  
 79  
 80  @cython.wraparound(False)
 81  @cython.boundscheck(False)
 82  def explode(ndarray[object] values):
 83      """
 84      transform array list-likes to long form
 85      preserve non-list entries
 86  
 87      Parameters
 88      ----------
 89      values : ndarray[object]
 90  
 91      Returns
 92      -------
 93      ndarray[object]
 94          result
 95      ndarray[int64_t]
 96          counts
 97      """
 98      cdef:
 99          Py_ssize_t i, j, count, n
100          object v
101          ndarray[object] result
102          ndarray[int64_t] counts
103  
104      # find the resulting len
105      n = len(values)
106      counts = np.zeros(n, dtype='int64')
107      for i in range(n):
108          v = values[i]
109  
110          if c_is_list_like(v, True):
111              if len(v):
112                  counts[i] += len(v)
113              else:
114                  # empty list-like, use a nan marker
115                  counts[i] += 1
116          else:
117              counts[i] += 1
118  
119      result = np.empty(counts.sum(), dtype='object')
120      count = 0
121      for i in range(n):
122          v = values[i]
123  
124          if c_is_list_like(v, True):
125              if len(v):
126                  v = list(v)
127                  for j in range(len(v)):
128                      result[count] = v[j]
129                      count += 1
130              else:
131                  # empty list-like, use a nan marker
132                  result[count] = np.nan
133                  count += 1
134          else:
135              # replace with the existing scalar
136              result[count] = v
137              count += 1
138      return result, counts