reshape.pyx
cimport cython
from cython cimport Py_ssize_t
from numpy cimport (
    int64_t,
    ndarray,
    uint8_t,
)

import numpy as np

cimport numpy as cnp

cnp.import_array()

from pandas._libs.dtypes cimport numeric_object_t
from pandas._libs.lib cimport c_is_list_like


@cython.wraparound(False)
@cython.boundscheck(False)
def unstack(numeric_object_t[:, :] values, const uint8_t[:] mask,
            Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width,
            numeric_object_t[:, :] new_values, uint8_t[:, :] new_mask) -> None:
    """
    Transform long values to wide new_values.

    The outputs are filled in place; nothing is returned. Positions whose
    ``mask`` entry is falsy are skipped, so the corresponding entries of
    ``new_values`` and ``new_mask`` are left exactly as the caller passed
    them in.

    Parameters
    ----------
    values : typed ndarray
        2D input in long form. Row ``offset - nulls`` is read for each
        position whose ``mask`` entry is set, where ``nulls`` is the number
        of unset ``mask`` entries seen before that position.
    mask : np.ndarray[uint8]
        1D indicator, indexed by ``j * width + w`` for ``j`` in
        ``range(length)`` and ``w`` in ``range(width)``.
    stride : int
        Number of columns of ``values`` to process.
    length : int
        Number of rows of ``new_values`` / ``new_mask`` to fill.
    width : int
        Number of output columns produced per column of ``values``.
    new_values : typed ndarray
        Result array, same element type as ``values``. Entry
        ``[j, i * width + w]`` receives the value for input column ``i``.
    new_mask : np.ndarray[uint8]
        Result mask; set to 1 wherever ``new_values`` was written.
    """
    cdef:
        Py_ssize_t i, j, w, nulls, s, offset

    if numeric_object_t is not object:
        # evaluated at compile-time
        with nogil:
            for i in range(stride):

                # ``nulls`` restarts for every input column because each
                # column walks the full mask again.
                nulls = 0
                for j in range(length):

                    for w in range(width):

                        offset = j * width + w

                        if mask[offset]:
                            s = i * width + w
                            # Rows of ``values`` exist only for set mask
                            # entries, hence the shift by ``nulls``.
                            new_values[j, s] = values[offset - nulls, i]
                            new_mask[j, s] = 1
                        else:
                            nulls += 1

    else:
        # object-dtype, identical to above but we cannot use nogil
        for i in range(stride):

            nulls = 0
            for j in range(length):

                for w in range(width):

                    offset = j * width + w

                    if mask[offset]:
                        s = i * width + w
                        new_values[j, s] = values[offset - nulls, i]
                        new_mask[j, s] = 1
                    else:
                        nulls += 1


@cython.wraparound(False)
@cython.boundscheck(False)
def explode(ndarray[object] values):
    """
    Transform an array of list-likes to long form, preserving non-list
    entries.

    Each non-empty list-like contributes one output element per item, an
    empty list-like contributes a single ``np.nan`` marker, and any other
    value is copied through unchanged.

    Parameters
    ----------
    values : ndarray[object]

    Returns
    -------
    ndarray[object]
        result
    ndarray[int64_t]
        counts, the number of output elements produced by each input
        element (always at least 1)
    """
    cdef:
        Py_ssize_t i, j, count, n, n_items
        object v
        ndarray[object] result
        ndarray[int64_t] counts

    # First pass: find the resulting len so the output can be allocated once.
    n = len(values)
    counts = np.zeros(n, dtype='int64')
    for i in range(n):
        v = values[i]

        # NOTE(review): the second argument is presumably ``allow_sets``;
        # confirm against pandas._libs.lib.
        if c_is_list_like(v, True):
            n_items = len(v)
            if n_items:
                counts[i] += n_items
            else:
                # empty list-like, use a nan marker
                counts[i] += 1
        else:
            counts[i] += 1

    # Second pass: copy the items into the flat result.
    result = np.empty(counts.sum(), dtype='object')
    count = 0
    for i in range(n):
        v = values[i]

        if c_is_list_like(v, True):
            if len(v):
                # Materialize so that list-likes without positional
                # indexing can still be read by index below.
                v = list(v)
                for j in range(len(v)):
                    result[count] = v[j]
                    count += 1
            else:
                # empty list-like, use a nan marker
                result[count] = np.nan
                count += 1
        else:
            # replace with the existing scalar
            result[count] = v
            count += 1
    return result, counts