algorithms.pyc
1 o 2 c��cj� � @ s� d Z ddlmZ ddlZddlZddlmZ ddlmZm Z m 3 Z 4 mZmZm Z mZ ddlZddlZddlmZmZmZmZ ddlmZmZmZmZmZmZ ddlm Z dd l!m"Z" dd 5 l#m$Z$m%Z%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z: ddl;m<Z< dd l=m>Z>m?Z?m@Z@ ddlAmBZBmCZCmDZDmEZEmFZFmGZGmHZH ddlImJZJmKZK ddlLmMZM ddlNmOZPmQZQmRZR ddlSmTZT er�ddlmUZUmVZV ddlWmXZXmYZYmZZZm[Z[m\Z\ ddl]m^Z^m_Z_ d�dd�Z`d�d d!�Zad�d"d#�Zbejcejdejeejfejgejhejiejjejkejlejmejnejoejpd$�Zqd�d%d&�Zrd�d(d)�Zsd*d+� Ztd�d�d.d/�ZuetZvd�d2d3�Zw 4 d�d�d;d<�Zxe ed=�ed>�ed?�d@�dAejyejydfd�dHdI��Zzd�dJdK�Z{d�dMdN�Z| O A A Od�d�dTdU�Z} d�d�dWdX�Z~ Yd�d�d\d]�Z d�d�d^d_�Z� ` Z O Ad�d�dgdh�Z� d�d�dodp�Z�G dqdr� dr�Z�G dsdt� dte��Z�G dudv� dve��Z� A d�d�dzd{�Z� | d�d�d�d��Z�h d��Z�d�d�d�d��Z� 4 A Od�d�d�d��Z�d�d�d��Z�ed�d�d���Z�ed�d�d���Z�d�d�d��Z�d�d�d��Z�dS )�zl 6 Generic data algorithms. This module is experimental at the moment and not 7 intended for public consumption 8 � )�annotationsN)�dedent)� TYPE_CHECKING�Hashable�Literal�Sequence�cast�final�overload)�algos� hashtable�iNaT�lib)�AnyArrayLike� ArrayLike�DtypeObj� 9 IndexLabel�TakeIndexer�npt)�doc)�find_stack_level)�'construct_1d_object_array_from_listlike�infer_dtype_from_array�sanitize_to_nanoseconds)�ensure_float64� ensure_object�ensure_platform_int� is_array_like� is_bool_dtype�is_categorical_dtype�is_complex_dtype�is_datetime64_dtype�is_extension_array_dtype�is_float_dtype� 10 is_integer�is_integer_dtype�is_list_like�is_numeric_dtype�is_object_dtype� is_scalar�is_signed_integer_dtype�is_timedelta64_dtype�needs_i8_conversion)� concat_compat)�BaseMaskedDtype�ExtensionDtype�PandasDtype)�ABCDatetimeArray�ABCExtensionArray�ABCIndex� ABCMultiIndex� ABCRangeIndex� ABCSeries�ABCTimedeltaArray)�isna�na_value_for_dtype)�take_nd)�array�ensure_wrapped_if_datetimelike� extract_array)�validate_indices)�NumpySorter�NumpyValueArrayLike)�Categorical� DataFrame�Index� 11 MultiIndex�Series)�BaseMaskedArray�ExtensionArray�valuesr �return� 12 np.ndarrayc C sR t | t�st| dd�} t| j�rtt�| ��S t | jt�r/t d| �} | j 13 s*t| j�S t�| �S t | j�r<t d| �} | jS t| j�rYt | tj�rOt�| ��d�S t�| �jddd�S t| j�rct�| �S t| j�rw| jjdv rrt| �S t�| �S t| j�r�t tj| �S t| j�r�t | tj�r�t| �} | �d �}t tj|�}|S tj| td 14 �} t| �S )a� 15 routine to ensure that our data is of the correct 16 input dtype for lower-level routines 17 18 This will coerce: 19 - ints -> int64 20 - uint -> uint64 21 - bool -> uint8 22 - datetimelike -> i8 23 - datetime64tz -> i8 (in local tz) 24 - categorical -> codes 25 26 Parameters 27 ---------- 28 values : np.ndarray or ExtensionArray 29 30 Returns 31 ------- 32 np.ndarray 33 T�� extract_numpyrF rA �uint8F��copy)� � � �i8��dtype)� 34 isinstancer4 r= r( rU r �np�asarrayr. r �_hasna�_ensure_data�_datar �codesr �ndarray�view�astyper% r# �itemsizer r r, r �object)rH �npvalues� rc ��C:\Users\Jacks.GUTTSPC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pandas\core\algorithms.pyrZ u s@ 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 rZ rU r �originalr c C sv t | t�r| j|kr| S t |tj�s|�� }|j| |d�} | S t|�r)t�d�}n t|�r2t�d�}| j|dd�} | S )z� 50 reverse of _ensure_data 51 52 Parameters 53 ---------- 54 values : np.ndarray or ExtensionArray 55 dtype : np.dtype or ExtensionDtype 56 original : AnyArrayLike 57 58 Returns 59 ------- 60 ExtensionArray or np.ndarray 61 rT zdatetime64[ns]�timedelta64[ns]FrN ) rV r2 rU rW �construct_array_type�_from_sequencer! r+ r_ )rH rU re �clsrc rc rd �_reconstruct_data� s 62 � 63 rj c C sJ t | �s#tj| dd�}|dv rt| t�rt| �} t| �} | S t�| �} | S )z5 64 ensure that we are arraylike if not already 65 F��skipna)�mixed�string� mixed-integer) r r �infer_dtyperV �tuple�listr rW rX )rH �inferredrc rc rd �_ensure_arraylike� s 66 67 �rt )� 68 complex128� complex64�float64�float32�uint64�uint32�uint16rM �int64�int32�int16�int8rn ra c C s t | �} t| �}t| }|| fS )z� 69 Parameters 70 ---------- 71 values : np.ndarray 72 73 Returns 74 ------- 75 htable : HashTable subclass 76 values : ndarray 77 )rZ �_check_object_for_strings�_hashtables)rH �ndtype�htablerc rc rd �_get_hashtable_algo s r� �strc C s* | j j}|dkrtj| dd�dv rd}|S )z� 78 Check if we can use string hashtable instead of object hashtable. 79 80 Parameters 81 ---------- 82 values : ndarray 83 84 Returns 85 ------- 86 str 87 ra Frk )rn rn )rU �namer rp )rH r� rc rc rd r� s 88 r� c C s t | �S )a 89 90 Return unique values based on a hash table. 91 92 Uniques are returned in order of appearance. This does NOT sort. 93 94 Significantly faster than numpy.unique for long enough sequences. 95 Includes NA values. 96 97 Parameters 98 ---------- 99 values : 1d array-like 100 101 Returns 102 ------- 103 numpy.ndarray or ExtensionArray 104 105 The return can be: 106 107 * Index : when the input is an Index 108 * Categorical : when the input is a Categorical dtype 109 * ndarray : when the input is a Series/ndarray 110 111 Return numpy.ndarray or ExtensionArray. 112 113 See Also 114 -------- 115 Index.unique : Return unique values from an Index. 116 Series.unique : Return unique values of Series object. 117 118 Examples 119 -------- 120 >>> pd.unique(pd.Series([2, 1, 3, 3])) 121 array([2, 1, 3]) 122 123 >>> pd.unique(pd.Series([2] + [1] * 5)) 124 array([2, 1]) 125 126 >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])) 127 array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]') 128 129 >>> pd.unique( 130 ... pd.Series( 131 ... [ 132 ... pd.Timestamp("20160101", tz="US/Eastern"), 133 ... pd.Timestamp("20160101", tz="US/Eastern"), 134 ... ] 135 ... ) 136 ... ) 137 <DatetimeArray> 138 ['2016-01-01 00:00:00-05:00'] 139 Length: 1, dtype: datetime64[ns, US/Eastern] 140 141 >>> pd.unique( 142 ... pd.Index( 143 ... [ 144 ... pd.Timestamp("20160101", tz="US/Eastern"), 145 ... pd.Timestamp("20160101", tz="US/Eastern"), 146 ... ] 147 ... ) 148 ... ) 149 DatetimeIndex(['2016-01-01 00:00:00-05:00'], 150 dtype='datetime64[ns, US/Eastern]', 151 freq=None) 152 153 >>> pd.unique(list("baabc")) 154 array(['b', 'a', 'c'], dtype=object) 155 156 An unordered Categorical will return categories in the 157 order of appearance. 158 159 >>> pd.unique(pd.Series(pd.Categorical(list("baabc")))) 160 ['b', 'a', 'c'] 161 Categories (3, object): ['a', 'b', 'c'] 162 163 >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc")))) 164 ['b', 'a', 'c'] 165 Categories (3, object): ['a', 'b', 'c'] 166 167 An ordered Categorical preserves the category ordering. 168 169 >>> pd.unique( 170 ... pd.Series( 171 ... pd.Categorical(list("baabc"), categories=list("abc"), ordered=True) 172 ... ) 173 ... ) 174 ['b', 'a', 'c'] 175 Categories (3, object): ['a' < 'b' < 'c'] 176 177 An array of tuples 178 179 >>> pd.unique([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]) 180 array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object) 181 )�unique_with_mask)rH rc rc rd �unique; s ^r� �mask�npt.NDArray[np.bool_] | Nonec C s� t | �} t| j�r | �� S | }t| �\}} |t| ��}|du r-|�| �}t||j|�}|S |j| |d�\}}t||j|�}|dusCJ �||�d�fS )z?See algorithms.unique for docs. Takes a mask for masked arrays.N�r� �bool)rt r"