/ lib / pandas / core / algorithms.pyc
algorithms.pyc
  1  o

  2  c��cj��@s�dZddlmZddlZddlZddlmZddlmZm	Z	m
  3  Z
  4  mZmZm
Z
mZddlZddlZddlmZmZmZmZddlmZmZmZmZmZmZddlm Z dd	l!m"Z"dd
  5  l#m$Z$m%Z%m&Z&ddl'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:ddl;m<Z<dd
l=m>Z>m?Z?m@Z@ddlAmBZBmCZCmDZDmEZEmFZFmGZGmHZHddlImJZJmKZKddlLmMZMddlNmOZPmQZQmRZRddlSmTZTer�ddlmUZUmVZVddlWmXZXmYZYmZZZm[Z[m\Z\ddl]m^Z^m_Z_d�dd�Z`d�d d!�Zad�d"d#�Zbejcejdejeejfejgejhejiejjejkejlejmejnejoejpd$�Zqd�d%d&�Zrd�d(d)�Zsd*d+�Ztd�d�d.d/�ZuetZvd�d2d3�Zw	4			d�d�d;d<�Zxe ed=�ed>�ed?�d@�dAejyejydfd�dHdI��Zzd�dJdK�Z{d�dMdN�Z|	O	A	A		Od�d�dTdU�Z}	d�d�dWdX�Z~	Yd�d�d\d]�Z	d�d�d^d_�Z�		`	Z	O	Ad�d�dgdh�Z�		d�d�dodp�Z�Gdqdr�dr�Z�Gdsdt�dte��Z�Gdudv�dve��Z�		A	d�d�dzd{�Z�	|	d�d�d�d��Z�hd��Z�d�d�d�d��Z�		4	A	Od�d�d�d��Z�d�d�d��Z�ed�d�d���Z�ed�d�d���Z�d�d�d��Z�d�d�d��Z�dS)�zl
  6  Generic data algorithms. This module is experimental at the moment and not
  7  intended for public consumption
  8  �)�annotationsN)�dedent)�
TYPE_CHECKING�Hashable�Literal�Sequence�cast�final�overload)�algos�	hashtable�iNaT�lib)�AnyArrayLike�	ArrayLike�DtypeObj�
  9  IndexLabel�TakeIndexer�npt)�doc)�find_stack_level)�'construct_1d_object_array_from_listlike�infer_dtype_from_array�sanitize_to_nanoseconds)�ensure_float64�
ensure_object�ensure_platform_int�
is_array_like�
is_bool_dtype�is_categorical_dtype�is_complex_dtype�is_datetime64_dtype�is_extension_array_dtype�is_float_dtype�
 10  is_integer�is_integer_dtype�is_list_like�is_numeric_dtype�is_object_dtype�	is_scalar�is_signed_integer_dtype�is_timedelta64_dtype�needs_i8_conversion)�
concat_compat)�BaseMaskedDtype�ExtensionDtype�PandasDtype)�ABCDatetimeArray�ABCExtensionArray�ABCIndex�
ABCMultiIndex�
ABCRangeIndex�	ABCSeries�ABCTimedeltaArray)�isna�na_value_for_dtype)�take_nd)�array�ensure_wrapped_if_datetimelike�
extract_array)�validate_indices)�NumpySorter�NumpyValueArrayLike)�Categorical�	DataFrame�Index�
 11  MultiIndex�Series)�BaseMaskedArray�ExtensionArray�valuesr�return�
 12  np.ndarraycCsRt|t�st|dd�}t|j�rtt�|��St|jt�r/t	d|�}|j
 13  s*t|j�St�|�St
|j�r<t	d|�}|jSt|j�rYt|tj�rOt�|��d�St�|�jddd�St|j�rct�|�St|j�rw|jjdvrrt|�St�|�St|j�r�t	tj|�St|j�r�t|tj�r�t|�}|�d	�}t	tj|�}|Stj|td
 14  �}t|�S)a�
 15      routine to ensure that our data is of the correct
 16      input dtype for lower-level routines
 17  
 18      This will coerce:
 19      - ints -> int64
 20      - uint -> uint64
 21      - bool -> uint8
 22      - datetimelike -> i8
 23      - datetime64tz -> i8 (in local tz)
 24      - categorical -> codes
 25  
 26      Parameters
 27      ----------
 28      values : np.ndarray or ExtensionArray
 29  
 30      Returns
 31      -------
 32      np.ndarray
 33      T��
extract_numpyrFrA�uint8F��copy)����i8��dtype)�
 34  isinstancer4r=r(rUr�np�asarrayr.r�_hasna�_ensure_data�_datar�codesr�ndarray�view�astyper%r#�itemsizerr r,r�object)rH�npvalues�rc��C:\Users\Jacks.GUTTSPC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pandas\core\algorithms.pyrZus@
 35  
 36  
 37  
 38  
 39  
 40  
 41  
 42  
 43  
 44  
 45  
 46  
 47  
 48  
 49  rZrUr�originalrcCsvt|t�r|j|kr|St|tj�s|��}|j||d�}|St|�r)t�d�}n	t|�r2t�d�}|j|dd�}|S)z�
 50      reverse of _ensure_data
 51  
 52      Parameters
 53      ----------
 54      values : np.ndarray or ExtensionArray
 55      dtype : np.dtype or ExtensionDtype
 56      original : AnyArrayLike
 57  
 58      Returns
 59      -------
 60      ExtensionArray or np.ndarray
 61      rTzdatetime64[ns]�timedelta64[ns]FrN)	rVr2rUrW�construct_array_type�_from_sequencer!r+r_)rHrUre�clsrcrcrd�_reconstruct_data�s
 62  �
 63  rjcCsJt|�s#tj|dd�}|dvrt|t�rt|�}t|�}|St�|�}|S)z5
 64      ensure that we are arraylike if not already
 65      F��skipna)�mixed�string�
mixed-integer)	rr�infer_dtyperV�tuple�listrrWrX)rH�inferredrcrcrd�_ensure_arraylike�s
 66  
 67  �rt)�
 68  complex128�	complex64�float64�float32�uint64�uint32�uint16rM�int64�int32�int16�int8rnracCs t|�}t|�}t|}||fS)z�
 69      Parameters
 70      ----------
 71      values : np.ndarray
 72  
 73      Returns
 74      -------
 75      htable : HashTable subclass
 76      values : ndarray
 77      )rZ�_check_object_for_strings�_hashtables)rH�ndtype�htablercrcrd�_get_hashtable_algo
sr��strcCs*|jj}|dkrtj|dd�dvrd}|S)z�
 78      Check if we can use string hashtable instead of object hashtable.
 79  
 80      Parameters
 81      ----------
 82      values : ndarray
 83  
 84      Returns
 85      -------
 86      str
 87      raFrk)rnrn)rU�namerrp)rHr�rcrcrdr�s
 88  r�cCst|�S)a
 89  
 90      Return unique values based on a hash table.
 91  
 92      Uniques are returned in order of appearance. This does NOT sort.
 93  
 94      Significantly faster than numpy.unique for long enough sequences.
 95      Includes NA values.
 96  
 97      Parameters
 98      ----------
 99      values : 1d array-like
100  
101      Returns
102      -------
103      numpy.ndarray or ExtensionArray
104  
105          The return can be:
106  
107          * Index : when the input is an Index
108          * Categorical : when the input is a Categorical dtype
109          * ndarray : when the input is a Series/ndarray
110  
111          Return numpy.ndarray or ExtensionArray.
112  
113      See Also
114      --------
115      Index.unique : Return unique values from an Index.
116      Series.unique : Return unique values of Series object.
117  
118      Examples
119      --------
120      >>> pd.unique(pd.Series([2, 1, 3, 3]))
121      array([2, 1, 3])
122  
123      >>> pd.unique(pd.Series([2] + [1] * 5))
124      array([2, 1])
125  
126      >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")]))
127      array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
128  
129      >>> pd.unique(
130      ...     pd.Series(
131      ...         [
132      ...             pd.Timestamp("20160101", tz="US/Eastern"),
133      ...             pd.Timestamp("20160101", tz="US/Eastern"),
134      ...         ]
135      ...     )
136      ... )
137      <DatetimeArray>
138      ['2016-01-01 00:00:00-05:00']
139      Length: 1, dtype: datetime64[ns, US/Eastern]
140  
141      >>> pd.unique(
142      ...     pd.Index(
143      ...         [
144      ...             pd.Timestamp("20160101", tz="US/Eastern"),
145      ...             pd.Timestamp("20160101", tz="US/Eastern"),
146      ...         ]
147      ...     )
148      ... )
149      DatetimeIndex(['2016-01-01 00:00:00-05:00'],
150              dtype='datetime64[ns, US/Eastern]',
151              freq=None)
152  
153      >>> pd.unique(list("baabc"))
154      array(['b', 'a', 'c'], dtype=object)
155  
156      An unordered Categorical will return categories in the
157      order of appearance.
158  
159      >>> pd.unique(pd.Series(pd.Categorical(list("baabc"))))
160      ['b', 'a', 'c']
161      Categories (3, object): ['a', 'b', 'c']
162  
163      >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc"))))
164      ['b', 'a', 'c']
165      Categories (3, object): ['a', 'b', 'c']
166  
167      An ordered Categorical preserves the category ordering.
168  
169      >>> pd.unique(
170      ...     pd.Series(
171      ...         pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
172      ...     )
173      ... )
174      ['b', 'a', 'c']
175      Categories (3, object): ['a' < 'b' < 'c']
176  
177      An array of tuples
178  
179      >>> pd.unique([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")])
180      array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)
181      )�unique_with_mask)rHrcrcrd�unique;s^r��mask�npt.NDArray[np.bool_] | NonecCs�t|�}t|j�r
|��S|}t|�\}}|t|��}|dur-|�|�}t||j|�}|S|j||d�\}}t||j|�}|dusCJ�||�d�fS)z?See algorithms.unique for docs. Takes a mask for masked arrays.N�r��bool)rtr"rUr�r��lenrjr_)rHr�rer��table�uniquesrcrcrdr��s
182  
183  r��comps�npt.NDArray[np.bool_]cCs�t|�stdt|�j�d���t|�stdt|�j�d���t|ttttj	f�s?|}t
184  t|��}t|�r>t
|�s>tt|��}nt|t�rJt�|�}nt|ddd�}t
185  |�}t|dd�}t|tj	�sf|�|�St|j�rrt|��|�St|j�r�t|j�s�tj|jtd�St|j�r�t||�t��St|jt�r�tt�|�t�|��St|�dkr�t|�dkr�t|�s�t|�� �r�d	d
186  �}ntj!}nt�"|j|jgg�}|j|dd�}|j|dd�}t#j$}|||�S)
z�
187      Compute the isin boolean array.
188  
189      Parameters
190      ----------
191      comps : array-like
192      values : array-like
193  
194      Returns
195      -------
196      ndarray[bool]
197          Same length as `comps`.
198      zIonly list-like objects are allowed to be passed to isin(), you passed a [�]T)rL�
extract_rangerKrTi@B�cSst�t�||�t�|��S�N)rW�
199  logical_or�in1d�isnan)�c�vrcrcrd�f�szisin.<locals>.fFrN)%r&�	TypeError�type�__name__rVr3r6r2rWr]rtrrr'r*rr4r;r=�isinr,rU�pd_arrayr(�zeros�shaper�r_rar/rXr�r8�anyr��find_common_typer��ismember)r�rH�orig_valuesZcomps_arrayr��commonrcrcrdr��sZ�����
200  
201  
202  
203  �
204  
205  r�������na_sentinel�
206  int | None�	size_hint�na_valuera�'tuple[npt.NDArray[np.intp], np.ndarray]cCsv|du}|sd}|}|jjdvrt}t|�\}}||pt|��}|j|||||d�\}	}
207  t|	|j|�}	t|
208  �}
209  |
210  |	fS)a[
211      Factorize a numpy array to codes and uniques.
212  
213      This doesn't do any coercion of types or unboxing before factorization.
214  
215      Parameters
216      ----------
217      values : ndarray
218      na_sentinel : int, default -1
219      size_hint : int, optional
220          Passed through to the hashtable's 'get_labels' method
221      na_value : object, optional
222          A value in `values` to consider missing. Note: only use this
223          parameter when you know that you don't have any values pandas would
224          consider missing in the array (NaN for float data, iNaT for
225          datetimes, etc.).
226      mask : ndarray[bool], optional
227          If not None, the mask is used as indicator for missing values
228          (True = missing, False = valid) instead of `na_value` or
229          condition "val != val".
230  
231      Returns
232      -------
233      codes : ndarray[np.intp]
234      uniques : ndarray
235      Nr�)�m�M)r�r�r��	ignore_na)rU�kindr
r�r��	factorizerjr)rHr�r�r�r�r�re�
236  hash_klassr�r�r\rcrcrd�factorize_arrays$!
237  �	r�z�    values : sequence
238          A 1-D sequence. Sequences that aren't pandas objects are
239          coerced to ndarrays before factorization.
240      zt    sort : bool, default False
241          Sort `uniques` and shuffle `codes` to maintain the
242          relationship.
243      zG    size_hint : int, optional
244          Hint to the hashtable sizer.
245      )rH�sortr�Fr�r��int | None | lib.NoDefault�use_na_sentinel�bool | lib.NoDefault�%tuple[np.ndarray, np.ndarray | Index]c
CsFt||�}t|t�r|j|d�St|�}|}t|t�s!t|dd�}|du}t|ttf�r?|j	dur?|j|d�\}}t
246  |||�St|jtj�s�|dksN|durbdt
�|j�jvrb|j|dud�\}}nj|durhdn|}	t���t�dd	t�|j|	d
247  �\}}Wd�n1s�wYn>t�|�}|dur�|r�d}	n	|dur�d}	n|}	|s�|s�t|�r�t|�}
248  |
249  ��r�t|jdd�}t�|
250  ||�}t||	|d
�\}}|r�t|�dkr�|dur�d}t|||ddd�\}}|�s|�r|dur�d}	n|}	||	k}|���rt|jdd�}t�||g�}t�|t|�d|�}t||j|�}t
251  |||�S)a~
252      Encode the object as an enumerated type or categorical variable.
253  
254      This method is useful for obtaining a numeric representation of an
255      array when all that matters is identifying distinct values. `factorize`
256      is available as both a top-level function :func:`pandas.factorize`,
257      and as a method :meth:`Series.factorize` and :meth:`Index.factorize`.
258  
259      Parameters
260      ----------
261      {values}{sort}
262      na_sentinel : int or None, default -1
263          Value to mark "not found". If None, will not drop the NaN
264          from the uniques of the values.
265  
266          .. deprecated:: 1.5.0
267              The na_sentinel argument is deprecated and
268              will be removed in a future version of pandas. Specify use_na_sentinel as
269              either True or False.
270  
271          .. versionchanged:: 1.1.2
272  
273      use_na_sentinel : bool, default True
274          If True, the sentinel -1 will be used for NaN values. If False,
275          NaN values will be encoded as non-negative integers and will not drop the
276          NaN from the uniques of the values.
277  
278          .. versionadded:: 1.5.0
279      {size_hint}
280      Returns
281      -------
282      codes : ndarray
283          An integer ndarray that's an indexer into `uniques`.
284          ``uniques.take(codes)`` will have the same values as `values`.
285      uniques : ndarray, Index, or Categorical
286          The unique valid values. When `values` is Categorical, `uniques`
287          is a Categorical. When `values` is some other pandas object, an
288          `Index` is returned. Otherwise, a 1-D ndarray is returned.
289  
290          .. note::
291  
292             Even if there's a missing value in `values`, `uniques` will
293             *not* contain an entry for it.
294  
295      See Also
296      --------
297      cut : Discretize continuous-valued array.
298      unique : Find the unique value in an array.
299  
300      Notes
301      -----
302      Reference :ref:`the user guide <reshaping.factorize>` for more examples.
303  
304      Examples
305      --------
306      These examples all show factorize as a top-level method like
307      ``pd.factorize(values)``. The results are identical for methods like
308      :meth:`Series.factorize`.
309  
310      >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'])
311      >>> codes
312      array([0, 0, 1, 2, 0]...)
313      >>> uniques
314      array(['b', 'a', 'c'], dtype=object)
315  
316      With ``sort=True``, the `uniques` will be sorted, and `codes` will be
317      shuffled so that the relationship is the maintained.
318  
319      >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'], sort=True)
320      >>> codes
321      array([1, 1, 0, 2, 1]...)
322      >>> uniques
323      array(['a', 'b', 'c'], dtype=object)
324  
325      When ``use_na_sentinel=True`` (the default), missing values are indicated in
326      the `codes` with the sentinel value ``-1`` and missing values are not
327      included in `uniques`.
328  
329      >>> codes, uniques = pd.factorize(['b', None, 'a', 'c', 'b'])
330      >>> codes
331      array([ 0, -1,  1,  2,  0]...)
332      >>> uniques
333      array(['b', 'a', 'c'], dtype=object)
334  
335      Thus far, we've only factorized lists (which are internally coerced to
336      NumPy arrays). When factorizing pandas objects, the type of `uniques`
337      will differ. For Categoricals, a `Categorical` is returned.
338  
339      >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
340      >>> codes, uniques = pd.factorize(cat)
341      >>> codes
342      array([0, 0, 1]...)
343      >>> uniques
344      ['a', 'c']
345      Categories (3, object): ['a', 'b', 'c']
346  
347      Notice that ``'b'`` is in ``uniques.categories``, despite not being
348      present in ``cat.values``.
349  
350      For all other pandas objects, an Index of the appropriate type is
351      returned.
352  
353      >>> cat = pd.Series(['a', 'a', 'c'])
354      >>> codes, uniques = pd.factorize(cat)
355      >>> codes
356      array([0, 0, 1]...)
357      >>> uniques
358      Index(['a', 'c'], dtype='object')
359  
360      If NaN is in the values, and we want to include NaN in the uniques of the
361      values, it can be achieved by setting ``use_na_sentinel=False``.
362  
363      >>> values = np.array([1, 2, 1, np.nan])
364      >>> codes, uniques = pd.factorize(values)  # default: use_na_sentinel=True
365      >>> codes
366      array([ 0,  1,  0, -1])
367      >>> uniques
368      array([1., 2.])
369  
370      >>> codes, uniques = pd.factorize(values, use_na_sentinel=False)
371      >>> codes
372      array([0, 1, 0, 2])
373      >>> uniques
374      array([ 1.,  2., nan])
375      )r�TrKNr�r�)r��ignorez.*use_na_sentinel.*)r�F)�compat)r�r�r)r��
assume_unique�verify�)�resolve_na_sentinelrVr5r�rtr4r=r1r7�freq�_re_wrap_factorizerUrW�inspect�	signature�
376  parameters�warnings�catch_warnings�filterwarnings�
FutureWarningrXr(r8r�r9�wherer�r��	safe_sort�appendrj)
rHr�r�r�r�re�dropnar\r��na_sentinel_arg�	null_maskr�Z
377  code_is_narcrcrdr�Msv
378  )
379  
380  �
381  �
382  ��
383  
384  �
385  
386  �
387  r�cCs�|tjur|tjurtd|�d|�d���|tjur)|tjus!|r%d}|Sd}|S|dur0d}n	|dkr7d}nd}tj|tt�d	�|}|S)
388  ap
389      Determine value of na_sentinel for factorize methods.
390  
391      See GH#46910 for details on the deprecation.
392  
393      Parameters
394      ----------
395      na_sentinel : int, None, or lib.no_default
396          Value passed to the method.
397      use_na_sentinel : bool or lib.no_default
398          Value passed to the method.
399  
400      Returns
401      -------
402      Resolved value of na_sentinel.
403      zICannot specify both `na_sentinel` and `use_na_sentile`; got `na_sentinel=z` and `use_na_sentinel=�`r�NzUSpecifying `na_sentinel=None` is deprecated, specify `use_na_sentinel=False` instead.zRSpecifying `na_sentinel=-1` is deprecated, specify `use_na_sentinel=True` instead.z�Specifying the specific value to use for `na_sentinel` is deprecated and will be removed in a future version of pandas. Specify `use_na_sentinel=True` to use the sentinel value -1, and `use_na_sentinel=False` to encode NaN values.��
404  stacklevel)r�
405  no_default�
406  ValueErrorr��warnr�r)r�r��result�msgrcrcrdr�Ss0���
407  �����r�r\cCsNt|t�rt|�}|j|dd�}||fSt|t�r#ddlm}||�}||fS)zO
408      Wrap factorize results in Series or Index depending on original type.
409      N)r�r)rC)rVr3r<�
_shallow_copyr6�pandasrC)rer�r\rCrcrcrdr��s
410  
411  �r�T�	ascending�	normalizer�rEc
412  Cshddlm}m}t|dd�}|durhddlm}	||�}z	|	||dd�}
413  Wnty6}ztd�|�d}~ww|
414  j|d	�}||j�	�}|j�
415  d
416  �|_|��}|r_|jdk�
�r_|jdd�}t�t|
417  �g�}
n:t|�r|||�jj|d	�}||_|j}
n&t|�}t||�\}}
|�|�}|jtkr�|jtkr�|�
418  t�}||
||d�}|r�|j|d�}|r�||
��}|S)
aK
419      Compute a histogram of the counts of non-null values.
420  
421      Parameters
422      ----------
423      values : ndarray (1-d)
424      sort : bool, default True
425          Sort by values
426      ascending : bool, default False
427          Sort in ascending order
428      normalize: bool, default False
429          If True then compute a relative histogram
430      bins : integer, optional
431          Rather than count values, group them into half-open bins,
432          convenience for pd.cut, only works with numeric data
433      dropna : bool, default True
434          Don't include counts of NaN
435  
436      Returns
437      -------
438      Series
439      r)rCrEr�N)�cutT)�include_lowestz+bins argument only works with numeric data.�r��interval)�indexr��r�)r�rCrE�getattr�pandas.core.reshape.tiler�r��value_countsr��notnar_�
440  sort_index�_values�all�ilocrWr;r�r"r�rt�value_counts_arraylike�_with_inferrUr�ra�sort_values�sum)rHr�r�r��binsr�rCrEr�r��ii�errr��counts�keys�idxrcrcrdr��sB
441  ��
442  
443  r��'tuple[ArrayLike, npt.NDArray[np.int64]]cCs^|}t|�}tj|||d�\}}t|j�r$|r$|tk}||||}}t||j|�}||fS)z�
444      Parameters
445      ----------
446      values : np.ndarray
447      dropna : bool
448      mask : np.ndarray[bool] or None, default None
449  
450      Returns
451      -------
452      uniques : np.ndarray
453      counts : np.ndarray[np.int64]
454      r�)rZr�Zvalue_countr,rUr
rj)rHr�r�rer�r��res_keysrcrcrdr��s
455  r��first�keep�Literal['first', 'last', False]cCst|�}tj||d�S)a
456      Return boolean ndarray denoting duplicate values.
457  
458      Parameters
459      ----------
460      values : nd.array, ExtensionArray or Series
461          Array over which to check for duplicate values.
462      keep : {'first', 'last', False}, default 'first'
463          - ``first`` : Mark duplicates as ``True`` except for the first
464            occurrence.
465          - ``last`` : Mark duplicates as ``True`` except for the last
466            occurrence.
467          - False : Mark all duplicates as ``True``.
468  
469      Returns
470      -------
471      duplicated : ndarray[bool]
472      �r�)rZr��
473  duplicated)rHr�rcrcrdr�
sr�c
474  Cs�t|�}|}t|j�rt|�}td|�}|j|d�St|�}tj|||d�}zt	�
475  |�}WntyJ}ztj
d|��t�d�WYd}~nd}~wwt||j|�}|S)a
476      Returns the mode(s) of an array.
477  
478      Parameters
479      ----------
480      values : array-like
481          Array over which to check for duplicate values.
482      dropna : bool, default True
483          Don't consider counts of NaN/NaT.
484  
485      Returns
486      -------
487      np.ndarray or ExtensionArray
488      rGr�)r�r�zUnable to sort modes: r�N)rtr,rUr<r�_moderZr��moderWr�r�r�r�rrj)rHr�r�reZnpresultr�r�rcrcrdr�&s&
489  
490  ���r��average�axis�int�method�	na_option�pct�npt.NDArray[np.float64]c	Csdt|j�}t|�}|jdkrtj||||||d�}|S|jdkr.tj|||||||d�}|Std��)a�
491      Rank the values along a given axis.
492  
493      Parameters
494      ----------
495      values : np.ndarray or ExtensionArray
496          Array whose values will be ranked. The number of dimensions in this
497          array must not exceed 2.
498      axis : int, default 0
499          Axis over which to perform rankings.
500      method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
501          The method by which tiebreaks are broken during the ranking.
502      na_option : {'keep', 'top'}, default 'keep'
503          The method by which NaNs are placed in the ranking.
504          - ``keep``: rank each NaN value with a NaN ranking
505          - ``top``: replace each NaN with either +/- inf so that they
506                     there are ranked at the top
507      ascending : bool, default True
508          Whether or not the elements should be ranked in ascending order.
509      pct : bool, default False
510          Whether or not to the display the returned rankings in integer form
511          (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
512      r�)�is_datetimelike�ties_methodr�rrrP)r�rrr�rrz&Array with ndim > 2 are not supported.)r,rUrZ�ndimrZrank_1dZrank_2dr�)rHr�r�rr�rr�ranksrcrcrd�rankOs0
513  
514  �
515  ���r�arr�npt.NDArray[np.int64]�b�int | npt.NDArray[np.int64]�arr_mask�b_maskc
CsZt�||j�}|durt�||j�}nd}|dur%|dur%t�||B�}n!|dur/t�|�}n|dur9t�|�}n
tj|jtd�}|�d�tj}t	}|dk}	|dk}
516  |	�
517  �sb|||k|@�
518  �}n/|
519520  �sq|||k|@�
521  �}n |||	||	k||	@�
522  �p�|||
523  ||
524  k||
525  @�
526  �}|r�td��||}|dus�|dur�t�||t	�|S)a�
527      Perform array addition that checks for underflow and overflow.
528  
529      Performs the addition of an int64 array and an int64 integer (or array)
530      but checks that they do not result in overflow first. For elements that
531      are indicated to be NaN, whether or not there is overflow for that element
532      is automatically ignored.
533  
534      Parameters
535      ----------
536      arr : np.ndarray[int64] addend.
537      b : array or scalar addend.
538      arr_mask : np.ndarray[bool] or None, default None
539          array indicating which elements to exclude from checking
540      b_mask : np.ndarray[bool] or None, default None
541          array or scalar indicating which element(s) to exclude from checking
542  
543      Returns
544      -------
545      sum : An array for elements x + b for each element x in arr if b is
546            a scalar or an array for elements x + y for each element pair
547            (x, y) in (arr, b).
548  
549      Raises
550      ------
551      OverflowError if any x + y exceeds the maximum or minimum int64 value.
552      NrTTrzOverflow in int64 addition)
rW�broadcast_tor��logical_not�emptyr��fillr�i8maxr
r��
OverflowError�putmask)
rr
553  rr
�b2Zb2_maskZnot_nanrZi8minZmask1Zmask2Zto_raiser�rcrcrd�checked_add_with_arr�s<#
554  	 �rc@sJeZdZddd�Zddd�Zed
d��Zedd��Zeeddd���Z	dS)�SelectN�nr�r�r�rI�NonecCs(||_||_||_|jdvrtd��dS)N)r��lastr�z,keep must be either "first", "last" or "all")�objrr�r�)�selfrrr�rcrcrd�__init__�s
555  �zSelectN.__init__r��DataFrame | SeriescCst�r�)�NotImplementedError)rr�rcrcrd�compute�szSelectN.computecC�
556  |�d�S)N�nlargest�r �rrcrcrdr"��
557  zSelectN.nlargestcCr!)N�	nsmallestr#r$rcrcrdr&�r%zSelectN.nsmallestrUrr�cCst|�ot|�pt|�S)zg
558          Helper function to determine if dtype is valid for
559          nsmallest/nlargest methods
560          )r'r r,rTrcrcrd�is_valid_dtype_n_method�s��zSelectN.is_valid_dtype_n_methodN)rr�r�r�rIr)r�r�rIr)rUrrIr�)
561  r��
562  __module__�__qualname__rr r	r"r&�staticmethodr'rcrcrcrdr�s
563  
564  
565  
566  rc@seZdZdZd	dd�ZdS)
567  �
SelectNSeriesz�
568      Implement n largest/smallest for Series
569  
570      Parameters
571      ----------
572      obj : Series
573      n : int
574      keep : {'first', 'last'}, default 'first'
575  
576      Returns
577      -------
578      nordered : Series
579      r�r�rIrEcCs�ddlm}|j}|jj}|�|�std|�d|����|dkr%|jgS|j��}|j�|j	�}|t
580  |j�krF|dk}|jj|d��|�S|j}t
|j�}	|dkrg|	}	t|�r^|	d8}	n	t|�rgd|	}	|jd	krs|	ddd
581  �}	|}
582  t
583  |	�}t||�}t�|	jdd�|d�}t�|	|k�\}
|
|	|
jd
d�}|jdkr�|d|�}|
584  }nt
585  |�|
586  kr�t
587  |�t
588  |�|
589  kr�t
590  |�t
591  |�}nt
592  |�}|jd	kr�|d|}||j||g�jd|�S)Nr)�concatzCannot use method 'z
' with dtype r&r�r"r�rr��C)�order�	mergesort)r�r�)�pandas.core.reshape.concatr,rrrUr'r�r��dropr�r�r��headrZrHr%rr��minrZkth_smallestrOrW�nonzero�argsortr�)rr�r,rrU�dropped�	nan_indexr��	new_dtyperZnbaseZnarrZkth_val�ns�inds�findexrcrcrdr sJ
593  
594  
595  
596  
597  
598  
599  
600  
601   
602  zSelectNSeries.computeN)r�r�rIrE)r�r(r)�__doc__r rcrcrcrdr+sr+cs,eZdZdZd�fdd
�Zddd�Z�ZS)�SelectNFramez�
603      Implement n largest/smallest for DataFrame
604  
605      Parameters
606      ----------
607      obj : DataFrame
608      n : int
609      keep : {'first', 'last'}, default 'first'
610      columns : list or str
611  
612      Returns
613      -------
614      nordered : DataFrame
615      rrBrr�r�r��columnsrrIrcsHt��|||�t|�rt|t�r|g}ttt|�}t|�}||_	dSr�)
616  �superrr&rVrqrrrrrr>)rrrr�r>��	__class__rcrdrhs
617  zSelectNFrame.__init__r�c	szddlm}|j}|j}|j}|D]}||j}|�|�s/tdt|��d|�dt���d���q�fdd�}|j	}	|j
618  d	d
619  �}
620  }|}|g�}t|�D]R\}
}|
621  |}t|�d|
k}t
|��||re|jndd
�}|sqt|�|kry|||j	�}n%|||j	dk}||}||}|||j	�}|
622  j|j	}
623  |t|�}qK|�|�}|	�|�|_	t|�dkr�|S�dk}|j||dd�S)Nr)�
624  Int64IndexzColumn z has dtype z, cannot use method z with this dtypecs�dkr	|�|�S|�|�S)z{
625              Helper function to concat `current_indexer` and `other_indexer`
626              depending on `method`
627              r&)r�)Zcurrent_indexerZ
other_indexer�r�rcrd�get_indexer�s
628  
629  z)SelectNFrame.compute.<locals>.get_indexerT)r1r�r�r�r�r&r/)r�r�)�pandas.core.apirBrrr>rUr'r��reprr��reset_index�	enumerater�r�r��loc�taker�)rr�rBr�framer>�columnrUrD�original_indexZ	cur_frameZcur_n�indexer�i�seriesZis_last_columnrHZborder_valueZ
unsafe_valuesZsafe_valuesr�rcrCrdr qsN
630  
631  ����
632  
633  zSelectNFrame.compute)
634  rrBrr�r�r�r>rrIr)r�r�rIrB)r�r(r)r<rr �
__classcell__rcrcr@rdr=Xs	r=�indicesr�
635  allow_fillcCs^t|�s	t�|�}tj|tjd�}|r&t||j|�t|||d|d�}|S|j||d�}|S)a�
636      Take elements from an array.
637  
638      Parameters
639      ----------
640      arr : array-like or scalar value
641          Non array-likes (sequences/scalars without a dtype) are coerced
642          to an ndarray.
643      indices : sequence of int or one-dimensional np.ndarray of int
644          Indices to be taken.
645      axis : int, default 0
646          The axis over which to select values.
647      allow_fill : bool, default False
648          How to handle negative values in `indices`.
649  
650          * False: negative values in `indices` indicate positional indices
651            from the right (the default). This is similar to :func:`numpy.take`.
652  
653          * True: negative values in `indices` indicate
654            missing values. These values are set to `fill_value`. Any other
655            negative values raise a ``ValueError``.
656  
657      fill_value : any, optional
658          Fill value to use for NA-indices when `allow_fill` is True.
659          This may be ``None``, in which case the default NA value for
660          the type (``self.dtype.na_value``) is used.
661  
662          For multi-dimensional `arr`, each *element* is filled with
663          `fill_value`.
664  
665      Returns
666      -------
667      ndarray or ExtensionArray
668          Same type as the input.
669  
670      Raises
671      ------
672      IndexError
673          When `indices` is out of bounds for the array.
674      ValueError
675          When the indexer contains negative values other than ``-1``
676          and `allow_fill` is True.
677  
678      Notes
679      -----
680      When `allow_fill` is False, `indices` may be whatever dimensionality
681      is accepted by NumPy for `arr`.
682  
683      When `allow_fill` is True, `indices` should be 1-D.
684  
685      See Also
686      --------
687      numpy.take : Take elements from an array along an axis.
688  
689      Examples
690      --------
691      >>> import pandas as pd
692  
693      With the default ``allow_fill=False``, negative numbers indicate
694      positional indices from the right.
695  
696      >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1])
697      array([10, 10, 30])
698  
699      Setting ``allow_fill=True`` will place `fill_value` in those positions.
700  
701      >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True)
702      array([10., 10., nan])
703  
704      >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True,
705      ...      fill_value=-10)
706      array([ 10,  10, -10])
707      rTT)r�rS�
708  fill_value)r�)rrWrX�intpr>r�r:rJ)rrRr�rSrTr�rcrcrdrJ�sP
709  
710  ��rJ�left�value�$NumpyValueArrayLike | ExtensionArray�side�Literal['left', 'right']�sorterr?�npt.NDArray[np.intp] | np.intpcCs�|durt|�}t|tj�r]t|j�r]t|�st|�r]t�|jj�}t	|�r,t�
711  |g�nt�
712  |�}||jk��rC||j
k��rC|j}n|j}t	|�rStt|�|��}nttt|�|d�}nt|�}|j|||d�S)a
713      Find indices where elements should be inserted to maintain order.
714  
715      .. versionadded:: 0.25.0
716  
717      Find the indices into a sorted array `arr` (a) such that, if the
718      corresponding elements in `value` were inserted before the indices,
719      the order of `arr` would be preserved.
720  
721      Assuming that `arr` is sorted:
722  
723      ======  ================================
724      `side`  returned index `i` satisfies
725      ======  ================================
726      left    ``arr[i-1] < value <= self[i]``
727      right   ``arr[i-1] <= value < self[i]``
728      ======  ================================
729  
730      Parameters
731      ----------
732      arr: np.ndarray, ExtensionArray, Series
733          Input array. If `sorter` is None, then it must be sorted in
734          ascending order, otherwise `sorter` must be an array of indices
735          that sort it.
736      value : array-like or scalar
737          Values to insert into `arr`.
738      side : {'left', 'right'}, optional
739          If 'left', the index of the first suitable location found is given.
740          If 'right', return the last such index.  If there is no suitable
741          index, return either 0 or N (where N is the length of `self`).
742      sorter : 1-D array-like, optional
743          Optional array of integer indices that sort array a into ascending
744          order. They are typically the result of argsort.
745  
746      Returns
747      -------
748      array of ints or int
749          If value is array-like, array of insertion points.
750          If value is scalar, a single integer.
751  
752      See Also
753      --------
754      numpy.searchsorted : Similar method from NumPy.
755      NrT)rYr[)rrVrWr]r%rUr$�iinfor�r)r;r3r��maxrr�r�rr<�searchsorted)rrWrYr[r]�	value_arrrUrcrcrdr_.s(2
756  ����	r_>r|r}rr~rxrwrcCsTt|�}tj}|j}t|�}|rtj}ntj}t|t	�r#|�
757  �}|j}t|tj�s]t|d|j�d��rL|dkrDt
dt|�j�d|����|||�|��Stjdtt�d�t�|�}|j}d}t|j�rqtj}|�d�}t}d	}n|rwtj}nt|�r�|jjd
758  vr�tj}ntj}|j}|dkr�|�dd�}t�|�}tj |j!|d
�}	t"d�gd}
759  |dkr�t"d|�nt"|d�|
760  |<||	t#|
761  �<|jjt$vr�t%j&||	|||d�nCt"d�gd}|dkr�t"|d�nt"d|�||<t#|�}t"d�gd}
|dkr�t"d|�nt"|d�|
|<t#|
�}|||||�|	|<|�r|	�d�}	|dk�r(|	dd�df}	|	S)aQ
762      difference of n between self,
763      analogous to s-s.shift(n)
764  
765      Parameters
766      ----------
767      arr : ndarray or ExtensionArray
768      n : int
769          number of periods
770      axis : {0, 1}
771          axis to shift on
772      stacklevel : int, default 3
773          The stacklevel for the lost dtype warning.
774  
775      Returns
776      -------
777      shifted
778      �__rzcannot diff z	 on axis=zwdtype lost in 'diff()'. In the future this will raise a TypeError. Convert to a suitable dtype prior to calling 'diff'.r�FrST)rr~r�r�rTNrP)�datetimelikerf)'r�rW�nanrUr�operator�xor�subrVr0�to_numpy�hasattrr�r�r��shiftr�r�r�rrXr,r|r^r
�object_r%r�rxrwr�reshaperr��slicerq�
_diff_specialrZdiff_2d)rrr��narU�is_bool�op�is_timedeltaZ	orig_ndim�out_arrZ
779  na_indexerZ_res_indexer�res_indexerZ_lag_indexerZlag_indexerrcrcrd�diff�sp
780  �
781  
782  
783  
784    $
785  
786  rtr�r��Dnp.ndarray | MultiIndex | tuple[np.ndarray | MultiIndex, np.ndarray]cCst|�std��|}t|t�}t|tjtf�s$t|�\}}tj||d�}d}	t	|�s8t
787  j|dd�dkr8t|�}
788  n1z|�
�}	|rE|�|	�}
789  n|�|	�}
790  Wntyh|jrbt|dt�rbt||�}
791  nt|�}
792  Ynw|duro|
793  St|�swtd��tt�|��}|s�tt|��t|�ks�td	��|	dur�t|�\}}|t|��}|�|�t|�|
794  ��}	|d
795  kr�|	�
�}
t|
|d
796  d�}|r�|t|�k|t|�kB}n4d}n1tjt|	�tjd�}|�|	t�t|	���|j|dd
�}||k}|r�||t|�kB|t|�kB}|du�r	t�|||�|
797  t|�fS)a�
798      Sort ``values`` and reorder corresponding ``codes``.
799  
800      ``values`` should be unique if ``codes`` is not None.
801      Safe for use with mixed types (int, str), orders ints before strs.
802  
803      Parameters
804      ----------
805      values : list-like
806          Sequence; must be unique if ``codes`` is not None.
807      codes : list_like, optional
808          Indices to ``values``. All out of bound indices are treated as
809          "not found" and will be masked with ``na_sentinel``.
810      na_sentinel : int, default -1
811          Value in ``codes`` to mark "not found".
812          Ignored when ``codes`` is None.
813      assume_unique : bool, default False
814          When True, ``values`` are assumed to be unique, which can speed up
815          the calculation. Ignored when ``codes`` is None.
816      verify : bool, default True
817          Check if codes are out of bound for the values and put out of bound
818          codes equal to na_sentinel. If ``verify=False``, it is assumed there
819          are no out of bound codes. Ignored when ``codes`` is None.
820  
821          .. versionadded:: 0.25.0
822  
823      Returns
824      -------
825      ordered : ndarray or MultiIndex
826          Sorted ``values``
827      new_codes : ndarray
828          Reordered ``codes``; returned when ``codes`` is not None.
829  
830      Raises
831      ------
832      TypeError
833          * If ``values`` is not list-like or if ``codes`` is neither None
834          nor list-like
835          * If ``values`` cannot be sorted
836      ValueError
837          * If ``codes`` is not None and ``values`` contain duplicates.
838      zFOnly list-like objects are allowed to be passed to safe_sort as valuesrTNFrkrorzMOnly list-like objects or None are allowed to be passed to safe_sort as codesz,values should be unique if codes is not Noner��rT�wrap)r�)r&r�rVr4rWr]r2rrXr"rrp�_sort_mixedr5rJ�sizerq�_sort_tuplesrr�r�r�r�Z
map_locations�lookupr:r�int_�put�aranger)rHr\r�r�r��original_values�is_mirU�_r[�orderedr��t�order2�	new_codesr��reverse_indexerrcrcrdr��sn1�
839  �
840  
841  ����
842  
843  r�cCsxtjdd�|D�td�}tjdd�|D�td�}t�|||@�}t�||�}t�|tj|td�t�||�g�S)z3order ints before strings in 1d arrays, safe in py3cSsg|]}t|t��qSrc)rVr���.0�xrcrcrd�
844  <listcomp>�sz_sort_mixed.<locals>.<listcomp>rTcSsg|]}|du�qSr�rcr�rcrcrdr��s)rWr;r�r��concatenaterXra)rHZstr_posZnone_pos�nums�strsrcrcrdrx�s�rxrcC�dSr�rc�rHrrcrcrdrz��rzrDcCr�r�rcr�rcrcrdrz�r��np.ndarray | MultiIndexcCs:ddlm}ddlm}||d�\}}||dd�}||S)aj
845      Convert array of tuples (1d) to array or array (2d).
846      We need to keep the columns separately as they contain different types and
847      nans (can't use `np.sort` as it may fail when str and nan are mixed in a
848      column as types cannot be compared).
849      We have to apply the indexer to the original values to keep the dtypes in
850      case of MultiIndexes
851      r)�	to_arrays)�lexsort_indexerNT)�orders)�"pandas.core.internals.constructionr��pandas.core.sortingr�)rHrr�r��arraysr�rNrcrcrdrz�s
852  �lvals�rvalscCs�g}t|dd�}t|dd�}|j|dd�\}}tt||g��}t|�}t|�D]\}}||gtt|j||j|��7}q'|�	|�S)a�
853      Extracts the union from lvals and rvals with respect to duplicates and nans in
854      both arrays.
855  
856      Parameters
857      ----------
858      lvals: np.ndarray or ExtensionArray
859          left values which is ordered in front.
860      rvals: np.ndarray or ExtensionArray
861          right values ordered after lvals.
862  
863      Returns
864      -------
865      np.ndarray or ExtensionArray
866          Containing the unsorted union of both arrays.
867  
868      Notes
869      -----
870      Caller is responsible for ensuring lvals.dtype == rvals.dtype.
871      Fr�rrv)
872  r��alignr�r-r<rHr�r^�atrJ)r�r�rNZl_countZr_countZunique_arrayrOrWrcrcrd�union_with_duplicates�s&
873  r�)rHrrIrJ)rHrrUrrerrIr)rIr)rHrJ)rHrJrIr�r�)r�r�)r�rrHrrIr�)r�NNN)rHrJr�r�r�r�r�rar�r�rIr�)
874  r�r�r�r�r�r�r�r�rIr�)r�r�r�r�rIr�)r\rJ)TFFNT)
875  r�r�r�r�r�r�r�r�rIrE)rHrJr�r�r�r�rIr�)r�)rHrr�r�rIr�)TN)rHrr�r�r�r�rIr)rr�r�TF)rHrr�r�r�r�rr�r�r�rr�rIr)NN)
876  rr	r
877  rrr�r
r�rIr	)rFN)rRrr�r�rSr�)rVN)
878  rrrWrXrYrZr[r?rIr\)r)rr�r�r�)Nr�FT)r�r�r�r�r�r�rIru)rIrJ)rHrJrrJrIrJ)rHrJrrDrIrD)rHrJrr�rIr�)r�rr�rrIr)�r<�
879  __future__rr�rd�textwrapr�typingrrrrrr	r
880  r��numpyrW�pandas._libsrrr�r
r�pandas._typingrrrrrr�pandas.util._decoratorsr�pandas.util._exceptionsr�pandas.core.dtypes.castrrr�pandas.core.dtypes.commonrrrrrrr r!r"r#r$r%r&r'r(r)r*r+r,�pandas.core.dtypes.concatr-�pandas.core.dtypes.dtypesr.r/r0�pandas.core.dtypes.genericr1r2r3r4r5r6r7�pandas.core.dtypes.missingr8r9�pandas.core.array_algos.taker:�pandas.core.constructionr;r�r<r=�pandas.core.indexersr>r?r@r�rArBrCrDrE�pandas.core.arraysrFrGrZrjrtZComplex128HashTableZComplex64HashTableZFloat64HashTableZFloat32HashTableZUInt64HashTableZUInt32HashTableZUInt16HashTableZUInt8HashTable�Int64HashTableZInt32HashTableZInt16HashTableZ
Int8HashTableZStringHashTableZPyObjectHashTabler�r�r�r�r��unique1dr�r�r�r�r�r�r�r�r�r�rrrr+r=rJr_rmrtr�rxrzr�rcrcrcrd�<module>s�$	 T$	
881  	
882  P
883  &�
884  
885  a
886  Y�?�����
887  q
888  2�[� ��+�>�a Ms�i�Zv�
889  
890  
891