/ lib / pandas / core / describe.pyc
describe.pyc
  1  o

  2  c��c�3�@s`dZddlmZddlmZmZddlmZmZm	Z	m
  3  Z
  4  mZmZddl
Z
ddlZddlmZddlmZmZmZddlmZdd	lmZdd
  5  lmZmZmZmZmZm Z ddl!Z"ddl#m$Z$ddl%m&Z&erndd
l!m'Z'm(Z(d8dd�Z)Gdd�de�Z*Gdd�de*�Z+Gdd�de*�Z,d9d#d$�Z-d:d(d)�Z.d;d,d-�Z/d;d.d/�Z0d<d0d1�Z1d=d3d4�Z2d>d6d7�Z3dS)?z�
  6  Module responsible for execution of NDFrame.describe() method.
  7  
  8  Method NDFrame.describe() delegates actual execution to function describe_ndframe().
  9  �)�annotations)�ABC�abstractmethod)�
TYPE_CHECKING�Any�Callable�Hashable�Sequence�castN)�	Timestamp)�DtypeObj�NDFrameT�npt)�find_stack_level)�validate_percentile)�
is_bool_dtype�is_complex_dtype�is_datetime64_any_dtype�is_extension_array_dtype�is_numeric_dtype�is_timedelta64_dtype)�concat)�format_percentiles)�	DataFrame�Series�objr
�include�str | Sequence[str] | None�exclude�datetime_is_numeric�bool�percentiles�#Sequence[float] | np.ndarray | None�returncCsRt|�}|jdkrttd|�|d�}nttd|�|||d�}|j|d�}tt|�S)a�Describe series or dataframe.
 10  
 11      Called from pandas.core.generic.NDFrame.describe()
 12  
 13      Parameters
 14      ----------
 15      obj: DataFrame or Series
 16          Either dataframe or series to be described.
 17      include : 'all', list-like of dtypes or None (default), optional
 18          A white list of data types to include in the result. Ignored for ``Series``.
 19      exclude : list-like of dtypes or None (default), optional,
 20          A black list of data types to omit from the result. Ignored for ``Series``.
 21      datetime_is_numeric : bool, default False
 22          Whether to treat datetime dtypes as numeric.
 23      percentiles : list-like of numbers, optional
 24          The percentiles to include in the output. All should fall between 0 and 1.
 25          The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and
 26          75th percentiles.
 27  
 28      Returns
 29      -------
 30      Dataframe or series description.
 31      �r�rrr)rrrr)r!)�refine_percentiles�ndim�SeriesDescriberr
 32  �DataFrameDescriber�describer
)rrrrr!Z	describer�result�r,��C:\Users\Jacks.GUTTSPC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\pandas\core\describe.py�describe_ndframe6s
 33  ��
 34  r.c@s(eZdZdZddd	�Zeddd
��ZdS)�NDFrameDescriberAbstractz�Abstract class for describing dataframe or series.
 35  
 36      Parameters
 37      ----------
 38      obj : Series or DataFrame
 39          Object to be described.
 40      datetime_is_numeric : bool
 41          Whether to treat datetime dtypes as numeric.
 42      r�DataFrame | Seriesrr r#�NonecCs||_||_dS�Nr%)�selfrrr,r,r-�__init__ts
 43  z!NDFrameDescriberAbstract.__init__r!�Sequence[float] | np.ndarraycCsdS)z�Do describe either series or dataframe.
 44  
 45          Parameters
 46          ----------
 47          percentiles : list-like of numbers
 48              The percentiles to include in the output.
 49          Nr,)r3r!r,r,r-r*xsz!NDFrameDescriberAbstract.describeN)rr0rr r#r1)r!r5r#r0)�__name__�
 50  __module__�__qualname__�__doc__r4rr*r,r,r,r-r/is
 51  
 52  
 53  r/c@s$eZdZUdZded<d
 54  dd�Zd	S)r(z2Class responsible for creating series description.rrr!r5r#cCst|j|j�}||j|�Sr2)�select_describe_funcrr)r3r!�
describe_funcr,r,r-r*�s
 55  �zSeriesDescriber.describeN)r!r5r#r)r6r7r8r9�__annotations__r*r,r,r,r-r(�s
 56  r(cs4eZdZdZd�fdd�Zddd�Zdd�Z�ZS)r)a�Class responsible for creating dataobj description.
 57  
 58      Parameters
 59      ----------
 60      obj : DataFrame
 61          DataFrame to be described.
 62      include : 'all', list-like of dtypes or None
 63          A white list of data types to include in the result.
 64      exclude : list-like of dtypes or None
 65          A black list of data types to omit from the result.
 66      datetime_is_numeric : bool
 67          Whether to treat datetime dtypes as numeric.
 68      rrrrrrr r#r1cs>||_||_|jdkr|jjdkrtd��t�j||d�dS)N�rz+Cannot describe a DataFrame without columns)r)rrr'�columns�size�
 69  ValueError�superr4)r3rrrr��	__class__r,r-r4�s
 70  zDataFrameDescriber.__init__r!r5csn|��}g}|��D]\}}t||j�}|�|||��q
 71  t|��t�fdd�|D�ddd�}|j��|_|S)Ncsg|]	}|j�dd��qS)F)�copy)�reindex��.0�x��	col_namesr,r-�
 72  <listcomp>�sz/DataFrameDescriber.describe.<locals>.<listcomp>r$F)�axis�sort)	�_select_data�itemsr:r�append�reorder_columnsrr>rD)r3r!�data�ldesc�_�seriesr;�dr,rIr-r*�s�zDataFrameDescriber.describecCs�|jdur)|jdur)tjg}|jr|�d�|jj|d�}t|j	�dkr'|j}|S|jdkr>|jdur9d}t
 73  |��|j}|S|jj|j|jd�}|S)zSelect columns to be described.N�datetime)rr�allz*exclude must be None when include is 'all')rr)rr�np�numberrrPr�
select_dtypes�lenr>r@)r3Zdefault_includerR�msgr,r,r-rN�s&
 74  
 75 76  ��zDataFrameDescriber._select_data)
 77  rrrrrrrr r#r1)r!r5r#r)r6r7r8r9r4r*rN�
__classcell__r,r,rBr-r)�s
 78  
 79  r)rS�Sequence[Series]�list[Hashable]cCsDg}tdd�|D�td�}|D]}|D]}||vr|�|�qq|S)z,Set a convenient order for rows for display.css�|]}|jVqdSr2)�indexrFr,r,r-�	<genexpr>�s�z"reorder_columns.<locals>.<genexpr>)�key)�sortedr\rP)rS�namesZ
ldesc_indexesZidxnames�namer,r,r-rQ�s
 80  ��rQrUr�Sequence[float]cCs�ddlm}t|�}gd�|dg}|��|��|��|��g|�|���|�	�g}t
 81  |�r6t��}nt
|�rDt|�sDt�d�}nd}||||j|d�S)z�Describe series containing numerical data.
 82  
 83      Parameters
 84      ----------
 85      series : Series
 86          Series to be described.
 87      percentiles : list-like of numbers
 88          The percentiles to include in the output.
 89      r�r)�count�mean�std�min�max�floatN�rarf�dtype)�pandasrrrirjrkrl�quantile�tolistrmr�pd�Float64DtyperrrYrprf)rUr!r�formatted_percentiles�
 90  stat_indexrVrpr,r,r-�describe_numeric_1d�s
 91  ���
 92  rxrR�percentiles_ignoredc
 93  Cs�gd�}|��}t||dk�}|dkr"|jd|jd}}d}n	tjtj}}d}|��|||g}ddlm}	|	|||j	|d�S)z�Describe series containing categorical data.
 94  
 95      Parameters
 96      ----------
 97      data : Series
 98          Series to be described.
 99      percentiles_ignored : list-like of numbers
100          Ignored, but in place to unify interface.
101      )ri�unique�top�freqrN�objectrhro)
102  �value_countsr\ra�ilocrY�nanrirqrrf)
103  rRryre�	objcounts�count_uniquer{r|rpr+rr,r,r-�describe_categorical_1ds
r�cCs
104  ddg}|��}t||dk�}|��|g}d}|dkrf|jd|jd}}|jj}	|��j�	d�}
105  t
106  |�}|jdurF|	durF|�|	�}n|�
|	�}|gd�7}|||t
107  |
108  ��|	d�t
109  |
110  ��|	d�g7}n|dd	g7}|tjtjg7}d
111  }ddlm}||||j|d�S)
z�Describe series containing timestamp data treated as categorical.
112  
113      Parameters
114      ----------
115      data : Series
116          Series to be described.
117      percentiles_ignored : list-like of numbers
118          Ignored, but in place to unify interface.
119      rirzrN�i8)r{r|�first�last)�tzr{r|r}rhro)r~r\rirar�dtr��dropna�values�viewr�tzinfo�
120  tz_convert�tz_localizerlrmrYr�rqrrf)rRryrer�r�r+rpr{r|r��asintrr,r,r-�$describe_timestamp_as_categorical_1d!s2

121  �
122  r�cCsdddlm}t|�}gd�|dg}|��|��|��g|�|���|��g}||||j	d�S)z�Describe series containing datetime64 dtype.
123  
124      Parameters
125      ----------
126      data : Series
127          Series to be described.
128      percentiles : list-like of numbers
129          The percentiles to include in the output.
130      rrh)rirjrlrm)rarf)
131  rqrrrirjrlrrrsrmrf)rRr!rrvrwrVr,r,r-�describe_timestamp_1dQs���r�rcCsTt|j�rtSt|�r
tSt|j�r!|rtStjdt	t
132  �d�tSt|j�r(tStS)z�Select proper function for describing series based on data type.
133  
134      Parameters
135      ----------
136      data : Series
137          Series to be described.
138      datetime_is_numeric : bool
139          Whether to treat datetime dtypes as numeric.
140      z�Treating datetime data as categorical rather than numeric in `.describe` is deprecated and will be removed in a future version of pandas. Specify `datetime_is_numeric=True` to silence this warning and adopt the future behavior now.)�
141  stacklevel)
rrpr�rrxrr��warnings�warn�
FutureWarningrr�r)rRrr,r,r-r:is 
142  

143  �
144  r:�%np.ndarray[Any, np.dtype[np.float64]]cCst|durt�gd��St|�}t|�d|vr|�d�t�|�}t�|�}|dus,J�t|�t|�kr8td��|S)z�
145      Ensure that percentiles are unique and sorted.
146  
147      Parameters
148      ----------
149      percentiles : list-like of numbers, optional
150          The percentiles to include in the output.
151      N)g�?��?g�?r�z%percentiles cannot contain duplicates)	rY�array�listrrP�asarrayrzr\r@)r!�unique_pctsr,r,r-r&�s
152  
153  
154  r&)rr
rrrrrr r!r"r#r
)rSr_r#r`)rUrr!rgr#r)rRrryrgr#r)rRrr!rgr#r)rRrrr r#r)r!r"r#r�)4r9�
155  __future__r�abcrr�typingrrrrr	r
156  r��numpyrY�pandas._libs.tslibsr�pandas._typingrr
r�pandas.util._exceptionsr�pandas.util._validatorsr�pandas.core.dtypes.commonrrrrrrrqrt�pandas.core.reshape.concatr�pandas.io.formats.formatrrrr.r/r(r)rQrxr�r�r�r:r&r,r,r,r-�<module>s6  	
157  3
158  G
159  
160  
161   
162  0
163  $