
    MhJ                    f   S SK Jr  S SKJr  S SKJrJr  S SKrS SKJ	r	J
r
  S SKrS SKJr  S SKJrJrJrJr  S SKJrJr  S S	KJr  S S
KJr  S SKJr  S SKJr  S SKJ r J!r!  S SK"J#r#  \	(       a  S SK$J%r%         S           SS jjr&     S           SS jjr'  S       SS jjr(g)    )annotations)defaultdict)HashableIterableN)TYPE_CHECKINGcast)IntIndex)is_integer_dtypeis_list_likeis_object_dtypepandas_dtype)
ArrowDtypeCategoricalDtype)SparseArray)factorize_from_iterable)StringDtype)	DataFrame)Indexdefault_index)Series)NpDtypec                  ^ SSK Jn  / SQn	[        U [        5      (       Ga  Uc  U R	                  U	S9mO[        U5      (       d  [        S5      eX   mSU4S jjn
U
" US5        U
" US5        [        U[        5      (       a  [        R                  " U/5      n[        U[        5      (       a  TR                   Vs/ s H  oU   PM	     nnUc  TR                  n[        U[        5      (       a  [        R                  " U/5      nO3[        U[        5      (       a  TR                   Vs/ s H  oU   PM	     nnTR                  U R                  :X  a  / nO%Ub  U R                  US	S
9/nOU R	                  U	S9/n[        TR                  5       X5       H)  u  pn[!        US	   UUUUUUS9nUR#                  U5        M+     U" US	S
9nU$ [!        U UUUUUUS9nU$ s  snf s  snf )a?  
Convert categorical variable into dummy/indicator variables.

Each variable is converted in as many 0/1 variables as there are different
values. Columns in the output are each named after a value; if the input is
a DataFrame, the name of the original variable is prepended to the value.

Parameters
----------
data : array-like, Series, or DataFrame
    Data of which to get dummy indicators.
prefix : str, list of str, or dict of str, default None
    String to append DataFrame column names.
    Pass a list with length equal to the number of columns
    when calling get_dummies on a DataFrame. Alternatively, `prefix`
    can be a dictionary mapping column names to prefixes.
prefix_sep : str, default '_'
    If appending prefix, separator/delimiter to use. Or pass a
    list or dictionary as with `prefix`.
dummy_na : bool, default False
    Add a column to indicate NaNs, if False NaNs are ignored.
columns : list-like, default None
    Column names in the DataFrame to be encoded.
    If `columns` is None then all the columns with
    `object`, `string`, or `category` dtype will be converted.
sparse : bool, default False
    Whether the dummy-encoded columns should be backed by
    a :class:`SparseArray` (True) or a regular NumPy array (False).
drop_first : bool, default False
    Whether to get k-1 dummies out of k categorical levels by removing the
    first level.
dtype : dtype, default bool
    Data type for new columns. Only a single dtype is allowed.

Returns
-------
DataFrame
    Dummy-coded data. If `data` contains other columns than the
    dummy-coded one(s), these will be prepended, unaltered, to the result.

See Also
--------
Series.str.get_dummies : Convert Series of strings to dummy codes.
:func:`~pandas.from_dummies` : Convert dummy codes to categorical ``DataFrame``.

Notes
-----
Reference :ref:`the user guide <reshaping.dummies>` for more examples.

Examples
--------
>>> s = pd.Series(list('abca'))

>>> pd.get_dummies(s)
       a      b      c
0   True  False  False
1  False   True  False
2  False  False   True
3   True  False  False

>>> s1 = ['a', 'b', np.nan]

>>> pd.get_dummies(s1)
       a      b
0   True  False
1  False   True
2  False  False

>>> pd.get_dummies(s1, dummy_na=True)
       a      b    NaN
0   True  False  False
1  False   True  False
2  False  False   True

>>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'],
...                    'C': [1, 2, 3]})

>>> pd.get_dummies(df, prefix=['col1', 'col2'])
   C  col1_a  col1_b  col2_a  col2_b  col2_c
0  1    True   False   False    True   False
1  2   False    True    True   False   False
2  3    True   False   False   False    True

>>> pd.get_dummies(pd.Series(list('abcaa')))
       a      b      c
0   True  False  False
1  False   True  False
2  False  False   True
3   True  False  False
4   True  False  False

>>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True)
       b      c
0  False  False
1   True  False
2  False   True
3  False  False
4  False  False

>>> pd.get_dummies(pd.Series(list('abc')), dtype=float)
     a    b    c
0  1.0  0.0  0.0
1  0.0  1.0  0.0
2  0.0  0.0  1.0
r   concat)objectstringcategory)includez1Input must be a list-like for parameter `columns`c                   > [        U 5      (       aJ  [        U 5      TR                  S   :X  d-  SU S[        U 5       STR                  S    S3n[        U5      eg g )N   zLength of 'z' (9) did not match the length of the columns being encoded (z).)r   lenshape
ValueError)itemnamelen_msgdata_to_encodes      N/var/www/html/env/lib/python3.13/site-packages/pandas/core/reshape/encoding.py	check_lenget_dummies.<locals>.check_len   so    D!!4yN$8$8$;;%dV3s4yk :*0034B8 
 %W-- < "    prefix
prefix_sepr    axis)exclude)r-   r.   dummy_nasparse
drop_firstdtype)r3   r4   r5   )r&   str)pandas.core.reshape.concatr   
isinstancer   select_dtypesr   	TypeErrorr6   	itertoolscycledictcolumnsr#   dropzipitems_get_dummies_1dappend)datar-   r.   r2   r>   r3   r4   r5   r   dtypes_to_encoder*   colwith_dummiespresepdummyresultr(   s                    @r)   get_dummiesrL   +   s   f 27$	""?!//8H/INg&&OPP!]N	. 	&(#*l+fc""__fX.Ffd##-;-C-CD-CcSk-CFD>#++F j#&&"*6J
D))5C5K5KL5KcS/5KJL 4::-L  !IIgAI67L !..7G.HIL !5!5!7LMCc#A!%E & M 1- M !!
 M_ E Ms   =G,G$c           
        SSK Jn  [        [        U SS95      u  pUc  [	        U S5      (       a  U R
                  n
[        U
[        5      (       a  U
R                  R
                  n
[        U
[        5      (       a  SS K
n[        UR                  5       5      nOi[        U
[        5      (       a  U
R                  S:w  a  [        S5      nO8[        R
                  " [         5      nOUc  [        R
                  " [         5      n[        U5      n[#        U5      (       a  [%        S5      eSS	 jnU(       d  ['        U	5      S:X  a  U" U 5      $ UR)                  5       nU(       a9  ['        U	5      XS
:H  '   U	R+                  ['        U	5      [        R,                  5      n	U(       a  ['        U	5      S:X  a  U" U 5      $ ['        U	5      nUc  U	nO"[/        U	 Vs/ s H  nU U U 3PM     sn5      n[        U [        5      (       a  U R0                  nOS nU(       Ga0  [3        U5      (       a  SnO#U[        R
                  " [         5      :X  a  SnOSn/ n['        U 5      n[5        ['        U5      5       Vs/ s H  n/ PM     nnUS
:g  nUU   n[        R6                  " U5      U   n[9        UU5       H  u  nnUU   R;                  U5        M     U(       a
  USS  nUSS  n[9        UU5       HS  u  nn[=        [        R>                  " ['        U5      US9[A        UU5      UUS9nUR;                  [        UUUSS95        MU     U" USSS9$ ['        U5      U4n[        U[        R
                  5      (       a  UnO[        R                  n[        RB                  " UUSS9n SU [        R6                  " ['        U5      5      U4'   U(       d  SU US
:H  '   U(       a  U S S 2SS 24   n USS  n[E        U UXS9$ s  snf s  snf )Nr   r   Fcopyr5   pyarrow_numpybooleanz1dtype=object is not a valid dtype for get_dummiesc                    [        U [        5      (       a  U R                  nO[        [	        U 5      5      n[        US9$ )N)index)r8   r   rS   r   r"   r   )rD   rS   s     r)   get_empty_frame(_get_dummies_1d.<locals>.get_empty_frame  s1    dF##JJE!#d),Eu%%r,   r    g        r5   )sparse_index
fill_valuer5   )rD   rS   r&   rO   )r0   rO   F)r#   r5   order)rS   r>   r5   )returnr   )#r7   r   r   r   hasattrr5   r8   r   
categoriesr   pyarrowbool_r   storager   npboolr   r$   r"   rO   insertnanr   rS   r
   rangearanger@   rC   r   onesr	   zerosr   )!rD   r-   r.   r2   r3   r4   r5   r   codeslevelsinput_dtypepa_dtyperT   number_of_cols
dummy_colslevelrS   rY   sparse_seriesN_
sp_indicesmaskn_idxndxcoderF   ixssarrr#   dummy_dtype	dummy_mats!                                    r)   rB   rB      sy    2 ,F4e,DEME}w//jjk#344%0066Kk:.. rxxz*E{K00##6 +EHHTNE	% FvLMM& Fq(t$$JJLE [rks6{BFF3 c&kQ&t$$[N~
Ouvhzl5':OP
 $

E""Jbhhtn$JJI.3C
O.D!E.D".D
!E{d		!T"UE*ICt##C( +  $ABJ#ABJJ
3HCC.%a-%	D   TSu!UV 4 m!%88 E
N*fbhh'' K((KHH53G	23	"))CJ'./%&Ierk"!!QR%(I#ABJ%RR} P& "Fs   O32O8c           	     @   SSK Jn  [        U [        5      (       d!  [	        S[        U 5      R                   35      e[        [        U R                  5       R                  5       5      nUR                  5       (       a  [        SUR                  5        S35      e U R                  SSS9n[        [        5      nUc  [        U R                   5      US'   O[        U["        5      (       aa  UR                    HP  nUR%                  U5      S   n['        U5      ['        U5      :X  a  [        SU 35      eXh   R)                  U5        MR     O![	        S[        U5      R                   35      eUb  [        U[*        5      (       a?  ['        U5      ['        U5      :X  d&  S['        U5       S['        U5       S3n	[        U	5      eOX[        U[,        5      (       a"  [+        [/        Xb/['        U5      -  5      5      nO![	        S[        U5      R                   35      e0 n
UR1                  5        GH  u  pUc  UR3                  5       nOU Vs/ s H  ow['        X-   5      S
 PM     nnUR4                  S
S
2U4   R7                  SS9n[        US:  5      (       a  [        SUR                  5        35      e[        US:H  5      (       ac  [        U[*        5      (       a  UR)                  X(   5        O[        SUR9                  5        35      eU" UR4                  S
S
2U4   US:H  4SS9nOUR4                  S
S
2U4   nU R;                  XR                   R<                  S9nUR                  SS9nUR                   R?                  U5      nURA                  U5      RC                  U RD                  5      X'   GM     [        U
5      nUb4  UR                   R                  U R                   R<                  5      Ul        U$ ! [         a    [	        S	5      ef = fs  snf )a  
Create a categorical ``DataFrame`` from a ``DataFrame`` of dummy variables.

Inverts the operation performed by :func:`~pandas.get_dummies`.

.. versionadded:: 1.5.0

Parameters
----------
data : DataFrame
    Data which contains dummy-coded variables in form of integer columns of
    1's and 0's.
sep : str, default None
    Separator used in the column names of the dummy categories they are
    character indicating the separation of the categorical names from the prefixes.
    For example, if your column names are 'prefix_A' and 'prefix_B',
    you can strip the underscore by specifying sep='_'.
default_category : None, Hashable or dict of Hashables, default None
    The default category is the implied category when a value has none of the
    listed categories specified with a one, i.e. if all dummies in a row are
    zero. Can be a single value for all variables or a dict directly mapping
    the default categories to a prefix of a variable.

Returns
-------
DataFrame
    Categorical data decoded from the dummy input-data.

Raises
------
ValueError
    * When the input ``DataFrame`` ``data`` contains NA values.
    * When the input ``DataFrame`` ``data`` contains column names with separators
      that do not match the separator specified with ``sep``.
    * When a ``dict`` passed to ``default_category`` does not include an implied
      category for each prefix.
    * When a value in ``data`` has more than one category assigned to it.
    * When ``default_category=None`` and a value in ``data`` has no category
      assigned to it.
TypeError
    * When the input ``data`` is not of type ``DataFrame``.
    * When the input ``DataFrame`` ``data`` contains non-dummy data.
    * When the passed ``sep`` is of a wrong data type.
    * When the passed ``default_category`` is of a wrong data type.

See Also
--------
:func:`~pandas.get_dummies` : Convert ``Series`` or ``DataFrame`` to dummy codes.
:class:`~pandas.Categorical` : Represent a categorical variable in classic.

Notes
-----
The columns of the passed dummy data should only include 1's and 0's,
or boolean values.

Examples
--------
>>> df = pd.DataFrame({"a": [1, 0, 0, 1], "b": [0, 1, 0, 0],
...                    "c": [0, 0, 1, 0]})

>>> df
   a  b  c
0  1  0  0
1  0  1  0
2  0  0  1
3  1  0  0

>>> pd.from_dummies(df)
0     a
1     b
2     c
3     a

>>> df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0],
...                    "col2_a": [0, 1, 0], "col2_b": [1, 0, 0],
...                    "col2_c": [0, 0, 1]})

>>> df
      col1_a  col1_b  col2_a  col2_b  col2_c
0       1       0       0       1       0
1       0       1       1       0       0
2       1       0       0       0       1

>>> pd.from_dummies(df, sep="_")
    col1    col2
0    a       b
1    b       a
2    a       c

>>> df = pd.DataFrame({"col1_a": [1, 0, 0], "col1_b": [0, 1, 0],
...                    "col2_a": [0, 1, 0], "col2_b": [1, 0, 0],
...                    "col2_c": [0, 0, 0]})

>>> df
      col1_a  col1_b  col2_a  col2_b  col2_c
0       1       0       0       1       0
1       0       1       1       0       0
2       0       0       0       0       0

>>> pd.from_dummies(df, sep="_", default_category={"col1": "d", "col2": "e"})
    col1    col2
0    a       b
1    b       a
2    d       e
r   r   z>Expected 'data' to be a 'DataFrame'; Received 'data' of type: z.Dummy DataFrame contains NA value in column: ''rQ   FrN   z(Passed DataFrame contains non-dummy dataN z$Separator not specified for column: zFExpected 'sep' to be of type 'str' or 'None'; Received 'sep' of type: zLength of 'default_category' (r!   )znExpected 'default_category' to be of type 'None', 'Hashable', or 'dict'; Received 'default_category' of type: r    r/   zEDummy DataFrame contains multi-assignment(s); First instance in row: zEDummy DataFrame contains unassigned value(s); First instance in row: rW   )#r7   r   r8   r   r:   type__name__r   r   isnaanyr$   idxmaxastyper   listr>   r6   splitr"   rC   r=   r   r@   rA   rO   locsumidxmin_constructor_slicedr5   get_indexer_fortakeset_axisrS   )rD   rI   default_categoryr   col_isna_maskdata_to_decodevariables_slicerF   r-   r'   cat_dataprefix_slicecatsassigned
data_slice
cats_arraytrue_valuesindexerrK   s                      r)   from_dummiesr   o  s   \ 2dI&&((,T
(;(;'<>
 	

 !23M$$&'q*
 	
DYU;
 "$'O
{"4<<0	C		!))CYYs^A&F6{c#h& #Gu!MNN#**3/	 * ''+Cy'9'9&:<
 	

 #&--'(C,@@4S9I5J4K LO,-Q0 
 !)) A ((33#O%7#o:N%NO  8 ()2235  H / 5 5 7;$$&D8DEFL)+,DE!%%ao6:::Bx!|**2//*;)<>  x1}*D11,45 ..6oo.?-@B   ##A|O4h!mD1J (++A|O<J--d,,:L:L-M
 ''Q'/$$44[A%??73<<TZZH9 !8< x F
..t||/A/ABMU  DBCCDZ Fs   P PP)Nrt   FNFFN)r.   z$str | Iterable[str] | dict[str, str]r2   rc   r3   rc   r4   rc   r5   zNpDtype | Noner\   r   )rt   FFFN)NN)rD   r   rI   z
None | strr   z%None | Hashable | dict[str, Hashable]r\   r   ))
__future__r   collectionsr   collections.abcr   r   r;   typingr   r   numpyrb   pandas._libs.sparser	   pandas.core.dtypes.commonr
   r   r   r   pandas.core.dtypes.dtypesr   r   pandas.core.arraysr   pandas.core.arrays.categoricalr   pandas.core.arrays.string_r   pandas.core.framer   pandas.core.indexes.apir   r   pandas.core.seriesr   pandas._typingr   rL   rB   r    r,   r)   <module>r      sF   " # 
  ( 
 + B 2 ' &&
 7: @ 5@ 	@ @ @ @ @L 8; ~S 5~S 	~S
 ~S ~S ~S ~SF >BK
K	K <K 	Kr,   