
    Q?h9!                     B    S SK JrJr  SSKJr  SSKJr   " S S\5      rg)    )ListUnion   )CharSetProber)ProbingStatec                   >  ^  \ rS rSrSrSrSrSU 4S jjrSU 4S jjr\	S\
4S	 j5       r\	S\
4S
 j5       rS\4S jrS\4S jrS\4S jrS\4S jrS\4S jrS\4S jrS\\   SS4S jrS\\   SS4S jrS\\\4   S\4S jr\	S\4S j5       rS\4S jrSr U =r!$ )UTF1632Prober   aL  
This class simply looks for occurrences of zero bytes, and infers
whether the file is UTF16 or UTF32 (low-endian or big-endian)
For instance, files looking like (       [nonzero] )+
have a good probability to be UTF32BE.  Files looking like (   [nonzero] )+
may be guessed to be UTF16BE, and inversely for little-endian varieties.
   gGz?returnNc                   > [         TU ]  5         SU l        S/S-  U l        S/S-  U l        [
        R                  U l        / SQU l        SU l	        SU l
        SU l        SU l        SU l        SU l        U R                  5         g )Nr      r   r   r   r   F)super__init__positionzeros_at_modnonzeros_at_modr   	DETECTING_statequadinvalid_utf16beinvalid_utf16leinvalid_utf32beinvalid_utf32le'first_half_surrogate_pair_detected_16be'first_half_surrogate_pair_detected_16leresetself	__class__s    G/var/www/html/env/lib/python3.13/site-packages/chardet/utf1632prober.pyr   UTF1632Prober.__init__)   s~    C!G !sQw",, 	$$$$7<47<4

    c                    > [         TU ]  5         SU l        S/S-  U l        S/S-  U l        [
        R                  U l        SU l        SU l	        SU l
        SU l        SU l        SU l        / SQU l        g )Nr   r   Fr   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s    r"   r   UTF1632Prober.reset8   ss    C!G !sQw",,$$$$7<47<4 	r$   c                     U R                  5       (       a  gU R                  5       (       a  gU R                  5       (       a  gU R                  5       (       a  gg)Nzutf-32bezutf-32lezutf-16bezutf-16lezutf-16)is_likely_utf32beis_likely_utf32leis_likely_utf16beis_likely_utf16ler    s    r"   charset_nameUTF1632Prober.charset_nameF   sM    !!##!!##!!##!!##r$   c                     g)N  r,   s    r"   languageUTF1632Prober.languageS   s    r$   c                 4    [        SU R                  S-  5      $ )N      ?g      @maxr   r,   s    r"   approx_32bit_chars UTF1632Prober.approx_32bit_charsW       3+,,r$   c                 4    [        SU R                  S-  5      $ )Nr5   g       @r6   r,   s    r"   approx_16bit_chars UTF1632Prober.approx_16bit_charsZ   r:   r$   c                    U R                  5       nXR                  :  =(       a    U R                  S   U-  U R                  :  =(       a    U R                  S   U-  U R                  :  =(       a]    U R                  S   U-  U R                  :  =(       a7    U R                  S   U-  U R                  :  =(       a    U R
                  (       + $ Nr   r         )r8   MIN_CHARS_FOR_DETECTIONr   EXPECTED_RATIOr   r   r    approx_charss     r"   r(   UTF1632Prober.is_likely_utf32be]   s    ..0;;; 
a </$2E2EE )!!!$|3d6I6II)!!!$|3d6I6II) $$Q',69L9LL) (((	
r$   c                    U R                  5       nXR                  :  =(       a    U R                  S   U-  U R                  :  =(       a    U R                  S   U-  U R                  :  =(       a]    U R                  S   U-  U R                  :  =(       a7    U R                  S   U-  U R                  :  =(       a    U R
                  (       + $ r?   )r8   rB   r   rC   r   r   rD   s     r"   r)   UTF1632Prober.is_likely_utf32leg   s    ..0;;; 
  #l2T5H5HH )!!!$|3d6I6II)!!!$|3d6I6II) !!!$|3d6I6II) (((	
r$   c                 F   U R                  5       nXR                  :  =(       a}    U R                  S   U R                  S   -   U-  U R                  :  =(       aG    U R                  S   U R                  S   -   U-  U R                  :  =(       a    U R
                  (       + $ )Nr   rA   r   r@   )r<   rB   r   rC   r   r   rD   s     r"   r*   UTF1632Prober.is_likely_utf16beq       ..0;;; 
!!!$t';';A'>>,N!!" )""1%(9(9!(<<L!!") (((	
r$   c                 F   U R                  5       nXR                  :  =(       a}    U R                  S   U R                  S   -   U-  U R                  :  =(       aG    U R                  S   U R                  S   -   U-  U R                  :  =(       a    U R
                  (       + $ )Nr   r@   r   rA   )r<   rB   r   rC   r   r   rD   s     r"   r+   UTF1632Prober.is_likely_utf16le{   rK   r$   r   c                    US   S:w  d.  US   S:  d%  US   S:X  a#  US   S:X  a  SUS   s=::  a  S::  a
  O  OSU l         US   S:w  d.  US   S:  d%  US   S:X  a&  US   S:X  a  SUS   s=::  a  S::  a  O  g	SU l        g	g	g	g	)
z
Validate if the quad of bytes is valid UTF-32.

UTF-32 is valid in the range 0x00000000 - 0x0010FFFF
excluding 0x0000D800 - 0x0000DFFF

https://en.wikipedia.org/wiki/UTF-32
r   r         r@      TrA   N)r   r   )r    r   s     r"   validate_utf32_characters'UTF1632Prober.validate_utf32_characters   s     GqLAw~Q1aA$$q'2IT2I#'D GqLAw~Q1aA$$q'2IT2I#'D  3Jr$   pairc                    U R                   (       d6  SUS   s=::  a  S::  a  O  OSU l         O=SUS   s=::  a  S::  a
  O  O*SU l        O"SUS   s=::  a  S::  a  O  OSU l         OSU l        U R                  (       d7  SUS   s=::  a  S::  a  O  OSU l        g	SUS   s=::  a  S::  a  O  g	SU l        g	g	SUS   s=::  a  S::  a  O  OSU l        g	SU l        g	)
a	  
Validate if the pair of bytes is  valid UTF-16.

UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
with an exception for surrogate pairs, which must be in the range
0xD800-0xDBFF followed by 0xDC00-0xDFFF

https://en.wikipedia.org/wiki/UTF-16
rP   r      T   rQ   Fr   N)r   r   r   r   )r    rT   s     r"   validate_utf16_characters'UTF1632Prober.validate_utf16_characters   s     ;;tAw&$&?C<a(D('+$tAw&$&?D<'+$;;tAw&$&?C<a(D('+$ ) tAw&$&?D<'+$r$   byte_strc                    U H  nU R                   S-  nX R                  U'   US:X  aW  U R                  U R                  5        U R                  U R                  SS 5        U R                  U R                  SS 5        US:X  a  U R                  U==   S-  ss'   OU R
                  U==   S-  ss'   U =R                   S-  sl         M     U R                  $ )Nr   rA   r   r@   r   )r   r   rR   rX   r   r   state)r    rZ   cmod4s       r"   feedUTF1632Prober.feed   s    A==1$DIIdOqy..tyy9..tyy1~>..tyy1~>Av!!$'1,'$$T*a/*MMQM  zzr$   c                 B   U R                   [        R                  [        R                  1;   a  U R                   $ U R	                  5       S:  a!  [        R                  U l         U R                   $ U R
                  S:  a  [        R                  U l         U R                   $ )Ng?i   )r   r   NOT_MEFOUND_ITget_confidencer   r,   s    r"   r\   UTF1632Prober.state   sz    ;;<..0E0EFF;; 4'&//DK
 {{	 ]]X% '--DK{{r$   c                     U R                  5       (       d?  U R                  5       (       d*  U R                  5       (       d  U R                  5       (       a  S$ S$ )Ng333333?g        )r+   r*   r)   r(   r,   s    r"   rd   UTF1632Prober.get_confidence   sT     &&(())++))++))++ 		
 		
r$   )r   r   r   r   r   r   r   r   r   r   r   )r   N)"__name__
__module____qualname____firstlineno____doc__rB   rC   r   r   propertystrr-   r2   floatr8   r<   boolr(   r)   r*   r+   r   intrR   rX   r   bytes	bytearrayr   r_   r\   rd   __static_attributes____classcell__)r!   s   @r"   r	   r	      s    !N! 
c 
 
 #  -E --E -
4 

4 

4 

4 
(d3i (D (,,d3i ,D ,@U5)#34   
| 
 


 

 

r$   r	   N)typingr   r   charsetproberr   enumsr   r	   r1   r$   r"   <module>ry      s   *  ( F
M F
r$   