
    Q?h :                         S r SSKrSSKrSSKrSSKJrJrJr  SSKJ	r	  SSK
Jr  SSKJrJrJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJr  SSKJr   " S S5      rg)a  
Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use.

:author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco
    N)ListOptionalUnion   )CharSetGroupProber)CharSetProber)
InputStateLanguageFilterProbingState)EscCharSetProber)Latin1Prober)MacRomanProber)MBCSGroupProber)
ResultDict)SBCSGroupProber)UTF1632Proberc            	       f   \ rS rSrSrSr\R                  " S5      r\R                  " S5      r	\R                  " S5      r
SSS	S
SSSSS.rSSSSSSSS.r\R                  S4S\S\SS4S jjr\S\4S j5       r\S\4S j5       r\S\\   4S j5       rS#S jrS\\\4   SS4S  jrS\4S! jrS"rg)$UniversalDetector8   aI  
The ``UniversalDetector`` class underlies the ``chardet.detect`` function
and coordinates all of the different charset probers.

To get a ``dict`` containing an encoding and its confidence, you can simply
run:

.. code::

        u = UniversalDetector()
        u.feed(some_bytes)
        u.close()
        detected = u.result

g?s   [-]s   (|~{)s   [-]zWindows-1252zWindows-1250zWindows-1251zWindows-1256zWindows-1253zWindows-1255zWindows-1254zWindows-1257)
iso-8859-1z
iso-8859-2z
iso-8859-5z
iso-8859-6z
iso-8859-7z
iso-8859-8
iso-8859-9ziso-8859-13zISO-8859-11GB18030CP949UTF-16)asciir   ztis-620r   gb2312zeuc-krzutf-16leFlang_filtershould_rename_legacyreturnNc                    S U l         S U l        / U l        S SS S.U l        SU l        SU l        [        R                  U l        SU l	        Xl
        [        R                  " [        5      U l        SU l        X l        U R#                  5         g )N        encoding
confidencelanguageF    )_esc_charset_prober_utf1632_prober_charset_probersresultdone	_got_datar	   
PURE_ASCII_input_state
_last_charr   logging	getLogger__name__logger_has_win_bytesr   reset)selfr   r   s      K/var/www/html/env/lib/python3.13/site-packages/chardet/universaldetector.py__init__UniversalDetector.__init__d   s    
 @D 8<57#

 	&11&''1#$8!

r&   c                     U R                   $ N)r.   r6   s    r7   input_stateUniversalDetector.input_state{   s       r&   c                     U R                   $ r;   )r4   r<   s    r7   has_win_bytesUniversalDetector.has_win_bytes   s    """r&   c                     U R                   $ r;   )r)   r<   s    r7   charset_probers!UniversalDetector.charset_probers   s    $$$r&   c                 n   SSSS.U l         SU l        SU l        SU l        [        R
                  U l        SU l        U R                  (       a  U R                  R                  5         U R                  (       a  U R                  R                  5         U R                   H  nUR                  5         M     g)z
Reset the UniversalDetector and all of its probers back to their
initial states.  This is called by ``__init__``, so you only need to
call this directly in between analyses of different documents.
Nr!   r"   Fr&   )r*   r+   r,   r4   r	   r-   r.   r/   r'   r5   r(   r)   )r6   probers     r7   r5   UniversalDetector.reset   s     $(sM	#&11##$$**,  &&(++FLLN ,r&   byte_strc                 
   U R                   (       a  gU(       d  g[        U[        5      (       d  [        U5      nU R                  (       Gd  UR	                  [
        R                  5      (       a  SSSS.U l        OUR	                  [
        R                  [
        R                  45      (       a  SSSS.U l        OUR	                  S5      (       a  SSSS.U l        OaUR	                  S	5      (       a  S
SSS.U l        O?UR	                  [
        R                  [
        R                  45      (       a  SSSS.U l        SU l        U R                  S   b  SU l         gU R                  [        R                  :X  a  U R                  R!                  U5      (       a  [        R"                  U l        O`U R                  [        R                  :X  aB  U R$                  R!                  U R&                  U-   5      (       a  [        R(                  U l        USS U l        U R*                  (       d  [-        5       U l        U R*                  R.                  [0        R2                  :X  al  U R*                  R5                  U5      [0        R6                  :X  a?  U R*                  R8                  U R*                  R;                  5       SS.U l        SU l         gU R                  [        R(                  :X  a  U R<                  (       d  [?        U R@                  5      U l        U R<                  R5                  U5      [0        R6                  :X  aS  U R<                  R8                  U R<                  R;                  5       U R<                  RB                  S.U l        SU l         ggU R                  [        R"                  :X  GaL  U RD                  (       d  [G        U R@                  5      /U l"        U R@                  [H        RJ                  -  (       a#  U RD                  RM                  [O        5       5        U RD                  RM                  [Q        5       5        U RD                  RM                  [S        5       5        U RD                   H\  nUR5                  U5      [0        R6                  :X  d  M(  UR8                  UR;                  5       URB                  S.U l        SU l           O   U RT                  R!                  U5      (       a  SU l+        ggg)a  
Takes a chunk of a document and feeds it through all of the relevant
charset probers.

After calling ``feed``, you can check the value of the ``done``
attribute to see if you need to continue feeding the
``UniversalDetector`` more data, or if it has made a prediction
(in the ``result`` attribute).

.. note::
   You should always call ``close`` when you're done feeding in your
   document if ``done`` is not already ``True``.
Nz	UTF-8-SIG      ? r"   zUTF-32s     zX-ISO-10646-UCS-4-3412s     zX-ISO-10646-UCS-4-2143r   Tr#   ),r+   
isinstance	bytearrayr,   
startswithcodecsBOM_UTF8r*   BOM_UTF32_LEBOM_UTF32_BEBOM_LEBOM_BEr.   r	   r-   HIGH_BYTE_DETECTORsearch	HIGH_BYTEESC_DETECTORr/   	ESC_ASCIIr(   r   stater   	DETECTINGfeedFOUND_ITcharset_nameget_confidencer'   r   r   r%   r)   r   r
   NON_CJKappendr   r   r   WIN_BYTE_DETECTORr4   )r6   rH   rF   s      r7   r]   UniversalDetector.feed   s    99(I.. *H ~~~""6??33 !,"% "
 $$f&9&96;N;N%OPP ,43TVW$$%899 !9"% "	 $$%899 !9"% "	 $$fmmV]]%CDD ,43TVW!DN{{:&2 	 
 5 55&&--h77$.$8$8!!!Z%:%::%%,,T__x-GHH$.$8$8!"23- ###0?D %%)?)??##((2l6K6KK $ 4 4 A A"&"6"6"E"E"G "
 !	 
 4 44+++;D<L<L+M('',,X6,:O:OO $ 8 8 E E"&":":"I"I"K $ 8 8 A A
 !	 P *"6"66(()89I9I)J(K%##n&<&<<))001BC%%,,\^<%%,,^-=>//;;x(L,A,AA$*$7$7&,&;&;&=$*OO#DK
 !%DI 0 %%,,X66&*# 7# 7r&   c           	         U R                   (       a  U R                  $ SU l         U R                  (       d  U R                  R	                  S5        GOiU R
                  [        R                  :X  a  SSSS.U l        GO>U R
                  [        R                  :X  Ga  SnSnSnU R                   H'  nU(       d  M  UR                  5       nX:  d  M#  UnUnM)     U(       a  X R                  :  a  UR                  nUc   eUR                  5       nUR                  5       nUR                  S	5      (       a,  U R                  (       a  U R                   R#                  Xe5      nU R$                  (       a3  U R&                  R#                  U=(       d    SR                  5       U5      nUUUR(                  S.U l        U R                  R+                  5       [,        R.                  ::  a  U R                  S
   c  U R                  R	                  S5        U R                   H  nU(       d  M  [1        U[2        5      (       aU  UR4                   HC  nU R                  R	                  SUR                  UR(                  UR                  5       5        ME     Mv  U R                  R	                  SUR                  UR(                  UR                  5       5        M     U R                  $ )z
Stop analyzing the current document and come up with a final
prediction.

:returns:  The ``result`` attribute, a ``dict`` with the keys
           `encoding`, `confidence`, and `language`.
Tzno data received!r   rJ   rK   r"   Nr!   ziso-8859r#   z no probers hit minimum thresholdz%s %s confidence = %s)r+   r*   r,   r3   debugr.   r	   r-   rX   r)   r`   MINIMUM_THRESHOLDr_   lowerrO   r4   ISO_WIN_MAPgetr   
LEGACY_MAPr%   getEffectiveLevelr0   DEBUGrM   r   probers)	r6   prober_confidencemax_prober_confidence
max_proberrF   r_   lower_charset_namer$   group_probers	            r7   closeUniversalDetector.close  s`    99;;	~~KK12 *"7"77'.crRDK *"6"66 $$'!J//$*$9$9$;!$<,=)!'J 0 47M7MM)66#///%1%7%7%9"'668
 &00<<**'+'7'7';';.( ,,#'??#6#6%+224l$L !-", * 3 3 ;;((*gmm;{{:&.!!"DE$($9$9L' !,0BCC&2&:&:F KK-- 7 & 3 3 & & 5 5 7	 '; ))3(55(11(779	 %:$ {{r&   )r)   r'   r,   r4   r.   r/   r(   r+   r   r3   r*   r   )r   N) r2   
__module____qualname____firstlineno____doc__rg   recompilerV   rY   rc   ri   rk   r
   ALLboolr8   propertyintr=   r@   r   r   rC   r5   r   bytesrN   r]   r   rt   __static_attributes__ r&   r7   r   r   8   s1     N3::l+L

>2$$$$$$$%	K  $ $J '5&8&8%*# # 
	. !S ! ! #t # # %m!4 % %&A+U5)#34 A+ A+FMz Mr&   r   )ry   rP   r0   rz   typingr   r   r   charsetgroupproberr   charsetproberr   enumsr	   r
   r   	escproberr   latin1proberr   macromanproberr   mbcsgroupproberr   
resultdictr   sbcsgroupproberr   utf1632proberr   r   r   r&   r7   <module>r      sH   8   	 ( ( 2 ( ; ; ' & * , " , (r rr&   