
    Q?h,                     l    S SK r S SKrS SKJrJr  SSKJrJr  \R                  " S5      r	 " S S5      r
g)    N)OptionalUnion   )LanguageFilterProbingStates%   [a-zA-Z]*[-]+[a-zA-Z]*[^a-zA-Z-]?c                   >   \ rS rSrSr\R                  4S\SS4S jjrSS jr\	S\
\   4S j5       r\	S\
\   4S	 j5       rS
\\\4   S\4S jr\	S\4S j5       rS\4S jr\S\\\4   S\4S j5       r\S\\\4   S\4S j5       r\S\\\4   S\4S j5       rSrg)CharSetProber(   gffffff?lang_filterreturnNc                     [         R                  U l        SU l        Xl        [
        R                  " [        5      U l        g )NT)	r   	DETECTING_stateactiver   logging	getLogger__name__logger)selfr   s     G/var/www/html/env/lib/python3.13/site-packages/chardet/charsetprober.py__init__CharSetProber.__init__,   s.    ",,&''1    c                 .    [         R                  U l        g N)r   r   r   r   s    r   resetCharSetProber.reset2   s    ",,r   c                     g r    r   s    r   charset_nameCharSetProber.charset_name5   s    r   c                     [         er   NotImplementedErrorr   s    r   languageCharSetProber.language9   s    !!r   byte_strc                     [         er   r$   )r   r(   s     r   feedCharSetProber.feed=   s    !!r   c                     U R                   $ r   )r   r   s    r   stateCharSetProber.state@   s    {{r   c                     g)Ng        r    r   s    r   get_confidenceCharSetProber.get_confidenceD   s    r   bufc                 6    [         R                  " SSU 5      n U $ )Ns   ([ -])+    )resub)r2   s    r   filter_high_byte_only#CharSetProber.filter_high_byte_onlyG   s    ff&c2
r   c                     [        5       n[        R                  U 5      nU HJ  nUR                  USS 5        USS nUR	                  5       (       d  US:  a  SnUR                  U5        ML     U$ )u  
We define three types of bytes:
alphabet: english alphabets [a-zA-Z]
international: international characters [-ÿ]
marker: everything else [^a-zA-Z-ÿ]
The input buffer can be thought to contain a series of words delimited
by markers. This function works to filter all words that contain at
least one international character. All contiguous sequences of markers
are replaced by a single space ascii character.
This filter applies to all scripts which do not use English characters.
N   r4   )	bytearrayINTERNATIONAL_WORDS_PATTERNfindallextendisalpha)r2   filteredwordsword	last_chars        r   filter_international_words(CharSetProber.filter_international_wordsL   st     ;
 ,33C8DOOD"I& RS	I$$&&9w+> 	OOI&  r   c                 D   [        5       nSnSn[        U 5      R                  S5      n [        U 5       HN  u  pEUS:X  a	  US-   nSnM  US:X  d  M  XC:  a+  U(       d$  UR	                  XU 5        UR	                  S5        SnMP     U(       d  UR	                  XS	 5        U$ )
a+  
Returns a copy of ``buf`` that retains only the sequences of English
alphabet and high byte characters that are not between <> characters.
This filter can be applied to all scripts which contain both English
characters and extended ASCII characters, but is currently only used by
``Latin1Prober``.
Fr   c   >r      <r4   TN)r<   
memoryviewcast	enumerater?   )r2   rA   in_tagprevcurrbuf_chars         r   remove_xml_tagsCharSetProber.remove_xml_tagsn   s     ;o""3''nND 4axT!;v OOCTN3OOD) -$  OOCJ'r   )r   r   r   r   )r   N)r   
__module____qualname____firstlineno__SHORTCUT_THRESHOLDr   NONEr   r   propertyr   strr!   r&   r   bytesr<   r   r*   r-   floatr0   staticmethodr7   rE   rR   __static_attributes__r    r   r   r	   r	   (   s0   5C5H5H 2N 2T 2- hsm   "(3- " ""U5)#34 " " |    5	)9#: u   eY.>(? I  B $U5)#34 $ $ $r   r	   )r   r5   typingr   r   enumsr   r   compiler=   r	   r    r   r   <module>rb      s1   :  	 " / jj8 
k kr   