
    BhLb                        S SK r S SKrS SKJrJr  S SKJr  S SKJrJ	r	  S SK
r
SSKJrJrJr  \" S \ 5       5      r\" S \ 5       5      r\" S	 \	 5       5      r\\" S
S/5      -  r\R(                  " S5      r1 Skr\R(                  " S5      r0 rS r " S S5      r " S S\5      r " S S\5      r " S S5      r " S S5      rS r g)    N)BytesIOStringIO)Path)ascii_lettersascii_uppercase   )EOFReparseErrorspace_charactersc              #   @   #    U  H  oR                  5       v   M     g 7fNencode.0items     G/var/www/html/env/lib/python3.13/site-packages/tinyhtml5/inputstream.py	<genexpr>r      s     "N=MT;;===M   c              #   @   #    U  H  oR                  5       v   M     g 7fr   r   r   s     r   r   r      s     H-$-r   c              #   @   #    U  H  oR                  5       v   M     g 7fr   r   r   s     r   r   r      s     !LOD++--Or      >   <u   [---﷐-﷯￾￿🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿-]>            	 
               	 
       z[	- -/:-@\[-`{-~]c                    [        U [        5      (       aP  [        U 5      S:  aA  [        U 5      R	                  5       (       a#  [        [        U 5      R                  5       40 UD6$ [        U [        5      (       a  [        U R                  5       40 UD6$ [        [        U S5      (       a  U R                  S5      OU [        5      (       a  [        U 40 UD6$ [        U 40 UD6$ )N   readr   )

isinstancestrlenr   is_fileHTMLUnicodeInputStream	read_texthasattrr<   HTMLBinaryInputStream)sourcekwargss     r   HTMLInputStreamrG   &   s    &#3v;#4f9M9M9O9O%d6l&<&<&>I&II	FD	!	!%f&6&6&8CFCC	gff&=&=FKKN63	O	O%f777$V6v66    c                   R    \ rS rSrSrS rS rS rS rS r	S r
S	 rSS
 jrS rSrg)rA   1   zProvides a Unicode stream of characters to the HTMLTokenizer.

This class takes care of character encoding and removing or replacing
incorrect byte-sequences and also provides column and line tracking.

c                     S/U l         [        S5      S4U l        U R                  U5      U l        U R                  5         g)ab  Initialise the HTMLInputStream.

Create a normalized stream from source for use by tinyhtml5.

source can be either a file-object, local filename or a string.

The optional encoding parameter must be a string that indicates
the encoding.  If specified, that encoding will be used,
regardless of any BOM or later declaration (such as in a meta
element).

r   utf-8certainN)	new_lineslookup_encodingencodingopen_streamstreamresetselfrE   s     r   __init__HTMLUnicodeInputStream.__init__9   s9     (19=&&v.

rH   c                 f    SU l         SU l        SU l        / U l        SU l        SU l        S U l        g )N r   )chunk
chunk_sizechunk_offseterrorsprevious_number_linesprevious_number_columns_buffered_characterrU   s    r   rS   HTMLUnicodeInputStream.resetN   s;    
 &'"'($ $( rH   c                 >    [        US5      (       a  U$ [        U5      $ )zeProduce a file object from source.

source can be either a file object, local filename or a string.

r<   )rC   r   rT   s     r   rQ   "HTMLUnicodeInputStream.open_stream\   s      !00vFhv6FFrH   c                     U R                   nUR                  SSU5      nU R                  U-   nUR                  SSU5      nUS:X  a  U R                  U-   nXF4$ XS-   -
  nXF4$ )N
r   r   )rZ   countr^   rfindr_   )rU   offsetrZ   number_linesposition_linelast_line_positionposition_columns          r   	_position HTMLUnicodeInputStream._positiond   s{    

{{4F322\A"[[q&9#"::VCO // %Q(>?O//rH   c                 J    U R                  U R                  5      u  pUS-   U4$ )z9Return (line, col) of the current position in the stream.r   )ro   r\   )rU   linecolumns      r   positionHTMLUnicodeInputStream.positiono   s'    ~~d&7&78q&!!rH   c                     U R                   U R                  :  a  U R                  5       (       d  [        $ U R                   nU R                  U   nUS-   U l         U$ )z\Read one character from the stream or queue if available.

Return EOF when EOF is reached.

r   )r\   r[   
read_chunkr	   rZ   )rU   r\   	characters      r   rx    HTMLUnicodeInputStream.charactert   sT     /??$$
((JJ|,	(1,rH   c                    U R                  U R                  5      u  U l        U l        SU l        SU l        SU l        U R                  R                  S5      nU R                  (       a  U R                  U-   nS U l        OU(       d  g[        U5      S:  a3  [        US   5      nUS:X  d  SUs=::  a  S	::  a  O  OUS   U l        US S n[        [        [        R                  U5      5      5       H  nU R                  R                  S
5        M      UR!                  SS5      nUR!                  SS5      nXl        [        U5      U l        g)NrY   r   i (  Fr   rg      i   i  zinvalid-codepointz
rf   T)ro   r[   r^   r_   rZ   r\   rR   r<   r`   r?   ordrangeinvalid_unicode_refindallr]   appendreplace)rU   datalast_s       r   rw   !HTMLUnicodeInputStream.read_chunk   s$   CG>>OOD@"D$@ 
{{& ##++d2D'+D$t9q=tBx=Dt|v77+/8(CRy s-55d;<=AKK23 > ||FD)||D$'
d)rH   c           
          [         X4   n/ n UR                  U R                  U R                  5      nUc  U R                  U R                  :w  a  OONUR                  5       nXpR                  :w  a/  UR                  U R                  U R                  U 5        Xpl        O@UR                  U R                  U R                  S 5        U R                  5       (       d  OM  SR                  U5      $ ! [         ak    SR                  U Vs/ s H  nS[        U5      S 3PM     Os  snf sn5      nU(       d  SU 3n[        R
                  " SU S35      nU=n[         X4'    GN[f = f)zReturn a string of characters from the stream.

String goes up to but does not include any character in 'characters' or
EOF. 'characters' must be a container that supports the 'in' method and
iteration over its characters.

rY   z\x02x^[z]+N)characters_until_regexKeyErrorjoinr}   recompilematchrZ   r\   r[   endr   rw   )rU   
charactersoppositerx   regexresultr   r   s           r   chars_until"HTMLUnicodeInputStream.chars_until   sV   	P/0FGJ $$TZZ1B1BCE} $$7 8 iik //)MM$**T->->s"CD(+% MM$**T%6%6%789??$$+ . wwvA  	PGGTIs3y>#"67TUEE7JJ5'}-EJOOJ/0FG	Ps   
C3 3E(D&
%?E('E(c                     U[         Lan  U R                  S:X  a)  XR                  -   U l        U =R                  S-  sl        g U =R                  S-  sl        U R                  U R                     U:X  d   eg g )Nr   r   )r	   r\   rZ   r[   )rU   chars     r   ungetHTMLUnicodeInputStream.unget   sj     s?  A%
 "JJ.
1$!!Q&!zz$"3"34<<< rH   )
r`   rZ   r\   r[   rP   r]   rN   r_   r^   rR   N)F)__name__
__module____qualname____firstlineno____doc__rV   rS   rQ   ro   rt   rx   rw   r   r   __static_attributes__ rH   r   rA   rA   1   s7    *(G	0"
"#J,\=rH   rA   c                   Z   ^  \ rS rSrSr   SS jrU 4S jrS rS rS r	S r
S	 rS
rU =r$ )rD      zProvide a binary stream of characters to the HTMLTokenizer.

This class takes care of character encoding and removing or replacing
incorrect byte-sequences and also provides column and line tracking.

c                     U R                  U5      U l        SU l        X l        X0l        X@l        XPl        X`l        U R                  5       U l	        U R                  S   c   eU R                  5         g )Ni   r   )rQ   
raw_streamnumber_bytes_metaoverride_encodingtransport_encodingsame_origin_parent_encodinglikely_encodingdefault_encodingdetermine_encodingrP   rS   )rU   rE   r   r   r   r   r   s          r   rV   HTMLBinaryInputStream.__init__   sp    
 **62
 "&!2"4+F(. 0 //1}}Q+++ 	

rH   c                    > U R                   S   R                  R                  nU" U R                  S5      U l        [
        TU ]  5         g )Nr   r   )rP   
codec_infostreamreaderr   rR   superrS   )rU   r   	__class__s     r   rS   HTMLBinaryInputStream.reset	  s8    }}Q'22??"4??I>rH   c                     [        US5      (       a8  [        US5      (       a  UR                  5       (       a  U$ UR                  5       n[        U5      $ )Nr<   seekable)rC   r   r<   r   rT   s     r   rQ   !HTMLBinaryInputStream.open_stream  sB    66""vz**v/@/@[[]FvrH   c                     U R                  5       S4nUS   b  U$ [        U R                  5      S4nUS   b  U$ [        U R                  5      S4nUS   b  U$ U R	                  5       S4nUS   b  U$ [        U R
                  5      S4nUS   b%  US   R                  R                  S5      (       d  U$ [        U R                  5      S4nUS   b  U$ [        U R                  5      S4nUS   b  U$ [        S5      S4$ )NrM   r   	tentativezutf-16windows-1252)

detect_bomrO   r   r   detect_encoding_metar   name
startswithr   r   )rU   rP   s     r   r   (HTMLBinaryInputStream.determine_encoding  s2    ??$i/A;"O #4#9#9:IEA;"O #4#:#:;YFA;"O ,,.;A;"O #4#C#CDkQA;"8A;+;+;+F+Fx+P+PO #4#7#78+EA;"O #4#8#89;FA;"O ~.;;rH   c                    U R                   S   S:w  d   e[        U5      =nc  g UR                  S;   a  [        S5      nUc   eg XR                   S   :X  a  U R                   S   S4U l         g U R                  R	                  S5        US4U l         U R                  5         [        SU R                   S    SU 35      e)Nr   rM   utf-16beutf-16lerL   r   zEncoding changed from z to )rP   rO   r   r   seekrS   r
   )rU   new_encodings     r   change_encoding%HTMLBinaryInputStream.change_encoding=  s    }}Q9,,,+L99LB 88*73L+++]]1--!]]1-y9DMOO  #)95DMJJL(q)9(:$|nMO OrH   c           
         [         R                  S[         R                  S[         R                  S[         R                  S[         R
                  S0nU R                  R                  S5      n[        U[        5      (       d   eS HF  nUR                  USU 5      =n(       d  M   U R                  R                  U5        [        U5      s  $    U R                  R                  S	5        g)
zAttempt to detect at BOM at the start of the stream.

If an encoding can be determined from the BOM return the name of the
encoding otherwise return None.

rL   r   r   zutf-32lezutf-32be   )   r      Nr   )codecsBOM_UTF8BOM_UTF16_LEBOM_UTF16_BEBOM_UTF32_LEBOM_UTF32_BEr   r<   r=   bytesgetr   rO   )rU   bomsstringr   rP   s        r   r    HTMLBinaryInputStream.detect_bomM  s     OOW
 %%a(&%(((( D88F5DM22x2$$T*&x00	  	QrH   c                 &   U R                   R                  U R                  5      n[        U[        5      (       d   e[        U5      nU R                   R                  S5        UR                  5       nUb  UR                  S;   a  [        S5      nU$ )z1Report the encoding declared by the meta element.r   r   rL   )
r   r<   r   r=   r   EncodingParserr   get_encodingr   rO   )rU   bufferparserrP   s       r   r   *HTMLBinaryInputStream.detect_encoding_metaj  sz    %%d&<&<=&%(((('Q&&(HMM5M$M&w/HrH   )	r   rP   r   r   r   r   r   rR   r   )NNNNr   )r   r   r   r   r   rV   rS   rQ   r   r   r   r   r   __classcell__)r   s   @r   rD   rD      s?     KOCG"02
&<PO  : rH   rD   c                   |    \ rS rSrSrS rS rS rS rS r	S r
\" \
\	5      r\S	 5       r\4S
 jrS rS rS rSrg)EncodingBytesix  zBytes-like object with an associated position and various extra methods.

If the position is ever greater than the string length then an exception is
raised.

c                 v    [        U[        5      (       d   e[        R                  XR                  5       5      $ r   )r=   r   __new__lower)clsvalues     r   r   EncodingBytes.__new__  s*    %''''}}S++-00rH   c                     SU l         g )Nrg   ro   )rU   r   s     r   rV   EncodingBytes.__init__  s	    rH   c                 d    U R                   S-   =ol         U[        U 5      :  a  [        eXUS-    $ Nr   ro   r?   StopIterationrU   rt   s     r   __next__EncodingBytes.__next__  s6    $(NNQ$66>s4y X\**rH   c                 <    U R                   S-
  =U l         nXUS-    $ r   r   r   s     r   previousEncodingBytes.previous  s&    $(NNQ$66X\**rH   c                 d    U R                   [        U 5      :  a  [        e[        SU5      U l         g Nr   )ro   r?   r   maxr   s     r   set_positionEncodingBytes.set_position  s&    >>SY&Q)rH   c                 z    U R                   [        U 5      :  a  [        eU R                   S:  a  U R                   $ g r   r   ra   s    r   get_positionEncodingBytes.get_position  s3    >>SY&>>Q>>! rH   c                 8    X R                   U R                   S-    $ r   )rt   ra   s    r   current_byteEncodingBytes.current_byte  s    MM$--!"344rH   c                     U R                   nU[        U 5      :  a*  XUS-    nX1;  a  X l        U$ US-  nU[        U 5      :  a  M*  X l        g)zSkip past a list of characters.r   Nrt   r?   ro   rU   r   rt   rx   s       r   skipEncodingBytes.skip  sW    ==T"hl3I*!)  MH T" "rH   c                     U R                   nU[        U 5      :  a*  XUS-    nX1;   a  X l        U$ US-  nU[        U 5      :  a  M*  X l        g r   r   r   s       r   
skip_untilEncodingBytes.skip_until  sW    ==T"hl3I&!)  MH T" "rH   c                     U R                  XR                  5      =n(       a  U =R                  [        U5      -  sl        U$ )zLook for a sequence of bytes at the start of a string.

If the bytes are found return True and advance the position to the byte
after the match. Otherwise return False and leave the position alone.

)r   rt   r?   )rU   r   r   s      r   match_bytesEncodingBytes.match_bytes  s3     __UMM::6:MMSZ'MrH   c                      U R                  XR                  5      [        U5      -   S-
  U l        g! [         a    [
        ef = f)zLook for the next sequence of bytes matching a given sequence.

If a match is found advance the position to the last byte of the match.

r   T)indexrt   r?   ro   
ValueErrorr   )rU   r   s     r   jump_toEncodingBytes.jump_to  sE    	 !ZZ}}=E
JQNDN   	 	 s	   /2 Ar   N)r   r   r   r   r   r   rV   r   r   r   r   propertyrt   r   space_characters_bytesr  r  r  r  r   r   rH   r   r   r   x  s^    1++*
" l3H5 5 5 
		
rH   r   c                   N    \ rS rSrSrS rS rS rS rS r	S r
S	 rS
 rS rSrg)r   i  z@Mini parser for detecting character encoding from meta elements.c                 2    [        U5      U l        S U l        g r   )r   r   rP   rU   r   s     r   rV   EncodingParser.__init__  s    !$'	rH   c                 6   SU R                   ;  a  g U R                  U R                  U R                  U R                  U R                  U R
                  S.nU R                    Hx  nSn U R                   R                  S5        UR                  5        H/  u  pEU R                   R                  U5      (       d  M'   U" 5       n  O   U(       a  Mm    U R                  $    U R                  $ ! [         a       U R                  $ f = f! [         a    Sn   MP  f = f)N   <meta)s   <!--r  s   </s   <!s   <?r   Tr   F)r   handle_commenthandle_metahandle_possible_end_taghandle_otherhandle_possible_start_tagr  r   itemsr  rP   )rU   method_dispatchr   keep_parsingkeymethods         r   r   EncodingParser.get_encoding  s   499$ ((&&//$$$$00
 AL		!!$'  /44699((--'-x	  7  <}}# " }} !  }} ) ',s$   )C+<D+
DDDDc                 8    U R                   R                  S5      $ )zSkip over comments.s   -->r   r  ra   s    r   r  EncodingParser.handle_comment  s    yy  ((rH   c                    U R                   R                  [        ;  a  gSnS n U R                  5       =nc  gUS   S:X  a  US   S:H  nU(       a
  Ub  X l        gOuUS   S:X  a  US   n[        U5      nUb  XPl        gOQUS   S:X  aH  [        [        US   5      5      nUR                  5       =nb  [        U5      nUb  U(       a  XPl        gUnM  )	NTFr   s
   http-equivr   s   content-type   charsets   content)	r   r   r  get_attributerP   rO   ContentAttributeParserr   parse)rU   
has_pragmapending_encoding	attributetentative_encodingcodeccontent_parsers          r   r  EncodingParser.handle_meta  s    99!!)??
!//11	:|},&q\_<
"2">$4M 1+%.q\"'(:;$$)M  % 1+!7iPQl8S!T*8*>*>*@@&M+,>?E(%,1M#(+0(1 rH   c                      U R                  SS9$ )NFend_tag)handle_possible_tagra   s    r   r  (EncodingParser.handle_possible_start_tag  s    '''66rH   c                 J    [        U R                  5        U R                  SS9$ )NTr1  )nextr   r3  ra   s    r   r  &EncodingParser.handle_possible_end_tag  s!    TYY'''55rH   c                    U R                   nUR                  [        ;  a(  U(       a   UR                  5         U R	                  5         gUR                  [        5      nUS:X  a  UR                  5         g U R                  5       c   gM  )NTr   )r   r   ascii_letters_bytesr   r  r  spaces_angle_bracketsr&  )rU   r2  r   rx   s       r   r3  "EncodingParser.handle_possible_tag#  s}    yy$77 !!#OO$9:	 MMO  %%'/ rH   c                 8    U R                   R                  S5      $ )Nr   r"  ra   s    r   r  EncodingParser.handle_other:  s    yy  &&rH   c                    U R                   nUR                  [        [        S/5      -  5      nUb  [	        U5      S:X  d   eUS;   a  g/ n/ n US:X  a  U(       a  OU[        ;   a  UR                  5       nOeUS;   a  SR                  U5      S4$ U[        ;   a   UR                  UR                  5       5        OUc  gUR                  U5        [        U5      nM  US:w  a#  UR                  5         SR                  U5      S4$ [        U5        UR                  5       nU=nS;   a{   [        U5      nX%:X  a-  [        U5        SR                  U5      SR                  U5      4$ U[        ;   a   UR                  UR                  5       5        OUR                  U5        Mz  US	:X  a  SR                  U5      S4$ U[        ;   a   UR                  UR                  5       5        OUc  gUR                  U5         [        U5      nU[        ;   a"  SR                  U5      SR                  U5      4$ U[        ;   a   UR                  UR                  5       5        OUc  gUR                  U5        Mx  )
zkReturn a (name, value) pair for the next attribute in the stream.

If no attribute is found, return None.

   /Nr   )r   N   =)r?  r   rH   )   '   "r   )r   r  r  	frozensetr?   r   ascii_uppercase_bytesr   r   r6  r   r:  )rU   r   rx   attribute_nameattribute_valuequotes         r   r&  EncodingParser.get_attribute=  sH    yyII4y$7HHI	 C	Na$777$D ^44 IIK	l*xx/4433%%ioo&78"%%i0T
I! $ MMO88N+S00T
IIK	E</ J	%J88N3SXXo5NNN"77#**9??+<= $**95  $88N+S00//""9??#45""9-T
I11xx//1JJJ33&&y'89"&&y1 rH   )r   rP   N)r   r   r   r   r   rV   r   r  r  r  r  r3  r  r&  r   r   rH   r   r   r     s4    J>)1B76.'L2rH   r   c                        \ rS rSrS rS rSrg)r'  i  c                 >    [        U[        5      (       d   eXl        g r   )r=   r   r   r  s     r   rV   ContentAttributeParser.__init__  s    $&&&&	rH   c                     U R                   R                  S5        U R                   =R                  S-  sl        U R                   R                  5         U R                   R                  S:X  d  g U R                   =R                  S-  sl        U R                   R                  5         U R                   R                  S;   a  U R                   R                  nU R                   =R                  S-  sl        U R                   R                  nU R                   R                  U5      (       a"  U R                   X R                   R                   $ g U R                   R                  n U R                   R                  [        5        U R                   X R                   R                   $ ! [         a    U R                   US  s $ f = f! [         a     g f = f)Nr%  r   r@  )rB  rA  )r   r  rt   r  r   r  r  r   )rU   rG  old_positions      r   r(  ContentAttributeParser.parse  s_   	IIj)II!#IINN99))T1II!#IINNyy%%5		..		""a'"#yy1199$$U++99\))2D2DEE  $yy114II(()?@99\))2D2DEE$ 499\]334  		s>   A.G	 1CG	 G	 )A F* *GG	 GG	 	
GG)r   N)r   r   r   r   rV   r(  r   r   rH   r   r'  r'    s    rH   r'  c                     [        U [        5      (       a   U R                  S5      n U b   [        R
                  " U 5      $ g! [         a     gf = f! [         a     gf = f)zReturn the Python codec name corresponding to an encoding.

Return None if the string doesn't correspond to a valid encoding.

asciiN)r=   r   decodeUnicodeDecodeErrorwebencodingslookupAttributeError)rP   s    r   rO   rO     sn     (E""	w/H 	&&x00  " 		  		s"   A A 
AA
A A )!r   r   ior   r   pathlibr   r   r   r   rS  	constantsr	   r
   r   rC  r  r9  rD  r:  r   r   non_bmp_invalid_codepointsascii_punctuation_rer   rG   rA   rD   r   r   r   r'  rO   r   rH   r   <module>r[     s     	    1  : : #"N=M"NN H-HH !!LO!LL .D$<1HH ZZ=> N  zzOQ   7t= t=nM2 M`VE Vrx2 x2v# #LrH   