
    Mh*                     2   S r SSKJrJr  SSKrSSKrSSKrSSKrSSK	r	SSK
JrJr  SSKJr  \	R                   R#                  S5      r\	R                   R'                  S5      rS r\S 5       r\\	R                   R/                  S	S
S/5      \	R                   R/                  S/ SQ5      S 5       5       5       rS rS r\	R                   R/                  SS0 \" SS/05      4SSS0\" SS/05      4SSS/0\" SSS/05      4SS/SS.\" SS/05      4SS/SS.\" S\R6                  S/05      4/5      S  5       rS! r\	R                   R/                  S"/ S#Q5      S$ 5       r\	R                   R/                  S%SS/5      S& 5       rS' r \	R                   R/                  S/ S(Q5      S) 5       r!\	R                   R/                  S/ S*Q5      S+ 5       r"S, r#S- r$\	R                   R'                  S.5      \	R                   R/                  S/S0S1/5      S2 5       5       r%g)3zZ
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
    )BytesIOTextIOWrapperN)	DataFrameread_csvz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningpyarrow_skipc                     SnU n[        SR                  U5      5      nUR                  USUS9n[        SS//SS/S	9n[        R
                  " XE5        g )
Ncp1255u   שלום:1234
562:123:)sepencodingi2  {   u   שלום1234columnsr   encoder   r   tmassert_frame_equal)all_parsersr   parserdataresultexpecteds         V/var/www/html/env/lib/python3.13/site-packages/pandas/tests/io/parser/test_encoding.pytest_bytes_io_inputr      s[    HF+228<=D__TsX_>F3*
F/CDH&+    c                     U n[        SR                  5       5      nUR                  USSS S9n[        SS//5      n[        R
                  " X45        g )Nu   Łaski, Jan;1;utf-8)r   r   headeru   Łaski, Jan   r   )r   r   r   r   r   s        r   test_read_csv_unicoder"   (   sR    F&--/0D__TsWT_JF,a012H&+r   r   ,	r   )utf-16zutf-16lezutf-16bec                 V   U nSR                  SU5      nS[        R                  " 5        S3nUSS.nSn[        R                  " U5       nUR                  U5      n[        US5       n	U	R                  U5        S S S 5        [        [        UR                  U5      5      US	9 n
UR                  " U4S
U0UD6nUR                  " U
4S
U0UD6nS S S 5        [        R                  " WW5        S S S 5        g ! , (       d  f       N= f! , (       d  f       N?= f! , (       d  f       g = f)Nz)skip this
skip this too
A,B,C
1,2,3
4,5,6r#   __z__.csv   )r   skiprowsr   wbr   r   )replaceuuiduuid4r   ensure_cleanr   openwriter   r   r   r   )r   r   r   r   r   pathkwargsutf8
bytes_datafbytes_bufferr   r   s                r   test_utf16_bom_skiprowsr8   2   s
   
 F	 
S
	 	 

~V$Da(FD		$[[*
$GGJ  74;;t#45E__TGHGGF|MdMfMH F 	fh/ 
	  FE 
	s<   D)C8;*D%+D	D8
D	D	
D	D
D(c                     [         R                  R                  US5      nU nUR                  USSS9n[	        U5      S:X  d   eg )Nzutf16_ex.txtr%   r$   )r   r   2   )osr2   joinr   len)r   csv_dir_pathr2   r   r   s        r   test_utf16_exampler?   O   sA    77<<n5DF__TH$_?Fv;"r   c                     [         R                  R                  US5      nU nUR                  US SS9nUR	                  S5      nUS   S   nSnXV:X  d   eg )Nunicode_series.csvlatin-1)r    r   r   r!   i`  u$   Á köldum klaka (Cold Fever) (1994))r;   r2   r<   r   	set_index)r   r>   r2   r   r   gotr   s          r   test_unicode_encodingrE   V   s\    77<<&:;DF__T$_CFa F
)D/C9H??r   zdata,kwargs,expectedza
1ar!   z"a"
1	quotechar"zb
1namesb1
1T)rI   skip_blank_linesFc                   ^^	 U nSmSm	UU	4S jnUR                   S:X  a1  US:X  a+  UR                  SS5      (       a  [        R                  " SS	9  UR                  " U" U5      4S
T	0UD6n[
        R                  " Xs5        g )Nu   ﻿r   c                 B   > TU -   R                  T5      n[        U5      $ )N)r   r   )_databom_databomr4   s     r   _encode_data_with_bom,test_utf8_bom.<locals>._encode_data_with_bom{   s"    %K''-x  r   pyarrowrL   rM   Tz,https://github.com/apache/arrow/issues/38676)reasonr   )enginegetpytestskipr   r   r   )
r   r   r3   r   requestr   rS   r   rR   r4   s
           @@r   test_utf8_bomr\   b   s{    * F
CD!
 	"EMJJ)400 	IJ__248R4R6RF&+r   c                     [        S/S/S.5      nU nUR                  U5      nSR                  U5      nUR                  [	        U5      US9n[
        R                  " Xs5        g )Ng333333@test)mb_num	multibytezmb_num,multibyte
4.8,testr+   )r   formatr   r   r   r   r   )r   	utf_valueencoding_fmtr   r   r   r   r   s           r   test_read_csv_utf_aliasesrd      s^    SEABHF""9-H'..x8D__WT]X_>F&+r   zfile_path,encoding)))ior   csvz	test1.csvr   ))re   r   r   rA   rB   ))re   r   r   zsauron.SHIFT_JIS.csvshiftjisc                 d   U nU" U6 nUR                  XRS9n[        XRS9 nUR                  U5      nUR                  (       a   e S S S 5        [        R                  " UW5        [        USS9 n	UR                  XS9nU	R                  (       a   e S S S 5        [        R                  " Xh5        [        USSS9 n	UR                  XS9nU	R                  (       a   e S S S 5        [        R                  " Xh5        g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       NG= f)Nr+   rb)moder   )rj   	buffering)r   r0   closedr   r   )
r   	file_pathr   datapathr   fpathr   far   fbs
             r   test_binary_mode_file_buffersrr      s     Fi Eu8H	e	'2$99}9 
( (F+	e$	2799}9 
  (+	e$!	,799}9 
- (+ 
(	'
 
 	
 
-	,s#   %C?/#D<#D!?
D
D!
D/pass_encodingc                    U nUR                  U5      nUR                  S:X  a!  USL a  US;   a  [        R                  " S5        [	        SS/05      n[
        R                  " SUSS9 nUR                  S	5        UR                  S
5        UR                  Xs(       a  UOS S9n[
        R                  " X5        S S S 5        g ! , (       d  f       g = f)NrU   T)       zThese cases freezefoobarzw+)rj   r   return_filelikezfoo
barr   r+   )ra   rW   rY   rZ   r   r   r/   r1   seekr   r   )	r   rb   rc   rs   r   r   r   r6   r   s	            r   test_encoding_temp_filer{      s     F""9-H}}	!mt&;	X@U()%%)*H	dXt	LPQ	
	q	XDQ
f/ 
M	L	Ls   )AC
Cc                 j   U nSnSnSn[        X4/05      n[        R                  " 5        nUR                  U SU 3R	                  U5      5        UR                  S5        UR                  XbS9n[        R                  " Xu5        UR                  (       a   e S S S 5        g ! , (       d  f       g = f)Nz	shift-jisu	   てすとu   こむ
r   r+   )
r   tempfileNamedTemporaryFiler1   r   rz   r   r   r   rl   )r   r   r   titler   r   r6   r   s           r   test_encoding_named_temp_filer      s    FHED%)H		$	$	&!	5'D6"))(34	q	6
f/88|8 
'	&	&s   A/B$$
B2)r   r%   z	utf-16-bez	utf-16-lezutf-32c                     Sn[        UR                  U 5      5      n[        USU S9n[        SS/SS/SS	//S
S/S9n[        R
                  " X45        g )Nu   a	b
：foo	0
bar	1
baz	2r$   )	delimiterr   u   ：foor   rx   r!   bazr(   rF   rJ   )r   r   r   )r   r   encoded_datar   r   s        r   %test_parse_encoded_special_charactersr      sa     -D4;;x01LldXFFmeQZ%4c
H &+r   )r   Nr%   r	   rB   c                    U n[        / SQ/ SQ/ SQS.5      n[        R                  " 5        nUR                  USUS9  UR                  S:X  a>  Sn[
        R                  " [        US	9   UR                  XAS
S9  S S S 5         S S S 5        g UR                  XAS
S9nS S S 5        [        R                  " WU5        g ! , (       d  f       NH= f! , (       d  f       N7= f)N)Raphael	DonatellozMiguel AngelLeonardo)redpurpleorangeblue)saizbo staffnunchunkkatana)namemaskweaponF)indexr   rU   BThe 'memory_map' option is not supported with the 'pyarrow' enginematchT)r   
memory_map)
r   r   r/   to_csvrW   rY   raises
ValueErrorr   r   )r   r   r   r   filemsgdfs          r   test_encoding_memory_mapr      s     FH7?	
H 
	dEH===I%VCz5DI 6 
	 __T_F 
 "h' 65 
	s)   =C*B=;	CC=
C	C
Cc                    U n[        S/S-  S9nSUR                  S'   [        R                  " S5       nUR	                  USSSS	9  UR
                  S
:X  a?  Sn[        R                  " [        US9   UR                  USSS9  SSS5         SSS5        gUR                  USSS9nSSS5        [        R                  " WU5        g! , (       d  f       NI= f! , (       d  f       N7= f)zC
Chunk splits a multibyte character with memory_map=True

GH 43540
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaai   )r   u   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaąi  zbug-gh43540.csvFr   r   r    r   rU   r   r   NT)r    r   )r   ilocr   r/   r   rW   rY   r   r   r   r   )r   r   r   fnamer   dfrs         r    test_chunk_splits_multibyte_charr     s     F	d*	+B %BGGDM	*	+u
		%uUW	E==I%VCz5dtD 6 
,	+ ooeDToB 
, #r" 65 
,	+s)   >C3C	CC
C	C
C'c           	         / nSnSnSn[        [        U5      [        U5      U5       H`  nSR                  [        XUS-   5       Vs/ s H  n[        U5      PM     sn5      S-   n UR	                  S5        UR                  U5        Mb     U n[        U5      n	[        R                  " S5       n
U	R                  U
SSSS	9  UR                  S
:X  a@  Sn[        R                  " [        US9   UR                  U
SSSS9  SSS5         SSS5        gUR                  U
SSSS9nSSS5        [        R                   " U	W5        gs  snf ! [
         a     GM/  f = f! , (       d  f       Na= f! , (       d  f       NN= f)z[
GH 43787

Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8
    u   𐂀 r}   r   zutf8test.csvFr   rU   r   r   NT)r    r   r   )rangeordr<   chrr   UnicodeEncodeErrorappendr   r   r/   r   rW   rY   r   r   r   r   )r   linesline_length
start_charend_charlnumcliner   r   r   r   r   s                r   test_readcsv_memmap_utf8r   "  sW    EKJH c*os8}kBwwd4K(@A(@1A(@ABTI	KK  	T C F	5	B		(E
		%uUW	E==I%VCz5dtgV 6 
)	( ooeDTGoT 
) "c"% B " 		 65 
)	(sB   E
&E	/>E,-E 	E,E,	
EE
E)	%E,,
E:pyarrow_xfailrj   zw+bzw+tc                 $   U nSnSU;   a  Sn[         R                  " USS9 nUR                  U5        UR                  S5        UR	                  U5      nS S S 5        [        / S/S9n[        R                  " WU5        g ! , (       d  f       N1= f)Ns   abcdtabcdr   )rj   r   r   r   )r~   SpooledTemporaryFiler1   rz   r   r   r   r   )r   rj   r   contenthandler   r   s          r   test_not_readabler   D  s     FG
d{		&	&D7	CvWA__V$ 
D fX.H"h' 
D	Cs   4B
B)&__doc__re   r   r   r;   r~   r-   numpynprY   pandasr   r   pandas._testing_testingr   markfilterwarnings
pytestmarkusefixturesskip_pyarrowr   r"   parametrizer8   r?   rE   nanr\   rd   rr   r{   r   r   r   r   r   r    r   r   <module>r      s   
     [[''C
 {{&&~6, , , d,%GH0 I - 04	  
YaSz*+	K%y#s'<=	'C5!9cC:->#?@	3%T:IsQCj<QR e7sRVVQK()	
&,'&,,	, ,,0 4-80 90&( G,, %ST( U(.#4#D )%0( 1 *(r   