
    g                     D   d dl Z d dlmZ d dlZd dlZd dlmZ d dlmZ d dl	Z	d dl
Zd dl
mZ d dlmZ d dlmZmZmZ 	 d dlmZ d dlmZmZ 	 d dlZd dlmZ d d	lmZ d d
lm Z  	 d dl!Z"e	jF                  j(                  Z$d Z%d Z&e	jF                  j2                  d        Z'e	jF                  j2                  d        Z(e	jF                  j2                  d        Z)e	jF                  j2                  d        Z*e	jF                  j2                  d        Z+d Z,d Z-e	jF                  j\                  d        Z/e	jF                  j2                  d        Z0e	jF                  j2                  d        Z1d Z2e	jF                  j2                  d        Z3d Z4e	jF                  jk                  dd ejl                         g      e	jF                  jk                  dd      d               Z7d Z8d Z9d  Z:d! Z;d" Z<d# Z=d$ Z>d% Z?e	jF                  j2                  d&        Z@e	jF                  j2                  d'        ZAe	jF                  j2                  d(        ZBd) ZCd* ZDe	jF                  j2                  d+        ZEd, ZFe	jF                  j2                  e	jF                  j                  e	jF                  j                  d-      e	jF                  j                  d.      d/                             ZIe	jF                  jk                  d0d1 d2 d3 d4 g      e	jF                  jk                  d5d6d7g      d8               ZJd9 ZKd: ZLd; ZMd< ZNd= ZOd> ZPe	jF                  j                  d?        ZRe	jF                  j                  d@        ZSy# e$ r dZY 4w xY w# e$ r dxZZY -w xY w# e$ r dZ"Y 6w xY w)A    N)OrderedDict)copytree)Decimal)fs)util)_check_roundtrip_roundtrip_table_test_dataframe)_read_table_write_table)dataframe_with_lists)alltypes_samplec                 :   t        j                  dg di      }t        j                  t        d      5  t        || dz  d       d d d        t        j                  t        d      5  t        || dz  d	       d d d        y # 1 sw Y   ?xY w# 1 sw Y   y xY w)
Na         z"Unsupported Parquet format versionmatchztest_version.parquetz2.2versionz%Unsupported Parquet data page version)data_page_version)patablepytestraises
ValueErrorr   )tempdirr   s     U/var/www/openai/venv/lib/python3.12/site-packages/pyarrow/tests/parquet/test_basic.pytest_parquet_invalid_versionr!   ;   s    HHc9%&E	z)M	NUG&<<eL 
O	z *! 
"UG&<<',	.
" 
" 
O	N
" 
"s   B*BBBc                      t        j                  g ddz        } t         j                  j                  | gdg      }ddg}|D ]  }t	        ||        y )Nr   i f0namesi   i   )data_page_size)r   arrayTablefrom_arraysr   )arrt
page_sizestarget_page_sizes       r    test_set_data_page_sizer.   E   sT    
((9v%
&C
cU4&1A 7#J&+;< '    c                  z    t        d      } t        j                  j                  | d      }t	        |ddd       y )Nd   Fpreserve_index
   r   2.4)r&   write_batch_sizer   )r
   r   r(   from_pandasr   dfr   s     r    test_set_write_batch_sizer:   O   s6    		BHH  E :Eb1er/   c                      t        d      } t        j                  j                  | d      }t	        |ddd       t        j                  t              5  t	        |ddd       d d d        y # 1 sw Y   y xY w)	Nr1   Fr2   r   r4   r5   )dictionary_pagesize_limitr&   r   r   )r
   r   r(   r7   r   r   r   	TypeErrorr8   s     r    "test_set_dictionary_pagesize_limitr>   Y   sa    		BHH  E :EUa$&7 
y	!#(*E	; 
"	!	!s   A..A7c            	         g } t         j                  j                  t        d            }| j	                  t         j
                  j                  |gdz               t               \  }}t         j                  j                  |      }| j	                  t         j
                  j                  |gdz               dD ]  }dD ]  }| D ]  }t        |d||         ! y )Nr4   sizer   )z1.0z2.0)TF2.6)r   r   use_dictionary)	r   RecordBatchr7   r   appendr(   from_batchesr   r   )tablesbatchr9   _r   rC   r   s          r    test_chunked_table_writerJ   f   s     FNN&&B'?@E
MM"((''!45 "EBNN&&r*E
MM"((''!45++N 5&7#13   , ,r/   c                 H   t        d      }t        j                  j                  |      }t	        |ddid       t        | dz        }t        |d      5 }t        ||d	       d d d        t        j                  |d
      }|j                  |      sJ y # 1 sw Y   4xY w)Nr4   r@   
memory_mapTrB   read_table_kwargsr   tmp_filewbr   )rL   r   r   r(   r7   r   stropenr   pqread_pandasequalsr   r9   r   filenamef
table_reads         r    test_memory_mapr[   y   s    	b	!BHH  $EU|T.B"$ 7Z'(H	h	UAu- 
T:JU### 
	   BB!c                 H   t        d      }t        j                  j                  |      }t	        |ddid       t        | dz        }t        |d      5 }t        ||d	       d d d        t        j                  |d
      }|j                  |      sJ y # 1 sw Y   4xY w)Nr4   r@   buffer_sizei  rB   rM   rO   rP   r   i   )r^   rQ   rW   s         r    test_enable_buffered_streamr_      s    	b	!BHH  $EU}d.C"$ 7Z'(H	h	UAu- 
d;JU### 
	r\   c                 >   t         j                  j                  t        j                  dg      gdg      }d}| |z  }|j	                         rJ t        |t        |             |j	                         sJ t        t        |            }|j                  |      sJ y )N*   intsz	foo # bar)	r   r(   r)   r'   existsr   rR   r   rV   )r   r   rX   pathrZ   s        r    test_special_chars_filenamere      s    HH  "((B4.!1F8<EHXD{{}D	";;==SY'JU###r/   c                     t        j                  t        d      5  t        j                  d        d d d        t        j                  t        d      5  t        j
                  d        d d d        y # 1 sw Y   CxY w# 1 sw Y   y xY w)NNoner   )r   r   r=   rT   
read_tableParquetFile r/   r    test_invalid_sourcerk      sY     
y	/
d 
0 
y	/
t 
0	/ 
0	/ 
0	/s   A4B 4A= B	c                  t    t        j                  t        t        d            gdg      } t	        | d       y )Ni@  r#   r$   r   )row_group_size)r   r   listranger   )r+   s    r    (test_file_with_over_int16_max_row_groupsrp      s,     	$uU|$%dV4AQq)r/   c                  Z   t        d      } t        j                  j                  |       }t        j                  j	                  |j                         D cg c]  }|j                  d      d d  c}|j                  j                        }|j                  j                  d      j                  t        j                         k(  sJ |j                  j                  d      j                  t        j                  t        j                               k(  sJ t        |d       y c c}w )	Nr4   r@   r   r$   null	null_listrB   r   )r   r   r(   r7   r)   itercolumnschunkschemar%   fieldtyperr   list_r   )r9   r   cols      r    test_empty_table_roundtripr{      s    	b	!B HH  $EHH  %*%6%6%89%8c1bq	%89ll   ! "E <<f%**bggi777<<k*//288BGGI3FFFFu 	:s   D(c                      t        j                         } t        j                  j	                  | d      }t        |       y )NFr2   )pd	DataFramer   r(   r7   r   )r9   emptys     r    test_empty_table_no_columnsr      s.    	BHH  E :EUr/   c            	      H   t        t        j                         t        j                  t        j                                     } g t        dd      gg}|D cg c]:  }t        j
                  |t        j                  |             j                         < }}|D cg c]6  }t        j                  j                  |t        j                  |             8 }}t        j                  j                  |t        j                  |             }t        |       y c c}w c c}w )N)int32list_stringr   )Grx   )rv   )r   r   r   ry   stringr'   structflattenrD   r)   rv   r(   rF   r   )colsdatarH   	my_arrays
my_batchestbls         r    1test_write_nested_zero_length_array_chunk_failurer      s    hhjHHRYY[)D 1&9<=D #$"U %biio6>>@"  $  )*(e ..,,U299T?,K(  *
((


BIIdO
<CS$*s   ?D;Dc                    | dz  }t        j                  dt        j                  dt        j                        i      }t        ||       t        |      }|j                         }t        j                  ||       t        |       dz   }t        j                  dt        j                  dt        j                        i      }t        ||       t        |      }|j                         }t        j                  ||       y )Nzzzz.parquetxr4   dtype)r}   r~   nparangeint64r   r   	to_pandastmassert_frame_equalrR   )r   rd   r9   rZ   df_reads        r    test_multiple_path_typesr      s     ]"D	sBIIb9:	;BTT"J""$G"g& w<-'D	sBIIb9:	;BTT"J""$G"g&r/   c                 X   | dz  }t        j                  dg di      }t        ||       t        j                  |      }t        |      }|j                  |      sJ t        j                  t              5  t        |t        j                                d d d        y # 1 sw Y   y xY w)Ntest.parquetr   r   
filesystem)r   r   r   r   FSProtocolClassr   rV   r   r   r=   r   
FileSystem)r   rd   r   fs_protocol_objresults        r    test_fspathr      s    ^#DHHc9%&E**40O)F== 
y	!O@ 
"	!	!s   7 B  B)r   name)data.parquetu   例.parquetc                 >   t        j                  dg di      }| |z  }t        j                  |t	        |             t        j                  |       5  t        j                  ||      }d d d        j                  |      sJ |j                          |j                         rJ t        j                  |       5  t        j                  |||       d d d        t        j                  |      }|j                  |      sJ y # 1 sw Y   xY w# 1 sw Y   >xY w)Nr   r   r   )r   r   rT   write_tablerR   r   
change_cwdrh   rV   unlinkrc   )r   r   r   r   rd   r   s         r    test_relative_pathsr     s     HHc9%&ET>D NN5#d)$		!t
; 
"==KKM{{} 
	!
udz: 
"]]4 F== 
"	! 
"	!s   D=DDDc                      t        j                  t              5  t        j                  d       d d d        y # 1 sw Y   y xY w)Nzi-am-not-existing.parquet)r   r   FileNotFoundErrorrT   rh   rj   r/   r    test_read_non_existing_filer   &  s(    	(	)
12 
*	)	)s	   9Ac                       G d dt         j                        } t        j                  t        d      5  t        j                   | d             d d d        y # 1 sw Y   y xY w)Nc                       e Zd Zd Zd Zy)3test_file_error_python_exception.<locals>.BogusFilec                     t        d      NzorglubZeroDivisionErrorselfargss     r    readz8test_file_error_python_exception.<locals>.BogusFile.read.      #I..r/   c                     t        d      r   r   r   s     r    seekz8test_file_error_python_exception.<locals>.BogusFile.seek1  r   r/   N)__name__
__module____qualname__r   r   rj   r/   r    	BogusFiler   -  s    	/	/r/   r   r   r   r/   )ioBytesIOr   r   r   rT   rh   )r   s    r     test_file_error_python_exceptionr   ,  s?    /BJJ / 
(		:
in% 
;	:	:s   AA#c                    t        j                  dg di      }t        j                  |t	        | dz               t        t	        | dz        d      5 }t        j                  |      }d d d        j                  |      sJ t        t	        | dz        d      5 }t        j                  t        j                  |            }d d d        |j                  |      sJ y # 1 sw Y   yxY w# 1 sw Y   )xY w)Nr   r   r   rb)	r   r   rT   r   rR   rS   rh   rV   
PythonFile)r   r   rY   r   s       r    test_parquet_read_from_bufferr   9  s    HHc9%&ENN5#g678	c'N*+T	2aq! 
3==	c'N*+T	2ar}}Q/0 
3== 
3	2 
3	2s   C")C."C+.C7c            
         t        j                  t        t        t        t        d                        } t        j                  t        t        t        t        d                        }t        j                  ddgdz        }| | g}t         j                  j                  |ddg      }t        ||ddd	       t        ||ddgdg	       t        ||dddgddg	       t         j                  j                  | | ||gg d
      }t        ||ddgddg       t         j                  j                  |gdg      }t        j                  t        d      5  t        ||dd       d d d        y # 1 sw Y   y xY w)Nr1   TF2   r   br$   gzip)expectedcompressionrC   use_byte_stream_splitr   r   cdr   r   )r   rC   r   tmpBYTE_STREAM_SPLIT only supportsr   )r   r   rC   )r   r'   rn   mapfloatro   intr(   r)   r   r   r   IOError)	arr_floatarr_intarr_bool
data_floatr   mixed_tables         r    test_byte_stream_splitr   G  sZ   c%s456IhhtCU3Z012Gxxu*+HY'JHH  C: >E UU$)G
 UU%(E,/52
 UU%(#J,/:7
 ((&&	9gw'O-A ' CK[;%(#J,/:7 HH  (E7 ;E	w&G	Hd(-	/ 
I	H	Hs   E33E<c           
      L   t        j                  t        t        t        t        d                  t        j                  dd            }t        j                  t        t        t        t        d                  t        j                  dd            }t        j                  t        t        t        t        d                  t        j                  dd            }t        j                  dd	gd
z        }|||g}t         j                  j                  |g d      }t        ||dd	d       t        j                  j                  | d      }t        j                  ||dd	d       t        j                  |      }|j                   j#                  d      }	|j                   j#                  d      }
|	j$                  dk(  sJ |
j$                  dk(  sJ t        ||dd	dddd       t         j                  j                  ||||gg d      }t        ||d	d       y )Nr1      r   r      	      TFr   r   r   r   r$   r   )r   r   rC   store_decimal_as_integerr   )r   rC   r   r   r   INT32INT64DELTA_BINARY_PACKEDr   r   )r   r   rC   r   column_encodingr   )r   rC   r   )r   r'   rn   r   r   ro   
decimal128r(   r)   r   osrd   joinrT   r   ri   rv   columnphysical_type)r   arr_decimal_1_9arr_decimal_10_18arr_decimal_gt18r   data_decimalr   pqtestfile_path
pqtestfilepqcol_decimal_1_9pqcol_decimal_10_18r   s               r    test_store_decimal_as_integerr   m  s   hhtCs$<=$&MM!Q$79Oc'5:&>!?&(mmB&:<xxS%*%= >%']]2q%9;xxu*+H#%68HILHH  _ EE U#!'$).2	4 ggll7N;ONN5/%"',02
 0J"))003$++2215**g555,,777 U#!'$).233& ((&&	+-=xH" ' $K [)$).24r/   c                  &   t        j                  t        t        t        t        d                        } t        j                  t        t        t        t        d                        }t        j                  t        d      D cg c]  }t        |       c}t        j                               }t        j                  t        d      D cg c]  }t        |      j                  d       c}t        j                  d            }t        j                  g ddz        }t         j                  j                  | ||||gg d      }t        ||dd	d	d
d	d       t        ||dd
       t        ||dd
dd
d       t        ||dd
ddd       t        ||dd
dddd       t        ||dddi       t        j                  t        d      5  t        ||dd
d
d	d       d d d        t        j                  t         d      5  t        ||ddd
d
d       d d d        t        j                  t"        d      5  t        ||dd       d d d        t        j                  t"        d      5  t        ||dddi       d d d        t        j                  t"              5  t        ||dgdd
i       d d d        t        j                  t"              5  t        ||dd
i       d d d        t        j                  t"              5  t        ||ddgdd	d
d       d d d        t        j                  t"              5  t        ||dddd	d
d       d d d        t        j                  t$              5  t        ||dd       d d d        y c c}w c c}w # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   hxY w# 1 sw Y   @xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w) Nr1   r   r4   )FTFF   )r   r   r   r   er$   FBYTE_STREAM_SPLITPLAINr   )r   rC   r   r   r   DELTA_LENGTH_BYTE_ARRAYDELTA_BYTE_ARRAYr   RLEr   r   )r   r   r   z)DELTA_BINARY_PACKED encoder only supportsz+'RLE_DICTIONARY' is already used by defaultRLE_DICTIONARYz/Unsupported column encoding: 'MADE_UP_ENCODING'r   MADE_UP_ENCODINGr   )r   r   )r   rC   r   r   T)r   r'   rn   r   r   ro   r   rR   binaryzfillr(   r)   r   r   r   r   OSErrorr   r=   )r   r   r   arr_binarr_flbar   r   s          r    test_column_encodingr    s   c%s456IhhtCU3Z012Ghhc
3
1A
3"))+FGxx#(:.:aQb	:.RYYr]DHxx3b89H((&&	GWh9' ' )K [;u+>+>+2+>&@A [;$)%,.
 [;$)+2+@+2&45 [;$)+2+@+D&FG [;$)+2+@+=+=&?@ [;$)&)5\3 
w>
@{(-/6/6/B*D	E
@ 
wH
J{(-/D/6/6*8	9
J 
zJ
L{(-)9	;
L 
zN
P{(-*-/A)B	D
P 
z	"{),*-w	9 
# 
z	"{*-w	9 
# 
z	"{(-03u/4/B/6*8	9 
# 
z	"{(-/3/4/B/6*8	9 
# 
y	!{(-)-	/ 
"	!o 4.`
@ 
@
J 
J
L 
L
P 
P 
#	" 
#	" 
#	" 
#	" 
"	!sx   N!NN"N/N<9O	,O O#O/	O;?P"N,/N9<O	OO #O,/O8;PPc            
      D   t        j                  t        t        t        t        d                        } | | g}t         j                  j                  |ddg      }t        ||dd       t        ||dd       t        ||dd	d
       t        ||dddd
       t        ||dd       t        ||dd       g d}t        j                         }|D ]<  \  }}t        j                  t        t        f      5  t        ||||       d d d        > y # 1 sw Y   IxY w)N  r   r   r$   r   r   )r   r   compression_levelr   snappyr   )r   r   r   r   lz4r   ))r     )r   i)rg   i  )lzo   )r   r
  )r   r'   rn   r   r   ro   r(   r)   r   r   r   r   r   r   r  r   )r*   r   r   invalid_combinationsbufcodeclevels          r    test_compression_levelr  #  s   
((4Ct-.
/C:DHH  c3Z 8E UU'(*
 UU'(* UU'-H!=? UU-.Q'79
 UU'(* UU'(*8
**,C.]]J01+02 21 /11s   ;DD	c                      t        j                  g d      } d}t         j                  j                  | g|g      }t	        |ddi      }d}|j
                  d   j                  |k(  sJ y )N)r   r   r   r   r  zprohib; ,	{}flavorspark)write_table_kwargsprohib______r   )r   r'   r(   r)   r	   rv   r   )a0r   r   r   expected_names        r     test_sanitized_spark_field_namesr  P  sb    	/	"BDHH  "v.Ee78KLF"M==  M111r/   c                  >   t        d      } t        j                  j                  |       }t	        j
                         }t        ||dd       |j                  d       t        |d      }|j                  d       t        |d	      }|j                  |      sJ y )
Ni'  r@   SNAPPYrB   )r   r   r   T)use_threadsF)
r   r   r(   r7   r   r   r   r   r   rV   )r9   r   r  table1table2s        r    test_multithreaded_readr"  [  sy    	e	$BHH  $E
**,C5AHHQK$/FHHQK%0F==   r/   c                     t        j                  t        j                  d      gg d      } t        j
                  j                  | j                               }t        j                         }t        ||d       |j                  d       t        |      }|j                  |      sJ t        j                  t               5  t        ||d       d d d        y # 1 sw Y   y xY w)Nr  )ABCD)columns)
chunk_sizer   )r}   r~   r   r   r   r(   r7   reset_indexr   r   r   r   r   rV   r   r   r   )r   r   r  r   s       r    test_min_chunksizer,  m  s    <<10DEDHH  !1!1!34E
**,C+HHQKF==	z	"UCA. 
#	"	"s   CC&c                 f   t        j                  t        d      t        t        dd            t	        j
                  dd      j                  d      t	        j
                  ddd	
      g dt        j                  t        d            t        j                  dd      t        j                  ddd      t        j                  ddd      d	      }t        j                  j                  |      }| dz  }	 t        ||d       |j                         rJ y # t        j                  $ r Y (w xY w)Nabcr   r  r      u1      @      @float64r   TFT20130101periodsz
US/Eastern)r7  tzns)r7  freq)	r   r   r   r   r   rY   ghirO   r5   r   )r}   r~   rn   ro   r   r   astypeCategorical
date_ranger   r(   r7   r   ArrowExceptionrc   )r   r9   pdfrX   s       r    (test_write_error_deletes_incomplete_filerC  ~  s     
DK q!-IIaO2248IIc3i@/NN4;7MM*a@MM*a-9;MM*adK	M 	
NB ((

r
"C#H 	S(E2       s   9D D0/D0c                     d}	 t        j                  |       y # t        $ r}||j                  d   v sJ Y d }~y d }~ww xY w)Nznonexistent-file.parquetr   )rT   rh   	Exceptionr   )r   rd   r   s      r    test_read_non_existent_filerF    s>    %D!
d !qvvay   !s    	A ;A c                     t        j                         5  t        j                  d       t        j                  | dz         d d d        y # 1 sw Y   y xY w)Nerror)actionzv0.7.1.parquet)warningscatch_warningssimplefilterrT   rh   )datadirs    r    test_read_table_doesnt_warnrN    s:    		 	 	"W-
g 001 
#	"	"s   /AAc                  z   t         j                  j                  t        j                  ddg      gdg      } t	        j
                         }t        j                  | |d       |j                  d       t        j                  |      }t        j                  |j                         | j                                y )Nr.  defsome_colr   r   r   )r   r(   r)   r'   r   r   rT   r   r   rh   r   r   r   )r   rY   	roundtrips      r    test_zlib_compression_bugrT    s     HH  "((E5>":!;j\JE


ANN5!0FF1Ia I)--/1BCr/   c                    t        | dz        }t        j                  t        j                  t
        fd      5  t        |d      5 }	 d d d        t        j                  |       d d d        t        j                  t        j                  t
        fd      5  t        |d      5 }|j                  d       d d d        t        j                  |       d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   ?xY w# 1 sw Y   y xY w)Nr   zsize is 0 bytesr   rP   zsize is 4 bytess   ffff)
rR   r   r   r   ArrowInvalidr  rS   rT   rh   write)r   rd   rY   s      r    test_parquet_file_too_smallrX    s    w'(D	1.
0$ 
d	
0 
1.
0$GGG 
d	
0 
0	 
0 
0 
0 
0sG   C*C	C*D&C68DC'	#C**C36C?	;DDzignore:RangeIndex:FutureWarningz.ignore:tostring:DeprecationWarning:fastparquetc                    t        j                  d      }t        j                  t	        d      t	        t        dd            t        j                  ddd      g d	t        j                  d
d      t        j                  g d      d      }t        j                  |      }t        | dz        }t        j                  ||d        |j                  |      }|j!                         }t#        j$                  ||       t        | dz        }|j'                  ||       t        j(                  |      }|d   j+                  t,              |d<   t#        j$                  |j!                         |       y )Nfastparquetr.  r   r  r1  r2  r3  r   r4  r5  r   r6  )r   r   r   )r   r   r   r   r   rY   zcross_compat_arrow.parquetrR  z cross_compat_fastparquet.parquetrY   )r   importorskipr}   r~   rn   ro   r   r   r@  r?  r   r   rR   rT   r   ri   r   r   r   rW  rU   r>  object)	r   fpr9   r   
file_arrowfp_filedf_fpfile_fastparquettable_fps	            r    $test_fastparquet_cross_compatibilityrc    s(   
 
		]	+B	eeAqk"395$z150		

B HHRLE W;;<JNN5*$7nnZ(GE"e$ 7%GGHHHr"~~./H gnnV$BsG(,,.3r/   array_factoryc                  6    t        j                  dd gdz        S Nr   r4   r   r'   rj   r/   r    <lambda>rh    s    BHHaY^$r/   c                  R    t        j                  dd gdz        j                         S rf  r   r'   dictionary_encoderj   r/   r    rh  rh    s    BHHaY^$668r/   c                  6    t        j                  dd gdz        S N r4   rg  rj   r/   r    rh  rh    s    BHHb$Z"_%r/   c                  R    t        j                  dd gdz        j                         S rm  rj  rj   r/   r    rh  rh    s    BHHb$Z"_%779r/   read_dictionaryFTc                    t         j                  j                  d |        i      }t        j                         }t        j                  ||d       |j                  d       |rdgnd }t        j                  |d|      }|j                  D ]E  }|j                  \  }|j                         d   }|j                         |j                  dz  k(  rEJ  y )	Nrz   T)rC   r   F)r  rp  r       )r   r(   from_pydictr   r   rT   r   r   rh   r(  chunksbuffers
to_pybytesrA   )rd  rp  
orig_tablebior   rz   ru   r  s           r    test_buffer_contentsry    s     %%umo&>?J
**,CNN:s48HHQK!0ugdOMM#5*9;E }}**mmoa ~~388e#3333 r/   c                     t        j                  t        j                  t        d            gdg      }| dz  }t	        j
                  ||d       t	        j                  |      }|j                  |      sJ y )Nr  rb   r$   zarrow-10480.pyarrow.gzGZIPrR  )r   r   r'   ro   rT   r   rh   rV   )r   r   rd   r   s       r    "test_parquet_compression_roundtripr|    sa    
 HHbhhuQx()&:E--DNN5$F3]]4 F==r/   c                    t         j                  j                  t        j                  g d      gdg      }| dz  }d}t	        j
                  ||j                        5 }t        |      D ]  }|j                  |        	 d d d        t	        j                  |      }|j                  j                  |k(  sJ t        |      D ]$  }|j                  |      j                  |      r$J  y # 1 sw Y   lxY w)Nr   r   r#   zempty_row_groups.parquetr   )r   r(   r)   r'   rT   ParquetWriterrv   ro   r   ri   metadatanum_row_groupsread_row_grouprV   )r   r   rd   
num_groupswriterr=  readers          r    test_empty_row_groupsr    s    HH  "((2G"<!=vFE//DJ			$	-z"Au% # 
. ^^D!F??))Z777:$$Q'..u555  
.	-s   "C--C6c                     d gdz  }|j                  dg       t        j                  j                  |gdg      }| dz  }t	        j
                  ||       t	        j                  |      }||k(  sJ y )Ni   r   r   zarrow-11607.parquet)rE   r   r(   r)   rT   r   rh   )r   r   r   rd   r!  s        r    test_reads_over_batchr  $  sk    6WDKK HH  $(4E**DNN5$]]4 FF??r/   c                    | dz  }|j                  d       t        j                  g dg dgddg      }t        j                  ||d	z         t        j                  g d
g dgddg      }t        j                  ||dz         t        j
                  t        |            }t        j                  g dg dgddg      }||k(  sJ y )N dataset_column_order_permutationT)exist_okr   )皙?皙?333333?r   r   r$   zdata1.parquet)皙?      ?333333?)r  r   r/  zdata2.parquet)r   r   r   r  r   r/  )r  r  r  r  r  r  )mkdirr   r   rT   r   rh   rR   )r   casedata1data2r   r!  s         r     test_permutation_of_column_orderr  2  s    77DJJJHHi.sCjAENN5$01HHlI.sCjAENN5$01MM#d)$EXX)57 #J(F F??r/   c                    | dz  }t        j                  t        t        d                  }d}t        j                  |g|z  t        |      D cg c]  }d| 	 c}      }t        j                  ||       t        j                  t        d      5  t        j                  |d|z  	       d d d        t        j                  t        d      5  t        j                  ||
       d d d        t        j                  |d|z  	      }||k(  sJ t        j                  |d|z  
      }||k(  sJ t        j                  |      }||k(  sJ y c c}w # 1 sw Y   xY w# 1 sw Y   yxY w)Nzlargethrift.parquetr4   r	  some_long_column_name_r$   z1Couldn't deserialize thrift:.*Exceeded size limitr   r   )thrift_string_size_limit)thrift_container_size_limitr1   r   )r   r'   rn   ro   r   rT   r   r   r   r  rh   )r   rd   r'   num_colsr=  r   gots          r    test_thrift_size_limitsr  E  sC   **DHHT%)_%EHHH	(5:8_E_'s+_EGE NN5$	E
G 	dR(]C
G 
E
G 	dA
G
 --sX~
FC%<<
--!h,
GC%<<
--
C%<<# F
G 
G
G 
Gs   E
EE#E #E,c                    | dz  }t        j                  dg di      }t        j                  ||d       t        j                  |d      }||k(  sJ t        |j                               }|d   |d   k7  sJ |d   |d   c|d<   |d<   | d	z  }|j                  |       t        j                  |d
      }||k7  sJ |t        j                  dg di      k(  sJ t        j                  t        d      5  t        j                  |d      }ddd       t        j                  |d
      }|j                         }	|	|k7  sJ |	t        j                  dg di      k(  sJ t        j                  |d      }t        j                  t        d      5  |j                         }ddd       y# 1 sw Y   xY w# 1 sw Y   yxY w)zUCheck that checksum verification works for datasets created with
    pq.write_table()zcorrect.parquetr   r   r   r   r  Twrite_page_checksumpage_checksum_verification   $   zcorrupted.parquetFr   r   r   r  CRC checksum verificationr   N)r   r   rT   r   rh   	bytearray
read_byteswrite_bytesr   r   r  ri   r   )
r   original_path
table_origtable_checkbin_datacorrupted_pathtable_corruptrI   corrupted_pq_filetable_corrupt2s
             r    +test_page_checksum_verification_write_tabler  `  s   
 //M3-.JNN:}$G --$OK$$$
 1134H B<8B<'''!)"x|HRL(2, 22Nx( MM.=BDM J&&&BHHc<%89999 
w&A	BMM.TJ 
C
 ~BGI&++-NZ'''RXXsL&9:::: ~BFH 
w&A	B""$ 
C	B! 
C	B  
C	Bs   9F1F=1F:=Gc                    t        j                  dg di      }| dz  }t        j                  ||d       t	        |j                               }t        |      dk(  sJ |d   }t        j                  |d      }||k(  sJ t        |j                               }|d	   |d
   k7  sJ |d
   |d	   c|d	<   |d
<   | dz  }t        ||       ||j                  z  }|j                  |       t        j                  |d      }	|	|k7  sJ |	t        j                  dg di      k(  sJ t        j                  t        d      5  t        j                  |d      }
ddd       y# 1 sw Y   yxY w)zXCheck that checksum verification works for datasets created with
    pq.write_to_datasetr   r  correct_dirTr  r   r   r  r  r  corrupted_dirFr  r  r   N)r   r   rT   write_to_datasetrn   iterdirlenrh   r  r  r   r   r  r   r   r  )r   r  original_dir_pathoriginal_file_path_listr  r  r  corrupted_dir_pathcorrupted_file_pathr  rI   s              r    test_checksum_write_to_datasetr    s   
 3-.J  -/
),02
 ##4#<#<#>?&'1,,,+A.M--$OK$$$
 1134H B<8B<'''!)"x|HRL(2, !?2 23,}/A/AA##H- MM"5=BDM J&&&BHHc<%89999 
w&A	BMM-$O 
C	B	Bs   E##E,c                    t        j                  dg di      }| dz  }d}t        j                  t        |      5  t        j                  ||d       d d d        t        j                  ||       t        j                  t        |      5  t        j                  |d       d d d        t        j                  t        |      5  t        j                  |d       d d d        y # 1 sw Y   xY w# 1 sw Y   QxY w# 1 sw Y   y xY w)Nr   r   deprecate_legacyzPassing 'use_legacy_dataset'r   F)use_legacy_dataset)	r   r   r   warnsFutureWarningrT   r  rh   ParquetDataset)r   r   rd   msgs       r    "test_deprecated_use_legacy_datasetr    s     HHc9%&E''D
(C	m3	/
E4EB 
0 t$	m3	/
du5 
0 
m3	/
$59 
0	/ 
0	/
 
0	/ 
0	/s#   C*C6	D*C36C?D)Tr   collectionsr   r   rJ  shutilr   decimalr   r   pyarrowr   r   pyarrow.testsr   pyarrow.tests.parquet.commonr   r	   r
   pyarrow.parquetparquetrT   r   r   ImportErrorpandasr}   pandas.testingtestingr   pyarrow.tests.pandas_examplesr   r   numpyr   mark
pytestmarkr!   r.   r:   r>   rJ   r[   r_   re   rk   slowrp   r{   r   r   r   r   parametrizeLocalFileSystemr   r   r   r   r   r   r  r  r  r"  r,  rC  rF  rN  rT  rX  rZ  filterwarningsrc  ry  r|  r  r  r  r  r  datasetr  r  rj   r/   r    <module>r     s  $ 
 # 	       ; ; F
B< [[  
.=   	; 	; 3 3$ $ $ $ $$ * *    8 ' '$A  
"


(  !@A  B *3
& #/L34l}/@*2Z2 ! !" / /  ! !4!2 D D  =>LM!4 N ?  !4H $8%9	+  *UDM:4 ;4(	 6$&67%t 1P 1Ph : :Y  	B  NB
  	Bs5   M6 N $N 6N N	NNNN