
    gyK                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ e j                  j                  Z
	 d dlmZ d dlZ e j                   d      d        Zd Zdd	Zdd
Ze j                  j                  e j                  j+                  dg d      d               Zd Zd Zd Zd Zd Ze j                  j8                  d        Zd Zd Zd Z d Z!y# e$ r Y w xY w)    N)fs)util)assert_frame_equalmodule)scopec                     | dz  S )Norc )base_datadirs    K/var/www/openai/venv/lib/python3.12/site-packages/pyarrow/tests/test_orc.pydatadirr   '   s    %    c           	         |D ]  }||   }| |   }|dk(  rs|D cg c]  }|D ]  }|j                         ddhk(     c}}rG||   j                         }t        |      D ]   \  }}|D cg c]  }|d   |d   f c}||<   " |||<   |d   j                  }	t	        |	t
        j
                        rt        j                  |      }nt	        |	t
        j                        r|j                  j                  }n|	t        j                  u rdgt        |      z  }
t        t        ||            D ]q  \  }\  }}t        j                  |      r|j                         j                   }d| z  }t        j                  t#        ||z              j%                  |      |
|<   s t        j&                  |
      }|||<    yc c}}w c c}w )z_
    Fix type of expected values (as read from JSON) according to
    actual ORC datatype.
    mapkeyvaluer   N
   )keyscopy	enumerate	__class__
issubclassdatetimepdto_datetimedatedtdecimalDecimallenzipisnullas_tupleexponentroundscalebSeries)actual_colsexpected_colsnameexpectedactualmdcolitypconverted_decimalsvexpfactors                 r   fix_example_valuesr6   ,   s   
  &T"EM7?Kx!AeW---xK%**,C!(+1:;<!Q1U8QwZ0!<A ,"%M$Qi!!c8,,-~~h/HX]]+{{''HGOO#"&#h-!7&s68'<=	6Aqyy|**,//CC4ZFa&j(9:AA#F 'q)	 > yy!34H&d=  L =s   #G*Gc                 R    ||||| j                  d      }t        | |d       y )NT)dropF)check_dtype)reset_indexr   )orc_dfexpected_dfstartstops       r   check_example_valuesr?   R   s3    D,!%-99t9Dv{>r   c           
         ddl m} |j                  |       }|j                         }t	        |t
        j                        sJ |j                          t        j                  |j                               }t        |j                        t        |j                        k(  sJ |j                  j                  |j                        s|j                  |j                        }|rt        ||       t!        ||       d}t#        |j$                        D ]]  }|j'                  |      }	t!        t        j                  |	j                               |||t)        |	      z          |t)        |	      z  }_ ||j*                  k(  sJ y)zC
    Check a ORC file against the expected columns dictionary.
    r   r	   columns)r=   r>   N)pyarrowr	   ORCFileread
isinstancepaTablevalidater   	DataFrame	to_pydictsetrC   equalsreindexr6   r?   rangenstripesread_striper    nrows)
orc_pathr<   need_fixr	   orc_filetabler;   json_posr0   batchs
             r   check_example_filerZ   X   s9    {{8$HMMOEeRXX&&&	NN \\%//+,F{""#s6>>':::: >>  !4!45!))&..)A6;/-H8$$%$$Q'R\\%//*;<(#+"*SZ"7	9 	CJ & x~~%%%r   filename)zTestOrcFile.test1.orczTestOrcFile.testDate1900.orczdecimal.orcc                     || z  }t        j                  t        |j                  d            d      }t	        ||d       y)z
    Check a ORC file example against the equivalent JSON file, as given
    in the Apache ORC repository (the JSON file has one JSON object per
    line, corresponding to one row in the ORC file).
    z.jsn.gzT)lines)rU   N)r   	read_jsonstrwith_suffixrZ   )r[   r   pathrW   s       r   test_example_using_jsonrb   }   s:     XDLLT--i89FEtUT2r   c                    ddl m} |j                  | dz        j                         }|j                  dk(  sJ t        j                  dt        j                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fd	t        j                         fd
t        j                         fdt        j                         fdt        j                         fdt        j                   dt        j"                  t        j                   dt        j                         fdt        j                         fg            fg      fdt        j"                  t        j                   dt        j                         fdt        j                         fg            fdt        j$                  t        j                         t        j                   dt        j                         fdt        j                         fg            fg      }|j                  |k(  sJ y )Nr   rA   zTestOrcFile.emptyFile.orcboolean1byte1short1int1long1float1double1bytes1string1middlelistr   )rD   r	   rE   rF   num_rowsrH   schemabool_int8int16int32int64float32float64binarystringstructlist_map_)r   r	   rW   expected_schemas       r   test_orcfile_emptyr~      s   KK"==>CCEE>>Qii	RXXZ 	"'')	288:		"((*	2::< 	BJJL!	299;	BIIK 	299bhh		FBHHJ/%ryy{35 67 8  	
 
II
+!299;/1 2
 	 
				FBHHJ#7$-ryy{#;#= > 	'! O0 <<?***r   c                    ddl m} t        j                  dg di      }| dz  }|j	                          |dz  }|j                  |t        |             |j                  |t        j                               }|j                  |      sJ |j                  dt        j                  |             }|j                  |      sJ |j                  t        j                  |            }|j                  |      sJ y )	Nr   rA   a         data_dirzdata.orc)
filesystemzdata_dir/data.orc)rD   r	   rH   rW   mkdirwrite_tabler_   
read_tabler   LocalFileSystemrN   r   _filesystem_uri)tmpdirr	   rW   	directoryra   results         r   test_filesystem_urir      s    HHc9%&E#IOOz!DOOE3t9% ^^DR-?-?-A^BF== ^^(<(<V(D  FF== ^^T"$F==r   c                    ddl m} t        j                  g d      }t        j                  g d      }t        j                  ||d      }| j                  d      }|j                  ||       |j                  |      }|j                  |      sJ |j                  |g       }d|j                  k(  sJ d|j                  k(  sJ |j                  |dg	      }d|j                  k(  sJ d
|j                  k(  sJ y )Nr   rA   r   Nr   NNArrowNORCru   utf8test.orc   ru   rB   r   )rD   r	   rH   arrayrW   joinr   r   rN   ro   num_columns)r   r	   r   brW   fileoutput_tables          r   test_orcfile_readwriter      s    
#$A
-.AHHq!,-E;;z"DOOE4 >>$'L<<%%%>>$+L%%%%%(((((>>$	>:L%%%%%(((((r   c                  d   ddl m}  ddlm}  |       }t	        j
                  g d      }t	        j
                  g d      }t	        j                  ||d      }| j                  ||       |j                  d       | j                  |      }|j                         }|j                  |      sJ y )Nr   rA   )BytesIOr   r   r   )rD   r	   ior   rH   r   rW   r   seekrE   rF   rN   )r	   r   bufr   r   rW   rV   r   s           r   test_bytesio_readwriter      s    
)C
#$A
-.AHHq!,-EOOE3HHQK{{3H==?L<<%%%r   c                     ddl m}  t        j                         }t        j                  g d      }t        j                  g d      }t        j
                  ||d      }| j                  ||       t        j                  |j                               }| j                  |      }|j                         }|j                  |      sJ |j                  dk(  sJ |j                  dk(  sJ |j                  dk(  sJ |j                  d	k(  sJ t        j                         }t!        j"                  t$              5  | j                  ||       d d d        t        j                  |j                               }| j                  |      }|j                         }|j                  |      sJ |j                  dk(  sJ |j                  dk(  sJ |j                  dk(  sJ |j                  d	k(  sJ y # 1 sw Y   xY w)
Nr   rA   r   r   r   UNCOMPRESSEDz0.12i'  i   rD   r	   rH   BufferOutputStreamr   rW   r   BufferReadergetvaluerE   rF   rN   compressionfile_versionrow_index_stridecompression_sizepytestwarnsFutureWarningr	   buffer_output_streamr   r   rW   buffer_readerrV   r   s           r   test_buffer_readwriter      s   002
#$A
-.AHHq!,-EOOE/0OO$8$A$A$CDM{{=)H==?L<<%%%>111  F***$$---$$--- 002	m	$,e4 
%OO$8$A$A$CDM{{=)H==?L<<%%%>111  F***$$---$$--- 
%	$s   ;G22G;c            	         ddl m}  t        j                         }t        j                  g d      }t        j                  g d      }t        j
                  ||d      }| j                  ||dddd	
       t        j                  |j                               }| j                  |      }|j                         }|j                  |      sJ |j                  dk(  sJ |j                  dk(  sJ |j                  dk(  sJ |j                  d	k(  sJ t        j                         }t!        j"                  t$              5  | j                  ||dddd
       d d d        t        j                  |j                               }| j                  |      }|j                         }|j                  |      sJ |j                  dk(  sJ |j                  dk(  sJ |j                  dk(  sJ |j                  dk(  sJ y # 1 sw Y   xY w)Nr   rA   r   r   r   snappyz0.11i  i   )r   r   r   compression_block_sizeSNAPPYuncompressedi N  i @  r   r   r   s           r   'test_buffer_readwrite_with_writeoptionsr     s   002
#$A
-.AHHq!,-EOO$   OO$8$A$A$CDM{{=)H==?L<<%%%8+++  F***$$,,,$$--- 002	m	$ &"#( 	 	
 
% OO$8$A$A$CDM{{=)H==?L<<%%%>111  F***$$---$$---# 
%	$s    G<<Hc                  N   ddl m}  t        j                         }t        j                  g d      }t        j
                  d|i      }t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d	       d d d        t        j                  t              5  | j                  ||d
	       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d       d d d        t        j                  t              5  | j                  ||d !       d d d        t        j                  t              5  | j                  ||dd"g!       d d d        t        j                  t              5  | j                  ||h d#!       d d d        t        j                  t              5  | j                  ||d$       d d d        t        j                  t              5  | j                  ||d%$       d d d        t        j                  t              5  | j                  ||d&$       d d d        y # 1 sw Y   -xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   `xY w# 1 sw Y   7xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   jxY w# 1 sw Y   AxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   txY w# 1 sw Y   KxY w# 1 sw Y   "xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   ~xY w# 1 sw Y   UxY w# 1 sw Y   *xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)'Nr   rA   r   ru   )
batch_sizeigR @gp=
ף?)r   z1.1)stripe_sizeipgG @)r   nonezlid)r   i8gR"@)compression_strategynolarge)r   igGz0@cat)padding_tolerancearrow)dictionary_key_size_thresholdg333333?g	ry   )bloom_filter_columnsgffffff?>   r   r   )bloom_filter_fppg?g)rD   r	   rH   r   r   rW   r   raises
ValueErrorr   	TypeError)r	   r   r   rW   s       r   +test_buffer_readwrite_with_bad_writeoptionsr   ;  s   002
#$AHHgq\"E 
z	"  	 	
 
# 
z	"  	 	
 
# 
z	"  	 	
 
# 
z	"  	 	
 
# 
z	"  	 	
 
# 
z	"  	 	
 
# 
z	"  	 	
 
# 
z	"  	 	
 
# 
y	!  	 	
 
" 
z	"  	 	
 
# 
z	"  	 	
 
# 
z	" #$ 	 	
 
# 
z	" #' 	 	
 
# 
z	" #* 	 	
 
# 
y	! !" 	 	
 
" 
z	" !% 	 	
 
# 
z	" !( 	 	
 
# 
z	"  	 	
 
# 
z	" ! 	 	
 
# 
z	" $ 	 	
 
# 
z	" # 	 	
 
# 
z	" *1 	 	
 
# 
z	" *- 	 	
 
# 
z	" *. 	 	
 
# 
z	" !) 	 	
 
# 
z	" "#S 	 	
 
# 
z	" !+ 	 	
 
# 
z	" $ 	 	
 
# 
z	"   	 	
 
# 
z	" ! 	 	
 
#	"g 
#	" 
#	" 
#	" 
#	" 
#	" 
#	" 
#	" 
#	" 
"	! 
#	" 
#	" 
#	" 
#	" 
#	" 
"	! 
#	" 
#	" 
#	" 
#	" 
#	" 
#	" 
#	" 
#	" 
#	" 
#	" 
#	" 
#	" 
#	" 
#	" 
#	"sh  "Z"Z/Z<[	:[0[#&[0[=\
\>\$4\1*\> ]]]%]28]?.^$^^&^3_ <_2_(_' _4```"Z,/Z9<[	[[ #[-0[:=\
\\!$\.1\;>]]]"%]/2]<?^	^^#&^03^= _
__$'_14_>```$c                 	   ddl m} t        j                  dt        j                               }t        j                  dt        j
                  |g            }t        j                  dt        j                               t        j                  dt        j                  t        j                  dt        j                                           t        j                  dt        j
                  |t        j                  d	t        j                               g            t        j                  d
t        j                  t        j                  dt        j
                  t        j                  dt        j                               t        j                  d	t        j                               g                        t        j                  dt        j                               g}dgddggddiddgddddddggdgg}t        j                  |t        j                  |            }t        | dz        }|j                  ||       |j                  |      }|j                         }	|	j                  |      sJ |j                  ddg      }
|
j                  |j                  ddg            sJ |j                  g d      }|j                  |j                  g d            sJ |j                  dg      }t        j                  ddddiigi      }|j                  |      sJ |j                  dg      }t        j                  dd	digi      }|j                  |      sJ |j                  g d      }|j                  |j                  ddg            sJ |j                  dg      }t        j                  d
ddiddiggi      }|j                  |      sJ |j                  ddg      }
|
j                  |j                  ddg            sJ |j                  g d       }|j                  |j                  g d!            sJ t!        j"                  t$              5  |j                  d"g       d d d        t!        j"                  t&              5  |j                  dg       d d d        y # 1 sw Y   ?xY w# 1 sw Y   y xY w)#Nr   rA   innerrm   basicrn   itemrz   inner2list-structinner1basic2r   r   r   r   )rm   r         )r   r         	   )rp   r   rB   )rn   rz   r   struct.middle.innerstruct.inner2)rn   r   r   zlist-struct.inner1r   )rn   rz   r   wrong)rD   r	   rH   fieldru   rz   rt   r{   rW   rp   r_   r   rE   rF   rN   selectr   r   IOErrorr   )tempdirr	   r   rm   fieldsarrsrW   ra   rV   result1result2result3result4	expected4result5	expected5result6result7	expected7s                      r   test_column_selectionr     s    HHWbhhj)EXXh		5' 23F
"((*%
BHHRXXfbhhj9:	
 	bii(BHHJ)G HI	
 	288BHH		HHXrxxz2HHXrxxz2# % 	
 	288:&!F& 
q!fXGQ<1=>!
$a&@	ABQCID HHT"))F"34Ew#$DOOE4 {{4 H mmoG>>%    mmWh$7m8G>>%,,':;<<<mm$@mAG>>%,,'CDEEE mm%:$;m<G(h!%=$>?@I>>)$$$mm_$5m6G(h]O45I>>)$$$mm@  G >>%,,'9:;;;mm%9$:m;G-Ha=8Q-*H)IJKI>>)$$$ mmQFm+G>>%,,':;<<<mmIm.G>>%,,'HIJJJ 
w	 	wi( 
 
 
z	"qc" 
#	" 
 	
 
#	"s   4S)SSSc                     ddl m} t        | dz        }|j                  |      5 }t	        j
                  t              5  |j                          d d d        d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)Nr   rA   r   )rD   r	   r_   	ORCWriterr   r   AttributeErrortest)r   r	   ra   writers       r   test_wrong_usage_orc_writerr   l  sT    w#$D	t	]]>*KKM + 
	** 
	s#   A. A"A."A+	'A..A7c                 L   ddl m} t        | dz        }t        j                  g d      }t        j                  g d      }t        j
                  ||d      }t        j                  t        j                        5  |j                  ||       d d d        y # 1 sw Y   y xY w)Nr   rA   r   r   )NNNNr   )
rD   r	   r_   rH   r   rW   r   r   ArrowNotImplementedErrorr   )r   r	   ra   r   r   rW   s         r    test_orc_writer_with_null_arraysr   u  sq    w#$D
#$A
)*AHHq!,-E	r22	3t$ 
4	3	3s   >BB#)NN)F)"r   r   r   rD   rH   r   pyarrow.testsr   markr	   
pytestmarkpandas.testingr   pandasr   ImportErrorfixturer   r6   r?   rZ   parametrizerb   r~   r   r   r   r   r   r   r   r   r   r   r
   r   r   <module>r     s  $      
 [[__
	1
 h    #'L?"&J  & 
	3 	3+B 0)&&.B ,. ,.^_
DL#^%e  		s   
C CC