
    g                        d dl Z d dlZd dlZd dlZd dlZ	 d dlZd dlZd dl	m
Z
 d dlZd dlmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ 	 d dlmZ d dlmZm Z m!Z! 	 d dl"Z#d dl$m%Z& ejN                  j8                  ejN                  jP                  gZ)d Z*ejN                  jD                  d        Z+ejN                  jD                  d        Z,ejN                  jD                  d	        Z-ejN                  jD                  d
        Z.ejN                  j_                  e0e1fd      ejN                  jD                  d               Z2ejN                  jD                  d        Z3ejN                  jD                  d        Z4ejN                  jD                  d        Z5ejN                  jD                  d        Z6ejN                  jD                  d        Z7ejN                  jD                  ejN                  jq                  ddgdgg ejr                  d      dk   ejr                  dd      dk   ejr                  dd      ju                   ejv                               dk  f      ejN                  jq                  dd      d                      Z<ejN                  jD                  d        Z=ejN                  j|                  d        Z?ejN                  j|                  d        Z@ejN                  jD                  d         ZAejN                  jD                  ejN                  j|                  d!               ZBd" ZCd# ZDd$ ZEejN                  jD                  d%        ZFejN                  jD                  d&        ZGejN                  jD                  d'        ZHejN                  jD                  d(        ZIejN                  jD                  d)        ZJejN                  jD                  d*        ZKdRd+ZLd, ZMejN                  jD                  ejN                  jq                  d-d.d/g      d0               ZNejN                  jD                  d1        ZOejN                  jD                  d2        ZPejN                  jD                  ejN                  jq                  d-d.d/g      d3               ZQd4 ZRd5 ZS	 	 	 dSd6ZT	 dTd7ZUejN                  jD                  d8        ZVejN                  jD                  d9        ZWejN                  jD                  d:        ZXejN                  jD                  d;        ZYejN                  jD                  d<        ZZejN                  jD                  ejN                  j|                  d=               Z[ejN                  jD                  ejN                  j|                  ejN                  j                  ej                  d>k(  d?@      dA                      Z^ejN                  jD                  ejN                  j|                  dB               Z_ejN                  jD                  dC        Z`dUdDZaejN                  jD                  dE        ZbejN                  jD                  dF        ZcdG ZddH ZedI ZfdJ ZgdK ZhdL ZidM ZjejN                  jq                  dNdO      dP        ZkejN                  jD                  dQ        Zly# e$ r dZY w xY w# e$ r dZY lw xY w# e$ r dxZ#Z&Y qw xY w)V    N)FileSelector
FileSystemLocalFileSystemPyFileSystemSubTreeFileSystemFSSpecHandler)util)guid)_read_table_test_dataframe_write_tablec                    t        j                  dg di      }| dz  }|j                          |dz  }t        j                  |t        |             t        j                  |t                     }|j                  |      sJ t        j                  dt        j                  |             }|j                  |      sJ y )Na         data_dirdata.parquet
filesystemzdata_dir/data.parquet)patablemkdirpqwrite_tablestr
read_tabler   equalsr	   _filesystem_uri)tempdirr   	directorypathresults        W/var/www/openai/venv/lib/python3.12/site-packages/pyarrow/tests/parquet/test_dataset.pytest_filesystem_urir&   ;   s    HHc9%&E*$IOO~%DNN5#d)$ ]]*,F== ]]D,@,@,IKF==    c                 0    t               }t        ||        y N)r   _partition_test_for_filesystem)r!   locals     r%   test_read_partitioned_directoryr,   N   s    E"5'2r'   c                     t               }| }t        ||       t        j                  |      }|j	                  dg      }|j
                  dgk(  sJ y )Nvaluescolumns)r   r*   r   ParquetDatasetreadcolumn_names)r!   r+   	base_pathdatasetr$   s        r%   'test_read_partitioned_columns_selectionr6   T   sS     EI"5)4	*G\\8*\-F8*,,,r'   c                 h   t               }| }ddg}g d}ddg}d|gd|gd|gg}t        j                  t        j                  |d	
      j                  d      t        j                  t        j                  t        j                  |t        
      d      d      t        j                  t        j                  t        j                  |d
      d      d      t        j                  d      d      }t        ||||       t        j                  ||g d      }|j                         }	|	j                         j                  d      }
d|
d   j                  vsJ d|
d   j                  vsJ d|
d   j                  vsJ g dddgg}t        j                  |||      }|j                         }	|	j                         j                  d      }
|
d   dk(  |
d   dk7  z  |
d   dk(  z  }t        j                  |
d         dk(  |
d   dk(  z  }|j!                         dkD  sJ |j!                         dkD  sJ |
j"                  d   |j!                         |j!                         z   k(  sJ dggdggfD ]9  }t        j                  |||      }|j                         j$                  dk(  r9J  y )Nr   r   r   bcTFintegerstringbooleani4dtype      r   boolr      r;   r<   r=   r.   ))r;   =r   )r<   !=r9   )r=   ==Truer   filtersdropr9   )r;   rF   r   )r=   rH   FalserI   rN   )r<   rH   s   1 a)r<   rH   z1 a)r   pd	DataFramenparrayrepeattileobjectarange_generate_partition_directoriesr   r1   r2   	to_pandasreset_indexr.   sumshapenum_rows)r!   r+   r4   integer_keysstring_keysboolean_keyspartition_specdfr5   r   	result_dfrK   df_filter_1df_filter_2s                 r%   test_filters_equivalencyre   a   s   EIq6L!K%=L	L!	;	L!N 
88L5<<R@''"''"((;f"EqI1M77277288L#GKQO))B-	 
B $E9nbI e,G
 LLNE"..D.9IIi(/////i)00000	),33333	

 
89G eW6GLLNE!--4-8I Y'1,X#%'Y6)+K 88Ii01Q6Y7*,K??q   ??q   ??1+//"3koo6G"GHHH/01./02##%:||~&&!+++	2r'   c                    t               }| }g d}d|gg}d}t        j                  t        j                  |      t        j
                  |d      dddg      }t        ||||       t        j                  ||d	d
g      }|j                         }|j                         j                  d      j                  d      }	t        t        |	d   j                        D 
cg c]  }
|
 }}
|ddgk(  sJ y c c}
w )Nr   r   r   r      integersrB   r>   r?   indexri   rk   r/   )ri   <rh   )ri   >r   rJ   byTrL   r   r   r   rO   rP   rQ   rV   rR   rW   r   r1   r2   rX   sort_valuesrY   mapintr.   r!   r+   r4   r]   r`   Nra   r5   r   rb   xresult_lists               r%   %test_filters_cutoff_exclusive_integerrx      s   EI"L	\"N 	
A	1HH\6 $
&B
 $E9nbIe  
G LLNE""{g{."{{-  "#y'<'C'CDED1DKE1a&    Fs   #	C8z5Loss of type information in creation of categoricals.)raisesreasonc           	      8   t               }| }t        j                  ddd      t        j                  ddd      t        j                  ddd      t        j                  ddd      t        j                  ddd      g}d|gg}d	}t        j                  t        j                  |      t        j                  |d
      dddg      }t        ||||       t        j                  ||ddg      }|j                         }|j                         j                  d      j                  d      }	t        j                  t        j                  t        j                  ddd      gd
      t        j                  |d
            }
|	d   j                   |
k(  sJ y )Ni  rh   	   
            datesrB   
datetime64r?   )rk   r   rk   r/   )r   rl   z
2018-04-12)r   rm   z
2018-04-10rJ   rn   TrL   
categories)r   datetimedaterO   rP   rQ   rV   rR   rW   r   r1   r2   rX   rq   rY   Categoricalr.   )r!   r+   r4   	date_keysr`   ru   ra   r5   r   rb   expecteds              r%   &test_filters_cutoff_exclusive_datetimer      ss    EI 	dAq!dAr"dAr"dAr"dAr"I 
)N 	
A	1)<8 !
#B
 $E9nbIe((
G LLNE""{g{."{{-  ~~
(--a,-\B88I\:<H W$$000r'   c           
      J   | dz  }t        j                  t        j                  ddd      t        d      d      j	                  |d       t        j                  |d	d
t        j                  ddd      fg      }|j                  d      j                         g dk(  sJ y )Nztimestamps.parquetz
2020-01-01r}   D)periodsfreq)r   idT)use_deprecated_int96_timestampsr   <=i  r   rB   rK   r   rg   )
rO   rP   
date_rangerange
to_parquetr   r   r   column	to_pylist)r!   r#   r   s      r%   test_filters_inclusive_datetimer      s     ))DLL|RcBBi  z$z=MM$	$))$156) E <<'')_<<<r'   c                    t               }| }g d}d|gg}d}t        j                  t        j                  |      t        j
                  |d      dddg      }t        ||||       t        j                  ||d	d
g      }|j                         }|j                         j                  d      j                  d      }	t        t        |	d   j                        D 
cg c]  }
t        |
       }}
|ddgk(  sJ y c c}
w )Nrg   ri   rB   r>   r?   rj   rk   r/   )ri   r   r   )ri   z>=r   rJ   rn   TrL   r   r   rp   rt   s               r%   test_filters_inclusive_integerr     s
   EI"L	\"N 	
A	1HH\6 $
&B
 $E9nbIe!!
G LLNE"++)+4+(  $'sIj,A,H,H#IJ#Ia3q6#IKJ1a&    Ks   #Dc                     t               }| }ddg}g d}ddg}d|gd|gd|gg}t        j                  t        j                  |d	
      j                  d      t        j                  t        j                  t        j                  |t        
      d      d      t        j                  t        j                  t        j                  |d
      d      d      t        j                  d      d      }t        ||||       t        j                  ||dg      }|j                         }	|	j                         j                  d      }
d|
d   j                  v sJ d|
d   j                  v sJ d|
d   j                  vsJ t        j                  ||dddgfddddhfg      }|j                         }	|	j                         j                  d      }
d|
d   j                  vsJ d|
d   j                  vsJ d|
d   j                  vsJ y )Nr   r   r8   TFr;   r<   r=   r>   r?   rA   rB   r   rC   r   rD   rE   )r<   inabrJ   rL   r   r9   r:   r   )r<   r   r   r9   znot inrN   )r   rO   rP   rQ   rR   rS   rT   rU   rV   rW   r   r1   r2   rX   rY   r.   )r!   r+   r4   r]   r^   r_   r`   ra   r5   r   rb   s              r%   test_filters_inclusive_setr   .  s
   EIq6L!K%=L	L!	;	L!N 
88L5<<R@''"''"((;f"EqI1M77277288L#GKQO))B-	 
B $E9nbIe'(G LLNE"..D.9I)H%,,,,,)H%,,,,,i)00000eTA3')EXy13G
 LLNE"..D.9IIi(/////i)00000	),33333r'   c                 N   t               }| }g d}d|gg}d}t        j                  t        j                  |      t        j
                  |d      dddg      }t        ||||       t        j                  t              5  t        j                  ||d	g
       d d d        t        j                  t              5  t        j                  ||dg
       d d d        t        j                  ||ddt               fg
      }|j                         j                  dk(  sJ t        j                  ||dddhfg
      }t        j                  t               5  |j                         j                  dk(  sJ 	 d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)Nrg   ri   rB   r>   r?   rj   rk   r/   )ri   r   r   rJ   )ri   z=<r   r   r   rG   r   )r   rO   rP   rQ   rV   rR   rW   pytestry   	TypeErrorr   r1   
ValueErrorsetr2   r\   NotImplementedError)r!   r+   r4   r]   r`   ru   ra   r5   s           r%   test_filters_invalid_pred_opr   ]  sz   EI"L	\"N 	
A	1HH\6 $
&B
 $E9nbI	y	!
)%*#8";	= 
"
 
z	"
)%*#8";	= 
# 	+0*4dCE)B(EGG <<>""a'''	+0*4dQC)@(ACG 
*	+||~&&!+++ 
,	+' 
"	!
 
#	" 
,	+s$   F>F FFFF$c                    t               }| }g d}d|gg}d}t        j                  t        j                  |      t        j
                  |d      dddg      }t        ||||       d	}t        j                  t        |
      5  t        j                  ||dg      j                          d d d        y # 1 sw Y   y xY w)Nrg   ri   rB   r>   r?   rj   rk   r/   z1No match for FieldRef.Name\(non_existent_column\)match)non_existent_columnrl   r   rJ   )r   rO   rP   rQ   rV   rR   rW   r   ry   r   r   r1   r2   )r!   r+   r4   r]   r`   ru   ra   msgs           r%   test_filters_invalid_columnr     s     EI"L!<01N	A	1HH\6 $
&B
 $E9nbI
>C	z	-
)#B"E	GGKtv 
.	-	-s   (B88CrK   )ri   rl   r   ri   r   nestedr   r9   read_method)r   read_pandasc                    t        t        |      }t               }| }g d}d|gg}t        |      }t	        j
                  t        j                  |      t        j                  |d      t        j                  t        |      D 	cg c]  }	|	t        |	      d c}	      d      }
t        ||||
       t        ||      } ||fi |}|j                  dk(  sJ y c c}	w )	Nrg   ri   r>   r?   r   )rk   ri   r   rJ   r   )getattrr   r   lenrO   rP   rQ   rV   rR   r   r   rW   dictr\   )r!   rK   r   r2   r+   r4   r]   r`   ru   ira   kwargsr   s                r%   test_filters_read_tabler     s     2{#DEI"L	\"N 	LA	1HH\6((58D8a!#a&18DE 
B $E9nbIUG4F%f%E>>Q Es   Cc                 h   t               }| }ddg}d|gg}d}t        j                  t        j                  |      t        j
                  |d      dddg	      }t        ||||       t        j                  |      }|j                         }|j                  d      j                         |k(  sJ y )
N2019_22019_3	year_weekr   rU   r?   )rk   r   rk   r/   )r   rO   rP   rQ   rV   rR   rW   r   r1   r2   r   r   )	r!   r+   r4   r^   r`   ru   ra   r5   r$   s	            r%   $test_partition_keys_with_underscoresr     s     EIX&K	k"N 	
A	1XXk: %
'B
 $E9nbI	*G\\^F==%//1[@@@r'   c                     | \  }}|dz   }t        j                  dg di      }t        |||       t        ||      }|j	                  |      sJ y Nz/test.parquetr   r   r   r   r   r   r   r   )s3_example_s3fsfsr#   r   r$   s        r%   test_read_s3fsr     sR    HB/!DHHc9%&E,"-F==r'   c                     | \  }}|dz   }t        j                  dg di      }t        |||       t        ||      }|j	                  |      sJ y r   r   )r   r   r"   r#   r   r$   s         r%   test_read_directory_s3fsr     sR    #MB	&DHHc9%&E,r2F==r'   c                     t        | dz        }t        j                  dg di      }t        ||       t	        j
                  |g      j                         }|j                  |      sJ y )Nr   r   r   )r   r   r   r   r   r1   r2   r   )r!   	data_pathr   r$   s       r%   test_read_single_file_listr     s[    Gn,-IHHc9%&E	"	{+002F==r'   c                 &    | \  }}t        ||       y r)   )r*   r   r   r#   s      r%   $test_read_partitioned_directory_s3fsr     s     HB"2t,r'   c                    ddg}g d}d|gd|gg}d}t        j                  t        j                  |      t        j                  |d      j                  d	      t        j                  t        j                  t        j                  |t              d
      d      t        j                  j                  |      dg d      }t        | |||       t        j                  ||       }|j                         }|j                         j                  d      j!                  d      }	|j                  d      j!                  d      j#                  |	j$                        }
|
d   j'                  d      |
d<   |
d   j'                  d      |
d<   |	j$                  g dk(  j)                         sJ t+        j,                  |	|
       y )Nr   r   r8   foobarrD   r>   r?   rA   rB   r   )rk   r   r   r.   r/   r   rk   rn   TrL   category)rk   r.   r   r   )rO   rP   rQ   rV   rR   rS   rT   rU   randomrandnrW   r   r1   r2   rX   rq   rY   reindexr0   astypealltmassert_frame_equal)r   r4   foo_keysbar_keysr`   ru   ra   r5   r   rb   expected_dfs              r%   r*   r*      s   1vHH		N 	A	1xx-44R8wwrwwrxx?CQG))//!$	
 1
2B $B	>2F	b9GLLNE"++)+4+(  >>W>-KTK*GI$5$5G6  %U+22:>K$U+22:>K!BBGGIII)[1r'   c           	           t         t              st        t                      t	              t         dt         dd             fd |dg        y )Npathsepsep/c                 p   |   \  }}|D ]  }|||fgz   }j                  t        |       dj                  ||      g      }j                  |       |dz
  k(  rddlm} j                  |t               g      }	t        |      }
t        j                  j                  |
      }j                  |	      5 }t        ||       d d d        j                  |	      j                  |j                  k7  sJ j                  |	      j                  |j                   k(  sJ j                  |dg      }j                  |      5 }	 d d d        G ||dz   |       j                  |dg      }j                  |      5 }	 d d d         y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w)Nz{}={}r   r   )FileType_SUCCESS)joinr   format
create_dir
pyarrow.fsr   r
   _filter_partitionr   Tablefrom_pandasopen_output_streamr   get_file_infotypeNotFoundFile)base_dirlevel	part_keysnamer.   valuethis_part_keys	level_dirr   	file_pathfiltered_df
part_tableffile_successDEPTH_visit_levelra   r   r`   r   s                 r%   r   z5_generate_partition_directories.<locals>._visit_level2  s   %e,fE&4-8NHtU+& I MM)$	!/#LL)TV)<=	/NCXX11+>
**95 Q/ 6''	2778;L;LLLL''	2778==HHH&||Y
,CD**<8A 98 Y	>B&||Y
,CD**<8A 987   65 98
 98s$   :FFF+F	F(	+F5	r   )
isinstancer   r   r   r   r   )r   r   r`   ra   r   r   r   s   ` ``@@@r%   rW   rW   '  sX     b*%-+,Eb)WR%<=G @ 1b!r'   c                 D   t        j                  t        |       t              }g }|D ]`  \  }}|j	                  |       t        |t        j                  t        j                  f      rt        j                  |      }|| |   |k(  z  }b | |   j                  |d      S )Nr?   r   )axis)rQ   onesr   rC   appendr   r   r   rO   	TimestamprM   )ra   r   	predicateto_dropr   r   s         r%   r   r   U  s    Bt,IG et ehmmX->->?@LL'ERX&&	 ! i=gA..r'   c                 6   | dz  }|j                          t        j                  j                  t	        j
                  dg di            }t        j                  ||dz         | dz  }|j                          t        j                  j                  t	        j
                  dg di            }t        j                  ||dz         t        j                  | dgg      }|j                  d      j                  t        j                  g dg            sJ y )	NzA=0Br   r   zA=1r8   )ArH   r   r   )r   r   r   r   rO   rP   r   r   r   r   r   chunked_array)r!   dir1table1dir2table2r   s         r%   "test_filter_before_validate_schemar	  e  s     U?DJJLXX!!",,Y/?"@AFNN64.01U?DJJLXX!!",,_/E"FGFNN64.01 MM'^,<+=>E<<##B$4$4i[$ABBBr'   c                    d}d}| t               z  }|j                          g }g }t        |      D ]  }t        ||      }|d   j	                  t
        j                        |d<   |dj                  |      z  }t        j                  j                  |      }	t        |	|       |j                  |	       |j                  |        |dz  j                          dd}
 |
|      }t        j                  |      }|j                  |      sJ d	d
d|j                   dz
  g}|D cg c]  }|j#                  |      j$                   }}t'        j(                  ||      }t        j                  j+                  |D cg c]  }|j-                  |       c}||j.                  j0                        }|j                  |      sJ t'        j(                  |d       t        |      j2                  d d d df   }| dj                  t                     z  }t        j                  j                  |      }t        ||       y c c}w c c}w )Nr}   rB   seeduint32
{}.parquetz_SUCCESS.crcTc                 T    t        j                  | fi |}|j                  ||      S )N)r0   use_threads)r   r1   r2   )pathsr0   r  r   r5   s        r%   read_multiple_filesz5test_read_multiple_files.<locals>.read_multiple_files  s*    ##E4V4||G|EEr'   r   r      r   r/   )namesmetadata)r  rh   )NT)r
   r   r   r   r   rQ   int64r   r   r   r   r   r   touchconcat_tablesr   num_columnsfieldr   r   r   from_arraysr   schemar  iloc)r!   nfilessizedirpath	test_datar  r   ra   r#   r   r  r$   r   to_read	col_namesout	bad_applebad_apple_pathts                      r%   test_read_multiple_filesr(  {  s
   FDGMMOIE6]T* (|**28848,,Q//$$R(UD!T  ~$$&F !'F	*H==""" !Q**Q./G/67w!a%%wI7
--
3Cxx##w$Gw!V]]1%5w$G*3-3]]-C-C $ EH ::h MM't,  1-221bqb59I|2246::N
Y'AN# 8$Gs   '"I?Ic                    d}d}| t               z  }|j                          g }g }g }t        |      D ]  }t        ||      }t	        j
                  ||z  |dz   |z        |_        d|j                  _        |dj                  |      z  }	t        j                  j                  |      }
t        |
|	       |j                  |
       |j                  |       |j                  |	        t        j                  |      }ddg}|j!                  |      j#                         }t%        j&                  |D cg c]  }||   	 c}      }t)        j*                  ||       |j!                  t-        |            j#                         }|j.                  |j.                  k(  sJ t)        j*                  |j1                  |j2                        |       y c c}w )	NrB   r  r   rk   r  uint8stringsr/   )r
   r   r   r   rQ   rV   rk   r   r   r   r   r   r   r   r   r1   r   rX   rO   concatr   r   r   r[   r   r0   )r!   r  r  r   r!  framesr  r   ra   r#   r   r5   r0   r$   rv   r   s                   r%   test_dataset_read_pandasr.    s   FDGMMOIFE6]T*99QXA~6,,Q//$$R(UD!bT  (G	"G   1;;=Fyyf5f!G*f56H&(+   W 6@@BF<<8>>)))&..1A1A.BHM 6s   8Gc                 J   | t               z  }|j                          t        dd      }|dj                  d      z  }t        j
                  j                  |      }t        ||d       t        j                  |d      }|j                         j                  |      sJ y )	Nr}   r   r  r  2.6versionT)
memory_map)r
   r   r   r   r   r   r   r   r   r1   r2   r   )r!   r   ra   r#   r   r5   s         r%   test_dataset_memory_mapr4    s     GMMO	!	$B\((++DHH  $Ee,D"G<<>  '''r'   c                    | t               z  }|j                          t        dd      }|dj                  d      z  }t        j
                  j                  |      }t        ||d       t        j                  t              5  t        j                  |d       d d d        d	D ]:  }t        j                  ||      }|j                         j                  |      r:J  y # 1 sw Y   IxY w)
Nr}   r   r  r  r0  r1  i)buffer_size)   i   )r
   r   r   r   r   r   r   r   r   ry   r   r   r1   r2   r   )r!   r   ra   r#   r   r6  r5   s          r%   #test_dataset_enable_buffered_streamr8    s    GMMO	!	$B\((++DHH  $Ee,	z	"
	& 
# ###.||~$$U+++ #	 
#	"s   C%%C.c                    | t               z  }|j                          t        dd      }|dj                  d      z  }t        j
                  j                  |      }t        ||d       dD ]d  }t        j                  ||      }|j                         j                  |      sJ t        j                  ||      }|j                  |      rdJ  y )	Nr}   r   r  r  r0  r1  )TF)
pre_buffer)r
   r   r   r   r   r   r   r   r   r1   r2   r   r   )r!   r   ra   r#   r   r:  r5   actuals           r%   test_dataset_enable_pre_bufferr<    s    GMMO	!	$B\((++DHH  $Ee,#
##
,||~$$U+++w:>}}U### $r'   c                     g }g }t        |      D ]O  }t        ||      }| dj                  |      z  }|j                  t	        ||             |j                  |       Q |S )Nr  r  )r   r   r   r   r   )r4   r  
file_nrowsr!  r  r   ra   r#   s           r%   _make_example_multifile_datasetr?    sc    IE6]Za0<..q11b$/0T  Lr'   c                     |D cg c]  }t        |j                                }}t        |      t        | j                        k(  sJ y c c}w r)   )r   as_posixr   files)r5   r  r#   s      r%   _assert_dataset_pathsrC  +  sA    .34edS!eE4u:W]]++++ 5s    A
dir_prefix_.c                     | t               z  }|j                          t        |dd      }|dj                  |      z  j                          t	        j
                  |      }t        ||       y )Nr}   rB   r  r>  z	{}staging)r
   r   r?  r   r   r1   rC  r!   rD  r   r  r5   s        r%   test_ignore_private_directoriesrJ  0  sb     GMMO+GB78:E {!!*--446(G'5)r'   c                    | t               z  }|j                          t        |dd      }|dz  j                  d      5 }|j	                  d       d d d        |dz  j                  d      5 }|j	                  d       d d d        t        j                  |      }t        ||       y # 1 sw Y   YxY w# 1 sw Y   7xY w)Nr}   rB   rH  z	.DS_Storewbs	   gibberishz.privater
   r   r?  openwriter   r1   rC  r!   r   r  r   r5   s        r%   test_ignore_hidden_files_dotrQ  A  s    GMMO+GB78:E K
	%	%d	+q	 
, J
	$	$T	*a	 
+ (G'5) 
,	+ 
+	*    B*.B6*B36B?c                    | t               z  }|j                          t        |dd      }|dz  j                  d      5 }|j	                  d       d d d        |dz  j                  d      5 }|j	                  d       d d d        t        j                  |      }t        ||       y # 1 sw Y   YxY w# 1 sw Y   7xY w)Nr}   rB   rH  _committed_123rL  s   abcd_started_321rM  rP  s        r%   #test_ignore_hidden_files_underscorerV  T  s    GMMO+GB78:E $
$	*	*4	0A	 
1 N
"	(	(	.!	 
/ (G'5) 
1	0 
/	.rR  c                    | dj                  |      z  t               z  }|j                  d       t        |dd      }t	        j
                  |      }t        ||       t	        j
                  |      }t        ||       y )Nz{0}dataTparentsr}   rB   rH  )r   r
   r   r?  r   r1   rC  rI  s        r%   /test_ignore_no_private_directories_in_base_pathrZ  g  sw    
 	((44tv=GMM$M+GB78:E &G'5) (G'5)r'   c           	         dgdz  dgdz  z   }t        j                  t        j                  t        t	        |                  t        j                  |      j                         gddg      }t        j                  |t        |       dg       | dz  }|j                          t        j                  |t        |      dg       t        j                  | d	g
      }|j                  |      sJ y )Nxxxr   yyyrk   _partr  partition_cols_private_duplicate_private)ignore_prefixes)r   r   rR   r   r   dictionary_encoder   write_to_datasetr   r   r   r   )r!   partr   private_duplicater2   s        r%   test_ignore_custom_prefixesri  z  s    7Q;%1$DHH
s4y!"
((* w!E
 s7|WIF"66s#45(/y2 ==*/D ;;ur'   c                     | dz  }|j                          t        j                  |      }|j                         }|j                  dk(  sJ |j
                  dk(  sJ y )Nr5   r   )r   r   r1   r2   r\   r  )r!   	empty_dirr5   r$   s       r%   test_empty_directoryrl    sW    )#IOO	*G\\^F??a"""r'   c                    dd l }dd lm} dd lm}  |j
                  t        d      t        d      t        t        d            t        j                  gdz  t        j                  ddd      j                  d	      d
      }|j                  j                         }ddg}	t        j                  j!                  ||dd      }
 |j"                  |
| |	|       t$        j&                  j)                  t+        |       d      }|9|j-                  |d      5 } |j.                  |
j0                  |       d d d        n |j.                  |
j0                  |        |j2                  | |      }t5        |j0                  j6                        }|t5        |
j0                  j6                        k(  sJ |j9                         }|j;                         }|j                  j                         }|	|dt=        |	      z  d  k(  sJ ||   }|	D ]  }||   j                  d      ||<    |r@|j?                  d      j@                  jC                         }|d   j                  |      |d<    |jD                  ||       y # 1 sw Y   *xY w)Nr   
aaabbbbccc
eefeffgeeer}   
2017-01-01
2017-01-11datetime64[D]r?   datetime64[ns])group1group2numnanr   rt  ru  F)r  safepreserve_indexr   _common_metadatarL  r   r   )#pandaspandas.testingtestingpyarrow.parquetparquetrP   listr   rQ   rw  rV   r   r0   tolistr   r   r   rf  osr#   r   r   rN  write_metadatar  r1   r   r  r2   rX   r   r  r   to_pandas_dtyper   )r4   r   r  
index_namerO   r   r   	output_dfcolspartition_byoutput_tablemetadata_pathr   r5   dataset_colsinput_tableinput_dfinput_df_colscolexpected_date_types                       r%   &_test_write_to_dataset_with_partitionsr    sO      |$|$E"Ix"}		,OLSS I ##%Dh'L88''	&u7< ( >LBi#-/ GGLLY1CDM__]D1QBl1115 21 	,--}=b	+57G w~~++,L3|22889999,,.K$$&H $$++-M=c,.?)?)@AAAA~H"3..z:	#  #\\&166FFH%f-445GH	&B)X.= 21s   I<<Jc           
         dd l }dd lm}  |j                  t	        d      t	        d      t	        t        d            t        j                  ddd      j                  d	      d
      }|j                  j                         }t        j                  j                  |      }|t               }n$t        |t               st#        t%        |            }d}t        |      D ]  } |j&                  || |        t)        t+        |       dd      }	|j-                  |	      }
|
D cg c]   }|j.                  j1                  d      s|" }}t3        |      |k(  sJ  |j4                  | |      j7                         }|j9                         }|j;                         }||   }t=        j>                  ||       y c c}w )Nr   rn  ro  r}   rp  rq  rr  r?   rs  )rt  ru  rv  r   rB   r   FT)allow_not_found	recursivez.parquet) r|  r  r  rP   r  r   rQ   rV   r   r0   r  r   r   r   r   r   r   r   r   rf  r   r   r   r#   endswithr   r1   r2   rX   drop_duplicatesr   r   )r4   r   rO   r   r  r  r  nr   selectorinfosinfooutput_filesr  r  s                  r%   $_test_write_to_dataset_no_partitionsr    s     |$|$E"I		,OLSS	 I ##%D88''	2L$&

J/!-
";<
 	
A1XL)'1	3  C	NE&*,H $$X.E%*MUTdii.@.@.LDULM|!!! $"##j
df  $$&H'')H~H)X. Ns   ( F=	F=c                 ,    t        t        |              y r)   r  r   r!   s    r%   %test_write_to_dataset_with_partitionsr    s    *3w<8r'   c                    t        j                  t        j                  dt        j                               t        j                  dt        j                               t        j                  dt        j                               t        j                  dt        j
                               t        j                  dt        j                  d            g      }t        t        |       |	       y )
Nrt  )r   ru  rv  rw  r   us)unitr  )	r   r  r  r<   r  int32	timestampr  r   )r!   r  s     r%   0test_write_to_dataset_with_partitions_and_schemar  
  s    YY		<		<RXXZ8RXXZ8bll.EF	H IF
 +GV%r'   c                 0    t        t        |       d       y )Nr  )r  r  r  s    r%   4test_write_to_dataset_with_partitions_and_index_namer    s    *G/r'   c                 ,    t        t        |              y r)   )r  r   r  s    r%   #test_write_to_dataset_no_partitionsr    s    (W6r'   c                 <    t        | dz         t        | dz         y )Ntest1test2)r  r  r  s    r%   test_write_to_dataset_pathlibr     s    *7W+<=(7):;r'   c                    |\  }}t        j                  t        d      5  t        | dz  |       d d d        t        j                  t        d      5  t	        | dz  |       d d d        y # 1 sw Y   >xY w# 1 sw Y   y xY w)Nz"path-like objects are only allowedr   r  r   r  )r   ry   r   r  r  )r!   r   r   rE  s       r%   &test_write_to_dataset_pathlib_nonlocalr  &  st     EB	y(L	M.g"	. 
N 
y(L	M,g"	. 
N	M	 
N	M 
N	Ms   A/A;/A8;Bwin32z,test fails because of unsupported characters)rz   c                 (    | \  }}t        ||       y Nr   )r  r   s      r%   *test_write_to_dataset_with_partitions_s3fsr  5  s     HB*r'   c                 (    | \  }}t        ||       y r  )r  r   s      r%   (test_write_to_dataset_no_partitions_s3fsr  A  s     HB(r'   c                    t        j                  dg di      }t        j                  j	                  |      }t        |       }t        j                  ||t                      t        j                  |      }|j                  |      sJ y )Nr  r   r   )rO   rP   r   r   r   r   r   rf  r   r   r   )r!   ra   r   r#   r$   s        r%    test_write_to_dataset_filesystemr  J  sg    	sI&	'BHH  $Ew<Dt0AB]]4 F==r'   c                    | dz  }t               }t        j                  t        j                  |      t        j
                  j                  |      dddg      }t        j                  j                  |      }d}t        j                  ||j                        5 }t        |      D ]  }|j                  |        	 d d d        t        j                  |      }	|	j                   j"                  |k(  sJ | dz  }
|j%                  t'        |
            5 }t        j(                  |j                  |       d d d        t        j*                  | |      }|S # 1 sw Y   xY w# 1 sw Y   .xY w)	Nr   )rk   r.   rk   r.   r/   r   	_metadatar   )r   rO   rP   rQ   rV   r   r   r   r   r   r   ParquetWriterr  r   r   ParquetFiler  num_row_groupsr   r   r  r1   )r!   ru   r#   r+   ra   r   
num_groupswriterr   readerr  r   r5   s                r%   _make_dataset_for_picklingr  U  s/   ^#DE	1))//!$ "
$B HH  $EJ			$	-z"Au% # 
. ^^D!F??))Z777k)M		!	!#m"4	5
%,,* 
6 E#G N 
.	- 
6	5s   "E!E%E"%E.c                 :    fd}t        |       } ||      sJ y )Nc                 J    | j                  j                  |             k(  S r)   )loadsdumps)objpickle_modules    r%   is_pickleablez*test_pickle_dataset.<locals>.is_pickleables  s$    m))-*=*=c*BCCCr'   )r  )r!   r  r  r5   s    `  r%   test_pickle_datasetr  q  s#    D )1G!!!r'   c                 H   | dz  }t        j                  g dg dg dd      }t        j                  j	                  |      }t        j                  |t        |      ddg       t        j                  |      j                         }t        j                  ||d	z         y )
Nz
ARROW-3208)r{  r}   g      @d     r   g333333=@)r{  r}   r   r  r  r   r~   )r   r   r   r   r   r   r   )onetwothreer  r  )	root_pathra  zoutput.parquet)rO   rP   r   r   r   r   rf  r   r1   r2   r   )r!   r#   ra   r   s       r%   test_partitioned_datasetr  z  s     \!D	0,& 
B
 HH  $ET(-u~7d#((*ENN5$!112r'   c                    | dz  }t        j                  t        d      D cg c]  }t        j                  d       c}dz  gdg      }t        j                  t        d      D cg c]  }t        j                  d       c}dz  gdg      }t        j                  |t        |             t        j                  |t        |             t        j                  |dg      j                         }|d   j                  d      j                         |d   j                  d      j                         g}|d   j                  d	k(  sJ |d   j                  d      |d   j                  d
      }}|j                  |d         r|j                  |d
         sJ y |j                  |d
         sJ |j                  |d         sJ y c c}w c c}w )NzARROW-3325-datasetrB   r}   f0r_  )r  )read_dictionaryr   r   r   )r   r   r   r	   randsr   rf  r   r1   r2   chunkre  
num_chunksr   )	r!   r#   r   t1t2r$   	ex_chunksc0c1s	            r%   test_dataset_read_dictionaryr    s   ))D	E!H5HqDJJrNH5:;D6	JB	E!H5HqDJJrNH5:;D6	JBc$i0c$i0tf&&*df  AQ113AQ1135I !91$$$AY__Q!3B	yy1yy1&&&yy1&&&yy1&&&% 65s   G(Gc                    t        j                  dt        j                  g dt        j                               i      }t	        j
                  || dz         t	        j
                  || dz         t        j                  dg      }t	        j                  | dz  |      }t        j                  dg di|      }|j                  |      sJ t	        j                  | |      }t        j                  dg di|      }|j                  |      sJ t	        j                  | |      }t        j                  dg di|      }|j                         j                  |      sJ y )Nr   r   zdata1.parquetzdata2.parquet)r   r  r  )r   r   r   r   r   r   )r   r   rR   r  r   r   r  r   r   r1   r2   )r!   r   r  r$   r   s        r%   test_read_table_schemar    s   HHc288Irxxz:;<ENN5'O34NN5'O34YY'(F ]]7_4VDFxxi(8H==""" ]]762Fxx01&AH=="""wv6Fxx01&AH;;=)))r'   c                    t        j                  t        j                  g dt        j                               t        j                  g dt        j                               d      }t        j                  || dz         t        j                  | dz  ddg      }t        j                  ddg      }|j                  ddgk(  sJ |j                  |k(  sJ y )Nr   r   r   r   r/   )r   r  )
r   r   rR   r  r*  r   r   r   r  r3   )r!   r   r$   expected_schemas       r%   *test_read_table_duplicate_column_selectionr    s    HH288Irxxz:88Irxxz:< =ENN5'N23]]7^3c3ZHFii @AO3*,,,==O+++r'   c                    dd l m} | dz  }|dz  dz  dz  j                  d       t        j                  dg d	i      }t        j                  |t        |dz  dz  dz  d
z               |j                  g d      }t        j                  t        |      |      }|j                  g dk(  sJ t        j                  t        |      |      j                         }|j                  g dk(  sJ y )Nr   test_partitioning20121001TrX  r   r   r   )yearmonthday)field_names)partitioning)r   r  r  r  )pyarrow.datasetr5   r   r   r   r   r   r   r  r   r3   r1   r2   )r!   dsr  r   rg  r$   s         r%   test_dataset_partitioningr    s      --I$%,,T,:HHc9%&ENNs9v%,t3nDEG ??'??@D]]IT+F"????IT++/46 "????r'   c                    t        j                  dg di      }t        j                  || dz         t	        t        |       t                     }t        j                  d|      }|j                         }|j                  |      sJ y )Nr   r   r   rF  r   )
r   r   r   r   r   r   r   r1   r2   r   )r!   r   r   r5   r$   s        r%   #test_parquet_dataset_new_filesystemr    sj    HHc9%&ENN5'N23"3w<1BCJ
;G\\^F==r'   c                 ^   t        j                  d      }|j                  d      }t        j                  dg di      }t        j                  || dz         t        |       j                  dd      }t        j                  ||      }|d	z   }|j                  d
   j                  |k(  sJ y )Nfsspecfiler   r   r   \r   r   z/data.parquetr   )r   importorskipr   r   r   r   r   r   replacer1   	fragmentsr#   )r!   r  r   r   r#   r5   r   s          r%   6test_parquet_dataset_partitions_piece_path_with_fsspecr    s       *F""6*JHHc9%&ENN5'N23 w<c*D%G o%HQ$$000r'   c                    t        j                  dg di      }| dz  }g fd}d}t        j                  ||dg||       |dz  dz  |d	z  dz  |d
z  dz  h}t	        t        t        j                              }||k(  sJ y )Nr   r   r  c                 <    j                  | j                         y r)   )r   r#   )written_filepaths_writtens    r%   file_visitorzDtest_parquet_write_to_dataset_exposed_keywords.<locals>.file_visitor  s    \../r'   zpart-{i}.parquet)r  r   basename_template1zpart-0.parquet23)r   r   r   rf  r   rr   pathlibPath)r!   r   r#   r   r  expected_pathspaths_written_setr  s          @r%   .test_parquet_write_to_dataset_exposed_keywordsr	    s    HHc9%&E^#DM0 +t3%%1*;=
 	s
%%s
%%s
%%N
 Cm<=...r'   write_dataset_kwarg))r   T)r   Fc                    ddl m} t        j                  dg di      }| dz  }t	        j
                  |j                        }|\  }}|t	        j
                  t        j                        j                  vsJ ||j                  v sJ t        j                  j                  |dd      5 }t        j                  ||fi ||i |j                  d   \  }	}
}||   |k(  sJ 	 ddd       y# 1 sw Y   yxY w)	zEVerify kwargs in pq.write_to_dataset are passed onto ds.write_datasetr   Nr   r   zout.parquetwrite_datasetT)autospec)r  r5   r   r   inspect	signaturer  r   rf  
parametersmockpatchrU   
mock_calls)r!   r
  r  r   r#   r  keyargmock_write_dataset_name_argsr   s               r%   #test_write_to_dataset_kwargs_passedr    s     !HHc9%&E]"D!!""2"23I"HC g''(;(;<GGGGG)&&&&&			2		>!
E46C:61<<Q?ufc{c!!!	 
?	>	>s   ,7C--C6c                 |   t        j                  t        j                  g dg d      g dd      }t        j                  |      }| dz  }t        j                  || dz  dg       |j                         D cg c]  }|j                         s|j                  ! }}t        |      d	k(  sJ d
|vsJ y c c}w )N)r   r9   r   r8   r   r   )catr  r5   r  r`  r   zcat=c)rO   rP   r   r   r   r   rf  iterdiris_dirr   r   )r!   ra   r   r#   r   subdirss         r%   'test_write_to_dataset_category_observedr  *  s    
 
~~o/J 
B HHRLEYDw"E7  $||~<~!qvv~G<w<1'!!! =s   <B9B9)r}   rB   )NNNr)   )r  )mr   r  r  r  sysnumpyrQ   ImportErrorr   unittest.mockr  pyarrowr   pyarrow.computecomputepcr   r   r   r   r   r   r   pyarrow.testsr	   pyarrow.utilr
   r  r  r   pyarrow.tests.parquet.commonr   r   r   r|  rO   r}  r~  r   markr5   
pytestmarkr&   r,   r6   re   rx   xfailr   AssertionErrorr   r   r   r   r   r   parametrizer  castr  r   r   s3r   r   r   r   r*   rW   r   r	  r(  r.  r4  r8  r<  r?  rC  rJ  rQ  rV  rZ  ri  rl  r  r  r  r  r  r  r  r  skipifplatformr  r  r  r  r  r  r  r  r  r  r  r  r	  r  r   r'   r%   <module>r5     sJ  $   	  
    H H   4 4 kk!!6;;#6#67
 & 3 3
 	- 	- C, C,L ! !B  ~&B	   '1 '1T = =  ! !B +4 +4\ %, %,P N N. /0012""((:.2""((8S1A5""((8S166xrxxzBQF	HI (EF GI 4 A A.             -  -
$2N+"\/  C C* 5$ 5$N "N "NJ ( ( , ,& $ $"	,
 Sz2* 3 * * *$ * *$ Sz2* 3 *"*# 7;266::/| 59+/\ 9 9 % % / /
 7 7 < <
 
.  
. CLLG+I  KK  
       8 " " 3 3 '.*.
,@0 1"/0 . 1 "	"* " "c(  	B   	B  NBs5   Z/ Z= 
[ /Z:9Z:=[[	[[