
    g5                    )   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ 	 d dlZd dlZd dlZd dlmZ d dlZd dlZd dlmZ d dlZd dlmZ d dlm Z m!Z!m"Z"m#Z#m$Z$ 	 d dl%Z&	 d dl'm(Z) 	 d dl*m+Z, ejZ                  jP                  Z.d Z/d Z0d Z1ejd                  d	        Z3ejd                  d
        Z4 ejd                  d      d        Z5ejd                  d        Z( ejd                  ddgddg      d        Z6ejZ                  jV                  d        Z7d Z8ejZ                  jV                  d        Z9ejZ                  jV                  d        Z:ejZ                  jV                  d        Z;ejZ                  jV                  d        Z<ejZ                  jV                  d        Z=ejZ                  jV                  d        Z>ejZ                  jV                  d        Z?d Z@d ZAd  ZBejZ                  j                  d!g d"      d#eDd$eEd%eEfd&       ZFd' ZGd( ZHejZ                  jV                  d)        ZIejZ                  jV                  d*        ZJejZ                  jV                  d+        ZKd, ZLd- ZMejZ                  j                  d. ej                  d/d0      d1d2gg      ejZ                  j                  d3ddg      ejZ                  jV                  d4                      ZOejZ                  jV                  d5        ZPejZ                  jV                  ejZ                  j                  d6               ZRd7 ZSd8 ZTejZ                  jV                  d9        ZUejZ                  jV                  dd:       ZVejZ                  jV                  d;        ZWejZ                  jJ                  ejZ                  jV                  d<               ZXejZ                  jV                  d=        ZYejZ                  jV                  d>        ZZejZ                  jV                  d?        Z[ejZ                  jJ                  ejZ                  jV                  d@               Z\ejZ                  jV                  dA        Z]ejZ                  jV                  dB        Z^ddCZ_ejZ                  jJ                  ejZ                  jV                  dD               Z`ejZ                  jV                  dE        ZaejZ                  jJ                  ejZ                  jV                  dF               ZbejZ                  jV                  dG        ZcejZ                  jV                  dH        ZdejZ                  jV                  dI        ZeejZ                  jV                  dJ        ZfejZ                  jV                  dK        ZgejZ                  jV                  dL        ZhejZ                  jJ                  ejZ                  jV                  dM               ZiejZ                  jV                  ejZ                  j                  dNdO dP g      dQ               ZjejZ                  jV                  ejZ                  j                  dRddg      ejZ                  j                  dNdS dT g      dU                      ZkejZ                  j                  dNdV dW g      dX        ZlejZ                  j                  dNdY dZ g      d[        Zmd\ Znd] ZoejZ                  jV                  ejZ                  jJ                  d^               Zpd_ Zqd` Zrda Zsdb Ztdc Zudd Zvde ZwejZ                  jV                  df        Zxdg ZyddhZzdi Z{dj Z|dk Z}ejZ                  jV                  dl        Z~ejZ                  jV                  dm        ZejZ                  jV                  dn        ZejZ                  jV                  do        ZejZ                  jV                  dp        ZejZ                  jV                  dq        ZejZ                  jV                  dr        ZejZ                  jV                  ds        ZejZ                  jV                  dt        ZejZ                  jV                  du        Zdv Zdw Zdx Zdy ZejZ                  j                  dzddg      d{        Zd| ZejZ                  jV                  d}        ZejZ                  jV                  d~        ZejZ                  jV                  d        ZejZ                  jV                  d        Zd Zd ZejZ                  jV                  ejZ                  j                  dddg      ejZ                  j                  dddg      ejZ                  j                  dRddg      ejZ                  j                  dg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg      d                                    ZejZ                  jJ                  d        Zejd                  d        ZejZ                  jV                  ejZ                  j                  d               ZejZ                  jV                  ejZ                  j                  d               ZejZ                  jV                  ejZ                  j                  d               ZejZ                  jV                  ejZ                  j                  d               ZejZ                  jV                  d        ZejZ                  jV                  d        ZejZ                  jJ                  d        ZejZ                  jV                  d        ZejZ                  jV                  d        ZejZ                  jV                  d        Zd Zd Zd Zd ZejZ                  jV                  d        ZejZ                  jV                  d        Zd ZejZ                  jP                  d        ZejZ                  jP                  d        Zd ZejZ                  jP                  d        ZejZ                  jJ                  d        ZejZ                  jJ                  ejZ                  j                  dg d      d               Zd Zd Zd ZejZ                  jJ                  d        ZejZ                  jJ                  d        ZejZ                  jJ                  d        Zd Zd Zd ZejZ                  jJ                  ejZ                  j                  dg d      d               Zd ZejZ                  jV                  ejZ                  jJ                  d               ZejZ                  jV                  ejZ                  jJ                  ejZ                  jw                  ejx                  dk(  d      d                      ZejZ                  jV                  ejZ                  jJ                  d               ZejZ                  jV                  d        ZejZ                  jV                  ejZ                  jJ                  d               Zd Zd ZejZ                  jV                  ejZ                  jJ                  d               ZejZ                  jV                  ejZ                  jJ                  dÄ               ZejZ                  jV                  ejZ                  jJ                  dĄ               ZejZ                  jV                  ejZ                  jJ                  dń               ZejZ                  jV                  dƄ        ZejZ                  jV                  ejZ                  jJ                  dǄ               ZejZ                  jV                  ejZ                  jJ                  dȄ               ZdɄ ZejZ                  jJ                  ejZ                  jV                  dʄ               ZejZ                  jV                  ejZ                  jJ                  d˄               Zd̄ Z	 dd̈́ZejZ                  jV                  d΄        ZejZ                  jV                  ejZ                  jJ                  dτ               ZdЄ Zdф Zd҄ ZejZ                  jV                  ejZ                  j                  dӄ               ZdԄ ZejZ                  jJ                  dՄ        Z֐d	dքZdׄ Zd؄ ZejZ                  jV                  dل        ZejZ                  jV                  dڄ        ZejZ                  jV                  dۄ        ZejZ                  jV                  d܄        ZejZ                  jV                  ejZ                  jJ                  d݄               ZejZ                  jV                  ejZ                  jJ                  dބ               Zd߄ Zd Zd Zd Zd ZejZ                  j                  ejZ                  jV                  d               Zd ZejZ                  jV                  d        ZejZ                  jV                  d        ZejZ                  jV                  ejZ                  jJ                  d               Zd ZejZ                  jV                  d        ZejZ                  jV                  ejZ                  j                  d               ZdZejZ                  jV                  ejZ                  j                  d               ZejZ                  jV                  d        ZejZ                  jP                  d        ZejZ                  jP                  d        ZejZ                  jP                  d        ZejZ                  jP                  d        ZejZ                  jP                  d        ZejZ                  jP                  d        ZejZ                  jP                  d        ZejZ                  j                  dddg      d        ZejZ                  j                  dddg      d        Zd Zd ZejZ                  j                  dd      d        Zd  ZejZ                  jV                  d        Zd Zd ZejZ                  j                  dddg      d        Z d Zd Zy# e$ r dZY w xY w# e$ r dZ&Y Yw xY w# e$ r dZ)Y `w xY w# e$ r dZ,Y gw xY w(
      N)copytree)quote)is_threading_enabled)FSProtocolClassProxyHandler_configure_s3_limited_user_filesystem_uri
change_cwdc           	      4   dd l }dd l} |j                   ddd      } |j                  d      }|j                  g d      }g }t	        |       D ].  }|j                  ||t        |      t        |      f       ||z  }0 t        j                  |g d      S )	Nr   i        )days)greenblueyellowredorange)dateindexvaluecolorcolumns)
datetime	itertools	timedeltacyclerangeappendfloatnextpd	DataFrame)nr   r   dayintervalcolorsdatais           O/var/www/openai/venv/lib/python3.12/site-packages/pyarrow/tests/test_dataset.py_generate_datar+   E   s    
(

D!Q
'C!x!!q)H__IJFD1XS!U1XtF|45x  <<&IJJ    c           
         t        j                  t        j                  dt        j                               t        j                  dt        j                               t        j                  dt        j
                               t        j                  dt        j                               g      }t         j                  j                  | |d      }|j                         S )Nr   r   r   r   F)schemapreserve_index)
par.   fielddate32int64float64stringTablefrom_pandasreplace_schema_metadata)dfr.   tables      r*   _table_from_pandasr;   U   s    YY
%
"((*%
"**,'
"))+&	 F HH  F5 IE((**r,   c                 :   | j                         D ]|  }|j                         5 }t        |t        j                        sJ |j
                  rJ |j                         sJ |j                         sJ |j                         rJ 	 d d d        ~ y # 1 sw Y   xY wN)	get_fragmentsopen
isinstancer0   
NativeFileclosedseekablereadablewritable)datasetfragmentnfs      r*   +assert_dataset_fragment_convenience_methodsrI   `   sz    ))+]]_b"--000yy =;;= =;;= ={{}$$} _ ,_s   A!BB	c                     t        j                         } ddg}t        |      D ]  \  }}dj                  ||      }| j	                  |       | j                  |      5 }t        t        d            t        t        t        t        d                  t        t        t        t        d                  |gdz  t        d      D cg c]  }|dz  t        |dz        d c}g}t        j                  dt        j                         fdt        j                         fd	t        j                         fd
t        j                         fdt        j                   t        j                         t        j                         d      fg      }t        j"                  ||      }	t        j$                  j'                  |	g      }
t)        j*                  |
|       d d d         | S c c}w # 1 sw Y   xY w)Nzsubdir/1/xxxzsubdir/2/yyyz{}/file{}.parquetr      abi64f64strconststructr.   )fs_MockFileSystem	enumerateformat
create_diropen_output_streamlistr   mapr    rQ   r0   r.   r3   r4   r5   rS   record_batchr6   from_batchespqwrite_table)mockfsdirectoriesr)   	directorypathoutjr(   r.   batchr:   s              r*   ra   ra   k   s   !F 	K
 "+.9")))Q7)$&&t,U1XSa)*SeAh'(a8=aA1q1u3q1u:.AD YY
#

%		$"((*%299288:BIIK%HIJ  F OOD8EHH))5'2ENN5#&% -, /. M B -,s    A(G+G&"C7G+&G++G5	c                    	 ddl m}m} ddlm}  |       fdt               		fd}| j                  |d|        | |            }t        j                  	fd       }||fS )	Nr   )LocalFileSystemPyFileSystemr   )r   c                 ^    | D ch c]  }j                  t        |             c}S c c}w r=   )normalize_pathrQ   )pathsplocalfss     r*   
normalizedz#open_logging_fs.<locals>.normalized   s*    8=>1&&s1v.>>>s   !*c                     j                  t        |            }j                  |       | j                  j	                  |      S r=   )rl   rQ   add_fsopen_input_file)selfrd   ro   openeds     r*   rt   z(open_logging_fs.<locals>.open_input_file   s8    %%c$i0

4xx''--r,   rt   c              3      K   j                          	 d          |       k(  sJ y #         |       k(  sJ w xY wwr=   )clear)expected_openedrp   rv   s    r*   assert_opensz%open_logging_fs.<locals>.assert_opens   sI     	Ef%O)DDDD:f%O)DDDDs   A- AAA)	
pyarrow.fsri   rj   test_fsr   setsetattr
contextlibcontextmanager)
monkeypatchri   rj   r   rt   rU   rz   ro   rp   rv   s
          @@@r*   open_logging_fsr      so    8%G? UF. &7I	l7+	,BE E |r,   module)scopec           
      p   | j                   j                  j                  d       | j                   j                  j                  d       t        d      }t	        j
                         }t        |      }t        d||dz        D cg c]  }|j                  |||dz  z     c}\  }}}}|j                  d       t        |      }t        t        d||dz        D cg c]  }|j                  |||dz  z     c}      D ]O  \  }}	dj                  |      }
|j                  |
      5 }t        j                  t        |	      |       d d d        Q |j                  d	       |j!                  |j"                  j$                  j&                  |j(                  g      D ]o  \  }}	 d
j                  | }dj                  |      }
|j                  |       |j                  |
      5 }t        j                  t        |	      |       d d d        q |j                  d       |j!                  |j"                  j$                  j*                  |j"                  j$                  j,                  g      D ]o  \  }}	 dj                  | }dj                  |      }
|j                  |       |j                  |
      5 }t        j                  t        |	      |       d d d        q |j                  d       |j!                  d      D ]q  \  }}	dj                  |      }dj                  |      }
|j                  |       |j                  |
      5 }t        j                  t        |	      |       d d d        s |S c c}w c c}w # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   8xY w# 1 sw Y   xY w)Npandasparquet  r      plain
   zplain/chunk-{}.parquetr.   zschema/{}/{}z{}/chunk.parquethivezhive/year={}/month={}
hive_colorr   zhive_color/color={})configpyarrowrequiresr+   rU   rV   lenr   ilocrY   rW   rX   rZ   r_   r`   r;   groupbyr   dt	dayofweekr   yearmonth)requestr9   ra   r$   r)   df_adf_bdf_cdf_dchunkrd   re   partfolders                 r*   multisourcefsr      s&   NN##H-NN##I.		B!F 	BA9>q!QT9JK9JAbgga!Q$/9JKD$d gD	AU1aB=OP=Otyy1QU73=OPQ5'..q1&&t,NN-e4c: -, R h||TYY\\%;%;TZZ$HIe&&&-!((0&!&&t,NN-e4c: -,	 J f||TYY\\%6%6		8J8J$KLe/(//6!((0&!&&t,NN-e4c: -,	 M l#||G,e&--d3!((0&!&&t,NN-e4c: -,	 - MM L Q,, -, -, -,s<   M;N ) N' N9 N N,N	N	N)	,N5	c           
         t        j                         }t        j                  dd      }t        j                  d      }t        j
                  t        j                  t        j                  dt        j                               t        j                  dt        j                               g            |_        t        j                  | |||      }|j                         S )NsubdirT	recursivegroupkey)dsParquetFileFormatrU   FileSelectorFileSystemFactoryOptionsDirectoryPartitioningr0   r.   r1   int32r5   partitioningFileSystemDatasetFactoryfinish)ra   rX   selectoroptionsfactorys        r*   rF   rF      s    !!#Fx48H))(3G33
		HHWbhhj)HHUBIIK(
 	G ))&(FGLG>>r,   TFthreadedserial)paramsidsc                 B    | j                    G fdd      } |       S )z]
    Fixture which allows dataset scanning operations to be
    run with/without threads
    c                   F    e Zd Z fdZ fdZd Zd Zd Zd Zd Z	d Z
y	)
dataset_reader.<locals>.readerc                     | _         y r=   use_threads)ru   r   s    r*   __init__z'dataset_reader.<locals>.reader.__init__   s    *Dr,   c                 .    d|v rt        d      |d<   y )Nr   z9Invalid use of dataset_reader, do not specify use_threads)	Exception)ru   kwargsr   s     r*   _patch_kwargsz,dataset_reader.<locals>.reader._patch_kwargs  s)    &$& & %0F=!r,   c                 H    | j                  |        |j                  di |S N )r   to_tableru   rF   r   s      r*   r   z'dataset_reader.<locals>.reader.to_table	  s&    v&#7##-f--r,   c                 H    | j                  |        |j                  di |S r   )r   
to_batchesr   s      r*   r   z)dataset_reader.<locals>.reader.to_batches  &    v&%7%%///r,   c                 H    | j                  |        |j                  di |S r   )r   scannerr   s      r*   r   z&dataset_reader.<locals>.reader.scanner  s$    v&"7??,V,,r,   c                 J    | j                  |        |j                  |fi |S r=   )r   head)ru   rF   num_rowsr   s       r*   r   z#dataset_reader.<locals>.reader.head  s&    v&7<<3F33r,   c                 J    | j                  |        |j                  |fi |S r=   )r   take)ru   rF   indicesr   s       r*   r   z#dataset_reader.<locals>.reader.take  s&    v&7<<2622r,   c                 H    | j                  |        |j                  di |S r   )r   
count_rowsr   s      r*   r   z)dataset_reader.<locals>.reader.count_rows  r   r,   N)__name__
__module____qualname__r   r   r   r   r   r   r   r   r   s   r*   readerr      s+    	+	0	.	0	-	4	3	0r,   r   )param)r   r   r   s     @r*   dataset_readerr      s"     --K"0 "0H 8Or,   c           	      *
   t        j                  t        j                  dt        j                               g      }t	        j
                         }ddg}t        dd      D cg c]  }t	        j                  d      |k(   }}t        ||      D cg c]  \  }}|j                  || |       }}}t	        j                  d      t	        j                  d      k(  }	t	        j                  |||| |		      }
t        j                  j                  |||| ||	
      }|
|fD ]"  }t        |t        j                        sJ t        |j                  t        j
                        sJ |j                  j                  |	      sJ t!        |j"                        t!        |      k(  sJ t%        |j'                               }t        |||      D ]2  \  }}}|j                  j                  |      sJ |j(                  |k(  sJ t        |j                  t        j
                        sJ t        |t        j*                        sJ |j,                  dgk(  sJ |j.                  dk(  sJ t%        |j1                               }|j.                  t3        |      cxk(  rdk(  sJ  J t        |d   t        j*                        sJ |d   j(                  |k(  sJ |d   j,                  dgk(  sJ |d   j.                  dk(  r3J  t%        |j'                  t	        j                  d      dk(              }t3        |      dk(  r#J  t	        j                  ||||       }|j                  j                  t	        j                  d            sJ t        j                  j                  ||||       }|j                  j                  t	        j                  d            sJ |j'                         D ]2  }|j                  j                  t	        j                  d            r2J  t5        j6                  t8        d      5  t	        j                  |||       d d d        t5        j6                  t8        d      5  t	        j                  |||d       d d d        t5        j6                  t8        d      5  t        j                  j                  ||       d d d        y c c}w c c}}w # 1 sw Y   xY w# 1 sw Y   fxY w# 1 sw Y   y xY w)NrR   subdir/1/xxx/file0.parquetsubdir/2/yyy/file1.parquetr   rK   r   leveli9  )r.   rX   
filesystemroot_partition)r.   rX   r   
partitionsr   r   filter   r.   rX   r   Tzincorrect typematch)r.   rX   r   rX   )r0   r.   r1   r3   r   r   r   zipmake_fragmentscalarFileSystemDataset
from_pathsr@   rX   partition_expressionequalsr}   filesr[   r>   rd   ParquetFileFragment
row_groupsnum_row_groupssplit_by_row_groupr   pytestraises	TypeError)ra   r.   file_formatrm   xr   rd   r   	fragmentsr   dataset_from_fragmentsdataset_from_pathsrF   rG   	partitionrow_group_fragmentss                   r*   test_filesystem_datasetr   $  s   YY
"((*% F &&(K)+GHE16q!=A"((6"a'J=#&uj#9;#9ZT4 **4>#9  ;XXg&"))D/9N11&. --88f[Vn 9 
 +,>?'2#7#7888'.."*>*>???++22>BBB7==!SZ///..01	),Y
E)J%Hi0077	BBB==D(((hoor/C/CDDDh(>(>???&&1#---**a///"&x'B'B'D"E**c2E.FK!KKKKK1!4b6L6LMMM&q)..$666&q)44;;;&q)88A=== *K ..bhhw6G16L.MN	9~"""/ @4 ""&G ''..ryy??? ""--f[V . G ''..ryy???))+,,33BIIdODDD , 
y(8	9
YV< 
: 
y(8	9
Yv$/	C 
: 
y(8	9
''	+'F 
:	9{ >;j 
:	9 
:	9 
:	9s0   #S&S+S1>S=;"T	1S:=T	Tc                    t        j                  t        j                  dt        j                               g      }t	        j
                         }dg}t        j                  j                  |||t        j                               }|j                          t        j                  t              5  | j                  |       d d d        y # 1 sw Y   y xY w)Nf1znonexistingfile.arrowr   )r0   r.   r1   r3   r   IpcFileFormatr   r   rU   ri   r>   r   r   FileNotFoundErrorr   )r   r.   r   rm   rF   s        r*   1test_filesystem_dataset_no_filesystem_interactionr  l  s    YY
rxxz" F ""$K$%E ""--f[%%' . G  
(	)( 
*	)	)s   2CCc                    t        | t        j                        sJ t        | j                  t        j
                        sJ t	        j                  g dt	        j                               }t	        j                  g dt	        j                               }|j                  |       D ]b  }t        |t        j                        sJ |j                  d      j                  |      sJ |j                  d      j                  |      rbJ  |j                  |       j                         D ]D  }t        |t        j                        sJ t        |j                   t        j"                        rDJ  |j%                  |       }t        |t        j&                        sJ t)        |      dk(  sJ t        j*                  d      dk(  }| j%                  d|      }|j-                  d	      j/                         }|d   ddgk(  sJ |d
   ddgk(  sJ t1        |d	         ddgk(  sJ t1        |d         ddgk(  sJ t        j*                  d      dk(  }| j%                  d|      }|j-                  d	      j/                         }|d   g dk(  sJ |d
   g dk(  sJ |d	   g dk(  sJ |d   g dk(  sJ t        j*                  d      t        j*                  d
      t        j*                  d      dk(  d}| j%                  d|      }|j-                  d      j/                         }t3        |      g dk(  sJ |d   g dk(  sJ |d
   g dk(  sJ |d   g dk(  sJ t5        |        y )Nr   r   r   rK   r   typer   r   r   rO   T)r   r   r   rP         ?r   r   xxxyyy)rS   rN   1)r   r   r   r   )r        @r  r  )r   r   r   r   )r	  r	  r
  r
  )rO   rP   new)r   r   )
r   r   r   r   r   r   rK   rK   r   r   )
        r  r  r         @r        @r  r  r  r  )
FFTTFFFFTT)r@   r   Datasetr.   r0   Schemaarrayr3   r4   r   RecordBatchcolumnr   r   scan_batchesTaggedRecordBatchrG   Fragmentr   r6   r   r1   sort_by	to_pydictsortedr[   rI   )	rF   r   expected_i64expected_f64rg   r:   	conditionresult
projections	            r*   test_datasetr!    s1   grzz***gnnbii000 88O"((*=L88O"**,?L**73%000||A%%l333||A%%l333 4
  ''0==?%!5!5666%.."++666 @ ##G,EeRXX&&&u:1$I$yAF^^G$..0F%=QF"""%=RH$$$&/"q!f,,,&- UEN222 )S0I$yAF^^G$..0F%=L(((%=0000'?l***%=8888 xxxxxx(C/J
 $
CF^^E",,.F<0000%=::::%= ; ; ; ;%= 7 7 7 7/8r,   c                 b    | j                  dd      }t        |      }|j                  dk(  sJ y )N      )fragment_readaheadbatch_readahead   )r   r!   num_columns)rF   r   rg   s      r*   test_scanner_optionsr)    s4      B JGME!!!r,   c                    |j                  | t        j                               }t        |t        j
                        sJ t        j                  t        j                        5  |j                  | dg       d d d        |j                  | dgt        j                               }|j                  | j                  k(  sJ |j                  t        j                  dt        j                         fg      k(  sJ t        |t        j
                        sJ |j                         }|j                         D ].  }|j                  |j                  k(  sJ |j                  dk(  r.J  ||j!                         j#                         k(  sJ |j                  |j                  k(  sJ t%        |j&                        D ]=  }t        j(                  |g      }|j+                  |      |j+                  |      k(  r=J  t        j                  t        j,                        5  |j+                  t        j(                  |j&                  g             d d d        |j&                  |j/                         k(  sJ |j                  | g dt        j                               }|j                         }g d}|j0                  |k(  sJ |j3                  d      }|d	   j5                         d
gdz  dgdz  z   k(  sJ |d   j5                         dgdz  dgdz  z   k(  sJ |d   j5                         dgdz  k(  sJ |d   j5                         dgdz  k(  sJ y # 1 sw Y   xY w# 1 sw Y   xY w)N)memory_poolunknownr   rO   )r   r+  r   )
__filename__fragment_index__batch_index__last_in_fragmentr.  r-  r   r   r   r   r/  r   r0  T)r   r0   default_memory_poolr@   r   Scannerr   r   ArrowInvaliddataset_schemar.   projected_schemar3   r   r   r(  	to_readerread_allr   r   r  r   ArrowIndexErrorr   column_namesr  	to_pylist)	rF   r   r   r:   rg   r)   r   expected_namessorted_tables	            r*   test_scannerr=    s(   $$R335 % 7Ggrzz***	r	'w< 
( $$Wug131G1G1I % KG!!W^^333##ryy5"((*2E1F'GGGGgrzz***E##%||w77777  A%%% & G%%'002222<<7333335>>"((A3-zz'"gll7&;;;; # 
r))	*RXXu~~./0 
+ >>W//1111$$W 7M 241G1G1I	 % KG
 E=N///==!34L%//1	%&*	%&*	+, , , *+557QC!GqcAg<MNNN(224b@@@,-779dVb[HHHQ 
(	'& 
+	*s   %M 0M!M!M+c                    t        j                         }t        j                         }t        j                  |       	 |j	                         }t
        j                  j                  |       }|j                         }|j	                         |kD  sJ 	 t        j                  |       y # t        j                  |       w xY wr=   )	r0   r1  system_memory_poolset_memory_poolbytes_allocatedr   r2  from_datasetr   )rF   old_poolpoolallocated_beforer   _s         r*   test_scanner_memory_poolrG    s     %%'H   "Dt%//1**))'2##%(8888
8$8$s   AB* *Cc                    |j                  | d      }|t        j                  j                  g | j                        k(  sJ |j                  | ddg      j                         }|ddgik(  sJ |j                  | ddgt        j                  d      dkD        j                         }|dddgik(  sJ |j                  | d	dg      j                         }|dt        t        d
            dz  ik(  sJ t        | j                               }|j                  ddg      j                         }|ddgik(  sJ |j                  d	dg      j                         }|dt        t        d
            ik(  sJ y )Nr   rT   r   rO   r   r   r   r   rK      r   )r   r0   r6   r^   r.   r  r   r1   r[   r   r!   r>   )rF   r   r  rG   s       r*   	test_headrK  	  s{     !,FRXX**2gnn*EEEE  !eW =GGIFeaS\!!!  !eW(*!(; ! ==FY[ eaV_$$$  $ @JJLFeT%(^a/0000G))+,H]]1ug].88:FeaS\!!!]]4%]1;;=FeT%(^,,,,r,   c                    t        | j                               }ddgt        j                  ddg      fD ]9  }|j	                  |      j                  |      }|j                  ||      |k(  r9J  t        j                  t              5  |j                  |t        j                  dg             d d d        ddgt        j                  ddg      fD ]7  }|j                  | |      |j	                  |       j                  |      k(  r7J  t        j                  t              5  |j                  | t        j                  dg             d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)Nr   rK   r   r'  r   )	r!   r>   r0   r  r   r   r   r   
IndexError)rF   r   rG   r   expecteds        r*   	test_takerO     s:   G))+,HFBHHaV,-!**8499'B""8W5AAA . 
z	"Hbhhsm4 
# FBHHaV,-""W!/!8!8!A!F!Fw!OP 	P P . 
z	"GRXXrd^4 
#	" 
#	" 
#	"s   
'E''E#E #E,c                    t        | j                               }|j                  |      dk(  sJ |j                  |t        j                  d      dk(        dk(  sJ |j                  |       dk(  sJ |j                  | t        j                  d      dk(        dk(  sJ |j                  | t        j                  d      dk\        dk(  sJ |j                  | t        j                  d      d	k        d	k(  sJ y )
Nr   rO   r   r   r   r   r   rK   r   )r!   r>   r   r   r1   )rF   r   rG   s      r*   test_count_rowsrQ  0  s   G))+,H$$X.!333$$%A- % /234 4 4 $$W-333$$)Q. % 0345 5 5 $$WRXXe_5I$JaOOO$$WRXXe_q5H$IQNNNr,   c                      t         j                  t         j                  t         j                  g} | D ]+  }t	        j
                  t              5   |        d d d        - y # 1 sw Y   8xY wr=   )r   
FileFormatr2  Partitioningr   r   r   )classesklasss     r*   test_abstract_classesrW  @  sJ    




G
 ]]9%G &% %%s   A!!A*	c                  N   t        j                  t        j                  dt        j                               t        j                  dt        j                               g      } t
        j                  t
        j                  t
        j                  fD ]:  } ||       }t        |t
        j                        sJ | ||       k(  sJ |dk7  r:J  t        j                  t        j                  dt        j                               t        j                  dt        j                               g      } t        j                  |       }t        |j                        dk(  sJ t        d |j                  D              sJ |j                  d      }t        |t
        j                        sJ t        j                  d      d	k(  t        j                  d      d
k(  z  }|j!                  |      sJ t#        j$                  t         j&                        5  |j                  d       d d d        |j                  d      }t        j                  d      d	k(  }|j!                  |      sJ |t        j                  | d      k7  sJ t        j                  t        j                  dt        j                               t        j                  dt        j                               g      } t        j                  | d      }t        |j                        dk(  sJ t        d |j                  D              sJ |j                  d      }t        j                  d      t        j(                  d      k(  t        j                  d      t        j(                  d	      k(  z  }|j!                  |      sJ |j                  d      }t        j                  d      j+                         t        j                  d      t        j(                  d	      k(  z  }|j!                  |      sJ dD ]?  }t#        j$                  t         j&                        5  |j                  |       d d d        A |t        j                  | d      k7  sJ t        j                  t        j                  dt        j                               t        j                  dt        j                               g      } t        j                  |       }t        |j                        dk(  sJ t        d |j                  D              sJ |j                  d      }t        |t
        j                        sJ t        j                  d      d	k(  t        j                  d      d
k(  z  }|j!                  |      sJ t#        j$                  t         j&                        5  |j                  d       d d d        |t        j                  | d      k7  sJ t        j                  t        j                  dt        j                               t        j                  dt        j,                  t        j.                         t        j0                                     g      } t        j                  | dt        j2                  g d      i      }|j                  d   J |j                  d   j5                         g dk(  sJ |t        j                  | d       k7  sJ t        j                  t        j                  t        j                  dt        j                               t        j                  dt        j,                  t        j.                         t        j0                                     g      dt        j2                  g d      i      }|j                  d   J |j                  d   j5                         g dk(  sJ t        j6                  t        j2                  t9        d            t        j2                  d  t9        d      D              t        j2                  d!gd"z  d#gd"z  z         gg d$%      }t        j                  d&t        j0                         fg      }t
        j                  t
        j                  t
        j                  fD ]|  }t;        j<                         5 } ||      }t        j>                  ||d'|(       t        j@                  |d'|(      }	|	jC                         }
|
j!                  |      sJ 	 d d d        ~ t;        j<                         5 }t        j                  |      }t        j>                  ||d'|(       d }	t#        j$                  tD        d)*      5  t        j@                  |d'tG        d      (      }	d d d        |	J 	 d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   HxY w# 1 sw Y   KxY w# 1 sw Y   y xY w)+NrO   rP   zother objectr   r   r   c              3   $   K   | ]  }|d u  
 y wr=   r   .0r   s     r*   	<genexpr>z$test_partitioning.<locals>.<genexpr>]       <";QqDy";   z/3/3.14/rK   gQ	@z/prefix/3/aaaz/3/nonesegment_encodingalphabetaxyz)null_fallbackc              3   $   K   | ]  }|d u  
 y wr=   r   rZ  s     r*   r\  z$test_partitioning.<locals>.<genexpr>s  r]  r^  z/alpha=0/beta=3/r   z/alpha=xyz/beta=3/)z/alpha=one/beta=2/z/alpha=one/z
/beta=two/otherc              3   $   K   | ]  }|d u  
 y wr=   r   rZ  s     r*   r\  z$test_partitioning.<locals>.<genexpr>  r]  r^  z3_3.14_prefix_3_aaa_)firstsecondthirddictionariesr      c              3   D   K   | ]  }t        j                            y wr=   randomr[  rF  s     r*   r\  z$test_partitioning.<locals>.<genexpr>       %Iy!fmmoy    rM   r   rN   r   f2r   namesr   ipcrX   r   z,Expected Partitioning or PartitioningFactoryr   )$r0   r.   r1   r3   r4   r   r   HivePartitioningFilenamePartitioningr@   rT  r   rn  allparse
Expressionr   r   r   r3  r   is_null
dictionaryint8r5   r  r:  r:   r   tempfileTemporaryDirectorywrite_datasetrF   r   
ValueErrorint)r.   rV  r   exprrN  
shouldfailr:   partitioning_schematempdir	load_backload_back_tables              r*   test_partitioningr  K  s   YY

#


% F **B,?,?))+V},888uV},,,~---+ YY
"((*%


% F ++F3L|(()Q...<,";";<<<<j)DdBMM***!Q&288E?d+BCH;;x   	r	'?+ 
( e$Dxx A%H;;x   233FVTTTTYY
"((*%
$ F &&vUCL|(()Q...<,";";<<<<01D	'	biil	*	&	RYYq\	)	+  ;;x   23D	'		"	"	$(8BIIaL(H	I  ;;x   I
]]2??+z* ,+ J 2..vWMMMMYY
"((*%


% F **62L|(()Q...<,";";<<<<i(DdBMM***!Q&288E?d+BCH;;x   	r	'?+ 
( 2226FSSSSYY
"((*%
bggi=> F ++eRXX.J%KLL $$Q'///$$Q'113 8$ $ $ $233FNNNN**
		HHWbhhj)HHUBMM"'')RYY[AB
 	
 28889
L $$Q'///$$Q'113 8$ $ $ $ HH
rRXX%IuRy%II
#sebj()+ #E
 ))fbiik%:$;<**B,?,?))+((*g !45LUG$)F

750<>I'002O"))%000 +*+ 
	$	$	&'//0CD
 %L	B	]]:!OQ

75s1vNIQ     
'	&A 
(	'< ,+$ 
(	'L +*Q Q 
'	&sV   gg(
g5AhAh#"hhg%(g2	5g?h	h	hh$c           
      $   t        j                  t        j                  dt        j                               t        j                  dt        j                               g      }t        j                  |      t        j                  |      t        j                  |      t        j                  |d      t        j                  |d      t        j                  |dd      g}|D ]'  }| j                  | j                  |            |k(  r'J  y )NrO   rP   r_  r`  rd  )ra  re  )r0   r.   r1   r3   r4   r   r   r|  r}  loadsdumps)pickle_moduler.   partsr   s       r*   test_partitioning_picklingr    s    YY

#


% F
 	  (
F#
'
  &A
@
FV5QE ""=#6#6t#<=EEE r,   z@flavor, expected_defined_partition, expected_undefined_partition))r|  )zfoo=A/bar=ant%20bee r  r  )r   )z	A/ant beer  r  )r}  )r  z
A_ant bee_)r  rF  flavorexpected_defined_partitionexpected_undefined_partitionc                    t        j                  dt        j                         fdt        j                         fg      } t        t        |       |      }|j                  t        j                  d      dk(  t        j                  d      dk(  z        |k(  sJ |j                  dj                  |            j                  t        j                  d      dk(  t        j                  d      dk(  z        sJ |j                  t        j                  d      dk(  t        j                  d      dk(  z  t        j                  d      dk(  t        j                  d      dk(  z  z        |k(  sJ |j                  t        j                  d      dk(  t        j                  d      dk(  z  t        j                  d      dk(  t        j                  d      dk(  z  z        |k(  sJ | dk7  rVt        j                  t         j                  d	      5  |j                  t        j                  d      dk(         d d d        y |j                  t        j                  d      dk(        d
k(  sJ y # 1 sw Y   y xY w)NfoobarrT   zant beeA/r|  zDNo partition key for foo but a key was provided subsequently for barr   )zbar=ant%20beer  )r0   r.   r5   getattrr   rX   pcr1   r  joinr   r   r   r3  )r  r  r  r  r   s        r*    test_dataset_partitioning_formatr    s:    ))eRYY[%9E299;;O$PQ&72v&.ABL 	RXXe_	9bhhuoQT>TUV%	&	&
 chh'ABCJJ	%C	BHHUOy$@A   	hhuo*rxx#/EF9,%C1GHJ	
 &		&	& 	hhuo*rxx#/EF9,%C1GHJ	
 (		(	( ## ]]OO(
 "((5/Y">A
 
 ""RXXe_	%AD I
 
 	
 

 
s   (I22I;c                     t        j                  t        j                  g dg dd            } t        j                  d      }t        j                  d      }| j                  |dz   ||z
  |dz  |j                  d      |z  d	
      }t        j                  g dg dg dg dd	      }|j                  |      sJ y )Nr   r   rK   )r   r   r   rL   rM   rN   r   r   r4   )za+1zb-aza*2za/br   r   rK   r   )r   r   )r   r      )      ?r  g      ?)r   rF   r0   r:   r1   r   castr   )rF   rM   rN   r  rN  s        r*   $test_expression_arithmetic_operatorsr  $  s    jj		"BCDG
A
A1u1u1uvvi 1$	' F xx H =="""r,   c                  |   dD  cg c]  } t        j                  |       | k(   c} \  }}}t        j                  |      ddik(  sJ t        j                  |      t        j                  |      k(  sJ t        j                  ||z  |z        dD  ci c]  } | |  c} k(  sJ t        j                  d      dk\  }t        j                  |      i k(  sJ t        j                  ||z        ddik(  sJ t        j                  d      j	                         }t        j                  |      dd ik(  sJ y c c} w c c} w )NabcrM   drK   )r   r1   get_partition_keys_get_partition_keysr  )frM   rN   cnopenulls         r*   test_partition_keysr  5  s(   )./Arxx{a/GAq!  #Sz111  #r'='=a'@@@@  Q+e/De1e/DDDD88C=AD  &",,,  T*sCj88888C=  "D  &3+555 0 0Es   D4
D9c                  N   t        j                         } t        j                  ddg      }t        j                  d      }| j                  t               k(  sJ |j                  ddhk(  sJ | j                  dk(  sJ |j                  dk(  sJ | | k(  sJ | |k7  sJ | |k7  sJ y )NrM   rN   dictionary_columnsmscoerce_int96_timestamp_unitns)r   ParquetReadOptionsr  r}   r  )opts1opts2opts3s      r*   test_parquet_read_optionsr  C  s    !!#E!!c3Z@E!!dCE##su,,,##Sz111,,444,,444E>>E>>E>>r,   c                  b   t        j                         } t        j                  dh      }t        j                  d      }| j                  t        j                         k(  sJ |j                  t        j                  dg      k(  sJ |j                  t        j                  d      k(  sJ y )NrM   r  sr  )r   r   read_optionsr  )pff1pff2pff3s      r*   %test_parquet_file_format_read_optionsr  U  s    !DC59DC@D 5 5 7777 5 5# OOOO 5 5$'!) ) ) )r,   c                     t        j                         } t        j                  d      }t        j                  dd      }t        j                  dd      }t        j                  dd	
      }t        j                  d      }t        j                  ddd      }t        j                  d|      }| j                  du sJ | j
                  dk(  sJ t               r| j                  du sJ | j                  dk(  sJ | j                  dk(  sJ | j                  du sJ |j                  du sJ |j
                  dk(  sJ t               r|j                  du sJ |j                  du sJ |j
                  dk(  sJ t               r|j                  du sJ |j                  du sJ |j
                  dk(  sJ t               r|j                  du sJ |j                  dk(  sJ |j                  d	k(  sJ |j                  du sJ t               r|j                  du sJ |j                  |k(  sJ |j                  | j                  k7  sJ | | k(  sJ | |k7  sJ ||k7  sJ ||k7  sJ || k7  sJ || k7  sJ || k7  sJ y )N   buffer_sizei    T)r  use_buffered_streamF)r  
pre_bufferi@ i )thrift_string_size_limitthrift_container_size_limitpage_checksum_verificationrJ  )hole_size_limitrange_size_limitlazy)r  cache_optionsi @B )r   ParquetFragmentScanOptionsr0   CacheOptionsr  r  r   r  r  r  r  r  )r  r  r  opts4opts5opts6
cache_optsopts7s           r*   test_parquet_scan_optionsr  a  s   ))+E))d;E))t5E))eNE))!'$*-E ))#')EdDJ))TTE$$---%%%4'''))[888,,	999++u444$$---%%%4'''$$,,,%%%4'''$$---%%%5((())V333,,666++t3334'''*,,,%"5"5555E>>E>>E>>E>>E>>E>>E>>r,   c                    t        j                         t        j                         t        j                  t        j                  j                  dd            t        j                  t        j                  j                  ddg            t        j                  t        j                  j                  dd	            t        j                         t        j                  t        j                  j                  dd
            t        j                  t        j                  j                  dd            g}	 |j                  t        j                                t        g|j                  t        j                         t        j                  dh      t        j                  d      t        j                  dddd      g       |D ]'  }| j                  | j!                  |            |k(  r'J  y # t        $ r Y w xY w)N	T)	delimiterignore_empty_linesrK   r  )	skip_rowsr9  r  i   )r  
block_sizeignorenewlines_in_valuesunexpected_field_behavior)parse_optionsF   r   r  rM   r  )r  r  {   i  )r  r  r  r  )r   r  CsvFileFormatr0   csvParseOptionsReadOptionsJsonFileFormatjsonr   OrcFileFormatImportErrorr_   extendr   r  r  )r  formatsr   s      r*   test_file_format_picklingr    s   


,,t@D - F 	G
bff&8&8ug '9 '/ 	0
bff&8&8E '9 '+ 	,

''..$IQ / S	T 	rww':':" (; (. 	/G r'')* 
~  "  SE:  T:  $( ),,/		

 
	 ""=#6#6{#CDSSS !  s   #G> >	H
	H
c                    t        j                         t        j                  t        j                  j	                  d            t        j                  t        j                  j                  d            t        j                         t        j                  t        j                  j                  dd	            t        j                  t        j                  j                  dd
            g}t        ;|j                  t        j                  d      t        j                  d      g       |D ]'  }| j                  | j                  |            |k(  r'J  y )NT)strings_can_be_nullconvert_options   r  r  Ferrorr  i   r  r  r  )r  )r   CsvFragmentScanOptionsr0   r  ConvertOptionsr  JsonFragmentScanOptionsr  r  r_   r  r  r  r  )r  r   options      r*   #test_fragment_scan_options_picklingr    s    
!!#
!!FF11d1K	M
!!++u+=	?
""$
""GG  E;B ! D	E 	"",,#,N	PG 
~))d;))T:
 	
 ""=#6#6v#>?6III r,   paths_or_selectorr   r   r   r   r  c                 x   t        j                  t        j                  dh      |      }t        j                  d      }t        j                  t        j                  t        j                  dt        j                               t        j                  dt        j                               g            |_
        |j                  dk(  sJ |j                  ddgk(  sJ |j                  d	u sJ t        j                  | |||      }|j                         }|j                         j!                  t        j                  t        j                  d
t        j"                               t        j                  dt        j$                               t        j                  dt        j&                  t        j                         t        j                                     t        j                  dt        j"                               t        j                  dt        j(                  t        j"                         t        j                         d            t        j                  dt        j                               t        j                  dt        j                               g      d	      sJ t+        |j-                         t.              sJ t+        |j1                  |      t         j2                        sJ |j4                  j!                  t        j6                  d            sJ |j1                         }t+        |t         j2                        sJ |j9                         }t        j:                  g dt        j"                               }	t        j:                  g dt        j$                               }
t
        j<                  j?                  t        j:                  g dt        j                               t        j:                  djA                         t        j                                     }t        j:                  tC        d      D cg c]  }|dz  tE        |dz        d c}      }|jG                         }tI        |ddgddg      D ]M  \  \  }}}}t        j:                  |gdz  t        j                               }t        j:                  |gdz  t        j                               }t        j:                  |dz
  gdz  t        j"                               }|jJ                  J |jL                  dk(  sJ |d   j!                  |	      sJ |d   j!                  |
      sJ |d   j!                  |      sJ |d   j!                  |      sJ |d   j!                  |      sJ |d   j!                  |      sJ |d   j!                  |      rNJ  |jO                         }t+        |t
        jP                        sJ tS        |      dk(  sJ |jL                  dk(  sJ y c c}w )NrQ   r  )r  r  r   r   r   .rF  FrO   rP   rR   rS   rL   check_metadataTr  r  z	0 1 2 3 4r   rK   r   r   r	  r
  r'  r   r   r  r   )*r   r   r  r   r   r0   r.   r1   r   r5   r   partition_base_dirselector_ignore_prefixesexclude_invalid_filesr   inspectr   r3   r4   r  rS   r@   inspect_schemasr[   r   r   r   r   r   r  DictionaryArrayfrom_arrayssplitr   rQ   r  r   r   r(  r   r6   r   )ra   r  r  rX   r   r   inspected_schemarF   r   r  r  expected_strr)   expected_structiteratorrg   rG   r   r   expected_groupexpected_keyexpected_constr:   s                          r*   test_filesystem_factoryr    s    !!**ugFF
 ))(3G33
		HHWbhhj)HHUBIIK(
 	G %%111++Sz999((E111))!67G (??##BII

#


%
bhhj"))+>?
"((*%
299288:+-99;&8 9 	:
"((*%
		$	/ 	%  $ 	 	 	 g--/666gnn%56**, , ,!!((4999nnGgr33444ooG88O"((*=L88O"**,?L%%11
rxxz2
""$299;7L hh).q 3)1A '(!e#a!e* =)1 3 4O##%H),X1vu~)N%5#5'A+BHHJ?xx			<519+/
C,,888  A%%%Qx|,,,Qx|,,,Qx|,,,Qx~...Qx///Qx~...Qx|,,, *O EeRXX&&&u:!!!+ 3s   X7c                    t        j                         }t        j                  d| |      }|j                  D ]  }|j	                  ||       }|j
                  dgk(  sJ |j	                  || dg      }||fD ]P  }t        |t         j                        sJ |j                  |k(  sJ t        |j                  t        |             rPJ  |j
                  dgk(  rJ  y )N/plainr   rX   r   r   )r   r   rF   r   r   r   r@   r   rd   r   r  )r   parquet_formatrF   rd   rG   row_group_fragmentr  s          r*   test_make_fragmentr!  ,  s    ))+Njjm .0G !//mD""qc)))+99$FGS : J./Aa!7!788866T>!>allD,?@@@ 0 ",,333 r,   c           	      r   | \  }}}}}}}}t        j                         }	|g}
|
D cg c]  }|	j                  ||       }}t        j                  ||	|j                  |      }|j                         }|j                  |      sJ |j                  D cg c]'  }|j                  j                  |      j                  ) }}t        |
|      D cg c]  \  }}|	j                  |||       }}}t        j                  ||	|j                  |      }|j                         }|j                  |      sJ |
D cg c]  }d }}t        |
|      D cg c]  \  }}|	j                  |||       }}}t        j                  ||	|j                  |      }t        j                  t        j                  j                   d      5  |j                         }ddd       |
D cg c]  }d }}t        |
|      D cg c]  \  }}|	j                  |||       }}}t        j                  ||	|j                  |      }t        j                  t"        d      5  |j                         }ddd       yc c}w c c}w c c}}w c c}w c c}}w # 1 sw Y   xY wc c}w c c}}w # 1 sw Y   yxY w)	z
    Test passing file_size to make_fragment. Not all FS implementations make use
    of the file size (by implementing an OpenInputFile that takes a FileInfo), but
    s3 does, which is why it's used here.
    )rX   r.   r   )	file_sizer   zParquet file size is 1 bytesr   Nr  zHTTP status 416)r   r   r   r   r.   r   r   r   r   get_file_infosizer   r   r   r   libr3  OSError)s3_example_simpler:   rd   rU   urihostport
access_key
secret_keyr   rm   r   rF   tblr   
sizes_truer%  fragments_with_sizedataset_with_sizesizes_toosmallsizes_toolarges                        r*   test_make_fragment_with_sizer4  ?  s    @Q<E4S$j*&&(KFE #$"T **44"  $""+ellrG 


C::e ELMMRMq'$$2215::MJR-0
-CE-CztT '44T24N-C  E,,KQS 


C::e %**EDaEN*-0-GI-GztT '44T24N-G  I ,,KQS 
w{{//7U	V!**, 
W +00%$g%N0-0-GI-GztT '44T24N-G  I ,,KQS
 
w&7	8!**, 
9	8S$ SE +I 
W	V 1I 
9	8sA   I;,J J>	JJJ(	J"J'!J-J-J6c                 <   t        j                  d      }t        j                  |j	                  d            }t        j                         }|j                  |      }t        |j                         t        j                        sJ t        j                  g dg dg dgg d      }| j                  |      j                  |      sJ |j                  |j                  |            }| j                  |      j                  |j                               sJ y )NzT
        alpha,num,animal
        a,12,dog
        b,11,cat
        c,10,rabbit
    utf-8rM   rN   r        r   dogcatrabbitrb  numanimalrx  )textwrapdedentr0   	py_bufferencoder   r  r   r@   r?   BufferReaderr:   r   r   r  r  )r   r  contentbuffer
csv_formatrG   rN  pickleds           r*   "test_make_csv_fragment_from_bufferrK  y  s    oo  	G \\'..12F!!#J''/H hmmor777xx%13 9:H ""8,33H===!!-"5"5h"?@G""7+2283D3D3FGGGr,   c                    d}t        j                  |j                  d            }t        j                         }|j                  |      }t        |j                         t         j                        sJ t        j                  g dg dg dgg d      }| j                  |      j                  |      sJ |j                  |j                  |            }| j                  |      j                  |j                               sJ y )Nz{"alpha" : "a", "num": 12, "animal" : "dog"}
{"alpha" : "b", "num": 11, "animal" : "cat"}
{"alpha" : "c", "num": 10, "animal" : "rabbit"}
r6  r7  r8  r;  r?  rx  )r0   rD  rE  r   r  r   r@   r?   rF  r:   r   r   r  r  )r   r  rG  rH  json_formatrG   rN  rJ  s           r*   #test_make_json_fragment_from_bufferrN    s    <G \\'..12F##%K((0H hmmor777xx%13 9:H ""8,33H===!!-"5"5h"?@G""7+2283D3D3FGGGr,   c                    t        j                  g d      t        j                  g d      t        j                  g d      g}|d   j                         |d   |d   j                         g}t        j                  t        j
                  ddg	      d
d      }|t        j                         f||fg}|D ]  \  }}t        j                  |g d      }t        j                         }t        j                  ||       |j                         }	|j                  |	      }
| j                  |
      j                  |      sJ |j                  |j                  |
            }| j                  |      j                  |      rJ  y )Nr7  r8  r;  r   r   r   rb  rA  r  Tr  )r  r  r  r?  rx  )r0   r  dictionary_encoder   r   r  r:   BufferOutputStreamr_   r`   getvaluer   r   r   r  r  )r   r  arraysdictionary_arraysdictionary_formatcasesformat_r:   re   rH  rG   rJ  s               r*   &test_make_parquet_fragment_from_bufferrX    sf    	!

)*F 	q	##%q	q	##%
 ,,** '2
 ! 
%%'(	-.E !'AB##%
uc"((0&&x077>>>%%m&9&9(&CD&&w/66u=== !r,   c                     t        j                  t        d      dgdz  dgdz  dgdz  z   gg d      }t        | dz        }t	        j
                  ||d	g|
       t        j                  |dd|      }||fS )Nr$  r   rM   r   rN   rv  rx  test_parquet_datasetr   )partition_cols
chunk_sizer   r   )rX   r   r   )r0   r:   r   rQ   r_   write_to_datasetr   rF   )r  r\  r   r:   rd   rF   s         r*   _create_dataset_for_fragmentsr^    s    HH	qA37SEAI	12"E
 w//0Dt(.xJHjjYV
G '>r,   c                    t        |       \  }}t        |j                               }t        |      dk(  sJ |d   }ddg}|j                  j
                  |k(  sJ |j                  j                  |j                  |j                        |j                  k(  sJ |j                  j                  t        j                  d      dk(        sJ |j                  |      }|j                  |k(  sJ |j                  |j!                  d      j#                  dd            sJ |j                  ||j$                        }|j                  g d	k(  sJ |j                  |j#                  dd            sJ |j                  |j$                  j'                  d      k(  sJ |j                  ||j$                  t        j                  d      dk  
      }|j                  g d	k(  sJ y )Nr   r   r   rw  r   rM   r   rT   rv  )r.   r   )r^  r[   r>   r   physical_schemary  rX   r  rd   r   r   r   r   r1   r   r9  remove_columnslicer.   remove)r  r   r:   rF   r   r  physical_namesr  s           r*   test_fragmentsre    s   27;NE7 W**,-Iy>Q!AD\N""n44488AFFALL1Q5F5FFFF!!((&)9S)@AAA $$Q'F.000==,,Q/55a;<<< $$Qw~~$>F"6666==Q*+++ 4 4Q 7777 $$	'..$!); % =F"6666r,   c                    t        j                  t        d      dgdz  dgdz  z   gddg      }t        | dz        }t	        j
                  ||dg	       t        j                  t        j                  d
g      d      }t        j                  |d|      }|j                  t        j                  d      dk\        }t        t        |            dk(  sJ y )Nr$  r   r   r   colr   rx  rZ  r[  )r   r  r   r  r   r{  r   )r0   r:   r   rQ   r_   r]  r   r   r.   rF   r>   r1   r   r[   )r  r:   rd   r   rF   r   s         r*   test_fragments_implicit_castrj    s     HHeAha1#' 125&/JEw//0DtVH=??299&6%78HDjjidCG%%RXXf-=-B%CItI1$$$r,   c                 .  
 t        |       \  
}	 d
fd	}t        |j                               d   }|j                  }|j	                  |j                  |            }|j                  |      |j                  |      k(  sJ |j                  |j                  |j                  |j                        }|j                  |      j                  |j                  |            sJ  ||d       |j                  |j                  |j                  |j                        } ||dt        j                  d      dk         |j                  |j                  |j                  |j                        } ||ddgt        j                  d      d	k  
       |j                  |j                  |j                  |j                        } ||dt        j                  d      dk(         d|j                  j                  ddd      z   }	t!        j"                  t$        |	      5  |j                  |j                  |j                  |j                        }|j                  |t        j                  d      dk(         d d d        y # 1 sw Y   y xY w)Nc                     | j                  j                  ||      }|r|nj                  }|j                  |k(  sJ  j                  | j	                  |      }|j                  |      sJ y )Nr.   r   r   )r   r.   r9  rb  selectr   )rG   	row_slicer   r   actualr9  rN  r:   s          r*   assert_yields_projectedz;test_fragments_reconstruct.<locals>.assert_yields_projected  st    ""<< # A")wu/A/A""l2225;;	*11,?}}X&&&r,   r   )r   )r   r   )r   r   r   r   r   r  rI  r   rM   z&No match for FieldRef.Name\(part\) in Fr   NN)r^  r[   r>   rX   r  r  r   r   rd   r   r   r   r   r1   r`  	to_stringr   r   r  )r  r   r  rF   rq  rG   r  pickled_fragmentnew_fragmentpatternr:   s             @r*   test_fragments_reconstructrw    sp   27;NE7 6:' G))+,Q/H__N %**=+>+>x+HI""+44X>? ? ? "//x**%:: 0 <L ""<077)+ + +L&1 "//x**%:: 0 <L L&$!9KL "//x**%:: 0 <L L&%)F288D>C3GI "//x**%:: 0 <L L&#%88F#3s#:<
 9''11%FGG	z	1%33MM8..!)!>!> 4 @ 	RXXf5E5LM	 
2	1	1s   &AJJc                    t        | d      \  }}t        |j                               d   }t        |j                               }t	        |      |j
                  cxk(  rdk(  sJ  J |j                  |d   |j                        }|j                  g dk(  sJ t	        |      dk(  sJ |j                  |j                  dd            sJ |d   j                  J |d   j
                  dk(  sJ |d   j                  d   j                  dddddddk(  sJ t        |j                  t        j                  d	      dk  
            d   }t        |j                  t        j                  d	      dk              }t	        |      dk(  sJ |j                  |d   t        j                  d	      dk  
      }t	        |      dk(  sJ y )Nr   r\  r   rT   rv  r   minmaxr   rw  r   r   )r^  r[   r>   r   r   r   r   r.   r9  r   rb  r   
statisticsr   r1   )r  r   r:   rF   rG   r   r  s          r*   !test_fragments_parquet_row_groupsr  L  s   27qINE7G))+,Q/H x::<="#x'>'>C!CCCCC$$Aw~~ % 7F"6666v;!==Q*+++q!,,888q!00A555q!,,Q/::""?   
 G))$!1C)DEaHHx::288D>A;MNO"#q((($$Arxx~'9 % ;Fv;!r,   c                    t        j                  dt        d      i      }t        j                  || dz  d       t        j                  | dz  d      }t        |j                               d   }|j                  j                  |j                  |j                  d	d
g      }|j                  dk(  sJ |j                          |j                  dk(  sJ t        |j                         dk(  sJ y )NrM   r$  test.parquetr   row_group_sizer   r   r   r   rK   r  )r0   r:   r   r_   r`   r   rF   r[   r>   rX   r   rd   r   r   ensure_complete_metadatar   r   )r  r:   rF   original_fragmentrG   s        r*   %test_fragments_parquet_num_row_groupsr  j  s    HHc58_%ENN5'N21Ejj>1)DGW2245a8 !''55 1 < <q6 6 H ""a'''%%'""a'''x""#q(((r,   c                    t        j                  t        ddgddg            }|d   j                  d      |d<   t	        j
                  t        j                  |      | dz         d	d lm	}  |j                  | dz        }|j                  | |j                  d      dk(  
      }|j                  d	   |j                         k(  j                         j                         sJ y )NrM   rN   r   r   )col1col2r  categoryztest_filter_dictionary.parquetr   r   )r"   r#   dictastyper_   r`   r0   r:   pyarrow.datasetrF   r   r1   r   	to_pandasr~  )r  r   r9   r   rF   r  s         r*   ,test_fragments_parquet_row_groups_dictionaryr  |  s     
dc
!Q8	9BF"":.BvJNN288B<+K!KL bjj#CCDG$$WXRXXf5E5L$MFGGAJ&**,,11377999r,   c                    |\  }}t        | d|      \  }}t        |j                               d   } ||j                  g      5  |j	                          d d d        |j
                  ddgk(  sJ  |g       5  |j	                          d d d        t        |j                  t        j                        sJ |j                  j                  |j                  |j                  ddg      }|j
                  |j
                  k(  sJ |j	                          |j
                  d   }	|	j                  dk(  sJ |	j                  dk(  sJ |	j                  J |j!                  |j#                  |            }
 ||j                  g      5  |
j
                  ddgk(  sJ |
j
                  d   }	|	j                  dk(  sJ |	j                  J 	 d d d        y # 1 sw Y   xY w# 1 sw Y   ZxY w# 1 sw Y   y xY w)Nr   r\  r   r   r   r  )r^  r[   r>   rd   r  r   r@   metadatar_   FileMetaDatarX   r   r   idr   r~  r  r  )r  r   r  rU   rz   rF  rF   rG   ru  	row_grouprt  s              r*   &test_fragments_parquet_ensure_metadatar    s   &B.A"JAw G))+,Q/H 
x}}o	&))+ 
'1a&((( 
b	))+ 
 h''999 ??00x**1v 1 L ""h&9&9999 ))+''*I<<1"""+++ %**=+>+>|+LM	x}}o	&**q!f444$//2	||q   ##///	 
'	&3 
'	&
 
	( 
'	&s%   G:G#
AG0G #G-0G9c                 X   |\  }}t        | |      \  }}t        |j                               d   } |g       5  |j                  |j	                  |            }d d d         |j
                  g      5  |j                  }	d d d        	dgk(  sJ y # 1 sw Y   :xY w# 1 sw Y   xY w)Nr   r   r   )r^  r[   r>   r  r  rd   r   )
r  r   r  rU   rz   rF  rF   rG   rt  r   s
             r*   )test_fragments_parquet_pickle_no_metadatar    s     'B.w2FJAwG))+,Q/H 
b	(..}/B/B8/LM 
 
',,-	.%00
 
/! 
	 
/	.s   !B6B B B)c                 &   t        j                  t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j
                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                               t        j                  g dt        j                   d            t        j                  g dt        j                   d            t        j                  g dt        j                   d            t        j                  g dt        j"                               t        j                  g dt        j$                               t        j                  g dt        j&                  d            t        j                  g dt        j(                  d            gg d	
      }t+        | dz        }t-        j.                  |||       |t1        j2                  |dd      fS )N)TNF)r   r   *   )r  g      $@      E@)rM   Nzr  r  us)r   r   l    jt )booleanr  uint8int16uint16r   uint32r3   uint64r    doubleutf8binaryts[s]ts[ms]ts[us]r2   date64time32time64rx  test_parquet_dataset_all_typesry  r   r   r{  )r0   r:   r  bool_r  r  r  r  r   r  r3   r  float32r4   r  r  	timestampr2   r  r  r  rQ   r_   r]  r   rF   )r  r\  r:   rd   s       r*   _create_dataset_all_typesr    s   HHHH("((*5HH["''),HH["((*-HH["((*-HH["))+.HH["((*-HH["))+.HH["((*-HH["))+.HH&

5HH&

5HH%rwwy1HH%ryy{3HH[",,s"34HH[",,t"45HH[",,t"45HH["))+.HH("))+6HH["))C.1HH["))D/2)	
,
/-E^ w99:D t
;"**T)&IIIr,   c                 D   t        |       \  }}t        |j                               d   }dd lfd}fd}fd}j                  }j
                  }t        |j                               }	|	d   j                  J |	d   j                  d   }
|
j                  dk(  sJ |
j                  dkD  sJ |
j                  i ddd	d
dddd
dddd
dddd
dddd
dddd
dddd
dddd
dddd
dddd
dddd
dddd
dddd
d |d       |d      d
d  |d       |d      d
d! |d       |d      d
d" |d#dd$       |d#d$d%      d
 |d#dd       |d#d$d&      d
 |ddd       |ddd      d
 |dddd       |dddd      d
d'k(  sJ y )(Nr   c                 2     j                   ddddd|       S N  r   r   r   r   r   s    r*   dt_sz.test_parquet_fragment_statistics.<locals>.dt_s  s     )))$1aA>>r,   c           
      :     j                   dddddd| dz        S )Nr  r   r   r   r  r  s    r*   dt_msz/test_parquet_fragment_statistics.<locals>.dt_ms  s&    ***4Aq!Q$GGr,   c           	      4     j                   dddddd|       S r  r  r  s    r*   dt_usz/test_parquet_fragment_statistics.<locals>.dt_us  s"    ***4Aq!QBBr,   rK   r   r  FTrz  r  r   r  r  r  r  r   r  r3   r  r    r  r  r  r  rM   r  r     a   zr  r  r  r2   r  r   r9     )r  r  r  )r  r[   r>   r   r   timer   r   r   total_byte_sizer~  )r  r:   rF   rG   r  r  r  r   r  r   r  r   s              @r*    test_parquet_fragment_statisticsr    sn    /w7NE7G))+,Q/H>GB==D==D x::<=q!,,888#A&11!4I"""$$t+++ $5.$"%$ 	2&$ 	2&	$
 	!B'$ 	2&$ 	!B'$ 	2&$ 	!B'$ 	T*$ 	#d+$ 	C($ 	$t,$ 	ab2$ 	%(595$  	%(595!$" 	$tQ*4a3DE#$$ tQ*4a3DEq!Q-Q2?q!Q*41a3DE)$   r,   c                 <   t        j                  g dg dd      }t        j                  || dz  d       t	        j
                  | dz  d      }t        |j                               d	   j                         }|d
   j                  d	   j                  i k(  sJ y )N)r   r   NN)rM   rN   NNrL   r  r   r  r   r   r   r   )r0   r:   r_   r`   r   rF   r[   r>   r   r   r~  )r  r:   rF   r   s       r*   &test_parquet_fragment_statistics_nullsr  *  s    HH-4JKLENN5'N21Ejj>1)DGW**,-a0CCEIQ<""1%00B666r,   c                 8   t        j                  g dg dd      d d }|j                  | dz  d       t        j                  | dz  d	      }t        |j                               d   j                         }|d   j                  d   j                  i k(  sJ y )
N)rM   rN   rN   r   r   r  rL   r   r  r   enginer   r   )
r"   r#   
to_parquetr   rF   r[   r>   r   r   r~  )r  r9   rF   r   s       r*   'test_parquet_empty_row_group_statisticsr  5  s     
O)<	=bq	ABMM'N*9M=jj>1)DGW**,-a0CCEIQ<""1%00B666r,   c                    t        | d      \  }}t        |j                               d   }|j                  j	                  t        j                  d      dk(        sJ t        |j                  t        j                  d      dk(  |j                              }t        |      dk(  sJ t        |j                  t        j                  d      dk(  |j                              }t        |      dk(  sJ y )Nr   ry  r   r   rM   r   r.   rN   )
r^  r[   r>   r   r   r   r1   r   r.   r   )r  r:   rF   rG   r   s        r*   +test_fragments_parquet_row_groups_predicater  A  s    27qINE7G))+,Q/H((//0@C0GHHH ##288F+;s+B+2>> 	$ 	;< "#q((( ##288F+;s+B+2>> 	$ 	;< "#q(((r,   c                 
   t        | d      \  }}t        |j                               d   }|j                  }t        |j	                               }|j                  |j                  |            }|j                  |      |j                  |      k(  sJ |j                  |j                  |j                  |j                  dg      }	|j                  |	      }
|
j                  |j                  |d               sJ |j                  |j                  |j                  |j                  dh      }	|j                  |	|j                  ddgt        j                  d      dk  	      }
|
j                   ddgk(  sJ t#        |
      dk(  sJ |j                  |j                  |j                  |j                  dh      }	t%        j&                  t(        d
      5  |j                  |	       d d d        y # 1 sw Y   y xY w)Nr   ry  r   )r   r   r   r   r   rK   rm  zreferences row group 2r   )r^  r[   r>   rX   r   r  r  r   r   rd   r   r   r   r.   r   r1   r9  r   r   r   rM  )r  r   r  r:   rF   rG   r  r   rt  ru  r  s              r*   -test_fragments_parquet_row_groups_reconstructr  X  s    37qINE7G))+,Q/H__Nx::<= %**=+>+>x+HI""+44X>? ? ? "//x**%::3 0 L $$\2F==001DQ1GHIII "//x**%::3 0 L $$U\\D&>xx~! % %F 4.000v;! "//x**%::3 0 L 
z)A	B- 
C	B	Bs   G99Hc                    |\  }}t        | d|      \  }}t        |j                               d   }|j                  ddg      } |g       5  |j                  dk(  sJ |j
                  ddgk(  sJ |j
                  d   j                  J 	 d d d        |j                  |      }	|	j                         ddgddgdk(  sJ |j                  g       }|j                  dk(  sJ |j
                  g k(  sJ |j                  ||j                        }	|	j                  dk(  sJ |	j                  |d d       sJ y # 1 sw Y   xY w)	Nr   r  r   rK   row_group_idsr   r}  rT   )r^  r[   r>   subsetr   r   r~  r   r  r.   r   r   
r  r   r   rU   rz   r:   rF   rG   subfragr  s
             r*   !test_fragments_parquet_subset_idsr    sd    'B27q>@BNE7G))+,Q/H ooQFo3G	b	%%***!!aV+++!!!$//;;; 
 $$W-FAq!f!==== ooBo/G!!Q&&&###$$WW^^$DF??a==r### 
	s   A D>>Ec                 |   |\  }}t        | d|      \  }}t        |j                               d   }|j                  t	        j
                  d      dk\        } |g       5  |j                  dk(  sJ t        |j                        dk(  sJ |j                  d   j                  J 	 d d d        |j                  |      }	|	j                         g dg ddk(  sJ |j                  t	        j
                  d      d	kD        }|j                  dk(  sJ |j                  g k(  sJ |j                  ||j                  
      }	|	j                  dk(  sJ |	j                  |d d       sJ |j                  t	        j
                  d      dk(  |j                  
      }|j                  dk(  sJ y # 1 sw Y   	xY w)Nr   r  r   r   rK   r  )r   r   r   r}  r   rT   r   rM   r   )r^  r[   r>   r  r   r1   r   r   r   r~  r   r  r.   r   r   r  s
             r*   $test_fragments_parquet_subset_filterr    s    'B27q>@BNE7G))+,Q/H oobhhtn12G	b	%%***7%%&!+++!!!$//;;; 
 $$W-F	!CCCC oobhhtnq01G!!Q&&&###$$WW^^$DF??a==r### oobhhv.#5gnnoMG!!Q&&&' 
	s   "AF11F;c                    t        | d      \  }}t        |j                               d   }t        j                  t
              5  |j                  t        j                  d      dk\  ddg       d d d        t        j                  t
              5  |j                          d d d        y # 1 sw Y   <xY w# 1 sw Y   y xY w)Nr   ry  r   r   r   r  )	r^  r[   r>   r   r   r  r  r   r1   )r  rF  rF   rG   s       r*   %test_fragments_parquet_subset_invalidr    s    .w1EJAwG))+,Q/H 
z	"!+Aq6B 
# 
z	" 
#	" 
#	" 
#	"s   ,B-B9-B69Cc                 T   t        j                  g d      }t        j                  g d      }t        j                  g d      }t         j                  j                  ||gddg      }t         j                  j                  ||gddg      }t        j                  d	|i      }t        j                  || d
z  d       t        j                  | d
z  d      }t        |j                               d   }|j                  dk(  sJ |j                  t        j                  d	d      dkD        }	|	j                  dk(  sJ |j                  t        j                  d	d      dkD        }	|	j                  dk(  sJ |j                  t        j                  d	dd      dkD        }	|	j                  dk(  sJ |j                  t        j                  d	dd      dk        }	|	j                  dk(  sJ t        j                  t         j                   d      5  |j                  t        j                  d	d      dkD         d d d        t        j                  t"        d      5  |j                  t        j                  d	d      dkD         d d d        y # 1 sw Y   VxY w# 1 sw Y   y xY w)N)r   r   r   rK   )皙?皙?333333?皙?r   r   rK   r   f21f22rx  r   rw  rg  zdata_struct.parquetr   r  r   r   r   r   r   zNo match for FieldRef.Nestedr   f3z)Function 'greater' has no kernel matching)r0   r  StructArrayr  r:   r_   r`   r   rF   r[   r>   r   r  r1   r   r   r3  NotImplementedError)
r  r   r  r  rw  
struct_colr:   rF   rG   r  s
             r*   0test_fragments_parquet_subset_with_nested_fieldsr    s0    
,	B
(('
(C
((<
 C		#	#S#Juen	#	EB++RHT4L+IJHHeZ()ENN5'$99!Ljj#88KGG))+,Q/H""a'''oobhhud3a78G!!Q&&&oobhhud3a78G!!Q&&&oobhhudE:Q>?G!!Q&&&oobhhudE:a?@G!!Q&&& 
r.L	M-12 
N 
#N
 	-12
 
	 
N	M
 
s   )J )JJJ'c                    t        |j                               d   }t        |      dk(  st        |      dk(  sJ t        |       \  }}t	        j
                  |d      }t        |j                               d   }t        |      dj                  |j                  j                  t        |                  k(  sJ | dz  }t        j                  j                  ||       t	        j
                  |d      }t        |j                               d   }t        |      d	j                  |j                  j                  t        |                  k(  sJ y )
Nr   zb<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet partition=[key=xxx, group=1]>zb<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet partition=[group=1, key=xxx]>r   r   z-<pyarrow.dataset.ParquetFileFragment path={}>data.featherfeatherz/<pyarrow.dataset.FileFragment type=ipc path={}>)r[   r>   repr_create_single_filer   rF   rX   r   rl   rQ   r0   r  write_feather)r  rF   rG   r:   rd   s        r*   test_fragments_reprr    sD    G))+,Q/H 	X	(	( 	X	(	(	( &g.KE4jji0GG))+,Q/HX7>>--c$i8	:	:	: ^#DJJUD)jji0GG))+,Q/HX9@@--c$i8	:	:	:r,   rJ  c                     | S r=   r   r   ms     r*   <lambda>r        Qr,   c                 B    |j                  |j                  |             S r=   r  r  r  s     r*   r  r        QWWQWWQZ-@r,   c                    t        j                  dd      }t        j                         }t        j                  d      }t        j
                  j                  ddg      } |||      }t        |t        j                        sJ ||_	        t        j                  | |||      }|j                         }t        j                  dt        j                         fdt        j                         fdt        j                          fd	t        j                         fd
t        j"                  t        j                         t        j                          d      fdt        j$                         fdt        j                          fg      }	|j'                  |	      sJ t        j(                  j                         }
t        |
t        j                        sJ y )Nr   Tr   r   r   rO   rP   rQ   rR   rS   rL   )rU   r   r   r   r   r   discoverr@   PartitioningFactorypartitioning_factoryr   r  r0   r.   r3   r4   r5   rS   r   r   r|  )ra   rJ  r  r  rX   r   r  r   r  expected_schemahive_partitioning_factorys              r*   test_partitioning_factoryr    sp    DA!!#F))(3G33<<gu=MN"#7G*B,B,BCCC#7G ))!67G (ii	
	

				"((*	299288:BIIK@AB	"((*			! O ""?333 " 3 3 < < >/1G1GHHHr,   infer_dictionaryc                     | S r=   r   r  s     r*   r  r  8  r  r,   c                 B    |j                  |j                  |             S r=   r  r  s     r*   r  r  8  r  r,   c                 f   t        j                  dd      }t        j                         }t        j                  d      }t        j
                  j                  ddg|      } |||      |_        t        j                  | |||      }|j                         }	|rct        j                  t        j                         t        j                               }
|	j                  d      j                  |
k(  sJ |j!                         j#                         j%                         }|j'                  d      j)                  d      }t        j*                  dgd	z  d
gd	z  z         j-                         }|j/                  |      sJ |j!                         j#                  t        j                  d      dk(        }|j'                  d      j)                  d      }|j1                  dd	      }|j/                  |      sJ y |	j                  d      j                  t        j                         k(  sJ y )Nr   Tr   r   r   r  r   r	  r   r
  r   )rU   r   r   r   r   r   r  r  r   r  r0   r  r   r5   r1   r  r   r   combine_chunksr  r   r  rP  r   rb  )ra   r  rJ  r  r  rX   r   r  r   inferred_schemaexpected_typer:   rp  rN  s                 r*   $test_partitioning_factory_dictionaryr  5  s    DA!!#F))(3G33<<	%+; = =#*+?#OG ))!674G oo'Obhhj"))+>$$U+00MAAA ))+::<e$**1-88UGaK5'A+56HHJ}}X&&&  ))%E1I)Je$**1->>!Q'}}X&&&$$U+00BIIK???r,   c                     | S r=   r   r  s     r*   r  r  Z  r  r,   c                 B    |j                  |j                  |             S r=   r  r  s     r*   r  r  Z  r  r,   c                 r   t        j                         }t        j                         }t	        j
                  dt	        j                         fg      }t	        j                  t	        j                  t        d            g|      }t	        j
                  dt	        j                  d      fdt	        j                         fg      }t	        j
                  dt	        j                         fdt	        j                         fg      }t	        j
                  t        |      t        |      z         }dD ]z  }	|j                  |	       |j                  |	dz         5 }
t        j                  j!                  |
|      5 }|j#                  |       |j%                          d d d        d d d        | t        j&                  d	d
      }t        j(                  d	      }t        j*                  j-                  |      } | ||      |_        t        j0                  ||||      }|j3                         }||k(  sJ |j5                         j7                  dt        j8                  d      j;                  t	        j                               i      }|d   d   j=                         dk(  sJ t        j*                  j-                  ddgd      } | ||      |_        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        j*                  |d      } | ||      |_"        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        j*                  j-                  |d      } | ||      |_        t        j0                  ||||      }tG        jH                  t        jJ                  d      5  |j3                         }d d d        t        j&                  dd
      }t        j(                  d      }t        jL                  j-                  |      } | ||      |_        t        j0                  ||||      }|j3                         }||k(  sJ |j5                         j7                  dt        j8                  d      j;                  t	        j                               i      }|d   d   j=                         dk(  sJ t        jL                  j-                  d      } | ||      |_        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        jL                  |d      |_"        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        jL                  j-                  |d      } | ||      |_        t        j0                  ||||      }tG        jH                  t        jJ                  d      5  |j3                         }d d d        y # 1 sw Y   xY w# 1 sw Y   kxY w# 1 sw Y   
xY w# 1 sw Y   y xY w)NrO   r   rT   r   r  r5   )z%directory/2021-05-04 00%3A00%3A00/%24z,hive/date=2021-05-04 00%3A00%3A00/string=%24
/0.featherrc   Tr   date_intr   r   逎`r_  r`  2021-05-04 00%3A00%3A00%24r.   ra  +Could not cast segments for partition fieldr   r   )'rU   rV   r   r  r0   r.   r3   r:   r  r   r  r5   r[   rY   rZ   rz  new_filer`   closer   r   r   r  r  r   r  r   r   r1   r  as_pyr>   r   r   r   r   r   r3  r|  )rJ  r  ra   rX   r.   r:   partition_schemastring_partition_schemafull_schemarc   sinkwriterr   r   r  r   r  rp  r   r   s                       r*   *test_partitioning_factory_segment_encodingr  Y  s    !FFYY
+,-FHHbhhuRy)*6:Eyy
",,s#	$x&=>@ ii
"))+	299; 78:))DL40@+AABK	 	)$&&y<'?@Dv.&""5) / A@ {d;H))+6G33<< = !#*+?#OG ))&(FGLGoo'Ok)))^^&&BHHV$))"((*50& F !9Q<:---33<<	V = 5#*+?#OG ))&(FGLGW^^%3356IQ<,,33	&	6	6	(	u	$	&' ' ' ++&:L"<?G))&(FGLGW^^%3356IQ<,,33	&	6	6	(	u	$	&' ' ' 33<<& = :#*+?#OG ))&(FGLG	rJ
L!//+
L
 v6H))&1G..77 8 !#*+?#OG ))&(FGLGoo'Ok)))^^&&BHHV$))"((*50& F !9Q<:---..77 8 !#*+?#OG ))&(FGLGW^^%3356IQ<,,33	&	6	6	(	u	$	&' ' ' ..&:G))&(FGLGW^^%3356IQ<,,33	&	6	6	(	u	$	&' ' ' ..77& 8 :#*+?#OG ))&(FGLG	rJ
L!//+
L 
L_ /. A@R
L 
LN
L 
Ls<   !^;"^^^ ,^-^^^	 ^*-^6c                     | S r=   r   r  s     r*   r  r    r  r,   c                 B    |j                  |j                  |             S r=   r  r  s     r*   r  r    r  r,   c                 Z   t        j                         }t        j                         }t	        j
                  dt	        j                         fg      }t	        j                  t	        j                  t        d            g|      }t	        j
                  dt	        j                  d      fdt	        j                         fg      }t	        j
                  dt	        j                         fdt	        j                         fg      }t	        j
                  t        |      t        |      z         }t	        j
                  dt	        j                  d      fdt	        j                         fg      }	t	        j
                  dt	        j                         fdt	        j                         fg      }
d	}|j                  |       |j                  |d
z         5 }t        j                  j!                  ||      5 }|j#                  |       |j%                          d d d        d d d        t        j&                  dd      }t        j(                  d      }t        j*                  j-                  |      } | ||      |_        t        j0                  ||||      }|j3                         }||k(  sJ |j5                         j7                  dt        j8                  d      j;                  t	        j                               i      }|d   d   j=                         dk(  sJ t        j*                  j-                  d      } | ||      |_        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        j*                  |d      } | ||      |_"        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        j*                  j-                  d      } | ||      |_        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        j*                  |
d      } | ||      |_"        t        j0                  ||||      }t        |j5                         j?                               }|d   j@                  jC                  t        j8                  d      dk(  t        j8                  d      dk(  z        sJ t        j*                  j-                  |	d      } | ||      |_        t        j0                  ||||      }tG        jH                  t        jJ                  d      5  |j3                         }d d d        y # 1 sw Y   pxY w# 1 sw Y   uxY w# 1 sw Y   y xY w)NrO   r   rT   ztest'; dater  ztest';[ string'ztest%27%3B%20dateztest%27%3B%5B%20string%27zLhive/test%27%3B%20date=2021-05-04 00%3A00%3A00/test%27%3B%5B%20string%27=%24r  r   Tr   r  r   r   r  r)  r`  z2021-05-04 00:00:00$r_  r  r  r  r  r   )&rU   rV   r   r  r0   r.   r3   r:   r  r   r  r5   r[   rY   rZ   rz  r  r`   r  r   r   r|  r  r  r   r  r   r   r1   r  r  r>   r   r   r   r   r   r3  )rJ  r  ra   rX   r.   r:   r  r  r  partition_schema_enstring_partition_schema_enrc   r  r  r   r   r  r   r  rp  r   r   s                         r*   ;test_partitioning_factory_hive_segment_encoding_key_encodedr!    s    !FFYY
+,-FHHbhhuRy)*6:Eyy
c*	+.?-MNP ii
	%(9299;'GHJ))DL40@+AABK))
r||C0	1
%ryy{	3	56 "$
ryy{	+
%ryy{	3	5"61I
i 		"	"9|#;	<VV__T6*fu%LLN + 
= v6H))&1G..77 8 !#*+?#OG ))&(FGLGoo'Ok)))^^&&BHH]+00<0& F !9Q<:---..77 8  #*+?#OG ))&(FGLGW^^%3356IQ<,,33	-	 $9	9	#	$	+	-. . . &&%9L"<?G))&(FGLGW^^%3356IQ<,,33	-	 $9	9	#	$	+	-. . . ..77 8 !#*+?#OG ))&(FGLGW^^%3356IQ<,,33	%	&*C	C	-	.%	7	9: : : &&"V=L"<?G))&(FGLGW^^%3356IQ<,,33	%	&*C	C	-	.%	7	9: : : ..77"V 8 =#*+?#OG ))&(FGLG	rJ
L!//+
L 
Lu +* 
=	<v
L 
Ls0   !Z7"ZZ-Z!Z	ZZ!Z*c           
         t        j                  g dg dd      }t        j                  t        j                  t        j
                  dt        j                               t        j
                  dt        j                               g            }t        j                  t         j                        5  t        j                  || d|       d d d        y # 1 sw Y   y xY w)Nr   yNr   r$  r  rL   rM   rN   rz  r{  )r0   r:   r   r   r.   r1   r5   r   r   r3  r  r  r:   r   s      r*   /test_dictionary_partitioning_outer_nulls_raisesr'    s    HH+/BCE??
		288C-rxxRYY[/IJKMD	r	'
DI 
(	'	's   6CC"c                     t        j                  g dg dd      }t        j                  t              5  t        j                  || d       d d d        y # 1 sw Y   y xY w)Nr#  r%  rL   zbasename-{i}.arrow)r0   r:   r   r   r   r   r  )r  r:   s     r*   test_positional_keywords_raisesr)    sA    HH+/BCE	y	!
)=> 
"	!	!s   AA c                 0   d}t        j                  t        j                  d|dz         t        j                  |dz         d      }t        j                  |d | | dz  dg       t        j                  |d |dz    | dz  dg       t        j                  | dz  dg	      }|d   j                  dk(  sJ t        j                  | dz  dd
g	      }|d   j                  dk(  sJ t        j                  | dz  dg	      }|d   j                  dk(  sJ y )Ni   r   r   )r   r   oner   rh  twor   r   r   )	r0   r:   repeatnparanger_   r]  
read_table
num_chunks)r  
BATCH_SIZEr:   s      r*   test_read_partition_keys_onlyr3  $  s    J HHyyJN+:>*, -E kz%1 ozA~%1 MM'E/E7;E<""a'''MM'E/E73CDE<""a'''MM'E/E7;E<""a'''r,   c                     t        j                  |       }t        |D cg c]?  }t         j                  j	                  t         j                  j                  | |            A c}      S c c}w r=   )oslistdiranyrd   isdirr  )basedirelementsels      r*   _has_subdirsr<  ?  sI    zz'"H8L8Rbggll7B788LMMLs   AA*c                 8   t        j                  |       D ]  }t         j                  j                  | |      }t         j                  j	                  |      sCt        j                  ||      }t        |      rt        |||       r|j                  |        y r=   )	r5  r6  rd   r  r8  	posixpathr<  _do_list_all_dirsr   )r9  path_so_farr  r  true_nestednorm_nesteds         r*   r?  r?  D  si    ZZ ggll7A.77==%#..a8KK(!+{FCk* !r,   c                 $    g }t        | d|       |S )Nr  )r?  )r9  r  s     r*   _list_all_dirsrD  O  s    Fgr6*Mr,   c                 L    t        t        |             }|t        |      k(  sJ y r=   )r}   rD  )r  expected_directoriesactual_directoriess      r*   _check_dataset_directoriesrH  U  s&    ^G45%9!::::r,   c           
      v   t        j                  g dg dd      }t        j                  t        j                  t        j
                  dt        j                               t        j
                  dt        j                               g            }t        j                  || d|       t        | g d       y )	Nr%  r#  rL   rM   rN   rz  r{  )zx/xzy/yr  )	r0   r:   r   r   r.   r1   r5   r  rH  r&  s      r*   (test_dictionary_partitioning_inner_nullsrJ  Z  sw    HH?1ABCE??
		288C-rxxRYY[/IJKMDUGEEw(;<r,   c           
      z   t        j                  g dg dd      }t        j                  t        j                  t        j
                  dt        j                               t        j
                  dt        j                               g      d d      }t        j                  || d|       t        | g d	       y )
N)r   Nr  r#  rL   rM   rN   rd  rz  r{  )za=x/b=xz	a=xyz/b=yz	a=z/b=xyz)	r0   r:   r   r|  r.   r1   r5   r  rH  r&  s      r*   test_hive_partitioning_nullsrL  b  s    HH+2BCDEryy	#ryy{	#RXXc299;%?@ BCGPDUGEEw(MNr,   c                  .   t        j                  dt        j                         fdt        j                         fg      } ddg}t	        j
                  |       }t        |t        j                        sJ t	        j
                  | d      }t        |t        j                        sJ t	        j
                  |      }t        |t        j                        sJ t        j                  t              5  t	        j
                          d d d        t        j                  t        d      5  t	        j
                  |        d d d        t        j                  t        d      5  t	        j
                  | |        d d d        t	        j
                  | d	
      }t        |t        j                        sJ t	        j
                  | dd	      }t        |t        j                        sJ t	        j
                  d	
      }t        |t        j                        sJ t        j                  t              5  t	        j
                  |d	
       d d d        t        j                  t        d      5  t	        j
                  |d	       d d d        t        j                  t              5  t	        j
                  | d
       d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   kxY w# 1 sw Y   xY w# 1 sw Y   vxY w# 1 sw Y   y xY w)Nr   r   inferrm  )field_nameszExpected listr   zCannot specify bothr   ri  )rn  r  zCannot specify 'field_names')rO  r  unsupported)r0   r.   r  r  r   r   r@   r   r  r   r   r  r|  )r.   ry  r   s      r*   test_partitioning_functionrQ  j  s   YY,w	.BCDFWE ??6"DdB44555??68DdB22333??u-DdB22333	z	"
 
#	z	9
F+ 
:	z)>	?
F3 
@ ??6&1DdB//000??6GDdB22333??&)DdB22333	z	"
f- 
#	z)G	H
E&9 
I 
z	"
}5 
#	"+ 
#	"	9	9	?	? 
#	"	H	H 
#	"sH   3K+K%K&7K32K?+LKK#&K03K<?LLc                    t        j                  t        j                  dt        j                  t        j                         t        j
                                     t        j                  dt        j                  t        j                         t        j                                     g      }t        j                  j                  |      }t        j                  dd| |      }|j                  j                  |k(  sJ |j                         }|j                  d      j                  j                  |j                   d         sJ |j                  d      j#                         dgd	z  d
gd	z  z   k(  sJ |j                  d      j                  j                  |j                   d         sJ |j                  d      j#                         dgd	z  dgd	z  z   k(  sJ y )Nr   r   rT   r   r   rX   r   r   r   r   r   r   r	  r
  )r0   r.   r1   r  r  r   r5   r   r   r  rF   r   r   r  r  r   typesr:  )ra   r.   r   rF   r:   s        r*   *test_directory_partitioning_dictionary_keyrU    su    YY
"--	288:>?
bggi=> F ##,,F,;DjjvDG &&&000E<< %%,,V\\!_===<< **,a1#'0AAAA<<##**6<<?;;;<<((*ugkUGaK.GGGGr,   c                    t        j                  t        j                  dt        j                  t        j                         t        j
                                     t        j                  dt        j                  t        j                         t        j
                                     g      }t        j                  j                  |      }t        j                  dd| |      }|j                  j                  |k(  sJ |j                         }t        t        dd            }t        t        d	d
            }|j                  d      j                  j!                  |j"                  d         sJ |j                  d      j$                  D ]3  }|j                  j'                         }|j)                          ||k(  r3J  |j                  d      j                  j!                  |j"                  d	         sJ |j                  d      j$                  D ]3  }|j                  j'                         }|j)                          ||k(  r3J  y )Nr   r   rT   r   r   rS  i  i  r      r   )r0   r.   r1   r  r  r  r   r|  r  rF   r   r   r[   r   r  r  r   rT  chunksr:  sort)	r   r.   r   rF   r:   year_dictionarymonth_dictionaryr   rp  s	            r*   %test_hive_partitioning_dictionary_keyr\    s   YY
rwwy"((*=>
"--	288:>? F ''v'6Djjy]G &&&000E5t,-OE!RL)<<$$++FLLO<<<f%,,!!++-((( - << %%,,V\\!_===g&--!!++-)))) .r,   c                     |,t        j                  t        d      dgdz  dgdz  z   d      }| dz  }t        j                  |||       ||fS )	N	   r  r   r  r   rL   r  r  r0   r:   r   r_   r`   )base_dirr:   r  rd   s       r*   r  r    sS    }uQxrdQh".ABCn$DNN5$~>$;r,   c                 0   t        j                  t        d      dgdz  dgdz  z   d      }| dz  }t        j                  ||       t        j                  t        dd      dgdz  dgdz  z   d      }| d	z  }t        j                  ||       ||f||ffS )
Nr^  r  r   r  r   rL   ztest1.parquetr  ztest2.parquetr_  )r`  table1path1table2path2s        r*   _create_directory_of_filesrf    s    XXE!HB4!8rdQh+>?@F&ENN65!XXE!RLtax2$(/BCDF&ENN65!FeU^++r,   c                     | |j                  |j                  |             fD ]K  }| j                  j                  |j                        sJ |j	                  |       j                  |      rKJ  y r=   )r  r  r.   r   r   )rF   r:   r   picklerr  s        r*   _check_datasetri    s^    w}}W]]7%;<=~~$$U\\222&&w/66u=== >r,   c                    t        | t        j                        sJ | t        |       | gt        |       gfD ]B  }t	        j
                  | fi |}t        |t        j                        sJ t        ||||       D t        | j                        5  t	        j
                  | j                  fi |}t        |t        j                        sJ t        ||||       d d d        y # 1 sw Y   y xY wr=   )r@   pathlibPathrQ   r   rF   r   ri  r
   parentname)rd   r:   r   rh  r   rn   rF   s          r*   _check_dataset_from_pathro    s    dGLL))) CIvD	{3**T,V,'2#7#7888w~w? 4 
DKK	 **TYY1&1'2#7#7888w~w? 
!	 	 s   AC%%C.c                 <    t        |       \  }}t        ||||       y r=   r  ro  r  r   r  r:   rd   s        r*   test_open_dataset_single_filers    s    %g.KE4T5.-Hr,   c                 @    t        | d      \  }}t        ||||       y )Nr   r  rq  rr  s        r*   test_deterministic_row_orderru    s"    
 &ga@KE4T5.-Hr,   c                 f    t        |       \  }}t        j                  |      }t        | |||       y r=   )rf  r0   concat_tablesro  )r  r   r  tablesrF  r:   s         r*   test_open_dataset_directoryry    s.    *73IFAV$EWe^]Kr,   c           	         t        |       \  }\  }}t        j                  |      }t        j                  ||g      t        j                  t        |      t        |      g      g}||D cg c]"  }|j                  |j                  |            $ c}z  }|D ]M  }	|	j                  j                  |j                        sJ |j                  |	      }
|
j                  |      rMJ  y c c}w r=   )rf  r0   rw  r   rF   rQ   r  r  r.   r   r   )r  r   r  rx  rc  re  r:   datasetsr  rF   r  s              r*   test_open_dataset_list_of_filesr|   	  s    7@FNUEV$E 	

E5>"


CJE
+,H =E=EM//23X H ~~$$U\\222((1}}U### 	s   ,'C+c                    t        |       \  }}t        |      }t        j                  |      }|j                  j                  |j                        sJ t        j                  |t        j                               }|j                  j                  |j                        sJ t        j                  t              5  t        j                  |t        j                                d d d        y # 1 sw Y   y xY w)Nr  )r  r   r   rF   r.   r   rU   ri   r   r   r   rV   )r  r:   rd   fspathdataset1dataset2s         r*   #test_open_dataset_filesystem_fspathr  	  s     &g.KE4T"F zz&!H??!!%,,/// zz&R-?-?-ABH??!!%,,/// 
y	!


6b&8&8&:; 
"	!	!s   ?*C22C;c                    | dz  }|j                          t        |      \  }}|j                  |      }t        j                  |      }t        j                  |t        j                               }t        j                  t        |      t        |            }	|j                  |j                  |            }
|j                  |      |j                  |      cxk(  r*|j                  |	      cxk(  r|j                  |
      k(  sJ  J y )Nsingle-filer  )mkdirr  relative_tor   rF   rU   ri   rQ   r	   r  r  r   )r  r   r  rc   r:   rd   relative_pathd1d2d3d4s              r*   test_construct_from_single_filer  '	  s    -'IOO%i0KE4$$Y/M 
D	B	DR%7%7%9	:B	C&?93M	NB			]004	5B""2&.*A*A
+ J&&r*J.<.E.Eb.IJ J J J Jr,   c                     | dz  }|j                          t        |      \  }}t        j                  |      }t        j                  |t	        j
                               }t        j                  |j                  t        |             }|j                  |      }	|j                  |      }
|j                  |      }|	|
cxk(  r|k(  sJ  J |||fD ]8  }|j                  |j                  |            }|j                  |      |	k(  r8J  y )Nsingle-directoryr  )r  rf  r   rF   rU   ri   rn  r	   r   r  r  )r  r   r  rc   rx  rm   r  r  r  t1t2t3r  restoreds                 r*   $test_construct_from_single_directoryr  ;	  s    ,,IOO.y9MFE	I	B	I"*<*<*>	?B	INNw/G	HB		 	 	$B		 	 	$B		 	 	$B>r>>> "b\ &&}':':1'=>&&x0B666 r,   c                    | dz  }|j                          t        |      \  }}|D cg c]  }|j                  |        }}t        |       5  t	        j
                  |      }|j                  |      }t        |      t        t        t        |            k(  sJ 	 d d d        t	        j
                  |t        |             }	|j                  |	      }
t	        j
                  |      }|j                  |      }t	        j
                  |t        j                               }|j                  |      }|
cxk(  r|cxk(  r|k(  sJ  J y c c}w # 1 sw Y   xY w)Nzlist-of-filesr  )r  rf  r  r
   r   rF   r   r   sumr\   r	   rU   ri   )r  r   rc   rx  rm   rn   relative_pathsr  r  r  r  r  r  r  t4s                  r*   !test_construct_from_list_of_filesr  O	  s$    /)IOO.y9MFE6;<eammG,eN<	G	ZZ'$$R(2w#c#v.//// 

 
Nw/G	HB		 	 	$B	E	B		 	 	$B	Eb&8&8&:	;B		 	 	$BrR =		s   EAEEc                     ddg}t        j                  t        d      5  t        j                  ||        d d d        y # 1 sw Y   y xY w)Nr   z!subdir/1/xxx/doesnt-exist.parquetzdoesnt-existr   r  )r   r   r  r   rF   )ra   r   s     r*   -test_construct_from_list_of_mixed_paths_failsr  f	  s=     	%+E 
(	?


5V, 
@	?	?s   AA
c                    t        j                  ddg|       }t        j                  d|       }t        j                  ||g      }t        |t         j                        sJ t	        t        |j                                     dk(  sJ |j                         }t	        |      dk(  sJ |j                  dk(  sJ t	        |j                        dk(  sJ |j                  D ]  }|j                  ddgk(  rJ  y )	Nr   r   r  r   r   ro  r   r   )r   rF   r@   UnionDatasetr   r[   r>   r   r(  childrenr   )ra   rM   rN   rF   r:   childs         r*   (test_construct_from_mixed_child_datasetsr  q	  s     	

002>D	FA


8/Ajj!Q Ggr///tG))+,-222Eu:!!!w A%%%!!{{;;= = 	= = "r,   c                      t        j                  g d      } | j                         }|j                  dk(  sJ |j                  dk(  sJ y )Nrz  r   r   )r   rF   r   r   r(  )emptyr:   s     r*   test_construct_empty_datasetr  	  sD    JJr%(ENNE>>Q!!!r,   c            
      .   t        j                  g dt        j                  dt        j                         fdt        j
                         fg            } t        j                  t        d      5  | j                          d d d        y # 1 sw Y   y xY w)Nrz  rM   rX   r.   zMultiple matches for .*a.* in r   )
r   rF   r0   r.   r3   r5   r   r   r  r   )r  s    r*   *test_construct_dataset_with_invalid_schemar  	  sh    JJr%			bhhj	biik; 1 E 
z)I	J 
K	J	Js   1BBc                    t        j                  | t        j                  d      t        j                               }t        j                  | t        j                  d      t        j                               }t
        j                  j                  t        j                  t        d            gdg      t
        j                  j                  t        j                  t        d            gdg      }t        j                  t        d	      5  t        j                  ||g       d d d        d
}t        j                  t        |	      5  t        j                  g d       d d d        d}t        j                  t        |	      5  t        j                  d        d d d        d}t        j                  t        |	      5  t        j                  fdt        d      D               d d d        d}t        j                  t        |	      5  t        j                  g        d d d        d}t        j                  t        |	      5  t        j                  |g       d d d        d}t        j                  t        |	      5  t        j                  dg       d d d        d}t        j                  t        |	      5  t        j                  dg       d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   dxY w# 1 sw Y   $xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)Nr  r   /schemar   rM   rx  rN   z"Expected.*FileSystemDatasetFactoryr   zExpected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types: intr  zbExpected a path-like, list of path-likes or a list of Datasets instead of the given type: NoneTypezcExpected a path-like, list of path-likes or a list of Datasets instead of the given type: generatorc              3   "   K   | ]  }  y wr=   r   )r[  rF  batch1s     r*   r\  z<test_construct_from_invalid_sources_raise.<locals>.<genexpr>	  s     -HqFH   rK   zEMust provide schema to construct in-memory dataset from an empty listzFItem has schema
b: int64
which does not match expected schema
a: int64z}Expected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types:r   zCExpected a list of tables or batches. The given list contains a int)r   r   rU   r   r   r0   r  r  r  r   r   r   r   rF   r  InMemoryDataset)r   child1child2batch2rN  r  s        @r*   )test_construct_from_invalid_sources_raiser  	  sq   ((
!##%F
 ((
	"##%F
 ^^''%))<(=cU'KF^^''%))<(=cU'KF	y(L	M


FF#$ 
N	 
 
y	1


9 
2	.  
y	1


4 
2	/  
y	1


-E!H-. 
2 	P  
z	2
2 
3	  
y	1


FF#$ 
2	J  
y	1


FA; 
2 	N  
y	1
FA;' 
2	1a 
N	M 
2	1 
2	1 
2	1 
3	2 
2	1 
2	1 
2	1s`   K-K:L
(LL!L-L9M-K7:LLL!L*-L69MMc                    t         j                  j                  t        j                  t	        d            gdg      }t         j
                  j                  |g      }t        j                  g dt        j                  g             j                         }|t        j                  g       k(  sJ |||g|gfD ]  }t        j                  |      }| j                  |      |k(  sJ t        t        |j                                     dk(  sJ t        |j                               j                         |k(  sJ t         j
                  j                  t        |j!                                     |k(  rJ  y )Nr   rM   rx  rz  r  r   )r0   r  r  r  r   r6   r^   r   rF   r.   r   r:   r   r[   r>   r!   r   )r   rg   r:   dataset_tablesourcerF   s         r*   test_construct_in_memoryr  	  s.   NN&&r(;'<SE&JEHH!!5'*EJJr%		" !!) BHHRL(((%%5'2**V$&&w/58884--/01Q666G))+,5575@@@xx$$T'*<*<*>%?@EIII 3r,   r   c                    t         j                  j                  t        j                  t	        d            gdg      t         j
                  j                  g      }d}fdd ffdj                  ffD ]  \  }}t        j                  j                   |       ||       }|j                         |k(  sJ t        j                  t         j                  |      5  |j                          d d d         y # 1 sw Y   xY w)	Nr   rM   rx  z#OneShotFragment was already scannedc                  Z    t         j                  j                   j                   g      S r=   )r0   RecordBatchReaderr^   r.   rg   s   r*   r  z$test_scan_iterator.<locals>.<lambda>	  s!    R))66ug'r,   c                  ,     fdt        d      D        S )Nc              3   "   K   | ]  }  y wr=   r   )r[  rF  rg   s     r*   r\  z7test_scan_iterator.<locals>.<lambda>.<locals>.<genexpr>	  s     .XeXr  r   )r   r  s   r*   r  z$test_scan_iterator.<locals>.<lambda>	  s    .U1X.r,   r.   r   r   )r0   r  r  r  r   r6   r^   r.   r   r2  r   r   r   r3  )r   r:   r   r   r.   r   rg   s         @r*   test_scan_iteratorr  	  s    NN&&r(;'<SE&JEHH!!5'*E1E'(,..= **))If+ * ?!U***]]2??%8 98 98s   "C>>D	c                    t        j                  t        d      dgdz  dgdz  z   d      }| dz  }|j                          t        d      D ]R  }|d	j	                  |      z  }|j                          t        j                  |j                  d|z  d      |d
z         T |j                  dt        j                  dgdz  dgdz  z   dgdz  z   t        j                                     }||fS )Nr^  r  r   r  r   rL   zdataset-partitionedrK   zpart={}r  r   r   r   r   r  )r0   r:   r   r  rX   r_   r`   rb  append_columnr  r   )r9  r:   rd   r)   r   
full_tables         r*   _create_partitioned_datasetr  	  s    HH582$(bTAX*=>?E**DJJL1Xi&&q))


u{{1Q3*D>,AB 
 $$!qA37*aS1W4288:FHJ tr,   c           
         t        |       \  }}|j                  ddg      }t        ||||       t        j                  t        |      t        j                  d            }|j                  j                  |j                        sJ t        |       5  t        j                  dt        j                  d            }|j                  j                  |j                        sJ 	 d d d        t        j                  t        |      d      }|j                  j                  |j                        sJ t        j                  t        |      t        j                  t        j                  dt        j                         fg      d            }|j                  j                  t        j                  dt        j                                     }|j                  j                  |      sJ |j                         }|j                  dt        j                   dgd	z  d
gd	z  z   dgd	z  z   t        j                                     }	|j                  |	      sJ y # 1 sw Y   {xY w)NrM   rN   r   ri  r   zdataset-partitioned/r   r   rK   r   r   r  )r  rn  ro  r   rF   rQ   r   r.   r   r
   r0   r  r   r1   r   r  r  )
r  r   r  r  rd   r:   rF   r  r  rN  s
             r*   'test_open_dataset_partitioned_directoryr  
  s   27;J sCj)ET5.-H jjD	v >@G>>  !2!2333 
G	**3*,//*HJ~~$$Z%6%6777 
 jjT8G>>  !2!2333 jjD	__II	*+,V=>G ll))"((62779*EFO>>  111F""!qA37*aS1W42779EGH==""") 
	s   AI##I-c                    t        |       \  }}t        j                  t        |            }|j                  j                  |j                        sJ t        j                  t        |      t        j                               }|j                  j                  |j                        sJ t        |       5  t        j                  dt        j                               }d d d        j                  j                  |j                        sJ t        j                  t              5  t        j                  t        |      t        j                                d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)Nr  r  )r  r   rF   rQ   r.   r   rU   ri   r
   r   r   r  rV   )r  r:   rd   r  r  dataset3s         r*   test_open_dataset_filesystemr  2
  s    &g.KE4 zz#d)$H??!!%,,/// zz#d)0B0B0DEH??!!%,,/// 
G	::n9K9K9MN 
??!!%,,/// 
(	)


3t9););)=> 
*	) 
	
 
*	)s   8*E&*3E2&E/2E;c                     t        |       \  }}t        j                  t        d      5  t	        j
                  |gd       d d d        y # 1 sw Y   y xY w)Nz format 'blabla' is not supportedr   blablar   )r  r   r   r  r   rF   )r  rF  rd   s      r*   $test_open_dataset_unsupported_formatr  I
  s<    !'*GAt	z)K	L


D6(+ 
M	L	Ls   AAc                 :   t        |       \  }}t        j                  |      }t        j                  ||g      }t        |t        j                        sJ |j                  |j                  |            }|j                  |      |j                  |      k(  sJ y r=   )r  r   rF   r@   r  r  r  r   )r  r   r  rF  rd   rF   unionrJ  s           r*   test_open_union_datasetr  P
  s    !'*GAtjjGJJ)*EeR__---!!-"5"5e"<=G""7+~/F/Fu/MMMMr,   c                     t        j                  d| d      }t        j                  t        d      5  t        j                  |gd       d d d        y # 1 sw Y   y xY w)Nr  r   r  zcannot pass any additionalr   r   )r   rF   r   r   r  )r   r  s     r*   .test_open_union_dataset_with_additional_kwargsr  \
  s@    JJxM)LE	z)E	F


E79- 
G	F	Fs   AAc                  0   t        j                  t              5  t        j                  dd       d d d        t        j                  t
        j                  d      5  t        j                  dd       d d d        y # 1 sw Y   OxY w# 1 sw Y   y xY w)Nzi-am-not-existing.arrowrz  r   zcannot be relativer   zfile:i-am-not-existing.arrow)r   r   r  r   rF   r0   r3  r   r,   r*   #test_open_dataset_non_existing_filer  b
  se     
(	)


,U; 
* 
r.B	C


1%@ 
D	C 
*	) 
D	Cs   B B B	Br   rc   r   re  rd  partition_keysr  BCr  )DEFr  )r   NrK   )r  Nr  )Nr   rK   c           	         t        j                  t        d      dgdz  dgdz  z   d      }d |d   v xs d |d   v }|d	k(  r|ry |d	k(  r(t        j                  j                  d
dg      }d}d }nM|r"t        j                  j                  |      }n t        j                  j                        }d}|r|}nd}| dz  }	|	j                          |\  }
}|
D ]Q  }|D ]J  }|	|j                  |xs ||xs |      z  }|j                  d       t        j                  ||dz         L S t        j                  t        |	      |      }fd}|j                  j                  t        j                  d
 ||
d                     j                  t        j                  d ||d                     }|j                  j!                  |      sJ y )Nr^  r  r   r  r   rL   r   r   rc   part1part2r  z{0}/{1})r  re  zpart1={0}/part2={1}__HIVE_DEFAULT_PARTITION__rF   T)parentsr  r  c                 8   r`t        | t              rt        j                         nt        j                         }t        j
                  t        j                         |      S t        | t              rt        j                         S t        j                         S r=   )r@   rQ   r0   r5   r   r  )r   
value_typer  s     r*   r  z/test_partition_discovery.<locals>.expected_type
  sY    (23(<"((*J==Z88",S#"6299;FBHHJFr,   )r0   r:   r   r   r   r  r|  r  rX   r_   r`   rF   rQ   r.   r   r1   r   )r  r   re  r  r  r:   has_nullfmt
null_valuebasepath
part_keys1
part_keys2r  r  rd   rF   r  r  s      `              r*   test_partition_discoveryr  l
  s   " HH583%!)seai*?@AE~a((EDN14E,EH{"x{"//88g1A 9 C
..77!1 8 L ..77!1 8 3L#&J5J"HNN+J
E

5.J0CDEDJJtJ$NN5$"78	    jjX\BGG ll))
-
167f
-
167 
 >>  111r,   c                    t        j                  t        j                  ddgd      t	        d      d      }t        j                  |j                  dg      j                  d      }t        j                  || |d	
       t        j                  | d	t
        j                  j                  d            }t        j                  |d   |d   j                         d      }|j                         j                  |      sJ t!        |j#                               d   }|j                  |j                        j                  |d d       sJ |j$                  }|j'                  |j)                  |            }|j                         j                  |      sJ |j'                  |j)                  |            }|j                  |j                        j                  |d d       sJ |j                  |j                        j+                         j                  |d d j+                               sJ |j$                  j                  |      sJ y )Nr  r  r   r   r   rg  r   r   ri  r  r   rX   Tr  r{  rg  )rg  r   r   rT   )r0   r:   r.  r-  r   r   r   rn  r.   r  rF   r|  r  rP  r   r   r[   r>   r   r  r  r  )	r  r  r:   r   rF   rN  rG   	part_exprr  s	            r*   4test_dataset_partitioned_dictionary_type_reconstructr  
  s     HHbiic
A6uRyIJE??5<<188HDUG$yIjj	((1141HG xxeeFm&E&E&GHH $$X...G))+,Q/HGNN3::8BQ<HHH--I""=#6#6w#?@H%%h///""=#6#6x#@AHGNN3::8BQ<HHHGNN3==?FF!    ((//	:::r,   c                 V   ddl m} | d   \  }}}}dj                  ||||      }|j                  |      \  }} |j                  d       t        j                  dg di      }	 |j                  d      5 }
t        j                  |	|
       d d d        |	|||||||fS # 1 sw Y   xY w)	Nr   
FileSystem
connectionz_s3://{}:{}@mybucket/data.parquet?scheme=http&endpoint_override={}:{}&allow_bucket_creation=TruemybucketrM   r  zmybucket/data.parquet)
r{   r  rX   from_urirY   r0   r:   rZ   r_   r`   )	s3_serverr  r*  r+  r,  r-  r)  rU   rd   r:   re   s              r*   r(  r(  
  s    %)2<)@&D$
J	&	
Jd	3  ""3'HBBMM*HHc9%&E			6	73
uc" 
8 $CtZCC 
8	7s   6BB(c                     | \  }}}}}}}}t        j                  |d      }|j                  |      j                  |      sJ t        j                  |d|      }|j                  |      j                  |      sJ y )Nr   r   rX   r   )r   rF   r   r   )r(  r   r:   rd   rU   r)  rF  rF   s           r*   test_open_dataset_from_uri_s3r  
  s~     (9$E4S!Q1 jjY/G""7+225999 jjiB?G""7+225999r,   c                     | \  }}}}}}}}t        j                  d      }|j                  |      }t        j                  |d|      }	|j                  |	      j                  |      sJ y )Nr  r   r  )rU   r   r$  r   rF   r   r   )
r(  r   r:   rd   r   r)  rF  r   finfosrF   s
             r*    test_open_dataset_from_fileinfosr  
  sj     0A,E4S!Q1z*H%%h/Fjj	jIG""7+225999r,   c           	         | \  }}}}}}}}t        j                  d      }ddlm}	m}
 |j                  ||ddj                  ||      i      }t        j                  |d|      }|j                         j                  |      sJ  |
 |	|            }t        j                  |d|      }|j                         j                  |      sJ y )	Ns3fsr   )FSSpecHandlerrj   endpoint_urlzhttp://{}:{})r   secretclient_kwargsr   r  )r   importorskipr{   r  rj   S3FileSystemrX   r   rF   r   r   )r(  r:   rd   rF  r*  r+  r,  r-  r  r  rj   rU   rF   s                r*   $test_open_dataset_from_uri_s3_fsspecr  
  s     =N9E4AtT:zv&D6			N11$=
 
 
B jjiB?G$$U+++ 
mB'	(BjjiB?G$$U+++r,   c                 ~   ddl m} | d   \  }}}}d}d}dj                  ||||||      }|j                  |      \  }	}|dk(  sJ  |	j                  |       t        j                  dg d	i      }
 |	j                  |      5 }t        j                  |
|       d d d        t        j                  |d
      }|j                         j                  |
      sJ dj                  ||||      }g d}|D ]O  \  }}|j                  |      }t        j                  ||d
      }|j                         j                  |
      rOJ  t        j                  t
        j                   d      5  |j                  d      }t        j                  d|       d d d        d}d}|j                  |      }t        j                  t"              5 }t        j                  d|       d d d        t%        j&                        |j                  d||      k(  sJ d}|j                  |      }t        j                  t"              5 }t        j                  d|       d d d        t%        |j&                        |j                  d||      k(  sJ y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   [xY w)Nr   r  r  theirbucketnested/folder/data.parquetzOs3://{}:{}@{}/{}?scheme=http&endpoint_override={}:{}&allow_bucket_creation=truez&theirbucket/nested/folder/data.parquetrM   r  r   r   3s3://{}:{}@{{}}?scheme=http&endpoint_override={}:{}))ztheirbucket/nested/folder/z/data.parquet)ztheirbucket/nested/folderdata.parquet)ztheirbucket/nested/folder/data.parquet)ztheirbucket/nestedr  )r   z/nested/folder/data.parquet)r   r  r  zMissing bucket namer   r  z'/theirbucket/nested/folder/data.parquetr  zThe path component of the filesystem URI must point to a directory but it has a type: `{}`. The path component is `{}` and the given filesystem URI is `{}`ztheirbucket/doesnt/existr  NotFoundFile)r{   r  rX   r  rY   r0   r:   rZ   r_   r`   r   rF   r   r   r   r   r3  r  rQ   r   )r  r  r*  r+  r,  r-  bucketrd   r)  rU   r:   re   rF   templaterV  prefixr   excs                     r*   -test_open_dataset_from_s3_with_filesystem_urir    si    &)2<)@&D$
JF'D&&,f
FD$'
 
 ""3'HB;;;;BMM&HHc9%&E			t	$
uc" 
% jjY/G$$U+++ 	>DD
D$	
 
E oof%**Tc)D!((/// 
 
r.C	Dooc"


<M 
E
	! 
 &D
//$
C	z	"c


>c2 
#syy>U\\*dC@@@@3D
//$
C	z	"c


>c2 
#syy>U\\&$<<<<] 
%	$4 
E	D 
#	" 
#	"s0   J+)J
J'J3JJ$'J03J<c                     t        |       \  }}t        j                  d      }|j                  d      }t	        j
                  ||      }|j                  j                  |j                        sJ y )Nfsspecfiler  )r  r   r  r   r   rF   r.   r   )r  r:   rd   r  ro   rF   s         r*   test_open_dataset_from_fsspecr  ]  s\    %g.KE4  *F'Gjj'2G>>  ...r,   c                 J   t        j                  d      }t        j                  dg di      }| dz  }t	        j
                  ||       |j                  d      }|j                  |       d   j                  d      sJ t        j                         }t        j                  t        j                  |            }|j                  ||      }|j                  |j                         sJ |j#                  ||      }|j$                  j                  |j                         sJ y )Nr  rM   r  r  r  r   )r   r  r0   r:   r_   r`   r   lsendswithr   r   rU   rj   r  r  r   r.   r   r`  )	r  r  r:   rd   	fsspec_fsrX   r   r.   rG   s	            r*   test_file_format_inspect_fsspecr  h  s       *F HHc9%&E^#DNN5$ !!&)I<< #,,^<<< !!#F !1!1)!<=J^^D*-F==&&&##D*5H##**5<<888r,   c                 4   | dz  }t        j                  ddgdz  t        d      d      }t        j                  |j                  dg      j                  d	      }t        j                  |||d
       t        j                  t        j                  dt        j                  d      fg      d	      }t        j                  |d
|      }t        j                  d      t        j                  d      kD  }|j                  ||      }|j                  d      j                         g dk(  sJ dd l}t        j                  d       |j                   ddd      kD  }|j                  ||      }|j                  d      j                         g dk(  sJ y )Ntest_partition_timestamps
2012-01-01z
2012-01-02r   r   )datesr  r  r   ri  r  r  r  r{  r   r  )r   rK   r   r'  r^  r   i  r   )r0   r:   r   r   r   rn  r.   r  r  rF   r1   r"   	Timestampr   r  r:  r   )r  r   rd   r:   r   rF   r  r   s           r*   test_filter_timestampr    s\    00DHH-1Bi E
 ??5<<	299&IDUDtIF ??299wS0A&B%CD"(*DjjidCG!BLL$>>I##GI#>E<<'')_<<<!$5H$5$5dAq$AAI##GI#>E<<'')_<<<r,   c                 P   t        j                  dt        j                  g dt        j                               i      }t	        | |      \  }}t        j                  t        |            }t        j                  d      dkD  }t        |j                  ||            dk(  sJ y )NrM   )r   r   r   rK   r   r   r  r   r   rK   )r0   r:   r  r  r  r   rF   rQ   r1   r   r   )r  r   r:   rF  rd   rF   filter_s          r*   test_filter_implicit_castr    s     HHc288$6RWWYGHIE!'51GAtjjT#GhhsmaG~&&ww&?@AEEEr,   c                 *   t        j                  dg di      }t        | |      \  }}t        j                  t        |            }|j                  |t        j                  d      t        j                  d       k(        }|j                  dk(  sJ y )Nr  )rM   rN   Nr   r   )
r0   r:   r  r   rF   rQ   r   r1   r   r   )r  r   r:   rF  rd   rF   s         r*   test_filter_equal_nullr    s}     HHc+,-E!'51GAtjjT#G##48 $ E >>Qr,   c                    t        j                  g dt        d      D cg c]  }t        j                  ddd|       c}t        dd      D cg c]  }t        j                  dd|       c}d      }t	        | |      \  }}t        j                  t        |            }t        j                  t        j                  d      t        j                  dd	g            }|j                  ||
      j                  dk(  sJ t        j                  t        j                  d            dk\  }|j                  ||
      j                  dk(  sJ t        j                  t        j                  d      t        j                  d            }|j                  |d|i      }	|	d   j!                         g dk(  sJ y c c}w c c}w )N)rM   rN   NrM   r  r   i  r   r  r  r  rM   rN   r   rK   r  r   r  r   r   r  )r0   r:   r   r   r  r   rF   rQ   r  is_inr1   r  r   r   hourdays_betweenr:  )
r  r   r)   r:   rF  rd   rF   r  r   r  s
             r*   test_filter_compute_expressionr$    so   HH'8=aA1haA.A5:1a[A[ha+[A E
 "'51GAtjjT#Ghhrxx}bhhSz&:;G""77";DDIIIggbhhsm$)G""77";DDIII??288C="((3-8D$$Wvtn$EF&>##%888 BAs   F<
Gc                 @   t        j                  | t        j                  d      t        j                               }t        j
                  |g      }t        |j                               dk(  sJ t        d |j                         D              sJ |j                         d   j                  |j                               sJ |j                         j                  |j                               sJ t        |j                         t         j                        sJ y )Nr  r   r   c              3   P   K   | ]  }t        |t        j                           y wr=   )r@   r0   r  )r[  r  s     r*   r\  z%test_dataset_union.<locals>.<genexpr>  s     K1JAz!RYY'1Js   $&r   )r   r   rU   r   r   UnionDatasetFactoryr   r  r~  r   r  r@   r   r  )r   r  r   s      r*   test_dataset_unionr(    s    ''rx0##%E $$eW-G w&&()Q...K1H1H1JKKKK""$Q'..u}}?????##EMMO444gnn&

333r,   c                 h	   t        j                  d|d      }t        j                  d|dddg      }t        j                  d|dd	      }|j                  |j                  cxk7  r|j                  k7  sJ  J t        j                  |||g      }t        |t         j                        sJ d
}t        j                  t        |      5  t        j                  ||g|       d d d        t        j                  dt        j                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fg      }|j                  j                  |      sJ |j                         j                  j                  |      sJ t        j                  ||g      }t        j                  dt        j                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fg      }|j                  j                  |      sJ |j                         j                  j                  |      sJ t        j                  dt        j                         fdt        j                         fdt        j                         fg      }t        j                  ||g|      }|j                         j                  j                  |      sJ t        j                  dt        j                         fdt        j                         fdt        j                         fg      }t        j                  ||g|      }|j                         j                  j                  |      sJ t        j                   t#        d      dgdz  dgdz  z   dgg d      }t%        | |      \  }	}
t        j                  |
      }t        j                  t        j&                  d      5  t        j                  ||g       d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)Nr  r   r  r  weekr   r   rX   r   /hiver   z$cannot pass any additional argumentsr   r  r   r   r   r   r   rT   r,  r^  r  r   r  r   	abcdefghj)r   r   r   rx  )r:   zUnable to merge)r   rF   r.   r@   r  r   r   r  r0   r2   r3   r4   r5   r   r   r   r:   r   r  ArrowTypeError)r  r   r  r  child3	assembledmsgr  r:   rF  rd   child4s               r*   &test_union_dataset_from_other_datasetsr3    s   ZZ]9MFZZ	mI&,g%68FZZM)%+-F ==FMM:V]]:::::

FFF34Ii111
0C	z	-


FF#> 
. ii		"((*	"**,	"))+			"((*! O ""?333&&--o>>>

FF+,Iii		"((*	"**,	"))+		"((*! O ""?333&&--o>>>ii	"((*	"))+	! O
 

FF+ODI&&--o>>>ii	"((*	"))+	BIIK ! O
 

FF+ODI&&--o>>> HHeAhqB4!8 3[A57E!'7GAtZZF	r((0A	B


FF#$ 
C	Bc 
.	-b 
C	Bs   R:R(R%(R1c                     d}t        j                  t        |      5  t        j                  g d|        d d d        y # 1 sw Y   y xY w)Nz8points to a directory, but only file paths are supportedr   )r  r  r,  r  )r   r   IsADirectoryErrorr   rF   )r   r1  s     r*   4test_dataset_from_a_list_of_local_directories_raisesr6    s1    
DC	(	4


1mL 
5	4	4s   AA
c           
         t        j                  t        j                  d|       t        j                  d|       t        j                  d|       g      }t        j                  dt        j                         fdt        j
                         fdt        j                         fdt        j                         fg      }|j                  j                  |      sJ t        j                  t        j                  d|       t        j                  d|       t        j                  d| d	
      g      }t        j                  dt        j                         fdt        j
                         fdt        j                         fdt        j                         fdt        j                         fdt        j                         fg      }|j                  j                  |      sJ y )Nr  r  r  r,  r   r   r   r   r   )r   r   r   r   )
r   rF   r0   r.   r2   r3   r4   r5   r   r   )r   rF   r  s      r*   &test_union_dataset_filesystem_datasetsr8  "  sj   jj


86


97


7}5 G
 ii		"((*	"**,	"))+	! O >>  111 jj


86


97


7}6J G
 ii		"((*	"**,	"))+		"((*! O >>  111r,   c                     t        j                  g dg dd      }t        j                  | dz         d fd	}d }|} ||||j                         |j                  }|} |||       t        j                  ddg      }t        j                  g dg dgd	d
g      } |||       t        j                  dg      }t        j                  g dgd
g      } |||       t        j                  ddg      }t        j                  g dt        j
                  g dd      gd
dg      } |||       t        j                  ddg      }t        j                  t         dz        |      }t        j                  |d
   j                  d      |d	   gd
d	g      } |||       t        j                  d
t        j                  t        j                               fdg      }t        j                  t         dz        |      }|j                  j                  |      sJ t        j                  t        d      5  j!                  |       d d d        y # 1 sw Y   y xY w)Nr  r  r  r  rL   r  c                    t        j                  t        dz        |       }||j                  j	                  |      sJ |j                  j	                  |       sJ j                  |      }|j	                  |      sJ y )Nr  rT   )r   rF   rQ   r.   r   r   )r.   rN  r  rF   r  r   r  s        r*   ri  z-test_specified_schema.<locals>._check_datasetG  su    **S>!9:6J&>>((999>>((000((1}}X&&&r,   )r  )rN   r4   )rM   r3   rN   rM   rx  )r  r   NNNr   r  r  )rM   r   rT   z#Unsupported cast from int64 to listr   r=   )r0   r:   r_   r`   r.   r  r   rF   rQ   r  list_r   r   r   r   r  r   )r  r   r:   ri  r.   rN  rF   s   ``     r*   test_specified_schemar>  B  s   HH9<89ENN5'N23' FH68U\\B \\FH68$ YY(.9:Fxxy1#sDH68$ YY'(FxxC51H68$ YY78Fxx"47CE"Cj*H 68$ YY(89:FjjW~56vFGxxs1s%"Cj*H 68$ YYbhhrxxz235EFGFjjW~56vFG>>  (((	*B
D(
D 
D 
Ds   I##I,c                    | dz  }t        j                  dg di      }t        j                  ||       t        j                  dt        j
                         fg      }t        j                  t        |      gdz  |      }|j                  j                  |      sJ |j                  |      }t        j                  t        d      5  |j                         }|j                          d d d        y # 1 sw Y   y xY w)Nr  rM   r  d   rT   z#Unsupported cast from int64 to nullr   )r0   r:   r_   r`   r.   r  r   rF   rQ   r   r   r   r   r  r6  r7  )r  r   fnr:   r.   rF   r   r   s           r*   test_incompatible_schema_hangrB  |  s     
>	!BHHc9%&ENN5"YYbggi()*Fjj#b'S8G>>  ((($$W-G	*B
D""$
D 
D 
Ds   !C66C?c                    t        j                  t        j                  g dd      t        j                  g dd      d      }t        | dz        }t        j                  |      5 }t        j
                  ||j                        }|j                  |j                         d          |j                          d d d        t        j                  |t        j                         	      }|j                  |      }|j                  |      sJ t        |       d
D ]=  }t        j                  ||	      }|j                  |      }|j                  |      r=J  y # 1 sw Y   xY w)Nr  r  r  r:  r4   rL   z
test.arrowr   r   )rz  arrow)r0   r:   r  rQ   output_streamRecordBatchFileWriterr.   write_batchr   r  r   rF   r  r   r   rI   )	r  r   r:   rd   r  r  rF   r  
format_strs	            r*   test_ipc_formatrI    s   HH288IF;88LyAC DE w%&D			$	4))$=5++-a01 
 
 jjb&6&6&89G$$W-F==/8&
**T*5((1}}U### ' 
 	s   *AE  E)c           	         ddl m} t        j                  t        j                  g dd      t        j                  g dd      d      }t        | d	z        }|j                  ||       t        j                  |t        j                         
      }t        |j                               }t        |d   t        j                        sJ |j                  |      }|j                  d       |j!                  |      sJ t#        |       t        j                  |d
      }|j                  |      }|j                  d       |j!                  |      sJ |j                  |dg      }|j                  d       |j!                  |j%                  dg            sJ |j                  |dt        j&                  d      dz  i      }|j                  d       |j!                  t        j                  dt        j                  g dd      i            sJ |j)                  |      dk(  sJ |j)                  |t        j&                  d      dkD        dk(  sJ y )Nr   orcr  r  r  r:  r4   rL   test.orcr   T)fullrL  rN   r   b2r   )r  r  g333333?rK   rM   r   r   )r   rL  r0   r:   r  rQ   r`   r   rF   r  r[   r>   r@   FileFragmentr   validater   rI   rn  r1   r   )r  r   rL  r:   rd   rF   r   r  s           r*   test_orc_formatrR    s   HH288IF;88LyAC DE w#$DOOE4 jjb&6&6&89GW**,-IilBOO444$$W-F
OOO==/8jje,G$$W-F
OOO==$$Wse$<F
OOO==se,---$$$ 12 % F OOO==
$I>?@   $$W-222$$WRXXc]Q5F$G1LLLr,   c                    ddl m} t        j                  t        j                  g dd      t        j                  g dd      d      }t        | d	z        }|j                  ||       t        j                  |d
      }t        |j                  |            }t        |      dk(  sJ |d   j                  dk(  sJ |d   j                  |j                         d         sJ y )Nr   rK  r  r  r  r:  r4   rL   rM  rL  r   r   rK   )r   rL  r0   r:   r  rQ   r`   r   rF   r[   r   r   r   r   )r  r   rL  r:   rd   rF   r  s          r*   test_orc_scan_optionsrT    s    HH288IF;88LyAC DE w#$DOOE4 jje,G.++G45Fv;!!9"""!9E,,.q1222r,   c                      	 ddl m}  y # t        $ rK t        j                  t
        d      5  t        j                  dd       d d d        Y y # 1 sw Y   Y y xY ww xY w)Nr   r  z'not built with support for the ORC filer   r  rL  r   )r  r  r  r   r   r  r   rF   rV  s    r*   test_orc_format_not_supportedrW    sK    *1 *]]G
 JJs5)
 
 
*s&   	 $AAAA	AAc            	      x   t        j                  t        d      5  t        j                  t        j                  dt        d      i      dd       d d d        t        j                         } t        j                  t        d      5  | j                          d d d        y # 1 sw Y   RxY w# 1 sw Y   y xY w)Nz9Writing datasets not yet implemented for this file formatr   rM   r   rL  z/tmp)rX   r`  )
r   r   r  r   r  r0   r:   r   r  make_write_options)ofs    r*   +test_orc_writer_not_implemented_for_datasetr[    s    	I
 	HHc59%&uv	
	
 
			B	I
 		
 

 

 
s   7B$
B0$B-0B9c                    t        j                  t        j                  g dd      t        j                  g dd      d      }t        | dz        }|j	                         j                  |d	       t        j                  |t        j                         
      }|j                  |      }|j                  |      sJ t        |       t        j                  |d
      }|j                  |      }|j                  |      sJ y )Nr  r3   r  r:  r4   rL   test.csvFr   r   r  )r0   r:   r  rQ   r  to_csvr   rF   r  r   r   rI   )r  r   r:   rd   rF   r  s         r*   test_csv_formatr`    s    HH288IG<88LyAC DE w#$D	OOT/jjb&6&6&89G$$W-F==/8jje,G$$W-F==r,   compression)bz2gziplz4zstdc                    t         j                  j                  |      s$t        j                  dj                  |             t        j                  t        j                  g dd      t        j                  g dd      d      }t        j                         }|dk7  r|nd	}t        | d
| z        }|j                  ||      5 }|j                         j                  d      }|j                  |j!                  d             d d d        t#        j$                  |t#        j&                               }	|j)                  |	      }
|
j+                  |      sJ y # 1 sw Y   WxY w)Nz{} support is not builtr  r3   r  r:  r4   rL   rc  gzz	test.csv.ra  Fr^  r6  r   )r   Codecis_availabler   skiprX   r0   r:   r  rU   ri   rQ   rZ   r  r_  writerE  r   rF   r  r   r   )r  ra  r   r:   r   suffixrd   r  csv_strrF   r  s              r*   test_csv_format_compressedro    s#    ==%%k2-44[ABHH288IG<88LyAC DE##%J'61[tFw9VH--.D		&	&t	&	E //#***7

7>>'*+ 
F jjb&6&6&89G$$W-F== 
F	Es   AE""E+c           	         t        | dz        }t        |d      5 }|j                  d       d d d        t        j                  |d      }|j                  |      }|j                  t        j                  dt        j                  g d      i            sJ t        j                  |t        j                  t        j                  j                  d	      
            }|j                  |      }|j                  t        j                  dt        j                  ddg      i            sJ t        j                  |t        j                  t        j                  j                  dg      
            }|j                  |      }|j                  t        j                  dt        j                  g d      i            sJ y # 1 sw Y   xY w)Nr]  wzskipped
col0
foo
bar
r  r   skipped)col0r  r  r   )r  r  rs  r  r  r9  )rr  rs  r  r  )rQ   r?   rl  r   rF   r   r   r0   r:   r  r  r  r  )r  r   rd   r  rF   r  s         r*   test_csv_format_optionsru  -  st   w#$D	dCD

./ 
jje,G$$W-F==
)RXX&<=>?A A A jjb&6&6VV''!'4'6 7G$$W-F==6288UEN+C"DEFFFjjb&6&6VV''eW'='? @G$$W-F==
%"CDEFH H H 
s   G

Gc           
      X   t        | dz        }t        |d      5 }|j                  d       d d d        t        j                  |t        j
                  t        j                  j                  d                  }|j                  |      }g d}|j                  |k(  sJ |j                  t        j                  t        j                  d	g      t        j                  d
g      t        j                  dg      t        j                  d	g      d            sJ y # 1 sw Y   xY w)Nr]  rq  z1,a,true,1
T)autogenerate_column_namesr  r   )f0r   rw  r  r   rM   )rQ   r?   rl  r   rF   r  r0   r  r  r   r9  r   r:   r  )r  r   rd   r  rF   r  expected_column_namess          r*   (test_csv_format_options_generate_columnsrz  B  s    w#$D	dCD

>" 
 jjb&6&6VV''$'G'I JG$$W-F4"7777==1#)+3%)+4&)9)+1##8 9 : : : 
s   D  D)c           	         t        | dz        }t        |d      5 }|j                  d       d d d        t        j                  |d      }t
        j                  j                  dgd      }t        j                  |t        j                  j                  d	
            }|j                  ||      }|j                  t        j                  dt        j                  g d      i            sJ t        j                  |      }t        j                  ||      }|j                  |      }|j                  t        j                  dt        j                  g d      i            sJ t        j                         }|j                  ||      }|j                  t        j                  dt        j                  g d      i            sJ y # 1 sw Y   xY w)Nr]  rq  zcol0
foo
spam
MYNULL
r  r   MYNULLT)null_valuesr  r  r  )r  r  )fragment_scan_optionsrs  )r  spamNr  )r  r  r|  )rQ   r?   rl  r   rF   r   r  r  r  r0   r  r   r   r:   r  r  )	r  r   rd   r  rF   r  r   r  rI  s	            r*   test_csv_fragment_optionsr  R  s   w#$D	dCD

./ 
jje,Gkk00hZEI 1 KO'''VV''5'9;G $$WG$LF==62884I+J"KLMMM!!/BJjjj1G$$W-F==62884I+J"KLMMM'')G$$WG$LF==
&"((#<=>?A A A% 
s   GGc                    t        j                  t        j                  g dd      t        j                  g dd      d      }t        | dz        }|j	                         j                  d	      d
d j                  dd      }t        |d      5 }|j                  |       d d d        t        j                  |t        j                               }|j                  |      }|j                  |      sJ t        |       t        j                  |d      }|j                  |      }|j                  |      sJ y # 1 sw Y   xY w)Nr  r3   r  r:  r4   rL   	test.jsonrecordsorientr   r  },{}
{rq  r   r  )r0   r:   r  rQ   r  to_jsonreplacer?   rl  r   rF   r  r   r   rI   r  r   r:   rd   re   r  rF   r  s           r*   test_json_formatr  j  s   HH288IG<88LyAC DE w$%D
//

#
#9
#
5a
;
C
CE6
RC	dCA	 
 jjb&7&7&9:G$$W-F==/8jjf-G$$W-F== 
s   EEc           	      B   t        j                  t        j                  g dd      t        j                  g dd      d      }t        | dz        }|j	                         j                  d	      d
d j                  dd      }t        |d      5 }|j                  |       d d d        t        j                  t        d      5  t        j                  |t        j                  t         j                  j!                  d                  }d d d        t        j                  |t        j                  t         j                  j!                  d                  }|j#                  |      }|j%                  |      sJ y # 1 sw Y   xY w# 1 sw Y   xY wNr  r3   r  r:  r4   rL   r  r  r  r   r  r  r  rq  ztry to increase block sizer   r   r  r  r   @   )r0   r:   r  rQ   r  r  r  r?   rl  r   r   r  r   rF   r  r  r  r   r   r  s           r*   test_json_format_optionsr    s?   HH288IG<88LyAC DE w$%D
//

#
#9
#
5a
;
C
CE6
RC	dCA	 
 
z9
;**T"*;*;,,,:+< =
;
 jjb&7&7WW((B(7'9 :G$$W-F== 

; 
;s   F		A
F	FFc                    t        j                  t        j                  g dd      t        j                  g dd      d      }t        | dz        }|j	                         j                  d	      d
d j                  dd      }t        |d      5 }|j                  |       d d d        t        j                  t        d      5  t        j                  t         j                  j                  d            }t        j                   |t        j"                  |            }d d d        t        j                  t         j                  j                  d            }t        j                   |t        j"                  |            }|j%                  |      }|j'                  |      sJ y # 1 sw Y   xY w# 1 sw Y   xY wr  )r0   r:   r  rQ   r  r  r  r?   rl  r   r   r  r   r  r  r  rF   r  r   r   )	r  r   r:   rd   re   r  r   rF   r  s	            r*   test_json_fragment_optionsr    s[   HH288IG<88LyAC DE w$%D
//

#
#9
#
5a
;
C
CE6
RC	dCA	 
 
z9
;,,,,,:<**T"*;*;G*DE	
; ((WW((B(79Gjjb&7&7&@AG$$W-F== 

; 
;s   F3	AG 3F= G	c                 h   t        | dz        }dD ]  \  }}t        |d      5 }|j                  |       d d d        t        j                  dt        j
                         fdt        j
                         fg      }t        j                  dgdgd|	      }t        j                  j                  |
      }t        j                  |      }	t        j                  ||	      }
|
j                  j                  |      sJ |
j                         j                  |      rJ  y # 1 sw Y   xY w)Nr]  ))latin-1s   a,b
un,lphant)utf16s    a , b 
 u n ,  l  p h a n t wbrM   rN   un
   éléphantrL   rT   encodingr  r   )rQ   r?   rl  r0   r.   r5   r:   r  r  r   r  rF   r   r   )r  r   rd   r  
input_rowsr  r  expected_tabler  r   dataset_transcodeds              r*   test_encodingr    s   w#$D!* $JJz"  ))c299;%7#ryy{9K$LM)5#8@OQ vv))8)<&&LAZZ[A!((//@@@!**,33NCCC%! s   D((D1	c                 ,   t        | dz        }t        |d      5 }|j                  d       d d d        t        j                  dt        j
                         fdt        j
                         fg      }t        j                  dgdgd|	      }t        j                  |d
|      }t        j                  t        j                  j                  d      5  |j                  |       d d d        t        j                  j!                  d      }t        j"                  |      }t        j                  ||      }	|	j                  j%                  |      sJ |	j                         j%                  |      sJ y # 1 sw Y   RxY w# 1 sw Y   xY w)Nr]  r  s   ,b
un,lphant   érN   r  r  )r  rN   rT   r  r  zinvalid UTF8r   r  r  r  r   )rQ   r?   rl  r0   r.   r5   r:   r   rF   r   r   r   r&  r3  r   r  r  r  r   )
r  r   rd   r  r  r  rF   r  r   r  s
             r*   test_column_names_encodingr    s>   w#$D	dD	T

/0 
 ii$		!4sBIIK6H IJOXXdV%1N4<KMN jjeODG	w{{//~	F( 
G 66%%y%9L""=KD=$$++O<<<&&(//???% 
	 
G	Fs   E=F
=F
Fc                    ddl m} t        j                  t        j                  g dd      t        j                  g dd      d      }| d	z  }|j                           ||t        |d
z               t        j                  |t        j                               }|j                  |      }|j                  |      sJ t        |       t        j                  |d      }|j                  |      }|j                  |      sJ |j                  |ddg      }|j                  ddgk(  sJ |j                  |ddg      }|j                  ddgk(  sJ  ||t        |dz        d       t        j                  t               5  |j                  t        j                  |d             d d d        y # 1 sw Y   y xY w)Nr   )r  r  r  r  r:  r4   rL   feather_datasetr  r   r  rN   rM   r   zdata1.featherr   version)pyarrow.featherr  r0   r:   r  r  rQ   r   rF   r  r   r   rI   r9  r   r   r  )r  r   r  r:   r9  rF   r  s          r*   test_feather_formatr    s   -HH288IF;88LyAC DE ))GMMO%W~567jj)9)9);<G$$W-F==/8jj3G$$W-F== $$WsCj$AF3*,,,$$WsCj$AF3*,,, %W67C	z	"

79 EF 
#	"	"s   
'F::G)rd  re  brotlic                    t        j                  t        j                  dgdz  d      t        j                  g ddz  d      d      }t         j                  j	                  |      st        j                          | d	z  }|j                          t        j                         }| d
z  }|j                          t        j                  |t        |dz        ||j                  d              |dk(  rt        j                  t        d      5  |j                  |      }d d d        t        j                  t        d      5  t        j                  |      }|j                  |      }d d d        y |j                  |      }t        j                  |t        |dz        ||       t        j                  |t        j                               }	|j!                  |	      }
|
j#                  |      sJ |dz  dz  }|j%                         j&                  }|dz  dz  }|j%                         j&                  }||k  sJ y # 1 sw Y   &xY w# 1 sw Y   y xY w)Nr   ,  r  r  r:  r@  r4   rL   feather_dataset_compressedfeather_dataset_uncompressedz
data.arrowrh  rX   file_optionsr  zCompression typer   r   part-0.arrow)r0   r:   r  ri  rj  r   rk  r  r   r  r  rQ   rY  r   r  rF   r   r   statst_size)r  ra  r   r:   r9  r   uncompressed_basedirwrite_optionscodecrF   r  compressed_filecompressed_sizeuncompressed_fileuncompressed_sizes                  r*   test_feather_format_compressedr    s    HH288QCG&988L$49EG HE88  -44GMMO""$K"%CC  </0 333E	 h]]:-?@'::' ; )M A ]]:-?@HH[)E'::u:MM A 	22{2KMGl"#"	 jj)9)9);<G$$W-F==,~=O%**,44O,|;nL)..088....1 A@ A 	s   I	(I	IIc           	      d   g }t        d      D ]h  }t        j                  |gdz  t        d      D cg c]  }t        j                          c}d      }t	        j
                  |t        |       |       j t        | dz        }t	        j                  j                  ||       ||fS c c}w )zO
    Creates a simple (flat files, no nested partitioning) Parquet dataset
    r   r   r}  metadata_collector	_metadata)	r   r0   r:   rr  r_   r]  rQ   write_metadatar.   )	root_pathr  r)   rF  r:   metadata_paths         r*   _create_parquet_dataset_simpler  1  s    
 1Xb%PR)0T)Q)0TUV
3y>6H	
  	K/0Mm- % 1Us   B-c                    | dz  }t        |      \  }}t        j                  |      }|j                  j	                  |j                        sJ t        |j                        dk(  sJ |j                         }|j                  dk(  sJ y )NrZ  r   (   )	r  r   parquet_datasetr.   r   r   r   r   r   )r  r  r  r:   rF   r  s         r*   test_parquet_dataset_factoryr  G  s     00I9)DM5  /G>>  ...w}}"""F??b   r,   win32z'Results in FileNotFoundError on Windows)reasonc                    t        j                  d      }| dz  }t        |      \  }}|j                  d      }t	        j
                  t	        j                  |            }t        j                  ||      }|j                  j                  |j                        sJ t        |j                        dk(  sJ |j                         }|j                  dk(  sJ y )Nr  rZ  r  r  r   r  )r   r  r  r   rU   rj   r  r   r  r.   r   r   r   r   r   )	r  r  r  r  r:   r  r   rF   r  s	            r*   #test_parquet_dataset_factory_fsspecr  S  s       *F 00I9)DM5 !!&)I !1!1)!<=J  :FG>>  ...w}}"""F??b   r,   c                    | dz  }t        j                  dgdz  t        j                  j	                  d      d      }g }t        j                  |t        |      |       t        |dz        }t        j                  |j                  ||       t        j                  |      }|j                  j                  |j                        sJ |j                         }|j                  dk(  sJ y )NrZ  r   r   r}  r  r  )r0   r:   r.  rr  randnr_   r]  rQ   r  r.   r   r  r   r   r   )r  r  r:   r  r  rF   r  s          r*   &test_parquet_dataset_factory_roundtripr  k  s     00IHHQC"HBIIOOB,?@AEs9~2D 	K/0Mm-   /G>>  ...F??b   r,   c                    g }t        d      D ]l  }t        j                  dt        t        |dz  |dz   dz              i      }| | dz  }t	        j
                  |||       |d   j                  | d       n t        | dz        }t	        j                  j                  ||       t        j                  |      }|j                         }|j                  d      j                         }|t        t        dd	            k(  sJ y )
Nr   r   r   z.parquetr  r  r  r   r@  )r   r0   r:   r[   r_   r`   set_file_pathrQ   r  r.   r   r  r   r  r:  )	r  	metadatasr)   r:   
table_pathr  rF   scanned_tablescanned_cols	            r*   "test_parquet_dataset_factory_orderr    s     I 2Y4adQqS"H-./1!H~-

ujYG"##qcN3  +-.MellM9=  /G$$&M&&t,668K$uQ}----r,   c                    | dz  }t        |      \  }}t        |j                  d            d   j                          t	        j
                  |      }|j                  j                  |j                        sJ t        |j                        dk(  sJ t        j                  t              5  |j                          d d d        y # 1 sw Y   y xY w)Ntest_parquet_dataset_invalid	*.parquetr   r   )r  r[   globunlinkr   r  r.   r   r   r   r   r   r  r   )r  r  r  r:   rF   s        r*   $test_parquet_dataset_factory_invalidr    s     88I9)DM5	$%a(//1  /G>>  ...w}}"""	(	) 
*	)	)s   .CCc                    t        t        | j                  d                  }t        j                  |d         j
                  j                         }g }|D ][  }t        j                  |      j                  }|j                  t        |j                  |                    |j                  |       ] | dz  }t        j                  |||       |S )Nr  r   r  r  )r[   r  rglobr_   ParquetFiler.   to_arrow_schemar  r  rQ   r  r   r  )r  parquet_pathsr.   r  rd   r  r  s          r*   _create_metadata_filer    s    	 <=>M^^M!,-44DDFF>>$'00s4#3#3I#>?@!!(+ 
 +M2D r,   c           
         t        j                  t        j                  t        d            t        j                  t        j
                  j                  d            t        j                  t	        j                  ddgd            gg d      }|j                  ddi      }t        j                  |t        |       d	g
       t        |       |fS )Nro  rM   rN   r   rv  rx  r   r   r   rh  )r0   r:   r  r   r.  rr  r  r-  r8   r_   r]  rQ   r  )r  r:   s     r*   #_create_parquet_dataset_partitionedr    s    HH
rRXXbiioob&9:
C:r*+- #E
 ))5'*:;Es9~vhG +U22r,   c                    | dz  }t        |      \  }}t        j                  d      }t        j                  ||      }|j                  j                  |j                        sJ t        |j                        dk(  sJ |j                         }|j                  dk(  sJ |j                         j                  d      j                  d	      }|j                         }t        j                  j                  ||       y )
N(test_parquet_dataset_factory_partitionedr   ri  r  r   ro  r   Tdrop)r  r   r   r  r.   r   r   r   r   r   r  sort_valuesreset_indexr"   testingassert_frame_equal)r  r  r  r:   r   rF   r  rN  s           r*   r  r    s     DDI>yIM5??&1L  \JG>>  ...w}}"""F??b    ++D1==4=HF HJJ!!&(3r,   c                 F   | dz  }t        |      \  }}t        j                  |d      }|j                  j	                  |j                        sJ d|j                  j
                  v sJ t        |j                               }d|d   j                  j
                  v sJ y )N%test_parquet_dataset_factory_metadatar   r     keyr   )	r  r   r  r.   r   r  r[   r>   r`  )r  r  r  r:   rF   r   s         r*   r  r    s     AAI>yIM5  VDG>>  ...W^^,,,,,W**,-IYq\11:::::r,   c                    |\  }}| dz  }t        |      \  }} ||g      5  t        j                  |t        j                  d      |      }d d d         |g       5  t	        j                               }d d d         |g       5  t	        j                  t        j                  d      dkD               d d d         |g       5  d   j                  t        j                  d      dkD         d d d         |g       5  d   j                         }	|	d   j                          d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   exY w# 1 sw Y   y xY w)N#test_parquet_dataset_lazy_filteringr   ri  )r   r   r      r   )	r  r   r  r   r[   r>   r1   r   r  )
r  r   rU   rz   r  r  rF  rF   r   rg_fragmentss
             r*   r  r    s=    'B
 ??I5i@M1 
}o	&$$7 
' 
b	..01	 
 
b	W""288D>B#678 
 
b	!''(;< 
 
b	 |668Q002 
	' 
'	& 
	 
	 
	 
	s;   -D5E	1E
+E'E%5D>E
EE"%E.c                 H   t        j                  dg di      }| dz  }|j                  |       t        j                  |      }|j                  |      j                  }|j                  |dg      j                  }d|j                  v sJ |j                  |d      sJ y )NrM   r  r  r   s   pandasTr	  )	r"   r#   r  r   rF   r   r.   r  r   )r  r   r9   rd   rF   r.   r5  s          r*   test_dataset_schema_metadatar    s     
sI&	'B^#DMM$jjG$$W-44F%..w.FMM '''==)$=???r,   c                    t        j                  dt        j                  g dd      i      }t        j                  |t        | dz               t        j                  dt        j                         fg      }t        j                  | dz  d|      }|j                  |t        j                  d      dkD  	      }|d   j                  |d   j                  d
      j                  d            sJ t        |j!                               d   }|j                  |t        j                  d      dkD  |      }|d   j                  |d   j                  d
      j                  d            sJ y )Nrg  r  r   r  r  r   r  r   r   r3   r   r  )r0   r:   r  r_   r`   rQ   r.   r3   r   rF   r   r1   r   r  rb  r[   r>   )r  r   r:   r.   rF   filteredrG   s          r*   test_filter_mismatching_schemar  %  s<    HHeRXXlABCENN5#g678 YY
+,-Fjj. 6CG
 &&wrxx7J&KHE?!!%,"3"3G"<"B"B1"EFFFG))+,Q/H&&%1,V ' =HE?!!%,"3"3G"<"B"B1"EFFFr,   c                    t        j                  dj                         t        t	        d            d      }t        | dz        }t        j                  ||dg       t        j                  |d      }|j                  |      }|j                  |dg	      }|j                  d      j                  |j                  d            sJ y )
Nza a b br   r  r!  r   rh  r   r  r   )r0   r:   r  r[   r   rQ   r_   r]  r   rF   r   r  r   )r  r   r:   rd   rF   all_cols	part_onlys          r*   +test_dataset_project_only_partition_columnsr  ;  s     HHioo/U1XGHEw'(DtVH=jjF3G&&w/H''&'BI??6")))*:*:6*BCCCr,   c           	         t        j                  dt        j                  g dd      i      }| dz  }|j	                  |d       t        j                  |dt        j                  dt        j                         fg      	      }t        j                  dt        j                  g dt        j                               i      }|j                  |      j                  |      sJ y )
Nrg  r<  objectdtypez(test_dataset_project_null_column.parquetr   r  r   r  )r"   r#   r.  r  r  r   rF   r0   r.   r3   r:   r   r   )r  r   r9   r  rF   rN  s         r*    test_dataset_project_null_columnr  K  s     
ubhh'9JK	LB<<AMM!IM&jj9 "		E288:+>*? @BGxx);RXXZ HIJH""7+228<<<r,   c                    ddl m} t        j                  g dg dg dd      }|j	                  || dz         t        j                  | dz  d	      }|j                  |t        j                  d
      t        j                  d      j                  dd      t        j                  d      dk(  d      }t        j                  g dt        j                  g dd      g dd      }|j                  |      sJ t        j                  t        d      5  |j                  |d
d
i       d d d        y # 1 sw Y   y xY w)Nr   r  r  )r  r  r  r7  r  r  r  r   r  r  r   Fsafer  rM   )	A_renamedB_as_intC_is_ar   r  )TFFzExpected an Expressionr   )r   r  r0   r:   r  r   rF   r   r1   r  r  r   r   r   r   )r  r   r  r:   rF   r  rN  s          r*   test_dataset_project_columnsr  Y  s   HH9<oNOE%>!9:jj>1)DG$$WXXc]HHSM&&wU&;((3-3&7$ F
 xxHHYW5& H
 ==""" 
y(@	A#s< 
B	A	As   D::Ec                    t        |       \  }}t        j                  |      }t        |j                  t        j
                        sJ t        |       \  }}t        j                  |      }t        |j                  t        j
                        sJ t        j                  |d      }|j                  }|J t        |t        j                        sJ |j                  t        j                  dt        j                         fg      k(  sJ t        |j                        dk(  sJ |j                  d   t        j                  g dt        j                               k(  sJ t        j                  t        j                  dt        j                         fg      d      }t        |t        j                        sJ t        |j                        dk(  sJ t        d |j                  D              sJ t        j                  ||      }|j                  }t        |t        j                        sJ |j                  t        j                  dt        j                         fg      k(  sJ t        |j                        dk(  sJ t        d	 |j                  D              sJ t        j                  |d      }t        j                  t!        |j#                               |j                  |j$                  |j&                  
      }|j                  J | dz  }t)        |      \  }}t        j*                  |d      }|j                  }|J t        |t        j                        sJ |j                  t        j                  dt        j,                         fg      k(  sJ t        |j                        dk(  sJ t/        |j                  d   j1                               ddhk(  sJ y )Nr   r  r   r   r   )r   r   r   ri  c              3   $   K   | ]  }|d u  
 y wr=   r   rZ  s     r*   r\  z6test_dataset_preserved_partitioning.<locals>.<genexpr>       4"3QqDy"3r^  c              3   $   K   | ]  }|d u  
 y wr=   r   rZ  s     r*   r\  z6test_dataset_preserved_partitioning.<locals>.<genexpr>  r  r^  r   zdata-partitioned-metadatarM   rN   )r  r   rF   r@   r   r   r  r|  r.   r0   r   r   rn  r  r~  r   r[   r>   rX   r   r  r  r5   r}   r:  )	r  rF  rd   rF   r  r   r  r  r  s	            r*   #test_dataset_preserved_partitioningr
  q  sG    "'*GAtjjGg**B,D,DEEE 37;JjjGg**B,D,DEEE jjF3GDdB//000;;"))fbhhj%9$:;;;;t  !Q&&&Q288Irxxz#BBBB ??299vrxxz&:%;<VLDdB//000t  !Q&&&4$"3"34444jjD1GDdB//000;;"))fbhhj%9$:;;;;t  !Q&&&4$"3"34444 jjF3G##W""$%gnn~~'*<*<H   ((( 55I:9EM1  VDGDdB//000;;"))fbiik%:$;<<<<t  !Q&&& t  #--/0S#J>>>r,   c                    t        j                  t        j                  dt        j                               t        j                  dt        j                  t        j
                         t        j                                     g      }t        j                  g dt        t        d            d|      }t        | dz        }t        j                  ||dg       t        j                  | dz        }|j                  d      j                         |j                  d      j                         k(  sJ |j                  d      j!                  |j                  d            sJ y )	Nrg  r   )NNrM   rM   r   r  rT   r!  rh  )r0   r.   r1   r3   r  r   r5   r:   r[   r   rQ   r_   r]  r0  r  r:  r   )r  r.   r:   rd   actual_tables        r*   +test_write_to_dataset_given_null_just_worksr    s    YY

#
rxxz299;?@ F HH4!%(^-5;=E w'(DtVH===>!9:L v&00 	f		'	'	)* * *u%,,U\\%-@AAAr,   c                     dd l m}  |j                  |  |j                  |dfg            } |j                  | |      S )Nr   	ascending)r   )pyarrow.computecomputesort_indicesSortOptionsr   )tabsort_colr  sorted_indicess       r*   _sort_tabler    sA     $R__^R^^h%<$=>@N2773''r,   c                 `   |xs |}t        j                  | |d|d       t        |j                  d            }t	        |      t	        |      k(  sJ t        j
                  |d|      }t        |j                         |      j                  t        | j                         |            sJ y )NrD  FrX   r   r   *r{  )	r   r  r[   r  r}   rF   r  r   r   )rF   r`  expected_filesr  base_dir_pathr   
file_pathsr  s           r*   _check_dataset_roundtripr    s    !-XMWhw".EC m))#./Jz?c.1111 zzgLBH x((*H5<<G$$&13 3 3r,   c                    | dz  }|j                          t        |      }t        j                  |      }| dz  }|dz  g}t	        |t        |      |d|       | dz  }|dz  g}t	        |||d|       | dz  }|j                          t        |      }t        j                  |      }| dz  }|dz  g}t	        |t        |      |d|       y )Nr  zsingle-file-targetr  rM   zsingle-file-target2r  zsingle-directory-target)r  r  r   rF   r  rQ   rf  )r  rc   rF  rF   targetr  s         r*   test_write_datasetr!    s     -'IOOI&Ajj#G ++F~-.NWc&k>3O ,,F~-.NWfnc6J ,,IOO"9-Ajj#G00F~-.NWc&k>3Or,   c                 R   | dz  }t        |      }t        j                  d      }t        j                  ||      }| dz  }|dz  |dz  dz  |dz  |dz  dz  g}t        j                  t	        j
                  d	t	        j                         fg      d      }t        |t        |      |d
||       | dz  }|dz  |dz  dz  |dz  |dz  dz  g}t        j                  t	        j
                  d	t	        j                         fg            }t        |t        |      |d
||       y )Npartitionedr   ri  r  zpartitioned-hive-targetpart=ar  part=br   r   partitioned-dir-targetrM   rN   )	r  r   r   rF   r0   r.   r5   r  rQ   )r  rc   rF  r   rF   r   expected_pathsr  s           r*   test_write_dataset_partitionedr(    s6    -'I+I6A??&1Ljj>G 00F6H,~=6H,~=N //
		FBIIK()*6;VndF(*
 //Ffsl^3fsl^3N //
		FBIIK()*,VndF(*r,   c                    t        j                  g dg dd      }t        j                  || ddg       t        j                  | ddg      }|j
                  }|D ch c]9  }t        t        j                  |      j                  |       j                        ; }}|h dk(  sJ |j                         }|j                  |      sJ y c c}w )Nr#  r%  rL   rz  rN   r{  >   r   r$  r  r0   r:   r   r  rF   r   rQ   rk  rl  r  rm  r   r   r  r:   r  r   r  partitioning_dirsr  s          r*   #test_write_dataset_with_field_namesr-  $  s    HH+/BCEUGE#&%) 

75uEIOOEBGBGQGLLO''0778%   ///((*O!!%(((    >Cc                    t        j                  g dg dd      }t        j                  || ddgd       t        j                  | dd      }|j
                  }|D ch c]9  }t        t        j                  |      j                  |       j                        ; }}|h d	k(  sJ |j                         }|j                  |      sJ y c c}w )
Nr#  r%  rL   rz  rN   r   )rX   r   partitioning_flavorr{  >   b=xb=yb=zr*  r+  s          r*   (test_write_dataset_with_field_names_hiver4  5  s    HH+/BCEUGE#&%VE 

75vFIOOEBGBGQGLLO''0778%    5555((*O!!%(((r.  c                    t        j                  g dg dg dd      }t        j                  || ddg       t        j                  | ddg      }t        j                         5 }t        j                  |j                  ddg	      |ddg       t        j                  |ddg      }|j                         }t        |j                               |j                  d
      j                         k(  sJ 	 d d d        y # 1 sw Y   y xY w)Nr#  r%  r  r7  rz  rN   r{  r  r   rM   )r0   r:   r   r  rF   r  r  r   r   r  r  drop_columnsr  r:   rF   tempdir2r  r  s         r*   test_write_dataset_with_scannerr9  F  s    HH+/$& 'E UGE#&%) jjcUCG		$	$	&(
#s<!%se	E JJxSEJ	#,,.O--/ ++C0::<= 	= = 
'	&	&s   'BDDc           	         	
 t        j                          G fddt              }t        j                   |t        j
                                     t        j                  t        j                  dt        j                               g      }t        j                  t        j                  t        t        d                  g|      	d
d}dd		
fd
}t        j                  j!                   |       |d	      t        j"                   fd      }|j%                          	 t'        j&                         fd}d}d} |       dk  r/
|kD  r

|k(  rd	}n"
}t'        j(                  d        |       dk  r/|sJ 	 dj+                          |j-                          y # dj+                          |j-                          w xY w)Nc                       e Zd Z fdZy)6test_write_dataset_with_backpressure.<locals>.GatingFsc                 ^    j                          | j                  j                  ||      S )Nr  )waitrs   rZ   )ru   rd   r  consumer_gates      r*   rZ   zItest_write_dataset_with_backpressure.<locals>.GatingFs.open_output_streamb  s)     88..th.GGr,   N)r   r   r   rZ   )r@  s   r*   GatingFsr<  a  s	    	Hr,   rA  r(   r  rT   r          Tc               3   h   K   k  r(sy t        j                  d       dz    k  r'y y w)Ng{Gz?r   )r  sleep)rg   batches_readend
keep_goings   r*   counting_generatorz@test_write_dataset_with_backpressure.<locals>.counting_generatorq  s:     S JJtALK S s   ,22r  c                  H    t        j                  t              d       S )Nr   r  )r   r  rQ   )	gating_fsr   r  s   r*   r  z6test_write_dataset_with_backpressure.<locals>.<lambda>~  s    r''S\)	Kr,   )r   c                  2    t        j                           z
  S r=   )r  )starts   r*   durationz6test_write_dataset_with_backpressure.<locals>.duration  s    99;&&r,   Fr   r  )	threadingEventr   rU   rj   ri   r0   r.   r1   r   r]   r  r[   r   r   r2  r^   ThreadrM  r  rE  r}   r  )r  rA  r.   min_backpressurerI  write_threadrN  
last_valuebackpressure_probably_hitrg   rF  r@  rG  rK  rH  r   rM  s   `        @@@@@@@@r*   $test_write_dataset_with_backpressurerV  Y  s    OO%M
H< H
 ););)= >?IYY456F OORXXd5+;&<=>vNEL
CJ jj%%V & ?G ##KLL !			' 
$)!j2o..:-04-)
JJsO j2o )((
 
 
s   8AF6 F6 6$Gc                    t        j                  g dg dd      }t        j                  || ddg       t        j                  | ddg      }t        j                         5 }t        j                  ||ddg       t        j                  |ddg      }|j                         }t        |j                               |j                         k(  sJ 	 d d d        y # 1 sw Y   y xY w)Nr%  r  rN   r  rz  rN   r{  )
r0   r:   r   r  rF   r  r  r   r  r  r7  s         r*   test_write_dataset_with_datasetrY    s    HH?;<EUGE#&%) jjcUCG		$	$	&(
( %SE	; JJxSEJ	#,,.O--/0EOO4EEEE 
'	&	&s   $A0CC'c           	      X   | dz  }t        j                  g dg dd      }t        j                  t        j                  t        j
                  dt        j                               g      d      }d }t        j                  |||d	
       t        j                  g dg dd      }t        j                  t         j                        5  t        j                  |||d	
       d d d        t        j                  ddgi      }|dz  dz  }t        j                  j                  ||       t        j                  |||d	d       t        j                  g dg dd      }t        j                  | d	|      j                         } |||       |j!                         sJ t        j                  |||d	d       t        j                  g dg dd      }t        j                  | d	|      j                         } |||       |j!                         rJ y # 1 sw Y   9xY w)Nr   r%  r  rX  r  r   )r.   r  c                     | j                         j                  d      j                  d      }|j                         j                  d      j                  d      }|j                  |      sJ y )NrN   Tr  )r  r  r  r   )r  r  df1df2s       r*   compare_tables_ignoring_orderzGtest_write_dataset_existing_data.<locals>.compare_tables_ignoring_order  sZ    lln((-99t9Dlln((-99t9Dzz#r,   rz  r  r7  r  rN   ezc=2z	foo.arrowoverwrite_or_ignore)r   rX   existing_data_behavior)r_  r   rM   rN   r  )r   r   r   rK   r   r{  delete_matching)r   rM   rN   r  r  )r0   r:   r   r   r.   r1   r3   r  r   r   r3  r   r  r  rF   r   exists)	r  rc   r:   r   r^  extra_table
extra_fileoverwrittenreadbacks	            r*    test_write_dataset_existing_datarh    s   $IHH?;<E??"))	#rxxz	"#+%-35L UILOHH?;<E 
r	'
	&25	B 
( ((C#<(KU"[0JOO!!+z: UIL!,AC (('o>@Kzz'%'355=XZ !(K8 UIL!:KM ((!5LIJKzz'%'355=XZ !(K8  """"9 
(	's   HH)c                 f    t        |       D cg c]  }t        j                  ||       c}S c c}w r=   )r   rr  randint)r%  r{  r|  rF  s       r*   _generate_random_int_arrayrk    s)    .3Dk:kFNN3$k:::s   .c                     g }g }t        |       D ]<  }|j                  t        |d|             |j                  dt        |      z          > t	        j
                  ||      }|S )Nr   )r%  r{  r|  r  r(   ry  )r   r   rk  rQ   r0   r]   )num_of_columnsnum_of_recordsr(   r9  r)   r]   s         r*   _generate_data_and_columnsrp    si    DL>".N343AC 	D 	C#a&L)	 #
 ??LALr,   c                 t    t        t        t        j                  |       j	                  d|                   S )Nz**/*.)r   r[   rk  rl  r  base_directoryr   s     r*   _get_num_of_files_generatedrt    s-    tGLL055k]6KLMNNr,   c                    | dz  }dd}d}d}t        ||      }t        j                  ||d|       t        j                  |      }|z  dz   }t        |      |k(  sJ g }t        |      D ]V  \  }	}
|t        |
      z  }t        j                  |d      }|j                  |j                         j                  d	          X |t        |      k(  sJ |t        |      k(  sJ t        fd
|D              sJ y )Nr   r   r   #   r   )rX   max_rows_per_filemax_rows_per_groupr   r   r   c              3   (   K   | ]	  }|k    y wr=   r   )r[  file_rowcountrw  s     r*   r\  z7test_write_dataset_max_rows_per_file.<locals>.<genexpr>  s      <$:=  11$:s   )rp  r   r  r5  r6  r   rW   rQ   rF   r   r   shaper  r~  )r  rc   rx  rn  ro  r]   files_in_direxpected_partitionsresult_row_combinationrF  f_filef_pathrF   rw  s                @r*   $test_write_dataset_max_rows_per_filer    s*   $INN-n.<>L \9Y'8(:< ::i(L ),==A | 3333  |,	6S[(**VI6%%g&6&6&8&>&>q&AB - #&<"====S!78888 <$:< < < <r,   c                    | dz  }d}d}d}g d}|D cg c]  }t        ||       }}|dz  }t        j                  ||||d       t        j                  |      }	t        |	      D ]  \  }
}|t        |      z  }t        j                  |d	      }|j                         }|j                         }t        |      D ]6  \  }}|j                  }|t        |      d
z
  k  r||k\  r||k  r.J ||k  r6J   y c c}w )Nr   r  r$  r   )
r   r   r   r   r   r   r   r   r   r   min_rows_groupr   )r(   r`  min_rows_per_grouprx  rX   r   r   )rp  r   r  r5  r6  rW   rQ   rF   r   r   r   r   )r  rc   r  rx  rn  record_sizesro  record_batchesdata_sourcer|  rF  r  r  rF   r:   batchesr  rg   rows_per_batchs                      r*   %test_write_dataset_min_rows_per_groupr  !  s2   $IN1L -9:,8. 11?A,8  : ..K.;(:(:%'
 ::k*L|,	6s6{***VI6  """$"7+IB"^^NCL1$$%);;"&889 9 &);;;; , -:s   C:c                    | dz  }d}d}d}t        ||      }|dz  }t        j                  |||d       t        j                  |      }g }|D ]i  }	|t        |	      z  }
t        j                  |
d      }|j                         }|j                         }|D ]  }|j                  |j                          k |dd	gk(  sJ y )
Nr   r  r      max_rows_groupr   )r(   r`  rx  rX   r   r9  )rp  r   r  r5  r6  rQ   rF   r   r   r   r   )r  rc   rx  rn  ro  r]   r  r|  batched_datar  r  rF   r:   r  rg   s                  r*   %test_write_dataset_max_rows_per_groupr  E  s    $INN-n.<>L ..K,(:%' ::k*LLs6{***VI6  """$E/   B8###r,   c                    | dz  }d}d}ddg}t        j                  g dg dg|      }t        j                  g d	g d
g|      }t        j                  g dg dg|      }t        j                  g dg dg|      }t         j                  j                  ||||g      }	t	        j
                  t        j                  ||   t        j                         fg      d      }
|dz  }t	        j                  |	||
|       d } |||||      \  }}||k(  sJ |dz  }d}t	        j                  |	||
||d        |||||      \  }}||kD  sJ y )Nr   r   r   c1c2)r   r   rK   r   r   r   )rM   rN   r  r  r_  rM   rm  )r   r  r'  r$  r   r   )rM   rN   r  r  r_  r  )r^  r   r:  r9  r   r   )rM   rN   r  r  r_  r  )rW  r  r  r#  r   r   )rM   rN   r  r  r_  rN   r   ri  default)r(   r`  r   rX   c                 z    t        | |      }t        t        j                  j	                  ||               }||fS )Nrr  )rt  r   r0   r  unique)r  r]   r   col_idnum_of_files_generatednumber_of_partitionss         r*   _get_compare_pairz<test_write_dataset_max_open_files.<locals>._get_compare_pair  s>    !<&K"A"2::#4#4\&5I#JK%';;;r,   max_1rK   F)r(   r`  r   rX   max_open_filesr   )	r0   r]   r6   r^   r   r   r.   r5   r  )r  rc   r   partition_column_idr9  record_batch_1record_batch_2record_batch_3record_batch_4r:   r   data_source_1r  r  r  data_source_2r  s                    r*   !test_write_dataset_max_open_filesr  b  s   $IK$<L__+>+I+K+79N __+=+I+K+79N __+@+I+K+79N __+A+I+K+79N HH!!>>#1>#C DE ??
		L!45ryy{CDEL 	)M%-".{D< M>;/1 10 "%9999 'MN%-".{$2G
 M>;/1 10 "$8888r,   c                    | dz  }t        |      }t        j                  |t        j                  j	                  d            }| dz  }|dz  |dz  dz  |dz  |dz  dz  g}t        j
                  t        j                  |j                  j                  d	      g      d	t        j                  ddg      i
      }t        |t        |      |d||       y )Nr#  Tr  r  r&  rM   r  rN   r   rm  r   )r  r   rF   r|  r  r   r0   r.   r1   r  r  rQ   )r  rc   rF  rF   r   r'  r   s          r*   #test_write_dataset_partitioned_dictr    s     -'I+I6A jj((1141HJG //Ffsl^3fsl^3N ??299V$.& $'bhhSz235L VndF!#r,   c                    | dz  }t        |      }t        j                  |d      }t        j                  t	        j
                  dt	        j                         fg      d      }| dz  }g fd}t        j                  ||d|d	|
       |dz  dz  |dz  dz  h}t        t        t        j                              }||k(  sJ | dz  }	t        j                  ||	d|d       t        j                  |d|      }
t        j                  |	d|      }|
j                         j                  |j                               sJ y )Nr#  r   r  r   ri  partitioned1c                 <    j                  | j                         y r=   )r   rd   )written_filepaths_writtens    r*   file_visitorz4test_write_dataset_use_threads.<locals>.file_visitor  s    \../r,   r  TrX   r   r   r  r$  part-0.featherr%  partitioned2Fr  r{  )r  r   rF   r   r0   r.   r5   r  r}   r\   rk  rl  r   r   )r  rc   rF  rF   r   target1r  r'  paths_written_settarget2result1result2r  s               @r*   test_write_dataset_use_threadsr    sB    -'I+I6Ajj8G??
		FBIIK()*6;L &GM0 | 	(--(--N Cm<=...&G jjNGjjNG$$W%5%5%7888r,   c           	         t        j                  t        j                  t        d            t        j                  d t        d      D              t        j                  dgdz  dgdz  z         gg d      }| dz  }t	        j
                  ||d	d
       t        |j                  d            }|dz  g}t        |      t        |      k(  sJ t	        j                  |d      j                         }|j                  |      sJ | dz  }|dz  |dz  dz  |dz  |dz  dz  g}g g fd}t	        j                  t        j                  dt        j                         fg      d      }t	        j
                  ||d
d	||       t        |j                  d            }t        |      t        |      k(  sJ D cg c]!  }t        j                   j#                  |      # }	}|	k(  sJ t	        j                  |d|      }|j                         j                  |      sJ t%              dk(  sJ D ]  }
t'        j(                  |
      |v rJ  y c c}w )Nro  c              3   D   K   | ]  }t        j                            y wr=   rq  rs  s     r*   r\  z#test_write_table.<locals>.<genexpr>  rt  ru  rM   r   rN   rv  rx  singledat_{i}.arrowr  basename_templaterX   r  zdat_0.arrowrz  r   r#  r$  r%  c                 r    j                  | j                         j                  | j                         y r=   )r   rd   r%  )r  visited_pathsvisited_sizess    r*   r  z&test_write_table.<locals>.file_visitor  s+    \../\../r,   r   r   ri  )rX   r  r   r  r{  r   )r0   r:   r  r   r   r  r[   r  r}   rF   r   r   r   r.   r5   r5  rd   getsizer   rk  rl  )r  r:   r`  r  r'  r  r  r   rd   actual_sizesvisited_pathr  r  s              @@r*   test_write_tabler    s<   HH
rRXX%IuRy%II
#sebj() "#E
 !HUH'6yJ hnnS)*J./Nz?c.1111ZZ/88:F== &H8X0=@8X0=@N
 MM0 ??
		FBIIK()*6;LUHY'6".\K hnnS)*Jz?c.11116CDmdBGGOOD)mLDL(((ZZ\JF??##E***}"""%||L)^;;; & Es   &Ic           	      p   t        j                  t        j                  t        d            t        j                  d t        d      D              t        j                  dgdz  dgdz  z         gg d      }t        j                  |gdz        }| d	z  }t        j                  ||d
       t        |j                  d            t        |dz  g      k(  sJ t        j                  |d      j                         j                  |      sJ | dz  }t        j                  |g|d
       t        |j                  d            t        |dz  g      k(  sJ t        j                  |d      j                         j                  |      sJ | dz  }t        j                  |j                         |d
       t        |j                  d            t        |dz  g      k(  sJ t        j                  |d      j                         j                  |      sJ | dz  }t        j                  ||g|d
       t        |j                  d            t        |dz  g      k(  sJ t        j                  |d      j                         j                  t        j                  |gdz              sJ y )Nr   c              3   D   K   | ]  }t        j                            y wr=   rq  rs  s     r*   r\  z6test_write_table_multiple_fragments.<locals>.<genexpr>  rt  ru  rM   r   rN   rv  rx  r   r  r  r   r  r  rz  zsingle-listmultiplezmultiple-table)r0   r:   r  r   rw  r   r  r}   r  rF   r   r   r   )r  r:   r`  s      r*   #test_write_table_multiple_fragmentsr    sn   HH
rRXX%IuRy%II
#cUQY&' "#E eWQY'E !HUHY7x~~c"#sH7G,G+H'IIII::hu-668??FFF &HeWhy9x~~c"#sH7G,G+H'IIII::hu-668??FFF #HU%%')Dx~~c"#s	$	$%(' ' ' '::hu-668??FFF ))HeU^Xi@x~~c"#s	$	$%(' ' ' '::hu-668??
%#  r,   c           	         t        j                  t        j                  t        d            t        j                  d t        d      D              t        j                  dgdz  dgdz  z         gg d      }| dz  }t	        j
                  d	 |j                         D        ||j                  d
d       t	        j                  |d      j                         }|j                  |      sJ | dz  }t         j                  j                  |j                  |j                               }t	        j
                  ||d
d       t	        j                  |d      j                         }|j                  |      sJ y )Nro  c              3   D   K   | ]  }t        j                            y wr=   rq  rs  s     r*   r\  z&test_write_iterable.<locals>.<genexpr>>  rt  ru  rM   r   rN   rv  rx  inmemory_iterablec              3       K   | ]  }|  y wr=   r   )r[  rg   s     r*   r\  z&test_write_iterable.<locals>.<genexpr>C  s     <);e);s   r  r  )r.   r  rX   rz  r   inmemory_readerr  )r0   r:   r  r   r   r  r   r.   rF   r   r   r  r^   )r  r:   r`  r  r   s        r*   test_write_iterabler  <  s;   HH
rRXX%IuRy%II
#sebj() "#E
 ,,H<)9)9);<h!LL'6yJ ZZ/88:F==**H!!..u||/4/?/?/ACFVX'6yJZZ/88:F==r,   c           	         t        j                  t        j                  t        d            t        j                  d t        d      D              t        j                  dgdz  dgdz  z         gg d      }t	        j
                  |      }| dz  }t	        j                  |j                  |      |d	
       |j                  t	        j
                  |d
            }|j                  |      sJ | dz  }t	        j                  |j                  |dg      |d	
       |j                  t	        j
                  |d
            }|j                  |j                  dg            sJ t        j                  t        d      5  t	        j                  |j                  |      ||j                  d	       d d d        y # 1 sw Y   y xY w)Nro  c              3   D   K   | ]  }t        j                            y wr=   rq  rs  s     r*   r\  z%test_write_scanner.<locals>.<genexpr>T  rt  ru  rM   r   rN   rv  rx  dataset_from_scannerr  r   rz  dataset_from_scanner2r   r   zCannot specify a schemar   )r.   rX   )r0   r:   r  r   r   rF   r  r   r   r   rn  r   r   r  r.   )r  r   r:   rF   r`  r  s         r*   test_write_scannerr  R  sx   HH
rRXX%IuRy%II
#sebj() "#E jjG//H^++9.$$RZZ%GHF== 00H^++GdV+Di1$$RZZ%GHF==tf-... 
z)B	C
//8( %Y	@ 
D	C	Cs   3GGc                    t        j                  t        j                  t        d            t        j                  dgdz  dgdz  z         j	                         gddg      }t        j                  |j                  dg      j                        }| dz  }t        j                  ||d	|
       t
        j                  j                  dgd      }t        j                  |d|
      j                         }|j                  |      sJ y )Nro  rM   r   rN   rg  r   rx  rF   r  r{  Tr  rz  )r0   r:   r  r   rP  r   r   rn  r.   r  r   r  rF   r   r   )r  r:   r   r`  partitioning_readr  s         r*   !test_write_table_partitioned_dictr  l  s     HH
r
#sebj();;= V_E
 ??5<<#9#@#@AL"Hx	
 0099	4 : )ZZ->hj  ==r,   c           
      V   t        j                  t        j                  t        d      d      t        j                  t	        j
                  ddd      j                  d            t        j                  t	        j                  dd	gd
            gg d      }| dz  }t        j                  ||d       t        |j                  d            }|dz  g}t        |      t        |      k(  sJ t        j                  |d      j                         }|j                  |      sJ dD ]z  }t        j                          }|j#                  |      }dt%        |      v sJ | dj'                  |      z  }t        j                  ||||       t)        j*                  |dz        }	|dk(  rdnd}
|	j,                  |
k(  sJ t        j                  |d      j                         }|j.                  }|dk(  rB|j                  d|j1                  d      j3                  t        j4                                     }|dv rC|j                  d|j1                  d      j3                  t        j6                  d                  }|j9                  |      }|j                  |      r{J  y )Nro  r  r  r  zdatetime64[D]r  zdatetime64[ns]rM   rN   r   rv  rx  r  r   r   r  part-0.parquet)1.02.42.6r  z(<pyarrow.dataset.ParquetFileWriteOptionszparquet_dataset_version{0}r  r  r  r   )r  r  r   r  )r0   r:   r  r   r.  r/  r  r-  r   r  r[   r  r}   rF   r   r   r   rY  r  rX   r_   read_metadataformat_versionr.   r1   	with_typer3   r  r  )r  r:   r`  r  r'  r  r  rX   optsmetaexpected_versionr.   rN  s                r*   test_write_dataset_parquetr    s@    HH
r*
<?CJJ 	
C:r*+	
 "#E **HUHY7hnnS)*J!112Nz?c.1111ZZ3<<>F== )%%'(((99T$ZGGG9@@II
dK+; ;<$+u$45%""&6666 HY7@@BeZZ6<<?#<#<RXXZ#HIFn$ZZ6<<?#<#<R\\$=O#PQF::f%}}X&&&' )r,   c           	      r   t        j                  t        j                  t        d            t        j                  d t        d      D              t        j                  dgdz  dgdz  z         gg d      }| dz  }t	        j
                  ||d	
       t        |j                  d            }|dz  g}t        |      t        |      k(  sJ t	        j                  |d	
      j                         }|j                  |      sJ t	        j                  t        j                  j                  |j                   j"                              }|j%                  d      }| dz  }t	        j
                  ||||       t	        j                  ||
      j                         }|j                  |      sJ y )Nro  c              3   D   K   | ]  }t        j                            y wr=   rq  rs  s     r*   r\  z)test_write_dataset_csv.<locals>.<genexpr>  rt  ru  rM   r   rN   )r   rw  chr1rx  csv_datasetr  r   r  z
part-0.csvrt  r  F)include_headercsv_dataset_noheaderr  )r0   r:   r  r   r   r  r[   r  r}   rF   r   r   r  r   r  r  r.   ry  rY  )r  r:   r`  r  r'  r  rX   r  s           r*   test_write_dataset_csvr    so   HH
rRXX%IuRy%II
#sebj() "#E
 &HUHU3hnnS)*J-.Nz?c.1111ZZ/88:F== 7;;+B+B\\'' ,C ,) *F$$E$:D//HUHV$GZZ099;F==r,   c           	      J   t        j                  t        j                  t        d            t        j                  d t        d      D              t        j                  dgdz  dgdz  z         gg d      }dfd	}| d
z  }t	        j
                  ||d|       sJ y )Nro  c              3   D   K   | ]  }t        j                            y wr=   rq  rs  s     r*   r\  z:test_write_dataset_parquet_file_visitor.<locals>.<genexpr>  rt  ru  rM   r   rN   rv  rx  Fc                 X    | j                   | j                   j                  dk(  rdy y y )NrK   T)r  r(  )r  visitor_calleds    r*   r  z=test_write_dataset_parquet_file_visitor.<locals>.file_visitor  s2    !!-%%11Q6!N 7 .r,   r  r   )rX   r  )r0   r:   r  r   r   r  )r  r:   r  r`  r  s       @r*   'test_write_dataset_parquet_file_visitorr    s    HH
rRXX%IuRy%II
#sebj() "#E
 N" **HUHY".0 >r,   c                 r   t        d      D cg c]  }|gdz  D ]  }|  }}}t        d      D cg c]  }|gdz  D ]  }|dz  	  }}}t        j                  ||dgdz  dgdz  z   d      }| dz  }t        j                  t        j
                  dt        j                         fg      d	
      }g d fd}t        j                  ||d|d|       |dz  dz  |dz  dz  h}	t        t        t        j                              }
|
|	k(  sJ J j                  dk(  sJ y c c}}w c c}}w )Nr   r   rM   ro  rN   rv  r#  r   r   ri  c                 l    | j                   r| j                   j                  | j                         y r=   )r  r   rd   )r  r  sample_metadatas    r*   r  zAtest_partition_dataset_parquet_file_visitor.<locals>.file_visitor  s+      *33O\../r,   r   Tr  r$  r  r%  r   )r   r0   r:   r   r   r.   r5   r  r}   r\   rk  rl  r(  )r  r   itemf1_valsf2_valsr:   r  r   r  r'  r  r  r  s              @@r*   +test_partition_dataset_parquet_file_visitorr    sW   !&qCugldtltGC$)!HFH5%2tBwwHGFHHG7"ebjC52:57 8E -'I??
		FBIIK()*6;L MO0 y| 	H//H//N Cm<=...&&&&&!+++? DFs
   D-D3c                 6   t        j                  dt        j                  dd      gi      }|d   j                  j
                  dk(  sJ t        j                  || d       t        j                  | dz        }|d   j                  j
                  dk(  sJ y )NrM   r  zEurope/Brussels)tzr   r   r  )
r0   r:   r"   r  r  r  r   r  r_   r0  )r  r:   r  s      r*   (test_write_dataset_arrow_schema_metadatar    s    
 HHcBLL:KLMNOE:??!2222UGI6]]7%556F#;"3333r,   c                     ddl m} t        j                  dg di      }|j	                  ddi      }t        j                  || d       |j                  | d	z        j                  }|j                  ddik(  sJ y )
Nr   r  rM   r  r     valuer  r   r  )
r   r  r0   r:   r8   r   r  r0  r.   r  )r  r  r:   r.   s       r*   "test_write_dataset_schema_metadatar    sr    HHc9%&E))68*<=EUGI6*: :;BBF??vx0000r,   c                     t        j                  dg di      }|j                  ddi      }t        j                  || d       t        j                  | dz        j                  }|j                  ddik(  sJ y )NrM   r  r  r  r   r   r  )	r0   r:   r8   r   r  r_   r0  r.   r  )r  r:   r.   s      r*   *test_write_dataset_schema_metadata_parquetr    so     HHc9%&E))68*<=EUGI6]]7%556==F??vx0000r,   c           	         | \  }}}}}}}}dj                  ||||      }t        j                  t        j                  t	        d            t        j                  d t	        d      D              t        j                  dgdz  dgdz  z         gg d      }t        j                  t        j                  d	t        j                         fg      d
      }	t        j                  |d|d|	       t        j                  d|dd
      j                         }
|
j                  |      sJ |j                  d      }t        j                  ||d|	       t        j                  d|dd
      j                         }
|
j                  |      sJ |j                  d      }t        j                  |d|d|	       t        j                  d|dd
      j                         }
|
j                  |      sJ y )Nr  ro  c              3   D   K   | ]  }t        j                            y wr=   rq  rs  s     r*   r\  z(test_write_dataset_s3.<locals>.<genexpr>4  rt  ru  rM   r   rN   rv  rx  r   r   ri  zmybucket/datasetr  r+  rz  zmybucket/dataset2r{  r  r  zmybucket/dataset3)rX   r0   r:   r  r   r   r   r.   r5   r  rF   r   r   )r(  rF  rU   r*  r+  r,  r-  uri_templater:   r   r  r)  s               r*   test_write_dataset_s3r  )  s    7H3Aq"atZ=DD
D$	0 
 HH
rRXX%IuRy%II
#sebj()+ #E
 ??299vryy{&;%<=fMD !b
 ZZr%fhj  == 

1
2CUC	EZZ5vhj  == 

j
)Czc)$ ZZ5vhj  ==r,   aC  {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:PutObject",
                "s3:ListBucket",
                "s3:GetObjectVersion"
            ],
            "Resource": [
                "arn:aws:s3:::*"
            ]
        }
    ]
}c           	         ddl m} | d   \  }}}}t        | t        dd        |dddj	                  ||      d      }t        j                  t        j                  t        d	            t        j                  d
 t        d	      D              t        j                  dgdz  dgdz  z         gg d      }t        j                  t        j                  dt        j                         fg      d      }t        j                  |d|dd|d       t        j                  d|dd      j                         }|j!                  |      sJ t        j                  |d|dd|d       t        j                  d|dd      j                         }|j!                  |      sJ t#        j$                  t&        d      5  t        j                  |d|ddd       d d d         |dddj	                  ||      dd       }t#        j$                  t&        d!      5  t        j                  |d|ddd       d d d        y # 1 sw Y   fxY w# 1 sw Y   y xY w)"Nr   )r  r  test_dataset_limited_user
limited123z{}:{}http)r,  r-  endpoint_overrideschemero  c              3   D   K   | ]  }t        j                            y wr=   rq  rs  s     r*   r\  z1test_write_dataset_s3_put_only.<locals>.<genexpr>  rt  ru  rM   r   rN   rv  rx  r   r   ri  zexisting-bucketr  Fr`  )r   rX   rY   r   ra  rz  r+  Tz&Bucket 'non-existing-bucket' not foundr   znon-existing-bucket)r   rX   rY   ra  limited)r,  r-  r  r  allow_bucket_creationz(Access Denied|ACCESS_DENIED))r{   r  r   _minio_put_only_policyrX   r0   r:   r  r   r   r   r.   r5   r  rF   r   r   r   r   r'  )	r  r  r*  r+  rF  rU   r:   r   r  s	            r*   test_write_dataset_s3_put_onlyr	  l  s4    ( !.D$1y*@:LJ	.!..t4	
B HH
rRXX%IuRy%II
#rSEBJ&') #E
 ??299vryy{&;%<=fMD  RU4 ZZbVhj  ==  RT4 ZZbVhj  == 
wE
G
(R#8	

G 
!..t4"
B 
w&E	F
(R#8	
 
G	F!
G 
G  
G	Fs   >H?I?IIc           
         t        j                  dd d gi      }t        j                  || dz         t        j                  t        j
                  dt        j                  t        j                         t        j                                     g      }t        j                  j                  | dz  g|t        j                         t        j                               }|j                  |      }|j                  |k(  sJ y )NrM   r  )rm   r.   rX   r   )r0   r:   r_   r`   r.   r1   r  r   r5   r   r   r   r   rU   ri   r   )r  r   r:   r.   fsdss        r*   $test_dataset_null_to_dictionary_castr    s     HHcD$<()ENN5'N23YY
bmmBHHJ		<= F **'(##%%%'	 + D ##D)E<<6!!!r,   c                 ~   t        j                  g dg dd      }t        j                  || dz  d       t        j                  | dz  d      }t        j                  g dg dd	      }t        j                  || d
z  d       t        j                  | d
z  d      }|j                  |dd      }|j                         t        j                  g dg dg dd      k(  sJ |j                  |ddd      }|j                         j                  d      t        j                  g dg dg dd      k(  sJ y )Nr   r   r  rM   rN   r  colAr  r  rz  r   c   r   r   Zr  r  )colBcol3r  r  r  r  r  Nr  r  r  
full outer)	join_typer   r   r  r  rM   rN   r  Nr  r  Nr  r0   r:   r   r  rF   r  r   r  r  r  ds1r  ds2r  s         r*   test_dataset_joinr#    s%   	 
B R46
**Wt^E
2C	 
B R46
**Wt^E
2CXXc66*F?? * !    XXc66\XBF??$$V,%%: 1   r,   c                 |   t        j                  g dg dd      }t        j                  || dz  d       t        j                  | dz  d      }t        j                  g dg dd	      }t        j                  || d
z  d       t        j                  | d
z  d      }|j                  |d      }|j                         t        j                  g dg dg dd      k(  sJ |j                  |ddd      }|j                         j                  d      t        j                  g dg dg dd      k(  sJ y )Nr  r  r  r  rz  r   r  r  )r  r  r  r  r  r  r  _rr  right_suffixr  r  r  r  r   s         r*   test_dataset_join_unique_keyr(    s#   	 
B R46
**Wt^E
2C	 
B R46
**Wt^E
2CXXc6"F?? * !    XXc6\XMF??$$V,%%: 1   r,   c           	         t        j                  g dg dg dd      }t        j                  || dz  d       t        j                  | dz  d      }t        j                  g dg d	g d
d      }t        j                  || dz  d       t        j                  | dz  d      }|j                  |ddd      }|j                         j                  d      t        j                  g dg dg dg dg dgg d      k(  sJ y )Nr  r   ro  <   r  )r  r  colValsr  rz  r   r  r  ro  r   r  r  r  r  r%  r&  r  )r   ro  r+  Nr  )r   ro  Nr  r  )r  r  r,  colB_r	colVals_rrx  r  r   s         r*   test_dataset_join_collisionsr0    s    	" 
B
 R46
**Wt^E
2C	" 
B
 R46
**Wt^E
2CXXc6\XMF??$$V,: @1A A A Ar,   c                 &   t         j                  j                  g dg dd      }t        j                  || dz  d       t        j
                  | dz  d      }t         j                  j                  g dg dg d	d
      }t        j                  || dz  d       t        j
                  | dz  d      }|j                  |ddddd      }|j                         j                  d      t        j                  g dg dg dd      k(  sJ y )N)r   r   r   r  r'  )rM   rN   rM   rN   r  r  r  rz  r   )r   r^  r  )rM   rN   g)r  r  g      @)r  r  colCr  r  r  r   r  r  onby	toleranceright_onright_by)r  NNNN)r  r  r3  )
r0   r6   from_pydictr   r  rF   	join_asofr   r  r:   r   s         r*   test_dataset_join_asofr<  '  s    			) 
B R46
**Wt^E
2C			 
B
 R46
**Wt^E
2C]]6Q&  F ??$$V,),: 1   r,   c                    t        j                  g dg dg dd      }t        j                  || dz  d       t        j                  | dz  d      }t        j                  g dg d	g d
g dd      }t        j                  || dz  d       t        j                  | dz  d      }|j                  |dddgd      }|j                         j                  d      t        j                  g dg dg dg dd      k(  sJ y )Nr  r*  r  )r  r  r5  r  rz  r   r-  r  r  r  )r  r,  r  r5  r  r5  r  r  r   r5  r6  r7  )Nr  Nr  r  r5  r,  )r0   r:   r   r  rF   r;  r   r  r   s         r*   "test_dataset_join_asof_multiple_byr@  C  s    	 
B
 R46
**Wt^E
2C	"	 
B R46
**Wt^E
2C]]&&)Q  F ??$$V,$	: 1   r,   c                    t        j                  dg di      }t        j                  || dz  d       t        j                  | dz  d      }t        j                  g dg dd      }t        j                  || d	z  d       t        j                  | d	z  d      }|j                  |dg d
      }|j                         t        j                  g dg dd      k(  sJ y )Nr5  r  r  rz  r   r  r  )r,  r5  r  r   r>  )r  r  r  )r5  r,  )r0   r:   r   r  rF   r;  r   r   s         r*   test_dataset_join_asof_empty_byrB  a  s    	i 
B R46
**Wt^E
2C	" 
B R46
**Wt^E
2C]]q  F ??"* !   r,   c           
          t        j                  g dg dg dg dd      }t        j                  || dz  d       t        j                  | dz  d      }t        j                  g d	g d
g dg dg dd      }t        j                  || dz  d       t        j                  | dz  d      }d}t        j                  t        |      5  |j                  |dddgddddg       d d d        y # 1 sw Y   y xY w)Nr  r*  r  r  r?  r  rz  r   r-  r  )r@  rC  r  r  r  )r  r,  colUniqr  r5  r  zXColumns {'colVals'} present in both tables. AsofJoin does not support column collisions.r   r5  r  r  r   r4  )	r0   r:   r   r  rF   r   r   r  r;  )r  r  r!  r  r"  r1  s         r*   !test_dataset_join_asof_collisionsrE  y  s    	"	 
B R46
**Wt^E
2C	"" 
B R46
**Wt^E
2C	7  
z	-Dff-VV$4 	 	
 
.	-	-s   C44C=dstyperU   memc           	      R   t        j                  g dg dd      }|dk(  r6t        j                  || dz  d       t        j                  | dz  d      }n!|dk(  rt        j                  |      }nt
        |j                  t        j                  d	      d
k        j                  t        j                  d      dk(        }|dk(  rt        j                  nt        j                  }t        ||      sJ |j                         t        j                  dgdgd      k(  sJ |j                  d      t        j                  dgdgd      k(  sJ |j                  t        j                  d	      dk        j                  t        j                  d	      dkD        j                  t        j                  d	      dk7        }|j                         t        j                  dgdgd      k(  sJ t        j                  || dz  d       t        j                  | dz  d      }|j                         t        j                  dgdgd      k(  sJ |j                  t        j                  t        j                  ddgddgd            dd      }|j                         j!                  d      t        j                  dd gddgddgd      k(  sJ t#        j$                  t&              5  |j                  d        d d d        t#        j$                  t(              5  |j+                          d d d        |j,                  j/                  d      }	|j                  t        j                  d	      d
k        j1                  |	      }
|
j                         t        j                  d	ddgi      k(  sJ t#        j$                  t         j2                        5  |j1                  |	      j                          d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)Nr   r   r  r$  rM   rN   r  r2  r  rU   r  rz  r   rG  r  rK   r  rM   r   r   r$  r  r   r   rN   r  r   ro  r  r  zright outerkeysr  r  )r  r  r  )r0   r:   r   r  rF   r  r   r  r1   r   r  r@   r   r   r   r  r  r   r   r   r  r>   r.   rc  replace_schemar3  )r  rF  r  r!  r  rN  r2r  joinedschema_without_col2	newschemas              r*   test_dataset_filterrS    s    
$ 
B ~
Wt^E:jj46	5jjn!! ZZ(1,-44RXXf5E5LMF'-~r##2;M;MHfh'''??* !   
 ;;q>RXX'     
BHHV$q(	)	0	0
1
%gRXXf-=-BgC ;;=BHH&     VWz1%@zz'J.u=H"((, #    [[BHHRc
. %    /F ??$$V,D	Rc
: 1    
y	!

4 
" 
z	" 
# **++A.


1n()  288A- $    
r	' 	12;;=	 
(	' 
"	! 
#	" 
(	's$   /P"P PPPP&c           	         t        j                  g dg dd      }t        j                  g dg dd      }|dk(  rkt        j                  || dz  d	       t        j                  | dz  d	      }t        j                  || d
z  d	       t        j                  | d
z  d	      }n6|dk(  r+t        j                  |      }t        j                  |      }nt
        t        j                  ||f      j                  t        j                  d      dk  t        j                  d      dk(  z        }|j                         t        j                  g dg dd      k(  sJ |j                  t        j                  t        j                  ddgddgd            dd      }|j                         j                  d      t        j                  g dg dg dd      k(  sJ |j                  t        j                  d      dk        }|j                  t        j                  d      dk        }	t        j                  t        d      5  t        j                  ||	f       d d d        y # 1 sw Y   y xY w)NrI  rJ  r  )r^  r   r:  )hr)   lrU   r  rz  r   r  rG  r  rK   r^  )r   r   r^  )rM   rN   rU  r   ro  rM   rN   rK  r  z
left outerrL  )r   ro  N)r  r  r  zcurrently not supportedr   )r0   r:   r   r  rF   r  r   r  r1   r   r  r  r   r   r  )
r  rF  r  r  r!  r"  filtered_union_dsrP  filtered_ds1filtered_ds2s
             r*   test_union_dataset_filterrZ    s    
$ 
B 
 
B ~
Wt^E:jj46
Wt^E:jj46	5jjnjjn!!

C:.55	&	A	"((6"2a"78 %%'2885 ,   
 ##BJJrxxRc
9 0 %  $ .F ??$$V,: 1    ::bhhv.23L::bhhv.34L	z)B	C


L,/0 
D	C	Cs   I//I8c                    | dz  }t        |      \  }}t        j                  |      }|j                         }|j                  dk(  sJ |j                  t        j                  d      dk        }|j                         j                  dk(  sJ t        j                  t              5  |j                          d d d        y # 1 sw Y   y xY w)Ntest_parquet_dataset_filterr  r   r   ro  )r  r   r  r   r   r   r  r1   r   r   r  r>   )r  r  r  rF  rF   r  filtered_dss          r*   r\  r\    s    77I5i@M1  /GF??b   ..$!!34K!**b000	z	"!!# 
#	"	"s   )CCc                    t        j                  t        j                  t        d            gdg      }t	        j
                  |      }dt	        j                  d      i}|j                  |      }t	        j                  || dgd       t        j                  t        d	      5  t	        j                  || dgd       d
d
d
       y
# 1 sw Y   y
xY w)z
    Ensure the projected schema is used to validate partitions for scanner

    https://issues.apache.org/jira/browse/ARROW-17228
    ro  original_columnrx  renamed_columnr   rz  r  z0'Column original_column does not exist in schemar   N)r0   r:   r  r   r   rF   r1   r   r  r   r   KeyError)r  r:   table_datasetr   r   s        r*   4test_write_dataset_with_scanner_use_projected_schemarc  )  s     HHbhhuRy)*3D2EFEJJu%M"((#45G ##G#4G(8'9%I 	O	
 	W,=+>u	
		
 	
 	
s   )CCrX   )rz  r   c           
         |dk(  rt        j                  d       t        j                  ddgddgd dddgdd	id gd
ddg dddigd
gd      }t	        j
                  || dz  |       t	        j                  | dz  |      }|j                  g d      }|j                         dd ddgd d	dd gddddg ddd dgddgk(  sJ y )Nr   zpyarrow.parquetabc123qrs456r   r   buttonr  r  )r  elementvaluesstructsscrollwindow)NrK   r   fizzbuzz)user_ida.dotted.fieldinteractionr:   r   )ro  zinteraction.typezinteraction.valueszinteraction.structsrp  r   )rm  r  )ro  r  ri  rj  rp  )	r   r  r0   r:   r   r  rF   r   r:  )r  rX   r:   r!  s       r*   test_read_table_nested_columnsrr  B  s   -.HH(H!5)*A(q6u~t&<	>hff-=,>	@& E UGg-f=
**Ww&v
6C LL:  ;E ??dq!f"5148A	Oh,$T23q	J!   r,   c                 x   ddl m} | dz  }t        j                  j	                  t        j
                  g dt        j                               t        j
                  g dt        j                               gddg      } |j                  ||ddgd	
        |j                  |dd	t        j                  t        j                  dt        j                               t        j                  dt        j                               g            j                         j                         }||j                  d      k(  sJ |j                  d      j                         }t!        t#        |            }|D cg c]  }dt%        |d      z    }}t!        t'        j(                              }||k(  sJ y c c}w )Nr   )rF   zslash-writer-xr   r   rK   r   r   )experiment/A/f.csvzexperiment/B/f.csvru  zexperiment/C/k.csvzexperiment/M/i.csvexp_idexp_metarz  r   )r(   r`  rX   r   r0  )r  rX   r   r.   r   z	exp_meta=r  r   )r   rF   r0   r6   r  r  r   r  r  r.   r1   r   r  r  r  r:  r  r}   r   r5  r6  )tmpdirr   rd   dt_tabler0  rw  encoded_pathsr  s           r*   !test_dataset_partition_with_slashr{  ^  ss   %$$Dxx##
"((*-
 ()+	4%5 8@6L	NH B \" yy"((8RXXZ8((:rwwy9; <	 hj!  z))(3333q!++-Hc(m$HDLMHD[5B#77HMM

4()JJ&&& Ns   8F7c                 d   t        j                  t        j                  dt        j                         d      t        j                  dt        j                         d      g      }g dg dg}t         j                  j                  ||      }t        j                  || d	z         t        j                  | d	z  d
      }|j                         j                  j                  |      sJ t        j                  || dz  d
       t        j                  | dz  d
      }|j                         j                  j                  |      sJ t        j                  ||g| dz  d
       t        j                  | dz  d
      }|j                         j                  j                  |      sJ y )Nr   F)nullabler$  Tr  Nr   NrT   	nulltest1r   r   	nulltest2	nulltest3)r0   r.   r1   r3   r6   r  r_   r]  r   rF   r   r   r  )r  schema_nullablerS  r:   rF   s        r*   'test_write_dataset_preserve_nullabilityr    sW    ii
bhhj51
bhhj40!2 3O )FHH   @Ew45jj;.yAG$$++O<<<UGk1)Djj;.yAG$$++O<<<eU^W{%:9Mjj;.yAG$$++O<<<r,   c                    t        j                  t        j                  dt        j                         ddi      t        j                  dt        j                               g      }t        j                  t        j                  dt        j                               t        j                  dt        j                               g      }g dg dg}t         j                  j                  ||      }t         j                  j                  ||      }t        j                  ||g| d	z  d
       t        j                  | d	z  d
      }|j                         j                  j                  |d      sJ t        j                  ||g| dz  d
       t        j                  | dz  d
      }|j                         j                  j                  |d      sJ t        j                  ||g| dz  d
|       t        j                  | dz  d
      }|j                         j                  j                  |d      sJ y )Nr   s   foos   barr>  r$  r  r~  rT   test1r   r   Tr	  test2test3r  )r0   r.   r1   r3   r6   r  r   r  rF   r   r   )r  schema_metadataschema_no_metarS  r:   table_no_metarF   s          r*   *test_write_dataset_preserve_field_metadatar    s   ii
bhhjFF+;<
bhhj!!# $O YY
bhhj!
bhhj! # $N )FHH   @EHH(((GM e]+Ww->yQjj7*9=G$$++OD+QQQmU+Ww->yQjj7*9=G$$++N4+PPP mU+Ww->y+-jj7*9=G$$++OD+QQQr,   c                    dD ]_  }dD ]V  }t        j                  t        j                  dt        j                               t        j                  dt        j                               g      }g dg dg}t         j                  j                  ||      }t        j                         }| d| z  }t        j                  ||d|j                  ||	      d
       t        j                  |d      }|j                  D ]Z  }	t        j                  |	      }
|
j                  d      j                  d      }|j                   |u sJ |j"                  ||z  u rZJ  Y b y )N)TFr   r$  r  r~  rT   write_page_index_r   )write_statisticswrite_page_indexr`  )rX   r  ra  r   r   )r0   r.   r1   r3   r6   r  r   r   r  rY  rF   r   r_   r  r  r  has_offset_indexhas_column_index)r  r  r  r.   rS  r:   r   r`  r!  r  r  ccs               r*   #test_write_dataset_write_page_indexr    sJ   ) -YYbhhj)bhhj) + ,F  1FHH(((?E..0K#45E4F!GGH (;;%5%5 <  (=	 **Xi8C		++D1''*11!4**.>>>>**.>AQ.QQQQ "- !. *r,   c                    t        j                  t        j                  g d      t        j                  g d      gddg      }|dk(  r6t        j                  || dz  d	       t        j
                  | dz  d	      }n!|d
k(  rt        j
                  |      }nt        |j                  d      j                         j                         g dg ddk(  sJ |j                  dg      j                         j                         g dg ddk(  sJ |j                  t        j                  d      dk        j                  d      j                         j                         g dg ddk(  sJ t         j                  j                  t        j                  g dt        j                               t        j                  g d      gddg      }t        j
                  |      }|j                  dg      }|j                         j                         }|d   g dk(  sJ |d   g dk(  sJ |j                  dg      }|j                         j                         }|d   g dk(  sJ |d   g dk(  sJ y )N)rK   r   r   r   r   )rN   rM   rN   rM   r  ri  rM  rx  rU   r  rz  r   rG  )rM   rM   rN   rN   r  rt  )rM  ri  )ri  
descending)r  rN   rN   rM   rM   )r   r   rK   r   r   r   )rM   rM   rN   r  )r   r'  r'  rv  r  )r  carr  foobarrM   rN   )rM   r  )rv  r'  r'  r   )r  r  r  r  )rM   r  )r0   r:   r  r   r  rF   r  r  r   r  r   r  r1   r6   r  r3   )r  rF  r:   r   
sorted_tabsorted_tab_dicts         r*   test_dataset_sort_byr    sJ    HH
!
*+ !E
 ~
$u=ZZ$u5	5ZZ!!::h((*446)!;   
 ::/01::<FFH)!M   
 99bhhx(1,.66hj!   HH  
RXXZ0
01" 3Z ! E 
E	B012J ))+557O3=0003#BBBB/01J ))+557O3=0003#BBBBr,   c                 N   t        j                  dg di      }t         j                  j                         }|j	                  d      }| dz  }t        j                  ||||       t        j                  d      }t         j                  j                  |      }t        j                  ||	      j                         }||k(  sJ | d
z  }t        ||       t        |j                               }	t        |	      dk(  sJ |	d   }
t        |
j                               }|d   |d   k7  sJ |d   |d   c|d<   |d<   |
j                  |       t        j                  d      }t         j                  j                  |      }t        j                  ||	      j                         }||k7  sJ |t        j                  dg di      k(  sJ t!        j"                  t$        d      5  t        j                  ||	      j                         }ddd       y# 1 sw Y   yxY w)zwCheck that checksum verification works for datasets created with
    ds.write_dataset and read with ds.dataset.to_tablerM   r  T)write_page_checksumcorrect_dir)r(   r`  rX   r  r  )default_fragment_scan_optionsr   corrupted_dirr   r      $   F)r   rK   r   r   zCRC checksum verificationr   N)r0   r:   rF   r   rY  r   r  r  r   r   r[   iterdirr   	bytearray
read_byteswrite_bytesr   r   r'  )r  
table_origpq_write_formatr  original_dir_pathpq_scan_opts_crcpq_read_format_crctable_checkcorrupted_dir_pathcorrupted_file_path_listcorrupted_file_pathbin_datapq_scan_opts_no_crcpq_read_format_no_crctable_corruptrF  s                   r*   1test_checksum_write_dataset_read_dataset_to_tabler    s/    3-.J jj224O#66  7 "M  -/""	 44#')55&6 6 8**! hj  $$$ !?2 23
  $$6$>$>$@A'(A---215,779:H B<8B<'''!)"x|HRL(2, ##H- 77#(*JJ88&9 9 ;JJ#8::B(* 
 J&&&BHHc<%89999 
w&A	BJJ%
 (* 	
 
C	B	Bs   ,&HH$c                     d} d}t        j                  t              5 }t        j                  j
                  j                  d       d d d        | t        j                        v s|t        |j                        v sJ t        j                  j                         }d}t        j                  t        |      5  |j                  d       d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)NzImake_write_options() should be called on an instance of ParquetFileFormatzqdescriptor 'make_write_options' for 'pyarrow._dataset_parquet.ParquetFileFormat' objects doesn't apply to a 'int'+   z;make_write_options\(\) takes exactly 0 positional argumentsr   )	r   r   r   r0   rF   r   rY  rQ   r   )msg_1msg_2excinfopformatr1  s        r*   test_make_write_options_errorr  T  s    -E(E 
y	!W


$$77; 
"C&&%3w}}3E*EEEjj**,G
IC	y	,""2& 
-	, 
"	! 
-	,s   *C;C"C"C+rr  r=   )r   r   r   (  r   r   r5  rk  r>  rr  sysr  rB  rO  r  shutilr   urllib.parser   numpyr.  r  r   r   r0   r  r  r  pyarrow.csvr  r{   rU   pyarrow.jsonpyarrow.libr   pyarrow.tests.utilr   r   r   r	   r
   r   r"   r  rF   r   pyarrow.parquetr   r_   mark
pytestmarkr+   r;   rI   fixturera   r   r   r   r   r  r!  r)  r=  rG  rK  rO  rQ  rW  r  r  parametrizerQ   tupler  r  r  r  r  r  r  r  r   r  r!  s3r4  rK  rN  rX  r^  re  rj  rw  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r'  r)  r3  r<  r?  rD  rH  rJ  rL  rQ  rU  r\  r  rf  ri  ro  rs  ru  ry  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r(  r  r  r  r  r  r  r  r  r  r$  r(  r3  r6  r8  r>  rB  rI  rL  rR  rT  rW  r[  r`  ro  ru  rz  r  r  r  r  r  r  r  r  r  r  skipifplatformr  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r!  r(  r-  r4  r9  rV  rY  rh  rk  rp  rt  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r  r#  r(  r0  r<  r@  rB  rE  rS  rZ  r\  rc  rr  r{  r  r  r  r  r  r  r   r,   r*   <module>r     s  $   	    
              ,, ,   [[  
K +%  D  @ h0  0f   	
 	H+	+\ DG DGN), 89 89v " " -I -I` % %$ - -, 5 5 O OA!HF$ F8
8
 %8
 #(8
8
v#"6  " ) ) 6 6r$TNJ2 ,BOOH-$$/  t}5E"  6E"P 4 4$ 5-  5-pH2H, "> ">J  " 7 7@ 	%  	% 9N 9Nx  : ) )" 
:  
: %0 %0P  "5Jp '  'T 7 7 7  7 ) ), &. &.R $ $6 ' '> 	 	  3  3F     F  @ACIC I< +eT]; @AC@C < @@  @ACa,Ca,H  @ACR,CR,jJ? (  (2N
+;
=O"6J H H(*8,>@" I I
 I I L L $ $$ < <& J J& 7 7&    , - - = =*"?(DJ  6 7$" !# !#H ? ?, , , N N.A +v)>?5$-8+eT];)i  o&	?# !< , 62 < 9 @ 62r ; ;< D D( 
:  
: :  : ,  ,0 ?=  ?=D / / 9 92 = =6 F F 	 	 9 9(4?%DM2@ 6) 6)r  "$, #M #ML 3 3,*    "    $  )    (H*: A0    (    (    ,D2@0G@  ) 
,/ ,/^ , !  ! CLLG+D  F!F  !( !  !2 . .* 	  	$3 4  4$ 
;  
; 3  3N @  @  G G* D  D 	=  	==0 :?  :?z B  B&( ?C3&  P  PF *  *>)")"=& H  HVF" -# -#`;	O !< !<H  <  <F $ $8 A9 A9H #  #0 #9  #9L,<^!H ,@4 0 ('  ('V 4  *  ,  ,F 4  4	1 1 1 ,   , ^ $ I
  I
X " "$  <  < A A6  6  :  . 
 
> %$ K>K>\ %$ *1*1Z$
2 #56 76"'J = =.R:R@ %$ -C-C`FR'eq  	B"  	B
  	B
  	Bs[   AR8 3AS 8AS ?AS" R8ASSASSASSASSASSASS"AS-S,AS-