
    gJ1                         d dl mZ d dlZd dlmZ d dlmZ d dlmZ  G d d      Z	dZ
 G d d	      Zdd
Zde_        dddddddddddddZdj                  e
      e_        y)    )IntegralN)Table)_resolve_filesystem_and_pathc                   >   e Zd ZdZd Zed        Zed        Zed        Zed        Z	ed        Z
ed        Zed	        Zed
        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        ZddZddZddZy)ORCFilea  
    Reader interface for a single ORC file

    Parameters
    ----------
    source : str or pyarrow.NativeFile
        Readable source. For passing Python file objects or byte buffers,
        see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
    c                 l    t        j                         | _        | j                  j                  |       y N)_orc	ORCReaderreaderopen)selfsources     @/var/www/openai/venv/lib/python3.12/site-packages/pyarrow/orc.py__init__zORCFile.__init__&   s"    nn&     c                 6    | j                   j                         S )z/The file metadata, as an arrow KeyValueMetadata)r   metadatar   s    r   r   zORCFile.metadata*        {{##%%r   c                 6    | j                   j                         S )z#The file schema, as an arrow schema)r   schemar   s    r   r   zORCFile.schema/   s     {{!!##r   c                 6    | j                   j                         S )zThe number of rows in the file)r   nrowsr   s    r   r   zORCFile.nrows4   s     {{  ""r   c                 6    | j                   j                         S )z!The number of stripes in the file)r   nstripesr   s    r   r   zORCFile.nstripes9   r   r   c                 6    | j                   j                         S )z4Format version of the ORC file, must be 0.11 or 0.12)r   file_versionr   s    r   r   zORCFile.file_version>   s     {{''))r   c                 6    | j                   j                         S )z2Software instance and version that wrote this file)r   software_versionr   s    r   r    zORCFile.software_versionC        {{++--r   c                 6    | j                   j                         S )zCompression codec of the file)r   compressionr   s    r   r#   zORCFile.compressionH        {{&&((r   c                 6    | j                   j                         S )z?Number of bytes to buffer for the compression codec in the file)r   compression_sizer   s    r   r&   zORCFile.compression_sizeM   r!   r   c                 6    | j                   j                         S )z{Name of the writer that wrote this file.
        If the writer is unknown then its Writer ID
        (a number) is returned)r   writerr   s    r   r(   zORCFile.writerR   s    
 {{!!##r   c                 6    | j                   j                         S )zVersion of the writer)r   writer_versionr   s    r   r*   zORCFile.writer_versionY        {{))++r   c                 6    | j                   j                         S )zRNumber of rows per an entry in the row index or 0
        if there is no row index)r   row_index_strider   s    r   r-   zORCFile.row_index_stride^   s     {{++--r   c                 6    | j                   j                         S )zNumber of stripe statistics)r   nstripe_statisticsr   s    r   r/   zORCFile.nstripe_statisticsd        {{--//r   c                 6    | j                   j                         S )z/Length of the data stripes in the file in bytes)r   content_lengthr   s    r   r2   zORCFile.content_lengthi   r+   r   c                 6    | j                   j                         S )z<The number of compressed bytes in the file stripe statistics)r   stripe_statistics_lengthr   s    r   r4   z ORCFile.stripe_statistics_lengthn   s     {{3355r   c                 6    | j                   j                         S )z1The number of compressed bytes in the file footer)r   file_footer_lengthr   s    r   r6   zORCFile.file_footer_lengths   r0   r   c                 6    | j                   j                         S )z*The number of bytes in the file postscript)r   file_postscript_lengthr   s    r   r8   zORCFile.file_postscript_lengthx   s     {{1133r   c                 6    | j                   j                         S )zThe number of bytes in the file)r   file_lengthr   s    r   r:   zORCFile.file_length}   r$   r   Nc                    |y | j                   }g }|D ]q  }t        |t              r\t        |      }d|cxk  rt	        |      k  r$n n!||   j
                  }|j                  |       Vt        dt	        |      |fz        |c S  |S )Nr   z/Column indices must be in 0 <= ind < %d, got %d)r   
isinstancer   intlennameappend
ValueError)r   columnsr   namescols        r   _select_nameszORCFile._select_names   s    ?C#x(#h)c&k) +**CLL%$ &/25f+s1C&D E E   r   c                 ^    | j                  |      }| j                  j                  ||      S )a  Read a single stripe from the file.

        Parameters
        ----------
        n : int
            The stripe index
        columns : list
            If not None, only these columns will be read from the stripe. A
            column name may be a prefix of a nested field, e.g. 'a' will select
            'a.b', 'a.c', and 'a.d.e'

        Returns
        -------
        pyarrow.RecordBatch
            Content of the stripe as a RecordBatch.
        rB   )rE   r   read_stripe)r   nrB   s      r   rH   zORCFile.read_stripe   s.    " $$W-{{&&q'&::r   c                 \    | j                  |      }| j                  j                  |      S )a  Read the whole file.

        Parameters
        ----------
        columns : list
            If not None, only these columns will be read from the file. A
            column name may be a prefix of a nested field, e.g. 'a' will select
            'a.b', 'a.c', and 'a.d.e'. Output always follows the
            ordering of the file and not the `columns` list.

        Returns
        -------
        pyarrow.Table
            Content of the file as a Table.
        rG   )rE   r   read)r   rB   s     r   rK   zORCFile.read   s,      $$W-{{00r   r	   )__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r    r#   r&   r(   r*   r-   r/   r2   r4   r6   r8   r:   rE   rH   rK    r   r   r   r      so   ! & & $ $ # # & & * * . . ) ) . . $ $ , , . .
 0 0 , , 6 6 0 0 4 4 ) )(;(1r   r   a  file_version : {"0.11", "0.12"}, default "0.12"
    Determine which ORC file version to use.
    `Hive 0.11 / ORC v0 <https://orc.apache.org/specification/ORCv0/>`_
    is the older version
    while `Hive 0.12 / ORC v1 <https://orc.apache.org/specification/ORCv1/>`_
    is the newer one.
batch_size : int, default 1024
    Number of rows the ORC writer writes at a time.
stripe_size : int, default 64 * 1024 * 1024
    Size of each ORC stripe in bytes.
compression : string, default 'uncompressed'
    The compression codec.
    Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'}
    Note that LZ0 is currently not supported.
compression_block_size : int, default 64 * 1024
    Size of each compression block in bytes.
compression_strategy : string, default 'speed'
    The compression strategy i.e. speed vs size reduction.
    Valid values: {'SPEED', 'COMPRESSION'}
row_index_stride : int, default 10000
    The row index stride i.e. the number of rows per
    an entry in the row index.
padding_tolerance : double, default 0.0
    The padding tolerance.
dictionary_key_size_threshold : double, default 0.0
    The dictionary key size threshold. 0 to disable dictionary encoding.
    1 to always enable dictionary encoding.
bloom_filter_columns : None, set-like or list-like, default None
    Columns that use the bloom filter.
bloom_filter_fpp : double, default 0.05
    Upper limit of the false-positive rate of the bloom filter.
c                   p    e Zd Zdj                  e      ZdZddddddd	d
d
ddddZd Zd Z	d Z
d Zd Zy)	ORCWritera  
Writer interface for a single ORC file

Parameters
----------
where : str or pyarrow.io.NativeFile
    Writable target. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
    or pyarrow.io.FixedSizeBufferWriter.
{}
F0.12      uncompressed   speed'          N皙?r   
batch_sizestripe_sizer#   compression_block_sizecompression_strategyr-   padding_tolerancedictionary_key_size_thresholdbloom_filter_columnsbloom_filter_fppc                    t        j                         | _        | j                  j                  |||||||||	|
||       d| _        y )Nr]   T)r
   rS   r(   r   is_open)r   wherer   r^   r_   r#   r`   ra   r-   rb   rc   rd   re   s                r   r   zORCWriter.__init__   sV     nn&%!###9!5-/*G!5- 	 	
 r   c                 $    | j                          y r	   closer   s    r   __del__zORCWriter.__del__      

r   c                     | S r	   rQ   r   s    r   	__enter__zORCWriter.__enter__  s    r   c                 $    | j                          y r	   rj   )r   argskwargss      r   __exit__zORCWriter.__exit__  rm   r   c                 V    | j                   sJ | j                  j                  |       y)a
  
        Write the table into an ORC file. The schema of the table must
        be equal to the schema used when opening the ORC file.

        Parameters
        ----------
        table : pyarrow.Table
            The table to be written into the ORC file
        N)rg   r(   write)r   tables     r   ru   zORCWriter.write  s"     |||% r   c                 `    | j                   r"| j                  j                          d| _         yy)z$
        Close the ORC file
        FN)rg   r(   rk   r   s    r   rk   zORCWriter.close#  s'     <<KK DL r   )rL   rM   rN   format_orc_writer_args_docsrO   rg   r   rl   ro   rs   ru   rk   rQ   r   r   rS   rS      sa    
 F !  G % -+(-&-"'#&/2&*"&<!!r   rS   c                     t        | |      \  }}||j                  |      } |8t        |      dk(  r*t        |       j	                         j                  |      }|S t        |       j	                  |      }|S )Nr   rG   )r   open_input_filer>   r   rK   select)r   rB   
filesystempathresults        r   
read_tabler   ,  s|    3FJGJ++D1s7|q0%%'..w7 M %%g%6Mr   a  
Read a Table from an ORC file.

Parameters
----------
source : str, pyarrow.NativeFile, or file-like object
    If a string passed, can be a single file name. For file-like objects,
    only read a single file. Use pyarrow.BufferReader to read a file
    contained in a bytes or buffer-like object.
columns : list
    If not None, only these columns will be read from the file. A column
    name may be a prefix of a nested field, e.g. 'a' will select 'a.b',
    'a.c', and 'a.d.e'. Output always follows the ordering of the file and
    not the `columns` list. If empty, no columns will be read. Note
    that the table will still have the correct num_rows set despite having
    no columns.
filesystem : FileSystem, default None
    If nothing passed, will be inferred based on path.
    Path will try to be found in the local on-disk filesystem otherwise
    it will be parsed as an URI to determine the filesystem.
rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   c                    t        |t              r t        j                  dt        d       || }} t        |||||||||	|
||      5 }|j                  |        d d d        y # 1 sw Y   y xY w)NzThe order of the arguments has changed. Pass as 'write_table(table, where)' instead. The old order will raise an error in the future.   )
stacklevelr]   )r<   r   warningswarnFutureWarningrS   ru   )rv   rh   r   r^   r_   r#   r`   ra   r-   rb   rc   rd   re   r(   s                 r   write_tabler   P  s|     %&'4	

 eu	!51)+&C1)
 
U
 
 
s   A##A,a]  
Write a table into an ORC file.

Parameters
----------
table : pyarrow.lib.Table
    The table to be written into the ORC file
where : str or pyarrow.io.NativeFile
    Writable target. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
    or pyarrow.io.FixedSizeBufferWriter.
{}
)NN)numbersr   r   pyarrow.libr   pyarrow._orcr
   
pyarrow.fsr   r   ry   rS   r   rO   r   rx   rQ   r   r   <module>r      s   &     3`1 `1F DI! I!X

 0 $,*',%,!&"%.1%)!%!H F !  r   