
    g&                         d dl Z d dlmZ d dlmZmZmZmZ d dlmZ	 d dl
mZ d dlmZ  G d d      Zd Zh d	Z	 	 dd
Z	 	 ddZddZy)    N)_pandas_api)CodecTableconcat_tablesschema)_feather)FeatherErrorc                   .    e Zd ZdZddZddZd Zd	dZy)
FeatherDataseta  
    Encapsulates details of reading a list of Feather files.

    Parameters
    ----------
    path_or_paths : List[str]
        A list of file names
    validate_schema : bool, default True
        Check that individual file schemas are all the same / compatible
    c                      || _         || _        y N)pathsvalidate_schema)selfpath_or_pathsr   s      D/var/www/openai/venv/lib/python3.12/site-packages/pyarrow/feather.py__init__zFeatherDataset.__init__)   s    "
.    Nc                 F   t        | j                  d   |      }|g| _        |j                  | _        | j                  dd D ]H  }t        ||      }| j                  r| j                  ||       | j                  j                  |       J t        | j                        S )a,  
        Read multiple feather files as a single pyarrow.Table

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file

        Returns
        -------
        pyarrow.Table
            Content of the file as a table (of columns)
        r   columns   N)
read_tabler   _tablesr   r   validate_schemasappendr   )r   r   _filpathtables        r   r   zFeatherDataset.read_table-   s     $**Q-9vkkJJqrNDtW5E##%%dE2LL&	 #
 T\\**r   c                     | j                   j                  |j                         s0t        dj                  || j                   |j                               y )Nz-Schema in {!s} was different. 
{!s}

vs

{!s})r   equals
ValueErrorformat)r   piecer   s      r   r   zFeatherDataset.validate_schemasF   sH    {{!!%,,/ 2$fUDKK%*\\34 4 0r   c                 F    | j                  |      j                  |      S )a  
        Read multiple Parquet files as a single pandas DataFrame

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file
        use_threads : bool, default True
            Use multiple threads when converting to pandas

        Returns
        -------
        pandas.DataFrame
            Content of the file as a pandas DataFrame (of columns)
        r   )use_threadsr   	to_pandas)r   r   r&   s      r   read_pandaszFeatherDataset.read_pandasM   s*      w/99# : % 	%r   )Tr   )NT)__name__
__module____qualname____doc__r   r   r   r)    r   r   r   r      s    	/+24%r   r   c                    |j                   dk(  ry |j                  t        j                         t        j                         fv rt        dj                  |             t        dj                  | t        |j                                    )Nr   zqColumn '{}' exceeds 2GB maximum capacity of a Feather binary column. This restriction may be lifted in the futurezkColumn '{}' of type {} was chunked on conversion to Arrow and cannot be currently written to Feather format)
num_chunkstypeextbinarystringr"   r#   str)namecols     r   check_chunked_overflowr8   a   ss    
~~
xxCJJL#**,// 006t> 	>
  **0&s388}*EG 	Gr   >   lz4zstduncompressedc                    t         j                  rDt         j                  r4t        | t         j                  j
                        r| j                         } t        j                  |       rp|dk(  rd}n|dk(  rd}nt        d      t        j                  | |      }|dk(  r;t        |j                  j                        D ]  \  }}	||   }
t        |	|
        n| }|dk(  rYt        |j                         t        t#        |j                               kD  rt        d      |t        d      |Nt        d	      |t%        j&                  d
      rd}n)|'|t(        vrt        dj+                  |t(                    	 t-        j.                  ||||||       y# t0        $ rB t        |t2              r0	 t5        j6                  |        # t4        j8                  $ r Y  w xY w w xY w)a  
    Write a pandas.DataFrame to Feather format.

    Parameters
    ----------
    df : pandas.DataFrame or pyarrow.Table
        Data to write out as Feather format.
    dest : str
        Local destination path.
    compression : string, default None
        Can be one of {"zstd", "lz4", "uncompressed"}. The default of None uses
        LZ4 for V2 files if it is available, otherwise uncompressed.
    compression_level : int, default None
        Use a compression level particular to the chosen compressor. If None
        use the default compression level
    chunksize : int, default None
        For V2 files, the internal maximum size of Arrow RecordBatch chunks
        when writing the Arrow IPC file format. None means use the default,
        which is currently 64K
    version : int, default 2
        Feather file version. Version 2 is the current. Version 1 is the more
        limited legacy format
    r   F   Nz%Version value should either be 1 or 2)preserve_indexz'cannot serialize duplicate column namesz2Feather V1 files do not support compression optionz0Feather V1 files do not support chunksize option	lz4_framer9   z1compression="{}" not supported, must be one of {})compressioncompression_level	chunksizeversion)r   have_pandas
has_sparse
isinstancepdSparseDataFrameto_denseis_data_framer"   r   from_pandas	enumerater   namesr8   lencolumn_namessetr   is_available_FEATHER_SUPPORTED_CODECSr#   r   write_feather	Exceptionr5   osremoveerror)dfdestr@   rA   rB   rC   r>   r   ir6   r7   s              r   rS   rS   s   s   2 ""2{~~==>B  $ a<"N\!NDEE!!"^Da<$U\\%7%784Ah&tS1 9 !|u!!"SU-?-?)@%AAFGG" & ' '   & ' ' 5#5#5k#BK%!:: ))/0I*KL L
ud1B)2G	E  dC 		$ 	 88 s0   >F G%4G
	G%
G G%G  G%c                 D     t        | |||      j                  dd|i|S )a  
    Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
    feather.read_table.

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads. If false the
        restriction is used in the conversion to Pandas as well as in the
        reading from Feather format.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str.
    **kwargs
        Additional keyword arguments passed on to `pyarrow.Table.to_pandas`.

    Returns
    -------
    df : pandas.DataFrame
        The contents of the Feather file as a pandas.DataFrame
    )r   
memory_mapr&   r&   r.   r'   )sourcer   r&   r\   kwargss        r   read_featherr_      s;    6+JJ!!*N7BNFLN Or   c                    t        j                  | ||      }||j                         S |D cg c]  }t        |       }}t	        t        d |            r|j                  |      }n\t	        t        d |            r|j                  |      }n4|D cg c]  }|j                   }	}t        dj                  ||	            |j                  dk  r|S t        t        |            |k(  r|S |j                  |      S c c}w c c}w )a  
    Read a pyarrow.Table from Feather format

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.

    Returns
    -------
    table : pyarrow.Table
        The contents of the Feather file as a pyarrow.Table
    )use_memory_mapr&   c                     | t         k(  S r   )intts    r   <lambda>zread_table.<locals>.<lambda>  s    cr   c                     | t         k(  S r   )r5   rd   s    r   rf   zread_table.<locals>.<lambda>  s    18r   z<Columns must be indices or names. Got columns {} of types {}   )r   FeatherReaderreadr1   allmapread_indices
read_namesr*   	TypeErrorr#   rC   sortedrP   select)
r]   r   r\   r&   readercolumncolumn_typesr   re   column_type_namess
             r   r   r      s   * ##z{DF {{}/67wVDLwL7
3!<01##G,	S#\2	3!!'*1=>AQZZ> 5):;= 	=
 ~~	G		( ||G$$' 8 ?s   DD)NNNr=   )NTF)NFT)rU   pyarrow.pandas_compatr   pyarrow.libr   r   r   r   libr2   pyarrowr   pyarrow._featherr	   r   r8   rR   rS   r_   r   r.   r   r   <module>r{      s`   & 
 -0 0   )A% A%HG <  AE*+Qh 48!O@.%r   