
    g@<              
          d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZmZmZmZmZ ddlmZmZmZ  ej0                  e      Zh dZh d	Ze	 G d
 d             Zdeej<                  ef   deeef   fdZ deeej<                  f   deeeeeee!f   f      ddfdZ"deeej<                  f   deeej<                  f   ddfdZ#dejH                  dedeeej<                  e!f   ddfdZ%deeee!f   de!fdZ&dedefdZ'dedee   ddfdZ(dejH                  dejR                  de*fdZ+y)     N)contextmanager)	dataclassfield)Path)AnyDict	GeneratorIterableTupleUnion   )DDUFCorruptedFileErrorDDUFExportErrorDDUFInvalidEntryNameError>   .txt.json.model.safetensors>   config.jsonscheduler_config.jsontokenizer_config.jsonpreprocessor_config.jsonc                       e Zd ZU dZeed<   eed<   eed<    ed      Ze	ed<   e
deed	d	f   fd
       ZddedefdZy	)	DDUFEntrya  Object representing a file entry in a DDUF file.

    See [`read_dduf_file`] for how to read a DDUF file.

    Attributes:
        filename (str):
            The name of the file in the DDUF archive.
        offset (int):
            The offset of the file in the DDUF archive.
        length (int):
            The length of the file in the DDUF archive.
        dduf_path (str):
            The path to the DDUF archive (for internal use).
    filenamelengthoffsetF)repr	dduf_pathreturnNc              #   N  K   | j                   j                  d      5 }t        j                  |j                         dt        j                        5 }|| j
                  | j
                  | j                  z     ddd       ddd       y# 1 sw Y   xY w# 1 sw Y   yxY ww)a-  Open the file as a memory-mapped file.

        Useful to load safetensors directly from the file.

        Example:
            ```py
            >>> import safetensors.torch
            >>> with entry.as_mmap() as mm:
            ...     tensors = safetensors.torch.load(mm)
            ```
        rbr   )r   accessN)r   openmmapfilenoACCESS_READr   r   )selffmms      X/var/www/openai/venv/lib/python3.12/site-packages/huggingface_hub/serialization/_dduf.pyas_mmapzDDUFEntry.as_mmap9   su      ^^  &!188:a8H8HIRt{{T[['@AA J '&II '&s4   B%5B)B<B	B%B	BB"B%encodingc                     | j                   j                  d      5 }|j                  | j                         |j	                  | j
                        j                  |      cddd       S # 1 sw Y   yxY w)zRead the file as text.

        Useful for '.txt' and '.json' entries.

        Example:
            ```py
            >>> import json
            >>> index = json.loads(entry.read_text())
            ```
        r"   )r-   N)r   r$   seekr   readr   decode)r(   r-   r)   s      r+   	read_textzDDUFEntry.read_textJ   sR     ^^  &!FF4;;66$++&--x-@ '&&s   AA,,A5)zutf-8)__name__
__module____qualname____doc__str__annotations__intr   r   r   r   r	   bytesr,   r2        r+   r   r   "   se     MKK'It'B5$#45 B B A# AC Ar<   r   r   r    c                 X   i }t        |       } t        j                  d|         t        j                  t        |       d      5 }|j                         D ]  }t        j                  d|j                          |j                  t        j                  k7  rt        d      	 t        |j                         t        ||      }t        |j                  ||j                   |       ||j                  <    	 ddd       d|vrt        d	      t#        j$                  |d   j'                               }t)        ||j+                                t        j                  d
|  dt-        |       d       |S # t        $ r}t        d|j                         |d}~ww xY w# 1 sw Y   xY w)a  
    Read a DDUF file and return a dictionary of entries.

    Only the metadata is read, the data is not loaded in memory.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to read.

    Returns:
        `Dict[str, DDUFEntry]`:
            A dictionary of [`DDUFEntry`] indexed by filename.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).

    Example:
        ```python
        >>> import json
        >>> import safetensors.torch
        >>> from huggingface_hub import read_dduf_file

        # Read DDUF metadata
        >>> dduf_entries = read_dduf_file("FLUX.1-dev.dduf")

        # Returns a mapping filename <> DDUFEntry
        >>> dduf_entries["model_index.json"]
        DDUFEntry(filename='model_index.json', offset=66, length=587)

        # Load model index as JSON
        >>> json.loads(dduf_entries["model_index.json"].read_text())
        {'_class_name': 'FluxPipeline', '_diffusers_version': '0.32.0.dev0', '_name_or_path': 'black-forest-labs/FLUX.1-dev', ...

        # Load VAE weights using safetensors
        >>> with dduf_entries["vae/diffusion_pytorch_model.safetensors"].as_mmap() as mm:
        ...     state_dict = safetensors.torch.load(mm)
        ```
    zReading DDUF file rzReading entry z)Data must not be compressed in DDUF file.z!Invalid entry name in DDUF file: N)r   r   r   r   model_index.json7Missing required 'model_index.json' entry in DDUF file.zDone reading DDUF file z. Found z entries)r   loggerinfozipfileZipFiler7   infolistdebugr   compress_type
ZIP_STOREDr   _validate_dduf_entry_namer   _get_data_offsetr   	file_sizejsonloadsr2   _validate_dduf_structurekeyslen)r   entrieszfrB   er   indexs          r+   read_dduf_filerU   Z   so   N GYI
KK$YK01	Y	-KKMDLL>$--9:!!W%7%77,-XYYi)$--8 &b$/F%.vdnnXa&GDMM" " 
.$ ($%^__JJw12<<>?EUGLLN3
KK))HS\N(STN! - i,/PQUQ^Q^P_-`aghhi 
.	-s1   AF $E69>F 6	F?FFF  F)rQ   c                 $   t         j                  d|  d       t               }d}t        j                  t        |       dt        j                        5 }|D ]  \  }}||v rt        d|       |j                  |       |dk(  r-	 t        j                  t        |      j                               }	 t        |      }t         j!                  d	| d
       t#        |||        	 ddd       |t        d      	 t%        ||       t         j                  d|         y# t        j                  $ r}t        d      |d}~ww xY w# t        $ r}t        d|       |d}~ww xY w# 1 sw Y   xY w# t&        $ r}t        d      |d}~ww xY w)a  Write a DDUF file from an iterable of entries.

    This is a lower-level helper than [`export_folder_as_dduf`] that allows more flexibility when serializing data.
    In particular, you don't need to save the data on disk before exporting it in the DDUF file.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        entries (`Iterable[Tuple[str, Union[str, Path, bytes]]]`):
            An iterable of entries to write in the DDUF file. Each entry is a tuple with the filename and the content.
            The filename should be the path to the file in the DDUF archive.
            The content can be a string or a pathlib.Path representing a path to a file on the local disk or directly the content as bytes.

    Raises:
        - [`DDUFExportError`]: If anything goes wrong during the export (e.g. invalid entry name, missing 'model_index.json', etc.).

    Example:
        ```python
        # Export specific files from the local disk.
        >>> from huggingface_hub import export_entries_as_dduf
        >>> export_entries_as_dduf(
        ...     dduf_path="stable-diffusion-v1-4-FP16.dduf",
        ...     entries=[ # List entries to add to the DDUF file (here, only FP16 weights)
        ...         ("model_index.json", "path/to/model_index.json"),
        ...         ("vae/config.json", "path/to/vae/config.json"),
        ...         ("vae/diffusion_pytorch_model.fp16.safetensors", "path/to/vae/diffusion_pytorch_model.fp16.safetensors"),
        ...         ("text_encoder/config.json", "path/to/text_encoder/config.json"),
        ...         ("text_encoder/model.fp16.safetensors", "path/to/text_encoder/model.fp16.safetensors"),
        ...         # ... add more entries here
        ...     ]
        ... )
        ```

        ```python
        # Export state_dicts one by one from a loaded pipeline
        >>> from diffusers import DiffusionPipeline
        >>> from typing import Generator, Tuple
        >>> import safetensors.torch
        >>> from huggingface_hub import export_entries_as_dduf
        >>> pipe = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
        ... # ... do some work with the pipeline

        >>> def as_entries(pipe: DiffusionPipeline) -> Generator[Tuple[str, bytes], None, None]:
        ...     # Build an generator that yields the entries to add to the DDUF file.
        ...     # The first element of the tuple is the filename in the DDUF archive (must use UNIX separator!). The second element is the content of the file.
        ...     # Entries will be evaluated lazily when the DDUF file is created (only 1 entry is loaded in memory at a time)
        ...     yield "vae/config.json", pipe.vae.to_json_string().encode()
        ...     yield "vae/diffusion_pytorch_model.safetensors", safetensors.torch.save(pipe.vae.state_dict())
        ...     yield "text_encoder/config.json", pipe.text_encoder.config.to_json_string().encode()
        ...     yield "text_encoder/model.safetensors", safetensors.torch.save(pipe.text_encoder.state_dict())
        ...     # ... add more entries here

        >>> export_entries_as_dduf(dduf_path="stable-diffusion-v1-4.dduf", entries=as_entries(pipe))
        ```
    zExporting DDUF file ''NwzCan't add duplicate entry: r?   z#Failed to parse 'model_index.json'.zInvalid entry name: zAdding entry 'z' to DDUF filer@   zInvalid DDUF file structure.zDone writing DDUF file )rA   rB   setrC   rD   r7   rH   r   addrL   rM   _load_contentr1   JSONDecodeErrorrI   r   rF   _dump_content_in_archiverN   r   )r   rQ   	filenamesrT   archiver   contentrS   s           r+   export_entries_as_ddufra      s   t KK'	{!45IE	Yg.@.@	AW!(Hg9$%(CH:&NOOMM(#--X JJ}W'='D'D'FGEP4X> LL>(>BC$Wh@! ") 
B( }WXXE 	2 KK))56% ++ X)*OPVWWX
 - P%(<XJ&GHaOP 
B	A0 " E<=1DEsf   1E),D"3E	>(E)=E5 "E5EEE)		E&E!!E&&E))E25	F>F

Ffolder_pathc                 ~    t              dt        t        t        t         f      ffd}t	        |  |              y)a  
    Export a folder as a DDUF file.

    AUses [`export_entries_as_dduf`] under the hood.

    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
        folder_path (`str` or `os.PathLike`):
            The path to the folder containing the diffusion model.

    Example:
        ```python
        >>> from huggingface_hub import export_folder_as_dduf
        >>> export_folder_as_dduf(dduf_path="FLUX.1-dev.dduf", folder_path="path/to/FLUX.1-dev")
        ```
    r    c               3   t  K   t              j                  d      D ]  } | j                         s| j                  t        vrt
        j                  d|  d       @| j                        }t        |j                        dk\  rt
        j                  d|  d       |j                         | f  y w)Nz**/*zSkipping file 'z' (file type not allowed)   z"' (nested directories not allowed))r   globis_filesuffixDDUF_ALLOWED_ENTRIESrA   rF   relative_torP   partsas_posix)pathpath_in_archiverb   s     r+   _iterate_over_folderz3export_folder_as_dduf.<locals>._iterate_over_folder  s     %**62D<<>{{"66tf4MNO"..{;O?(()Q.tf4VWX!**,d22 3s   B5B8N)r   r
   r   r7   ra   )r   rb   ro   s    ` r+   export_folder_as_ddufrp      s9    $ {#K3(5d+;"< 3 9&:&<=r<   r_   r   r`   c                 v   | j                  |dd      5 }t        |t        t        f      r=t        |      }|j                  d      5 }t	        j
                  ||d       d d d        n1t        |t              r|j                  |       nt        d| d      d d d        y # 1 sw Y   xY w# 1 sw Y   y xY w)NrX   T)force_zip64r"   i   zInvalid content type for z. Must be str, Path or bytes.)	r$   
isinstancer7   r   shutilcopyfileobjr:   writer   )r_   r   r`   
archive_fhcontent_path
content_fhs         r+   r]   r]     s    	h	6*gT{+=L""4(J"":z?K )('W%!$=hZGd"eff 
7	6 )( 
7	6s#   3B/B# :B/#B,	(B//B8c                     t        | t        t        f      rt        |       j                         S t        | t              r| S t        dt        |        d      )zoLoad the content of an entry as bytes.

    Used only for small checks (not to dump content into archive).
    z6Invalid content type. Must be str, Path or bytes. Got .)rs   r7   r   
read_bytesr:   r   type)r`   s    r+   r[   r[   *  sR    
 'C;'G}''))	GU	# VW[\cWdVeefghhr<   
entry_namec                     d| j                  d      d   z   t        vrt        d|        d| v rt        d|  d      | j                  d      } | j	                  d      dkD  rt        d|  d      | S )	Nr{   zFile type not allowed: \z0Entry names must use UNIX separators ('/'). Got /   z-DDUF only supports 1 level of directory. Got )splitri   r   stripcount)r~   s    r+   rI   rI   7  s    
Zc"2&&.BB'*A*(NOOz'*Z[eZffg(hii!!#&Jq '*WXbWccd(effr<   rT   entry_namesc                 @   t        | t              st        dt        |        d      D ch c]  }d|v s|j	                  d      d    }}|D ]D  | vrt        d d      t        fdt        D              r0t        d d	t         d       y
c c}w )a  
    Consistency checks on the DDUF file structure.

    Rules:
    - The 'model_index.json' entry is required and must contain a dictionary.
    - Each folder name must correspond to an entry in 'model_index.json'.
    - Each folder must contain at least a config file ('config.json', 'tokenizer_config.json', 'preprocessor_config.json', 'scheduler_config.json').

    Args:
        index (Any):
            The content of the 'model_index.json' entry.
        entry_names (Iterable[str]):
            The list of entry names in the DDUF file.

    Raises:
        - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).
    z>Invalid 'model_index.json' content. Must be a dictionary. Got r{   r   r   zMissing required entry 'z' in 'model_index.json'.c              3   0   K   | ]  } d | v   yw)r   Nr;   ).0required_entryr   folders     r+   	<genexpr>z+_validate_dduf_structure.<locals>.<genexpr>[  s%     rUq>fXQ~./;>Uqs   z!Missing required file in folder 'z!'. Must contains at least one of N)rs   dictr   r}   r   anyDDUF_FOLDER_REQUIRED_ENTRIES)rT   r   entrydduf_foldersr   s    `  @r+   rN   rN   B  s    $ eT"$'efjkpfqerrs%tuu5@Q[EC5LEKK$Q'[LQ(+CF8Kc)deerUqrr(3F8;\]y\zz{| 	  Rs
   	BBrR   rB   c                 d   | j                   t        d      |j                  }| j                   j                  |       | j                   j	                  d      }t        |      dk  rt        d      t        j                  |dd d      }t        j                  |dd d      }|dz   |z   |z   }|S )a1  
    Calculate the data offset for a file in a ZIP archive.

    Args:
        zf (`zipfile.ZipFile`):
            The opened ZIP file. Must be opened in read mode.
        info (`zipfile.ZipInfo`):
            The file info.

    Returns:
        int: The offset of the file data in the ZIP archive.
    z+ZipFile object must be opened in read mode.   zIncomplete local file header.      little)fpr   header_offsetr/   r0   rP   r9   
from_bytes)rR   rB   r   local_file_headerfilename_lenextra_field_lendata_offsets          r+   rJ   rJ   a  s     
uu}$%RSS &&M EEJJ}

2
"$%DEE >>"3Br":HELnn%6r"%=xHO  "$|3oEKr<   ),rL   loggingr%   osrt   rC   
contextlibr   dataclassesr   r   pathlibr   typingr   r   r	   r
   r   r   errorsr   r   r   	getLoggerr3   rA   ri   r   r   PathLiker7   rU   r:   ra   rp   rD   r]   r[   rI   rN   ZipInfor9   rJ   r;   r<   r+   <module>r      s      	   % (  ? ? W W 
		8	$    4A 4A 4AnBeBKK$45 B$sI~:N BJX7S"++%&X719%U3PTV[K[E\@\:]1^X7	X7v!>U3+;%< !>5QTVXVaVaQaKb !>gk !>H	ggoo 	g 	guUXZ\ZeZeglUlOm 	grv 	g
i5dE!12 
iu 
i# # C hsm  >" " "C "r<   