
    g[                     T   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZ ddlmZ ddlmZmZmZ dd	lmZmZmZmZ dd
lmZmZ ddl m!Z!m"Z"m#Z# ddl$m%Z% ddl&m'Z' erddl(m)Z)  ejT                  e+      Z,dZ-dZ.dZ/ddddddddddde0dee0ef   de0dee0   dee1   deeee0   e0f      deeee0   e0f      dee2   de1d e2fd!Z3 G d" d#e jh                        Z5eeef   Z6 G d$ d%      Z7d&e7ddde0de0de0f
d'Z8d&e7d(eee5ee6   f      fd)Z9d*e6d(dfd+Z:d,ee6   ddde0de0de0d(dfd-Z;d*e6ddde0de0de0d(dfd.Z<d,ee6   ddde0de0de0d(dfd/Z= G d0 d1e      Z>d*e6d(e>fd2Z?d3d4d(ee6   fd5Z@d3d4d6e2d(ee6   fd7ZAd3d4d(ee6   fd8ZBd9e0d(dfd:ZCy);    N)datetime)Path)Lock)TYPE_CHECKINGListOptionalTupleUnion   )	constants)CommitOperationAdd
UploadInfo_fetch_upload_modes)LocalUploadFileMetadataLocalUploadFilePathsget_local_upload_pathsread_upload_metadata)DEFAULT_REVISION
REPO_TYPES)DEFAULT_IGNORE_PATTERNSfilter_repo_objectstqdm)_format_size)sha_fileobj)HfApi
   K      T<   )revisionprivateallow_patternsignore_patternsnum_workersprint_reportprint_report_everyapir   repo_idfolder_path	repo_typer    r!   r"   r#   r$   r%   r&   c                   |t        d      |t        vrt        dt               |t        }t              j	                         j                         j                         st        d d      |g }nt        |t              r|g}|t        z  }|'t        j                         xs d}t        |dz
  d      }| j                  |||d	      }t        j                  d
|        |j                   }t#        fdj%                  d      D        ||      }|D cg c]  }t'        |       }}t        j                  dt)        |       d       t+        |d      D cg c]  }|t-        |j.                        f }}t1        |      }t3        |      D cg c]#  }t5        j6                  t8        || |||d      % }}|D ]  }|j;                           |	rt=        d|j?                         z          tA        j@                         }	 tA        jB                  d       tA        j@                         |z
  |
k\  r/|	rtE        |j?                                tA        j@                         }|jG                         rtI        j                  d       n|D ]  }|jK                           t        j                  |j?                                tI        j                  d       yc c}w c c}w c c}w )zUpload a large folder to the Hub in the most resilient way possible.

    See [`HfApi.upload_large_folder`] for the full documentation.
    NzFor large uploads, `repo_type` is explicitly required. Please set it to `model`, `dataset` or `space`. If you are using the CLI, pass it as `--repo-type=model`.z"Invalid repo type, must be one of zProvided path: 'z' is not a directoryr      T)r(   r*   r!   exist_okzRepo created: c              3   ~   K   | ]4  }|j                         s|j                        j                          6 y wN)is_filerelative_toas_posix).0pathr)   s     Y/var/www/openai/venv/lib/python3.12/site-packages/huggingface_hub/_upload_large_folder.py	<genexpr>z/upload_large_folder_internal.<locals>.<genexpr>b   s4     i>VdZ^ZfZfZh		+	&	/	/	1>Vs   =$=z**/*)r"   r#   zFound z candidate files to uploadzRecovering from metadata files)desc)statusr'   r(   r*   r    )targetkwargsz

zIs done: exiting main loopzUpload is complete!)&
ValueErrorr   r   r   
expanduserresolveis_dir
isinstancestrr   os	cpu_countmaxcreate_repologgerinfor(   r   globr   lenr   r   path_in_repoLargeUploadStatusrange	threadingThread_worker_jobstartprintcurrent_reporttimesleep_print_overwriteis_doneloggingjoin)r'   r(   r)   r*   r    r!   r"   r#   r$   r%   r&   nb_coresrepo_urlfiltered_paths_listrelpath
paths_listpathsitemsr8   _threadsthreadlast_report_tss     `                    r5   upload_large_folder_internalrc   0   s   & I
 	
 
"=j\JKK#{#..088:K+K=8LMNN	OS	)*+..O<<>&Q(Q,* w)W_cdH
KK.
+,G .ik>N>Nv>Vi%'
 ObbNa7(g>NaJb
KK&Z))CDE
 *+KLLE 
$[%2D2DEFL 
  u%F {# $A 	 "&$		
 $     fv,,../YY[N


199;'+== !6!6!89!YY[N>>LL56    KK%%'(LL&'] cs   $K,*K1#(K6c                       e Zd Z ej                         Z ej                         Z ej                         Z ej                         Z ej                         Z	y)	WorkerJobN)
__name__
__module____qualname__enumautoSHA256GET_UPLOAD_MODEPREUPLOAD_LFSCOMMITWAIT     r5   re   re      sC    TYY[FdiikODIIKMTYY[F499;Drq   re   c                   :    e Zd ZdZdee   fdZdefdZde	fdZ
y)rJ   zBContains information, queues and tasks for a large upload process.r^   c                 N   || _         t        j                         | _        t        j                         | _        t        j                         | _        t        j                         | _        t               | _        d| _	        d| _
        d| _        d| _        d| _        d | _        t        j                          | _        | j                   D ]  }|\  }}|j$                  | j                  j'                  |       0|j(                  | j                  j'                  |       X|j(                  dk(  r(|j*                  s| j
                  j'                  |       |j,                  s| j                  j'                  |       t.        j1                  d|j2                   d        y )Nr   lfszSkipping file z! (already uploaded and committed))r^   queueQueuequeue_sha256queue_get_upload_modequeue_preupload_lfsqueue_commitr   locknb_workers_sha256nb_workers_get_upload_modenb_workers_preupload_lfsnb_workers_commitnb_workers_waitinglast_commit_attemptr   now_started_atsha256putupload_modeis_uploadedis_committedrE   debugrI   )selfr^   itemr]   metadatas        r5   __init__zLargeUploadStatus.__init__   sC   
7<{{}@E">Ckkm 7<{{}F	&'/0'-.%&''(48 #<<> JJD"OE8&!!%%d+%%-**..t4%%.x7K7K((,,T2**!!%%d+~e.@.@-AAbcd rq   returnc                    d}d}d}d}d}d}d}d}d}	d}
d}| j                   5  | j                  D ]  \  }}|j                  r|
dz  }
|	|j                  z  }	|dz  }|j                  |dz  }||j                  z  }|j
                  dk(  r|dz  }|j
                  |dz  }|j                  r|dz  }||j                  z  }|j                  s|dz  }||j                  z  } t        |	      }t        j                         }|j                  d      }|| j                  z
  }t        |      j                  d      d   }d}|d| d	| d
z  }|dz  }|dz  }|d| d| d	t        |       d| d	z  }|d| d| d	t        |       d| d	z  }|dkD  r	|d| dz  }|d| d| d	t        |       d| d	z  }|d|
 dz  }|dz  }|d| j                   dz  }|d| j                    dz  }|d| j"                   dz  }|d| j$                   dz  }|d| j&                   dz  }|dz  }|cddd       S # 1 sw Y   yxY w)z<Generate a report of the current status of the large upload.r   r   Nrt   z%Y-%m-%d %H:%M:%S.z
---------- z (z) z----------
z	Files:   zhashed /z) | zpre-uploaded: )z (+z unsure)z | committed: z | ignored: 
z	Workers: z	hashing: z | zget upload mode: zpre-uploading: zcommitting: z	waiting: z3---------------------------------------------------)r{   r^   should_ignoresizer   r   r   r   r   r   r   strftimer   r@   splitr|   r}   r~   r   r   )r   	nb_hashedsize_hashednb_preuploadednb_lfsnb_lfs_unsuresize_preuploadednb_committedsize_committed
total_sizeignored_filestotal_filesr_   r   total_size_strr   now_strelapsedelapsed_strmessages                       r5   rQ   z LargeUploadStatus.current_report   s   	
YY#zz8))!Q&Mhmm+
q ??.NI8==0K''50aKF''/!Q&M''"a'N$5$(( A%L"hmm3N%  *& **5N,,.Cll#67GD,,,Gg,,,S1!4K%G7)2k]"55G&G{"G1[ML<U;VVWXfWggkllG'7q<P`CaBbbcdrcsstuuGq Sx88~Q{m2lSaFbEccdesdttuvvGm_B77G{"G4#9#9":#>>G*4+J+J*K3OOG)F)F(GsKKGd&<&<%=SAAG4#:#:";2>>GxG_ YYs   B*H4EH44H=c                 ~    | j                   5  t        d | j                  D              cd d d        S # 1 sw Y   y xY w)Nc              3   V   K   | ]!  \  }}|j                   xs |j                   # y wr/   r   r   r3   r_   r   s      r5   r6   z,LargeUploadStatus.is_done.<locals>.<genexpr>
  s+     eZd;1hx,,F0F0FFZd   '))r{   allr^   r   s    r5   rU   zLargeUploadStatus.is_done  s(    YYeZ^ZdZdee YYs   3<N)rf   rg   rh   __doc__r   
JOB_ITEM_Tr   r@   rQ   boolrU   rp   rq   r5   rJ   rJ      s3    Led:. e>= =~f frq   rJ   r8   c                    	 d}t        |       }|y|\  }}|t        j                  k(  rX|d   }	 t        |       | j                  j                  |       | j                  5  | xj                  dz  c_        ddd       n|t        j                  k(  r	 t!        |||||       |D ]  }|\  }
}|j"                  r|j$                  dk(  r| j&                  j                  |       @|j$                  dk(  r| j(                  j                  |       k| j                  j                  |        | j                  5  | xj*                  dz  c_        ddd       n-|t        j,                  k(  r\|d   }	 t/        |||||       | j(                  j                  |       | j                  5  | xj0                  dz  c_        ddd       n|t        j2                  k(  rU	 t5        |||||       | j                  5  t7        j6                         | _        | xj:                  dz  c_        ddd       nV|t        j<                  k(  rCt7        j>                  t@               | j                  5  | xjB                  dz  c_!        ddd       # t        $ r  t        $ rR}	t        j                  d|	        t        j                          | j                  j                  |       Y d}	~	d}	~	ww xY w# 1 sw Y   rxY w# t        $ r  t        $ r7}	t        j                  d|	        t        j                          Y d}	~	d}	~	ww xY w# 1 sw Y   xY w# t        $ r  t        $ rR}	t        j                  d	|	        t        j                          | j&                  j                  |       Y d}	~	d}	~	ww xY w# 1 sw Y   >xY w# t        $ r  t        $ rY}	t        j                  d
|	        t        j                          |D ]  }| j(                  j                  |        Y d}	~	Ad}	~	ww xY w# 1 sw Y   xY w# 1 sw Y   xY w)a  
    Main process for a worker. The worker will perform tasks based on the priority list until all files are uploaded
    and committed. If no tasks are available, the worker will wait for 10 seconds before checking again.

    If a task fails for any reason, the item(s) are put back in the queue for another worker to pick up.

    Read `upload_large_folder` docstring for more information on how tasks are prioritized.
    Nr   zFailed to compute sha256: r   )r'   r(   r*   r    zFailed to get upload mode: rt   regularzFailed to preupload LFS: zFailed to commit: )"_determine_next_jobre   rk   _compute_sha256rx   r   KeyboardInterrupt	ExceptionrE   error	traceback
format_excrw   r{   r|   rl   _get_upload_moder   r   ry   rz   r}   rm   _preupload_lfsr~   rn   _commitrR   r   r   ro   rS   WAITING_TIME_IF_NO_TASKSr   )r8   r'   r(   r*   r    next_jobjobr^   r   er_   r   s               r5   rN   rN     s    AE 'v.
U )"""8D.%,,006 ((A-(  I---' CI`hi "8))''50..2248))Y6''++D10044T:  11Q61  I+++8D5tg]ef##''- //14/  I$$$239W_` -1YY[*((A-(  INN"JJ/0))Q.) i  %  .9!=>$$&##''--.
  %  ':1#>?$$&&'(  %  58<=$$&**..t445
  %  21!56$$&!D''++D1 "2
  s   &J "K2K> <M
5+M ,N=O
 ;/P8+QK/AK**K/2K;>M,MM
MN:(AN55N:=O
P5AP00P58QQr   c                 0   | j                   5  | j                  dk(  r| j                  j                         dkD  r| j                  |t        j
                         | j                  z
  dkD  rX| xj                  dz  c_        t        j                  d       t        j                  t        | j                        fcd d d        S | j                  dk(  ru| j                  j                         dk\  rX| xj                  dz  c_        t        j                  d       t        j                  t        | j                        fcd d d        S | j                  j                         dk\  rY| xj                  dz  c_        t        j                  d       t        j                  t        | j                  d	      fcd d d        S | j                  j                         dkD  rg| j                   dk(  rX| xj                   dz  c_        t        j                  d
       t        j"                  t%        | j                        fcd d d        S | j&                  j                         dkD  rg| j(                  dk(  rX| xj(                  dz  c_        t        j                  d       t        j*                  t%        | j&                        fcd d d        S | j                  j                         dkD  rh| j                  dk(  rY| xj                  dz  c_        t        j                  d       t        j                  t        | j                  d	      fcd d d        S | j                  j                         dkD  rw| j                   dk(  st,        j.                  sX| xj                   dz  c_        t        j                  d       t        j"                  t%        | j                        fcd d d        S | j&                  j                         dkD  rX| xj(                  dz  c_        t        j                  d       t        j*                  t%        | j&                        fcd d d        S | j                  j                         dkD  rY| xj                  dz  c_        t        j                  d       t        j                  t        | j                  d	      fcd d d        S | j                  dk(  r| j                  j                         dkD  r| j                  |t        j
                         | j                  z
  dkD  rX| xj                  dz  c_        t        j                  d       t        j                  t        | j                        fcd d d        S | j                  dk(  r| j                  j                         dkD  r| j&                  j                         dk(  r| j                  j                         dk(  r| j                  j                         dk(  r| j(                  dk(  rv| j                  dk(  rg| j                   dk(  rX| xj                  dz  c_        t        j                  d       t        j                  t        | j                        fcd d d        S t1        d | j2                  D              rt        j5                  d       	 d d d        y | xj6                  dz  c_        t        j                  dt8         d       t        j:                  g fcd d d        S # 1 sw Y   y xY w)Nr   i,  r   z;Job: commit (more than 5 minutes since last commit attempt)r   zJob: commit (>100 files ready)r   z&Job: get upload mode (>10 files ready)2   z5Job: preupload LFS (no other worker preuploading LFS)z.Job: sha256 (no other worker computing sha256)z:Job: get upload mode (no other worker getting upload mode)zJob: preupload LFSzJob: sha256zJob: get upload moder   z-Job: commit (1 min since last commit attempt)zJob: commitc              3   V   K   | ]!  \  }}|j                   xs |j                   # y wr/   r   r   s      r5   r6   z&_determine_next_job.<locals>.<genexpr>  s+     aT`[Q&&@(*@*@@T`r   z.All files have been processed! Exiting worker.zNo task available, waiting... (zs))r{   r   rz   qsizer   rR   rE   r   re   rn   _get_items_to_commitrx   r}   rl   _get_nry   r~   rm   _get_onerw   r|   rk   r   HF_HUB_ENABLE_HF_TRANSFERr   r^   rF   r   r   ro   )r8   s    r5   r   r   t  s   	 $$)##))+a/**6		f8886A$$)$LLVW$$&:6;N;N&OP 
 %%*v/B/B/H/H/Jc/Q$$)$LL9:$$&:6;N;N&OP! 
& ))//1R7--2-LLAB--vf6R6RTV/WX- 
2 ''--/!38W8W[\8\++q0+LLPQ++Xf6P6P-QR9 
>   &&(1,1I1IQ1N$$)$LLIJ$$hv/B/B&CDE 
J ))//1A5&:[:[_`:`--2-LLUV--vf6R6RTV/WXQ 
X ''--/!3++q0	8[8[++q0+LL-.++Xf6P6P-QRc 
h   &&(1,$$)$LL'$$hv/B/B&CDo 
t ))//1A5--2-LL/0--vf6R6RTV/WX{ 
B $$)##))+a/**6		f8886A$$)$LLHI$$&:6;N;N&OPQ 
Z $$)##))+a/##))+q0,,2249**002a7((A-11Q6//14$$)$LL'$$&:6;N;N&OPq 
v aTZT`T`aaKKHI{ 
B %%*%LL:;S:TTVWXNNB'G 
s_   B+\A:\A,\<A:\ A:\A;\	B
\A+\A,\B*\<C>\2\?A\\r   c                     | \  }}|j                   B|j                  j                  d      5 }t        |      j	                         |_         ddd       |j                  |       y# 1 sw Y   xY w)z1Compute sha256 of a file and save it in metadata.Nrb)r   	file_pathopenr   hexsave)r   r]   r   fs       r5   r   r     sW    OE8__!!$'1)!n002HO (MM% ('s   A&&A/r^   c                    | D cg c]  }t        |       }}t        ||||j                         |       t        | |      D ]=  \  }}|\  }}	|j                  |	_        |j                  |	_        |	j                  |       ? yc c}w )zmGet upload mode for each file and update metadata.

    Also receive info if the file should be ignored.
    )	additionsr*   r(   headersr    N)	_build_hacky_operationr   _build_hf_headerszip_upload_moder   _should_ignorer   r   )
r^   r'   r(   r*   r    r   r   additionr]   r   s
             r5   r   r     s    
 ;@@%$'-%I@%%' eY/hx'44!)!8!8e	 0 As   Bc                     | \  }}t        |       }|j                  ||||g       d|_        |j                  |       y)z'Preupload LFS file and update metadata.)r(   r*   r    r   TN)r   preupload_lfs_filesr   r   )r   r'   r(   r*   r    r]   r   r   s           r5   r   r     sL    OE8%d+H*	    HMM%rq   c                     | D cg c]  }t        |       }}|j                  ||||d       | D ]  \  }}d|_        |j                  |        yc c}w )zCommit files to the repo.z(Add files using upload-large-folder tool)r(   r*   r    
operationscommit_messageTN)r   create_commitr   r   )	r^   r'   r(   r*   r    r   r   r]   r   s	            r5   r   r     sh    :?@%$'-%I@A   !x $e ! As   Ac                       e Zd ZddZy)HackyCommitOperationAddNc                 n    t        | j                  t              rt        | j                        | _        y y r/   )r?   path_or_fileobjr   r@   r   s    r5   __post_init__z%HackyCommitOperationAdd.__post_init__!  s*    d**D1#&t';';#<D  2rq   )r   N)rf   rg   rh   r   rp   rq   r5   r   r      s    =rq   r   c                 |   | \  }}t        |j                  |j                        }|j                  j                  d      5 }|j	                  d      d d }d d d        |j
                  t        d      t        t        j                  |j
                        |j                        |_        |S # 1 sw Y   \xY w)N)rI   r   r   i   z&sha256 must have been computed by now!)r   r   sample)r   rI   r   r   peekr   r;   r   bytesfromhexr   upload_info)r   r]   r   	operationfiler   s         r5   r   r   &  s    OE8'U5G5GY^YhYhiI			d	#t3% 
$ABB&emmHOO.LS[S`S`iopI 
$	#s   B22B;ru   zqueue.Queue[JOB_ITEM_T]c                 &     | j                          gS r/   )get)ru   s    r5   r   r   6  s    EIIK=rq   nc                     t        t         | j                         |            D cg c]  } | j                          c}S c c}w r/   )rK   minr   r   )ru   r   r_   s      r5   r   r   :  s8    !&s;5;;=!'<!=>!=AIEIIK!=>>>s   Ac                     g }d\  }}	  | j                          dk(  r|S |t        k\  s	|t        k\  r|S  | j                         }|j	                  |       |\  }}|j
                  dk(  r|dz  }n|dz  }l)zXSpecial case for commit job: the number of items to commit depends on the type of files.)r   r   r   rt   r   )r   MAX_NB_LFS_FILES_PER_COMMITMAX_NB_REGULAR_FILES_PER_COMMITr   appendr   )ru   r^   r   
nb_regularr   r_   r   s          r5   r   r   >  s     !EFJ
5;;=AL 00JBa4aL uyy{T85(aKF!OJ! rq   reportc                 $   | dz  } t        j                         j                  t        fd| j	                         D              }t        |      D ]@  }t        j                  j                  d       t        j                  j                  d       B t        j                  j                  |        t        j                  j                  dt        | j	                         d         z
  z         t        j                  j                          y)zPrint a report, overwriting the previous lines.

    Since tqdm in using `sys.stderr` to (re-)write progress bars, we need to use `sys.stdout`
    to print the report.

    Note: works well only if no other process is writing to `sys.stdout`!
    r   c              3   @   K   | ]  }t        |      z  d z     yw)r   N)rH   )r3   lineterminal_widths     r5   r6   z#_print_overwrite.<locals>.<genexpr>c  s"     S?Rt3t9.2?Rs   z[Kz[Fr   N)shutilget_terminal_sizecolumnssum
splitlinesrK   sysstdoutwriterH   flush)r   nb_linesr_   r   s      @r5   rT   rT   V  s     dNF--/77N Sv?P?P?RSSH 8_

$

" 
 JJVJJSNS1B1B1DR1H-IIJKJJrq   )Dri   rV   rA   ru   r   r  rL   rR   r   r   pathlibr   r   typingr   r   r   r	   r
    r   _commit_apir   r   r   _local_folderr   r   r   r   r   r   utilsr   r   r   utils._cache_managerr   	utils.shar   hf_apir   	getLoggerrf   rE   r   r   r   r@   r   intrc   Enumre   r   rJ   rN   r   r   r   r   r   r   r   r   r   r   rT   rp   rq   r5   <module>r     s     	   
       > >  L L v v 3 E E . " 			8	$ "$ !  #"6:7;!% d(	d(d( sDy!d(
 d( smd( d^d( U49c>23d( eDIsN34d( #d( d( d(X		  ')@@A
cf cfLd/d/	d/ d/ 	d/
 d/Nd( 1 d(huYPTU_P`E`?a6b d(X*  D, 7 S UX dg lp ( ' C C [^ cg 4
# ' C C [^ cg (=0 = 0G  - $z2B ?+ ? ?Z8H ? 9 d:>N 0S T rq   