
    g                         d dl Z d dlZd dlZd dlmZ d dlZd dlmZ d dlZd dl	Z	d dl
Z
d dlZd dlmZ ddlmZmZ  eej                   d      Z G d	 d
      ZdedefdZddee   fdZd Zedk(  r e        yy)    N)Path)Optional)datetime   )AsyncLoggerLogLevelT)	log_levelverbosec                   R    e Zd ZdefdZdedefdZdedefdZdededefd	Zd
 Z	y)DatabaseMigrationdb_pathc                 x    || _         | j                  t        j                  j	                  |            | _        y N)r   _ensure_content_dirsospathdirnamecontent_paths)selfr   s     H/var/www/openai/venv/lib/python3.12/site-packages/crawl4ai/migrations.py__init__zDatabaseMigration.__init__   s)    !66rwww7OP    	base_pathreturnc                     dddddd}i }|j                         D ]A  \  }}t        j                  j                  ||      }t        j                  |d       |||<   C |S )	Nhtml_contentcleaned_htmlmarkdown_contentextracted_contentscreenshots)htmlcleanedmarkdown	extractedr    T)exist_ok)itemsr   r   joinmakedirs)r   r   dirsr   keyr   r   s          r   r   z&DatabaseMigration._ensure_content_dirs   sg    "%*,(
  JJLLC77<<	73DKKt,!%M# ) r   contentc                     t        j                         }|j                  |j                                |j	                         }|S r   )xxhashxxh64updateencode	hexdigest)r   r+   xcontent_hashs       r   _generate_content_hashz(DatabaseMigration._generate_content_hash(   s1    LLN	!"{{}r   content_typec                   K   |sy| j                  |      }t        j                  j                  | j                  |   |      }t        j                  j                  |      sLt        j                  |dd      4 d {   }|j                  |       d {    d d d       d {    |S |S 7 17 7 # 1 d {  7  sw Y   |S xY ww)N wzutf-8)encoding)	r4   r   r   r'   r   existsaiofilesopenwrite)r   r+   r5   r3   	file_pathfs         r   _store_contentz DatabaseMigration._store_content/   s     227;GGLL!3!3L!A<P	ww~~i(}}YgFF!ggg&&& GF | G& GFFF sZ   A=C?B1 CB7B3B7C(B5)	C3B75C7C
=C >C
Cc                 N  K   t         j                  dd       	 t        j                  | j                        4 d{   }|j                  d      4 d{   }|j                          d{   }ddd      d{    d}D ]  }|\  }}}}	}
}| j                  |d       d{   }| j                  |d       d{   }| j                  |	d	       d{   }| j                  |
d
       d{   }| j                  |d       d{   }|j                  d||||||f       d{    |dz  }|dz  dk(  st         j                  d| dd        |j                          d{    t         j                  d| dd       ddd      d{    y7 h7 Q7 <7 /# 1 d{  7  sw Y   @xY w7 7 7 7 7 7 7 `7 7# 1 d{  7  sw Y   yxY w# t        $ r*}t         j                  dddt        |      i       |d}~ww xY ww)z/Migrate existing database to file-based storagezStarting database migration...INITtagNzqSELECT url, html, cleaned_html, markdown, 
                       extracted_content, screenshot FROM crawled_datar   r!   r"   r#   r$   r    aE  
                        UPDATE crawled_data 
                        SET html = ?, 
                            cleaned_html = ?,
                            markdown = ?,
                            extracted_content = ?,
                            screenshot = ?
                        WHERE url = ?
                    r   d   z	Migrated z records...zMigration completed. z records processed.COMPLETEMigration failed: {error}ERRORerrormessagerD   params)loggerinfo	aiosqliteconnectr   executefetchallr@   commitsuccess	ExceptionrI   str)r   dbcursorrowsmigrated_countrowurlr!   r   r#   r   
screenshot	html_hashcleaned_hashmarkdown_hashextracted_hashscreenshot_hashes                     r   migrate_databasez"DatabaseMigration.migrate_database<   s1     	4&A/	 ((66"::J  !'!22D	  "#CWZTC|X7H* '+&9&9$&G GI)-)<)<\9)U#UL*.*=*=h
*S$SM+/+>+>?PR]+^%^N,0,?,?
M,Z&ZO ** & $\='#?	@ 	@ 	@ #a'N%+q0i/?{$KQWX1  6 iik!!!6~6FFYZ`jkK 766 3	    !H#U$S%^&Z	@  "I 7666N  	LL3Q(  
 G	sR  H%#G/ F&G/ GF)GF20F,1F25G F/)G*G+GGGGG8G9GGG2G3G0G5G6GG/  G!G/ %H%&G/ )G,F2/G2G	8F;9G	 	GGGGGGGG/ G, G#!G,(G/ +H%,G/ /	H"8%HH""H%N)
__name__
__module____qualname__rV   r   dictr   r4   r@   rd    r   r   r   r      sU    Q Qc d c c C s s 4r   r   r   r   c           	        K   t         j                  j                  |       st        j	                  dd       yt        j                         j                  d      }|  d| }	 t        j                  d       d{    t        j                  | |       t        j	                  d| d	       |S 7 6# t        $ r*}t        j                  d
ddt        |      i       |d}~ww xY ww)z"Create backup of existing databasez,No existing database found. Skipping backup.rB   rC   Nz%Y%m%d_%H%M%Sz.backup_r   zDatabase backup created at: rF   rG   rH   rI   rJ   )r   r   r:   rM   rN   r   nowstrftimeasynciosleepshutilcopy2rU   rI   rV   )r   	timestampbackup_pathrc   s       r   backup_databasers   r   s     77>>'"BO ''8IIXi[1KmmA 	Wk*2;-@jQ 	  3Q( 	 	
 s<   A"C,%B6 =B4>5B6 3C,4B6 6	C)?%C$$C))C,c                 ^  K   | 3t         j                  j                  t        j                         dd      } t         j                  j                  |       st        j                  dd       yt        |        d{   }|syt        |       }|j                          d{    y7 +7 w)zRun database migrationNz	.crawl4aizcrawl4ai.dbz/No existing database found. Skipping migration.rB   rC   )r   r   r'   r   homer:   rM   rN   rs   r   rd   )r   rr   	migrations      r   run_migrationrw      s     '',,tyy{KG77>>'"E6R (00K!'*I

$
$
&&& 1
 's$   A;B-=B)>%B-#B+$B-+B-c                      ddl } | j                  d      }|j                  dd       |j                         }t	        j
                  t        |j                               y)zCLI entry point for migrationr   Nz/Migrate Crawl4AI database to file-based storage)descriptionz	--db-pathzCustom database path)help)argparseArgumentParseradd_argument
parse_argsrm   runrw   r   )r{   parserargss      r   mainr      sP    $$1b$cF
*@ADKKdll+,r   __main__r   )r   rm   loggingpathlibr   rO   typingr   r-   r;   ro   timer   async_loggerr   r   DEBUGrM   r   rV   rs   rw   r   re   ri   r   r   <module>r      s    	           / 
x~~t	<
\ \|3 3 6'# '"- zF r   