
    g                     @   d dl Z d dlmZ d dlZd dlmZmZ e j                  j                   e j                  d ej                               d      a e j                  t        d       e j                  j                  t        d      ad	 Zd defdZd Zdedeeeeeeeeeeef	      fdZd!dedededededed
edededefdZdefdZd Zd Zd"dedefdZedk(  r=e j                  j5                  t              r e j6                  t                e        yy)#    N)Path)OptionalTupleCRAWL4_AI_BASE_DIRECTORYz	.crawl4aiT)exist_okzcrawl4ai.dbc                      t        j                  t              } | j                         }|j	                  d       | j                          | j                          y )Na  
        CREATE TABLE IF NOT EXISTS crawled_data (
            url TEXT PRIMARY KEY,
            html TEXT,
            cleaned_html TEXT,
            markdown TEXT,
            extracted_content TEXT,
            success BOOLEAN,
            media TEXT DEFAULT "{}",
            links TEXT DEFAULT "{}",
            metadata TEXT DEFAULT "{}",
            screenshot TEXT DEFAULT ""
        )
    )sqlite3connectDB_PATHcursorexecutecommitclose)connr   s     F/var/www/openai/venv/lib/python3.12/site-packages/crawl4ai/database.pyinit_dbr   
   s@    ??7#D[[]F
NN  	 	KKMJJL    media
new_columnc                    t                	 t        j                  t              }|j	                         }|j                  d|  d       |j                          |j                          y # t        $ r}t        d|        Y d }~y d }~ww xY w)Nz$ALTER TABLE crawled_data ADD COLUMN z TEXT DEFAULT ""z2Error altering database to add screenshot column: 
check_db_pathr	   r
   r   r   r   r   r   	Exceptionprint)r   r   r   es       r   alter_db_add_screenshotr      st    OHw'=j\IYZ[

 HB1#FGGH   AA+ +	B4BBc                  &    t         st        d      y )Nz%Database path is not set or is empty.)r   
ValueError r   r   r   r   *   s    @AA r   urlreturnc                    t                	 t        j                  t              }|j	                         }|j                  d| f       |j                         }|j                          |S # t        $ r}t        d|        Y d }~y d }~ww xY w)NzSELECT url, html, cleaned_html, markdown, extracted_content, success, media, links, metadata, screenshot FROM crawled_data WHERE url = ?zError retrieving cached URL: 
r   r	   r
   r   r   r   fetchoner   r   r   )r!   r   r   resultr   s        r   get_cached_urlr'   .   s    O	w'  b  eh  dj  	k"

 -aS12s   AA* *	B3BBhtmlcleaned_htmlmarkdownextracted_contentsuccesslinksmetadata
screenshotc
                 ,   t                	 t        j                  t              }
|
j	                         }|j                  d| |||||||||	f
       |
j                          |
j                          y # t        $ r}t        d|        Y d }~y d }~ww xY w)Na  
            INSERT INTO crawled_data (url, html, cleaned_html, markdown, extracted_content, success, media, links, metadata, screenshot)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(url) DO UPDATE SET
                html = excluded.html,
                cleaned_html = excluded.cleaned_html,
                markdown = excluded.markdown,
                extracted_content = excluded.extracted_content,
                success = excluded.success,
                media = excluded.media,      
                links = excluded.links,    
                metadata = excluded.metadata,      
                screenshot = excluded.screenshot
        zError caching URL: r   )r!   r(   r)   r*   r+   r,   r   r-   r.   r/   r   r   r   s                r   	cache_urlr1   ;   s    O)w'  4x1BGUTY[ceop	r 	

 )#A3'(()s   A%A2 2	B;BBc                     t                	 t        j                  t              } | j	                         }|j                  d       |j                         }| j                          |d   S # t        $ r}t        d|        Y d }~yd }~ww xY w)Nz!SELECT COUNT(*) FROM crawled_datar   zError getting total count: r$   )r   r   r&   r   s       r   get_total_countr3   S   st    O	w':;"

ay +A3/0r   c                     t                	 t        j                  t              } | j	                         }|j                  d       | j                          | j                          y # t        $ r}t        d|        Y d }~y d }~ww xY w)NzDELETE FROM crawled_datazError clearing database: r   r   r   r   s      r   clear_dbr6   `   sg    O/w'12

 /)!-../   AA' '	B0BBc                     t                	 t        j                  t              } | j	                         }|j                  d       | j                          | j                          y # t        $ r}t        d|        Y d }~y d }~ww xY w)NzDROP TABLE crawled_datazError flushing database: r   r5   s      r   flush_dbr9   k   sg    O/w'01

 /)!-../r7   default_valuec                 $   t                	 t        j                  t              }|j	                         }|j                  d|  d| d       |j                          |j                          y # t        $ r}t        d|        Y d }~y d }~ww xY w)NzUPDATE crawled_data SET z = "z" WHERE screenshot IS NULLz!Error updating existing records: r   )r   r:   r   r   r   s        r   update_existing_recordsr<   v   sw    O7w'1*T-Pjkl

 71!5667s   A!A. .	B7B

B__main__)r   ){}r>   r>    )r   r>   )ospathlibr   r	   typingr   r   pathjoingetenvhomer   makedirsr   strr   r   boolr'   r1   intr3   r6   r9   r<   __name__existsremover    r   r   <module>rN      sb   	   "
'',,yryy!;YTYY[I;
W Gd #
'',,w
.*	H 	HB sCc3SRVX[/[)\ ] )3 )c ) ) )X[ )fj )tw )  IL )  `c )  x{ )0 	/	/	7 	7c 	7 z	ww~~g		'I	 r   