
    g                       d dl mZ d dlmZ d dlmZ d dlmZ d dlZd dl	Z	d dl
Z
d dlZd dlmZmZmZ d dlmZ d dlmZ d d	lmZ  e         ej,                  d
d      Z ee      Z ej2                  d       e G d d             ZdZ eeeed      ZddZej<                  	 	 	 	 	 	 dd       Zej<                  dd       Z ej<                  dd       Z!y)    )annotations)	dataclass)load_dotenv)AsyncOpenAIN)Agent
ModelRetry
RunContext)OpenAIModel)List)
Collection	LLM_MODELzgpt-4-0125-previewzif-token-present)send_to_logfirec                  "    e Zd ZU ded<   ded<   y)PydanticAIDepsr   
collectionr   openai_clientN)__name__
__module____qualname____annotations__     2/var/www/openai/gen-rag-crawl/pydantic_ai_agent.pyr   r      s    r   r   ub  
# Optimized Prompt for Long-Form Content

## 📌 Instructions for AI
You are an **SEO-focused NLP content writer** who generates **high-quality, natural, and engaging content**. The article must:

- Avoid **robotic** or **cringe** writing.
- **Eliminate** complex sentences and unnecessary marketing language.
- Maintain a **simple and clear structure**, using **active voice** and **natural flow**.
- Only include images that are **explicitly found** in the crawled **data repository**.
- **DO NOT assume** or infer an image exists—if no valid image is found, **do not insert any image**.
- If an image exists on `<siteurl>` but is **not stored in the repository**, **do not include it**.
- Do **not** generate placeholder URLs or use external sources unless explicitly retrieved.
---

## 📝 General Guidelines

### ✅ Outline Structure
- **Create an article outline** based on **relevant internal links** and **image embeds**.
- **Do not include** broad topics like *sustainability* or *trends*.
- Use **8 subheadings**, each with **300-500 words**.

### ✅ Writing Style
- Write in **simple English (7-year-old readability)** but **avoid childish analogies**.
- **No robotic phrasing** or repetitive links.
- **Use an expert tone** with valuable **insights & personal anecdotes**.

### ✅ Formatting Requirements
- Use **Markdown** for proper formatting.
- Include **bullet points, tables, and graphs** where helpful.
- Embed **internal links** using **keyword-rich anchor text**:
  ```markdown
  [Brand Name](https://example.com/brand-page)
  ```
- Ensure **all images are clickable** with **their relevant URLs**:
  ```markdown
  ![Image Alt Text](https://example.com/image.jpg)
  ```

### ✅ Content Structure
- **Key Takeaways**: Add a **summary list or table** at the top.
- **Internal Links**: Integrate **diverse internal links** (brand-specific where applicable).
- **Image Embeds**: Include **one image per paragraph** if possible.
- **FAQs**: At the end, add **frequently asked questions** based on user intent.

---

## ❌ Example to Avoid
### Poor Writing Example:
```markdown
Men’s sneakers have evolved over the years. In 2025, they are a mix of style, durability, and innovation.
```
### ✅ Better Version:
```markdown
The best men’s sneakers in 2025 combine premium materials, modern aesthetics, and cutting-edge comfort. Here’s what to look for.
```

---

## 🎯 Article Topic:  
**Best Sneakers for Men in 2025**

---

## 🔹 Inputs for AI

### 1️⃣ **Key Information**  
*(Paste data from Perplexity or another reliable source here.)*  

### 2️⃣ **Internal Links**
Example:  
```markdown
[Luxury Sneakers](https://2men.it/collections/sneakers)
```
*(Provide only **relevant** links to avoid redundancy.)*  

### 3️⃣ **Image Embeds**
Example:  
```markdown
![Kiton Sneakers](https://cdn.shopify.com/s/files/1/0665/8172/5438/collections/KITON_SHOES.jpg)
```

---
   )system_prompt	deps_typeretriesc                   K   	 |j                   j                  d|        d{   }|j                  d   j                  S 7 # t        $ r}t        d|        dgdz  cY d}~S d}~ww xY ww)z!Get embedding vector from OpenAI.ztext-embedding-3-small)modelinputNr   zError getting embedding: i   )
embeddingscreatedata	embedding	Exceptionprint)textr   responsees       r   get_embeddingr*      sy     &1188*$ 9 
 
 }}Q)))
  )!-.sTzsD   A. A AA A.A 	A+A& A+!A.&A++A.c                  K   	 t        || j                  j                         d{   }| j                  j                  j	                  |gdddg      }|d   d   syg }t        |d   d   |d   d         D ](  \  }}d|d	    d
| d|d    d}|j                  |       * dj                  |      S 7 # t        $ r&}t        d|        dt        |       cY d}~S d}~ww xY ww)z:Retrieve relevant documentation chunks based on the query.N   	documents	metadatas)query_embeddings	n_resultsincluder   z No relevant documentation found.z
# title

z


Source: url
z

---

z Error retrieving documentation: )r*   depsr   r   queryzipappendjoinr%   r&   str)	ctx
user_queryquery_embeddingresultsformatted_chunksdocmetadata
chunk_textr)   s	            r   retrieve_relevant_documentationrD      s'    
; -j#((:P:P QQ((%%++-. +. , 
 {#A&5 !5a!8'+:Nq:QRMCG  		% J ##J/ S !!"233- R0  ;0451#a&::;sQ   C*#B8 B67B8 C* AB8 5C*6B8 8	C'C"C'C*"C''C*c                   K   	 | j                   j                  j                  dg      }|d   sg S t        t	        d |d   D                    }|S # t
        $ r}t        d|        g cY d}~S d}~ww xY ww)z5Retrieve a list of all available documentation pages.r.   )r1   c              3  &   K   | ]	  }|d      yw)r4   Nr   ).0metas     r   	<genexpr>z+list_documentation_pages.<locals>.<genexpr>   s     G2F$$u+2Fs   z&Error retrieving documentation pages: N)r6   r   getsortedsetr%   r&   )r<   r?   urlsr)   s       r   list_documentation_pagesrN      sz     ((%%));-)@{#IcG'+2FGGH 6qc:;	s>   A8-A A8A A8	A5A0*A5+A80A55A8c                  K   	 | j                   j                  j                  d|iddg      }|d   sd| S t        t	        |d   |d         d       }|d   d	   d
   j                  d      d   }d| dg}|D ]  \  }}|j                  |        dj                  |      S # t        $ r&}t        d|        dt        |       cY d}~S d}~ww xY ww)z;Retrieve the full content of a specific documentation page.r4   r-   r.   )wherer1   zNo content found for URL: c                    | d   d   S )N   chunk_numberr   )xs    r   <lambda>z"get_page_content.<locals>.<lambda>   s    !A$~.r   )keyr   rR   r2   z - z# r5   r3   zError retrieving page content: N)r6   r   rJ   rK   r8   splitr9   r:   r%   r&   r;   )	r<   r4   r?   sorted_results
page_titleformatted_contentrA   _r)   s	            r   get_page_contentr\      s    :((%%))#,k(B * 
 {#/u55$gk&:;.

 $A&q)'288?B
!*R01$FC$$S) % {{,-- :/s340Q99:s?   C4B' CA-B' &C'	C0CCCCC)r'   r;   r   r   returnzList[float])r<   RunContext[PydanticAIDeps]r=   r;   r]   r;   )r<   r^   r]   z	List[str])r<   r^   r4   r;   r]   r;   )"
__future__r   dataclassesr   dotenvr   litellmr   logfireasynciohttpxospydantic_air   r   r	   pydantic_ai.models.openair
   typingr   chromadb.api.models.Collectionr   getenvllmr   	configurer   r   pydantic_ai_agentr*   toolrD   rN   r\   r   r   r   <module>rp      s   " !      	 5 5 1  5 bii12C   "4 5    Sj 	.! 
	 ;	#;14;; ;D    : :r   