from __future__ import annotations

from dataclasses import dataclass
from dotenv import load_dotenv
from litellm import AsyncOpenAI
import logfire
import asyncio
import httpx
import os

from pydantic_ai import Agent, ModelRetry, RunContext
from pydantic_ai.models.openai import OpenAIModel
from typing import List
from chromadb.api.models.Collection import Collection

load_dotenv()

# Model name is configurable via the LLM_MODEL env var; defaults to the
# GPT-4 Turbo preview snapshot.
llm = os.getenv("LLM_MODEL", "gpt-4-0125-preview")
model = OpenAIModel(llm)

# Logfire telemetry is a no-op unless a token is present in the environment.
logfire.configure(send_to_logfire="if-token-present")



@dataclass
class PydanticAIDeps:
    """Dependencies injected into the agent's tool calls via ``RunContext``."""

    collection: Collection  # ChromaDB collection holding the crawled documentation chunks
    openai_client: AsyncOpenAI  # async client used to compute query embeddings

# system_prompt = """
# You are an expert assistant with access to a knowledge base of documentation and content.
# Your job is to help users understand and work with the content they've provided.
#  Write an 800 word article describing the things to see and do close and Refer to the data repo that you created when you crawled <siteurl> and include internal links to relevant pages.
#  Include images link in the article taken from the data repo and include our affiliate links directly to our partners where appropriate.
#  Add shortcodes that populate our article with a Places to Stay slider unit with 8 hotels that are close to the location. Make these hotels all 4* and above.
# When analyzing the content, make sure to:
# 1. Provide accurate information based on the stored content
# 2. Cite specific examples when possible
# 3. Be clear when you're making assumptions or inferring information
# """

# System prompt steering the agent toward SEO-focused long-form article
# generation with strict rules about only using images/links found in the
# crawled data repository.
# NOTE(review): the topic ("Best Sneakers for Men in 2025") and the example
# links are hard-coded into the prompt — presumably these should be
# parameterized per run; confirm with the prompt's owner.
system_prompt = """
# Optimized Prompt for Long-Form Content

## 📌 Instructions for AI
You are an **SEO-focused NLP content writer** who generates **high-quality, natural, and engaging content**. The article must:

- Avoid **robotic** or **cringe** writing.
- **Eliminate** complex sentences and unnecessary marketing language.
- Maintain a **simple and clear structure**, using **active voice** and **natural flow**.
- Only include images that are **explicitly found** in the crawled **data repository**.
- **DO NOT assume** or infer an image exists—if no valid image is found, **do not insert any image**.
- If an image exists on `<siteurl>` but is **not stored in the repository**, **do not include it**.
- Do **not** generate placeholder URLs or use external sources unless explicitly retrieved.
---

## 📝 General Guidelines

### ✅ Outline Structure
- **Create an article outline** based on **relevant internal links** and **image embeds**.
- **Do not include** broad topics like *sustainability* or *trends*.
- Use **8 subheadings**, each with **300-500 words**.

### ✅ Writing Style
- Write in **simple English (7-year-old readability)** but **avoid childish analogies**.
- **No robotic phrasing** or repetitive links.
- **Use an expert tone** with valuable **insights & personal anecdotes**.

### ✅ Formatting Requirements
- Use **Markdown** for proper formatting.
- Include **bullet points, tables, and graphs** where helpful.
- Embed **internal links** using **keyword-rich anchor text**:
  ```markdown
  [Brand Name](https://example.com/brand-page)
  ```
- Ensure **all images are clickable** with **their relevant URLs**:
  ```markdown
  ![Image Alt Text](https://example.com/image.jpg)
  ```

### ✅ Content Structure
- **Key Takeaways**: Add a **summary list or table** at the top.
- **Internal Links**: Integrate **diverse internal links** (brand-specific where applicable).
- **Image Embeds**: Include **one image per paragraph** if possible.
- **FAQs**: At the end, add **frequently asked questions** based on user intent.

---

## ❌ Example to Avoid
### Poor Writing Example:
```markdown
Men’s sneakers have evolved over the years. In 2025, they are a mix of style, durability, and innovation.
```
### ✅ Better Version:
```markdown
The best men’s sneakers in 2025 combine premium materials, modern aesthetics, and cutting-edge comfort. Here’s what to look for.
```

---

## 🎯 Article Topic:  
**Best Sneakers for Men in 2025**

---

## 🔹 Inputs for AI

### 1️⃣ **Key Information**  
*(Paste data from Perplexity or another reliable source here.)*  

### 2️⃣ **Internal Links**
Example:  
```markdown
[Luxury Sneakers](https://2men.it/collections/sneakers)
```
*(Provide only **relevant** links to avoid redundancy.)*  

### 3️⃣ **Image Embeds**
Example:  
```markdown
![Kiton Sneakers](https://cdn.shopify.com/s/files/1/0665/8172/5438/collections/KITON_SHOES.jpg)
```

---
"""

# Agent wired with the system prompt above; each tool call receives a
# PydanticAIDeps instance and failed model runs are retried up to twice.
pydantic_ai_agent = Agent(
    model, system_prompt=system_prompt, deps_type=PydanticAIDeps, retries=2
)


async def get_embedding(text: str, openai_client: AsyncOpenAI) -> List[float]:
    """Embed *text* with OpenAI's ``text-embedding-3-small`` model.

    Args:
        text: The text to embed.
        openai_client: Async OpenAI client used for the embeddings call.

    Returns:
        The embedding vector, or a zero vector of the model's dimensionality
        if the API call fails, so downstream similarity queries degrade
        gracefully instead of crashing.
    """
    embedding_dim = 1536  # dimensionality of text-embedding-3-small vectors
    try:
        response = await openai_client.embeddings.create(
            model="text-embedding-3-small", input=text
        )
        return response.data[0].embedding
    except Exception as e:
        # Deliberate best-effort fallback: log and return a float zero vector.
        # (The original returned ints, violating the List[float] contract.)
        print(f"Error getting embedding: {e}")
        return [0.0] * embedding_dim


@pydantic_ai_agent.tool
async def retrieve_relevant_documentation(
    ctx: RunContext[PydanticAIDeps], user_query: str
) -> str:
    """Retrieve relevant documentation chunks based on the query."""
    try:
        # Embed the query, then pull the 5 nearest chunks from ChromaDB.
        embedding = await get_embedding(user_query, ctx.deps.openai_client)

        query_result = ctx.deps.collection.query(
            query_embeddings=[embedding],
            n_results=5,
            include=["documents", "metadatas"],
        )

        docs = query_result["documents"][0]
        if not docs:
            return "No relevant documentation found."
        metas = query_result["metadatas"][0]

        # Render each chunk as a small Markdown section with title and source.
        sections = [
            f"""
# {meta['title']}

{document}

Source: {meta['url']}
"""
            for document, meta in zip(docs, metas)
        ]

        return "\n\n---\n\n".join(sections)

    except Exception as e:
        print(f"Error retrieving documentation: {e}")
        return f"Error retrieving documentation: {str(e)}"


@pydantic_ai_agent.tool
async def list_documentation_pages(ctx: RunContext[PydanticAIDeps]) -> List[str]:
    """Retrieve a list of all available documentation pages."""
    try:
        records = ctx.deps.collection.get(include=["metadatas"])
        metadatas = records["metadatas"]
        if not metadatas:
            return []

        # De-duplicate URLs across chunks and return them in sorted order.
        return sorted({entry["url"] for entry in metadatas})

    except Exception as e:
        print(f"Error retrieving documentation pages: {e}")
        return []


@pydantic_ai_agent.tool
async def get_page_content(ctx: RunContext[PydanticAIDeps], url: str) -> str:
    """Retrieve the full content of a specific documentation page."""
    try:
        records = ctx.deps.collection.get(
            where={"url": url}, include=["documents", "metadatas"]
        )

        if not records["documents"]:
            return f"No content found for URL: {url}"

        # Pair each chunk with its metadata and restore the original page order.
        ordered = sorted(
            zip(records["documents"], records["metadatas"]),
            key=lambda pair: pair[1]["chunk_number"],
        )

        # The stored title is "<page> - <chunk>"; keep only the page part.
        title = ordered[0][1]["title"].split(" - ")[0]
        parts = [f"# {title}\n", *(chunk for chunk, _ in ordered)]

        return "\n\n".join(parts)

    except Exception as e:
        print(f"Error retrieving page content: {e}")
        return f"Error retrieving page content: {str(e)}"
