import os
from typing import Dict, List, Optional
from urllib.parse import urljoin

import html2text
import httpx
from autogen_core.code_executor import ImportFromModule
from autogen_core.tools import FunctionTool
from bs4 import BeautifulSoup


async def google_search(
    query: str,
    num_results: int = 3,
    include_snippets: bool = True,
    include_content: bool = True,
    content_max_length: Optional[int] = 10000,
    language: str = "en",
    country: Optional[str] = None,
    safe_search: bool = True,
) -> List[Dict[str, str]]:
    """
    Perform a Google search using the Custom Search API and optionally fetch webpage content.

    Args:
        query: Search query string
        num_results: Number of results to return (max 10)
        include_snippets: Include result snippets in output
        include_content: Include full webpage content in markdown format
        content_max_length: Maximum length of webpage content (if included)
        language: Language code for search results (e.g., en, es, fr)
        country: Optional country code for search results (e.g., us, uk)
        safe_search: Enable safe search filtering

    Returns:
        List[Dict[str, str]]: List of search results, each containing:
            - title: Result title
            - link: Result URL
            - snippet: Result description (if include_snippets=True)
            - content: Webpage content in markdown (if include_content=True)
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    cse_id = os.getenv("GOOGLE_CSE_ID")

    if not api_key or not cse_id:
        raise ValueError("Missing required environment variables. Please set GOOGLE_API_KEY and GOOGLE_CSE_ID.")

    num_results = min(max(1, num_results), 10)

    async def fetch_page_content(url: str, max_length: Optional[int] = 50000) -> str:
        """Helper function to fetch and convert webpage content to markdown"""
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(url, headers=headers, timeout=10)
                response.raise_for_status()

                soup = BeautifulSoup(response.text, "html.parser")

                # Remove script and style elements
                for script in soup(["script", "style"]):
                    script.decompose()

                # Convert relative URLs to absolute
                for tag in soup.find_all(["a", "img"]):
                    if tag.get("href"):
                        tag["href"] = urljoin(url, tag["href"])
                    if tag.get("src"):
                        tag["src"] = urljoin(url, tag["src"])

                h2t = html2text.HTML2Text()
                h2t.body_width = 0
                h2t.ignore_images = False
                h2t.ignore_emphasis = False
                h2t.ignore_links = False
                h2t.ignore_tables = False

                markdown = h2t.handle(str(soup))

                if max_length and len(markdown) > max_length:
                    markdown = markdown[:max_length] + "\n...(truncated)"

                return markdown.strip()

        except Exception as e:
            return f"Error fetching content: {str(e)}"

    params = {
        "key": api_key,
        "cx": cse_id,
        "q": query,
        "num": num_results,
        "hl": language,
        "safe": "active" if safe_search else "off",
    }

    if country:
        params["gl"] = country

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get("https://www.googleapis.com/customsearch/v1", params=params, timeout=10)
            response.raise_for_status()
            data = response.json()

            results = []
            if "items" in data:
                for item in data["items"]:
                    result = {"title": item.get("title", ""), "link": item.get("link", "")}
                    if include_snippets:
                        result["snippet"] = item.get("snippet", "")

                    if include_content:
                        result["content"] = await fetch_page_content(result["link"], max_length=content_max_length)

                    results.append(result)

            return results

    except httpx.RequestError as e:
        raise ValueError(f"Failed to perform search: {str(e)}") from e
    except KeyError as e:
        raise ValueError(f"Invalid API response format: {str(e)}") from e
    except Exception as e:
        raise ValueError(f"Error during search: {str(e)}") from e


# Create the enhanced Google search tool
google_search_tool = FunctionTool(
    func=google_search,
    description="""
    Perform Google searches using the Custom Search API with optional webpage content fetching.
    Requires GOOGLE_API_KEY and GOOGLE_CSE_ID environment variables to be set.
    """,
    global_imports=[
        ImportFromModule("typing", ("List", "Dict", "Optional")),
        "os",
        "httpx",
        "html2text",
        ImportFromModule("bs4", ("BeautifulSoup",)),
        ImportFromModule("urllib.parse", ("urljoin",)),
    ],
)