import json
import os
from typing import Dict, List, Optional
from urllib.parse import urljoin

import html2text
import httpx
from autogen_core.code_executor import ImportFromModule
from autogen_core.tools import FunctionTool
from bs4 import BeautifulSoup


async def bing_search(
    query: str,
    num_results: int = 3,
    include_snippets: bool = True,
    include_content: bool = True,
    content_max_length: Optional[int] = 10000,
    language: str = "en",
    country: Optional[str] = None,
    safe_search: str = "moderate",
    response_filter: str = "webpages",
) -> List[Dict[str, str]]:
    """
    Perform a Bing search using the Bing Web Search API.

    Args:
        query: Search query string
        num_results: Number of results to return (max 50)
        include_snippets: Include result snippets in output
        include_content: Include full webpage content in markdown format
        content_max_length: Maximum length of webpage content (if included)
        language: Language code for search results (e.g., 'en', 'es', 'fr')
        country: Optional market code for search results (e.g., 'us', 'uk')
        safe_search: SafeSearch setting ('off', 'moderate', or 'strict')
        response_filter: Type of results ('webpages', 'news', 'images', or 'videos')

    Returns:
        List[Dict[str, str]]: List of search results

    Raises:
        ValueError: If API credentials are invalid or the request fails
    """
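    # Illustrative call (assumes a valid BING_SEARCH_KEY in the environment):
    #   results = await bing_search("autogen agents", num_results=2, include_content=False)
    #   # -> [{"title": ..., "link": ..., "snippet": ...}, ...]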
    # Get and validate API key
    api_key = os.getenv("BING_SEARCH_KEY", "").strip()

    if not api_key:
        raise ValueError(
            "BING_SEARCH_KEY environment variable is not set. Please obtain an API key from the Azure Portal."
        )

    # Validate safe_search parameter
    valid_safe_search = ["off", "moderate", "strict"]
    if safe_search.lower() not in valid_safe_search:
        raise ValueError(f"Invalid safe_search value. Must be one of: {', '.join(valid_safe_search)}")

    # Validate response_filter parameter
    valid_filters = ["webpages", "news", "images", "videos"]
    if response_filter.lower() not in valid_filters:
        raise ValueError(f"Invalid response_filter value. Must be one of: {', '.join(valid_filters)}")

    # Normalize casing so the exact comparisons below (e.g. response_filter == "webpages")
    # accept the same case-insensitive input that the validation above does
    response_filter = response_filter.lower()

    async def fetch_page_content(url: str, max_length: Optional[int] = 50000) -> str:
        """Helper function to fetch and convert webpage content to markdown"""
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(url, headers=headers, timeout=10)
                response.raise_for_status()

                soup = BeautifulSoup(response.text, "html.parser")

                # Remove script and style elements
                for script in soup(["script", "style"]):
                    script.decompose()

                # Convert relative URLs to absolute
                for tag in soup.find_all(["a", "img"]):
                    if tag.get("href"):
                        tag["href"] = urljoin(url, tag["href"])
                    if tag.get("src"):
                        tag["src"] = urljoin(url, tag["src"])

                h2t = html2text.HTML2Text()
                h2t.body_width = 0
                h2t.ignore_images = False
                h2t.ignore_emphasis = False
                h2t.ignore_links = False
                h2t.ignore_tables = False

                markdown = h2t.handle(str(soup))

                if max_length and len(markdown) > max_length:
                    markdown = markdown[:max_length] + "\n...(truncated)"

                return markdown.strip()

        except Exception as e:
            return f"Error fetching content: {str(e)}"

    # Build request headers and parameters
    headers = {"Ocp-Apim-Subscription-Key": api_key, "Accept": "application/json"}

    params = {
        "q": query,
        "count": min(max(1, num_results), 50),
        # Bing market codes combine language and country, e.g. "en-US"
        "mkt": f"{language}-{country.upper()}" if country else language,
        "safeSearch": safe_search.capitalize(),
        "responseFilter": response_filter,
        "textFormat": "raw",
    }

    # Make the request
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                "https://api.bing.microsoft.com/v7.0/search", headers=headers, params=params, timeout=10
            )

            # Handle common error cases
            if response.status_code == 401:
                raise ValueError("Authentication failed. Please verify your Bing Search API key.")
            elif response.status_code == 403:
                raise ValueError(
                    "Access forbidden. This could mean:\n"
                    "1. The API key is invalid\n"
                    "2. The API key has expired\n"
                    "3. You've exceeded your API quota"
                )
            elif response.status_code == 429:
                raise ValueError("API quota exceeded. Please try again later.")

            response.raise_for_status()
            data = response.json()

        # Process results based on response_filter
        results = []
        if response_filter == "webpages" and "webPages" in data:
            items = data["webPages"]["value"]
        elif response_filter == "news" and "news" in data:
            items = data["news"]["value"]
        elif response_filter == "images" and "images" in data:
            items = data["images"]["value"]
        elif response_filter == "videos" and "videos" in data:
            items = data["videos"]["value"]
        else:
            if not any(key in data for key in ["webPages", "news", "images", "videos"]):
                return []  # No results found
            raise ValueError(f"No {response_filter} results found in API response")

        # Extract relevant information based on result type
        for item in items:
            result = {"title": item.get("name", "")}

            if response_filter == "webpages":
                result["link"] = item.get("url", "")
                if include_snippets:
                    result["snippet"] = item.get("snippet", "")
                if include_content:
                    result["content"] = await fetch_page_content(result["link"], max_length=content_max_length)

            elif response_filter == "news":
                result["link"] = item.get("url", "")
                if include_snippets:
                    result["snippet"] = item.get("description", "")
                result["date"] = item.get("datePublished", "")
                if include_content:
                    result["content"] = await fetch_page_content(result["link"], max_length=content_max_length)

            elif response_filter == "images":
                result["link"] = item.get("contentUrl", "")
                result["thumbnail"] = item.get("thumbnailUrl", "")
                if include_snippets:
                    result["snippet"] = item.get("description", "")

            elif response_filter == "videos":
                result["link"] = item.get("contentUrl", "")
                result["thumbnail"] = item.get("thumbnailUrl", "")
                if include_snippets:
                    result["snippet"] = item.get("description", "")
                result["duration"] = item.get("duration", "")

            results.append(result)

        return results[:num_results]

    except httpx.HTTPError as e:
        error_msg = str(e)
        if "InvalidApiKey" in error_msg:
            raise ValueError("Invalid API key. Please check your BING_SEARCH_KEY environment variable.") from e
        elif "KeyExpired" in error_msg:
            raise ValueError("API key has expired. Please generate a new key.") from e
        else:
            raise ValueError(f"Search request failed: {error_msg}") from e
    except json.JSONDecodeError:
        raise ValueError("Failed to parse API response. Please verify your API credentials and try again.") from None
    except ValueError:
        # Let the status-code errors raised above propagate unchanged instead of
        # being re-wrapped by the generic handler below
        raise
    except Exception as e:
        raise ValueError(f"Unexpected error during search: {str(e)}") from e


# Create the Bing search tool
bing_search_tool = FunctionTool(
    func=bing_search,
    description="""
    Perform Bing searches using the Bing Web Search API. Requires BING_SEARCH_KEY environment variable.
    Supports web, news, image, and video searches.
    See function documentation for detailed setup instructions.
    """,
    global_imports=[
        ImportFromModule("typing", ("List", "Dict", "Optional")),
        "os",
        "httpx",
        "json",
        "html2text",
        ImportFromModule("bs4", ("BeautifulSoup",)),
        ImportFromModule("urllib.parse", ("urljoin",)),
    ],
)
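

# A minimal usage sketch, not part of the original tool definition: it assumes
# BING_SEARCH_KEY is set and shows both the direct async call and the
# FunctionTool interface (run_json and CancellationToken come from autogen_core).
if __name__ == "__main__":
    import asyncio

    from autogen_core import CancellationToken

    async def _demo() -> None:
        # Direct call; skip full-page fetching to keep the demo fast
        results = await bing_search("autogen agents", num_results=2, include_content=False)
        for r in results:
            print(r["title"], "-", r["link"])

        # The same search routed through the FunctionTool wrapper
        tool_result = await bing_search_tool.run_json(
            {"query": "autogen agents", "num_results": 2, "include_content": False},
            CancellationToken(),
        )
        print(tool_result)

    asyncio.run(_demo())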