Microsoft's AutoGen framework lets you build multi-agent AI systems that collaborate on complex tasks. But most AutoGen agents are limited to their training data -- they cannot search the web or access live information. This tutorial shows you how to add web search capabilities to AutoGen agents using SearchHive's SwiftSearch API, so your agents can research, fact-check, and pull real-time data.
Prerequisites
- Python 3.10+
- An AutoGen installation (`pip install autogen-agentchat`)
- A SearchHive API key (free at searchhive.dev -- 500 credits to start)
Key Takeaways
- AutoGen agents need external tool integration to access live web data
- SearchHive's SwiftSearch API provides structured search results in a single REST call
- You can also use ScrapeForge for page content extraction and DeepDive for multi-hop research
- The entire integration takes under 50 lines of Python code
Step 1: Install Dependencies
pip install autogen-agentchat httpx python-dotenv
AutoGen's latest version uses a function-calling pattern where agents can invoke external tools. We will create a search tool that any agent can call.
Step 2: Create the SearchHive Search Tool
# tools/searchhive_tools.py
import httpx
import json
import os
from typing import Annotated

# Read the key from the environment (python-dotenv, installed in Step 1,
# can populate it from a .env file). The literal is only a tutorial
# placeholder -- never commit a real secret to source control.
SEARCHHIVE_API_KEY = os.getenv("SEARCHHIVE_API_KEY", "your-api-key-here")
SEARCHHIVE_BASE = "https://api.searchhive.dev/v1"
def web_search(
    query: Annotated[str, "The search query to look up on the web"]
) -> str:
    """Search the web using SearchHive SwiftSearch API.

    Returns structured search results with titles, URLs, and snippets.
    Use this to find current information, facts, pricing, or any
    data that may not be in the training data.
    """
    try:
        resp = httpx.post(
            f"{SEARCHHIVE_BASE}/swiftsearch",
            headers={
                "Authorization": f"Bearer {SEARCHHIVE_API_KEY}",
                "Content-Type": "application/json",
            },
            json={"query": query, "limit": 5},
            timeout=15.0,
        )
        resp.raise_for_status()
        data = resp.json()
        if not data.get("results"):
            return f"No results found for: {query}"
        formatted = []
        for i, r in enumerate(data["results"], 1):
            formatted.append(
                f"{i}. {r['title']}\n"
                f"   URL: {r['url']}\n"
                f"   {r.get('snippet', 'No snippet')}")
        return "\n\n".join(formatted)
    except httpx.TimeoutException:
        # TimeoutException subclasses RequestError, so it must come first.
        return "Search timed out. Try again."
    except httpx.HTTPStatusError as e:
        return f"Search error: {e.response.status_code}"
    except httpx.RequestError as e:
        # Connection/DNS failures previously propagated as exceptions and
        # crashed the agent's tool call; return an error string like the
        # other failure paths so the agent can recover.
        return f"Search error: {type(e).__name__}"
def scrape_page(
    url: Annotated[str, "The URL of the page to scrape"]
) -> str:
    """Extract full text content from a web page using ScrapeForge.

    Use this after web_search to get detailed content from a specific URL.
    Returns markdown-formatted text from the page.
    """
    request_headers = {
        "Authorization": f"Bearer {SEARCHHIVE_API_KEY}",
        "Content-Type": "application/json",
    }
    try:
        response = httpx.post(
            f"{SEARCHHIVE_BASE}/scrapeforge",
            headers=request_headers,
            json={"url": url, "format": "markdown"},
            timeout=30.0,
        )
        response.raise_for_status()
        page_text = response.json().get("content", "")
        # Cap at 4000 characters so the result fits in the agent's
        # context window (slicing a shorter string is a no-op).
        return page_text[:4000]
    except Exception as e:
        return f"Scrape error: {str(e)}"
def deep_research(
    query: Annotated[str, "Research topic to investigate in depth"]
) -> str:
    """Perform multi-hop research on a topic using SearchHive DeepDive.

    Automatically searches multiple sources and synthesizes findings.
    Best for complex questions that require cross-referencing.
    """
    endpoint = f"{SEARCHHIVE_BASE}/deepdive"
    auth_headers = {
        "Authorization": f"Bearer {SEARCHHIVE_API_KEY}",
        "Content-Type": "application/json",
    }
    try:
        response = httpx.post(
            endpoint,
            headers=auth_headers,
            # max_depth=2 balances thoroughness against latency and credits.
            json={"query": query, "max_depth": 2},
            timeout=60.0,
        )
        response.raise_for_status()
        payload = response.json()
        return payload.get("summary", "No summary available")
    except Exception as e:
        return f"Research error: {str(e)}"
Step 3: Configure AutoGen Agents with Web Access
# autogen_search_agent.py
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.conditions import MaxMessageTermination
from autogen_ext.models.openai import OpenAIChatCompletionClient
from tools.searchhive_tools import web_search, scrape_page, deep_research

# Configure the LLM (use any OpenAI-compatible endpoint).
# NOTE(review): prefer loading this key from an environment variable
# rather than hard-coding it in source.
model_client = OpenAIChatCompletionClient(
    model="gpt-4o-mini",
    api_key="your-llm-api-key"
)

# Researcher agent: the only agent with tool access. The system message
# tells it which of the three SearchHive tools to prefer for each task.
researcher = AssistantAgent(
    name="researcher",
    model_client=model_client,
    tools=[web_search, scrape_page, deep_research],
    system_message=(
        "You are a research agent. When you need information, "
        "use the web_search tool first. For detailed analysis of "
        "a specific page, use scrape_page. For complex multi-source "
        "questions, use deep_research. Always cite your sources."
    )
)

# Writer agent: no tools; it only synthesizes the researcher's findings
# into the final report.
writer = AssistantAgent(
    name="writer",
    model_client=model_client,
    system_message=(
        "You are a writer agent. You receive research from the "
        "researcher and produce clear, well-structured reports. "
        "Include source citations from the research."
    )
)
Step 4: Run a Multi-Agent Research Conversation
# main.py
import asyncio
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.conditions import MaxMessageTermination
from autogen_search_agent import researcher, writer
async def run_research(topic: str):
    """Run a two-agent round-robin session on *topic*, print every
    message exchanged, and return the team result."""
    termination = MaxMessageTermination(8)
    team = RoundRobinGroupChat(
        [researcher, writer],
        termination_condition=termination,
    )
    result = await team.run(task=topic)
    for msg in result.messages:
        print(f"[{msg.source}]: {msg.content}\n")
    return result
if __name__ == "__main__":
    # Sample topic; swap in any research question.
    topic = (
        "Research the current state of Rust in web development. "
        "What frameworks are popular? What are the benchmarks?"
    )
    asyncio.run(run_research(topic))
The researcher agent will call web_search("Rust web development frameworks 2026"), get structured results, optionally call scrape_page on interesting URLs, and pass findings to the writer agent for synthesis.
Step 5: Add Fallback Search Provider
For resilience, add a fallback search tool in case SearchHive is unavailable:
def web_search_with_fallback(
    query: Annotated[str, "Search query"]
) -> str:
    """Search with automatic fallback between providers.

    Tries SwiftSearch first; if it errors, times out, or returns no
    results, falls back to DeepDive, which has built-in redundancy.
    """
    # Try SearchHive first
    result = web_search(query)
    # web_search signals failure with human-readable strings. The original
    # check missed the "Search timed out" case, so timeouts were returned
    # to the agent instead of triggering the fallback.
    failure_markers = ("Search error", "No results", "Search timed out")
    if not any(marker in result for marker in failure_markers):
        return result
    # Fallback: use DeepDive which has built-in redundancy
    return deep_research(query)
Step 6: Cost Optimization
SearchHive credits work across all three APIs. Here is how credit consumption maps:
| Operation | Credits Used | Notes |
|---|---|---|
| SwiftSearch query | 1 credit | Returns 5-10 results |
| ScrapeForge page | 1-3 credits | Depends on page complexity |
| DeepDive research | 5-15 credits | Multi-hop with synthesis |
On the $9/month Starter plan (5,000 credits), you can handle approximately:
- 5,000 search queries, or
- 2,500 searches + 1,000 page scrapes, or
- 300 deep research tasks
Compare this to SerpApi at $25/month for just 1,000 searches, or Brave Search API at $5/1K queries. SearchHive covers search, scraping, and research at a lower per-operation cost. See /compare/serpapi for a detailed comparison.
Common Issues
Agent calls search too frequently
Add a note in the researcher's system message: "Only search when you genuinely need external information. Do not search for common knowledge." AutoGen respects system message instructions reasonably well.
Search results truncated
The web_search tool formats only the first 5 results. Increase limit in the API call if you need more, but remember that more results consume more tokens in the agent's context window.
DeepDive timeouts on complex topics
Complex research topics with max_depth: 3 can take 30-60 seconds. Use max_depth: 2 for most tasks and max_depth: 1 for simple lookups.
Rate limiting on high-volume tasks
If you are running multiple agents in parallel, each search call counts against your rate limit. The $49 Builder plan (100K credits, higher rate limits) handles parallel agent workloads better.
Complete Code Example
Here is the full working example combining everything:
# full_autogen_search.py
import asyncio
import httpx
from typing import Annotated
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.conditions import MaxMessageTermination
from autogen_ext.models.openai import OpenAIChatCompletionClient
import os

# Load the key from the environment instead of committing a secret;
# the literal remains only as a tutorial placeholder.
API_KEY = os.getenv("SEARCHHIVE_API_KEY", "your-searchhive-key")
BASE = "https://api.searchhive.dev/v1"
HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}
def search_web(query: Annotated[str, "Web search query"]) -> str:
    """Search the web via SwiftSearch.

    Returns numbered title/URL/snippet lines, or an error string the
    agent can recover from (matching the error handling in Step 2).
    """
    try:
        resp = httpx.post(f"{BASE}/swiftsearch", headers=HEADERS,
                          json={"query": query, "limit": 5}, timeout=15)
        # Surface non-2xx responses instead of parsing an error body as JSON.
        resp.raise_for_status()
        data = resp.json()
    except httpx.HTTPError as e:
        return f"Search error: {type(e).__name__}"
    if not data.get("results"):
        return f"No results for: {query}"
    return "\n".join(
        f"{i}. {r['title']} - {r['url']}\n   {r.get('snippet', '')}"
        for i, r in enumerate(data["results"], 1)
    )
def scrape(url: Annotated[str, "URL to scrape"]) -> str:
    """Fetch a page as markdown via ScrapeForge, truncated to 4000 chars.

    Returns an error string (not an exception) on failure so the agent's
    tool call does not crash, consistent with scrape_page in Step 2.
    """
    try:
        resp = httpx.post(f"{BASE}/scrapeforge", headers=HEADERS,
                          json={"url": url, "format": "markdown"}, timeout=30)
        # Previously missing: a 4xx/5xx response would have its error body
        # parsed as the page content.
        resp.raise_for_status()
        return resp.json().get("content", "")[:4000]
    except httpx.HTTPError as e:
        return f"Scrape error: {type(e).__name__}"
# Any OpenAI-compatible endpoint works; only the model name and key change.
model_client = OpenAIChatCompletionClient(
    model="gpt-4o-mini", api_key="your-llm-key"
)

# Researcher: the only agent with tool access.
researcher = AssistantAgent(
    name="researcher", model_client=model_client,
    tools=[search_web, scrape],
    system_message="Search the web to find accurate, current information. "
    "Cite sources by URL. Use scrape() for detailed page content."
)

# Writer: synthesizes the researcher's output into the final report.
writer = AssistantAgent(
    name="writer", model_client=model_client,
    system_message="Write clear reports based on the researcher's findings."
)
async def main():
    """Run the two-agent team on a sample question and print a
    300-character preview of every message produced."""
    stop_after = MaxMessageTermination(6)
    team = RoundRobinGroupChat(
        [researcher, writer],
        termination_condition=stop_after,
    )
    result = await team.run("What are the best Python web frameworks in 2026?")
    for msg in result.messages:
        preview = msg.content[:300]
        print(f"[{msg.source}]: {preview}\n")

asyncio.run(main())
Next Steps
- Add caching: Wrap `search_web` with a simple `functools.lru_cache` to avoid duplicate searches within a conversation
- Structured output: Use ScrapeForge's `extract` parameter to pull specific data fields (prices, dates, names) instead of full page text
- Scale up: Move from the free 500 credits to the $9 Starter plan for sustained agent operations
SearchHive's unified API means your AutoGen agents get search, scraping, and research through a single integration. No separate API keys, no separate billing, no separate error handling. Sign up for free and give your agents web access today. (Full code for this tutorial: /tutorials/autogen-web-search-tools)