A chatbot with web access can answer questions about current events, look up prices, find documentation, and provide up-to-date information that static models can't. Building one is straightforward -- you need a language model for conversation and a search API for real-time data retrieval.
This tutorial shows how to build a chatbot with web access using SearchHive's SwiftSearch API for retrieval and OpenAI-compatible APIs for language generation. The architecture works with any LLM provider.
Key Takeaways
- Search-augmented chatbots combine an LLM with real-time web search results
- SearchHive's SwiftSearch returns structured search results that feed cleanly into LLM context
- The retrieval-augmented generation (RAG) pattern keeps responses grounded and factual
- Python's `openai` library works with OpenAI, Anthropic, and local models alike
- The full implementation is under 150 lines of Python
Prerequisites
- Python 3.10+
- A SearchHive API key (free tier)
- An OpenAI API key (or Anthropic/Ollama for local inference)
Install dependencies:
pip install openai requests
Step 1: Set Up the Search Client
SearchHive's SwiftSearch API returns structured web results you can inject into LLM prompts.
import requests
SEARCHHIVE_API_KEY = "your_searchhive_api_key"
def web_search(query: str, num_results: int = 5, timeout: float = 10.0) -> list:
    """Search the web via SearchHive's SwiftSearch and return structured results.

    Args:
        query: The search query string.
        num_results: Maximum number of results to request.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of result dicts (with "title", "url", "snippet" keys, per the
        usage elsewhere in this tutorial); an empty list when nothing matched.

    Raises:
        requests.HTTPError: When the API responds with a non-2xx status.
        requests.Timeout: When the request exceeds ``timeout`` seconds.
    """
    response = requests.get(
        "https://api.searchhive.dev/v1/swiftsearch",
        headers={"Authorization": f"Bearer {SEARCHHIVE_API_KEY}"},
        params={"q": query, "num": num_results},
        # requests has no default timeout; without one a stalled connection
        # hangs the chatbot indefinitely.
        timeout=timeout,
    )
    response.raise_for_status()
    data = response.json()
    return data.get("results", [])
# Quick smoke test: fetch three results and preview each snippet.
results = web_search("Python 3.13 new features", num_results=3)
for hit in results:
    print(f"{hit['title']}: {hit['snippet'][:100]}...")
Step 2: Format Search Results as LLM Context
Convert search results into a context string the LLM can use.
def format_search_context(results: list, max_chars: int = 4000) -> str:
    """Format search results into a numbered context block for the LLM.

    Entries are added in order until the character budget would be exceeded,
    keeping the context inside the LLM's window.

    Args:
        results: Search result dicts with "title", "url", "snippet" keys.
        max_chars: Soft cap on the total size of the returned context.

    Returns:
        Entries of the form "[N] title / URL / snippet" joined by blank
        lines, or a fallback sentence when *results* is empty.
    """
    if not results:
        return "No web search results found."
    context_parts = []
    total_chars = 0
    for i, result in enumerate(results, 1):
        snippet = result.get("snippet", "")
        url = result.get("url", "")
        title = result.get("title", "")
        entry = f"[{i}] {title}\n URL: {url}\n {snippet}"
        if total_chars + len(entry) > max_chars:
            break
        context_parts.append(entry)
        total_chars += len(entry)
    if not context_parts:
        # Bug fix: previously an oversized first entry yielded an empty
        # string, silently giving the LLM no context at all. Fall back to a
        # truncated first entry so the model always sees something.
        first = results[0]
        entry = (
            f"[1] {first.get('title', '')}\n"
            f" URL: {first.get('url', '')}\n"
            f" {first.get('snippet', '')}"
        )
        return entry[:max_chars]
    return "\n\n".join(context_parts)
Step 3: Build the Chatbot with Ollama (Free, Local LLM)
Run a local model so you don't need a paid OpenAI API key. Ollama is free and runs entirely on your machine.
# Install and run a model
# Download and run the Ollama installer (Linux/macOS).
curl -fsSL https://ollama.ai/install.sh | sh
# Pull the Llama 3 model weights locally.
ollama pull llama3
# Start the local inference server (the tutorial's client connects to
# http://localhost:11434).
ollama serve
from openai import OpenAI
# Point the OpenAI client at the local Ollama server; the api_key value is a
# placeholder, passed only because the client constructor requires one.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
def ask_with_web_search(question: str) -> str:
    """Answer *question* using fresh web search results as grounding context."""
    # Retrieve live results and render them into a prompt-ready block.
    search_hits = web_search(question)
    grounding = format_search_context(search_hits)
    instructions = (
        "You are a helpful assistant with web access. "
        "Use the provided search results to answer the user's question. "
        "If the search results don't contain enough information, say so. "
        "Always cite your sources by referencing the [N] numbers."
    )
    request = (
        f"Search results:\n\n{grounding}\n\n"
        f"User question: {question}"
    )
    conversation = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": request},
    ]
    # Low temperature keeps the model close to the retrieved facts.
    completion = client.chat.completions.create(
        model="llama3",
        messages=conversation,
        temperature=0.3,
    )
    return completion.choices[0].message.content
# Example: ask about live data a static model can't know.
answer = ask_with_web_search("What is the current price of Bitcoin?")
print(answer)
Step 4: Add Conversation Memory
A chatbot needs to remember the conversation history for follow-up questions.
class WebChatBot:
    """A search-augmented chatbot that keeps conversation history.

    User turns that look like requests for current information trigger a web
    search; the formatted results are appended to the user message before it
    is sent to the LLM.
    """

    # Trigger phrases suggesting the user wants current information.
    _SEARCH_KEYWORDS = (
        "price", "current", "latest", "news", "today",
        "2025", "2026", "how much", "who is", "what is",
        "weather", "score", "stock", "update", "release",
    )

    def __init__(self, llm_client, model: str = "llama3"):
        self.client = llm_client
        self.model = model
        self.history = []
        self.system_prompt = (
            "You are a helpful assistant with web access. "
            "When the user asks about current events, facts, or data, "
            "search the web first. Use [N] citations for sources. "
            "For casual conversation, respond normally."
        )

    def chat(self, message: str) -> str:
        """Send one user turn and return the assistant's reply."""
        augmented = message
        if self._should_search(message):
            hits = web_search(message)
            augmented += f"\n\nWeb search results:\n{format_search_context(hits)}"
        self.history.append({"role": "user", "content": augmented})
        # Cap the transcript at the last 10 messages to respect the window.
        conversation = [{"role": "system", "content": self.system_prompt}]
        conversation += self.history[-10:]
        result = self.client.chat.completions.create(
            model=self.model,
            messages=conversation,
            temperature=0.3,
        )
        reply = result.choices[0].message.content
        self.history.append({"role": "assistant", "content": reply})
        return reply

    def _should_search(self, message: str) -> bool:
        """Heuristic: does the message ask about current information?"""
        lowered = message.lower()
        return any(kw in lowered for kw in self._SEARCH_KEYWORDS)

    def reset(self):
        """Forget the conversation history."""
        self.history = []
Step 5: Add DeepDive for Detailed Page Analysis
When search snippets aren't enough, use DeepDive to get full page content.
def deep_search(query: str, timeout: float = 30.0) -> dict:
    """Search for *query* and scrape the top result for full page content.

    Args:
        query: The search query.
        timeout: Seconds to wait for the DeepDive scrape (scraping a page is
            slower than plain search, so the default is generous).

    Returns:
        {"summary": ...} when the search returns nothing; otherwise
        {"url", "title", "content"} for the top hit. NOTE(review): the two
        shapes differ — callers must check which keys are present.

    Raises:
        requests.HTTPError: When the DeepDive API responds with an error.
        requests.Timeout: When the request exceeds ``timeout`` seconds.
    """
    results = web_search(query, num_results=1)
    if not results:
        return {"summary": "No results found."}
    top_url = results[0]["url"]
    response = requests.post(
        "https://api.searchhive.dev/v1/deepdive",
        headers={
            "Authorization": f"Bearer {SEARCHHIVE_API_KEY}",
            "Content-Type": "application/json"
        },
        json={
            "url": top_url,
            "prompt": f"Extract key information related to: {query}"
        },
        # requests has no default timeout; a stalled scrape would otherwise
        # block the chatbot indefinitely.
        timeout=timeout,
    )
    response.raise_for_status()
    return {
        "url": top_url,
        "title": results[0]["title"],
        "content": response.json()
    }
Step 6: Interactive CLI Chatbot
Wrap everything in a terminal-based chatbot.
def run_chat():
    """Run the interactive terminal chat loop until the user quits."""
    llm_client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
    bot = WebChatBot(llm_client)
    print("Web-Connected Chatbot")
    print("Type 'quit' to exit, 'reset' to clear history")
    print("-" * 40)
    while True:
        user_input = input("\nYou: ").strip()
        command = user_input.lower()
        if command == "quit":
            break
        if command == "reset":
            bot.reset()
            print("History cleared.")
            continue
        print(f"\nBot: {bot.chat(user_input)}")


if __name__ == "__main__":
    run_chat()
Step 7: Add Fallback to OpenAI (Optional)
If you want to use OpenAI's API instead of a local model, swap the client:
from openai import OpenAI
import os
# Read the key from the environment rather than hard-coding it; raises
# KeyError if OPENAI_API_KEY is not set.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
# Just change the model and client in WebChatBot
bot = WebChatBot(client, model="gpt-4o-mini")
Complete Code Example
import requests
from openai import OpenAI
# Replace with your real key (or load it from an environment variable).
SEARCHHIVE_API_KEY = "your_searchhive_api_key"
def web_search(query: str, num_results: int = 5) -> list:
    """Return structured SwiftSearch results for *query* (empty list if none)."""
    response = requests.get(
        "https://api.searchhive.dev/v1/swiftsearch",
        headers={"Authorization": f"Bearer {SEARCHHIVE_API_KEY}"},
        params={"q": query, "num": num_results},
        timeout=10,  # avoid hanging forever on a stalled connection
    )
    # Surface API errors instead of silently parsing an error body
    # (matches the Step 1 implementation).
    response.raise_for_status()
    return response.json().get("results", [])
def format_context(results: list) -> str:
    """Render results as newline-separated "[N] title: snippet" lines."""
    lines = [
        f"[{idx}] {hit['title']}: {hit['snippet']}"
        for idx, hit in enumerate(results, 1)
    ]
    return "\n".join(lines)
# Local Ollama endpoint; the api_key value is a required placeholder.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
def chat(question: str) -> str:
    """Answer *question* grounded in fresh web search results."""
    # Search first, then feed the formatted hits to the model as context.
    context = format_context(web_search(question))
    prompt = f"Results:\n{context}\n\nQ: {question}"
    reply = client.chat.completions.create(
        model="llama3",
        messages=[
            {"role": "system", "content": "Answer using search results. Cite sources with [N]."},
            {"role": "user", "content": prompt},
        ]
    )
    return reply.choices[0].message.content
if __name__ == "__main__":
    # Minimal REPL: loop until the user types "quit".
    while (q := input("You: ").strip()) != "quit":
        print("Bot:", chat(q))
Common Issues
Ollama not running: Make sure ollama serve is running before starting the chatbot. Check with curl http://localhost:11434/api/tags.
Empty search results: SwiftSearch returns empty results for very specific queries. Try broadening the search terms or using a different query formulation.
LLM hallucinating despite search results: Lower the temperature to 0.1-0.3 and explicitly instruct the model to only use the provided search results in the system prompt.
Rate limits on search API: SwiftSearch free tier has rate limits. Add a cache for frequently asked questions to reduce API calls.
Context window overflow: Large search results can exceed the LLM's context window. The format_search_context function limits output to max_chars to prevent this.
Next Steps
- Add streaming responses for a better UX
- Build a web interface with Gradio or Streamlit
- Add tool-calling for structured data extraction (prices, dates, etc.)
- Implement semantic caching to avoid redundant searches
Get started with 500 free credits on SearchHive for your chatbot's web search layer. SwiftSearch provides clean, structured results that integrate seamlessly with any LLM. No credit card required.
See also: /blog/build-ai-search-engine-with-python, /compare/tavily, /compare/serper