Building a Python API wrapper lets you abstract away the raw HTTP calls to web scraping services and work with clean, Pythonic interfaces. Whether you're wrapping SearchHive, SerpAPI, Firecrawl, or a custom scraper, a well-designed wrapper makes your codebase cleaner, testable, and easier to maintain.
This tutorial walks through building a production-ready Python API wrapper for SearchHive's SwiftSearch, ScrapeForge, and DeepDive endpoints -- with retry logic, error handling, async support, and type hints.
Key Takeaways
- A good wrapper abstracts HTTP details behind simple Python methods
- Use `httpx` for both sync and async HTTP support in one library
- Add retry logic with exponential backoff to handle rate limits gracefully
- Type hints and Pydantic models make your wrapper self-documenting
- SearchHive's unified API key works across all endpoints, simplifying auth
Prerequisites
- Python 3.10+
- A SearchHive API key (get one free)
- Basic familiarity with REST APIs and Python classes
Install dependencies:
pip install httpx pydantic
Step 1: Set Up the Base Client
Start with a base HTTP client that handles authentication and common configuration.
import httpx
from typing import Optional
class SearchHiveClient:
    """Minimal synchronous client for the SearchHive REST API.

    Handles bearer-token authentication and shared configuration; the
    endpoint-specific helpers are layered on top in later steps.
    """

    BASE_URL = "https://api.searchhive.dev/v1"

    def __init__(self, api_key: str, timeout: int = 30):
        self.api_key = api_key
        self.timeout = timeout
        # One persistent connection pool with the auth header baked in,
        # so every request through this client is authenticated.
        self._client = httpx.Client(
            base_url=self.BASE_URL,
            headers={"Authorization": f"Bearer {api_key}"},
            timeout=timeout,
        )

    def __enter__(self):
        # Supports ``with SearchHiveClient(...) as client:`` usage.
        return self

    def __exit__(self, *args):
        self._client.close()

    def _request(self, method: str, endpoint: str, **kwargs) -> dict:
        # Fire one request, raise on any 4xx/5xx, decode the JSON body.
        resp = self._client.request(method, endpoint, **kwargs)
        resp.raise_for_status()
        return resp.json()

    def close(self):
        # Explicit cleanup for callers not using the context manager.
        self._client.close()
This gives you a context-managed client with automatic auth header injection.
Step 2: Add Retry Logic
Web scraping APIs hit rate limits. Add automatic retries with exponential backoff.
import time
import httpx
class SearchHiveClient:
    """Synchronous SearchHive client with automatic retries.

    Retries rate-limited (429) responses and transport-level failures up
    to ``max_retries`` times with exponential backoff, honoring the
    server's ``Retry-After`` header when present.
    """

    BASE_URL = "https://api.searchhive.dev/v1"

    def __init__(self, api_key: str, timeout: int = 30,
                 max_retries: int = 3, base_delay: float = 1.0):
        self.api_key = api_key
        self.timeout = timeout
        self.max_retries = max_retries
        self.base_delay = base_delay
        self._client = httpx.Client(
            base_url=self.BASE_URL,
            headers={"Authorization": f"Bearer {api_key}"},
            timeout=timeout,
        )

    def _request_with_retry(self, method: str, endpoint: str,
                            **kwargs) -> dict:
        """Send a request, retrying on 429s and transport errors.

        Backoff is ``Retry-After`` (if sent) or base_delay * 2**attempt.
        Raises the last encountered ``httpx`` error if every attempt
        fails.
        """
        last_error = None
        for attempt in range(self.max_retries):
            try:
                response = self._client.request(method, endpoint, **kwargs)
                # Only sleep-and-retry a 429 while attempts remain; on the
                # final attempt fall through to raise_for_status() so the
                # caller gets a proper HTTPStatusError instead of the loop
                # ending with nothing to raise or return.
                if (response.status_code == 429
                        and attempt < self.max_retries - 1):
                    retry_after = float(response.headers.get(
                        "Retry-After", self.base_delay * (2 ** attempt)
                    ))
                    time.sleep(retry_after)
                    continue
                response.raise_for_status()
                return response.json()
            except (httpx.HTTPError, httpx.TimeoutException) as e:
                last_error = e
                if attempt < self.max_retries - 1:
                    time.sleep(self.base_delay * (2 ** attempt))
        raise last_error
Step 3: Implement SwiftSearch Methods
SwiftSearch is SearchHive's search results API. Wrap it with clean methods for different search types.
from typing import Optional, List
class SearchHiveClient:
    # ... previous __init__ and _request_with_retry ...

    def search(self, query: str, num: int = 10,
               engine: str = "google",
               country: Optional[str] = None) -> dict:
        """Run a SwiftSearch query and return the raw JSON payload."""
        query_params = {"q": query, "num": num, "engine": engine}
        if country:
            # Country is optional; omit it entirely when not supplied.
            query_params["country"] = country
        return self._request_with_retry(
            "GET", "/swiftsearch", params=query_params
        )

    def search_news(self, query: str, num: int = 10) -> dict:
        """Convenience wrapper: same search, news engine."""
        return self.search(query, num=num, engine="news")

    def search_images(self, query: str, num: int = 10) -> dict:
        """Convenience wrapper: same search, image engine."""
        return self.search(query, num=num, engine="images")
Step 4: Implement ScrapeForge Methods
ScrapeForge extracts content from individual URLs with JS rendering and proxy support.
class SearchHiveClient:
    # ... previous methods ...

    def scrape(self, url: str, format: str = "markdown",
               country: Optional[str] = None) -> dict:
        """Extract the content of one URL via ScrapeForge."""
        query = {"url": url, "format": format}
        if country:
            # Route the request through a proxy in the given country.
            query["country"] = country
        return self._request_with_retry(
            "GET", "/scrapeforge", params=query
        )

    def scrape_batch(self, urls: List[str],
                     format: str = "markdown") -> List[dict]:
        """Scrape several URLs one after another, preserving order."""
        return [self.scrape(target, format=format) for target in urls]
Step 5: Add Pydantic Response Models
Typed response models give you autocomplete, validation, and documentation.
from pydantic import BaseModel
from typing import Optional, List
class SearchResult(BaseModel):
    # One organic result row from a SwiftSearch response.
    title: str
    url: str
    snippet: str
    position: int


class SearchResponse(BaseModel):
    # Envelope for a full search: the result list plus optional metadata.
    # total_results / query may be absent from the raw payload.
    results: List[SearchResult]
    total_results: Optional[int] = None
    query: Optional[str] = None


class ScrapeResponse(BaseModel):
    # Validated payload returned by a ScrapeForge call.
    title: str
    content: str
    url: str
    format: str
class SearchHiveClient:
    # ... previous methods ...

    def search_typed(self, query: str, num: int = 10) -> SearchResponse:
        """Search and validate the raw payload into a SearchResponse."""
        payload = self.search(query, num=num)
        return SearchResponse(**payload)

    def scrape_typed(self, url: str) -> ScrapeResponse:
        """Scrape and validate the raw payload into a ScrapeResponse."""
        payload = self.scrape(url)
        return ScrapeResponse(**payload)
Step 6: Add Async Support
For high-throughput scraping, add an async client alongside the sync one.
import httpx
import asyncio
from typing import List
class AsyncSearchHiveClient:
    """Asynchronous SearchHive client built on ``httpx.AsyncClient``.

    Supports ``async with`` usage and retries 429s / transport errors
    with exponential backoff.
    """

    BASE_URL = "https://api.searchhive.dev/v1"

    def __init__(self, api_key: str, timeout: int = 30,
                 max_retries: int = 3):
        self.api_key = api_key
        self._client = httpx.AsyncClient(
            base_url=self.BASE_URL,
            headers={"Authorization": f"Bearer {api_key}"},
            timeout=timeout,
        )
        self.max_retries = max_retries

    async def __aenter__(self):
        # Enables ``async with AsyncSearchHiveClient(...) as client:``;
        # without these two methods that usage raises AttributeError.
        return self

    async def __aexit__(self, *args):
        await self._client.aclose()

    async def _request(self, method: str, endpoint: str,
                       **kwargs) -> dict:
        """Send one request with retry; raise on final failure."""
        for attempt in range(self.max_retries):
            try:
                response = await self._client.request(
                    method, endpoint, **kwargs
                )
                # Retry a 429 only while attempts remain; on the last
                # attempt fall through so raise_for_status() surfaces the
                # error instead of the loop ending and returning None.
                if (response.status_code == 429
                        and attempt < self.max_retries - 1):
                    await asyncio.sleep(1.0 * (2 ** attempt))
                    continue
                response.raise_for_status()
                return response.json()
            except httpx.HTTPError:
                if attempt == self.max_retries - 1:
                    raise
                await asyncio.sleep(1.0 * (2 ** attempt))

    async def scrape_batch(self, urls: List[str]) -> List[dict]:
        """Scrape multiple URLs concurrently via asyncio.gather."""
        tasks = [self._request("GET", "/scrapeforge",
                               params={"url": u, "format": "json"})
                 for u in urls]
        return await asyncio.gather(*tasks)

    async def close(self):
        await self._client.aclose()
Complete Working Example
Put it all together with a practical example:
import asyncio
from searchhive_client import SearchHiveClient, AsyncSearchHiveClient
def sync_example():
    """Demonstrate the synchronous client: one search, one scrape."""
    with SearchHiveClient("your_api_key") as client:
        # Run a web search and show the top five hits.
        response = client.search("best Python web scraping libraries")
        for item in response.get("results", [])[:5]:
            print(f"{item['title']}: {item['url']}")

        # Pull down a single page as markdown.
        page = client.scrape(
            "https://example.com/article",
            format="markdown"
        )
        print(f"Scraped {len(page['content'])} characters")
async def async_example():
    """Demonstrate concurrent scraping with the async client.

    Uses try/finally rather than ``async with`` because the Step 6
    client as shown defines only ``close()`` — it has no
    ``__aenter__``/``__aexit__``, so ``async with`` would raise
    AttributeError.
    """
    client = AsyncSearchHiveClient("your_api_key")
    try:
        urls = [
            "https://example.com/page1",
            "https://example.com/page2",
            "https://example.com/page3",
        ]
        results = await client.scrape_batch(urls)
        for r in results:
            print(f"Scraped: {r['title']}")
    finally:
        # Always release the connection pool, even on failure.
        await client.close()


if __name__ == "__main__":
    sync_example()
    asyncio.run(async_example())
Common Issues
Rate limiting (429 errors): The retry logic handles this automatically. If you're consistently hitting limits, consider upgrading your SearchHive plan for higher rate limits.
Timeouts on slow pages: Large pages or JS-heavy sites may take time to render. Increase the timeout parameter when initializing the client.
JSON decode errors: If a scrape call returns content you cannot parse as JSON, check that the format parameter matches what you expect. Note that the wrapper in this tutorial defaults format to "markdown", so request format="json" explicitly when you need a JSON body.
SSL errors: Corporate proxies may interfere. You can pass a custom verify parameter to the underlying httpx client if needed.
Next Steps
- Add caching with `functools.lru_cache` or Redis for repeated queries
- Create a CLI interface using `click` or `typer`
- Build a Flask/FastAPI layer to expose your wrapper as a microservice
- Add logging with Python's `logging` module for production monitoring
Ready to start scraping? Get 500 free credits on SearchHive -- no credit card needed, full API access from day one. Check the docs for all available endpoints and parameters.
See also: /blog/searchhive-python-sdk-tutorial, /compare/firecrawl, /compare/serpapi