How to Build API Documentation Generators: Step-by-Step Guide
Building an API documentation generator saves your team hours of manual work and keeps your docs in sync with your code. This tutorial walks through creating one from scratch using Python, with SearchHive's APIs for enriching docs with real-time data.
Prerequisites
Before starting, you'll need:
- Python 3.8+ installed on your machine
- pip for package management
- A SearchHive API key (free tier gives 500 credits) from searchhive.dev
- Basic familiarity with REST APIs and Python
Install the required packages:
pip install fastapi uvicorn markdown jinja2 httpx pydantic
Step 1: Define Your API Schema
Every good documentation generator starts with a clear schema definition. Use the OpenAPI 3.0 spec as your source of truth.
Create a sample API spec file:
# api_spec.py
from pydantic import BaseModel
from typing import Optional, List
class EndpointSpec(BaseModel):
    """Describe a single API endpoint for the documentation generator.

    ``path``, ``method``, ``summary`` and ``description`` have no default and
    must always be supplied; every other field falls back to an empty value.
    """

    # Route and HTTP verb, e.g. "/api/v1/search" and "GET".
    path: str
    method: str

    # One-line headline followed by the longer explanation.
    summary: str
    description: str

    # Parameter entries are plain dicts, e.g.
    # {"name": "q", "in": "query", "required": True, "schema": {...}}.
    parameters: List[dict] = []

    # Request payload description; None when the endpoint takes no body.
    request_body: Optional[dict] = None

    # Keyed by status code string, e.g. {"200": {"description": "..."}}.
    responses: dict = {}

    # Free-form labels for the endpoint.
    tags: List[str] = []
# Define your endpoints
# Hand-written specs: one EndpointSpec per documented route.
endpoints = [
    # Web search: configured entirely through query-string parameters.
    EndpointSpec(
        path="/api/v1/search",
        method="GET",
        summary="Search the web",
        description="Performs a web search query and returns structured results",
        parameters=[
            {
                "name": "q",
                "in": "query",
                "required": True,
                "schema": {"type": "string"},
            },
            {
                "name": "limit",
                "in": "query",
                "required": False,
                "schema": {"type": "integer", "default": 10},
            },
        ],
        responses={
            "200": {"description": "Search results returned successfully"},
        },
    ),
    # Scraping: the target URL travels in a JSON request body.
    EndpointSpec(
        path="/api/v1/scrape",
        method="POST",
        summary="Scrape a URL",
        description="Extracts structured data from a web page",
        request_body={
            "required": True,
            "content": {
                "application/json": {
                    "schema": {
                        "type": "object",
                        "properties": {"url": {"type": "string"}},
                    },
                },
            },
        },
        responses={
            "200": {"description": "Scraped content returned"},
        },
    ),
]
Step 2: Set Up the Documentation Generator Core
Create the main generator class that transforms your spec into readable documentation:
# doc_generator.py
import json
from datetime import datetime
from pathlib import Path
from jinja2 import Template
# Jinja2 source for the generated Markdown reference.
# Context variables read by the template: title, date, version, overview and
# endpoints. Each endpoint is accessed for method, path, summary,
# description, parameters, responses and tags. A parameter without a
# "description" entry renders as "-", and the Tags line is only emitted when
# the endpoint has tags.
DOC_TEMPLATE = """
# {{ title }}
> Auto-generated on {{ date }} | Version {{ version }}
---
## Overview
{{ overview }}
{% for endpoint in endpoints %}
## {{ endpoint.method }} `{{ endpoint.path }}`
**{{ endpoint.summary }}**
{{ endpoint.description }}
### Parameters
| Name | In | Required | Type | Description |
|------|----|----------|------|-------------|
{% for param in endpoint.parameters %}
| `{{ param.name }}` | {{ param.in }} | {{ "Yes" if param.required else "No" }} | {{ param.schema.type }} | {{ param.description | default("-") }} |
{% endfor %}
### Responses
| Status | Description |
|--------|-------------|
{% for status, info in endpoint.responses.items() %}
| {{ status }} | {{ info.description }} |
{% endfor %}
{% if endpoint.tags %}
**Tags:** {{ endpoint.tags | join(", ") }}
{% endif %}
---
{% endfor %}
"""
class DocGenerator:
    """Render collected endpoint specs into a Markdown API reference.

    Endpoints are registered with :meth:`add_endpoint` and rendered through
    ``DOC_TEMPLATE`` by :meth:`generate`.
    """

    def __init__(self, title, version, overview):
        """Store the document metadata and compile the template.

        Args:
            title: Heading of the generated document.
            version: Version string shown under the heading.
            overview: Text placed in the "Overview" section.
        """
        self.title = title
        self.version = version
        self.overview = overview
        # trim_blocks removes the newline that follows each {% ... %} tag.
        # Without it every loop iteration emits a blank line, which separates
        # the Markdown table headers from their rows and breaks the tables.
        self.template = Template(DOC_TEMPLATE, trim_blocks=True)
        self.endpoints = []

    def add_endpoint(self, endpoint_spec):
        """Register an endpoint; the spec is stored as a plain dict.

        Args:
            endpoint_spec: Object exposing ``model_dump()``.
        """
        self.endpoints.append(endpoint_spec.model_dump())

    def generate(self, output_path: str):
        """Render the documentation, write it to disk and return it.

        Args:
            output_path: Destination file. Missing parent directories are
                created.

        Returns:
            The rendered Markdown text.
        """
        content = self.template.render(
            title=self.title,
            version=self.version,
            overview=self.overview,
            date=datetime.now().strftime("%Y-%m-%d"),
            endpoints=self.endpoints,
        )
        destination = Path(output_path)
        # A fresh checkout usually has no "docs/" directory yet; writing
        # straight into it would raise FileNotFoundError.
        destination.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding keeps the output identical across platforms.
        destination.write_text(content, encoding="utf-8")
        return content
# Usage
# Guarded so that `from doc_generator import DocGenerator` in another script
# does not generate files as an import side effect.
if __name__ == "__main__":
    # The sample endpoint list is defined in api_spec.py (Step 1).
    from api_spec import endpoints

    generator = DocGenerator(
        title="My API Documentation",
        version="1.0.0",
        overview="A comprehensive REST API for web search and data extraction.",
    )
    for ep in endpoints:
        generator.add_endpoint(ep)
    docs = generator.generate("docs/api-reference.md")
    print(f"Generated {len(docs)} characters of documentation")
Step 3: Enrich Documentation with Live API Examples
Static documentation is fine, but live examples that actually work are better. Use SearchHive's SwiftSearch API to add real, working code samples:
# enrich_docs.py
import httpx
SEARCHHIVE_API_KEY = "your-api-key-here"
SEARCHHIVE_BASE = "https://api.searchhive.dev/v1"
def generate_live_example(endpoint_spec: dict) -> str:
    """Return a canned httpx snippet for a recognised endpoint.

    Args:
        endpoint_spec: Mapping with at least a ``"method"`` key; ``"path"``
            is only read once the method matches a known example.

    Returns:
        Python source for the search example (GET + "search" in the path),
        the scrape example (POST + "scrape" in the path), or a placeholder
        comment when neither applies.
    """
    search_snippet = (
        'import httpx\n\n'
        'response = httpx.get(\n'
        ' "https://api.searchhive.dev/v1/search",\n'
        ' params={"q": "python web scraping", "limit": 5},\n'
        ' headers={"Authorization": f"Bearer {SEARCHHIVE_API_KEY}"}\n'
        ')\n'
        'data = response.json()\n'
        'for result in data.get("results", []):\n'
        ' print(f"{result[\'title\']} - {result[\'url\']}")'
    )
    scrape_snippet = (
        'import httpx\n\n'
        'response = httpx.post(\n'
        ' "https://api.searchhive.dev/v1/scrape",\n'
        ' json={"url": "https://example.com"},\n'
        ' headers={"Authorization": f"Bearer {SEARCHHIVE_API_KEY}"}\n'
        ')\n'
        'data = response.json()\n'
        'print(data.get("content", "No content"))'
    )

    # (HTTP method, path fragment, snippet) triples, checked in order.
    known_examples = (
        ("GET", "search", search_snippet),
        ("POST", "scrape", scrape_snippet),
    )
    for verb, fragment, snippet in known_examples:
        # The path is looked up only after the method matched.
        if endpoint_spec["method"] == verb and fragment in endpoint_spec["path"]:
            return snippet
    return "# No live example available"
# Test the live examples work
def verify_example_works(example_code: str) -> bool:
    """Execute *example_code* and report whether it ran without raising.

    The snippet runs with ``httpx`` and ``SEARCHHIVE_API_KEY`` available as
    globals. Any exception is printed and reported as a failure.

    Args:
        example_code: Python source to execute.

    Returns:
        True when execution finished without an exception, otherwise False.
    """
    try:
        # Names the generated snippets expect to find in scope.
        namespace = {"httpx": httpx, "SEARCHHIVE_API_KEY": SEARCHHIVE_API_KEY}
        exec(example_code, namespace)
    except Exception as e:
        print(f"Example failed: {e}")
        return False
    else:
        return True
Step 4: Auto-Detect API Endpoints from Code
A proper generator should scan your codebase and auto-detect API routes. Here's how to extract endpoints from FastAPI apps:
# auto_detect.py
import ast
import re
from pathlib import Path
def _route_path(decorator: ast.Call) -> str:
    """Return the literal route path of a decorator call, or "" if absent."""
    # The path is either the first positional argument or `path=...`.
    candidates = list(decorator.args[:1])
    candidates.extend(kw.value for kw in decorator.keywords if kw.arg == "path")
    for candidate in candidates:
        if isinstance(candidate, ast.Constant) and isinstance(candidate.value, str):
            return candidate.value
    return ""


def detect_fastapi_routes(app_file: str) -> list:
    """Find route handlers in a Python source file by static analysis.

    A function counts as a route when one of its decorators is a call on an
    attribute named after an HTTP method, e.g. ``@app.get("/items")`` or
    ``@router.post(path="/items")``. The file is parsed, never imported.

    Args:
        app_file: Path of the Python file to scan.

    Returns:
        A list of dicts with the keys ``"method"`` (upper-case), ``"path"``
        ("" when the path is not a string literal), ``"function"`` and
        ``"docstring"`` ("No description" when the handler has none).

    Raises:
        FileNotFoundError: If *app_file* does not exist.
        SyntaxError: If the file is not valid Python.
    """
    http_methods = {"GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"}

    source = Path(app_file).read_text(encoding="utf-8")
    tree = ast.parse(source, filename=str(app_file))

    routes = []
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        for decorator in node.decorator_list:
            # Only `something.<name>(...)` decorators can be route markers.
            if not isinstance(decorator, ast.Call):
                continue
            if not isinstance(decorator.func, ast.Attribute):
                continue
            method = decorator.func.attr.upper()
            if method not in http_methods:
                continue
            routes.append({
                "method": method,
                "path": _route_path(decorator),
                "function": node.name,
                "docstring": ast.get_docstring(node) or "No description",
            })
    return routes
# Usage
# Guarded so that importing detect_fastapi_routes from another script does
# not parse "app/main.py" (and fail when it is missing) at import time.
if __name__ == "__main__":
    routes = detect_fastapi_routes("app/main.py")
    for r in routes:
        print(f"{r['method']} {r['path']} - {r['function']}")
Step 5: Build a CI/CD Pipeline for Auto-Updated Docs
Keep your documentation fresh by generating it on every commit:
# build_docs.py - Run this in your CI pipeline
import subprocess
import sys
def build_and_validate():
    """Detect routes, generate the Markdown reference and validate it.

    Returns:
        True once all steps have run; any failing step raises instead.
    """
    # Imported here because this script only imports `subprocess` and `sys`
    # at the top; these names come from the modules built in earlier steps.
    from api_spec import EndpointSpec
    from auto_detect import detect_fastapi_routes
    from doc_generator import DocGenerator

    # Step 1: Auto-detect routes
    print("Detecting API routes...")
    routes = detect_fastapi_routes("app/main.py")

    # Step 2: Generate documentation
    print("Generating documentation...")
    generator = DocGenerator(
        title="API Reference",
        version=get_version(),
        overview="Auto-generated from source code.",
    )
    for r in routes:
        # Detected routes carry "function"/"docstring" keys, while
        # EndpointSpec requires "summary"/"description". Unpacking the dict
        # directly fails validation, so the fields are mapped explicitly.
        generator.add_endpoint(
            EndpointSpec(
                path=r["path"],
                method=r["method"],
                summary=r["function"],
                description=r["docstring"],
            )
        )
    docs = generator.generate("docs/api-reference.md")
    print(f"Generated {len(docs)} chars of docs")

    # Step 3: Validate links
    print("Validating internal links...")
    # NOTE(review): validate_links is not defined anywhere in this guide;
    # it has to be supplied by the project before this step can run.
    validate_links("docs/api-reference.md")

    # Step 4: Deploy
    print("Documentation build complete!")
    return True
def get_version() -> str:
    """Return the output of ``git describe --tags``, or "0.0.0" as fallback.

    The fallback is used when the ``git`` executable cannot be found or when
    the command prints nothing on stdout.
    """
    fallback = "0.0.0"
    try:
        completed = subprocess.run(
            ["git", "describe", "--tags"],
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        # git is not installed on this machine.
        return fallback
    described = completed.stdout.strip()
    return described if described else fallback
Step 6: Add Search-Powered Context to Documentation
Use SearchHive's DeepDive API to pull in relevant documentation from the web for context enrichment:
# context_enricher.py
import httpx
SEARCHHIVE_API_KEY = "your-api-key-here"
def enrich_with_context(endpoint_path: str, description: str) -> str:
    """Fetch up to three "best practice" bullets for an endpoint.

    Enrichment is best-effort: a non-200 response, a transport error, an
    unparsable body or an empty insight list all yield an empty string, so a
    failed lookup does not abort the documentation build.

    Args:
        endpoint_path: Path of the endpoint; used only in the failure message.
        description: Text used to build the search query.

    Returns:
        A Markdown "Best Practices" section, or "" when nothing is available.
    """
    query = f"{description} best practices API design"
    try:
        response = httpx.post(
            "https://api.searchhive.dev/v1/deepdive",
            json={"query": query, "depth": 3},
            headers={"Authorization": f"Bearer {SEARCHHIVE_API_KEY}"},
        )
        if response.status_code != 200:
            return ""
        data = response.json()
    except (httpx.HTTPError, ValueError) as e:
        # httpx.HTTPError covers connection and timeout failures;
        # ValueError covers a body that is not valid JSON.
        print(f"Context lookup failed for {endpoint_path}: {e}")
        return ""

    insights = data.get("insights", [])
    if not insights:
        return ""

    # Only the first three insights are rendered.
    bullets = "".join(f"- {insight.get('text', '')}\n" for insight in insights[:3])
    return "\n### Best Practices\n\n" + bullets
# Enrich each endpoint section
# Guarded so that importing this module does not trigger network requests.
if __name__ == "__main__":
    # The sample endpoint list is defined in api_spec.py (Step 1).
    from api_spec import endpoints

    for ep in endpoints:
        context = enrich_with_context(ep.path, ep.summary)
        if context:
            print(f"Added context for {ep.method} {ep.path}")
Complete Working Example
Here's the full pipeline in a single script:
# generate_all.py
from doc_generator import DocGenerator
from auto_detect import detect_fastapi_routes
from enrich_docs import generate_live_example
from api_spec import endpoints, EndpointSpec
def main():
    """Generate the full API reference plus one example file per endpoint.

    Hand-written specs are documented first; auto-detected routes without a
    matching hand-written spec (same method and path) are appended after them.
    """
    # Local import: this script uses Path but never imports it at the top.
    from pathlib import Path

    # Auto-detect routes from codebase
    detected = detect_fastapi_routes("app/main.py")

    generator = DocGenerator(
        title="Complete API Documentation",
        version="2.1.0",
        overview="REST API for web search, scraping, and data extraction. Built with SearchHive.",
    )
    for ep in endpoints:
        generator.add_endpoint(ep)

    # Merge with manual specs: hand-written entries take precedence, so only
    # routes that are not documented yet are added from the detection result.
    documented = {(ep.method, ep.path) for ep in endpoints}
    for route in detected:
        key = (route["method"], route["path"])
        if key in documented:
            continue
        documented.add(key)
        generator.add_endpoint(
            EndpointSpec(
                path=route["path"],
                method=route["method"],
                summary=route["function"],
                description=route["docstring"],
            )
        )

    # Generate and save
    content = generator.generate("docs/full-api-reference.md")

    # Also generate per-endpoint code examples
    examples_dir = Path("docs/examples")
    # parents=True so this works even when "docs/" does not exist yet.
    examples_dir.mkdir(parents=True, exist_ok=True)
    for ep in endpoints:
        slug = ep.path.replace("/api/v1/", "").replace("/", "-")
        example = generate_live_example(ep.model_dump())
        (examples_dir / f"{slug}.py").write_text(example, encoding="utf-8")

    print(f"Documentation generated: {len(content)} characters")
    print(f"Code examples: {len(list(examples_dir.glob('*.py')))} files")


if __name__ == "__main__":
    main()
Common Issues
Problem: Generated docs are out of date
Solution: Run the generator in CI/CD on every push to main. Add it to your GitHub Actions or Makefile.
Problem: Code examples don't work
Solution: Use verify_example_works() to test each example during the build. Fail the build if any example breaks.
Problem: Missing descriptions for auto-detected endpoints
Solution: Enforce docstrings on all route handlers. The auto-detect parser reads ast.get_docstring() for descriptions.
Problem: Large APIs generate slow docs
Solution: Generate docs incrementally. Only regenerate documentation for endpoints that changed since the last build using git diff.
Next Steps
- Add OpenAPI/Swagger JSON output alongside Markdown
- Set up automated deployment to a docs site (Vercel, Netlify)
- Add versioned documentation with git tags
- Integrate SearchHive's ScrapeForge for pulling competitor API docs as reference
For web data extraction that powers your documentation, check out SearchHive's free tier with 500 credits to get started -- no credit card needed. See the full API docs for SwiftSearch, ScrapeForge, and DeepDive endpoints.
Related: /tutorials/data-extraction-python | /compare/serpapi