Building a weather data dashboard with Python combines web scraping, data processing, and visualization into a single project. Whether you want to track local forecasts, compare weather across cities, or build alerts for extreme conditions, Python has the tools to make it happen.
This tutorial shows how to scrape weather data using SearchHive, process it with pandas, and visualize it with matplotlib -- creating a dashboard you can run locally or deploy as a web app.
Key Takeaways
- SearchHive's ScrapeForge can extract weather data from any public weather website
- Use DeepDive for AI-powered extraction when pages have complex layouts
- Pandas handles time series manipulation and aggregation
- Matplotlib and Plotly create publication-quality weather visualizations
- The complete dashboard runs in under 100 lines of Python
Prerequisites
- Python 3.10+
- A SearchHive API key (free here)
- Basic familiarity with pandas
Install dependencies:
pip install requests pandas matplotlib plotly streamlit
Step 1: Scrape Current Weather Data
Use ScrapeForge to pull weather data from a public weather service.
import requests
import json
API_KEY = "your_searchhive_api_key"
def scrape_weather_page(url: str) -> dict:
    """Fetch a public page through SearchHive ScrapeForge and return parsed JSON.

    Args:
        url: Public URL to scrape.

    Returns:
        The ScrapeForge JSON response as a dict.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        requests.Timeout: If the request exceeds the timeout.
    """
    response = requests.get(
        "https://api.searchhive.dev/v1/scrapeforge",
        headers={"Authorization": f"Bearer {API_KEY}"},
        params={"url": url, "format": "json"},
        timeout=30,  # without a timeout, requests can hang indefinitely
    )
    response.raise_for_status()
    return response.json()
# Scrape weather for a city and preview the first 500 characters of the payload.
weather_data = scrape_weather_page("https://www.weather.gov/wrh/WeatherByState")
preview = json.dumps(weather_data, indent=2)
print(preview[:500])
Step 2: Extract Structured Forecast with DeepDive
Weather pages often have complex tables and dynamic content. DeepDive extracts exactly what you need.
def get_city_forecast(city: str) -> dict:
    """Find a city's weather.gov forecast page and extract its 7-day forecast.

    Uses SwiftSearch to locate the page, then DeepDive for AI extraction.

    Args:
        city: Free-form city name, e.g. "Austin TX".

    Returns:
        DeepDive's JSON response containing the extracted forecast.

    Raises:
        requests.HTTPError: If either API call returns a non-2xx status.
        LookupError: If the search returns no results for the city.
    """
    # Search for the city's weather page.
    search_response = requests.get(
        "https://api.searchhive.dev/v1/swiftsearch",
        headers={"Authorization": f"Bearer {API_KEY}"},
        params={"q": f"{city} 7 day forecast weather.gov", "num": 1},
        timeout=30,
    )
    # Fail fast on auth/rate-limit errors instead of a confusing KeyError below.
    search_response.raise_for_status()
    results = search_response.json().get("results", [])
    if not results:
        raise LookupError(f"No search results found for {city!r}")
    url = results[0]["url"]
    # Extract forecast data using AI.
    extract_response = requests.post(
        "https://api.searchhive.dev/v1/deepdive",
        headers={
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "url": url,
            "prompt": (
                "Extract the 7-day weather forecast. For each day get: "
                "date, day name, high temperature (F), low temperature (F), "
                "weather condition, precipitation chance, wind speed. "
                "Return as JSON array."
            ),
        },
        timeout=60,  # AI extraction can be slow on large pages
    )
    extract_response.raise_for_status()
    return extract_response.json()

forecast = get_city_forecast("Austin TX")
print(json.dumps(forecast, indent=2))
Step 3: Process Data into a DataFrame
Convert the extracted forecast into a pandas DataFrame for analysis and visualization.
import pandas as pd
def forecast_to_dataframe(forecast_data) -> pd.DataFrame:
    """Normalize a DeepDive forecast payload into a DataFrame.

    Handles three payload shapes: a bare list of day records, a dict that
    wraps such a list under some key (e.g. {"forecast": [...]}), and a
    flat dict representing a single record.

    Args:
        forecast_data: Parsed JSON from DeepDive (dict or list).

    Returns:
        A DataFrame with one row per forecast day.

    Raises:
        TypeError: If the payload is neither a dict nor a list.
    """
    if isinstance(forecast_data, list):
        return pd.DataFrame(forecast_data)
    if isinstance(forecast_data, dict):
        # Unwrap the first list-valued entry, if any.
        for value in forecast_data.values():
            if isinstance(value, list):
                return pd.DataFrame(value)
        return pd.DataFrame([forecast_data])
    # Previously this fell through and returned None, which crashed callers
    # later with an opaque AttributeError; fail loudly instead.
    raise TypeError(
        f"Unsupported forecast payload type: {type(forecast_data).__name__}"
    )
df = forecast_to_dataframe(forecast)
print(df.head())

# Coerce temperature-like columns to numeric, stripping units such as "°F".
temperature_markers = ("temp", "high", "low")
for col in df.columns:
    lowered = col.lower()
    if any(marker in lowered for marker in temperature_markers):
        digits_only = df[col].astype(str).str.replace(r"[^0-9.-]", "", regex=True)
        df[col] = pd.to_numeric(digits_only, errors="coerce")
Step 4: Create Visualizations with Matplotlib
Build a temperature forecast chart.
import matplotlib.pyplot as plt
def plot_temperature_forecast(df: pd.DataFrame, city: str) -> None:
    """Plot high/low temperature lines for a forecast and save a PNG.

    Column names are matched heuristically ("date"/"day", "high", "low")
    because DeepDive's output keys can vary between pages.

    Args:
        df: Forecast rows, one per day.
        city: City label used in the chart title and output filename.
    """
    date_cols = [c for c in df.columns if "date" in c.lower() or "day" in c.lower()]
    high_cols = [c for c in df.columns if "high" in c.lower()]
    low_cols = [c for c in df.columns if "low" in c.lower()]
    # Check columns BEFORE creating the figure, so an early return
    # doesn't leak an empty figure.
    if not date_cols or not high_cols:
        print("Could not find required columns. Available:", df.columns.tolist())
        return
    fig, ax = plt.subplots(figsize=(12, 5))
    x = range(len(df))
    ax.plot(x, df[high_cols[0]], "ro-", label="High", linewidth=2)
    if low_cols:
        ax.plot(x, df[low_cols[0]], "bo-", label="Low", linewidth=2)
        ax.fill_between(x, df[low_cols[0]], df[high_cols[0]], alpha=0.1, color="red")
    ax.set_xticks(x)
    ax.set_xticklabels(df[date_cols[0]], rotation=45, ha="right")
    ax.set_ylabel("Temperature (F)")
    ax.set_title(f"7-Day Temperature Forecast: {city}")
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig(f"{city.replace(' ', '_')}_forecast.png", dpi=150)
    plt.close(fig)  # release the figure so repeated calls don't accumulate memory
    print("Chart saved!")

plot_temperature_forecast(df, "Austin TX")
Step 5: Compare Multiple Cities
Scrape and compare forecasts across multiple locations.
def compare_cities(cities: list) -> dict:
    """Fetch and normalize a forecast DataFrame for each requested city.

    Failures are reported per city and skipped, so one bad lookup does
    not abort the whole batch.

    Args:
        cities: City names to fetch, e.g. ["Austin TX", "Denver CO"].

    Returns:
        Mapping of city name -> forecast DataFrame (failed cities omitted).
    """
    results: dict = {}
    for city in cities:
        try:
            results[city] = forecast_to_dataframe(get_city_forecast(city))
            print(f"Scraped forecast for {city}")
        except Exception as e:
            print(f"Failed to get forecast for {city}: {e}")
    return results
def plot_city_comparison(results: dict) -> None:
    """Overlay each city's daily high temperatures on one chart and save a PNG.

    Args:
        results: Mapping of city name -> forecast DataFrame
            (as produced by compare_cities).
    """
    fig, ax = plt.subplots(figsize=(12, 5))
    for city, df in results.items():
        # Column name is matched heuristically; skip cities without a "high".
        high_cols = [c for c in df.columns if "high" in c.lower()]
        if high_cols:
            ax.plot(range(len(df)), df[high_cols[0]], "-o",
                    label=city, linewidth=2)
    ax.set_xlabel("Day")
    ax.set_ylabel("High Temperature (F)")
    ax.set_title("City Temperature Comparison")
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig("city_comparison.png", dpi=150)
    plt.close(fig)  # free the figure; dashboards call this on every refresh
    print("Comparison chart saved!")

cities = ["Austin TX", "Denver CO", "Seattle WA"]
results = compare_cities(cities)
plot_city_comparison(results)
Step 6: Build a Streamlit Dashboard
Wrap everything in a Streamlit app for an interactive web dashboard.
# Streamlit dashboard (save as dashboard.py). Statement order matters:
# Streamlit re-runs this script top-to-bottom on every interaction.
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt

st.title("Weather Dashboard")
st.caption("Powered by SearchHive APIs")

# --- Single-city forecast section ---
city = st.text_input("Enter city (e.g., Austin TX):", "Austin TX")
if st.button("Get Forecast"):
    with st.spinner("Fetching forecast..."):
        try:
            # Fetch + normalize, then show the raw table.
            forecast = get_city_forecast(city)
            df = forecast_to_dataframe(forecast)
            st.dataframe(df, use_container_width=True)
            # Plot high/low lines; column names are matched heuristically
            # because DeepDive's output keys can vary.
            fig, ax = plt.subplots(figsize=(10, 4))
            high_col = [c for c in df.columns if "high" in c.lower()]
            low_col = [c for c in df.columns if "low" in c.lower()]
            if high_col:
                ax.plot(range(len(df)), df[high_col[0]], "r-o", label="High")
            if low_col:
                ax.plot(range(len(df)), df[low_col[0]], "b-o", label="Low")
            ax.legend()
            ax.set_title(f"7-Day Forecast: {city}")
            st.pyplot(fig)
        except Exception as e:
            # Surface scrape/extraction failures in the UI instead of crashing.
            st.error(f"Error: {e}")

# Multi-city comparison
st.divider()
st.subheader("Multi-City Comparison")
default_cities = "Austin TX, Denver CO, Seattle WA"
cities_input = st.text_input("Cities (comma separated):", default_cities)
if st.button("Compare Cities"):
    cities = [c.strip() for c in cities_input.split(",")]
    results = compare_cities(cities)
    # plot_city_comparison saves city_comparison.png; display it in the app.
    plot_city_comparison(results)
    st.image("city_comparison.png")
Run the app with `streamlit run dashboard.py`.
Complete Code Example
import requests
import pandas as pd
import matplotlib.pyplot as plt
API_KEY = "your_searchhive_api_key"
def get_city_forecast(city: str) -> dict:
    """Find a city's forecast page via SwiftSearch, then extract it via DeepDive.

    Args:
        city: Free-form city name, e.g. "Austin TX".

    Returns:
        DeepDive's JSON response containing the extracted forecast.

    Raises:
        requests.HTTPError: If either API call returns a non-2xx status.
        LookupError: If the search returns no results for the city.
    """
    search = requests.get(
        "https://api.searchhive.dev/v1/swiftsearch",
        headers={"Authorization": f"Bearer {API_KEY}"},
        params={"q": f"{city} 7 day forecast", "num": 1},
        timeout=30,
    )
    search.raise_for_status()  # surface auth/rate-limit errors clearly
    results = search.json().get("results", [])
    if not results:
        raise LookupError(f"No search results for {city!r}")
    extract = requests.post(
        "https://api.searchhive.dev/v1/deepdive",
        headers={
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "url": results[0]["url"],
            "prompt": "Extract 7-day forecast: date, high temp F, low temp F, condition, precip chance. JSON array.",
        },
        timeout=60,  # AI extraction can be slow
    )
    extract.raise_for_status()
    return extract.json()
def build_dashboard(city: str) -> None:
    """Print a city's forecast table and show a high/low temperature chart.

    Args:
        city: Free-form city name, e.g. "Austin TX".

    Raises:
        TypeError: If the forecast payload is neither a dict nor a list.
    """
    forecast = get_city_forecast(city)
    # Normalize the payload the same way Step 3 does: a bare list, a dict
    # wrapping a list under some key, or a flat single-record dict.
    if isinstance(forecast, list):
        df = pd.DataFrame(forecast)
    elif isinstance(forecast, dict):
        rows = next((v for v in forecast.values() if isinstance(v, list)), None)
        df = pd.DataFrame(rows) if rows is not None else pd.DataFrame([forecast])
    else:
        raise TypeError(f"Unsupported forecast payload: {type(forecast).__name__}")
    print(df.to_string(index=False))
    fig, ax = plt.subplots(figsize=(10, 4))
    # Column names are matched heuristically; DeepDive key casing can vary.
    high = [c for c in df.columns if "high" in c.lower()]
    low = [c for c in df.columns if "low" in c.lower()]
    if high:
        ax.plot(df[high[0]].values, "r-o", label="High")
    if low:
        ax.plot(df[low[0]].values, "b-o", label="Low")
    ax.legend()
    ax.set_title(f"Weather Forecast: {city}")
    ax.set_ylabel("Temp (F)")
    plt.tight_layout()
    plt.show()
    plt.close(fig)  # release the figure after display


if __name__ == "__main__":
    build_dashboard("Austin TX")
Common Issues
No weather data found: Some cities have multiple weather pages. Try adding "weather.gov" or "NWS" to your SwiftSearch query to target the right source.
Temperature format inconsistency: Weather sites use different formats. The DeepDive prompt specifies Fahrenheit, but verify the output and add unit conversion if needed.
Rate limiting on searches: SwiftSearch has rate limits. For repeated dashboard refreshes, cache the results with a TTL (e.g., don't re-fetch if the last request was within 30 minutes).
DeepDive returns prose instead of JSON: If the prompt doesn't specify JSON format, DeepDive may return free-form text. Always include "Return as JSON" in your prompt.
Next Steps
- Add historical data by scraping archive pages
- Integrate weather alerts via email or Slack webhooks
- Add air quality data from EPA sources
- Deploy the Streamlit dashboard on Streamlit Cloud or Railway
Get started with 500 free credits on SearchHive -- scrape weather data from any public source with JS rendering and proxy rotation built in. No credit card required.
See also: /blog/how-to-scrape-real-estate-listings-with-python, /compare/firecrawl, /compare/jina-reader