How to Extract Flight Price Data with Python
Flight prices change constantly -- sometimes multiple times per day. Travel agencies, fare comparison sites, and travel hackers all rely on automated flight price monitoring to catch deals and track trends.
This tutorial shows how to extract flight price data using Python and SearchHive's web scraping APIs, including handling JavaScript-rendered results, pagination, and data normalization.
Key Takeaways
- Flight search sites are heavily JavaScript-rendered and bot-protected -- simple scrapers fail
- SearchHive's ScrapeForge renders JavaScript and bypasses basic bot detection
- SwiftSearch can discover flight deals from search engines and travel blogs
- DeepDive extracts structured price data (airline, route, price, dates) into clean JSON
- Build a price tracker that alerts you when fares drop below your target
Prerequisites
- Python 3.8+
- requests library (pip install requests)
- SearchHive API key (free tier available)
- Target routes or airports to monitor
Step 1: Discover Flight Deals via SwiftSearch
Start by finding current deals and fare information from travel sites:
import requests
import time
from datetime import datetime, timedelta
API_KEY = "your_api_key"  # replace with your SearchHive API key
BASE = "https://api.searchhive.dev/v1"  # SearchHive REST API root
def search_flight_deals(route, date_range="2026"):
    """Search the web for flight deals on a route via SwiftSearch.

    Args:
        route: Free-text route description, e.g. "JFK to London Heathrow".
        date_range: Free-text travel period appended to the query.

    Returns:
        List of dicts with "title", "url" and "snippet" keys, one per result.

    Raises:
        requests.HTTPError: On a non-2xx API response.
    """
    query = f"cheap flights {route} {date_range}"
    response = requests.get(
        f"{BASE}/search",
        headers={"Authorization": f"Bearer {API_KEY}"},
        params={
            "q": query,
            "num": 10  # top 10 results is plenty for deal discovery
        },
        timeout=30  # requests blocks indefinitely without an explicit timeout
    )
    response.raise_for_status()
    results = response.json().get("results", [])
    return [
        {
            "title": r.get("title", ""),
            "url": r.get("url", ""),
            "snippet": r.get("snippet", "")
        }
        for r in results
    ]
# Example: discover current deals for NYC -> London.
deals = search_flight_deals("JFK to London Heathrow", "July 2026")
for deal in deals[:5]:
    print(deal["title"])
    print(f" {deal['url']}")
Step 2: Scrape Flight Search Results
Extract flight data from comparison sites and airline pages:
def scrape_flight_page(url):
    """Fetch a flight search/comparison page via ScrapeForge with JS rendering.

    Args:
        url: Page to scrape.

    Returns:
        Parsed JSON from the /scrape endpoint (includes the rendered page
        content, e.g. a "markdown" field).

    Raises:
        requests.HTTPError: On a non-2xx API response.
    """
    response = requests.post(
        f"{BASE}/scrape",
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={
            "url": url,
            "render_js": True,
            "wait_for": 3000,  # ms to wait for dynamic prices to load
            "format": "markdown"
        },
        timeout=60  # JS rendering is slow; never wait forever on a dead connection
    )
    response.raise_for_status()
    return response.json()
def extract_flight_data(content):
    """Extract structured flight records from scraped page content via DeepDive.

    Args:
        content: Page text (e.g. the markdown returned by scrape_flight_page).

    Returns:
        The "data" payload from the API: a list of flight dicts with keys such
        as airline_name, price, currency, stops, booking_url, etc.

    Raises:
        requests.HTTPError: On a non-2xx API response.
        KeyError: If the response is missing the "data" field.
    """
    response = requests.post(
        f"{BASE}/deepdive",
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={
            "content": content,
            "extract": [
                "airline_name",
                "departure_airport",
                "arrival_airport",
                "departure_time",
                "arrival_time",
                "price",
                "currency",
                "flight_number",
                "stops",
                "flight_duration",
                "booking_url"
            ]
        },
        timeout=60  # extraction can take a while; avoid hanging without a timeout
    )
    response.raise_for_status()
    return response.json()["data"]
Step 3: Build a Multi-Route Price Monitor
import sqlite3
def monitor_routes(routes, db_path="flight_prices.db"):
    """Scrape current fares for each route and persist them to SQLite.

    Args:
        routes: Iterable of free-text route queries (e.g. "JFK to LHR London").
        db_path: SQLite file; the `prices` table is created if missing.

    Returns:
        List of dicts ({"route", "price", "airline"}) for every fare recorded
        during this run.
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute('''
            CREATE TABLE IF NOT EXISTS prices (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                route TEXT,
                airline TEXT,
                price REAL,
                currency TEXT,
                departure TEXT,
                arrival TEXT,
                stops INTEGER,
                source_url TEXT,
                scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        results = []
        for route in routes:
            print(f"\nMonitoring: {route}")
            # Search for current prices
            search_results = search_flight_deals(route)
            for result in search_results[:3]:  # Check top 3 sources
                url = result["url"]
                # Only scrape known fare-comparison domains.
                if not any(x in url for x in ["google.com/travel", "kayak", "skyscanner", "expedia"]):
                    continue
                try:
                    scraped = scrape_flight_page(url)
                    flights = extract_flight_data(scraped.get("markdown", ""))
                    for flight in flights:
                        # Coerce to str first: DeepDive may return "$1,234", a bare
                        # number, or None for the price field.
                        price_str = str(flight.get("price", "0") or "0").replace("$", "").replace(",", "")
                        try:
                            price = float(price_str)
                        except ValueError:
                            continue  # skip unparseable prices
                        c.execute(
                            "INSERT INTO prices (route, airline, price, currency, departure, arrival, stops, source_url) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                            (route, flight.get("airline_name", ""), price,
                             flight.get("currency", "USD"),
                             flight.get("departure_airport", ""),
                             flight.get("arrival_airport", ""),
                             flight.get("stops", 0), url)
                        )
                        results.append({"route": route, "price": price, "airline": flight.get("airline_name", "")})
                        print(f" {flight.get('airline_name', '?')}: ${price}")
                    time.sleep(3)  # Rate limiting
                except Exception as e:
                    # Best effort: one failing source must not abort the whole run.
                    print(f" Error: {e}")
            # Commit per route so a crash mid-run loses at most one route's data.
            conn.commit()
    finally:
        conn.close()
    return results
# Routes to watch; each entry is a free-text query passed to SwiftSearch.
routes = [
    "JFK to LHR London",
    "LAX to NRT Tokyo",
    "SFO to CDG Paris",
    "ORD to FRA Frankfurt",
]
prices = monitor_routes(routes)
Step 4: Analyze Price Trends
def analyze_price_trends(db_path="flight_prices.db"):
    """Print best current prices and week-over-week fare changes per route.

    Reads the `prices` table written by monitor_routes(). Purely a reporting
    helper; returns None. Note: scraped_at defaults to CURRENT_TIMESTAMP (UTC),
    matching SQLite's datetime('now'), so the window comparisons are consistent.
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        # Best current prices per route (last 24 hours only).
        print("\n--- BEST CURRENT PRICES ---")
        c.execute('''
            SELECT route, airline, MIN(price) as best_price,
                   AVG(price) as avg_price, COUNT(*) as data_points
            FROM prices
            WHERE scraped_at > datetime('now', '-24 hours')
            GROUP BY route
            ORDER BY route
        ''')
        for row in c.fetchall():
            print(f" {row[0]}: best ${row[2]:.0f} (avg ${row[3]:.0f}, {row[4]} data points)")
        # Compare today's minimum against the prior week's minimum.
        print("\n--- PRICE CHANGES (LAST 7 DAYS) ---")
        c.execute('''
            SELECT route,
                   MIN(CASE WHEN scraped_at > datetime('now', '-24 hours') THEN price END) as today_min,
                   MIN(CASE WHEN scraped_at BETWEEN datetime('now', '-7 days') AND datetime('now', '-24 hours') THEN price END) as week_min
            FROM prices
            GROUP BY route
            HAVING today_min IS NOT NULL AND week_min IS NOT NULL
        ''')
        for row in c.fetchall():
            if not row[2]:
                continue  # guard: a 0 week_min would cause ZeroDivisionError
            change = ((row[1] - row[2]) / row[2]) * 100
            direction = "dropped" if change < 0 else "increased"
            print(f" {row[0]}: ${row[2]:.0f} -> ${row[1]:.0f} ({abs(change):.1f}% {direction})")
    finally:
        conn.close()
analyze_price_trends()
Step 5: Set Up Price Drop Alerts
def check_price_alerts(target_prices, db_path="flight_prices.db"):
    """Return alerts for routes whose recent minimum fare is at or below target.

    Args:
        target_prices: Mapping of route -> target price (same currency as stored).
        db_path: SQLite database written by monitor_routes().

    Returns:
        List of alert dicts with keys: route, price, airline, url, target.
    """
    conn = sqlite3.connect(db_path)
    alerts = []
    try:
        c = conn.cursor()
        for route, target in target_prices.items():
            # Only fares scraped in the last 6 hours; one minimum per airline.
            c.execute(
                "SELECT MIN(price), airline, source_url FROM prices WHERE route = ? AND scraped_at > datetime('now', '-6 hours') GROUP BY airline",
                (route,)
            )
            for row in c.fetchall():
                if row[0] and row[0] <= target:
                    alerts.append({
                        "route": route,
                        "price": row[0],
                        "airline": row[1],
                        "url": row[2],
                        "target": target
                    })
                    print(f"ALERT: {route} at ${row[0]} (target: ${target}) via {row[1]}")
    finally:
        conn.close()
    return alerts
# Per-route fare thresholds (USD) that should trigger an alert.
my_targets = {
    "JFK to LHR London": 400,
    "LAX to NRT Tokyo": 600,
    "SFO to CDG Paris": 500,
}
alerts = check_price_alerts(my_targets)
Step 6: Export for Visualization
import csv
def export_for_chart(db_path="flight_prices.db", output="flight_data.csv"):
    """Export all recorded fares to CSV for charting in Excel/Sheets.

    Args:
        db_path: SQLite database written by monitor_routes().
        output: Destination CSV path (overwritten if it exists).
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute('''
            SELECT route, airline, price, currency, departure, arrival, stops, scraped_at
            FROM prices
            ORDER BY route, scraped_at
        ''')
        # newline="" is required by the csv module; explicit UTF-8 keeps the
        # output portable across platforms with different default encodings.
        with open(output, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["Route", "Airline", "Price", "Currency", "Departure", "Arrival", "Stops", "Timestamp"])
            writer.writerows(c.fetchall())
    finally:
        conn.close()
    print(f"Exported to {output}")
export_for_chart()
Common Issues and Solutions
Issue: Flight sites show different prices based on location/cookies
Set the geo parameter in ScrapeForge to specify a country for your searches:
json={"url": url, "render_js": True, "geo": "US"}
Issue: Dynamic pricing loads after initial page render
Increase the wait_for parameter (in milliseconds) to give prices time to load:
json={"url": url, "render_js": True, "wait_for": 5000}
Issue: CAPTCHAs on travel sites
Travel sites are among the most aggressive with bot detection. SearchHive's proxy rotation handles most cases, but if you hit persistent CAPTCHAs, slow down your request rate and vary the user agent.
Next Steps
- Combine with /blog/build-competitive-intelligence-dashboard for broader competitive monitoring
- Add email/Slack notifications for price alerts
- Build a web frontend to display price charts
- Check out /compare/firecrawl for more scraping API options
Start tracking flight prices with 500 free credits. No credit card needed -- sign up and start monitoring fares today.