Python SDK
The official Python SDK for AlterLab. Simple, type-safe, and async-ready.
Installation
pip install alterlab
Quick Start
from alterlab import AlterLab
# Initialize the client
client = AlterLab(api_key="sk_live_...")  # or set ALTERLAB_API_KEY env var

# Scrape a webpage
result = client.scrape("https://example.com")

# Access the content
print(result.text)  # Extracted text content
print(result.html)  # Raw HTML
print(result.json)  # Structured JSON (Schema.org, metadata)
print(result.status_code)  # HTTP status code
# Access billing info
print(result.billing.cost_dollars)  # Cost in USD
print(result.billing.tier_used)  # Which tier was used
Environment Variable
Set the ALTERLAB_API_KEY environment variable instead of passing the key directly.
Client Options
from alterlab import AlterLab
# Basic initialization
client = AlterLab(api_key="sk_live_...")

# With all options
client = AlterLab(
    api_key="sk_live_...",
    base_url="https://alterlab.io",  # Custom endpoint (optional)
    timeout=120,  # Request timeout in seconds
    max_retries=3,  # Auto-retry on transient failures
    retry_delay=1.0  # Initial retry delay (exponential backoff)
)
# From environment variable
import os

os.environ["ALTERLAB_API_KEY"] = "sk_live_..."
client = AlterLab()  # Reads from ALTERLAB_API_KEY

| Option | Type | Default | Description |
|---|---|---|---|
| api_key | str | env var | Your AlterLab API key |
| base_url | str | https://alterlab.io | API base URL |
| timeout | int | 120 | Request timeout in seconds |
| max_retries | int | 3 | Max retries on transient failures |
| retry_delay | float | 1.0 | Initial retry delay in seconds |
Scraping Methods
client.scrape(url, **options)
Main scraping method with intelligent tier escalation.
# Auto mode - intelligent tier escalation
result = client.scrape("https://example.com")
print(result.text)  # Extracted text
print(result.json)  # Structured JSON data
print(result.billing.cost_dollars)  # Cost in USD
client.scrape_html(url)
Fast HTML-only scraping. Best for static sites.
# Force HTML-only mode (fastest, cheapest)
result = client.scrape_html("https://example.com")
print(result.html)  # Raw HTML content
client.scrape_js(url, **options)
JavaScript rendering for SPAs and dynamic content.
# Full JavaScript rendering
result = client.scrape_js(
    "https://spa-app.com",
    screenshot=True,  # Capture screenshot
    wait_for="#content"  # Wait for selector
)
print(result.screenshot_url)  # Screenshot URL
client.scrape_pdf(url, format="text")
Extract text from PDF documents.
result = client.scrape_pdf(
    "https://example.com/document.pdf",
    format="markdown"  # "text" or "markdown"
)
print(result.text)
client.scrape_ocr(url, language="eng")
Extract text from images using OCR.
result = client.scrape_ocr(
    "https://example.com/image.png",
    language="eng"  # eng, fra, deu, jpn, etc.
)
print(result.text)
Structured Extraction
Extract structured data using JSON Schema, natural language prompts, or pre-built profiles.
JSON Schema Extraction
result = client.scrape("https://store.com/product/123", extraction_schema={"type":"object","properties":{"name":{"type":"string"},"price":{"type":"number"},"in_stock":{"type":"boolean"}}})print(result.json)# {"name": "...", "price": 29.99, "in_stock": true}Pre-built Profiles
# Use a pre-built extraction profileresult = client.scrape("https://store.com/product/123", extraction_profile="product"# product, article, job_posting, etc.)print(result.json)Natural Language Prompt
result = client.scrape("https://news.com/article", extraction_prompt="Extract the article title, author, and publish date")print(result.json)Search
Run web searches and get back AI-summarized results with source links.
from alterlab import AlterLab
client = AlterLab(api_key="sk_live_...")
# Simple searchresults = client.search("best noise-cancelling headphones 2025")
for r in results.results:print(r.title)print(r.url)print(r.snippet)
# Search with optionsresults = client.search("python web scraping tutorial", limit=10,# Max results to return country="US",# Geo-target results language="en",)
# Get AI summary of resultsprint(results.summary)Map
Discover all URLs on a website — returns a sitemap-style link list.
from alterlab import AlterLab
client = AlterLab(api_key="sk_live_...")
# Map all URLs on a sitesitemap = client.map("https://docs.example.com")
print(f"Found {len(sitemap.links)} URLs")for url in sitemap.links:print(url)
# Map with filterssitemap = client.map("https://example.com", include_paths=["/blog/*","/products/*"], exclude_paths=["/admin/*"], include_subdomains=False, limit=1000,# Cap at 1000 URLs)Extract
Extract structured data from a URL or raw HTML using a JSON Schema or natural language prompt — without a full scrape.
from alterlab import AlterLab
client = AlterLab(api_key="sk_live_...")
# Extract using JSON Schemadata = client.extract("https://shop.example.com/product-123", schema={"type":"object","properties":{"name":{"type":"string"},"price":{"type":"number"},"in_stock":{"type":"boolean"},"images":{"type":"array","items":{"type":"string"}},}})
print(data.result["name"])print(data.result["price"])
# Extract using natural languagedata = client.extract("https://news.example.com/article-456", prompt="Extract the headline, author name, publication date, and article body")
print(data.result)Monitors
Create website change monitors that run on a schedule and notify you when content changes.
from alterlab import AlterLab
client = AlterLab(api_key="sk_live_...")
# Create a monitormonitor = client.create_monitor( name="Competitor pricing", url="https://competitor.com/pricing", diff_mode="selector",# "semantic", "exact", or "selector" monitor_selectors=[".price"],# CSS selectors to watch check_interval="0 */6 * * *",# Every 6 hours notify_on="change",# "change" or "always" webhook_url="https://your-server.com/alerts",)
print(f"Monitor ID: {monitor.id}")print(f"Next check: {monitor.next_run_at}")
# List monitorsmonitors = client.list_monitors()for m in monitors:print(f"{m.name}: {m.status}")
# Get change historychanges = client.get_monitor_changes(monitor.id, limit=10)for change in changes:print(f"{change.detected_at}: {change.summary}")
# Pause/resumeclient.pause_monitor(monitor.id)client.resume_monitor(monitor.id)Alerts
Configure account-level alerts for balance thresholds, usage spikes, and error rate anomalies.
from alterlab import AlterLab
client = AlterLab(api_key="sk_live_...")
# Create a low-balance alertalert = client.create_alert( name="Low balance warning",type="balance_low", threshold=5.0,# Trigger when balance < $5 channels=["email","webhook"], webhook_url="https://your-server.com/alerts",)
# Create a usage spike alertalert = client.create_alert( name="Unusual spend spike",type="daily_spend", threshold=50.0,# Trigger when daily spend > $50 channels=["email"],)
# List alertsalerts = client.list_alerts()for a in alerts:print(f"{a.name}: {a.type} (threshold: {a.threshold})")
# Delete an alertclient.delete_alert(alert.id)Cost Controls
Control costs by limiting tiers, setting budgets, or optimizing for cost vs speed.
from alterlab import AlterLab, CostControls
client = AlterLab(api_key="sk_live_...")
# Limit to cheap tiers onlyresult = client.scrape("https://example.com", cost_controls=CostControls( max_tier="2",# Don't go above HTTP tier prefer_cost=True,# Optimize for lowest cost fail_fast=True# Error instead of escalating))
# Estimate cost before scrapingestimate = client.estimate_cost("https://linkedin.com")print(f"Estimated: ${estimate.estimated_cost_dollars:.4f}")print(f"Confidence: {estimate.confidence}")Pricing Tiers
| Tier | Name | Price | Per $1 | Use Case |
|---|---|---|---|---|
| 1 | Curl | $0.0002 | 5,000 | Static HTML sites |
| 2 | HTTP | $0.0003 | 3,333 | TLS fingerprinting |
| 3 | Stealth | $0.002 | 500 | Browser checks |
| 4 | Browser | $0.004 | 250 | JS-heavy SPAs |
| 5 | Captcha | $0.02 | 50 | CAPTCHA solving |
Async Support
Use the async client for concurrent scraping with native asyncio support:
import asyncio
from alterlab import AsyncAlterLab
async def main():
    async with AsyncAlterLab(api_key="sk_live_...") as client:
        # Single request
        result = await client.scrape("https://example.com")
        print(result.text)

        # Concurrent requests (parallel scraping)
        urls = [
            "https://example.com/page1",
            "https://example.com/page2",
            "https://example.com/page3",
        ]
        results = await asyncio.gather(*[client.scrape(url) for url in urls])
        for r in results:
            print(r.title, r.billing.cost_dollars)

asyncio.run(main())
BYOP (Bring Your Own Proxy)
Get a 20% discount when using your own proxy. Configure your proxy integration in the dashboard first.
from alterlab import AlterLab, AdvancedOptions
client = AlterLab(api_key="sk_live_...")
# Use your configured proxy integrationresult = client.scrape("https://example.com", advanced=AdvancedOptions( use_own_proxy=True, proxy_country="US"# Optional: request specific geo))
# Check if BYOP was applied
if result.billing.byop_applied:
    print(f"Saved {result.billing.byop_discount_percent}%!")
20% Discount
Batch Scraping
Submit multiple URLs in a single call. The SDK handles polling for results automatically.
from alterlab import AlterLab
client = AlterLab(api_key="sk_live_...")
# Submit a batch of URLsbatch = client.batch_scrape( urls=["https://example.com/page-1","https://example.com/page-2","https://example.com/page-3",], formats=["markdown","json"], webhook_url="https://your-server.com/webhook"# Optional)
print(f"Batch ID: {batch.batch_id}")print(f"Status: {batch.status}")# processing
# Poll for results (blocks until complete)results = client.get_batch(batch.batch_id, wait=True)for item in results.items:print(f"{item.url}: {item.status}")Scheduler
Create recurring scrape schedules with cron expressions.
from alterlab import AlterLab
client = AlterLab(api_key="sk_live_...")
# Create a daily scheduleschedule = client.create_schedule( name="Daily price check", urls=["https://store.example.com/product-1"], cron="0 9 * * *",# Every day at 9am timezone="America/New_York", formats=["json"], webhook_url="https://your-server.com/schedule-results")
print(f"Schedule ID: {schedule.id}")
# List all schedulesschedules = client.list_schedules()for s in schedules:print(f"{s.name}: {s.cron} ({s.status})")
# Pause/resumeclient.pause_schedule(schedule.id)client.resume_schedule(schedule.id)
# View execution historyhistory = client.get_schedule_history(schedule.id)for run in history:print(f"{run.executed_at}: {run.status}")Sessions (Authenticated Scraping)
Store browser cookies for scraping sites that require login.
from alterlab import AlterLab
client = AlterLab(api_key="sk_live_...")
# Create a session with cookiessession = client.create_session( name="Amazon Login", domain="amazon.com", cookies={"session-id":"...","session-token":"..."})
# Scrape with stored sessionresult = client.scrape("https://amazon.com/gp/yourstore", session_id=session.id)
# List sessionssessions = client.list_sessions()
# Validate session (check if cookies still work)status = client.validate_session(session.id)print(f"Valid: {status.is_valid}")Error Handling
from alterlab import( AlterLab, AuthenticationError, InsufficientCreditsError, RateLimitError, ScrapeError, TimeoutError
)
client = AlterLab(api_key="sk_live_...")
try:
    result = client.scrape("https://example.com")
    print(result.text)
except AuthenticationError:
    print("Invalid API key")
except InsufficientCreditsError:
    print("Please top up your balance")
except RateLimitError as e:
    print(f"Rate limited. Retry after {e.retry_after}s")
except ScrapeError as e:
    print(f"Scraping failed: {e.message}")
except TimeoutError:
    print("Request timed out")

| Exception | HTTP Code | Description |
|---|---|---|
| AuthenticationError | 401 | Invalid or missing API key |
| InsufficientCreditsError | 402 | Insufficient balance |
| RateLimitError | 429 | Too many requests |
| ScrapeError | Various | Scraping failed |
| TimeoutError | 408 | Request timed out |
API Reference
ScrapeResult Object
result.url  # Scraped URL
result.status_code  # HTTP status
result.text  # Extracted text content
result.html  # HTML content
result.json  # Structured JSON content
result.title  # Page title
result.author  # Author (if detected)
result.billing  # BillingDetails object
result.billing.tier_used  # Tier that succeeded
result.billing.cost_dollars  # Final cost in USD
result.screenshot_url  # Screenshot URL (if requested)
result.pdf_url  # PDF URL (if requested)
result.cached  # Whether result was from cache
Check Usage & Balance
usage = client.get_usage()
print(f"Balance: ${usage.balance_dollars:.2f}")
print(f"Used this month: {usage.credits_used_month} credits")
Full Documentation
