Competitor price monitoring is one of those tasks that feels simple until you actually build it. You write CSS selectors for the price element, it works, and then the site redesigns three weeks later and your selectors return nothing. You fix them, they break again. Eventually you’re spending more time maintaining the scraper than you’re getting value from the data.
The approach in this post removes the selector maintenance problem entirely. Instead of targeting specific HTML elements, we convert each product page to clean Markdown and ask Claude to extract the price from the text. The LLM-based extraction handles layout changes automatically — as long as the price is visible on the page, it gets extracted, regardless of how the HTML is structured around it.
We’ll build a complete price monitor: scheduled page fetching, LLM extraction, SQLite history, and email or Slack alerts when prices change.
What We’re Building
- A list of competitor product URLs to monitor
- A daily job that fetches each page and extracts current pricing
- SQLite storage for price history per URL
- Change detection: alert when a price drops or rises beyond a threshold
- A simple report: current prices, last change, price history
Setup
pip install unweb anthropic sqlite-utils schedule
import os
import json
import sqlite3
import asyncio
import schedule
import time
from datetime import datetime
from typing import Optional
from dataclasses import dataclass
import unweb
import anthropic
unweb_client = unweb.Client(api_key=os.environ["UNWEB_API_KEY"])
claude = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
Step 1: Database Setup
def init_db(db_path: str = "prices.db") -> sqlite3.Connection:
conn = sqlite3.connect(db_path)
conn.execute("""
CREATE TABLE IF NOT EXISTS price_history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
url TEXT NOT NULL,
product_name TEXT,
price REAL,
currency TEXT DEFAULT 'USD',
availability TEXT,
quality_score INTEGER,
scraped_at TEXT NOT NULL,
raw_extraction TEXT
)
""")
conn.execute("""
CREATE INDEX IF NOT EXISTS idx_url_date
ON price_history(url, scraped_at)
""")
conn.commit()
return conn
def get_latest_price(conn: sqlite3.Connection, url: str) -> Optional[dict]:
"""Get the most recent price record for a URL."""
row = conn.execute(
"SELECT price, currency, scraped_at FROM price_history WHERE url = ? ORDER BY scraped_at DESC LIMIT 1",
(url,)
).fetchone()
if row:
return {"price": row[0], "currency": row[1], "scraped_at": row[2]}
return None
def save_price(conn: sqlite3.Connection, url: str, data: dict, quality_score: int):
conn.execute("""
INSERT INTO price_history (url, product_name, price, currency, availability, quality_score, scraped_at, raw_extraction)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", (
url,
data.get("product_name"),
data.get("price"),
data.get("currency", "USD"),
data.get("availability"),
quality_score,
datetime.utcnow().isoformat(),
json.dumps(data)
))
conn.commit()
Step 2: Fetch and Extract
@dataclass
class PriceData:
product_name: Optional[str]
price: Optional[float]
currency: Optional[str]
availability: Optional[str]
original_price: Optional[float] # price before discount, if shown
discount_pct: Optional[int] # discount percentage, if shown
def fetch_page_markdown(url: str, quality_threshold: int = 35) -> tuple[Optional[str], int]:
"""
Returns (markdown, quality_score). markdown is None if page is below threshold.
"""
try:
result = unweb_client.convert.url(url)
if result.quality_score < quality_threshold:
return None, result.quality_score
return result.markdown[:5000], result.quality_score
except Exception as e:
print(f"Fetch error for {url}: {e}")
return None, 0
def extract_price(markdown: str) -> Optional[PriceData]:
"""Extract pricing data from Markdown using Claude."""
message = claude.messages.create(
model="claude-haiku-4-5-20251001",
max_tokens=512,
system=(
"Extract pricing information from web page content. "
"Return only valid JSON with no explanation. Use null for missing fields."
),
messages=[{
"role": "user",
"content": f"""Extract pricing from this product page:
---
{markdown}
---
Return JSON with these fields:
- product_name (string): product name or title
- price (number): current price as a decimal, null if not found
- currency (string): currency code like "USD", "EUR", "GBP"
- availability (string): in stock, out of stock, pre-order, etc.
- original_price (number): original price if discounted, null otherwise
- discount_pct (integer): discount percentage if shown, null otherwise
JSON only."""
}]
)
raw = message.content[0].text.strip()
if raw.startswith("```"):
raw = raw.split("```")[1].lstrip("json").strip()
try:
data = json.loads(raw)
return PriceData(
product_name=data.get("product_name"),
price=data.get("price"),
currency=data.get("currency", "USD"),
availability=data.get("availability"),
original_price=data.get("original_price"),
discount_pct=data.get("discount_pct")
)
except (json.JSONDecodeError, KeyError) as e:
print(f"Extraction parse error: {e}")
return None
Step 3: Change Detection
def check_price_change(
url: str,
new_price: float,
conn: sqlite3.Connection,
alert_threshold_pct: float = 2.0
) -> Optional[dict]:
"""
Compare new price to last recorded price.
Returns a change dict if the price moved beyond the threshold, else None.
"""
latest = get_latest_price(conn, url)
if not latest or latest["price"] is None:
return None # no baseline to compare
old_price = latest["price"]
if old_price == 0:
return None
change_pct = ((new_price - old_price) / old_price) * 100
if abs(change_pct) >= alert_threshold_pct:
return {
"url": url,
"old_price": old_price,
"new_price": new_price,
"change_pct": round(change_pct, 2),
"direction": "down" if change_pct < 0 else "up",
"scraped_at": latest["scraped_at"]
}
return None
Step 4: The Monitor Loop
COMPETITORS = [
{"url": "https://competitor-a.com/products/widget-pro", "label": "Competitor A — Widget Pro"},
{"url": "https://competitor-b.com/shop/widget", "label": "Competitor B — Widget"},
{"url": "https://competitor-c.com/catalog/widget-standard", "label": "Competitor C — Widget Standard"},
]
ALERT_THRESHOLD_PCT = 2.0 # alert on any price change >= 2%
def run_monitor(conn: sqlite3.Connection, notify_fn=None):
"""
Fetch all competitor pages, extract prices, detect changes, and optionally notify.
"""
changes = []
errors = []
for item in COMPETITORS:
url = item["url"]
label = item["label"]
print(f"Checking {label}...")
markdown, quality = fetch_page_markdown(url)
if markdown is None:
errors.append({"url": url, "label": label, "reason": f"low quality ({quality})"})
continue
price_data = extract_price(markdown)
if price_data is None or price_data.price is None:
errors.append({"url": url, "label": label, "reason": "extraction failed"})
continue
# Check for change before saving
change = check_price_change(url, price_data.price, conn, ALERT_THRESHOLD_PCT)
if change:
change["label"] = label
changes.append(change)
# Save to history
save_price(conn, url, {
"product_name": price_data.product_name or label,
"price": price_data.price,
"currency": price_data.currency,
"availability": price_data.availability,
"original_price": price_data.original_price,
"discount_pct": price_data.discount_pct,
}, quality)
print(f" {label}: {price_data.currency} {price_data.price} (quality {quality})")
if changes:
print(f"\n⚡ {len(changes)} price change(s) detected:")
for c in changes:
direction = "↓" if c["direction"] == "down" else "↑"
print(f" {direction} {c['label']}: {c['old_price']} → {c['new_price']} ({c['change_pct']:+.1f}%)")
if notify_fn:
notify_fn(changes)
if errors:
print(f"\n⚠ {len(errors)} fetch/extraction error(s):")
for e in errors:
print(f" {e['label']}: {e['reason']}")
return changes, errors
Step 5: Alerts
Two simple notification options — email and Slack webhook. Drop whichever fits your setup.
import smtplib
import urllib.request
from email.mime.text import MIMEText
def notify_email(changes: list[dict], to_email: str, from_email: str, smtp_password: str):
lines = []
for c in changes:
direction = "DROPPED" if c["direction"] == "down" else "INCREASED"
lines.append(
f"{c['label']}: price {direction} {c['old_price']:.2f} → {c['new_price']:.2f} ({c['change_pct']:+.1f}%)\n{c['url']}"
)
body = "Price changes detected:\n\n" + "\n\n".join(lines)
msg = MIMEText(body)
msg["Subject"] = f"Competitor Price Alert: {len(changes)} change(s)"
msg["From"] = from_email
msg["To"] = to_email
with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
server.login(from_email, smtp_password)
server.sendmail(from_email, [to_email], msg.as_string())
def notify_slack(changes: list[dict], webhook_url: str):
lines = []
for c in changes:
emoji = ":arrow_down:" if c["direction"] == "down" else ":arrow_up:"
lines.append(
f"{emoji} *{c['label']}*: ${c['old_price']:.2f} → ${c['new_price']:.2f} ({c['change_pct']:+.1f}%)"
)
payload = json.dumps({"text": f"*Competitor Price Alert* — {len(changes)} change(s):\n" + "\n".join(lines)})
req = urllib.request.Request(
webhook_url,
data=payload.encode("utf-8"),
headers={"Content-Type": "application/json"}
)
urllib.request.urlopen(req)
Step 6: Scheduling
Run the monitor once a day with Python’s schedule library — no cron required.
def main():
conn = init_db()
slack_webhook = os.environ.get("SLACK_WEBHOOK_URL")
notify_fn = (lambda changes: notify_slack(changes, slack_webhook)) if slack_webhook else None
def daily_run():
print(f"\n[{datetime.utcnow().isoformat()}] Running price monitor...")
run_monitor(conn, notify_fn=notify_fn)
# Run immediately on startup, then daily at 08:00
daily_run()
schedule.every().day.at("08:00").do(daily_run)
print("Price monitor running. Press Ctrl+C to stop.")
while True:
schedule.run_pending()
time.sleep(60)
if __name__ == "__main__":
main()
Running on a server: use nohup python price_monitor.py & to keep it running after you disconnect. For production, wrap in systemd or Docker. The SQLite database persists between restarts — price history accumulates without any intervention.
Step 7: Price History Report
def print_price_report(conn: sqlite3.Connection, days: int = 30):
"""Print a summary of price history for all tracked URLs."""
rows = conn.execute("""
SELECT
url,
product_name,
MIN(price) as min_price,
MAX(price) as max_price,
(SELECT price FROM price_history p2
WHERE p2.url = p1.url ORDER BY scraped_at DESC LIMIT 1) as current_price,
COUNT(*) as checks,
MIN(scraped_at) as first_seen,
MAX(scraped_at) as last_seen
FROM price_history p1
WHERE scraped_at >= datetime('now', ?)
GROUP BY url
ORDER BY url
""", (f"-{days} days",)).fetchall()
print(f"\n{'URL':<50} {'Current':>10} {'Min':>10} {'Max':>10} {'Checks':>8}")
print("-" * 90)
for row in rows:
url_short = row[0][-47:] if len(row[0]) > 47 else row[0]
print(f"{url_short:<50} {row[4] or 'N/A':>10} {row[2] or 'N/A':>10} {row[3] or 'N/A':>10} {row[6]:>8}")
Why This Approach Outlasts Selectors
The LLM extraction step is more resilient than CSS selectors in one specific way: it understands what a price looks like in text, not just which HTML element it’s in. When a site changes its class names from .product-price to .pdp-current-price, or restructures its pricing section from a <span> to a <div>, your selectors break. The Markdown representation still contains the same text: $29.99 in a readable context that Claude can reliably identify.
The tradeoff is cost per extraction (a few fractions of a cent per Haiku call) and the small failure rate when the model mis-identifies a related price (a comparison price, the subscription price instead of one-time). For most monitoring use cases, both are acceptable.
The quality filter handles the other common failure mode: pages that are JS-rendered SPAs where requests.get() returns an empty skeleton. Pages below quality threshold 35 are flagged as errors rather than silently recording None — which is how you catch that your target changed their front-end rendering before it corrupts your history.
Clean Markdown. Reliable extraction.
UnWeb converts any URL to clean Markdown with a quality score on every response — the foundation for price monitoring, research pipelines, and any workflow that needs accurate data from the web. Free tier includes 500 conversions/month.