Building a Competitor Price Monitor in Python

Competitor price monitoring is one of those tasks that feels simple until you actually build it. You write CSS selectors for the price element, it works, and then the site redesigns three weeks later and your selectors return nothing. You fix them, they break again. Eventually you’re spending more time maintaining the scraper than you’re getting value from the data.

The approach in this post removes the selector maintenance problem entirely. Instead of targeting specific HTML elements, we convert each product page to clean Markdown and ask Claude to extract the price from the text. The LLM-based extraction handles layout changes automatically — as long as the price is visible on the page, it gets extracted, regardless of how the HTML is structured around it.

We’ll build a complete price monitor: scheduled page fetching, LLM extraction, SQLite history, and email or Slack alerts when prices change.

What We’re Building

A list of competitor product URLs to monitor
A daily job that fetches each page and extracts current pricing
SQLite storage for price history per URL
Change detection: alert when a price drops or rises beyond a threshold
A simple report: current prices, last change, price history

Setup

pip install unweb anthropic sqlite-utils schedule

import os
import json
import sqlite3
import asyncio
import schedule
import time
from datetime import datetime
from typing import Optional
from dataclasses import dataclass

import unweb
import anthropic

unweb_client = unweb.Client(api_key=os.environ["UNWEB_API_KEY"])
claude = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

Step 1: Database Setup

def init_db(db_path: str = "prices.db") -> sqlite3.Connection:
    conn = sqlite3.connect(db_path)
    conn.execute("""
        CREATE TABLE IF NOT EXISTS price_history (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            url TEXT NOT NULL,
            product_name TEXT,
            price REAL,
            currency TEXT DEFAULT 'USD',
            availability TEXT,
            quality_score INTEGER,
            scraped_at TEXT NOT NULL,
            raw_extraction TEXT
        )
    """)
    conn.execute("""
        CREATE INDEX IF NOT EXISTS idx_url_date
        ON price_history(url, scraped_at)
    """)
    conn.commit()
    return conn


def get_latest_price(conn: sqlite3.Connection, url: str) -> Optional[dict]:
    """Get the most recent price record for a URL."""
    row = conn.execute(
        "SELECT price, currency, scraped_at FROM price_history WHERE url = ? ORDER BY scraped_at DESC LIMIT 1",
        (url,)
    ).fetchone()
    if row:
        return {"price": row[0], "currency": row[1], "scraped_at": row[2]}
    return None


def save_price(conn: sqlite3.Connection, url: str, data: dict, quality_score: int):
    conn.execute("""
        INSERT INTO price_history (url, product_name, price, currency, availability, quality_score, scraped_at, raw_extraction)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """, (
        url,
        data.get("product_name"),
        data.get("price"),
        data.get("currency", "USD"),
        data.get("availability"),
        quality_score,
        datetime.utcnow().isoformat(),
        json.dumps(data)
    ))
    conn.commit()

Step 2: Fetch and Extract

@dataclass
class PriceData:
    product_name: Optional[str]
    price: Optional[float]
    currency: Optional[str]
    availability: Optional[str]
    original_price: Optional[float]   # price before discount, if shown
    discount_pct: Optional[int]        # discount percentage, if shown


def fetch_page_markdown(url: str, quality_threshold: int = 35) -> tuple[Optional[str], int]:
    """
    Returns (markdown, quality_score). markdown is None if page is below threshold.
    """
    try:
        result = unweb_client.convert.url(url)
        if result.quality_score < quality_threshold:
            return None, result.quality_score
        return result.markdown[:5000], result.quality_score
    except Exception as e:
        print(f"Fetch error for {url}: {e}")
        return None, 0


def extract_price(markdown: str) -> Optional[PriceData]:
    """Extract pricing data from Markdown using Claude."""
    message = claude.messages.create(
        model="claude-haiku-4-5-20251001",
        max_tokens=512,
        system=(
            "Extract pricing information from web page content. "
            "Return only valid JSON with no explanation. Use null for missing fields."
        ),
        messages=[{
            "role": "user",
            "content": f"""Extract pricing from this product page:

---
{markdown}
---

Return JSON with these fields:
- product_name (string): product name or title
- price (number): current price as a decimal, null if not found
- currency (string): currency code like "USD", "EUR", "GBP"
- availability (string): in stock, out of stock, pre-order, etc.
- original_price (number): original price if discounted, null otherwise
- discount_pct (integer): discount percentage if shown, null otherwise

JSON only."""
        }]
    )

    raw = message.content[0].text.strip()
    if raw.startswith("```"):
        raw = raw.split("```")[1].lstrip("json").strip()

    try:
        data = json.loads(raw)
        return PriceData(
            product_name=data.get("product_name"),
            price=data.get("price"),
            currency=data.get("currency", "USD"),
            availability=data.get("availability"),
            original_price=data.get("original_price"),
            discount_pct=data.get("discount_pct")
        )
    except (json.JSONDecodeError, KeyError) as e:
        print(f"Extraction parse error: {e}")
        return None

Step 3: Change Detection

def check_price_change(
    url: str,
    new_price: float,
    conn: sqlite3.Connection,
    alert_threshold_pct: float = 2.0
) -> Optional[dict]:
    """
    Compare new price to last recorded price.
    Returns a change dict if the price moved beyond the threshold, else None.
    """
    latest = get_latest_price(conn, url)
    if not latest or latest["price"] is None:
        return None  # no baseline to compare

    old_price = latest["price"]
    if old_price == 0:
        return None

    change_pct = ((new_price - old_price) / old_price) * 100

    if abs(change_pct) >= alert_threshold_pct:
        return {
            "url": url,
            "old_price": old_price,
            "new_price": new_price,
            "change_pct": round(change_pct, 2),
            "direction": "down" if change_pct < 0 else "up",
            "scraped_at": latest["scraped_at"]
        }
    return None

Step 4: The Monitor Loop

COMPETITORS = [
    {"url": "https://competitor-a.com/products/widget-pro", "label": "Competitor A — Widget Pro"},
    {"url": "https://competitor-b.com/shop/widget", "label": "Competitor B — Widget"},
    {"url": "https://competitor-c.com/catalog/widget-standard", "label": "Competitor C — Widget Standard"},
]

ALERT_THRESHOLD_PCT = 2.0   # alert on any price change >= 2%


def run_monitor(conn: sqlite3.Connection, notify_fn=None):
    """
    Fetch all competitor pages, extract prices, detect changes, and optionally notify.
    """
    changes = []
    errors = []

    for item in COMPETITORS:
        url = item["url"]
        label = item["label"]
        print(f"Checking {label}...")

        markdown, quality = fetch_page_markdown(url)
        if markdown is None:
            errors.append({"url": url, "label": label, "reason": f"low quality ({quality})"})
            continue

        price_data = extract_price(markdown)
        if price_data is None or price_data.price is None:
            errors.append({"url": url, "label": label, "reason": "extraction failed"})
            continue

        # Check for change before saving
        change = check_price_change(url, price_data.price, conn, ALERT_THRESHOLD_PCT)
        if change:
            change["label"] = label
            changes.append(change)

        # Save to history
        save_price(conn, url, {
            "product_name": price_data.product_name or label,
            "price": price_data.price,
            "currency": price_data.currency,
            "availability": price_data.availability,
            "original_price": price_data.original_price,
            "discount_pct": price_data.discount_pct,
        }, quality)

        print(f"  {label}: {price_data.currency} {price_data.price} (quality {quality})")

    if changes:
        print(f"\n⚡ {len(changes)} price change(s) detected:")
        for c in changes:
            direction = "↓" if c["direction"] == "down" else "↑"
            print(f"  {direction} {c['label']}: {c['old_price']} → {c['new_price']} ({c['change_pct']:+.1f}%)")

        if notify_fn:
            notify_fn(changes)

    if errors:
        print(f"\n⚠ {len(errors)} fetch/extraction error(s):")
        for e in errors:
            print(f"  {e['label']}: {e['reason']}")

    return changes, errors

Step 5: Alerts

Two simple notification options — email and Slack webhook. Drop whichever fits your setup.

import smtplib
import urllib.request
from email.mime.text import MIMEText


def notify_email(changes: list[dict], to_email: str, from_email: str, smtp_password: str):
    lines = []
    for c in changes:
        direction = "DROPPED" if c["direction"] == "down" else "INCREASED"
        lines.append(
            f"{c['label']}: price {direction} {c['old_price']:.2f} → {c['new_price']:.2f} ({c['change_pct']:+.1f}%)\n{c['url']}"
        )

    body = "Price changes detected:\n\n" + "\n\n".join(lines)
    msg = MIMEText(body)
    msg["Subject"] = f"Competitor Price Alert: {len(changes)} change(s)"
    msg["From"] = from_email
    msg["To"] = to_email

    with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
        server.login(from_email, smtp_password)
        server.sendmail(from_email, [to_email], msg.as_string())


def notify_slack(changes: list[dict], webhook_url: str):
    lines = []
    for c in changes:
        emoji = ":arrow_down:" if c["direction"] == "down" else ":arrow_up:"
        lines.append(
            f"{emoji} *{c['label']}*: ${c['old_price']:.2f} → ${c['new_price']:.2f} ({c['change_pct']:+.1f}%)"
        )

    payload = json.dumps({"text": f"*Competitor Price Alert* — {len(changes)} change(s):\n" + "\n".join(lines)})
    req = urllib.request.Request(
        webhook_url,
        data=payload.encode("utf-8"),
        headers={"Content-Type": "application/json"}
    )
    urllib.request.urlopen(req)

Step 6: Scheduling

Run the monitor once a day with Python’s schedule library — no cron required.

def main():
    conn = init_db()

    slack_webhook = os.environ.get("SLACK_WEBHOOK_URL")
    notify_fn = (lambda changes: notify_slack(changes, slack_webhook)) if slack_webhook else None

    def daily_run():
        print(f"\n[{datetime.utcnow().isoformat()}] Running price monitor...")
        run_monitor(conn, notify_fn=notify_fn)

    # Run immediately on startup, then daily at 08:00
    daily_run()
    schedule.every().day.at("08:00").do(daily_run)

    print("Price monitor running. Press Ctrl+C to stop.")
    while True:
        schedule.run_pending()
        time.sleep(60)


if __name__ == "__main__":
    main()

Running on a server: use nohup python price_monitor.py & to keep it running after you disconnect. For production, wrap in systemd or Docker. The SQLite database persists between restarts — price history accumulates without any intervention.

Step 7: Price History Report

def print_price_report(conn: sqlite3.Connection, days: int = 30):
    """Print a summary of price history for all tracked URLs."""
    rows = conn.execute("""
        SELECT
            url,
            product_name,
            MIN(price) as min_price,
            MAX(price) as max_price,
            (SELECT price FROM price_history p2
             WHERE p2.url = p1.url ORDER BY scraped_at DESC LIMIT 1) as current_price,
            COUNT(*) as checks,
            MIN(scraped_at) as first_seen,
            MAX(scraped_at) as last_seen
        FROM price_history p1
        WHERE scraped_at >= datetime('now', ?)
        GROUP BY url
        ORDER BY url
    """, (f"-{days} days",)).fetchall()

    print(f"\n{'URL':<50} {'Current':>10} {'Min':>10} {'Max':>10} {'Checks':>8}")
    print("-" * 90)
    for row in rows:
        url_short = row[0][-47:] if len(row[0]) > 47 else row[0]
        print(f"{url_short:<50} {row[4] or 'N/A':>10} {row[2] or 'N/A':>10} {row[3] or 'N/A':>10} {row[6]:>8}")

Why This Approach Outlasts Selectors

The LLM extraction step is more resilient than CSS selectors in one specific way: it understands what a price looks like in text, not just which HTML element it’s in. When a site changes its class names from .product-price to .pdp-current-price, or restructures its pricing section from a <span> to a <div>, your selectors break. The Markdown representation still contains the same text: $29.99 in a readable context that Claude can reliably identify.

The tradeoff is cost per extraction (a few fractions of a cent per Haiku call) and the small failure rate when the model mis-identifies a related price (a comparison price, the subscription price instead of one-time). For most monitoring use cases, both are acceptable.

The quality filter handles the other common failure mode: pages that are JS-rendered SPAs where requests.get() returns an empty skeleton. Pages below quality threshold 35 are flagged as errors rather than silently recording None — which is how you catch that your target changed their front-end rendering before it corrupts your history.

Clean Markdown. Reliable extraction.

UnWeb converts any URL to clean Markdown with a quality score on every response — the foundation for price monitoring, research pipelines, and any workflow that needs accurate data from the web. Free tier includes 500 conversions/month.

Get your API key