"""
Stake.com API discovery script.
Intercepts all XHR/fetch network requests while navigating sports pages.
Saves captured requests/responses to stake_discovery_results.json
"""

import json
import time
import sys
import os
import re
from datetime import datetime

# Shared accumulator for everything observed during a discovery run.
captured = dict(
    graphql_requests=[],
    rest_requests=[],
    other_requests=[],
    cookies=[],
    headers_seen={},
)

# GraphQL endpoint and the sports landing pages of interest.
GRAPHQL_URL = "https://stake.com/_api/graphql"
TARGET_URLS = [
    "https://stake.com/sports/soccer", "https://stake.com/sports/basketball",
    "https://stake.com/sports/tennis",
]

# GraphQL operation names we care about
INTERESTING_OPS = [
    # generic sportsbook vocabulary
    "sport",
    "event",
    "market",
    "match",
    "odds",
    "fixture",
    "upcoming",
    "prematch",
    "bet",
    "live",
    # individual sports
    "soccer",
    "football",
    "basketball",
    "tennis",
    # grouping concepts
    "league",
    "competition",
    "category",
    # camelCase prefixes
    "sportEvent",
    "sportMarket",
    "sportBet",
    "sportLive",
]

def is_interesting(url, body=None):
    """Decide whether a request looks sports/odds related.

    The URL is matched case-insensitively against a set of keywords
    (any GraphQL URL qualifies). If the URL does not match and a truthy
    ``body`` is given, its ``str()`` form is matched case-insensitively
    against a smaller keyword set.

    Args:
        url: Request URL as a string.
        body: Optional request payload (any type; converted with ``str``).

    Returns:
        True if the URL or the body contains one of the keywords,
        otherwise False.
    """
    lowered_url = url.lower()
    url_keywords = (
        "graphql", "sport", "event", "market", "odds", "bet", "match", "fixture",
    )
    for keyword in url_keywords:
        if keyword in lowered_url:
            return True

    # Nothing in the URL; fall back to the payload, if there is one.
    if not body:
        return False
    body_text = str(body).lower()
    for keyword in ("sport", "event", "market", "odds"):
        if keyword in body_text:
            return True
    return False

def truncate(data, max_len=3000):
    """Return a printable preview of ``data`` limited to ``max_len`` characters.

    Strings are used as-is; any other value is rendered as indented JSON.
    Values JSON cannot encode natively (bytes, datetimes, ...) are rendered
    with ``str()`` — the same fallback used when the results file is written —
    so building a preview never raises ``TypeError``.

    Args:
        data: A string or any value to render as JSON.
        max_len: Maximum number of characters kept before the marker.

    Returns:
        The text itself when it fits, otherwise its first ``max_len``
        characters followed by ``"...[truncated]"``.
    """
    if isinstance(data, str):
        text = data
    else:
        text = json.dumps(data, indent=2, default=str)
    if len(text) > max_len:
        return text[:max_len] + "...[truncated]"
    return text

# Default report location (the path this script has always written to).
_DEFAULT_OUT_PATH = "/home/cyborg/Desktop/claude/arb_bot/tools/stake_discovery_results.json"

# URL path suffixes that mark static assets we never record.
_STATIC_SUFFIXES = (
    ".js", ".css", ".png", ".jpg", ".svg", ".woff", ".woff2",
    ".ico", ".gif", ".webp", ".map",
)

# Substrings that mark analytics/support traffic we never record.
_TRACKER_KEYWORDS = (
    "analytics", "gtm", "segment", "sentry", "hotjar", "intercom", "zendesk",
)


def _is_noise(url):
    """Return True for static-asset and tracker URLs that should be ignored."""
    from urllib.parse import urlsplit

    try:
        # Match on the path only, so "app.js?v=123" is still seen as an asset.
        path = urlsplit(url).path
    except ValueError:
        # Malformed URL: fall back to matching on the raw string.
        path = url
    if path.endswith(_STATIC_SUFFIXES):
        return True
    return any(kw in url for kw in _TRACKER_KEYWORDS)


def _operation_names(req_body):
    """Extract the GraphQL operation name(s) from a parsed request body."""
    if isinstance(req_body, dict):
        return req_body.get("operationName", "")
    if isinstance(req_body, list):
        # Batched request: tolerate entries that are not objects instead of
        # failing and losing the whole request.
        return [
            item.get("operationName", "") if isinstance(item, dict) else None
            for item in req_body
        ]
    return None


def _record_response(response):
    """Store one interesting network response in the module-level ``captured``."""
    from datetime import timezone

    url = response.url
    if _is_noise(url):
        return

    try:
        request = response.request
        method = request.method
        status = response.status

        req_body = None
        raw_body = request.post_data
        if raw_body:
            try:
                req_body = json.loads(raw_body)
            except ValueError:
                req_body = raw_body  # not JSON: keep the raw payload

        # Decide before touching the response body so that bodies of
        # irrelevant responses are never fetched.
        if not is_interesting(url, req_body):
            return

        resp_headers = dict(response.headers)
        resp_body = None
        if "json" in resp_headers.get("content-type", ""):
            try:
                resp_body = response.json()
            except Exception:
                # Body unavailable or not valid JSON: record the entry without it.
                resp_body = None

        entry = {
            "url": url,
            "method": method,
            "status": status,
            "request_headers": dict(request.headers),
            "request_body": req_body,
            "response_headers": resp_headers,
            "response_body": resp_body,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

        if "graphql" in url.lower():
            op_name = _operation_names(req_body)
            entry["operation_name"] = op_name
            captured["graphql_requests"].append(entry)
            print(f"  [GQL] {op_name} | status={status}")
        else:
            captured["rest_requests"].append(entry)
            print(f"  [REST] {method} {url[:120]} | status={status}")

    except Exception as e:
        # Network errors on in-flight requests are expected; report them
        # instead of hiding them so real bugs stay visible.
        print(f"  [warn] response handler ({url[:120]}): {e}")


def _pause(page, ms):
    """Wait ``ms`` milliseconds through Playwright rather than ``time.sleep``."""
    try:
        page.wait_for_timeout(ms)
    except Exception as e:
        print(f"  [warn] wait: {e}")


def _scroll(page, pixels, settle_ms=2000):
    """Scroll down to trigger lazy loads; failures are reported, not raised."""
    try:
        page.evaluate(f"window.scrollBy(0, {pixels})")
    except Exception as e:
        print(f"  [warn] scroll: {e}")
        return
    _pause(page, settle_ms)


def _visit_sport(page, sport, warn_label, scrolls=1):
    """Open a sport landing page, let it settle, then scroll ``scrolls`` times."""
    print(f"\n[*] Navigating to stake.com/sports/{sport} ...")
    try:
        page.goto(
            f"https://stake.com/sports/{sport}",
            wait_until="networkidle",
            timeout=60000,
        )
    except Exception as e:
        print(f"  [warn] {warn_label}: {e}")
    _pause(page, 4000)
    for _ in range(scrolls):
        _scroll(page, 800)


def _open_first_soccer_event(page):
    """Click the first soccer event link on the current page, if there is one."""
    print("\n[*] Trying to click a soccer event ...")
    try:
        event_link = page.query_selector("a[href*='/sports/soccer/']")
        if event_link:
            href = event_link.get_attribute("href")
            print(f"  Found event link: {href}")
            event_link.click()
            page.wait_for_timeout(5000)
            page.evaluate("window.scrollBy(0, 600)")
            page.wait_for_timeout(2000)
    except Exception as e:
        print(f"  [warn] click event: {e}")


def run_discovery(out_path=_DEFAULT_OUT_PATH):
    """Browse Stake sports pages in headless Chromium and record API traffic.

    Visits the soccer, basketball and tennis pages (plus one soccer event and
    the ``/ng`` variant), recording every response that ``is_interesting``
    accepts. Results accumulate in the module-level ``captured`` dict and are
    written to ``out_path`` as JSON.

    Waits use ``page.wait_for_timeout`` instead of ``time.sleep``, following
    Playwright's guidance for the sync API, so pending responses can be
    processed while the script is waiting.

    Args:
        out_path: Destination of the JSON report. Missing parent directories
            are created. Defaults to the historical hard-coded location.

    Returns:
        A tuple ``(gql_by_op, rest_requests)``: the first captured GraphQL
        entry per operation name (keyed by ``str(operation_name)``) and the
        list of captured non-GraphQL entries.

    Raises:
        OSError: If the report file or its directory cannot be written.
    """
    from playwright.sync_api import sync_playwright

    print("[*] Starting Playwright Chromium...")
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=[
                "--no-sandbox",
                "--disable-setuid-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--window-size=1280,900",
            ]
        )
        try:
            context = browser.new_context(
                viewport={"width": 1280, "height": 900},
                user_agent=(
                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
                ),
                locale="en-NG",
                timezone_id="Africa/Lagos",
                extra_http_headers={
                    "Accept-Language": "en-NG,en-GB;q=0.9,en;q=0.8",
                }
            )
            page = context.new_page()
            page.on("response", _record_response)

            # Phase 1: soccer (scrolled twice to trigger lazy loads)
            _visit_sport(page, "soccer", "goto", scrolls=2)

            # Phase 2: open a soccer event, if any
            _open_first_soccer_event(page)

            # Phases 3 and 4: basketball, tennis
            _visit_sport(page, "basketball", "goto basketball")
            _visit_sport(page, "tennis", "goto tennis")

            # Phase 5: see where the /ng variant ends up
            print("\n[*] Checking stake.com/ng redirect ...")
            try:
                page.goto(
                    "https://stake.com/ng/sports/soccer",
                    wait_until="networkidle",
                    timeout=30000,
                )
                print(f"  Final URL after /ng: {page.url}")
            except Exception as e:
                print(f"  [warn] ng redirect: {e}")
            _pause(page, 3000)

            # Grab cookies
            captured["cookies"] = context.cookies()
            print(f"\n[*] Captured {len(captured['cookies'])} cookies")
        finally:
            # Always shut the browser down, even if a phase raised.
            browser.close()

    # Summarize
    print(f"\n[*] Total GraphQL requests captured: {len(captured['graphql_requests'])}")
    print(f"[*] Total REST requests captured:    {len(captured['rest_requests'])}")

    # Deduplicate GraphQL by operation name, keeping the first occurrence.
    gql_by_op = {}
    for req in captured["graphql_requests"]:
        gql_by_op.setdefault(str(req.get("operation_name")), req)

    print(f"\n[*] Unique GraphQL operations: {len(gql_by_op)}")
    for op in sorted(gql_by_op):
        print(f"  - {op}")

    # Save full results
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump({
            "graphql_by_operation": gql_by_op,
            "all_graphql": captured["graphql_requests"],
            "rest_requests": captured["rest_requests"],
            "cookies": captured["cookies"],
        }, f, indent=2, default=str)

    print(f"\n[*] Full results saved to: {out_path}")
    return gql_by_op, captured["rest_requests"]


def _print_report(gql_ops, rest):
    """Print the captured GraphQL operations and the first 20 REST requests.

    Args:
        gql_ops: Mapping of operation name to its captured request entry.
        rest: List of captured REST request entries.
    """
    banner = "=" * 70

    def dump_indented(payload):
        # JSON preview capped at 2000 characters, indented under its heading.
        preview = json.dumps(payload, indent=4)[:2000]
        return "    " + preview.replace("\n", "\n    ")

    print("\n" + banner)
    print("GRAPHQL OPERATIONS DETAIL")
    print(banner)
    for op_name in sorted(gql_ops):
        req = gql_ops[op_name]
        print(f"\n--- Operation: {op_name} ---")
        print(f"  URL:    {req['url']}")
        print(f"  Method: {req['method']}")
        print(f"  Status: {req['status']}")
        sections = (
            ("  Request Body:", "request_body"),
            ("  Response (truncated):", "response_body"),
        )
        for heading, field in sections:
            payload = req.get(field)
            if not payload:
                continue
            print(heading)
            print(dump_indented(payload))

    print("\n" + banner)
    print("REST REQUESTS DETAIL")
    print(banner)
    for req in rest[:20]:
        print(f"\n  {req['method']} {req['url'][:120]}")
        print(f"  Status: {req['status']}")


if __name__ == "__main__":
    gql_ops, rest = run_discovery()
    _print_report(gql_ops, rest)
