"""
Stake.com API discovery — stealth approach with Cloudflare bypass attempts.
Uses playwright-stealth + realistic browser fingerprint + wait for CF challenge.
"""

import json
import sys
import time
from datetime import datetime, timezone

# Module-level accumulator: run() fills it in while the browser session is
# live, dumps it to JSON at the end, and also returns it to the caller.
captured = dict(
    graphql_requests=[],  # one entry per recorded GraphQL exchange
    rest_requests=[],     # one entry per recorded non-GraphQL exchange
    cookies=[],           # overwritten with the browser context's cookies
)

def is_boring(url):
    """Return True when *url* points at a static asset or a known noise host.

    A URL is considered boring when either:

    * its path (the URL with any ``?query`` and ``#fragment`` removed) ends
      in one of the static-asset extensions below, or
    * the full URL contains one of the analytics / anti-bot keywords below.

    The extension test deliberately ignores the query string and fragment so
    that cache-busted assets such as ``app.js?v=123`` are still filtered out,
    and so that an API URL is not discarded merely because a query parameter
    value happens to end in ``.png``.  The keyword test keeps using the full
    URL because some keywords (e.g. ``"rum?"``) include query syntax.

    Matching is case-insensitive.
    """
    boring_exts = (".js", ".css", ".png", ".jpg", ".svg", ".woff", ".woff2",
                   ".ico", ".gif", ".webp", ".map", ".ttf", ".otf")
    boring_kw = ("analytics", "gtm.", "google-analytics", "segment.io",
                 "sentry", "hotjar", "intercom", "zendesk", "cloudflare",
                 "cdn-cgi", "challenge-platform", "rum?")
    url_lower = url.lower()

    # Drop the fragment first, then the query, leaving scheme://host/path.
    path_part = url_lower.split("#", 1)[0].split("?", 1)[0]
    # str.endswith accepts a tuple, so one call covers every extension.
    if path_part.endswith(boring_exts):
        return True

    return any(kw in url_lower for kw in boring_kw)

def is_interesting(url, body=None):
    """Return True when *url* looks like a sportsbook / API endpoint.

    The lower-cased URL is scanned for a fixed set of substrings
    ("graphql" plus sports- and betting-related terms); any hit makes the
    URL interesting.  *body* is accepted for call-site compatibility but is
    not consulted.
    """
    haystack = url.lower()
    markers = (
        "graphql",
        "sport", "event", "market", "odds", "bet", "match",
        "fixture", "league", "soccer", "football", "basket",
        "tennis", "live", "upcoming", "prematch", "api",
    )
    for marker in markers:
        if marker in haystack:
            return True
    return False

_DEFAULT_OUT_PATH = "/home/cyborg/Desktop/claude/arb_bot/tools/stake_discovery_results.json"


def _looks_like_cf_challenge(html):
    """Return True when *html* contains the markers treated as a Cloudflare interstitial."""
    lowered = html.lower()
    return "challenge" in lowered or "just a moment" in lowered


def _launch_browser(p):
    """Launch headless Firefox, falling back to Chromium; return ``(browser, name)``."""
    print("[*] Launching Firefox ...")
    try:
        browser = p.firefox.launch(
            headless=True,
            firefox_user_prefs={
                "general.platform.override": "Linux x86_64",
            },
        )
        return browser, "firefox"
    except Exception as e:
        print(f"  Firefox failed: {e}, falling back to chromium")
        browser = p.chromium.launch(
            headless=True,
            args=[
                "--no-sandbox",
                "--disable-blink-features=AutomationControlled",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--window-size=1366,768",
            ]
        )
        return browser, "chromium"


def _handle_response(response):
    """Record one network response into ``captured`` if it looks relevant.

    Responses whose URL is boring, or not interesting, are ignored.  GraphQL
    exchanges (URL contains "graphql") go to ``captured["graphql_requests"]``
    with their operation name(s); everything else goes to
    ``captured["rest_requests"]``.  Any failure is reported as a warning and
    never propagates, so one bad response cannot break the event listener.
    """
    url = response.url
    if is_boring(url):
        return

    try:
        request = response.request
        method = request.method
        status = response.status
        resp_headers = dict(response.headers)
        req_headers = dict(request.headers)

        # Request body: parsed JSON when possible, raw text otherwise,
        # None when there is no (or an empty) body.
        req_body = None
        req_body_raw = request.post_data
        if req_body_raw:
            try:
                req_body = json.loads(req_body_raw)
            except ValueError:
                req_body = req_body_raw

        # Filter before touching the response body so bodies of irrelevant
        # responses are never fetched.
        if not is_interesting(url, req_body):
            return

        resp_body = None
        if "json" in resp_headers.get("content-type", ""):
            try:
                resp_body = response.json()
            except Exception:
                # Best effort: the body may be unavailable or not valid
                # JSON; the entry is still recorded without it.
                pass

        entry = {
            "url": url,
            "method": method,
            "status": status,
            "request_headers": req_headers,
            "request_body": req_body,
            "response_body": resp_body,
            # Timezone-aware UTC timestamp (datetime.utcnow() is deprecated).
            "ts": datetime.now(timezone.utc).isoformat(),
        }

        if "graphql" in url.lower():
            op = None
            if isinstance(req_body, dict):
                op = req_body.get("operationName", "?")
            elif isinstance(req_body, list):
                # Batched request: tolerate items that are not objects.
                op = [
                    item.get("operationName") if isinstance(item, dict) else None
                    for item in req_body
                ]
            entry["operation_name"] = op
            captured["graphql_requests"].append(entry)
            print(f"  [GQL] op={op} status={status}")
        else:
            captured["rest_requests"].append(entry)
            print(f"  [REST] {method} {url[:100]} status={status}")

    except Exception as e:
        print(f"  [warn] response handler: {e} ({url[:100]})")


def run(out_path=_DEFAULT_OUT_PATH):
    """Drive a headless browser through stake.com and record API traffic.

    The session visits the homepage and the soccer, basketball and tennis
    sports pages, waits when a Cloudflare challenge page is detected, issues
    two GraphQL probes from inside the page via ``fetch``, looks for embedded
    state objects on ``window``, and collects the context's cookies.  Every
    navigation / probe step is best-effort: failures are printed as warnings
    and the run continues.

    Args:
        out_path: File the accumulated ``captured`` dict is written to as
            JSON.  Defaults to the original hard-coded results path.

    Returns:
        The module-level ``captured`` dict (also written to *out_path*).

    Raises:
        ImportError: if ``playwright`` is not installed (``playwright_stealth``
            is optional and only produces a warning).
    """
    from playwright.sync_api import sync_playwright
    try:
        from playwright_stealth import stealth_sync
        has_stealth = True
    except ImportError:
        has_stealth = False
        print("[warn] playwright-stealth not available")

    print(f"[*] stealth available: {has_stealth}")

    with sync_playwright() as p:
        # Use firefox to avoid Chromium-specific bot detection
        browser, browser_name = _launch_browser(p)

        context = browser.new_context(
            viewport={"width": 1366, "height": 768},
            user_agent=(
                "Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0"
                if browser_name == "firefox" else
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
            ),
            locale="en-GB",
            timezone_id="Africa/Lagos",
            extra_http_headers={
                "Accept-Language": "en-GB,en;q=0.9",
                "sec-fetch-dest": "document",
                "sec-fetch-mode": "navigate",
                "sec-fetch-site": "none",
            }
        )

        page = context.new_page()
        # The stealth patches are only applied to the Chromium fallback.
        if has_stealth and browser_name == "chromium":
            stealth_sync(page)

        page.on("response", _handle_response)

        # ── Step 1: Hit homepage first (more natural) ────────────────────────
        print("\n[*] Visiting homepage first ...")
        try:
            page.goto("https://stake.com/", timeout=30000, wait_until="domcontentloaded")
            print(f"  URL after goto: {page.url}")
            # Check if we hit a Cloudflare challenge page
            if _looks_like_cf_challenge(page.content()):
                print("  [!] Cloudflare challenge detected, waiting 15s ...")
                time.sleep(15)
                print(f"  URL after wait: {page.url}")
        except Exception as e:
            print(f"  [warn] homepage: {e}")
        time.sleep(3)

        # ── Step 2: Soccer page ───────────────────────────────────────────────
        print("\n[*] Navigating to /sports/soccer ...")
        try:
            page.goto("https://stake.com/sports/soccer", timeout=45000,
                      wait_until="domcontentloaded")
            print(f"  URL: {page.url}  title: {page.title()[:60]}")
            if _looks_like_cf_challenge(page.content()):
                print("  [!] CF challenge page, waiting 20s ...")
                time.sleep(20)
        except Exception as e:
            print(f"  [warn] soccer: {e}")
        time.sleep(5)
        # Scroll to trigger lazily loaded content; a scripting failure here
        # must not abort the remaining steps.
        for delta in (600, 1200):
            try:
                page.evaluate(f"window.scrollBy(0, {delta})")
            except Exception as e:
                print(f"  [warn] scroll: {e}")
            time.sleep(3)

        # ── Step 3: Try to get page source to see what's there ─────────────
        try:
            title = page.title()
            url_now = page.url
            print(f"  Current page: {url_now} | title: {title}")
        except Exception as e:
            print(f"  [warn] page info: {e}")

        # ── Steps 4-5: Basketball, then tennis ───────────────────────────────
        for sport in ("basketball", "tennis"):
            print(f"\n[*] Navigating to /sports/{sport} ...")
            try:
                page.goto(f"https://stake.com/sports/{sport}", timeout=45000,
                          wait_until="domcontentloaded")
            except Exception as e:
                print(f"  [warn] {sport}: {e}")
            time.sleep(5)

        # ── Step 6: Try direct GraphQL probe ─────────────────────────────────
        print("\n[*] Probing GraphQL endpoint directly via JS fetch ...")
        try:
            result = page.evaluate("""
                async () => {
                    const queries = [
                        {
                            operationName: "SportUpcomingEvents",
                            query: `query SportUpcomingEvents($sportSlug: String!, $limit: Int) {
                              sport(slug: $sportSlug) {
                                id name slug
                                upcomingEvents(limit: $limit) {
                                  id slug name status startTime
                                  homeTeam { name }
                                  awayTeam { name }
                                  markets { id name outcomes { id name odds } }
                                }
                              }
                            }`,
                            variables: { sportSlug: "soccer", limit: 5 }
                        },
                        {
                            operationName: "SportsPage",
                            query: `query SportsPage {
                              sports { id name slug icon }
                            }`,
                            variables: {}
                        }
                    ];

                    const results = [];
                    for (const q of queries) {
                        try {
                            const resp = await fetch('https://stake.com/_api/graphql', {
                                method: 'POST',
                                headers: {
                                    'Content-Type': 'application/json',
                                    'x-access-token': '',
                                },
                                credentials: 'include',
                                body: JSON.stringify(q)
                            });
                            const data = await resp.json();
                            results.push({
                                op: q.operationName,
                                status: resp.status,
                                data: data
                            });
                        } catch(e) {
                            results.push({ op: q.operationName, error: e.toString() });
                        }
                    }
                    return results;
                }
            """)
            print("  JS fetch results:")
            for r in result:
                print(f"    op={r.get('op')} status={r.get('status')} error={r.get('error','')}")
                if r.get("data"):
                    print(f"    data keys: {list(r['data'].keys()) if isinstance(r['data'], dict) else 'list'}")
                    captured["graphql_requests"].append({
                        "source": "js_fetch",
                        "operation_name": r.get("op"),
                        "status": r.get("status"),
                        "response_body": r.get("data"),
                    })
        except Exception as e:
            print(f"  [warn] JS fetch: {e}")

        # ── Step 7: Extract any window.__STATE__ or similar ──────────────────
        print("\n[*] Checking for embedded state/config in page JS ...")
        try:
            state_data = page.evaluate("""
                () => {
                    const candidates = [
                        window.__NUXT__,
                        window.__INITIAL_STATE__,
                        window.__STATE__,
                        window.__NEXT_DATA__,
                        window.initialData,
                        window.stakeConfig,
                    ];
                    for (const c of candidates) {
                        if (c) return JSON.stringify(c).slice(0, 5000);
                    }
                    return null;
                }
            """)
            if state_data:
                print(f"  Found window state: {state_data[:300]}")
                captured["window_state"] = state_data
        except Exception as e:
            print(f"  [warn] window state: {e}")

        # ── Cookies ──────────────────────────────────────────────────────────
        captured["cookies"] = context.cookies()
        browser.close()

    # ── Save results ─────────────────────────────────────────────────────────
    with open(out_path, "w", encoding="utf-8") as f:
        # default=str keeps the dump from failing on non-JSON-native values.
        json.dump(captured, f, indent=2, default=str)

    print(f"\n[*] GQL requests: {len(captured['graphql_requests'])}")
    print(f"[*] REST requests: {len(captured['rest_requests'])}")
    print(f"[*] Cookies: {len(captured['cookies'])}")
    print(f"[*] Saved to {out_path}")

    return captured

if __name__ == "__main__":
    # Script entry point: run the discovery session, then print a digest of
    # what was recorded.
    results = run()
    banner = "=" * 70

    # GraphQL section: one block per distinct operation name (first seen wins).
    print("\n" + banner)
    print("GRAPHQL REQUESTS")
    print(banner)
    printed_ops = set()
    for record in results["graphql_requests"]:
        operation = record.get("operation_name", "?")
        dedupe_key = str(operation)  # batched requests carry a list of names
        if dedupe_key not in printed_ops:
            printed_ops.add(dedupe_key)
            print(f"\n[Operation: {operation}]")
            print(f"  Status: {record.get('status')}")
            request_body = record.get("request_body")
            if request_body:
                print(f"  Request body: {json.dumps(request_body, indent=4)[:1500]}")
            response_body = record.get("response_body")
            if response_body:
                print(f"  Response: {json.dumps(response_body, indent=4)[:2000]}")

    # REST section: every recorded request, with a truncated response preview.
    print("\n" + banner)
    print("REST REQUESTS")
    print(banner)
    for record in results["rest_requests"]:
        print(f"  {record['method']} {record['url']}")
        rest_payload = record.get("response_body")
        if rest_payload:
            print(f"    response: {json.dumps(rest_payload)[:500]}")
