"""
Stake.com API discovery — Firefox + extended Cloudflare wait strategy.
Firefox is much less likely to be flagged than Chromium.
"""

import json
import time
from datetime import datetime

# Module-level store for everything collected during a discovery run.
# The response listeners append to "graphql_requests" and "rest_requests";
# run() replaces "cookies" with the browser context's cookies and also adds
# the "gql_probe_results" and "performance_entries" keys before the whole
# dict is dumped to JSON.
captured = {
    "graphql_requests": [],
    "rest_requests": [],
    "cookies": [],
}

# File extensions treated as static assets. is_boring() compares them against
# the lower-cased URL with everything after the first "?" removed.
BORING_EXTS = {".js", ".css", ".png", ".jpg", ".jpeg", ".svg", ".woff",
               ".woff2", ".ico", ".gif", ".webp", ".map", ".ttf", ".otf", ".eot"}
# Substrings that mark a URL as uninteresting noise. is_boring() looks for
# them anywhere in the lower-cased full URL (query string included, which is
# why an entry such as "rum?" can match).
BORING_KW = ["analytics", "gtm.", "google-analytics", "segment.io", "sentry",
             "hotjar", "intercom", "zendesk", "cdn-cgi", "challenge-platform",
             "rum?", "imgix.net", "sanity.io", "cloudflare"]

def is_boring(url):
    """Return True when ``url`` should be ignored by the capture logic.

    A URL is ignored when its path (the part before the first ``?``) ends
    with one of ``BORING_EXTS``, or when any ``BORING_KW`` substring occurs
    anywhere in the URL. Both checks are case-insensitive.
    """
    lowered = url.lower()
    path_only = lowered.split("?")[0]

    # str.endswith accepts a tuple of suffixes, covering every extension at once.
    if path_only.endswith(tuple(BORING_EXTS)):
        return True

    # Keywords are matched against the full URL, query string included.
    return any(keyword in lowered for keyword in BORING_KW)

def is_api(url):
    """Return True when ``url`` looks like an API / sportsbook data call.

    The check is a case-insensitive substring match against a fixed list of
    markers, so it is intentionally broad (e.g. any URL containing "bet" or
    "live" qualifies).
    """
    lowered = url.lower()
    markers = (
        "graphql",
        "_api/", "/api/", "sport", "event", "market", "odds", "bet",
        "match", "fixture", "league", "competition", "prematch", "live",
    )
    for marker in markers:
        if marker in lowered:
            return True
    return False

def handle_response_factory(page_label):
    """Build a ``response`` event listener that records API traffic.

    The returned callable is meant to be registered with ``page.on("response",
    ...)``. For every response whose URL passes ``is_boring`` / ``is_api`` it
    appends one entry to ``captured["graphql_requests"]`` (URL contains
    "graphql") or ``captured["rest_requests"]`` (everything else) and prints a
    one-line summary.

    Args:
        page_label: Free-form label stored in each entry's ``"page"`` field
            and shown in the log line, identifying which phase was active.

    Returns:
        A function taking a single response object. It never raises: any
        failure while inspecting a response is reported as a warning so one
        bad response cannot break the browser's event dispatch.
    """
    # Local import: only ``datetime`` itself is imported at file level.
    from datetime import timezone

    def handle_response(response):
        url = response.url
        if is_boring(url) or not is_api(url):
            return

        try:
            request = response.request
            method = request.method
            status = response.status
            resp_headers = dict(response.headers)
            req_headers = dict(request.headers)

            # Request payload: keep parsed JSON when possible, raw text otherwise.
            req_body_raw = request.post_data
            req_body = None
            if req_body_raw:
                try:
                    req_body = json.loads(req_body_raw)
                except (TypeError, ValueError):
                    req_body = req_body_raw

            # Response payload: only attempted for JSON content types.
            resp_body = None
            content_type = resp_headers.get("content-type", "").lower()
            if "json" in content_type:
                try:
                    resp_body = response.json()
                except Exception:
                    # Best effort: the body may be unavailable or not valid
                    # JSON; the entry is still recorded without it.
                    resp_body = None

            entry = {
                "page": page_label,
                "url": url,
                "method": method,
                "status": status,
                "request_headers": req_headers,
                "request_body": req_body,
                "response_body": resp_body,
                # Naive UTC ISO timestamp (same format as before), produced
                # without the deprecated datetime.utcnow().
                "ts": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            }

            if "graphql" in url.lower():
                op = None
                if isinstance(req_body, dict):
                    op = req_body.get("operationName")
                elif isinstance(req_body, list):
                    # Batched request: tolerate items that are not objects
                    # instead of losing the whole entry.
                    op = [
                        item.get("operationName") if isinstance(item, dict) else None
                        for item in req_body
                    ]
                entry["operation_name"] = op
                captured["graphql_requests"].append(entry)
                print(f"    [GQL:{page_label}] op={op!r:50} status={status}")
            else:
                captured["rest_requests"].append(entry)
                print(f"    [REST:{page_label}] {method} {url[:100]} status={status}")

        except Exception as e:
            # Never let a capture problem propagate into the event loop, but
            # do not hide it either.
            print(f"    [warn:{page_label}] capture failed for {url[:100]}: {str(e)[:80]}")

    return handle_response

def wait_for_cf(page, max_wait=30, poll_interval=3):
    """Poll ``page`` until it no longer looks like a Cloudflare challenge.

    The page is considered clear when its title does not contain
    "just a moment" and the first 500 characters of its lower-cased HTML do
    not contain "challenge".

    Args:
        page: Object exposing ``content()``, ``title()`` and
            ``wait_for_timeout(ms)`` (a Playwright sync ``Page``).
        max_wait: Maximum number of seconds to keep polling.
        poll_interval: Seconds to wait between polls.

    Returns:
        True as soon as the page looks clear, False if ``max_wait`` elapses
        first (including when ``max_wait`` is 0 or negative).
    """
    # Monotonic clock: elapsed time must not jump if the wall clock changes.
    start = time.monotonic()
    while time.monotonic() - start < max_wait:
        elapsed = int(time.monotonic() - start)
        try:
            content = page.content()
            title = page.title()
        except Exception as e:
            # Reading the page can fail while the challenge is redirecting
            # or reloading; treat that as "not ready yet" and poll again.
            print(f"      [CF] page not readable yet... {elapsed}s  ({str(e)[:60]})")
        else:
            if "just a moment" not in title.lower() and "challenge" not in content.lower()[:500]:
                return True
            print(f"      [CF] waiting... {elapsed}s  title={title[:40]}")

        # Wait through Playwright rather than time.sleep() so that browser
        # events keep being processed while we idle.
        try:
            page.wait_for_timeout(poll_interval * 1000)
        except Exception:
            # Page unusable for waiting (e.g. closed): still pace the loop.
            time.sleep(poll_interval)
    return False

def navigate_and_wait(page, url, label, cf_wait=30):
    """Open ``url``, wait out the Cloudflare check, then scroll the page.

    Args:
        page: Playwright sync ``Page`` to drive.
        url: Address to navigate to.
        label: Short name used only in log output.
        cf_wait: Seconds to allow ``wait_for_cf`` before giving up.

    Returns:
        False if the page still looks like a Cloudflare challenge after
        ``cf_wait`` seconds, True otherwise. Navigation, title and scroll
        errors are logged and do not raise.
    """
    print(f"\n  [*] {label}: {url}")
    try:
        page.goto(url, timeout=30000, wait_until="domcontentloaded")
    except Exception as e:
        # A goto timeout is not fatal: the page may still finish loading
        # while we poll for the challenge to clear.
        print(f"      [warn] goto: {str(e)[:80]}")

    if not wait_for_cf(page, max_wait=cf_wait):
        print(f"      [!] Still behind CF after {cf_wait}s")
        return False

    try:
        title = page.title()
    except Exception:
        # The title is only used for logging; a mid-navigation failure here
        # should not abort the phase.
        title = ""
    print(f"      [ok] page loaded: {page.url[:80]}  |  {title[:50]}")

    # Waits go through Playwright instead of time.sleep() so response events
    # keep being delivered to the listeners while the page settles.
    page.wait_for_timeout(3000)
    # scroll to trigger lazy loads
    for scroll in (600, 1200, 2000):
        try:
            page.evaluate(f"window.scrollTo(0, {scroll})")
        except Exception as e:
            print(f"      [warn] scroll: {str(e)[:80]}")
            break
        page.wait_for_timeout(1500)
    page.wait_for_timeout(2000)
    return True

_DEFAULT_RESULTS_PATH = (
    "/home/cyborg/Desktop/claude/arb_bot/tools/stake_discovery_results.json"
)

# In-page fetch used by the GraphQL probes. The query payload is handed over
# as the function argument by page.evaluate() instead of being spliced into
# the script text.
_GQL_PROBE_JS = """
    async (payload) => {
        try {
            const resp = await fetch('https://stake.com/_api/graphql', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Accept': 'application/json',
                },
                credentials: 'include',
                body: JSON.stringify(payload)
            });
            const text = await resp.text();
            return { status: resp.status, headers: Object.fromEntries(resp.headers), body: text.slice(0, 3000) };
        } catch(e) {
            return { error: e.toString() };
        }
    }
"""


def _swap_response_listener(page, current, label):
    """Detach ``current`` (if any) and attach a new response listener for ``label``.

    Returns the newly attached listener so the caller can detach that exact
    object later; a listener can only be removed via the same callable that
    was registered.
    """
    if current is not None:
        page.remove_listener("response", current)
    listener = handle_response_factory(label)
    page.on("response", listener)
    return listener


def _open_first_match(page):
    """Click the first element matching any known event/match selector."""
    try:
        # look for any event/match row link
        for sel in [
            "a[href*='/sports/soccer/']",
            "a[href*='/match/']",
            "[data-testid='event-row']",
            ".event-row a",
            "a[class*='event']",
        ]:
            el = page.query_selector(sel)
            if el:
                href = el.get_attribute("href") or ""
                print(f"    Found {sel} -> {href}")
                el.click()
                page.wait_for_timeout(5000)
                break
    except Exception as e:
        print(f"    [warn] click event: {e}")


def _probe_graphql(page, probe_queries):
    """POST each probe query from inside the page and collect the outcomes."""
    results = []
    for q in probe_queries:
        try:
            r = page.evaluate(_GQL_PROBE_JS, q)
        except Exception as e:
            # One failing probe must not prevent the remaining ones.
            print(f"  [warn] JS fetch probes: {e}")
            r = {"error": str(e)}
        print(f"  {q['operationName']}: status={r.get('status')} error={r.get('error','')}")
        if r.get("body"):
            body_preview = r["body"][:200]
            print(f"    body preview: {body_preview}")
        results.append({"query": q, "result": r})
    return results


def run(out_path=_DEFAULT_RESULTS_PATH):
    """Drive Firefox through the Stake.com sports pages and record API traffic.

    Phases: homepage, soccer (plus opening one match), basketball, tennis,
    the /ng/ soccer URL, direct GraphQL probes issued from inside the page,
    and finally a dump of Performance API resource entries and cookies.
    Everything is accumulated in the module-level ``captured`` dict and
    written to ``out_path`` as JSON.

    Args:
        out_path: Destination of the JSON results file. Missing parent
            directories are created. Defaults to the original fixed path.

    Returns:
        The ``captured`` dict.
    """
    from pathlib import Path

    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        print("[*] Launching Firefox (headless=False via Xvfb) ...")
        browser = p.firefox.launch(
            headless=False,
        )

        context = browser.new_context(
            viewport={"width": 1366, "height": 768},
            user_agent=(
                "Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0"
            ),
            locale="en-GB",
            timezone_id="Africa/Lagos",
            extra_http_headers={
                "Accept-Language": "en-GB,en;q=0.9",
            },
        )

        page = context.new_page()
        # Keep a reference to the active listener: building a new closure at
        # removal time would not match the registered one, so the old
        # listener would never be detached.
        listener = _swap_response_listener(page, None, "homepage")

        # ── 1. Homepage ───────────────────────────────────────────────────────
        print("\n[PHASE 1] Homepage")
        try:
            page.goto("https://stake.com/", timeout=30000, wait_until="domcontentloaded")
        except Exception as e:
            print(f"  [warn] {e}")
        wait_for_cf(page, 45)
        # Waits use page.wait_for_timeout() so response events keep flowing.
        page.wait_for_timeout(5000)

        # ── 2. Soccer page ────────────────────────────────────────────────────
        listener = _swap_response_listener(page, listener, "soccer")
        print("\n[PHASE 2] Soccer")
        navigate_and_wait(page, "https://stake.com/sports/soccer", "soccer", cf_wait=45)

        # Try clicking first match
        print("  Trying to open a match ...")
        _open_first_match(page)
        page.wait_for_timeout(3000)

        # ── 3. Basketball ─────────────────────────────────────────────────────
        listener = _swap_response_listener(page, listener, "basketball")
        print("\n[PHASE 3] Basketball")
        navigate_and_wait(page, "https://stake.com/sports/basketball", "basketball", cf_wait=30)

        # ── 4. Tennis ─────────────────────────────────────────────────────────
        listener = _swap_response_listener(page, listener, "tennis")
        print("\n[PHASE 4] Tennis")
        navigate_and_wait(page, "https://stake.com/sports/tennis", "tennis", cf_wait=30)

        # ── 5. Nigeria URL ────────────────────────────────────────────────────
        listener = _swap_response_listener(page, listener, "ng_soccer")
        print("\n[PHASE 5] Nigeria URL")
        try:
            page.goto("https://stake.com/ng/sports/soccer", timeout=20000,
                      wait_until="domcontentloaded")
            print(f"  Redirected to: {page.url}")
        except Exception as e:
            print(f"  [warn] ng: {e}")
        page.wait_for_timeout(5000)

        # ── 6. Direct GraphQL probes via in-page JS ───────────────────────────
        print("\n[PHASE 6] GraphQL direct probe via JS")
        # Go back to soccer to have valid cookies
        try:
            page.goto("https://stake.com/sports/soccer", timeout=20000,
                      wait_until="domcontentloaded")
        except Exception as e:
            print(f"  [warn] goto: {str(e)[:80]}")
        wait_for_cf(page, 30)
        page.wait_for_timeout(3000)

        # Known Stake.com GraphQL queries from community research
        probe_queries = [
            {
                "operationName": "SportsListQuery",
                "query": """query SportsListQuery {
                  sports {
                    id name slug icon
                    leagues { id name slug }
                  }
                }""",
                "variables": {}
            },
            {
                "operationName": "SportMatchesQuery",
                "query": """query SportMatchesQuery($sportSlug: String!, $status: SportMatchStatus, $limit: Int) {
                  sportMatches(sportSlug: $sportSlug, status: $status, limit: $limit) {
                    id name slug status startTime
                    tournament { id name }
                    homeTeam { id name }
                    awayTeam { id name }
                    betMarkets { id marketId name
                      outcomes { id name odds active }
                    }
                  }
                }""",
                "variables": {"sportSlug": "soccer", "status": "UPCOMING", "limit": 10}
            },
            {
                "operationName": "EventBettingQuery",
                "query": """query EventBettingQuery($id: ID!) {
                  sportEvent(id: $id) {
                    id name slug startTime status
                    homeTeam { name }
                    awayTeam { name }
                    markets {
                      id marketName outcomes { id name price }
                    }
                  }
                }""",
                "variables": {"id": "1"}
            },
            # Stake actually uses these known operations
            {
                "operationName": "sportBettingPrematch",
                "query": """query sportBettingPrematch($slug: String!) {
                  sport(slug: $slug) {
                    id name slug
                    fixtures(status: UPCOMING, limit: 20) {
                      id name slug startTime
                      home { name }
                      away { name }
                      markets {
                        name
                        outcomes { name price }
                      }
                    }
                  }
                }""",
                "variables": {"slug": "soccer"}
            },
        ]

        captured["gql_probe_results"] = _probe_graphql(page, probe_queries)

        # ── 7. Capture network requests from background page load ─────────────
        print("\n[PHASE 7] Capturing intercepted GQL from background")
        # Get actual requests the page itself made
        try:
            all_requests = page.evaluate("""
                () => {
                    if (window.performance) {
                        return performance.getEntriesByType('resource').map(r => ({
                            name: r.name,
                            initiatorType: r.initiatorType,
                            duration: Math.round(r.duration),
                        })).filter(r => r.name.includes('graphql') || r.name.includes('_api'));
                    }
                    return [];
                }
            """)
        except Exception as e:
            # Keep going so the data gathered so far is still saved.
            print(f"  [warn] performance entries: {str(e)[:80]}")
            all_requests = []
        print(f"  Performance API entries for API calls: {len(all_requests)}")
        for r in all_requests[:10]:
            print(f"    {r['initiatorType']} {r['name'][:100]} ({r['duration']}ms)")

        captured["performance_entries"] = all_requests
        try:
            captured["cookies"] = context.cookies()
        except Exception as e:
            print(f"  [warn] cookies: {str(e)[:80]}")

        browser.close()

    # ── Save ──────────────────────────────────────────────────────────────────
    out = str(out_path)
    # Make sure the target directory exists so a long run is not lost to a
    # missing folder at the very end.
    Path(out).parent.mkdir(parents=True, exist_ok=True)
    with open(out, "w", encoding="utf-8") as f:
        json.dump(captured, f, indent=2, default=str)

    print(f"\n[*] Results saved: {out}")
    print(f"[*] GQL intercepted: {len(captured['graphql_requests'])}")
    print(f"[*] REST intercepted: {len(captured['rest_requests'])}")
    return captured

if __name__ == "__main__":
    # Run the discovery, then print a condensed report of what was collected.
    results = run()
    rule = "=" * 70

    # --- Intercepted GraphQL traffic: first occurrence of each operation ---
    print("\n" + rule)
    print("ALL INTERCEPTED GQL REQUESTS (unique by operation)")
    print(rule)
    first_by_op = {}
    for record in results["graphql_requests"]:
        # setdefault keeps the earliest record; dicts preserve insertion order.
        first_by_op.setdefault(str(record.get("operation_name", "?")), record)
    for op_name, record in first_by_op.items():
        print(f"\n  op={op_name!r}  status={record['status']}")
        sent = record.get("request_body")
        if sent:
            print(f"  request: {json.dumps(sent)[:600]}")
        received = record.get("response_body")
        if received:
            print(f"  response: {json.dumps(received)[:1000]}")

    # --- Outcomes of the in-page GraphQL probes ---
    print("\n" + rule)
    print("DIRECT GRAPHQL PROBE RESULTS")
    print(rule)
    for probe in results.get("gql_probe_results", []):
        query, outcome = probe["query"], probe["result"]
        print(f"\n  op={query['operationName']!r}")
        print(f"  status={outcome.get('status')}  error={outcome.get('error','')}")
        text = outcome.get("body", "")
        if text:
            print(f"  body: {text[:800]}")

    # --- Cookie names per domain ---
    print("\n" + rule)
    print("COOKIES")
    print(rule)
    for cookie in results.get("cookies", []):
        print(f"  {cookie.get('domain', '')}  {cookie.get('name', '')}")
