"""
Surebet247 Nigeria scraper.

Platform: sport-iframe.serhjs.xyz (betbook-transport)
Auth: API key in WS URL (no login required)

The platform uses a binary WebSocket with a custom msgpack-based RPC protocol
that is only accessible from a browser session. We use Playwright to capture
the WS frames, then decode them with msgpack.

WebSocket: wss://sport-iframe.serhjs.xyz/direct-feed/feed
           ?brand=CL38B1&X-Api-Key=b8b92942-ce6a-44e8-a5d5-6bf407e316a2

RPC frame format:
  Each frame = short length prefix (1 byte observed; the decoder tries 1-4 bytes)
               + msgpack([type, headers, id, method?, data?])
  type 4 = RPC request  → [4, {}, id, method, args]
  type 2 = RPC reply    → [2, {}, id, [success, data]]
  type 3 = reply ack    → [3, {}, id, 2]

Key RPC methods observed:
  GetTournamentsBySport(sport_code, stage, context) → tournament list
  GetRichEventsByTournamentIdAndStage(tournament_uuid, stage, context) → event list
  GetMainMarketsByProfileAndEventIds(profile, event_ids, version, flag, context) → markets

Event data structure (item in reply data list):
  [False, event_id_str, event_tuple]
  event_tuple: [sport, category_uuid, tournament_uuid, type, startTime, stage,
                name, tradingStatus, status, num_competitors, competitors, ...]
  type = 0 (regular match), 2 (special/long-term)
  stage = 1 (prematch), 2 (live)
  tradingStatus = 1 (opened), 2 (suspended), 3 (removed)
  competitors: [[comp_id, name, slug, short_name, None], ...]

Market data structure (item in reply data list):
  [False, market_key, [selection_groups, hash, version, ?, ?, is_suspended]]
  market_key: [event_id, version, market_type_id, period, None, None]
  selection_groups: [[specifiers_list, outcomes_list, is_suspended, ?, ?], ...]
  Each outcome: [[outcome_type_id, []], price_in_cents, ...]

Market type IDs:
  2  → 1X2             outcomes: 0=Home, 1=Draw, 3=Away
  1  → Winner/H/A      outcomes: 0=Home, 3=Away
  4  → Asian Handicap  outcomes: 86=Home, 87=Away; specifiers[0] = handicap line
  5  → O/U Total       outcomes: 4=Over, 5=Under; specifiers[0] = line e.g. '2.5'

Sport codes: F=football, B=basketball, T=tennis

Sport pages:
  football:   https://www.surebet247.com/sport/football
  basketball: https://www.surebet247.com/sport/basketball
  tennis:     https://www.surebet247.com/sport/tennis
"""
import logging
import queue
import re
import threading
import time
from datetime import datetime
from typing import List, Dict, Optional, Tuple

import msgpack
import psutil

from scrapers.base import BaseScraper
from core.models import Event, Outcome

logger = logging.getLogger(__name__)

# Root of the public site; sport pages are built as f'{BASE_SITE}/sport/{slug}'
# (prematch) and f'{BASE_SITE}/live/{slug}' (live).
BASE_SITE = 'https://www.surebet247.com'

# Internal sport name → URL slug. Sports missing from this map are not scraped
# (the scraper returns an empty list for them).
SPORT_SLUGS = {
    'football':   'football',
    'basketball': 'basketball',
    'tennis':     'tennis',
}

# Internal sport name → single-letter sport code listed in the module docstring.
# NOTE(review): not referenced by the code visible in this file — confirm it is
# still needed by other callers.
SPORT_CODES = {
    'football':   'F',
    'basketball': 'B',
    'tennis':     'T',
}

# market_type_id → (market_name, {outcome_type_id: label})
# For O/U and AH the line comes from the group's first specifier (e.g. '2.5')
# and is appended to the market name when parsing.
# Discovered via tools/discover_surebet247_markets.py + inspect_surebet247_btts.py
# (profile=pro_main_period):
#   type 1 → Home/Away      outcomes [0=Home, 3=Away]
#   type 2 → 1X2            outcomes [0,1,3]
#   type 4 → Asian Handicap outcomes [86=Home, 87=Away]; specifier = handicap line
#             NOTE: NOT BTTS — was mislabeled. BTTS is absent from pro_main_period.
#   type 5 → O/U            outcomes [4=Over, 5=Under]; specifier = line e.g. '2.5'
MARKET_TYPES: Dict[int, Tuple[str, Dict[int, str]]] = {
    2: ('1X2',            {0: 'Home', 1: 'Draw', 3: 'Away'}),
    1: ('Home/Away',      {0: 'Home', 3: 'Away'}),
    4: ('Asian Handicap', {86: 'Home', 87: 'Away'}),
    5: ('Over/Under',     {4: 'Over', 5: 'Under'}),
}

# Market types kept per sport when parsing captured frames; markets of any
# other type are ignored for that sport.
SPORT_MARKETS = {
    'football':   {2, 4, 5},  # 1X2 + AH + O/U
    'basketball': {1, 5},      # H/A + O/U
    'tennis':     {1},         # H/A only
}

# Number of recognised outcomes a selection group must contain to be accepted;
# groups with any other count are dropped as incomplete.
EXPECTED_OUTCOMES = {
    2: 3,   # 1X2: Home/Draw/Away
    1: 2,   # H/A: Home/Away
    4: 2,   # AH: Home/Away
    5: 2,   # O/U: Over/Under
}

# Upper bound, in seconds, on the post-scroll wait for the WebSocket feed to go
# quiet (the browser loads events progressively).
LOAD_WAIT = 25
# Number of mouse-wheel scrolls performed to trigger loading of more data
SCROLL_CYCLES = 8


def _decode_frame(data: bytes):
    """Best-effort decode of one binary WS frame.

    A frame is a short length prefix followed by a msgpack body. The prefix
    width is not known up front, so prefixes of 1, 2, 3 and 4 bytes are tried
    in that order and the first body that unpacks cleanly is returned.

    Returns the unpacked object, or None when the frame is empty, shorter than
    two bytes, or cannot be unpacked with any of the tried prefix widths.
    """
    if not data or len(data) <= 1:
        return None

    prefix_len = 1
    while prefix_len <= 4:
        try:
            body = msgpack.unpackb(data[prefix_len:], raw=False, strict_map_key=False)
        except Exception:
            # Wrong prefix width (or garbage) — try the next width.
            prefix_len += 1
        else:
            return body
    return None


def _request_methods_by_id(frames_sent: list) -> Dict[str, str]:
    """Map the ID of every sent RPC request (frame type 4) to its method name."""
    method_by_id: Dict[str, str] = {}
    for frame in frames_sent:
        if not isinstance(frame, bytes):
            continue
        decoded = _decode_frame(frame)
        if (isinstance(decoded, (list, tuple))
                and len(decoded) >= 4 and decoded[0] == 4):
            method_by_id[str(decoded[2])] = str(decoded[3])
    return method_by_id


def _competitor_name(competitor) -> str:
    """Return the name (index 1) of a competitor entry, or '' if missing/malformed."""
    if isinstance(competitor, (list, tuple)) and len(competitor) > 1:
        name = competitor[1]
        if isinstance(name, str):
            return name
    return ''


def _parse_event_meta(ev_data, sport: str, stage: int) -> Optional[dict]:
    """Convert one event tuple into the metadata dict used to build Events.

    Returns None when the tuple is malformed or the event is filtered out
    (not a regular match, wrong stage, not open for trading, no team names).
    """
    if not isinstance(ev_data, (list, tuple)) or len(ev_data) < 11:
        return None

    ev_type        = ev_data[3]   # 0=regular, 2=special/longterm
    start_time     = ev_data[4]   # Unix timestamp
    ev_stage       = ev_data[5]   # 1=prematch, 2=live
    ev_name        = ev_data[6]   # 'Home - Away'
    trading_status = ev_data[7]   # 1=opened
    competitors    = ev_data[10]  # list of [id, name, slug, shortName, None]

    if ev_type != 0 or ev_stage != stage or trading_status != 1:
        return None
    if not isinstance(competitors, (list, tuple)) or len(competitors) < 2:
        return None

    home = _competitor_name(competitors[0])
    away = _competitor_name(competitors[1])
    if not home or not away:
        # Fallback: split the event name on the ' - ' separator.
        head, sep, tail = str(ev_name).partition(' - ')
        if not sep:
            return None
        home, away = head.strip(), tail.strip()
        if not home or not away:
            return None

    # League name: first plausible (non-URL, longer than 1 char) string found
    # at indices 12..19 of the event tuple.
    league = ''
    for idx in range(12, min(20, len(ev_data))):
        val = ev_data[idx]
        if isinstance(val, str) and len(val) > 1 and not val.startswith('http'):
            league = val
            break

    starts_at = None
    if start_time:
        # Same naive-UTC value datetime.utcfromtimestamp() produced, without
        # relying on that deprecated constructor.
        from datetime import timezone
        try:
            starts_at = datetime.fromtimestamp(
                start_time, tz=timezone.utc).replace(tzinfo=None)
        except (TypeError, ValueError, OverflowError, OSError):
            starts_at = None

    return {
        'home': home,
        'away': away,
        'starts_at': starts_at,
        'league': league,
        'sport': sport,
    }


def _group_market_name(market_type, base_name: str, specifiers_wrapper) -> Optional[str]:
    """Return the market name for one selection group, or None to skip the group.

    Markets other than O/U (5) and AH (4) use ``base_name`` unchanged. O/U and
    AH take their line from the first element of the first specifier
    (``[['2.5']]``); groups without a string line are skipped.
    """
    if market_type not in (5, 4):
        return base_name

    if (not isinstance(specifiers_wrapper, (list, tuple))
            or not specifiers_wrapper
            or not isinstance(specifiers_wrapper[0], (list, tuple))
            or not specifiers_wrapper[0]
            or not isinstance(specifiers_wrapper[0][0], str)):
        return None
    line = specifiers_wrapper[0][0]

    if market_type == 5:
        return f'Over/Under {line}'

    # Normalise AH line: reject quarter-ball, prefix '+' for positives.
    try:
        val = float(line)
    except ValueError:
        return None
    if abs(val * 4) % 2 != 0:
        return None  # quarter-ball (not a multiple of 0.5) — skip
    if val == 0:
        val = 0.0    # collapse '-0' onto '0' so the market name is stable
    line = f'+{val:g}' if val > 0 else f'{val:g}'
    return f'Asian Handicap {line}'


def _parse_outcomes(outcomes_raw, outcome_map: Dict[int, str]) -> list:
    """Build Outcome objects for the entries of one group that map to a known label."""
    outcomes = []
    for oc in outcomes_raw:
        if not isinstance(oc, (list, tuple)) or len(oc) < 2:
            continue
        oc_key  = oc[0]   # [type_id, []]
        price_c = oc[1]   # price in cents (213 → 2.13)
        if not isinstance(oc_key, (list, tuple)) or not oc_key:
            continue
        try:
            label = outcome_map.get(oc_key[0])
        except TypeError:  # unhashable type id — cannot be a known outcome
            continue
        if label is None:
            continue
        try:
            odds = float(price_c) / 100.0
        except (TypeError, ValueError):
            continue
        if odds <= 1.0:
            continue
        outcomes.append(Outcome(
            name=label,
            odds=odds,
            bookmaker='Surebet247',
            event_url=None,
        ))
    return outcomes


def _parse_market_item(item_key, market_val, wanted_markets) -> list:
    """Parse one market entry into a list of ``(market_name, outcomes)`` pairs.

    ``item_key`` is the market key ``[event_id, version, market_type_id,
    period, ...]`` (at least 3 elements, checked by the caller). Returns an
    empty list when the market type is not wanted/known, the period is not
    full-game for O/U and AH, or the payload is malformed.
    """
    market_type = item_key[2]
    period      = item_key[3] if len(item_key) > 3 else 0

    try:
        if market_type not in wanted_markets or market_type not in MARKET_TYPES:
            return []
    except TypeError:  # unhashable market type id
        return []

    # O/U and AH: only accept full-game (period 0) to avoid
    # phantom arbs between Betjara full-game lines and surebet247
    # 1st-half / quarter totals that happen to share the same line value.
    if market_type in (5, 4) and period != 0:
        return []

    if not isinstance(market_val, (list, tuple)) or not market_val:
        return []
    selection_groups = market_val[0]
    if not isinstance(selection_groups, (list, tuple)):
        return []

    base_name, outcome_map = MARKET_TYPES[market_type]
    expected = EXPECTED_OUTCOMES.get(market_type, 2)

    parsed = []
    # Each group: [specifiers_wrapper, outcomes_list, is_suspended, ...]
    # specifiers_wrapper: [[]] for no specifier, [['2.5']] for O/U line
    for group in selection_groups:
        if not isinstance(group, (list, tuple)) or len(group) < 3:
            continue
        specifiers_wrapper, outcomes_raw, sel_suspended = group[0], group[1], group[2]
        if sel_suspended or not isinstance(outcomes_raw, (list, tuple)):
            continue

        market_name = _group_market_name(market_type, base_name, specifiers_wrapper)
        if market_name is None:
            continue

        outcomes = _parse_outcomes(outcomes_raw, outcome_map)
        # Drop incomplete groups (e.g. one side missing or priced <= 1.0).
        if len(outcomes) == expected:
            parsed.append((market_name, outcomes))
    return parsed


def _parse_events_from_frames(
    frames_recv: list,
    frames_sent: list,
    sport: str,
    stage: int = 1,
) -> List[Event]:
    """
    Parse raw WS frames into Event objects.

    Uses sent frames to map request IDs → methods, then filters received
    frames to only parse data from relevant RPC replies:
      - GetRichEventsByTournamentIdAndStage / GetRichEventsByIds → event metadata
      - GetMainMarketsByProfileAndEventIds                       → odds per event

    Args:
        frames_recv: payloads received on the feed WebSocket; non-bytes
            entries are ignored.
        frames_sent: payloads sent on the feed WebSocket; non-bytes entries
            are ignored.
        sport: internal sport name; selects the wanted market types from
            SPORT_MARKETS and is copied onto each Event.
        stage: only events whose stage field equals this value are kept
            (1 = prematch, 2 = live).

    Returns:
        One Event per (event, market) pair for events that have at least one
        accepted market. Events without markets are omitted.
    """
    wanted_markets = SPORT_MARKETS.get(sport, set())

    # Only replies to these requests are parsed.
    method_by_id = _request_methods_by_id(frames_sent)
    rich_event_ids = {
        rid for rid, m in method_by_id.items()
        if m in ('GetRichEventsByTournamentIdAndStage', 'GetRichEventsByIds')
    }
    market_ids = {
        rid for rid, m in method_by_id.items()
        if m == 'GetMainMarketsByProfileAndEventIds'
    }

    events_by_id: Dict[str, dict] = {}
    markets_by_event: Dict[str, list] = {}

    for frame in frames_recv:
        if not isinstance(frame, bytes):
            continue
        decoded = _decode_frame(frame)
        if not decoded or not isinstance(decoded, (list, tuple)):
            continue
        if len(decoded) < 4 or decoded[0] != 2:  # only RPC replies
            continue

        # Skip replies for methods we don't care about
        rid = str(decoded[2])
        is_rich_event = rid in rich_event_ids
        is_market     = rid in market_ids
        if not is_rich_event and not is_market:
            continue

        reply_data = decoded[3]   # [success, data]
        if not isinstance(reply_data, (list, tuple)) or len(reply_data) < 2:
            continue
        if not reply_data[0]:
            continue
        items = reply_data[1]
        if not isinstance(items, (list, tuple)):
            continue

        for item in items:
            if not isinstance(item, (list, tuple)) or len(item) < 3:
                continue
            # item[0] is always False (some flag)
            item_key = item[1]

            # ── Event entry: item_key is an int or string (event ID) ─────
            if is_rich_event and isinstance(item_key, (int, str)):
                meta = _parse_event_meta(item[2], sport, stage)
                if meta is not None:
                    # Key by str so the join below matches markets, which are
                    # always keyed by str(event_id).
                    events_by_id[str(item_key)] = meta

            # ── Market entry: item_key is a list (market key) ────────────
            elif (is_market and isinstance(item_key, (list, tuple))
                    and len(item_key) >= 3):
                parsed = _parse_market_item(item_key, item[2], wanted_markets)
                if parsed:
                    markets_by_event.setdefault(str(item_key[0]), []).extend(parsed)

    # Build Event objects by joining events with their markets
    result: List[Event] = []
    for event_id, ev in events_by_id.items():
        mkt_list = markets_by_event.get(event_id, [])
        if not mkt_list:
            continue
        home_slug = re.sub(r'[^a-z0-9]+', '-', ev['home'].lower()).strip('-')
        away_slug = re.sub(r'[^a-z0-9]+', '-', ev['away'].lower()).strip('-')
        event_url = f'https://surebet247.com/gr8-sport/events/{home_slug}-{away_slug}-{event_id}'
        for market_name, outcomes in mkt_list:
            for o in outcomes:
                o.event_url = event_url
            result.append(Event(
                event_id  = f'sb247_{event_id}_{market_name.replace(" ", "_")}',
                bookmaker = 'Surebet247',
                sport     = ev['sport'],
                home_team = ev['home'],
                away_team = ev['away'],
                market    = market_name,
                outcomes  = outcomes,
                starts_at = ev['starts_at'],
                league    = ev['league'],
            ))

    return result


class _Surebet247Worker(threading.Thread):
    """
    Dedicated thread that owns a Playwright browser.

    The thread launches Chromium, opens one page, and then serves callables
    submitted through :meth:`call`; each callable is invoked with that page on
    the worker thread, so all Playwright calls happen on the thread that
    created the browser.

    The browser is kept alive between calls for efficiency.
    """

    def __init__(self):
        """Start the thread and block (up to 120 s) until the browser is ready.

        Raises:
            RuntimeError: the browser did not become ready within 120 s.
            Exception: whatever browser start-up raised on the worker thread.
        """
        super().__init__(daemon=True, name='surebet247-playwright')
        self._q: queue.Queue = queue.Queue()
        self._ready = threading.Event()
        self._failed: Optional[Exception] = None
        self._pw = None
        self.start()
        if not self._ready.wait(timeout=120):
            # Don't leak a half-started browser: kill what exists and queue the
            # shutdown sentinel so the thread exits if start-up completes late.
            self.stop()
            raise RuntimeError('[Surebet247] browser worker timed out during init')
        if self._failed:
            raise self._failed

    @staticmethod
    def _shutdown(browser, pw):
        """Best-effort close of the browser and Playwright driver (either may be None)."""
        if browser is not None:
            try:
                browser.close()
            except Exception:
                pass
        if pw is not None:
            try:
                pw.stop()
            except Exception:
                pass

    def run(self):
        """Thread body: start the browser, then serve queued calls until stopped."""
        pw = browser = None
        try:
            from playwright.sync_api import sync_playwright
            pw = sync_playwright().start()
            self._pw = pw
            browser = pw.chromium.launch(
                headless=True,
                args=[
                    '--no-sandbox',
                    '--disable-blink-features=AutomationControlled',
                    '--disable-gpu',
                    '--disable-dev-shm-usage',
                    '--disable-extensions',
                    '--disable-background-networking',
                    '--no-first-run',
                ],
            )
            ctx = browser.new_context(
                user_agent=(
                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                    'AppleWebKit/537.36 (KHTML, like Gecko) '
                    'Chrome/120.0.0.0 Safari/537.36'
                ),
                viewport={'width': 1280, 'height': 900},
                locale='en-US',
            )
            # Create a blank page — sport pages navigate fresh per call so that
            # the websocket handler is set up before the WS connection opens.
            self._page = ctx.new_page()
            self._ctx = ctx
            self._browser = browser
            logger.info('[Surebet247] browser worker ready')
        except Exception as ex:
            self._failed = ex
            # Start-up may have failed after the driver/browser was launched;
            # close whatever exists so it is not left running.
            self._shutdown(browser, pw)
            self._ready.set()
            return

        self._ready.set()

        while True:
            item = self._q.get()
            if item is None:   # shutdown sentinel queued by stop()
                break
            fn, result_q = item
            try:
                result_q.put(('ok', fn(self._page)))
            except Exception as ex:
                result_q.put(('err', ex))

        self._shutdown(browser, pw)

    def call(self, fn, timeout: float = 120):
        """Run ``fn(page)`` on the worker thread and return its result.

        Args:
            fn: callable taking the worker's page as its only argument.
            timeout: seconds to wait for the result.

        Raises:
            TimeoutError: no result arrived within ``timeout`` seconds. The
                request is not cancelled; it may still be queued or running.
            Exception: whatever ``fn`` raised on the worker thread.
        """
        result_q: queue.Queue = queue.Queue()
        self._q.put((fn, result_q))
        try:
            kind, val = result_q.get(timeout=timeout)
        except queue.Empty:
            raise TimeoutError('[Surebet247] worker timed out') from None
        if kind == 'err':
            raise val
        return val

    def _kill_processes(self):
        """Force-kill the Playwright node driver and all its Chrome children."""
        pw, self._pw = self._pw, None
        if pw is None:
            return
        try:
            # Reaches into Playwright private attributes to find the driver
            # PID; any failure here (layout change, process already gone) is
            # deliberately non-fatal.
            node_pid = pw._impl_obj._connection._transport._proc.pid
            proc = psutil.Process(node_pid)
            for child in proc.children(recursive=True):
                try:
                    child.kill()
                except psutil.NoSuchProcess:
                    pass
            proc.kill()
        except Exception:
            pass

    def stop(self):
        """Kill the browser processes, fail pending requests, and end the thread loop."""
        self._kill_processes()
        while True:
            try:
                pending = self._q.get_nowait()
            except queue.Empty:
                break
            if pending is not None:
                # Fail queued callers immediately instead of leaving them
                # blocked until their own timeout expires.
                _fn, result_q = pending
                result_q.put(('err', RuntimeError('[Surebet247] worker stopped')))
        self._q.put(None)


def _scrape_sport_page(page, sport: str, sport_slug: str, stage: int = 1) -> List[Event]:
    """Navigate to sport page, capture WS frames, parse into Events.

    stage=1 → prematch: /sport/{slug}
    stage=2 → live:     /sport/{slug} is loaded first to prime the session,
                        then /live/{slug}

    Args:
        page: Playwright sync-API page to drive.
        sport: internal sport name, passed through to the frame parser.
        sport_slug: URL slug of the sport.
        stage: 1 for prematch, 2 for live.

    Returns:
        Events parsed from the binary frames captured on the 'direct-feed'
        WebSocket during this call.
    """
    frames_recv = []
    frames_sent = []
    last_recv = [time.monotonic()]
    capturing = [True]   # cleared on exit so late frames are not accumulated

    def handle_ws(ws):
        if 'direct-feed' not in ws.url:
            return

        def on_recv(payload):
            if capturing[0] and isinstance(payload, bytes):
                frames_recv.append(payload)
                last_recv[0] = time.monotonic()

        def on_sent(payload):
            if capturing[0] and isinstance(payload, bytes):
                frames_sent.append(payload)

        ws.on('framereceived', on_recv)
        ws.on('framesent', on_sent)

    page.on('websocket', handle_ws)
    try:
        # All waits use page.wait_for_timeout() rather than time.sleep():
        # with the Playwright sync API, time.sleep() blocks event dispatch, so
        # WS frame events would not be delivered while waiting.
        if stage == 2:
            # The sport-iframe WebSocket only connects on /live/{slug} (not /sport/{slug}/live).
            # A prior navigation to the prematch page is required to establish the iframe
            # session before the live page will send WS frames.
            prime_url = f'{BASE_SITE}/sport/{sport_slug}'
            logger.info(f'[Surebet247] priming session via {prime_url}')
            page.goto(prime_url, wait_until='domcontentloaded', timeout=60_000)
            page.wait_for_timeout(4_000)
            url = f'{BASE_SITE}/live/{sport_slug}'
        else:
            url = f'{BASE_SITE}/sport/{sport_slug}'

        logger.info(f'[Surebet247] navigating to {url}')
        page.goto(url, wait_until='domcontentloaded', timeout=60_000)

        # Let initial data load
        page.wait_for_timeout(8_000)

        # Scroll to trigger loading of more tournaments/events
        for _ in range(SCROLL_CYCLES):
            page.mouse.wheel(0, 700)
            page.wait_for_timeout(600)

        # Wait for silence (no new WS data for 3 seconds), up to LOAD_WAIT total
        deadline = time.monotonic() + LOAD_WAIT
        while time.monotonic() < deadline:
            page.wait_for_timeout(1_000)
            if frames_recv and time.monotonic() - last_recv[0] > 3.0:
                break
    finally:
        # Stop capturing even when navigation fails: the page is reused across
        # calls, so a leftover listener would keep feeding these lists.
        capturing[0] = False
        try:
            page.remove_listener('websocket', handle_ws)
        except Exception:
            pass

    label = 'live' if stage == 2 else 'prematch'
    logger.info(f'[Surebet247] captured {len(frames_recv)} recv / {len(frames_sent)} sent frames for {sport} ({label})')
    return _parse_events_from_frames(frames_recv, frames_sent, sport, stage=stage)


class Surebet247Scraper(BaseScraper):
    """Scraper for Surebet247 backed by one shared Playwright worker thread.

    The worker is stored on the class and created lazily on first use. After
    any scrape error the worker that failed is stopped, and a fresh one is
    created on the next call.
    """

    # Shared browser worker (None until first use or after a reset).
    _worker: Optional[_Surebet247Worker] = None
    # Guards creation and replacement of _worker.
    _worker_lock = threading.Lock()

    def __init__(self):
        super().__init__('Surebet247')

    def _get_worker(self) -> _Surebet247Worker:
        """Return the shared worker, creating it if there is none."""
        with self.__class__._worker_lock:
            if self.__class__._worker is None:
                self.__class__._worker = _Surebet247Worker()
            return self.__class__._worker

    def _reset_worker(self, failed: Optional[_Surebet247Worker] = None):
        """Stop a worker (killing its browser) and clear the shared reference.

        Args:
            failed: the worker that misbehaved. When given and it is no longer
                the shared worker (another thread already replaced it), only
                ``failed`` is stopped and the replacement is left untouched.
                When omitted, the current shared worker is reset.
        """
        cls = self.__class__
        with cls._worker_lock:
            if failed is not None and cls._worker is not failed:
                old = failed
            else:
                old = cls._worker
                cls._worker = None
        if old:
            try:
                old.stop()
            except Exception:
                pass

    def _scrape(self, sport: str, stage: int, label: str) -> List[Event]:
        """Scrape one sport at one stage; returns [] on unknown sport or any error.

        ``label`` is the log prefix placed before the sport name ('' for
        prematch, 'live ' for live).
        """
        sport_slug = SPORT_SLUGS.get(sport)
        if not sport_slug:
            return []
        worker = None
        try:
            worker = self._get_worker()
            events = worker.call(
                lambda page: _scrape_sport_page(page, sport, sport_slug, stage=stage),
                timeout=120,
            )
            logger.info(f'[Surebet247] {label}{sport}: {len(events)} events parsed')
            return events
        except Exception as ex:
            logger.error(f'[Surebet247] {label}{sport} error: {ex}')
            # Reset only the worker this call used, so a healthy replacement
            # created meanwhile by another thread is not torn down.
            self._reset_worker(worker)
            return []

    def get_events(self, sport: str) -> List[Event]:
        """Return prematch events for ``sport`` ([] if unsupported or on error)."""
        return self._scrape(sport, stage=1, label='')

    def get_live_events(self, sport: str) -> List[Event]:
        """Return live events for ``sport`` ([] if unsupported or on error)."""
        return self._scrape(sport, stage=2, label='live ')
