"""
Repository for profiling storage (SQLAlchemy ORM).

Published vs staging
--------------------
* **user_profiling** — what admins read; updated by API/forced runs and by
  :meth:`publish_staging_to_published`.
* **user_profiling_staging** — batch writes only; no tracker updates until publish.
* **publish_staging_to_published** — one transaction: MERGE staging → published,
  MERGE tracker from staging user_ids, TRUNCATE staging. Rollback leaves published unchanged.
"""

from __future__ import annotations

import json
import os
import threading
from datetime import date, datetime, timedelta, timezone
from decimal import Decimal
from typing import Any, Dict, Optional
from urllib.parse import quote_plus

from sqlalchemy import create_engine, select, text
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session, sessionmaker

from core.logger_config import get_logger
from storage.db_model import Base, UserProfiling, UserProfilingStaging, UserProfilingTracker

logger = get_logger(__name__)


def _tracker_row_for_user(session: Session, user_id: int) -> Optional[UserProfilingTracker]:
    """Return the tracker row for *user_id*, or ``None`` when no row exists."""
    stmt = (
        select(UserProfilingTracker)
        .where(UserProfilingTracker.user_id == user_id)
        .limit(1)
    )
    return session.scalars(stmt).first()


# Process-wide engine singleton, built lazily by get_engine(); the lock guards
# first-time construction across threads.
_engine: Optional[Engine] = None
_engine_lock = threading.Lock()


def _json_default(obj: Any) -> Any:
    """json.dumps fallback: ISO-format dates/datetimes, float for Decimal, str otherwise."""
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    return float(obj) if isinstance(obj, Decimal) else str(obj)


def profile_data_to_json(profile_data: Dict[str, Any]) -> str:
    """Serialize *profile_data* to a JSON string, preserving non-ASCII characters."""
    return json.dumps(profile_data, default=_json_default, ensure_ascii=False)


def _build_engine() -> Engine:
    """
    Build the SQLAlchemy engine from environment variables.

    Required env vars: ``DB_USER`` (non-empty) and ``DB_PASSWORD``.
    Optional: ``DB_SERVER`` (default ``localhost``), ``DB_NAME`` (default
    ``profiling_db``), ``DB_DRIVER`` (default ODBC Driver 18 for SQL Server),
    ``DB_POOL_SIZE`` (default 7) and ``DB_TRUST_SERVER_CERTIFICATE``
    (default ``yes``; set to a non-truthy value to disable).

    Raises:
        ValueError: when DB_USER or DB_PASSWORD is missing/blank.
    """
    server = os.getenv("DB_SERVER", "localhost")
    database = os.getenv("DB_NAME", "profiling_db")
    raw_user = os.getenv("DB_USER")
    raw_password = os.getenv("DB_PASSWORD")
    if raw_user is None or not str(raw_user).strip():
        raise ValueError(
            "DB_USER must be set to a non-empty value (no default credentials). "
            "Set it in the environment or .env."
        )
    if raw_password is None:
        raise ValueError(
            "DB_PASSWORD must be set (no default). Set it in the environment or .env."
        )
    username = str(raw_user).strip()
    password = raw_password
    driver_raw = os.getenv("DB_DRIVER", "{ODBC Driver 18 for SQL Server}")
    # Accept the value with or without braces; we re-brace it ourselves below.
    driver = driver_raw.strip("{}")
    # Default 7: align with pyodbc pool (see core.db_connection) for concurrent fetch + staging
    pool_size = int(os.getenv("DB_POOL_SIZE", "7"))

    # Fix: DB_TRUST_SERVER_CERTIFICATE was parsed but never used — the connection
    # string always hard-coded TrustServerCertificate=yes. It is now honored, with
    # a default of "yes" so existing deployments keep the previous behavior.
    trust_raw = os.getenv("DB_TRUST_SERVER_CERTIFICATE", "yes").strip().lower()
    trust_cert = trust_raw in ("1", "true", "yes", "on")
    odbc_parts = [
        f"Driver={{{driver}}};",
        f"Server={server};",
        f"Database={database};",
        f"UID={username};",
        f"PWD={password};",
        f"TrustServerCertificate={'yes' if trust_cert else 'no'};",
    ]
    odbc = "".join(odbc_parts)
    # pyodbc takes the whole ODBC string URL-encoded in odbc_connect.
    url = f"mssql+pyodbc:///?odbc_connect={quote_plus(odbc)}"
    return create_engine(
        url,
        pool_pre_ping=True,  # revalidate connections checked out after idling
        pool_size=pool_size,
        max_overflow=0,  # hard cap: never exceed the configured pool size
    )


def get_engine() -> Engine:
    """Return the process-wide engine, building it on first use (thread-safe)."""
    global _engine
    with _engine_lock:
        engine = _engine
        if engine is None:
            engine = _build_engine()
            _engine = engine
        return engine


# --- Publish path (staging -> published), executed in one transaction -------------
# Delete+insert (not MERGE) so legacy duplicate user_id rows in published are removed
# before inserting one row per staging user (staging has PK user_id).
_DELETE_PUBLISHED_FOR_STAGING_USERS = text(
    """
    DELETE FROM dbo.user_profiling
    WHERE user_id IN (SELECT user_id FROM dbo.user_profiling_staging);
    """
)
# Staging's updated_at is carried over as the published row's created_at.
_INSERT_PUBLISHED_FROM_STAGING = text(
    """
    INSERT INTO dbo.user_profiling (user_id, created_at, profile_json, deep_analysis_markdown)
    SELECT user_id, updated_at, profile_json, deep_analysis_markdown
    FROM dbo.user_profiling_staging;
    """
)

# Upsert last_completed_at = SYSUTCDATETIME() for every distinct staging user.
# DISTINCT guards the MERGE against duplicate-source errors (staging PK already
# enforces uniqueness, so this is belt-and-braces).
_MERGE_TRACKER_FROM_STAGING = text(
    """
    MERGE dbo.user_profiling_tracker AS T
    USING (SELECT DISTINCT user_id FROM dbo.user_profiling_staging) AS S
    ON T.user_id = S.user_id
    WHEN MATCHED THEN
      UPDATE SET last_completed_at = SYSUTCDATETIME()
    WHEN NOT MATCHED BY TARGET THEN
      INSERT (user_id, last_completed_at)
      VALUES (S.user_id, SYSUTCDATETIME());
    """
)

# TRUNCATE participates in the publish transaction and rolls back with it on error.
_TRUNCATE_STAGING = text("TRUNCATE TABLE dbo.user_profiling_staging;")

# --- Single-user published upsert -------------------------------------------------
# Legacy dbo.user_profiling: ``id`` IDENTITY + ``user_id``. ORM INSERT as user_id-PK caused Msg 545.
# MERGE ON user_id fails if multiple rows share the same user_id. Replace-all: delete then insert (one row).
_DELETE_PUBLISHED_FOR_USER = text("DELETE FROM dbo.user_profiling WHERE user_id = :user_id;")
_INSERT_PUBLISHED_ROW = text(
    """
    INSERT INTO dbo.user_profiling (user_id, created_at, profile_json, deep_analysis_markdown)
    VALUES (:user_id, :created_at, :profile_json, :deep_analysis_markdown);
    """
)


class ProfilingRepository:
    """
    Data access for published profiles, staging rows, and the completion tracker.

    * ``user_profiling`` (published) — admin read path; written by
      :meth:`upsert_published` and :meth:`publish_staging_to_published`.
    * ``user_profiling_staging`` — batch write path; no tracker updates until publish.
    * ``user_profiling_tracker`` — ``last_completed_at`` per user, used to skip
      recently-profiled users on scheduled runs.
    """

    def __init__(self, engine: Optional[Engine] = None):
        """Bind the repository to *engine*, defaulting to the env-driven singleton."""
        self._engine = engine or get_engine()
        self._sf = sessionmaker(
            bind=self._engine,
            class_=Session,
            expire_on_commit=False,  # rows remain readable after commit/expunge
            autoflush=False,
            autocommit=False,
        )

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _utcnow_naive() -> datetime:
        """Current UTC time as a naive datetime (columns store naive UTC values)."""
        return datetime.now(timezone.utc).replace(tzinfo=None)

    @staticmethod
    def _upsert_tracker(session: Session, user_id: int, completed_at: datetime) -> None:
        """Set ``last_completed_at`` for *user_id*, inserting the tracker row if missing.

        Shared by :meth:`upsert_published` and :meth:`touch_tracker` (previously
        duplicated in both).
        """
        tr = _tracker_row_for_user(session, user_id)
        if tr is None:
            session.add(UserProfilingTracker(user_id=user_id, last_completed_at=completed_at))
        else:
            tr.last_completed_at = completed_at

    @staticmethod
    def _latest_published(session: Session, user_id: int) -> Optional[UserProfiling]:
        """Latest published row for *user_id*.

        Highest ``id`` wins: the legacy table may hold duplicate user_id rows.
        """
        return session.scalars(
            select(UserProfiling)
            .where(UserProfiling.user_id == user_id)
            .order_by(UserProfiling.id.desc())
            .limit(1)
        ).first()

    # ------------------------------------------------------------------ schema / staging

    def ensure_tables(self) -> None:
        """Create any missing ORM-mapped tables (idempotent)."""
        Base.metadata.create_all(self._engine)
        logger.info("Ensured ORM metadata (user_profiling, user_profiling_staging, tracker).")

    def truncate_staging(self) -> None:
        """Empty staging before a new full batch (call only when starting from user id 0)."""
        with self._sf() as session:
            with session.begin():
                session.execute(_TRUNCATE_STAGING)
        logger.info("Truncated dbo.user_profiling_staging.")

    # ------------------------------------------------------------------ writes

    def upsert_published(
        self,
        user_id: int,
        profile_data: Dict[str, Any],
        deep_analysis_markdown: Optional[str] = None,
    ) -> None:
        """
        API / forced path: upsert **published** row + tracker in one transaction.

        ``deep_analysis_markdown=None`` means omit: existing stored markdown is kept.
        Pass ``""`` (or whitespace-only, normalized to empty) to clear the column.
        """
        payload = profile_data_to_json(profile_data)
        preserve_markdown = deep_analysis_markdown is None
        # "" and whitespace-only both normalize to NULL (clear the column).
        deep_val: Optional[str] = (
            deep_analysis_markdown
            if deep_analysis_markdown and deep_analysis_markdown.strip()
            else None
        )
        now = self._utcnow_naive()

        with self._sf() as session:
            with session.begin():
                if preserve_markdown:
                    prev = self._latest_published(session, user_id)
                    # Carry forward previously stored markdown only when non-blank.
                    if prev is not None and prev.deep_analysis_markdown:
                        if str(prev.deep_analysis_markdown).strip():
                            deep_val = prev.deep_analysis_markdown

                # Replace-all (delete then insert): legacy table has an IDENTITY id
                # and may contain duplicate user_id rows (see SQL constants above).
                session.execute(_DELETE_PUBLISHED_FOR_USER, {"user_id": user_id})
                session.execute(
                    _INSERT_PUBLISHED_ROW,
                    {
                        "user_id": user_id,
                        "created_at": now,
                        "profile_json": payload,
                        "deep_analysis_markdown": deep_val,
                    },
                )
                self._upsert_tracker(session, user_id, now)

        logger.info("Upserted published user_profiling user_id=%s", user_id)

    def upsert_staging(
        self,
        user_id: int,
        profile_data: Dict[str, Any],
        deep_analysis_markdown: Optional[str] = None,
    ) -> None:
        """
        Batch path: upsert **staging** only (no tracker). Published/API data unchanged.

        ``None``, ``""`` and whitespace-only markdown all store NULL.
        """
        payload = profile_data_to_json(profile_data)
        # Simplified from a redundant two-step conditional.
        deep_val = (
            deep_analysis_markdown
            if deep_analysis_markdown and deep_analysis_markdown.strip()
            else None
        )
        now = self._utcnow_naive()

        with self._sf() as session:
            with session.begin():
                row = session.get(UserProfilingStaging, user_id)
                if row is None:
                    row = UserProfilingStaging(user_id=user_id)
                    session.add(row)
                row.profile_json = payload
                row.deep_analysis_markdown = deep_val
                row.updated_at = now

        logger.debug("Upserted staging user_id=%s", user_id)

    def publish_staging_to_published(self) -> None:
        """
        Promote all staging rows to published + refresh tracker for those users + clear staging.
        Single transaction; on error nothing visible changes for readers.
        """
        with self._sf() as session:
            with session.begin():
                session.execute(_DELETE_PUBLISHED_FOR_STAGING_USERS)
                session.execute(_INSERT_PUBLISHED_FROM_STAGING)
                session.execute(_MERGE_TRACKER_FROM_STAGING)
                session.execute(_TRUNCATE_STAGING)
        logger.info("Published staging → user_profiling and truncated staging.")

    # ------------------------------------------------------------------ tracker

    def should_skip_scheduled_run(self, user_id: int, recency_days: int) -> bool:
        """True when the user completed a run within the last *recency_days* days.

        ``recency_days <= 0`` disables skipping entirely.
        """
        if recency_days <= 0:
            return False
        cutoff = self._utcnow_naive() - timedelta(days=recency_days)
        with self._sf() as session:
            stmt = (
                select(UserProfilingTracker.user_id)
                .where(UserProfilingTracker.user_id == user_id)
                .where(UserProfilingTracker.last_completed_at >= cutoff)
                .limit(1)
            )
            return session.scalar(stmt) is not None

    def touch_tracker(self, user_id: int) -> None:
        """Mark *user_id* as completed now (upsert tracker row)."""
        now = self._utcnow_naive()
        with self._sf() as session:
            with session.begin():
                self._upsert_tracker(session, user_id, now)

    def get_tracker_row(self, user_id: int) -> Optional[UserProfilingTracker]:
        """Return the detached tracker row for *user_id*, or ``None``."""
        with self._sf() as session:
            row = _tracker_row_for_user(session, user_id)
            if row is not None:
                # Detach so the row stays usable after the session closes.
                session.expunge(row)
            return row

    # ------------------------------------------------------------------ reads

    def get_snapshot(self, user_id: int) -> Optional[UserProfiling]:
        """Latest published row for user_id (admin read path), detached from the session."""
        with self._sf() as session:
            row = self._latest_published(session, user_id)
            if row is not None:
                session.expunge(row)
            return row

    # Backward-compatible name used by facade
    def insert_snapshot(
        self,
        user_id: int,
        profile_data: Dict[str, Any],
        deep_analysis_markdown: Optional[str] = None,
    ) -> None:
        """Alias for :meth:`upsert_published` (kept for the facade's older API)."""
        self.upsert_published(user_id, profile_data, deep_analysis_markdown)


# Default repository singleton, built lazily by get_profiling_repository();
# the lock guards first-time construction across threads.
_repo_singleton: Optional[ProfilingRepository] = None
_repo_lock = threading.Lock()


def get_profiling_repository(engine: Optional[Engine] = None) -> ProfilingRepository:
    """
    Default singleton uses env-driven :func:`get_engine`.
    Pass a SQLAlchemy :class:`~sqlalchemy.engine.Engine` for tests or an alternate database.
    """
    # An explicit engine always yields a fresh, non-singleton repository.
    if engine is not None:
        return ProfilingRepository(engine=engine)
    global _repo_singleton
    with _repo_lock:
        repo = _repo_singleton
        if repo is None:
            repo = ProfilingRepository()
            _repo_singleton = repo
        return repo
