"""
Shared validation for LLM-generated SQL (deep analysis tool + Pydantic reasoner output).
"""

from __future__ import annotations

import re

MAX_SQL_TOOL_ROWS = 100

# Whole-word forbidden tokens (read-only analytical tool).
_FORBIDDEN = re.compile(
    r"\b("
    r"MERGE|INSERT|UPDATE|DELETE|DROP|ALTER|CREATE|TRUNCATE|GRANT|REVOKE|"
    r"EXEC|EXECUTE|OPENROWSET|BULK\s+INSERT|"
    r"WAITFOR|SHUTDOWN|BACKUP|RESTORE|DBCC|KILL"
    r")\b",
    re.IGNORECASE,
)

# SELECT ... INTO (creates table / persists rows).
_SELECT_INTO = re.compile(
    r"\bSELECT\b[\s\S]*?\bINTO\s+(?:#|\[|@|\w)",
    re.IGNORECASE,
)

# Semicolon followed by another statement start (batch boundary). Do not use naive split(';')
# — semicolons appear inside string literals and T-SQL allows a trailing ';' on one statement.
_MULTI_STMT = re.compile(
    r";\s*\b(WITH|SELECT|INSERT|UPDATE|DELETE|MERGE|CREATE|DROP|ALTER|TRUNCATE|EXEC|EXECUTE)\b",
    re.IGNORECASE,
)


def normalize_llm_sql(query: str) -> str:
    """Strip whitespace, leading batch semicolons, and trailing statement terminators."""
    if query is None:
        return ""
    s = str(query).strip()
    while s.startswith(";"):
        s = s[1:].lstrip()
    s = s.rstrip().rstrip(";").rstrip()
    return s


def validate_llm_sql(query: str) -> str | None:
    """
    Return an error message if ``query`` is not allowed for the read-only tool.
    Return None if OK.
    """
    if query is None or not str(query).strip():
        return "Query is empty."
    q = normalize_llm_sql(query)
    if not q:
        return "Query is empty."
    if _MULTI_STMT.search(q):
        return "Only one SQL statement is allowed (no extra statements after a semicolon)."
    if _FORBIDDEN.search(q):
        return "Query contains forbidden SQL; only read-only SELECT (optionally with WITH) is allowed."
    if _SELECT_INTO.search(q):
        return "SELECT INTO is not allowed."
    lead = q.lstrip()
    lu = lead.upper()
    if not (lu.startswith("SELECT") or lu.startswith("WITH")):
        return "Query must begin with SELECT or WITH (CTE)."
    return None


def mssql_wrap_top_subquery(query: str, n: int = MAX_SQL_TOOL_ROWS) -> str:
    """
    Limit rows for plain ``SELECT`` by wrapping in ``SELECT TOP (n) * FROM ( … )``.

    ``WITH`` (CTE) queries cannot be placed inside that subquery in SQL Server
    ("Incorrect syntax near 'WITH'"); those are returned unchanged (leading ``;``
    for batch safety). Callers should cap result rows in application code.
    """
    inner = normalize_llm_sql(query)
    lu = inner.lstrip().upper()
    if lu.startswith("WITH"):
        return f";{inner}"
    return f"SELECT TOP ({n}) * FROM (\n{inner}\n) AS _eatance_sub"
