"""
Helper functions for the Deep Analysis Agent.
"""

import os
import re
import json
from datetime import datetime, date

from core.logger_config import get_logger

logger = get_logger(__name__)


def load_analytical_schema(base_dir: str | None = None) -> str:
    """
    Load the analytical schema markdown from file.
    If base_dir is None, uses the directory containing this module.
    """
    if base_dir is None:
        base_dir = os.path.dirname(__file__)
    try:
        schema_path = os.path.join(base_dir, "analytical_schema.md")
        with open(schema_path, "r") as f:
            return f.read()
    except Exception as e:
        logger.error(f"Error loading analytical_schema.md: {e}")
        return "STRICT SCHEMA: (Schema file not found. Fallback to default schema.)"


def _strip_inner(text: str) -> str:
    """Remove HTML tags from text and normalize whitespace."""
    if not text:
        return ""
    s = re.sub(r"<[^>]+>", "", text)
    s = " ".join(s.split())
    return s.strip()


def _html_to_markdown(content: str) -> str:
    """
    Convert simple HTML (h1–h3, ul/ol/li, p) to Markdown. No heavy dependencies.
    Matches tags case-insensitively and allows attributes (e.g. ``<p class="...">``).
    """
    if not content or not re.search(
        r"<\s*(h[1-3]|p|ul|ol|li)\b", content, flags=re.IGNORECASE
    ):
        return content
    # Headings: extract inner text (strip nested tags)
    content = re.sub(
        r"<h1\b[^>]*>(.*?)</h1>",
        lambda m: "# " + _strip_inner(m.group(1)) + "\n\n",
        content,
        flags=re.DOTALL | re.IGNORECASE,
    )
    content = re.sub(
        r"<h2\b[^>]*>(.*?)</h2>",
        lambda m: "## " + _strip_inner(m.group(1)) + "\n\n",
        content,
        flags=re.DOTALL | re.IGNORECASE,
    )
    content = re.sub(
        r"<h3\b[^>]*>(.*?)</h3>",
        lambda m: "### " + _strip_inner(m.group(1)) + "\n\n",
        content,
        flags=re.DOTALL | re.IGNORECASE,
    )
    # Paragraphs
    content = re.sub(
        r"<p\b[^>]*>(.*?)</p>",
        lambda m: _strip_inner(m.group(1)) + "\n\n",
        content,
        flags=re.DOTALL | re.IGNORECASE,
    )
    # List items (after <p> so <li><p>...</p></li> is already flattened)
    content = re.sub(
        r"<li\b[^>]*>(.*?)</li>",
        lambda m: "- " + _strip_inner(m.group(1)) + "\n",
        content,
        flags=re.DOTALL | re.IGNORECASE,
    )
    # Remove list containers
    content = re.sub(r"</?ul\b[^>]*>", "\n", content, flags=re.IGNORECASE)
    content = re.sub(r"</?ol\b[^>]*>", "\n", content, flags=re.IGNORECASE)
    return content


def serialize_sql_result(result) -> str:
    """
    Render a query result as pretty-printed JSON (2-space indent) for LLM
    consumption.

    Values the json module cannot encode natively are converted by a fallback:
    datetime/date objects become ISO-8601 strings, anything else becomes
    ``str(value)``.
    """
    def _encode_fallback(value):
        is_temporal = isinstance(value, (datetime, date))
        return value.isoformat() if is_temporal else str(value)

    return json.dumps(result, indent=2, default=_encode_fallback)


def normalize_markdown_output(content: str) -> str:
    """
    Clean up LLM output so that it renders as proper Markdown.

    Literal escape sequences (a backslash followed by n, r, t, a quote, or one
    of ``# _ * - [ ] ( )``) are replaced by the characters they stand for, runs
    of backslashes are collapsed to one, line endings are unified to a line
    feed, simple HTML is handed to ``_html_to_markdown``, and runs of three or
    more newlines are squeezed down to two.

    The unescaping steps use plain ``str.replace`` calls and run in a fixed
    order that later steps depend on.

    Always returns a string: non-string input (including None) and blank input
    both yield "".
    """
    if not isinstance(content, str):
        return ""
    text = content.strip()
    if not text:
        return ""

    def replace_until_stable(value, pairs, max_rounds):
        # Apply every (escaped, plain) pair in order, repeating the whole
        # sequence until a round changes nothing or max_rounds is reached.
        for _ in range(max_rounds):
            before = value
            for escaped, plain in pairs:
                value = value.replace(escaped, plain)
            if value == before:
                break
        return value

    def unify_line_endings(value):
        # CRLF first so it does not turn into two line feeds.
        return value.replace("\r\n", "\n").replace("\r", "\n")

    # Step 1: literal \n / \r, doubly-escaped form first.
    newline_pairs = (
        ("\\\\n", "\n"),
        ("\\\\r", "\r"),
        ("\\n", "\n"),
        ("\\r", "\r"),
    )
    text = replace_until_stable(text, newline_pairs, 8)
    text = unify_line_endings(text)

    # Step 2: escaped quotes.
    quote_pairs = (
        ('\\\\"', '"'),
        ("\\\\'", "'"),
        ('\\"', '"'),
        ("\\'", "'"),
    )
    text = replace_until_stable(text, quote_pairs, 4)

    # Step 3: escaped Markdown punctuation, doubly-escaped form first for
    # each character.
    punctuation_pairs = (
        ("\\\\#", "#"), ("\\#", "#"),
        ("\\\\_", "_"), ("\\_", "_"),
        ("\\\\*", "*"), ("\\*", "*"),
        ("\\\\-", "-"), ("\\-", "-"),
        ("\\\\[", "["), ("\\[", "["),
        ("\\\\]", "]"), ("\\]", "]"),
        ("\\\\(", "("), ("\\(", "("),
        ("\\\\)", ")"), ("\\)", ")"),
    )
    text = replace_until_stable(text, punctuation_pairs, 4)

    # Step 4: collapse any run of backslashes to a single backslash. This runs
    # after the steps above so they see the doubled forms first.
    while "\\\\" in text:
        text = text.replace("\\\\", "\\")

    # Step 5: literal \t becomes a real tab.
    text = text.replace("\\t", "\t")

    # Step 6: defensive sweep for \n, \r and \' that the backslash collapse in
    # step 4 may have exposed.
    text = text.replace("\\n", "\n").replace("\\r", "\r").replace("\\'", "'")
    text = unify_line_endings(text)

    # Step 7: convert simple HTML, if any, to Markdown.
    text = _html_to_markdown(text)

    # Step 8: squeeze 3+ consecutive newlines down to exactly 2.
    while "\n\n\n" in text:
        text = text.replace("\n\n\n", "\n\n")

    return text.strip()