import os
import time
import json
import uuid
import platform
# MySQL-only implementation; sqlite3 removed
from .db import get_connection
import logging
import hashlib
import threading
from typing import Any, Dict, Optional, Tuple
from datetime import datetime, timedelta
from ipaddress import ip_address, IPv4Address, IPv6Address
from decimal import Decimal

# User-Agent parsing
try:
    from user_agents import parse as parse_user_agent_lib
except ImportError:
    parse_user_agent_lib = None

# Optional LangSmith logging
try:
    from langsmith import Client as LangSmithClient  # type: ignore
except Exception:  # pragma: no cover
    LangSmithClient = None

logger = logging.getLogger(__name__)

class DecimalEncoder(json.JSONEncoder):
    """Custom JSON encoder that handles Decimal objects."""
    def default(self, obj):
        if isinstance(obj, Decimal):
            return float(obj)
        return super().default(obj)
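
# Illustrative usage (values are made up): the encoder lets json.dumps handle
# Decimal fields (e.g. cost estimates) that would otherwise raise TypeError.
#
#     >>> json.dumps({"cost": Decimal("0.05")}, cls=DecimalEncoder)
#     '{"cost": 0.05}'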

# Path optimization helper functions
def get_project_root() -> str:
    """Get the project root path from environment variable."""
    root_path = os.getenv('PROJECT_ROOT_PATH', os.getcwd())
    return os.path.abspath(root_path)

def get_generated_folder_path() -> str:
    """Get the path to the generated folder."""
    return os.path.join(get_project_root(), 'templates', 'generated')

def get_relative_folder_name(full_path: str) -> str:
    """Extract folder name from full path."""
    if not full_path:
        return ""
    # Get the last part of the path (folder name)
    return os.path.basename(os.path.normpath(full_path))

def reconstruct_full_path(folder_name: str) -> str:
    """Reconstruct full path from folder name."""
    if not folder_name:
        return ""
    return os.path.join(get_generated_folder_path(), folder_name)
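
# Illustrative round-trip (paths are hypothetical): only the folder name is
# persisted, and the full path is rebuilt under the configured project root.
#
#     >>> get_relative_folder_name('/srv/app/templates/generated/my-site/')
#     'my-site'
#     >>> reconstruct_full_path('my-site')  # assuming PROJECT_ROOT_PATH=/srv/app
#     '/srv/app/templates/generated/my-site'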

# Timestamp formatting helper functions
def format_timestamp(timestamp: Optional[float]) -> Optional[str]:
    """Convert Unix timestamp to human-readable format."""
    if timestamp is None:
        return None
    try:
        return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
    except (ValueError, OSError):
        return None

def format_timestamp_iso(timestamp: Optional[float]) -> Optional[str]:
    """Convert Unix timestamp to ISO format."""
    if timestamp is None:
        return None
    try:
        return datetime.fromtimestamp(timestamp).isoformat()
    except (ValueError, OSError):
        return None

def add_formatted_timestamps(data: Dict[str, Any]) -> Dict[str, Any]:
    """Add human-readable timestamp fields to data dictionary."""
    if not data:
        return data
    
    # List of timestamp fields to format
    timestamp_fields = [
        'created_at', 'completed_at', 'month_start', 'next_reset'
    ]
    
    for field in timestamp_fields:
        if field in data:
            # Add formatted version
            data[f'{field}_formatted'] = format_timestamp(data[field])
            data[f'{field}_iso'] = format_timestamp_iso(data[field])
    
    return data
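
# Illustrative example (epoch value chosen arbitrarily; formatted output
# depends on the server's local timezone - shown here for a UTC host). The
# dict is mutated in place and also returned.
#
#     >>> add_formatted_timestamps({'created_at': 0.0})
#     {'created_at': 0.0, 'created_at_formatted': '1970-01-01 00:00:00',
#      'created_at_iso': '1970-01-01T00:00:00'}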

# Thread-local storage for database connections (reserved; not currently used)
_local = threading.local()

# Legacy data dir retained for backwards compatibility with the old sqlite layout
DATA_DIR = os.path.join(os.getcwd(), 'data')

# RATE LIMITING REMOVED - Only keeping logging functionality

def init_db() -> None:
    """Initialize MySQL tables for site generation tracking."""
    conn = get_connection()
    cur = conn.cursor()
    try:
        # Main table for tracking site generations
        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS site_generations (
                id INT PRIMARY KEY AUTO_INCREMENT,
                generation_id VARCHAR(64) NOT NULL UNIQUE,
                user_id VARCHAR(128) NOT NULL,
                user_email VARCHAR(320),
                device_fingerprint VARCHAR(255) NOT NULL,
                ip_address VARCHAR(64) NOT NULL,
                site_name VARCHAR(255) NOT NULL,
                user_agent TEXT,
                os_name VARCHAR(64),
                os_version VARCHAR(64),
                browser_name VARCHAR(64),
                browser_version VARCHAR(64),
                device_type VARCHAR(32),
                architecture VARCHAR(32),
                created_at DOUBLE NOT NULL,
                completed_at DOUBLE,
                status VARCHAR(50) NOT NULL DEFAULT 'running',
                error_message TEXT,
                model_used VARCHAR(255),
                tokens_used INT DEFAULT 0,
                cost_estimate DOUBLE DEFAULT 0.0,
                generation_time DOUBLE DEFAULT 0.0,
                INDEX idx_generation_id (generation_id),
                INDEX idx_user_id (user_id),
                INDEX idx_ip_address (ip_address),
                INDEX idx_device_fingerprint (device_fingerprint),
                INDEX idx_os_name (os_name),
                INDEX idx_browser_name (browser_name),
                INDEX idx_device_type (device_type)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
            """
        )

        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS ip_generations (
                id INT PRIMARY KEY AUTO_INCREMENT,
                ip_address VARCHAR(64) NOT NULL,
                generation_count INT DEFAULT 0,
                month_start DOUBLE NOT NULL,
                created_at DOUBLE NOT NULL,
                updated_at DOUBLE NOT NULL,
                UNIQUE KEY uniq_ip_month (ip_address, month_start)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
            """
        )

        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS device_generations (
                id INT PRIMARY KEY AUTO_INCREMENT,
                device_fingerprint VARCHAR(255) NOT NULL,
                generation_count INT DEFAULT 0,
                month_start DOUBLE NOT NULL,
                created_at DOUBLE NOT NULL,
                updated_at DOUBLE NOT NULL,
                UNIQUE KEY uniq_device_month (device_fingerprint, month_start)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
            """
        )

        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS email_generations (
                id INT PRIMARY KEY AUTO_INCREMENT,
                email VARCHAR(320) NOT NULL,
                generation_count INT DEFAULT 0,
                month_start DOUBLE NOT NULL,
                created_at DOUBLE NOT NULL,
                updated_at DOUBLE NOT NULL,
                UNIQUE KEY uniq_email_month (email, month_start)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
            """
        )

        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS suspicious_activity (
                id INT PRIMARY KEY AUTO_INCREMENT,
                ip_address VARCHAR(64) NOT NULL,
                device_fingerprint VARCHAR(255) NOT NULL,
                activity_type VARCHAR(128) NOT NULL,
                details TEXT,
                created_at DOUBLE NOT NULL
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
            """
        )

        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS generation_events (
                id INT PRIMARY KEY AUTO_INCREMENT,
                event_id VARCHAR(64) NOT NULL UNIQUE,
                generation_id VARCHAR(64),
                event_type VARCHAR(128) NOT NULL,
                created_at DOUBLE NOT NULL,
                details TEXT,
                INDEX idx_event_id (event_id),
                INDEX idx_generation_id (generation_id)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
            """
        )

        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS langsmith_runs (
                id INT PRIMARY KEY AUTO_INCREMENT,
                run_id VARCHAR(64) NOT NULL UNIQUE,
                generation_id VARCHAR(64),
                user_id VARCHAR(128),
                website_name VARCHAR(255),
                website_email VARCHAR(255),
                website_phone VARCHAR(64),
                status VARCHAR(32),
                start_time DATETIME,
                end_time DATETIME,
                duration DOUBLE,
                total_cost DOUBLE,
                prompt_cost DOUBLE,
                completion_cost DOUBLE,
                total_tokens INT,
                prompt_tokens INT,
                completion_tokens INT,
                outputs_success BOOL,
                destination_path TEXT,
                files_generated JSON,
                error TEXT,
                has_errors BOOL,
                error_count INT,
                run_type VARCHAR(64),
                platform VARCHAR(128),
                langsmith_project VARCHAR(128),
                colors JSON,
                typography VARCHAR(128),
                pages JSON,
                summary_total_duration DOUBLE,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                INDEX idx_run_id (run_id),
                INDEX idx_generation_id (generation_id),
                INDEX idx_user_id (user_id),
                INDEX idx_status (status)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
            """
        )

        # Secondary indexes. MySQL (unlike MariaDB) lacks CREATE INDEX IF NOT
        # EXISTS, so duplicate-index errors on re-initialization are ignored.
        for ddl in (
            "CREATE INDEX idx_ip_month ON ip_generations(ip_address, month_start)",
            "CREATE INDEX idx_device_month ON device_generations(device_fingerprint, month_start)",
            "CREATE INDEX idx_email_month ON email_generations(email, month_start)",
            "CREATE INDEX idx_suspicious_ip ON suspicious_activity(ip_address)",
            "CREATE INDEX idx_suspicious_device ON suspicious_activity(device_fingerprint)",
        ):
            try:
                cur.execute(ddl)
            except Exception:
                pass

        conn.commit()
        logger.info("Database initialized with comprehensive tracking tables")
    finally:
        try:
            cur.close()
        except Exception:
            pass

def _connect():
    """Return configured DB connection."""
    return get_connection()

def _row_to_dict(row: Optional[Any]) -> Optional[Dict[str, Any]]:
    if row is None:
        return None
    return {key: row[key] for key in row.keys()}

def _get_month_start(timestamp: float) -> float:
    """Get the start of the month for a given timestamp."""
    dt = datetime.fromtimestamp(timestamp)
    month_start = dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    return month_start.timestamp()
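
# Illustrative (local-time semantics): every timestamp within a month maps to
# that month's first midnight.
#
#     >>> _get_month_start(datetime(2024, 3, 15, 12, 30).timestamp()) \
#     ...     == datetime(2024, 3, 1).timestamp()
#     True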

def _normalize_ip(ip_str: str) -> str:
    """Normalize IP address for consistent storage."""
    try:
        ip = ip_address(ip_str)
        return str(ip)
    except ValueError:
        # If it's not a valid IP, hash it for storage
        return hashlib.md5(ip_str.encode()).hexdigest()[:16]
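
# Illustrative: valid addresses are canonicalized by the ipaddress module
# (IPv6 compressed and lower-cased); unparseable input is replaced with a
# short stable hash so it can still be used as a key.
#
#     >>> _normalize_ip('2001:DB8:0:0:0:0:0:1')
#     '2001:db8::1'
#     >>> len(_normalize_ip('not-an-ip'))
#     16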

def _normalize_email(email: str) -> str:
    """Normalize email address for consistent storage."""
    if not email:
        return ""
    return email.strip().lower()

def get_system_platform_info() -> Dict[str, str]:
    """
    Get platform information for the host using Python's platform module,
    matching the OS detection LangSmith uses. Note that this describes the
    server running this code, not the end user's machine.
    """
    try:
        system = platform.system()
        release = platform.release()
        version = platform.version()
        
        # Simple, non-hardcoded approach - just use what the system reports
        os_name = system
        os_version = release
        
        # Build platform string similar to LangSmith format
        if system == "Windows":
            platform_string = f"{os_name}-{os_version}-{version}-SP0"
        else:
            platform_string = f"{os_name}-{os_version}"
            
        return {
            'os_name': os_name,
            'os_version': os_version,
            'platform_string': platform_string,
            'system': system,
            'release': release,
            'version': version
        }
        
    except Exception as e:
        logger.warning(f"Failed to get system platform info: {e}")
        return {
            'os_name': 'Unknown',
            'os_version': 'Unknown',
            'platform_string': 'Unknown',
            'system': 'Unknown',
            'release': 'Unknown',
            'version': 'Unknown'
        }

def parse_user_agent(user_agent: str) -> Dict[str, Optional[str]]:
    """
    Parse a User-Agent string to extract browser and device information.
    Note: os_name/os_version come from the host platform (matching how
    LangSmith records runtime metadata), not from the User-Agent itself.
    
    Args:
        user_agent: Raw User-Agent string
        
    Returns:
        Dictionary with parsed information
    """
    if not user_agent:
        return {
            'user_agent': None,
            'os_name': None,
            'os_version': None,
            'browser_name': None,
            'browser_version': None,
            'device_type': 'unknown',
            'architecture': None
        }
    
    # Initialize result with defaults
    result = {
        'user_agent': user_agent,
        'os_name': None,
        'os_version': None,
        'browser_name': None,
        'browser_version': None,
        'device_type': 'desktop',
        'architecture': None
    }
    
    # Get accurate system platform information (like LangSmith does)
    system_platform = get_system_platform_info()
    result['os_name'] = system_platform['os_name']
    result['os_version'] = system_platform['os_version']
    
    try:
        if parse_user_agent_lib:
            # Use the user-agents library for browser and device information
            ua = parse_user_agent_lib(user_agent)
            
            # Extract browser information
            if ua.browser.family:
                result['browser_name'] = ua.browser.family
            if ua.browser.version_string:
                result['browser_version'] = ua.browser.version_string
            
            # Extract device type
            if ua.is_mobile:
                result['device_type'] = 'mobile'
            elif ua.is_tablet:
                result['device_type'] = 'tablet'
            elif ua.is_bot:
                result['device_type'] = 'bot'
            else:
                result['device_type'] = 'desktop'
            
            # Extract architecture if available
            if hasattr(ua, 'os') and hasattr(ua.os, 'architecture'):
                result['architecture'] = ua.os.architecture
            
        else:
            # Fallback: minimal detection without library
            logger.warning("user-agents library not available, using minimal fallback")
            # Just use system platform info, no hardcoded patterns
        
    except Exception as e:
        logger.warning(f"Failed to parse User-Agent: {e}")
    
    return result
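
# Illustrative result shape (the UA string and versions are invented; os_name
# and os_version reflect the *host* platform by design, see the note above):
#
#     parse_user_agent('Mozilla/5.0 ... Chrome/120.0.0.0 ...')
#     -> {'user_agent': 'Mozilla/5.0 ... Chrome/120.0.0.0 ...',
#         'os_name': 'Linux', 'os_version': '6.1.0',
#         'browser_name': 'Chrome', 'browser_version': '120.0.0',
#         'device_type': 'desktop', 'architecture': None}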

def maybe_log_langsmith(event_name: str, payload: Dict[str, Any]) -> None:
    """Optionally log a custom event to LangSmith for observability."""
    try:
        if LangSmithClient is None:
            return
        if not os.getenv('LANGSMITH_API_KEY'):
            return
        client = LangSmithClient()
        client.create_run(
            name=f"site_generation::{event_name}",
            inputs=payload,
            run_type="tool",
            project_name=os.getenv('LANGCHAIN_PROJECT', 'SiteGenerationMetrics'),
            serialized={"id": "site_generation_event", "kwargs": {}},
        )
    except Exception as e:
        logger.info(f"LangSmith logging skipped: {e}")

def log_event(generation_id: str, event_type: str, details: Optional[Dict[str, Any]] = None) -> None:
    """Insert a detailed event log row. Best-effort; do not crash on errors."""
    try:
        conn = _connect()
        with conn:
            conn.execute(
                "INSERT INTO generation_events (event_id, generation_id, event_type, created_at, details) VALUES (?, ?, ?, ?, ?)",
                (
                    uuid.uuid4().hex,
                    generation_id,
                    event_type,
                    time.time(),
                    json.dumps(details or {}, cls=DecimalEncoder)
                ),
            )
    except Exception as e:
        logger.warning(f"Failed to write generation event log: {e}")

def log_suspicious_activity(ip_address: str, device_fingerprint: str, activity_type: str, details: Optional[Dict[str, Any]] = None) -> None:
    """Log suspicious activity for analytics monitoring - NO BLOCKING."""
    try:
        conn = _connect()
        with conn:
            conn.execute(
                "INSERT INTO suspicious_activity (ip_address, device_fingerprint, activity_type, details, created_at) VALUES (?, ?, ?, ?, ?)",
                (
                    _normalize_ip(ip_address),
                    device_fingerprint,
                    activity_type,
                    json.dumps(details or {}, cls=DecimalEncoder),
                    time.time()
                ),
            )
        logger.warning(f"Suspicious activity logged: {activity_type} from {ip_address}")
    except Exception as e:
        logger.error(f"Failed to log suspicious activity: {e}")

# RATE LIMITING FUNCTIONS REMOVED - the counters below are kept for analytics only
def get_ip_generations(ip_address: str, month_start: float) -> int:
    """Get the number of generations for an IP address in the current month - FOR ANALYTICS ONLY."""
    try:
        conn = _connect()
        cur = conn.execute(
            "SELECT generation_count FROM ip_generations WHERE ip_address = ? AND month_start = ?",
            (_normalize_ip(ip_address), month_start)
        )
        row = cur.fetchone()
        return row['generation_count'] if row else 0
    except Exception as e:
        logger.error(f"Failed to get IP generations: {e}")
        return 0

def get_device_generations(device_fingerprint: str, month_start: float) -> int:
    """Get the number of generations for a device in the current month - FOR ANALYTICS ONLY."""
    try:
        conn = _connect()
        cur = conn.execute(
            "SELECT generation_count FROM device_generations WHERE device_fingerprint = ? AND month_start = ?",
            (device_fingerprint, month_start)
        )
        row = cur.fetchone()
        return row['generation_count'] if row else 0
    except Exception as e:
        logger.error(f"Failed to get device generations: {e}")
        return 0

def get_email_generations(email: str, month_start: float) -> int:
    """Get the number of generations for an email in the current month - FOR ANALYTICS ONLY."""
    try:
        conn = _connect()
        cur = conn.execute(
            "SELECT generation_count FROM email_generations WHERE email = ? AND month_start = ?",
            (_normalize_email(email), month_start)
        )
        row = cur.fetchone()
        return row['generation_count'] if row else 0
    except Exception as e:
        logger.error(f"Failed to get email generations: {e}")
        return 0



def increment_ip_generations(ip_address: str, month_start: float) -> None:
    """Increment the generation count for an IP address in the current month - FOR ANALYTICS ONLY."""
    try:
        conn = _connect()
        backend = getattr(conn, 'backend', os.getenv('DB_BACKEND', 'sqlite'))
        with conn:
            if backend in ('mysql', 'mariadb'):
                conn.execute(
                    """
                    INSERT INTO ip_generations (ip_address, generation_count, month_start, created_at, updated_at)
                    VALUES (%s, 1, %s, %s, %s)
                    ON DUPLICATE KEY UPDATE generation_count = generation_count + 1, updated_at = VALUES(updated_at)
                    """,
                    (_normalize_ip(ip_address), month_start, time.time(), time.time())
                )
            else:
                conn.execute(
                    """
                    INSERT INTO ip_generations (ip_address, generation_count, month_start, created_at, updated_at)
                    VALUES (?, 1, ?, ?, ?)
                    ON CONFLICT(ip_address, month_start) 
                    DO UPDATE SET generation_count = generation_count + 1, updated_at = ?
                    """,
                    (_normalize_ip(ip_address), month_start, time.time(), time.time(), time.time())
                )
    except Exception as e:
        logger.error(f"Failed to increment IP generations: {e}")

def increment_device_generations(device_fingerprint: str, month_start: float) -> None:
    """Increment the generation count for a device in the current month - FOR ANALYTICS ONLY."""
    try:
        conn = _connect()
        backend = getattr(conn, 'backend', os.getenv('DB_BACKEND', 'sqlite'))
        with conn:
            if backend in ('mysql', 'mariadb'):
                conn.execute(
                    """
                    INSERT INTO device_generations (device_fingerprint, generation_count, month_start, created_at, updated_at)
                    VALUES (%s, 1, %s, %s, %s)
                    ON DUPLICATE KEY UPDATE generation_count = generation_count + 1, updated_at = VALUES(updated_at)
                    """,
                    (device_fingerprint, month_start, time.time(), time.time())
                )
            else:
                conn.execute(
                    """
                    INSERT INTO device_generations (device_fingerprint, generation_count, month_start, created_at, updated_at)
                    VALUES (?, 1, ?, ?, ?)
                    ON CONFLICT(device_fingerprint, month_start) 
                    DO UPDATE SET generation_count = generation_count + 1, updated_at = ?
                    """,
                    (device_fingerprint, month_start, time.time(), time.time(), time.time())
                )
    except Exception as e:
        logger.error(f"Failed to increment device generations: {e}")

def increment_email_generations(email: str, month_start: float) -> None:
    """Increment the generation count for an email in the current month - FOR ANALYTICS ONLY."""
    try:
        conn = _connect()
        backend = getattr(conn, 'backend', os.getenv('DB_BACKEND', 'sqlite'))
        with conn:
            if backend in ('mysql', 'mariadb'):
                conn.execute(
                    """
                    INSERT INTO email_generations (email, generation_count, month_start, created_at, updated_at)
                    VALUES (%s, 1, %s, %s, %s)
                    ON DUPLICATE KEY UPDATE generation_count = generation_count + 1, updated_at = VALUES(updated_at)
                    """,
                    (_normalize_email(email), month_start, time.time(), time.time())
                )
            else:
                conn.execute(
                    """
                    INSERT INTO email_generations (email, generation_count, month_start, created_at, updated_at)
                    VALUES (?, 1, ?, ?, ?)
                    ON CONFLICT(email, month_start) 
                    DO UPDATE SET generation_count = generation_count + 1, updated_at = ?
                    """,
                    (_normalize_email(email), month_start, time.time(), time.time(), time.time())
                )
    except Exception as e:
        logger.error(f"Failed to increment email generations: {e}")



def can_generate_site(
    user_id: str, 
    ip_address: str, 
    device_fingerprint: str, 
    email: Optional[str] = None
) -> Dict[str, Any]:
    """
    RATE LIMITING REMOVED - Always allows generation but keeps logging for analytics.
    
    Returns:
        Dict with 'can_generate' always True and detailed logging information
    """
    try:
        current_time = time.time()
        month_start = _get_month_start(current_time)
        
        # Normalize inputs
        normalized_ip = _normalize_ip(ip_address)
        normalized_email = _normalize_email(email) if email else ""
        
        # Get current counts for logging purposes only
        ip_generations = get_ip_generations(normalized_ip, month_start)
        device_generations = get_device_generations(device_fingerprint, month_start)
        email_generations = get_email_generations(normalized_email, month_start) if normalized_email else 0
        
        # Calculate total attempts for logging
        total_attempts = ip_generations + device_generations
        
        # Always allow generation (rate limiting removed)
        can_generate = True
        
        # Prepare response with logging data
        result = {
            'can_generate': can_generate,
            'rate_limiting_disabled': True,
            'limits': {
                'ip_generations': ip_generations,
                'device_generations': device_generations,
                'email_generations': email_generations,
                'total_attempts': total_attempts
            },
            'month_start': month_start,
            'next_reset': _get_month_start(month_start + 32 * 86400),  # 30 days can land inside a 31-day month; 32 always reaches the next
            'reason': 'Rate limiting disabled - generation always allowed'
        }
        
        # Add formatted timestamps for better readability
        result = add_formatted_timestamps(result)
        
        # Log generation attempt for analytics (no blocking)
        log_event(
            generation_id=f"check_{uuid.uuid4().hex}",
            event_type="generation_check",
            details={
                "user_id": user_id,
                "ip_address": normalized_ip,
                "device_fingerprint": device_fingerprint,
                "email": normalized_email,
                "timestamp": current_time,
                "rate_limiting_disabled": True,
                "counts": result['limits']
            }
        )
        
        # Log to LangSmith for analytics
        maybe_log_langsmith("limit_check", {
            "user_id": user_id,
            "ip_address": normalized_ip,
            "device_fingerprint": device_fingerprint,
            "email": normalized_email,
            "result": result
        })
        
        return result
        
    except Exception as e:
        logger.error(f"Error in generation check: {e}")
        # Always allow generation even on error
        return {
            'can_generate': True,
            'rate_limiting_disabled': True,
            'error': str(e),
            'warning': 'Error in check, but allowing generation',
            'reason': 'Rate limiting disabled - generation always allowed'
        }

def start_site_generation(
    *,
    user_id: str,
    user_email: Optional[str],
    device_fingerprint: str,
    ip_address: str,
    site_name: str,
    model_used: Optional[str] = None,
    user_agent: Optional[str] = None
) -> str:
    """Start tracking a new site generation and increment all relevant counters. RATE LIMITING REMOVED."""
    try:
        # Log generation check for analytics (no blocking)
        limit_check = can_generate_site(user_id, ip_address, device_fingerprint, user_email)
        # Rate limiting removed - always proceed with generation
        
        generation_id = uuid.uuid4().hex
        current_time = time.time()
        month_start = _get_month_start(current_time)
        
        # Normalize inputs
        normalized_ip = _normalize_ip(ip_address)
        normalized_email = _normalize_email(user_email) if user_email else ""
        
        # Parse User-Agent if provided
        user_agent_data = parse_user_agent(user_agent) if user_agent else {}
        
        conn = _connect()
        with conn:
            # Create generation record
            conn.execute(
                (
                    "INSERT INTO site_generations (generation_id, user_id, user_email, device_fingerprint, ip_address, "
                    "site_name, user_agent, os_name, os_version, browser_name, browser_version, device_type, architecture, "
                    "created_at, model_used) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
                ),
                (
                    generation_id,
                    user_id,
                    normalized_email,
                    device_fingerprint,
                    normalized_ip,
                    site_name,
                    user_agent_data.get('user_agent'),
                    user_agent_data.get('os_name'),
                    user_agent_data.get('os_version'),
                    user_agent_data.get('browser_name'),
                    user_agent_data.get('browser_version'),
                    user_agent_data.get('device_type'),
                    user_agent_data.get('architecture'),
                    current_time,
                    model_used
                ),
            )
            
            # Increment all relevant counters
            increment_ip_generations(normalized_ip, month_start)
            increment_device_generations(device_fingerprint, month_start)
            
            if normalized_email:
                increment_email_generations(normalized_email, month_start)
        
        # Log event
        log_event(generation_id, "generation_started", {
            "user_id": user_id,
            "ip_address": normalized_ip,
            "device_fingerprint": device_fingerprint,
            "email": normalized_email,
            "site_name": site_name,
            "user_agent_data": user_agent_data,
            "limits": limit_check
        })
        
        # Optional LangSmith logging
        maybe_log_langsmith("generation_started", {
            "generation_id": generation_id,
            "user_id": user_id,
            "ip_address": normalized_ip,
            "device_fingerprint": device_fingerprint,
            "email": normalized_email,
            "site_name": site_name,
            "user_agent_data": user_agent_data,
            "limits": limit_check
        })
        
        logger.info(f"Started site generation {generation_id} for user {user_id} (IP: {normalized_ip}, Device: {device_fingerprint})")
        return generation_id
        
    except Exception as e:
        logger.error(f"Failed to start site generation tracking: {e}")
        raise

def complete_site_generation(
    generation_id: str,
    *,
    status: str = 'completed',
    error_message: Optional[str] = None,
    tokens_used: int = 0,
    cost_estimate: float = 0.0,
    generation_time: float = 0.0
) -> None:
    """Mark a site generation as completed with optional metrics.
    
    Note: This function only updates the site_generations table.
    The langsmith_runs table should be updated directly from LangSmith trace data
    using save_langsmith_run_data() which contains the authoritative status and metrics.
    """
    try:
        current_time = time.time()
        
        conn = _connect()
        with conn:
            # Update site_generations table only
            # LangSmith data is authoritative for langsmith_runs table
            conn.execute(
                (
                    "UPDATE site_generations SET completed_at = ?, status = ?, error_message = ?, "
                    "tokens_used = ?, cost_estimate = ?, generation_time = ? WHERE generation_id = ?"
                ),
                (
                    current_time,
                    status,
                    error_message,
                    tokens_used,
                    cost_estimate,
                    generation_time,
                    generation_id
                ),
            )
        
        # Log completion event
        log_event(generation_id, "generation_completed", {
            "status": status,
            "error_message": error_message,
            "tokens_used": tokens_used,
            "cost_estimate": cost_estimate,
            "generation_time": generation_time
        })
        
        # Optional LangSmith logging
        maybe_log_langsmith("generation_completed", {
            "generation_id": generation_id,
            "status": status,
            "tokens_used": tokens_used,
            "cost_estimate": cost_estimate,
            "generation_time": generation_time
        })
        
        logger.info(f"Completed site generation {generation_id} with status: {status}")
        
    except Exception as e:
        logger.error(f"Failed to complete site generation {generation_id}: {e}")

def get_generation_stats(user_id: str, ip_address: str, device_fingerprint: str) -> Dict[str, Any]:
    """Get comprehensive generation statistics for a user/device/IP combination."""
    try:
        current_time = time.time()
        month_start = _get_month_start(current_time)
        
        # Normalize inputs
        normalized_ip = _normalize_ip(ip_address)
        normalized_device = device_fingerprint
        
        conn = _connect()
        
        # Get current month stats for all factors
        cur = conn.execute(
            """
            SELECT 
                COUNT(*) as total_generations,
                SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as successful_generations,
                SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as failed_generations,
                SUM(tokens_used) as total_tokens,
                SUM(cost_estimate) as total_cost,
                AVG(generation_time) as avg_generation_time
            FROM site_generations 
            WHERE (user_id = ? OR ip_address = ? OR device_fingerprint = ?) AND created_at >= ?
            """,
            (user_id, normalized_ip, normalized_device, month_start)
        )
        
        month_stats = cur.fetchone()
        
        # Get current limits status
        limit_check = can_generate_site(user_id, ip_address, device_fingerprint)
        
        # Get detailed breakdown (note: spans all users for the current month)
        cur = conn.execute(
            """
            SELECT ip_address, device_fingerprint, user_email, os_name, browser_name, device_type, COUNT(*) as count
            FROM site_generations
            WHERE created_at >= ?
            GROUP BY ip_address, device_fingerprint, user_email, os_name, browser_name, device_type
            """,
            (month_start,)
        )
        
        breakdown = []
        for row in cur.fetchall():
            breakdown.append({
                'ip_address': row['ip_address'],
                'device_fingerprint': row['device_fingerprint'][:8] + '...',  # Truncate for privacy
                'email': row['user_email'][:8] + '...' if row['user_email'] else 'None',
                'os_name': row['os_name'],
                'browser_name': row['browser_name'],
                'device_type': row['device_type'],
                'count': row['count']
            })
        
        # Get platform analytics
        cur = conn.execute(
            """
            SELECT 
                os_name, browser_name, device_type, 
                COUNT(*) as count,
                AVG(generation_time) as avg_time,
                SUM(tokens_used) as total_tokens
            FROM site_generations 
            WHERE created_at >= ? 
            GROUP BY os_name, browser_name, device_type
            ORDER BY count DESC
            """,
            (month_start,)
        )
        
        platform_analytics = []
        for row in cur.fetchall():
            platform_analytics.append({
                'os_name': row['os_name'],
                'browser_name': row['browser_name'],
                'device_type': row['device_type'],
                'count': row['count'],
                'avg_generation_time': row['avg_time'] or 0.0,
                'total_tokens': row['total_tokens'] or 0
            })
        
        if month_stats:
            result = {
                'current_month': {
                    'total': month_stats['total_generations'],
                    'successful': month_stats['successful_generations'],
                    'failed': month_stats['failed_generations'],
                    'tokens_used': month_stats['total_tokens'] or 0,
                    'cost_estimate': month_stats['total_cost'] or 0.0,
                    'avg_generation_time': month_stats['avg_generation_time'] or 0.0
                },
                'limits': limit_check,
                'breakdown': breakdown,
                'platform_analytics': platform_analytics,
                'month_start': month_start,
                'next_reset': _get_month_start(month_start + 32 * 86400)  # always lands in the next month
            }
            
            # Add formatted timestamps for better readability
            result = add_formatted_timestamps(result)
            return result
        
        return {}
    except Exception as e:
        logger.error(f"Failed to get generation stats: {e}")
        return {}

def get_platform_analytics(limit: int = 50) -> Dict[str, Any]:
    """Get comprehensive platform analytics from User-Agent data."""
    try:
        conn = _connect()
        month_start = _get_month_start(time.time())
        
        # Get overall platform distribution
        cur = conn.execute(
            """
            SELECT 
                os_name, browser_name, device_type,
                COUNT(*) as count,
                COUNT(DISTINCT ip_address) as unique_ips,
                COUNT(DISTINCT device_fingerprint) as unique_devices,
                AVG(generation_time) as avg_generation_time,
                SUM(tokens_used) as total_tokens,
                SUM(cost_estimate) as total_cost
            FROM site_generations 
            WHERE created_at >= ? AND os_name IS NOT NULL
            GROUP BY os_name, browser_name, device_type
            ORDER BY count DESC
            LIMIT ?
            """,
            (month_start, limit)
        )
        
        platform_stats = []
        for row in cur.fetchall():
            platform_stats.append({
                'os_name': row['os_name'],
                'browser_name': row['browser_name'],
                'device_type': row['device_type'],
                'generation_count': row['count'],
                'unique_ips': row['unique_ips'],
                'unique_devices': row['unique_devices'],
                'avg_generation_time': row['avg_generation_time'] or 0.0,
                'total_tokens': row['total_tokens'] or 0,
                'total_cost': row['total_cost'] or 0.0
            })
        
        # Get OS distribution
        cur = conn.execute(
            """
            SELECT 
                os_name,
                COUNT(*) as count,
                COUNT(DISTINCT ip_address) as unique_ips
            FROM site_generations 
            WHERE created_at >= ? AND os_name IS NOT NULL
            GROUP BY os_name
            ORDER BY count DESC
            """
        )
        
        os_distribution = []
        for row in cur.fetchall():
            os_distribution.append({
                'os_name': row['os_name'],
                'count': row['count'],
                'unique_ips': row['unique_ips']
            })
        
        # Get browser distribution
        cur = conn.execute(
            """
            SELECT 
                browser_name,
                COUNT(*) as count,
                COUNT(DISTINCT ip_address) as unique_ips
            FROM site_generations 
            WHERE created_at >= ? AND browser_name IS NOT NULL
            GROUP BY browser_name
            ORDER BY count DESC
            """
        )
        
        browser_distribution = []
        for row in cur.fetchall():
            browser_distribution.append({
                'browser_name': row['browser_name'],
                'count': row['count'],
                'unique_ips': row['unique_ips']
            })
        
        # Get device type distribution
        cur = conn.execute(
            """
            SELECT 
                device_type,
                COUNT(*) as count,
                COUNT(DISTINCT ip_address) as unique_ips
            FROM site_generations 
            WHERE created_at >= ? AND device_type IS NOT NULL
            GROUP BY device_type
            ORDER BY count DESC
            """
        )
        
        device_distribution = []
        for row in cur.fetchall():
            device_distribution.append({
                'device_type': row['device_type'],
                'count': row['count'],
                'unique_ips': row['unique_ips']
            })
        
        return {
            'platform_stats': platform_stats,
            'os_distribution': os_distribution,
            'browser_distribution': browser_distribution,
            'device_distribution': device_distribution,
            'generated_at': time.time()
        }
        
    except Exception as e:
        logger.error(f"Failed to get platform analytics: {e}")
        return {}

def get_suspicious_activity_report(limit: int = 100) -> list:
    """Get a report of suspicious activity for analytics monitoring - NO BLOCKING."""
    try:
        conn = _connect()
        cur = conn.execute(
            """
            SELECT ip_address, device_fingerprint, activity_type, details, created_at
            FROM suspicious_activity 
            ORDER BY created_at DESC 
            LIMIT ?
            """,
            (limit,)
        )
        
        activities = []
        for row in cur.fetchall():
            try:
                details = json.loads(row['details']) if row['details'] else {}
            except (TypeError, ValueError):
                details = {}
            
            activity = {
                'ip_address': row['ip_address'],
                'device_fingerprint': row['device_fingerprint'][:8] + '...',
                'activity_type': row['activity_type'],
                'details': details,
                'created_at': row['created_at']
            }
            
            # Add formatted timestamps for better readability
            activity = add_formatted_timestamps(activity)
            activities.append(activity)
        
        return activities
    except Exception as e:
        logger.error(f"Failed to get suspicious activity report: {e}")
        return []

def update_generation_progress(generation_id: str, current_step: int, step_name: str, step_details: Optional[Dict[str, Any]] = None) -> None:
    """Update generation progress with current step information."""
    try:
        conn = _connect()
        with conn:
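            # NOTE: step details are written into error_message, which doubles
            # as a progress-detail scratch field while a generation is running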
            conn.execute(
                "UPDATE site_generations SET status = ?, error_message = ? WHERE generation_id = ?",
                (f"step_{current_step}_{step_name}", json.dumps(step_details or {}, cls=DecimalEncoder), generation_id)
            )
        
        # Log step progress event
        log_event(generation_id, f"step_{current_step}_started", {
            "step_number": current_step,
            "step_name": step_name,
            "step_details": step_details
        })
        
        logger.info(f"Generation {generation_id} progress: Step {current_step} - {step_name}")
        
    except Exception as e:
        logger.error(f"Failed to update generation progress for {generation_id}: {e}")

def log_generation_step(generation_id: str, step_number: int, step_name: str, step_result: Dict[str, Any]) -> None:
    """Log completion of a generation step with results."""
    try:
        # Log step completion event
        log_event(generation_id, f"step_{step_number}_completed", {
            "step_number": step_number,
            "step_name": step_name,
            "result": step_result,
            "timestamp": time.time()
        })
        
        # Update main record with step completion
        conn = _connect()
        with conn:
            conn.execute(
                "UPDATE site_generations SET status = ? WHERE generation_id = ?",
                (f"step_{step_number}_completed", generation_id)
            )
        
        logger.info(f"Generation {generation_id} completed step {step_number}: {step_name}")
        
    except Exception as e:
        logger.error(f"Failed to log generation step for {generation_id}: {e}")

def update_generation_metrics(generation_id: str, tokens_used: int = 0, cost_estimate: float = 0.0, generation_time: float = 0.0) -> None:
    """Update generation metrics incrementally."""
    try:
        conn = _connect()
        with conn:
            conn.execute(
                "UPDATE site_generations SET tokens_used = tokens_used + ?, cost_estimate = cost_estimate + ?, generation_time = ? WHERE generation_id = ?",
                (tokens_used, cost_estimate, generation_time, generation_id)
            )
        
        logger.info(f"Updated metrics for generation {generation_id}: tokens={tokens_used}, cost={cost_estimate}, time={generation_time}")
        
    except Exception as e:
        logger.error(f"Failed to update generation metrics for {generation_id}: {e}")

def log_generation_error(generation_id: str, error_message: str, error_details: Optional[Dict[str, Any]] = None) -> None:
    """Log generation error with details."""
    try:
        current_time = time.time()
        
        # Update main record with error
        conn = _connect()
        with conn:
            conn.execute(
                "UPDATE site_generations SET status = ?, error_message = ?, completed_at = ? WHERE generation_id = ?",
                ('error', error_message, current_time, generation_id)
            )
        
        # Log error event
        log_event(generation_id, "generation_error", {
            "error_message": error_message,
            "error_details": error_details,
            "timestamp": current_time
        })
        
        logger.error(f"Generation {generation_id} failed: {error_message}")
        
    except Exception as e:
        logger.error(f"Failed to log generation error for {generation_id}: {e}")


def save_langsmith_run_data(run_data: Dict[str, Any], generation_id: Optional[str] = None) -> None:
    """Save LangSmith run data to the langsmith_runs table.
    
    Extracts important fields as specified in ToDo.md and stores them.
    """
    try:
        conn = _connect()
        
        def _get_nested(data: Dict[str, Any], path: str, default=None):
            """Helper to get nested dictionary values safely."""
            try:
                parts = path.split('.')
                current = data
                for part in parts:
                    if current is None:
                        return default
                    current = current.get(part)
                return current if current is not None else default
            except Exception:
                return default
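
        # Illustrative: _get_nested({'a': {'b': 1}}, 'a.b') -> 1, while a
        # missing or non-dict intermediate falls back to the default, e.g.
        # _get_nested({'a': {}}, 'a.b.c', default=0) -> 0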
        
        # Extract fields as specified in ToDo.md
        run_id = run_data.get('id') or run_data.get('trace_id')
        user_id = _get_nested(run_data, 'inputs.user_id') or _get_nested(run_data, 'outputs.data.summary.user_id')
        website_name = _get_nested(run_data, 'outputs.data.summary.website_name') or _get_nested(run_data, 'outputs.data.website_data.websiteName')
        website_email = _get_nested(run_data, 'outputs.data.website_data.restaurantEmail')
        website_phone = _get_nested(run_data, 'outputs.data.website_data.restaurantPhone')
        status = run_data.get('status')
        start_time = run_data.get('start_time')
        end_time = run_data.get('end_time')
        duration = _get_nested(run_data, 'outputs.duration') or _get_nested(run_data, 'outputs.data.summary.total_duration')
        total_cost = run_data.get('total_cost')
        prompt_cost = run_data.get('prompt_cost')
        completion_cost = run_data.get('completion_cost')
        total_tokens = run_data.get('total_tokens')
        prompt_tokens = run_data.get('prompt_tokens')
        completion_tokens = run_data.get('completion_tokens')
        outputs_success = _get_nested(run_data, 'outputs.success')
        destination_path = _get_nested(run_data, 'outputs.data.setup_result.destination_path')
        files_generated = _get_nested(run_data, 'outputs.data.summary.files_generated') or _get_nested(run_data, 'outputs.data.setup_result.html_files_created')
        error = run_data.get('error') or _get_nested(run_data, 'outputs.error')
        has_errors = _get_nested(run_data, 'outputs.data.summary.has_errors')
        error_count = _get_nested(run_data, 'outputs.data.summary.error_count')
        run_type = run_data.get('run_type')
        platform = _get_nested(run_data, 'extra.runtime.platform')
        langsmith_project = _get_nested(run_data, 'extra.metadata.LANGSMITH_PROJECT')
        colors = _get_nested(run_data, 'outputs.data.website_data.colors')
        typography = _get_nested(run_data, 'outputs.data.website_data.typography')
        pages = _get_nested(run_data, 'outputs.data.website_data.pages')
        summary_total_duration = _get_nested(run_data, 'outputs.data.summary.total_duration')
        
        # Insert or update run data
        insert_sql = '''
        INSERT INTO langsmith_runs (
            run_id, generation_id, user_id, website_name, website_email, website_phone,
            status, start_time, end_time, duration, total_cost, prompt_cost, completion_cost,
            total_tokens, prompt_tokens, completion_tokens, outputs_success, destination_path,
            files_generated, error, has_errors, error_count, run_type, platform, langsmith_project,
            colors, typography, pages, summary_total_duration
        ) VALUES (
            %s, %s, %s, %s, %s, %s,
            %s, %s, %s, %s, %s, %s, %s,
            %s, %s, %s, %s, %s,
            %s, %s, %s, %s, %s, %s, %s,
            %s, %s, %s, %s
        )
        ON DUPLICATE KEY UPDATE
            generation_id=VALUES(generation_id), user_id=VALUES(user_id),
            website_name=VALUES(website_name), website_email=VALUES(website_email), website_phone=VALUES(website_phone),
            status=VALUES(status), start_time=VALUES(start_time), end_time=VALUES(end_time), duration=VALUES(duration),
            total_cost=VALUES(total_cost), prompt_cost=VALUES(prompt_cost), completion_cost=VALUES(completion_cost),
            total_tokens=VALUES(total_tokens), prompt_tokens=VALUES(prompt_tokens), completion_tokens=VALUES(completion_tokens),
            outputs_success=VALUES(outputs_success), destination_path=VALUES(destination_path), files_generated=VALUES(files_generated),
            error=VALUES(error), has_errors=VALUES(has_errors), error_count=VALUES(error_count), run_type=VALUES(run_type),
            platform=VALUES(platform), langsmith_project=VALUES(langsmith_project), colors=VALUES(colors), typography=VALUES(typography),
            pages=VALUES(pages), summary_total_duration=VALUES(summary_total_duration)
        '''
        
        params = (
            run_id, generation_id, user_id, website_name, website_email, website_phone,
            status, start_time, end_time, duration, total_cost, prompt_cost, completion_cost,
            total_tokens, prompt_tokens, completion_tokens, outputs_success, destination_path,
            json.dumps(files_generated, cls=DecimalEncoder) if files_generated is not None else None,
            error, has_errors, error_count, run_type, platform, langsmith_project,
            json.dumps(colors, cls=DecimalEncoder) if colors is not None else None, typography, json.dumps(pages, cls=DecimalEncoder) if pages is not None else None,
            summary_total_duration
        )
        
        with conn:
            conn.execute(insert_sql, params)
        
        logger.info(f"Saved LangSmith run data for run_id: {run_id}")
        
    except Exception as e:
        logger.error(f"Failed to save LangSmith run data: {e}")
        raise