"""
Central configuration for :mod:`batch_scheduled_profiling`.

Edit values here (or duplicate this module per environment) instead of passing long
argparse option lists on the cron command line. The batch script imports ``BATCH_SCHEDULER`` by default.

See ``batch_scheduled_profiling`` module docstring for behavior of each option.
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Optional


@dataclass
class BatchSchedulerConfig:
    """Settings for one keyset batch-profiling pass plus the optional publish step."""

    # --- Pagination / throughput -------------------------------------------
    # Number of user IDs fetched per DB page (keyset ``TOP n``).
    batch_size: int = 30
    # Profiling lookback window in days (same idea as the API).
    window_days: int = 60

    # --- Activity filter (user_activity_logs) ------------------------------
    # When True, keyset pages only include users with at least one activity
    # log within ``recent_activity_days``.
    batch_require_recent_activity: bool = True
    # Roughly three months; drives the batch user-list filter and, when the
    # batch filter is off, the per-user gate.
    recent_activity_days: int = 90

    # --- Throttling ---------------------------------------------------------
    # Seconds slept after each user (throttle DB + CPU).
    sleep_between_users: float = 0.1
    # Seconds slept after each keyset page.
    sleep_between_batches: float = 10.0

    # Stop once this many profiles have succeeded; ``None`` means no limit.
    max_users: Optional[int] = None

    # Users profiled in parallel within one keyset page (``1`` = sequential).
    # Each user still uses the orchestrator's internal parallel fetches, so
    # size ``DB_POOL_SIZE`` for ``batch_max_workers x inner_fetch_workers``.
    batch_max_workers: int = 5

    # --- Deep analysis (LLM) ------------------------------------------------
    # When True, run structured deep analysis per user (slow / costly).
    with_deep_analysis: bool = True

    # --- Resume / checkpoint ------------------------------------------------
    # Ignore users with id <= this value; ``0`` means use the checkpoint or
    # start from the beginning.
    start_after_id: int = 0
    # Path to the JSON ``last_success_user_id`` file; ``None`` disables
    # checkpointing.
    checkpoint_file: Optional[str] = None

    # --- Tracker / recency skip --------------------------------------------
    # Skip users published within this many days; ``0`` = never skip by recency.
    skip_recency_days: int = 10
    # When True, process every user regardless of the tracker.
    ignore_recency: bool = False

    # --- Publish (end of full scan) ----------------------------------------
    # When True, do not MERGE staging → published after a natural full scan.
    skip_publish: bool = False
    # When True, publish even when ``fail_count > 0``.
    publish_with_failures: bool = False


# Default config instance imported by the batch entrypoint — customize per
# deployment by editing field values here or duplicating this module.
BATCH_SCHEDULER = BatchSchedulerConfig()
