<?php

// +-------------------------------------------------+
//  2002-2025 PMB Services / www.sigb.net pmb@sigb.net et contributeurs (voir www.sigb.net)
// +-------------------------------------------------+
// Author: Gregor Neveu
// Script CLI : Indexation AI des documents numeriques
// Usage: php AiIndexationDocnumCli.php [options]

use Pmb\AI\Orm\AISettingsOrm;
use Pmb\AI\Library\Source\Semantic;

// Script-wide tuning constants
const
    // Width, in characters, of the bar drawn by afficher_progress()
    PROGRESS_BAR_LENGTH = 50,
    // Pause between two indexation batches, in microseconds (passed to usleep())
    BATCH_SLEEP_MICROSECONDS = 100_000,
    // Inclusive upper bound accepted for --batch-size
    MAX_BATCH_SIZE = 10_000,
    // Inclusive lower bound accepted for --batch-size
    MIN_BATCH_SIZE = 1;

// Refuse to run through a web SAPI: this script is command-line only.
if (PHP_SAPI !== 'cli') {
    die('This script can only be run from the command line.');
}

// Paths are resolved relative to this file (the PMB root is two levels up).
$base_path = __DIR__ . '/../..';
$class_path = $base_path . '/classes';
$include_path = $base_path . '/includes';

// NOTE(review): presumably read by init.inc.php to skip header output, checks,
// body output and session handling - confirm against the PMB bootstrap.
$base_noheader = 1;
$base_nocheck = 1;
$base_nobody = 1;
$base_nosession = 1;

// Silence PHP diagnostics: nothing is displayed and error reporting is turned off.
ini_set('display_errors', 0);
error_reporting(0);

// Provide empty values for the web-only server entries, which do not exist in CLI.
// NOTE(review): presumably the included bootstrap reads them - confirm.
$_SERVER['REQUEST_URI'] = '';
$_SERVER['HTTP_USER_AGENT'] = '';

// Load the PMB bootstrap; the variables defined above must be set before this point.
require_once($base_path . "/includes/init.inc.php");

// Check if AI is active: abort with exit status 1 when $ai_active is falsy
global $ai_active;
if (!$ai_active) {
    echo 'AI is not active' . PHP_EOL;
    exit(1);
}

// ============================================
// FONCTIONS CLI
// ============================================

/**
 * Print the command-line usage text and terminate the script.
 *
 * Writes the usage summary, the option list, a short description and a few
 * example invocations to standard output, then exits with status 0, so
 * control never returns to the caller.
 *
 * @return void
 */
function afficher_aide(): void
{
    $helpLines = [
        '',
        'Usage: php AiIndexationDocnumCli.php [options]',
        '',
        'Options:',
        '  --help, -h          : Display this help',
        '  --verbose, -v       : Verbose mode',
        '  --batch-size <n>    : Number of elements per batch',
        '  --limit <n>         : Maximum number of elements to process',
        '  --clean             : Clean indexation before starting',
        '',
        'Description:',
        '  Index digital documents (explnum) using the AI configured in PMB',
        '  Uses the active AI configuration defined in PMB administration',
        '',
        'Examples:',
        '  php AiIndexationDocnumCli.php',
        '  php AiIndexationDocnumCli.php --verbose',
        '  php AiIndexationDocnumCli.php --clean --verbose',
        '  php AiIndexationDocnumCli.php --batch-size 50',
        '  php AiIndexationDocnumCli.php --limit 20 --batch-size 10',
        '',
    ];

    // Each entry is one output line; every line, including the last, ends with "\n".
    echo implode("\n", $helpLines), "\n";

    exit(0);
}

/**
 * Parse command line arguments
 *
 * Reads the global $argv / $argc. Recognised options:
 *  - --help, -h       : prints the help through afficher_aide(), which exits
 *  - --verbose, -v    : sets 'verbose' to true
 *  - --clean          : sets 'clean' to true
 *  - --batch-size <n> : integer between MIN_BATCH_SIZE and MAX_BATCH_SIZE
 *  - --limit <n>      : strictly positive integer
 *
 * Option values must be plain integers: inputs such as "50abc" or "1.5" are
 * rejected instead of being silently truncated. An invalid or missing value
 * prints an error and exits with status 1. Unknown arguments are reported
 * with a warning and otherwise ignored.
 *
 * @return array<string, mixed> Associative array with: verbose, batch_size, limit, clean
 */
function parse_arguments(): array
{
    global $argv, $argc;

    $args = [
        'verbose' => false,
        'batch_size' => null,
        'limit' => null,
        'clean' => false
    ];

    // Returns the integer value following the option at $index, or prints an
    // error and exits when that value is missing or is not a plain integer.
    $readIntValue = static function (int $index, string $option) use ($argv): int {
        $raw = $argv[$index + 1] ?? null;
        if ($raw === null || preg_match('/^[+-]?\d+\z/', (string) $raw) !== 1) {
            echo "ERROR: $option requires a numeric argument\n";
            exit(1);
        }

        return (int) $raw;
    };

    for ($i = 1; $i < $argc; $i++) {
        switch ($argv[$i]) {
            case '--help':
            case '-h':
                afficher_aide();
                break;

            case '--verbose':
            case '-v':
                $args['verbose'] = true;
                break;

            case '--clean':
                $args['clean'] = true;
                break;

            case '--batch-size':
                $batch_size = $readIntValue($i, '--batch-size');
                // Validate batch size is within acceptable range
                if ($batch_size < MIN_BATCH_SIZE || $batch_size > MAX_BATCH_SIZE) {
                    echo sprintf(
                        "ERROR: batch-size must be between %d and %d\n",
                        MIN_BATCH_SIZE,
                        MAX_BATCH_SIZE
                    );
                    exit(1);
                }
                $args['batch_size'] = $batch_size;
                $i++; // skip the value that was just consumed
                break;

            case '--limit':
                $limit = $readIntValue($i, '--limit');
                // Validate limit is positive
                if ($limit <= 0) {
                    echo "ERROR: limit must be a positive number\n";
                    exit(1);
                }
                $args['limit'] = $limit;
                $i++; // skip the value that was just consumed
                break;

            default:
                // Make typos visible (e.g. "--clen") instead of dropping them silently
                echo "WARNING: ignoring unknown option '{$argv[$i]}'\n";
                break;
        }
    }

    return $args;
}

/**
 * Write a timestamped line to standard output.
 *
 * The line is printed only when the global $verbose flag is truthy or when
 * $force is true; otherwise the call does nothing.
 *
 * @param string $message The message to log
 * @param bool $force Print the message even when verbose mode is off
 * @return void
 */
function log_message(string $message, bool $force = false): void
{
    global $verbose;

    // Stay silent unless verbose mode is on or the caller forces the output.
    if (!$verbose && !$force) {
        return;
    }

    printf("[%s] %s\n", date('Y-m-d H:i:s'), $message);
}

/**
 * Display a CLI progress bar
 *
 * Redraws the bar on the current terminal line (the output starts with "\r")
 * and appends a newline once the progress reaches 100%.
 *
 * The percentage is clamped to the 0-100 range before drawing: an
 * out-of-range value would otherwise give str_repeat() a negative count,
 * which throws a ValueError.
 *
 * @param float $progress Progress percentage (values outside 0-100 are clamped)
 * @param int|null $totalEntries Total number of entries
 * @param int|null $countIndexed Number of indexed entries
 * @param int $barLength Width of the bar in characters (negative values are treated as 0)
 * @return void
 */
function afficher_progress(
    float $progress,
    ?int $totalEntries = null,
    ?int $countIndexed = null,
    int $barLength = PROGRESS_BAR_LENGTH
): void {
    $progress = max(0.0, min(100.0, $progress));
    $barLength = max(0, $barLength);

    $filledLength = (int)($barLength * $progress / 100);
    $bar = str_repeat('=', $filledLength) . str_repeat('-', $barLength - $filledLength);

    // The "(indexed/total)" suffix is shown only when both counters are provided
    $info = '';
    if ($totalEntries !== null && $countIndexed !== null) {
        $info = " ($countIndexed/$totalEntries)";
    }

    echo "\r[" . $bar . "] " . number_format($progress, 2) . "%$info";
    flush();

    // Finish the line once the bar is full so later output starts on a fresh line
    if ($progress >= 100) {
        echo "\n";
    }
}

// ============================================
// LOGIQUE METIER (conservee du script original)
// ============================================

/**
 * Prepare an AI indexation run.
 *
 * Checks that the TYPE_EXPLNUM constant exists, fetches the active AI
 * settings through AISettingsOrm::fetchActiveAiSettings(), instantiates
 * Semantic with them and, when requested, cleans the existing TYPE_EXPLNUM
 * indexation.
 *
 * @param bool $doClean Whether to clean existing indexation
 * @return int|false The active AI settings ID, or false when TYPE_EXPLNUM is
 *                   not defined, no active configuration is found, or the
 *                   cleanup throws an Exception
 */
function startIndexationAI(bool $doClean = false): int|false
{
    global $verbose;

    log_message("=== Initialisation de l'indexation AI ===", true);

    // The indexation type constant must exist before anything else is attempted
    if (defined('TYPE_EXPLNUM') === false) {
        log_message("ERROR: TYPE_EXPLNUM constant not defined", true);
        return false;
    }

    // Look up the active AI configuration
    $activeSettingsId = AISettingsOrm::fetchActiveAiSettings();
    if (!$activeSettingsId) {
        log_message("ERREUR: Aucune configuration AI active trouvee", true);
        log_message("Veuillez activer une configuration AI dans l'administration PMB", true);
        return false;
    }

    log_message("Configuration AI active: ID {$activeSettingsId}", $verbose);

    // Instantiated even when no cleanup is requested, as construction may itself fail
    $indexer = new Semantic($activeSettingsId);

    if (!$doClean) {
        return $activeSettingsId;
    }

    log_message("Nettoyage de l'indexation existante...", true);
    try {
        $indexer->cleanIndexation(TYPE_EXPLNUM);
        log_message("Nettoyage termine", true);
    } catch (Exception $cleanupError) {
        log_message("ERROR during cleanup: " . $cleanupError->getMessage(), true);
        return false;
    }

    return $activeSettingsId;
}

/**
 * Run AI indexation in batches
 *
 * Repeatedly calls Semantic::indexation() and stops when:
 *  - the batch reports no entry to index,
 *  - every entry is reported as indexed,
 *  - the optional limit is reached (checked after each batch, so the last
 *    batch may overshoot it),
 *  - or a batch throws.
 * Progress is displayed with afficher_progress() after each batch.
 *
 * @param int $idSettings The AI settings ID
 * @param int|null $batchSize Optional custom batch size; defaults to the global
 *                            $ai_index_nb_elements, or 10 when that value is
 *                            not a positive integer
 * @param int|null $limit Optional maximum number of elements to process
 * @return int Number of elements indexed during this run
 */
function runIndexationAI(int $idSettings, ?int $batchSize = null, ?int $limit = null): int
{
    global $verbose, $ai_index_nb_elements;

    // Accept integers and integer-like strings: a plain is_int() test would
    // discard a configured value stored as a string and always use the default.
    $configuredBatchSize = filter_var($ai_index_nb_elements ?? null, FILTER_VALIDATE_INT);
    if ($configuredBatchSize === false || $configuredBatchSize <= 0) {
        $configuredBatchSize = 10; // safe default
        $ai_index_nb_elements = $configuredBatchSize;
    }

    $semantic = new Semantic($idSettings);
    $semantic->setIndexationType(TYPE_EXPLNUM);

    // Use custom batch size if provided
    $elementsPerBatch = $batchSize ?? $configuredBatchSize;

    $limitMessage = $limit !== null ? " (limite: $limit elements)" : "";
    log_message("Debut de l'indexation (lots de $elementsPerBatch elements)$limitMessage...", true);

    // Number of documents already indexed before this run; used as the
    // baseline to work out how many elements each batch adds.
    // NOTE(review): assumes this query counts the same rows as the
    // "countIndexed" value returned by Semantic::indexation() - confirm.
    $initialCountIndexed = 0;
    try {
        $countResult = pmb_mysql_query("SELECT count(*) as count FROM explnum WHERE explnum_embeddings != '' AND explnum_index_sew != ''");
        if ($countResult) {
            $initialCountIndexed = (int) pmb_mysql_result($countResult, 0, 0);
        } else {
            log_message("ERROR getting initial count: query failed", true);
        }
    } catch (Throwable $e) {
        log_message("ERROR getting initial count: " . $e->getMessage(), true);
    }

    $totalProcessedInThisRun = 0;
    $previousCountIndexed = $initialCountIndexed;
    $iteration = 0;

    while (true) {
        $iteration++;
        log_message("Iteration $iteration...", $verbose);

        try {
            // Execute indexation batch
            $result = $semantic->indexation($elementsPerBatch);
        } catch (Throwable $e) {
            // Throwable also covers PHP Error/TypeError, not only Exception
            log_message("ERROR during indexation: " . $e->getMessage(), true);
            break;
        }

        // Nothing to index: stop before touching the counters, so a result
        // without "countIndexed" cannot make the run total negative.
        $totalEntries = (int) ($result["count"] ?? 0);
        if ($totalEntries <= 0) {
            afficher_progress(100);
            log_message("Aucun element a indexer", true);
            break;
        }

        // A missing "countIndexed" is treated as "no change since the last batch"
        $countIndexed = (int) ($result["countIndexed"] ?? $previousCountIndexed);

        // Calculate how many elements were processed in this iteration
        $processedThisIteration = max(0, $countIndexed - $previousCountIndexed);
        $totalProcessedInThisRun += $processedThisIteration;
        $previousCountIndexed = $countIndexed;

        // Calculate progress (total indexed / total to index), kept within 0-100
        $progress = round(($countIndexed * 100) / $totalEntries, 2);
        $progress = max(0.0, min(100.0, $progress));

        // Display progress with info about this run
        afficher_progress($progress, $totalEntries, $countIndexed);

        if ($verbose) {
            log_message("Elements traites dans cette execution: $totalProcessedInThisRun", true);
        }

        // Check if limit is reached (based on elements processed in THIS run)
        if ($limit !== null && $totalProcessedInThisRun >= $limit) {
            log_message("\nLimite atteinte ($limit elements traites dans cette execution)", true);
            log_message("Total d'elements indexes dans cette execution: $totalProcessedInThisRun", true);
            log_message("Total global d'elements indexes: $countIndexed", true);
            break;
        }

        // Check if finished: every entry is now indexed. The former test
        // ($totalEntries === 0) could never be reached after the guard above,
        // so the loop kept running once everything was indexed.
        if ($countIndexed >= $totalEntries) {
            log_message("\nIndexation terminee avec succes!", true);
            log_message("Total d'elements indexes dans cette execution: $totalProcessedInThisRun", true);
            log_message("Total global d'elements indexes: $countIndexed", true);
            break;
        }

        // NOTE(review): a batch that indexes nothing while entries remain does
        // not stop the loop; confirm that Semantic::indexation() cannot stall
        // on entries it is unable to index.

        // Small pause to avoid overloading system (only if work remains)
        usleep(BATCH_SLEEP_MICROSECONDS);
    }

    return $totalProcessedInThisRun;
}

// ============================================
// EXECUTION PRINCIPALE
// ============================================

// Entry point: parse the options, initialise the indexation, run it, then exit
// with status 0 on completion or 1 on any initialisation failure or uncaught error.
try {
    global $class_path, $msg, $charset, $verbose, $ai_index_nb_elements;

    // Parse command line arguments
    $args = parse_arguments();
    $verbose = $args['verbose'];

    log_message("=== Script d'indexation AI des documents numeriques ===", true);
    log_message("Demarrage: " . date('Y-m-d H:i:s'), true);

    // Initialize indexation
    $idSettings = startIndexationAI($args['clean']);

    if ($idSettings === false) {
        exit(1);
    }

    // Run indexation (progress and totals are reported by the function itself)
    runIndexationAI($idSettings, $args['batch_size'], $args['limit']);

    log_message("=== Indexation terminee ===", true);
    log_message("Fin: " . date('Y-m-d H:i:s'), true);

    exit(0);

} catch (InvalidArgumentException $e) {
    log_message("ERREUR VALIDATION: " . $e->getMessage(), true);
    exit(1);
} catch (RuntimeException $e) {
    log_message("ERREUR EXECUTION: " . $e->getMessage(), true);
    exit(1);
} catch (Throwable $e) {
    // Throwable rather than Exception: PHP errors (TypeError, Error, ...) are
    // reported here too. Error display and reporting are disabled at the top
    // of this script, so an uncaught Error would end the run without a message.
    log_message("ERREUR CRITIQUE: " . $e->getMessage(), true);
    log_message("Trace: " . $e->getTraceAsString(), true);
    exit(1);
}
