<?php

// +-------------------------------------------------+
// (c) 2002-2004 PMB Services / www.sigb.net pmb@sigb.net et contributeurs (voir www.sigb.net)
// +-------------------------------------------------+
// $Id: Semantic.php,v 1.1.2.13.2.5 2025/12/12 15:09:39 gneveu Exp $

namespace Pmb\AI\Library\Source;

// Refuse direct HTTP access to this file. REQUEST_URI is not defined when the
// script runs from the command line, so default to an empty string instead of
// reading an undefined key and passing null to stristr().
if (stristr($_SERVER['REQUEST_URI'] ?? '', '/' . basename(__FILE__))) {
    die("no access");
}

use encoding_normalize;
use InvalidArgumentException;
use Pmb\AI\Library\Chunker\TextChunker;
use Pmb\AI\Library\RetryManager;
use Pmb\AI\Models\AiModel;

/**
 * Semantic source: stores text embeddings for notices and explnum documents
 * and searches them through the clusters linked to the configured caddie.
 */
class Semantic extends AbstractSource implements Source
{

    /**
     * Indexation type (compared against TYPE_NOTICE / TYPE_EXPLNUM)
     *
     * @var int
     */
    protected $indexationType;

    /**
     * Set indexation type
     *
     * @param integer $indexationType
     * @return void
     */
    public function setIndexationType(int $indexationType): void
    {
        $this->indexationType = $indexationType;
    }

    /**
     * Clean indexation: reset the stored embeddings of every row of the given type
     *
     * @param integer $type TYPE_NOTICE or TYPE_EXPLNUM
     * @return void
     * @throws InvalidArgumentException when the type is not handled
     */
    public function cleanIndexation(int $type): void
    {
        switch ($type) {
            case TYPE_NOTICE:
                pmb_mysql_query("UPDATE notices SET embeddings = NULL");
                break;
            case TYPE_EXPLNUM:
                pmb_mysql_query("UPDATE explnum SET explnum_embeddings = NULL");
                break;
            default:
                throw new InvalidArgumentException("[cleanIndexation] Unknown type");
        }
    }

    /**
     * Request embeddings for a list of texts, sending them to the service in batches
     *
     * The result is only usable when it is aligned with the input, so any
     * missing, incomplete or empty batch result invalidates the whole call.
     *
     * @param array $texts
     * @param integer $batchSize Number of texts sent per request
     * @return array One embedding per input text (same order), or an empty array on any failure
     */
    private function embedTextsByBatch(array $texts, int $batchSize = 15): array
    {
        $embeddings = [];

        foreach (array_chunk($texts, $batchSize) as $batch) {
            $batchEmbeddings = $this->service->textToEmbeddings($batch);
            if (
                empty($batchEmbeddings)
                || !is_array($batchEmbeddings)
                || count($batchEmbeddings) !== count($batch)
            ) {
                return [];
            }

            foreach ($batchEmbeddings as $embedding) {
                if (empty($embedding)) {
                    return [];
                }
                $embeddings[] = $embedding;
            }
        }

        return $embeddings;
    }

    /**
     * Indexation: compute and store embeddings for a batch of entities
     *
     * @param integer $limit Passed to AiModel::getEntitiesDataAi()
     * @return array Keys "count" and "countIndexed", as returned by AiModel::getEntitiesDataAi()
     */
    public function indexation(int $limit): array
    {
        $data = AiModel::getEntitiesDataAi($this->indexationType, $limit);

        if ($this->indexationType === TYPE_NOTICE) {
            // Notices are sent to the service by pages of 100 contents.
            foreach (array_chunk($data["entities"], 100) as $page) {
                $embeddings = $this->service->textToEmbeddings(array_column($page, 'content'));
                if (empty($embeddings)) {
                    continue;
                }

                foreach ($page as $key => $entity) {
                    if (empty($embeddings[$key])) {
                        continue;
                    }

                    // A notice is stored as a single chunk covering its whole content.
                    $chunks = [
                        'offset' => 0,
                        'length' => strlen($entity["content"]),
                        'data' => $embeddings[$key]
                    ];
                    $query = "UPDATE notices
                        set embeddings = '" . addslashes(encoding_normalize::json_encode($chunks)) . "'
                        WHERE notice_id = " . intval($entity["object_id"]);

                    pmb_mysql_query($query);
                }
            }
        } elseif ($this->indexationType === TYPE_EXPLNUM) {
            foreach ($data["entities"] as $entity) {
                $chunker = new TextChunker();
                $chunks = $chunker->chunk($entity["content"]);

                $texts = array_column($chunks, 'data');
                if (empty($texts)) {
                    continue;
                }

                // Skip the document when any chunk could not be embedded: a partial
                // result would attach embeddings to the wrong chunks.
                $embeddings = $this->embedTextsByBatch($texts);
                if (empty($embeddings)) {
                    continue;
                }

                // Replace each chunk text by its embedding, using the same shape as
                // the notice branch above and as indexationById().
                foreach ($embeddings as $key => $embedding) {
                    $chunks[$key]['data'] = $embedding;
                }

                $query = "UPDATE explnum
                    set explnum_embeddings = '" . addslashes(encoding_normalize::json_encode($chunks)) . "'
                    WHERE explnum_id = " . intval($entity["object_id"]);
                pmb_mysql_query($query);
            }
        }

        return [
            "count" => $data["count"],
            "countIndexed" => $data["countIndexed"]
        ];
    }

    /**
     * Index a single content, then attach it to the most similar cluster
     * when it is not attached to any cluster yet
     *
     * Nothing is written when the content does not exist, is empty, or when
     * the service returns no usable embeddings.
     *
     * @param integer $id notice_id or explnum_id, depending on the indexation type
     * @return void
     * @throws InvalidArgumentException when the indexation type is not handled
     */
    public function indexationById(int $id): void
    {
        switch ($this->indexationType) {
            case TYPE_NOTICE:
                $querySelect = "SELECT CONCAT(tit1, ' ', n_resume) as content FROM notices
                    WHERE n_resume != ''
                    AND notice_id = $id
                ";
                $result = pmb_mysql_query($querySelect);
                if (!$result || !pmb_mysql_num_rows($result)) {
                    return;
                }

                $entity = pmb_mysql_fetch_assoc($result);
                $embeddings = $this->service->textToEmbeddings($entity["content"]);
                if (empty($embeddings) || empty($embeddings[0])) {
                    // Service unavailable or empty answer: keep what is stored.
                    return;
                }

                $chunks = ['offset' => 0, 'length' => strlen($entity["content"]), 'data' => $embeddings[0]];

                $query = "UPDATE notices
                        set embeddings = '" . addslashes(encoding_normalize::json_encode($chunks)) . "'
                        WHERE notice_id = " . $id;
                pmb_mysql_query($query);
                break;

            case TYPE_EXPLNUM:
                $querySelect = "SELECT explnum_id, explnum_index_wew FROM explnum
                    WHERE explnum_index_wew != ''
                    AND explnum_id = $id
                ";
                $result = pmb_mysql_query($querySelect);
                if (!$result || !pmb_mysql_num_rows($result)) {
                    return;
                }

                $entity = pmb_mysql_fetch_assoc($result);
                $chunker = new TextChunker();
                $chunks = $chunker->chunk($entity["explnum_index_wew"]);

                $texts = array_column($chunks, 'data');
                if (empty($texts)) {
                    return;
                }

                // Same batching and all-or-nothing rule as the bulk indexation.
                $embeddings = $this->embedTextsByBatch($texts);
                if (empty($embeddings)) {
                    return;
                }

                foreach ($embeddings as $key => $embedding) {
                    $chunks[$key]['data'] = $embedding;
                }

                $query = "UPDATE explnum
                        set explnum_embeddings = '" . addslashes(encoding_normalize::json_encode($chunks)) . "'
                        WHERE explnum_id = " . $id;
                pmb_mysql_query($query);
                break;

            default:
                throw new InvalidArgumentException("[indexationById] Unknown type");
        }

        $query = "SELECT 1 FROM cluster_contents WHERE num_object = " . $id . " AND type_object = " . $this->indexationType;
        $result = pmb_mysql_query($query);
        if ($result && pmb_mysql_num_rows($result)) {
            // Already attached to a cluster.
            return;
        }

        $clustersEmbeddings = $this->getClustersEmbeddings();
        $similarCluster = $this->findClusterMostSimilarWithEmbeddings($embeddings, $clustersEmbeddings);
        if (empty($similarCluster) || !isset($similarCluster['cluster_id'])) {
            // No cluster to attach to: an INSERT without cluster id would be invalid SQL.
            return;
        }

        $query = "INSERT INTO cluster_contents (num_cluster, num_object, type_object) VALUES ("
            . intval($similarCluster['cluster_id']) . ", " . $id . ", " . $this->indexationType . ")";
        pmb_mysql_query($query);
    }

    /**
     * Search
     *
     * Embeds the user query, selects the most similar cluster, then returns the
     * contents of that cluster whose similarity reaches the minimum score.
     * When the service returns no embeddings, a "retry after" answer is returned instead.
     *
     * @param string $userQuery
     * @param int|null $minScore Percentage; falls back to the min_score setting when null
     * @param string|null $sessionId Unique identifier of the retry session (optional)
     * @return array{object: int, data: array}
     */
    public function search(string $userQuery, ?int $minScore = null, ?string $sessionId = null): array
    {
        // Create a retry session id when none is provided
        if (empty($sessionId)) {
            $sessionId = 'ai_search_' . md5(session_id() . '_' . time());
        }

        $retryManager = new RetryManager($sessionId);

        // Ask the service for the embeddings of the query
        $textEmbeddings = $this->service->textToEmbeddings($userQuery);

        // No answer from the service
        if (empty($textEmbeddings)) {
            $retryManager->recordFailedAttempt();

            // Another attempt is still allowed
            if ($retryManager->canRetry()) {
                return [
                    'object' => AbstractSource::OBJECT_RETRY_AFTER,
                    'data' => [
                        'retry-after' => $retryManager->getRetryDelay(),
                        'retry_count' => $retryManager->getRetryCount(),
                        'elapsed_time' => $retryManager->getElapsedTime(),
                        'remaining_time' => $retryManager->getRemainingTime(),
                        'session_id' => $sessionId
                    ]
                ];
            }

            // Too many attempts or timeout reached.
            // NOTE(review): RetryManager is not visible here; the figures are read
            // before cleanSession() in case cleaning resets them.
            $retryCount = $retryManager->getRetryCount();
            $elapsedTime = $retryManager->getElapsedTime();
            $summary = $retryManager->getSummary();
            $retryManager->cleanSession();

            return [
                'object' => AbstractSource::OBJECT_RETRY_AFTER,
                'data' => [
                    'retry-after' => 0,
                    'error' => 'max_retries_exceeded',
                    'retry_count' => $retryCount,
                    'elapsed_time' => $elapsedTime,
                    'summary' => $summary
                ]
            ];
        }

        // The request succeeded: clean the retry session
        $retryManager->cleanSession();

        $queryEmbeddings = array_shift($textEmbeddings);

        $clustersEmbeddings = $this->getClustersEmbeddings();
        $clusterMostSimilar = $this->findClusterMostSimilar($queryEmbeddings, $clustersEmbeddings);

        if (empty($clusterMostSimilar)) {
            return [
                'object' => AbstractSource::OBJECT_RESPONSE,
                'data' => []
            ];
        }

        $databaseEmbeddings = $this->getDatabaseEmbeddings($clusterMostSimilar['cluster_id']);
        $similarEmbeddings = $this->findSimilarAboveThreshold(
            $queryEmbeddings,
            $databaseEmbeddings,
            floatval(( ($minScore ?? $this->settings->min_score) / 100))
        );

        // Group the matching chunks by object, keeping the best score of each object
        $data = [];
        foreach ($similarEmbeddings as $similarEmbedding) {
            $index = $similarEmbedding['object_type'] . '_' . $similarEmbedding['object_id'];
            $data[$index] ??= [
                'object_type' => $similarEmbedding['object_type'],
                'object_id' => $similarEmbedding['object_id'],
                'score' => 0,
                'pertinent_content' => []
            ];

            $data[$index]['score'] = max($data[$index]['score'], $similarEmbedding['similarity']);
            $data[$index]['pertinent_content'][] = [
                'offset' => $similarEmbedding['data']['offset'],
                'length' => $similarEmbedding['data']['length']
            ];
        }

        // Sort by descending score; usort() also reindexes the result from 0
        usort($data, static fn ($a, $b) => $b['score'] <=> $a['score']);

        return [
            'object' => AbstractSource::OBJECT_RESPONSE,
            'data' => $data
        ];
    }

    /**
     * Get the embeddings of the clusters that contain at least one notice of the configured caddie
     *
     * Clusters whose stored embeddings are empty or cannot be decoded are ignored.
     *
     * @return array List of ['cluster_id' => ..., 'embeddings' => array]
     */
    protected function getClustersEmbeddings(): array
    {
        $query = "SELECT id, embeddings FROM clusters
            WHERE id IN (
                SELECT num_cluster FROM cluster_contents
                JOIN caddie_content
                    ON caddie_id = " . intval($this->settings->caddie_id) . "
                    AND caddie_content.object_id = cluster_contents.num_object
                    AND cluster_contents.type_object = " . TYPE_NOTICE . "
            )";

        $result = pmb_mysql_query($query);

        $clusters = [];
        if ($result && pmb_mysql_num_rows($result)) {
            while ($row = pmb_mysql_fetch_assoc($result)) {
                $decoded = json_decode($row['embeddings'], true);
                if (!empty($decoded)) {
                    $clusters[] = [
                        'cluster_id' => $row['id'],
                        'embeddings' => $decoded
                    ];
                }
            }

            pmb_mysql_free_result($result);
        }

        return $clusters;
    }

    /**
     * Get the indexed objects of a cluster that belong to the configured caddie
     *
     * Only object references are returned (type and id), not the embeddings themselves.
     * Notices and/or explnum are included according to the indexation_choice settings.
     *
     * @param int $clusterId
     * @return array List of ['object_type' => ..., 'object_id' => ...]
     */
    protected function getDatabaseEmbeddings(int $clusterId): array
    {
        $embeddings = [];

        if ($this->getSettings()->indexation_choice->summary) {
            $query = "SELECT notice_id FROM notices
                JOIN caddie_content
                    ON caddie_content.object_id = notices.notice_id
                    AND caddie_id = " . intval($this->settings->caddie_id) . "
                JOIN cluster_contents
                    ON cluster_contents.num_cluster = " . $clusterId . "
                    AND cluster_contents.num_object = notices.notice_id
                    AND cluster_contents.type_object = " . TYPE_NOTICE . "
                WHERE notices.embeddings IS NOT NULL";

            $result = pmb_mysql_query($query);
            if ($result && pmb_mysql_num_rows($result)) {
                while ($row = pmb_mysql_fetch_assoc($result)) {
                    $embeddings[] = [
                        'object_type' => TYPE_NOTICE,
                        'object_id' => $row['notice_id'],
                    ];
                }

                pmb_mysql_free_result($result);
            }
        }

        if ($this->getSettings()->indexation_choice->docnum) {
            // For TYPE_EXPLNUM rows, indexationById() stores the explnum id in
            // cluster_contents.num_object, so the cluster join is done on
            // explnum_id; the notice is only used to filter on the caddie.
            $query = "SELECT explnum_id FROM explnum
                JOIN notices
                    ON notices.notice_id = explnum.explnum_notice
                JOIN caddie_content
                    ON notices.notice_id = caddie_content.object_id
                    AND caddie_id = " . intval($this->settings->caddie_id) . "
                JOIN cluster_contents
                    ON cluster_contents.num_cluster = " . $clusterId . "
                    AND cluster_contents.num_object = explnum.explnum_id
                    AND cluster_contents.type_object = " . TYPE_EXPLNUM . "
                WHERE explnum.explnum_embeddings IS NOT NULL";

            $result = pmb_mysql_query($query);
            if ($result && pmb_mysql_num_rows($result)) {
                while ($row = pmb_mysql_fetch_assoc($result)) {
                    $embeddings[] = [
                        'object_type' => TYPE_EXPLNUM,
                        'object_id' => $row['explnum_id'],
                    ];
                }

                pmb_mysql_free_result($result);
            }
        }

        return $embeddings;
    }
}
