<?php

// +-------------------------------------------------+
//  2002-2004 PMB Services / www.sigb.net pmb@sigb.net et contributeurs (voir www.sigb.net)
// +-------------------------------------------------+
// $Id: AbstractSource.php,v 1.1.2.11 2025/04/11 07:36:26 qvarin Exp $

namespace Pmb\AI\Library\Source;

// Refuse direct HTTP access to this file: it only declares a class and must be
// loaded through the application. REQUEST_URI is not set in CLI/cron contexts,
// so fall back to an empty string instead of reading an undefined key.
if (stristr($_SERVER['REQUEST_URI'] ?? '', '/' . basename(__FILE__)) !== false) {
    die("no access");
}

use InvalidArgumentException;
use Pmb\AI\Library\Services\Service;
use Pmb\AI\Library\Utils;
use Pmb\AI\Models\AiModel;
use Pmb\AI\Orm\AISettingsOrm;
use Pmb\Common\Helper\GlobalContext;

// Load the H2o template engine from the application's include directory;
// the class below renders its prompts through \H2o::parseString().
global $include_path;
require_once "{$include_path}/h2o/h2o.php";

/**
 * Base class of the AI sources.
 *
 * It loads the JSON settings attached to an AI settings record, compares query
 * embeddings with entity/cluster embeddings through Utils::cosineSimilarity(),
 * and renders the H2o prompt templates that are sent to the AI service.
 */
abstract class AbstractSource
{
    // NOTE(review): the two OBJECT_* constants are not referenced inside this
    // class; they are kept unchanged as part of the public API.
    public const OBJECT_RESPONSE = 0;
    public const OBJECT_RETRY_AFTER = 1;

    /**
     * Number of entities handed to Utils::fetchPageEmbeddings() per call.
     */
    public const MAX_ENTITIES_BY_PAGE = 3000;

    /**
     * Maximum number of entities turned into documents for a text generation prompt.
     */
    public const MAX_DOCUMENTS_BY_PROMPT = 10;

    /**
     * Decoded settings (always an object, possibly empty)
     *
     * @var \stdClass
     */
    protected $settings;

    /**
     * Service used to call the AI backend
     *
     * @var Service
     */
    protected $service;

    /**
     * Constructor: loads the settings, then instantiates the service.
     *
     * @param integer $idSetting Identifier given to AISettingsOrm
     */
    public function __construct(int $idSetting)
    {
        $this->fetchData($idSetting);
        $this->service = new Service();
    }

    /**
     * Load and decode the JSON settings of the given AI settings record.
     *
     * Anything that does not decode to a JSON object (missing value, invalid
     * JSON, JSON list or scalar) results in an empty \stdClass so that the
     * property accessors of this class can always be used safely.
     *
     * @param integer $idSetting
     * @return void
     */
    public function fetchData(int $idSetting)
    {
        $orm = new AISettingsOrm($idSetting);

        // Read the property directly (no "??"): the ORM may expose it through
        // a magic getter, in which case isset-like operators are unreliable.
        $rawSettings = $orm->settings_ai_settings;

        $decoded = is_string($rawSettings) ? json_decode($rawSettings) : null;
        $this->settings = ($decoded instanceof \stdClass) ? $decoded : new \stdClass();
    }

    /**
     * Get Settings
     *
     * @return \stdClass
     */
    public function getSettings()
    {
        return $this->settings;
    }

    /**
     * Get the system prompt template (empty string when not configured).
     *
     * @return string
     */
    protected function getPromptSystem(): string
    {
        return $this->settings->prompt_system ?? '';
    }

    /**
     * Get the user prompt template (empty string when not configured).
     *
     * @return string
     */
    protected function getPromptUser(): string
    {
        return $this->settings->prompt_user ?? '';
    }

    /**
     * Collect the chunks of one entity whose cosine similarity with the query
     * reaches the threshold.
     *
     * @param array $queryEmbeddings Embedding vector of the query
     * @param array $entity Entity with 'object_type', 'object_id' and an
     *                      'embeddings' list of chunks ('data', 'offset', 'length')
     * @param float $threshold Minimum similarity (inclusive)
     * @return array List of matches (object_type, object_id, similarity, data)
     */
    private function findSimilarChunksAboveThreshold(array $queryEmbeddings, array $entity, float $threshold): array
    {
        $similarEmbeddings = [];

        // An entity without any chunk simply yields no match.
        foreach ($entity['embeddings'] ?? [] as $chunkEmbeddings) {
            $similarity = Utils::cosineSimilarity($queryEmbeddings, $chunkEmbeddings['data']);
            if ($similarity >= $threshold) {
                $similarEmbeddings[] = [
                    'object_type' => $entity['object_type'],
                    'object_id' => $entity['object_id'],
                    'similarity' => $similarity,
                    'data' => [
                        'offset' => $chunkEmbeddings['offset'],
                        'length' => $chunkEmbeddings['length'],
                    ]
                ];
            }
        }

        return $similarEmbeddings;
    }

    /**
     * Find every chunk whose similarity with the query reaches the threshold.
     *
     * The entities are processed page by page (MAX_ENTITIES_BY_PAGE entities
     * per call to Utils::fetchPageEmbeddings()).
     *
     * @param array $queryEmbeddings Embedding vector of the query
     * @param array $databaseEmbeddings Entities to scan
     * @param float $threshold Minimum similarity (inclusive)
     * @return array List of matches, in scan order
     */
    protected function findSimilarAboveThreshold(array $queryEmbeddings, array $databaseEmbeddings, float $threshold): array
    {
        $similarEmbeddings = [];

        foreach (array_chunk($databaseEmbeddings, self::MAX_ENTITIES_BY_PAGE) as $page) {
            foreach (Utils::fetchPageEmbeddings($page) as $entity) {
                $matches = $this->findSimilarChunksAboveThreshold($queryEmbeddings, $entity, $threshold);

                // Append in place: rebuilding the whole result with the spread
                // operator for every entity would copy it again and again.
                foreach ($matches as $match) {
                    $similarEmbeddings[] = $match;
                }
            }
        }

        return $similarEmbeddings;
    }

    /**
     * Find the cluster most similar to a single query embedding.
     *
     * @param array $queryEmbeddings Embedding vector of the query
     * @param array $clustersEmbeddings Clusters, each with an 'embeddings' vector
     * @return array The best cluster, or [] when there is no cluster
     */
    protected function findClusterMostSimilar(array $queryEmbeddings, array $clustersEmbeddings): array
    {
        return $this->selectBestCluster([$queryEmbeddings], $clustersEmbeddings);
    }

    /**
     * Find the cluster most similar to any embedding of a group.
     *
     * @param array $groupEmbeddings List of embedding vectors
     * @param array $clustersEmbeddings Clusters, each with an 'embeddings' vector
     * @return array The best cluster, or [] when there is no cluster
     */
    protected function findClusterMostSimilarWithEmbeddings(array $groupEmbeddings, array $clustersEmbeddings): array
    {
        return $this->selectBestCluster($groupEmbeddings, $clustersEmbeddings);
    }

    /**
     * Shared implementation of the two findClusterMostSimilar* methods.
     *
     * The score of a cluster is the highest cosine similarity between the
     * cluster and any candidate embedding, floored at 0. The first cluster
     * holding the highest score wins.
     *
     * @param array $candidateEmbeddings List of embedding vectors
     * @param array $clustersEmbeddings Clusters, each with an 'embeddings' vector
     * @return array The best cluster, or [] when there is no cluster
     */
    private function selectBestCluster(array $candidateEmbeddings, array $clustersEmbeddings): array
    {
        $bestCluster = [];
        $bestScore = null;

        foreach ($clustersEmbeddings as $cluster) {
            $score = 0;
            foreach ($candidateEmbeddings as $embeddings) {
                $score = max($score, Utils::cosineSimilarity($cluster['embeddings'], $embeddings));
            }

            // Strict ">" keeps the first cluster in case of a tie.
            if (null === $bestScore || $score > $bestScore) {
                $bestScore = $score;
                $bestCluster = $cluster;
            }
        }

        return $bestCluster;
    }

    /**
     * Generate a text answer to a question from a list of indexed entities.
     *
     * Only the first MAX_DOCUMENTS_BY_PROMPT entities are used. Each one is
     * turned into a document (id, content, entity_data) made available to the
     * system prompt template.
     *
     * @param string $question
     * @param array $indexList Entities with 'object_id', 'object_type' and 'pertinent_content'
     * @return string '' when $indexList is empty, otherwise the result of Service::chat()
     * @throws InvalidArgumentException When an entity has an unknown type
     */
    public function textGeneration(string $question, array $indexList)
    {
        if (empty($indexList)) {
            return '';
        }

        $userData = [
            "query" => $question,
            "language" => GlobalContext::getCurrentLanguage(),
        ];

        $documents = [];
        foreach (array_slice($indexList, 0, self::MAX_DOCUMENTS_BY_PROMPT) as $entity) {
            $documents[] = [
                'id' => $entity['object_id'],
                'content' => $this->computeContent($entity['object_id'], $entity['object_type'], $entity['pertinent_content']),
                'entity_data' => $this->getEntityData($entity['object_id'], $entity['object_type']),
            ];
        }

        $promptSystem = \H2o::parseString($this->getPromptSystem());
        $promptUser = \H2o::parseString($this->getPromptUser());

        return $this->service->chat(
            $promptSystem->render(['user' => $userData, 'documents' => $documents]),
            $promptUser->render(['user' => $userData])
        );
    }

    /**
     * Ask the service for tips about a question.
     *
     * @param string $question
     * @return array [] when the question is an empty string, otherwise the result of Service::tips()
     */
    public function tips(string $question)
    {
        // Strict comparison: empty() would also reject the question "0".
        if ('' === $question) {
            return [];
        }

        $userData = [
            "query" => $question,
            "language" => GlobalContext::getCurrentLanguage(),
        ];

        // Same fallback as the other prompts when the template is not configured.
        $promptSystem = \H2o::parseString($this->settings->prompt_system_tips ?? '');

        return $this->service->tips($promptSystem->render(['user' => $userData]), $question);
    }

    /**
     * Build the content of a document by joining its pertinent excerpts,
     * separated by a blank line.
     *
     * @param integer $objectId
     * @param integer $type
     * @param array $pertinentContent List of excerpts ('offset', 'length')
     * @return string
     */
    private function computeContent(int $objectId, int $type, array $pertinentContent): string
    {
        $contents = [];
        foreach ($pertinentContent as $data) {
            $contents[] = $this->getTextContent($objectId, $type, $data['offset'], $data['length']);
        }

        return implode(str_repeat(PHP_EOL, 2), $contents);
    }

    /**
     * Read an excerpt of the text stored for an entity.
     *
     * @param int $objectId
     * @param int $type One of TYPE_NOTICE, TYPE_EXPLNUM, TYPE_SHARED_LIST_EXPLNUM
     * @param int $offset Start of the excerpt (in characters, see mb_substr)
     * @param int $length Length of the excerpt (in characters)
     * @return string The excerpt, or '' for an unknown type, a failed query or a missing row
     */
    protected function getTextContent(int $objectId, int $type, int $offset, int $length): string
    {
        switch ($type) {
            case TYPE_NOTICE:
                $query = "SELECT n_resume FROM notices WHERE notice_id = " . intval($objectId);
                break;

            case TYPE_EXPLNUM:
                $query = "SELECT explnum_index_wew FROM explnum WHERE explnum_id = " . intval($objectId);
                break;

            case TYPE_SHARED_LIST_EXPLNUM:
                $query = "SELECT content_ai_shared_list_docnum FROM ai_shared_list_docnum WHERE id_ai_shared_list_docnum = " . intval($objectId);
                break;

            default:
                return '';
        }

        $result = pmb_mysql_query($query);

        // A failed query yields a falsy result that must not be read.
        if (!$result || !pmb_mysql_num_rows($result)) {
            return '';
        }

        // The column may be NULL: cast before slicing.
        $content = (string) pmb_mysql_result($result, 0, 0);

        return mb_substr($content, $offset, $length);
    }

    /**
     * Describe an entity (identifier, type and metadata).
     *
     * Metadata is only available for records (TYPE_NOTICE); the other known
     * types get an empty string.
     *
     * @param integer $objectId
     * @param integer $type
     * @return array
     * @throws InvalidArgumentException When the type is unknown
     */
    protected function getEntityData(int $objectId, int $type)
    {
        switch ($type) {
            case TYPE_NOTICE:
                $metadata = AiModel::getMetaDataRecord($objectId);
                break;

            case TYPE_EXPLNUM:
            case TYPE_SHARED_LIST_EXPLNUM:
                $metadata = '';
                break;

            default:
                throw new InvalidArgumentException('[getEntityData] Unknown type');
        }

        return [
            'object_id' => $objectId,
            'object_type' => $type,
            'metadata' => $metadata
        ];
    }
}