<?php

// +-------------------------------------------------+
//  2002-2004 PMB Services / www.sigb.net pmb@sigb.net et contributeurs (voir www.sigb.net)
// +-------------------------------------------------+
// $Id: Utils.php,v 1.1.2.3.2.1 2025/12/12 14:33:18 gneveu Exp $

namespace Pmb\AI\Library;

// Refuse direct HTTP access to this file: abort when the requested URI
// contains this file's own name. REQUEST_URI is not set when PHP runs from
// the command line, so fall back to an empty string instead of reading an
// undefined index and passing null to a string function.
if (stripos($_SERVER['REQUEST_URI'] ?? '', '/' . basename(__FILE__)) !== false) {
    die("no access");
}

use InvalidArgumentException;

/**
 * Static helpers for working with embedding vectors: similarity, averaging,
 * normalization, loading embeddings from the database for a set of entities,
 * and basic text clean-up.
 */
abstract class Utils
{
    /** Maximum number of entities sent to the database in a single IN (...) query. */
    public const MAX_ENTITIES_BY_CHUNK = 1000;

    /**
     * Cosine similarity between two vectors.
     *
     * The vectors are walked using the keys of $embeddingsA. A component that
     * is missing from $embeddingsB is treated as 0.0.
     *
     * @param array<int, float> $embeddingsA
     * @param array<int, float> $embeddingsB
     * @return float Similarity, or 0.0 when either accumulated norm is zero.
     */
    public static function cosineSimilarity(array $embeddingsA, array $embeddingsB): float
    {
        $dotProduct = 0.0;
        $normA = 0.0;
        $normB = 0.0;

        foreach ($embeddingsA as $index => $valueA) {
            // Avoid an undefined-offset warning when B is shorter than A;
            // a missing component contributes nothing to the sums.
            $valueB = $embeddingsB[$index] ?? 0.0;

            $dotProduct += $valueA * $valueB;
            $normA += $valueA * $valueA;
            $normB += $valueB * $valueB;
        }

        $normA = sqrt($normA);
        $normB = sqrt($normB);

        if ($normA > 0 && $normB > 0) {
            return $dotProduct / ($normA * $normB);
        }

        // At least one vector is null: similarity is undefined, report 0.0
        return 0.0;
    }

    /**
     * Compute the component-wise mean of a set of embedding vectors.
     *
     * @param array $embeddingsArray List of vectors; each vector is expected
     *                               to be a 0-indexed list of numbers.
     * @return array The mean vector, or an empty array when no vector is given.
     * @throws InvalidArgumentException When the vectors do not all have the same length.
     */
    public static function calculateAverageEmbeddings($embeddingsArray)
    {
        // Nothing to average: avoid reading offset 0 of an empty set and
        // dividing by zero.
        if (empty($embeddingsArray)) {
            return [];
        }

        // Reindex so that the outer keys do not have to start at 0
        $vectors = array_values($embeddingsArray);
        $count = count($vectors);
        $length = count($vectors[0]);

        foreach ($vectors as $vector) {
            if (count($vector) !== $length) {
                throw new InvalidArgumentException('All embeddings arrays must have the same length.');
            }
        }

        $sumEmbeddings = array_fill(0, $length, 0.0);
        foreach ($vectors as $vector) {
            for ($i = 0; $i < $length; $i++) {
                $sumEmbeddings[$i] += $vector[$i];
            }
        }

        return array_map(static fn ($value) => $value / $count, $sumEmbeddings);
    }

    /**
     * Scale a vector to unit Euclidean length.
     *
     * @param array $embeddings Vector components.
     * @return array The normalized vector, or an empty array when the norm is zero.
     */
    public static function computeNormalizeEmbeddings(array $embeddings)
    {
        $squares = array_map(static fn ($embedding) => $embedding ** 2, $embeddings);
        $norm = sqrt(array_sum($squares));

        if ($norm > 0) {
            return array_map(static fn ($embedding) => $embedding / $norm, $embeddings);
        }

        return [];
    }

    /**
     * Load the stored embeddings of a collection of entities of one type and
     * attach them to each entity under the 'embeddings' key.
     *
     * Rows are matched to entities through their 'object_id', so entities
     * without stored embeddings are left untouched and the order in which the
     * database returns rows does not matter.
     *
     * @param int $type One of TYPE_NOTICE, TYPE_EXPLNUM, TYPE_SHARED_LIST_EXPLNUM
     *                  (constants defined outside this file).
     * @param array $entities Entities, each carrying an 'object_id'; modified in place.
     * @param bool $onlyEmbeddings When true only the 'data' part of the decoded
     *                             JSON is kept, otherwise the decoded JSON is stored as is.
     * @return bool True when the query returned at least one row, false otherwise.
     * @throws InvalidArgumentException When $type is not supported.
     */
    public static function fetchEmbeddingsByEntities(int $type, array &$entities, bool $onlyEmbeddings = false): bool
    {
        switch ($type) {
            case TYPE_NOTICE:
                $table = 'notices';
                $idColumn = 'notice_id';
                $embeddingsColumn = 'embeddings';
                break;

            case TYPE_EXPLNUM:
                $table = 'explnum';
                $idColumn = 'explnum_id';
                $embeddingsColumn = 'explnum_embeddings';
                break;

            case TYPE_SHARED_LIST_EXPLNUM:
                $table = 'ai_shared_list_docnum';
                $idColumn = 'id_ai_shared_list_docnum';
                $embeddingsColumn = 'embeddings_ai_shared_list_docnum';
                break;

            default:
                throw new InvalidArgumentException('Unsupported object type.');
        }

        // Map every requested id to the position(s) of the entities carrying it.
        // Ids are cast to int so that nothing but integers reaches the SQL text.
        $indexesById = [];
        foreach ($entities as $index => $entity) {
            if (!isset($entity['object_id'])) {
                continue;
            }
            $indexesById[(int) $entity['object_id']][] = $index;
        }

        // An empty list would produce the invalid SQL "IN ()"
        if (empty($indexesById)) {
            return false;
        }

        $query = sprintf(
            'SELECT %1$s, %2$s FROM %3$s
                    WHERE %1$s IN (%4$s)
                        AND %2$s IS NOT NULL',
            $idColumn,
            $embeddingsColumn,
            $table,
            implode(',', array_keys($indexesById))
        );

        $result = pmb_mysql_query($query);
        if (!$result) {
            return false;
        }

        if (!pmb_mysql_num_rows($result)) {
            pmb_mysql_free_result($result);
            return false;
        }

        while ($row = pmb_mysql_fetch_assoc($result)) {
            $embeddings = json_decode($row[$embeddingsColumn], true);
            if (!is_array($embeddings)) {
                // Undecodable or scalar JSON: nothing usable for this entity
                continue;
            }

            switch ($type) {
                case TYPE_NOTICE:
                    // The decoded JSON is a single embedding; it is wrapped in a
                    // list so that it is stored as a one-element array.
                    $value = $onlyEmbeddings ? [$embeddings['data'] ?? null] : [$embeddings];
                    break;

                case TYPE_SHARED_LIST_EXPLNUM:
                case TYPE_EXPLNUM:
                default:
                    // The decoded JSON is already a list of embeddings
                    $value = $onlyEmbeddings ? array_column($embeddings, 'data') : $embeddings;
                    break;
            }

            foreach ($indexesById[(int) $row[$idColumn]] ?? [] as $index) {
                $entities[$index]['embeddings'] = $value;
            }
        }

        pmb_mysql_free_result($result);

        return true;
    }

    /**
     * Load the embeddings of every entity of a page.
     *
     * Entities are grouped by 'object_type', fetched in chunks of at most
     * MAX_ENTITIES_BY_CHUNK, then entities with missing or empty embeddings
     * are dropped. The returned array keeps the keys left by that filtering,
     * so it may contain gaps.
     *
     * @param array $page Entities, each carrying 'object_type' and 'object_id'.
     * @param bool $onlyEmbeddings Forwarded to fetchEmbeddingsByEntities().
     * @return array Entities for which embeddings were found.
     */
    public static function fetchPageEmbeddings(array &$page, bool $onlyEmbeddings = false): array
    {
        $entitiesByType = [];
        foreach ($page as $entity) {
            $entitiesByType[$entity['object_type']][] = $entity;
        }

        $entitiesWithEmbeddings = [];
        foreach ($entitiesByType as $type => $entities) {
            foreach (array_chunk($entities, self::MAX_ENTITIES_BY_CHUNK) as $chunk) {
                if (self::fetchEmbeddingsByEntities($type, $chunk, $onlyEmbeddings)) {
                    // Append in place instead of rebuilding the whole list per chunk
                    array_push($entitiesWithEmbeddings, ...$chunk);
                }
            }
        }

        // Release the grouped copy before filtering
        $entitiesByType = null;

        self::removeNullEmbeddingsInPages($entitiesWithEmbeddings, $onlyEmbeddings);

        return $entitiesWithEmbeddings;
    }

    /**
     * Remove from a page the entities whose embeddings are missing or empty.
     *
     * Entities with an unsupported 'object_type', or without an array under
     * 'embeddings', are removed as well. Keys of the remaining entities are
     * preserved.
     *
     * @param array $page Entities to filter; modified in place.
     * @param bool $isEmbeddingsOnly True when 'embeddings' holds the raw vectors,
     *                               false when each embedding keeps its vector under 'data'.
     * @return void
     */
    public static function removeNullEmbeddingsInPages(array &$page, bool $isEmbeddingsOnly = false): void
    {
        $page = array_filter($page, static function ($entity) use ($isEmbeddingsOnly) {
            $embeddings = $entity['embeddings'] ?? null;
            if (!is_array($embeddings)) {
                // No embeddings were attached to this entity
                return false;
            }

            switch ($entity['object_type'] ?? null) {
                case TYPE_NOTICE:
                    if ($isEmbeddingsOnly) {
                        return !empty($embeddings[0]);
                    }
                    return !empty($embeddings[0]['data']);

                case TYPE_SHARED_LIST_EXPLNUM:
                case TYPE_EXPLNUM:
                    $vectors = $isEmbeddingsOnly ? $embeddings : array_column($embeddings, 'data');

                    // Keep the entity only when none of its vectors is empty
                    foreach ($vectors as $vector) {
                        if (empty($vector)) {
                            return false;
                        }
                    }
                    return true;

                default:
                    return false;
            }
        });
    }

    /**
     * Strip from a text every byte outside the 0x20-0x7F range, except line
     * feeds and carriage returns. Tabs and all non-ASCII bytes are removed.
     *
     * @param string $text
     * @return string
     */
    public static function cleanString(string $text): string
    {
        $text = preg_replace('/[^\x20-\x7F\n\r]/', '', $text);
        return $text;
    }
}
