<?php

// +-------------------------------------------------+
//  2002-2004 PMB Services / www.sigb.net pmb@sigb.net et contributeurs (voir www.sigb.net)
// +-------------------------------------------------+
// $Id: Clusters.php,v 1.1.2.3 2025/04/11 07:36:26 qvarin Exp $

namespace Pmb\AI\Library\Clusters;

// Refuse direct HTTP access to this file. REQUEST_URI is not set when PHP runs
// from the command line (this class supports a CLI mode), so fall back to an
// empty string instead of reading an undefined index.
if (stripos($_SERVER['REQUEST_URI'] ?? '', '/' . basename(__FILE__)) !== false) {
    die("no access");
}

use Pmb\AI\Library\Utils;

/**
 * Groups the records that own embeddings into clusters.
 *
 * Seeds are picked at random among the notices and digital documents
 * (explnum) whose embeddings are not NULL. Each epoch then assigns every
 * entity to the cluster with the highest cosine similarity and asks the
 * clusters to refresh their embeddings (the averaging itself is implemented by
 * the Cluster class, not here). Membership is only recorded and saved during
 * the last epoch.
 */
class Clusters
{
    /** Silent mode: debug() prints nothing. */
    public const MODE_DEFAULT = 0;

    /** Command-line mode: debug() prints progress messages. */
    public const MODE_CLI = 1;

    /** Not used by this class; kept because it is part of the public API. */
    public const MAX_ENTITIES_BY_CHUNK = 1000;

    /** Number of entities grouped in one page by generatePages(). */
    public const MAX_ENTITIES_BY_PAGE = 3000;

    /**
     * Output mode, one of the MODE_* constants.
     *
     * @var int
     */
    private int $mode = self::MODE_DEFAULT;

    /**
     * Clusters built by the current run.
     *
     * @var array
     */
    private array $clusters = [];

    /**
     * Name of the temporary table of the current run ('' when there is none).
     *
     * @var string
     */
    private string $temporaryTable = '';

    /**
     * Create a temporary table listing every object that has embeddings.
     *
     * The table receives one (object_id, object_type) row per notice whose
     * `embeddings` column is not NULL and per explnum whose
     * `explnum_embeddings` column is not NULL.
     *
     * @return string Name of the table
     */
    private function generateTemporaryTable(): string
    {
        // A random suffix (instead of a hash of the current second) guarantees
        // a fresh table: with a time-based name, two runs started within the
        // same second on one connection would share the table and the INSERTs
        // below would hit duplicate primary keys.
        $tableName = 'tmp_embeddings_' . bin2hex(random_bytes(16));

        $query = 'CREATE TEMPORARY TABLE IF NOT EXISTS '. $tableName .' (
            object_id int(11) NOT NULL,
            object_type int(11) NOT NULL,
            PRIMARY KEY (object_id, object_type)
        );';
        pmb_mysql_query($query);

        $query = 'INSERT INTO '. $tableName .' SELECT notice_id AS object_id, '. TYPE_NOTICE .' AS object_type FROM notices WHERE embeddings IS NOT NULL';
        pmb_mysql_query($query);

        $query = 'INSERT INTO '. $tableName .' SELECT explnum_id AS object_id, '. TYPE_EXPLNUM .' AS object_type FROM explnum WHERE explnum_embeddings IS NOT NULL';
        pmb_mysql_query($query);

        return $tableName;
    }

    /**
     * Drop the temporary table of the current run, if there is one.
     *
     * @return void
     */
    private function dropTemporaryTable(): void
    {
        if ($this->temporaryTable === '') {
            return;
        }

        pmb_mysql_query('DROP TEMPORARY TABLE IF EXISTS ' . $this->temporaryTable);
        $this->temporaryTable = '';
    }

    /**
     * Remove every stored cluster.
     *
     * Truncates both the `clusters` and the `cluster_contents` tables.
     *
     * @return void
     */
    public function cleanClusters()
    {
        $this->debug('Nettoyage des clusters' . PHP_EOL);

        $query = 'TRUNCATE TABLE clusters';
        pmb_mysql_query($query);

        $query = 'TRUNCATE TABLE cluster_contents';
        pmb_mysql_query($query);
    }

    /**
     * Build the clusters.
     *
     * Up to $nbClusters entities are drawn at random as seeds, then at most
     * $nbEpoch epochs are executed. The run stops early once every cluster
     * reports stagnation; the epoch executed at that point is treated as the
     * last one. Note that with $nbEpoch <= 0 no epoch runs and nothing is
     * saved, although true is still returned.
     *
     * @param int $nbClusters Number of clusters requested
     * @param int $nbEpoch Maximum number of epochs
     * @return bool false when no seed could be selected (no entity with
     *              embeddings, or $nbClusters lower than 1), true otherwise
     */
    public function createClusters(int $nbClusters, int $nbEpoch): bool
    {
        // Start from a clean state so that a second call on the same instance
        // does not reuse (and save again) the clusters of the previous run.
        $this->clusters = [];

        if ($nbClusters < 1) {
            return false;
        }

        $this->temporaryTable = $this->generateTemporaryTable();

        $query = 'SELECT * FROM ' . $this->temporaryTable . ' ORDER BY RAND() LIMIT ' . $nbClusters;
        $result = pmb_mysql_query($query);

        if ($result) {
            if (pmb_mysql_num_rows($result)) {
                $this->debug('Creation des clusters' . PHP_EOL);

                while ($row = pmb_mysql_fetch_assoc($result)) {
                    $this->clusters[] = Cluster::create($row['object_type'], $row['object_id']);
                }
            }
            pmb_mysql_free_result($result);
        }

        if ($this->clusters === []) {
            $this->dropTemporaryTable();
            return false;
        }

        $pages = $this->generatePages();

        // The pages hold every (object_id, object_type) pair in memory, so the
        // temporary table is no longer needed.
        $this->dropTemporaryTable();

        // Fewer clusters than requested exist when fewer entities are
        // available, so stagnation is compared with the actual count.
        $clusterCount = count($this->clusters);

        $this->debug(PHP_EOL . 'Lancements des epochs' . PHP_EOL . PHP_EOL);
        for ($epoch = 0; $epoch < $nbEpoch; $epoch++) {
            $stagnantClusters = array_filter($this->clusters, static fn ($cluster) => $cluster->isStagnant());
            if (count($stagnantClusters) === $clusterCount) {
                // Every cluster is stagnant: run one final epoch and stop
                $lastEpoch = true;
                $this->debug('Tous les clusters sont stagnants' . PHP_EOL);
            } else {
                $lastEpoch = ($epoch === ($nbEpoch - 1));
            }

            $this->debug('Epoch ' . ($epoch + 1) . PHP_EOL);
            $this->executeEpoch($pages, $lastEpoch);
            $this->debug('Epoch Finie' . PHP_EOL . PHP_EOL);
            if ($lastEpoch) {
                break;
            }
        }

        $this->debug('Creation des clusters finie' . PHP_EOL);

        return true;
    }

    /**
     * Run one epoch.
     *
     * Every entity of every page is assigned to its most similar cluster and
     * contributes to that cluster's embeddings mean. During the last epoch the
     * entity is also attached to the cluster, and the clusters are saved once
     * their embeddings have been updated.
     *
     * @param array $pages Structure returned by generatePages()
     * @param bool $lastEpoch Whether memberships must be recorded and saved
     * @return void
     */
    private function executeEpoch(array &$pages, bool $lastEpoch): void
    {
        // Reset the embeddings means
        foreach ($this->clusters as $cluster) {
            $cluster->initEmbeddingsMeans();
        }

        for ($i = 0; $i < $pages['count']; $i++) {
            $this->debug("\r    Page " . ($i + 1) . ' / ' . $pages['count']);

            $entitiesWithEmbeddings = Utils::fetchPageEmbeddings($pages['items'][$i], true);

            // foreach does not assume that the helper returns consecutive
            // integer keys.
            foreach ($entitiesWithEmbeddings as $entity) {
                $clusterIndex = $this->findSimilarityCluster($entity['embeddings']);
                $this->updateEmbeddingsMean($clusterIndex, $entity['embeddings']);

                if ($lastEpoch) {
                    $this->clusters[$clusterIndex]->addObject($entity['object_id'], $entity['object_type']);
                }
            }
        }

        // Update the embeddings
        $this->debug(PHP_EOL . '    Mise a jour des embeddings' . PHP_EOL);
        foreach ($this->clusters as $cluster) {
            $cluster->updateEmbeddings();
        }

        if ($lastEpoch) {
            $this->debug('    Sauvegarde des clusters' . PHP_EOL);
            foreach ($this->clusters as $cluster) {
                $cluster->save();
            }
        }
    }

    /**
     * Feed the embeddings of an entity into the mean of a cluster.
     *
     * @param int $clusterIndex Index of the cluster in $this->clusters
     * @param array $embeddings Embeddings of the entity
     * @return void
     */
    private function updateEmbeddingsMean(int $clusterIndex, array $embeddings): void
    {
        $this->clusters[$clusterIndex]->updateEmbeddingsMean($embeddings);
    }

    /**
     * Find the cluster that is the most similar to an entity.
     *
     * The score of a cluster is the highest cosine similarity between its
     * embeddings and any item of $embeddings. On a tie the first cluster wins;
     * index 0 is returned when $embeddings is empty.
     *
     * @param array $embeddings Embeddings of the entity, one item per chunk
     * @return int Index of the best cluster in $this->clusters
     */
    private function findSimilarityCluster(array $embeddings): int
    {
        $bestIndex = 0;
        // Start below any possible similarity: a cosine similarity may be
        // negative, and a starting score of 0 would then always elect
        // cluster 0 instead of the closest cluster.
        $bestScore = -INF;

        foreach ($this->clusters as $index => $cluster) {
            // Fetched once per cluster rather than once per chunk
            $clusterEmbeddings = $cluster->getEmbeddings();

            foreach ($embeddings as $chunkEmbedding) {
                $score = Utils::cosineSimilarity($clusterEmbeddings, $chunkEmbedding);
                // Strict comparison keeps the first cluster on equal scores
                if ($score > $bestScore) {
                    $bestScore = $score;
                    $bestIndex = $index;
                }
            }
        }

        return $bestIndex;
    }

    /**
     * Split the entities of the temporary table into pages.
     *
     * @return array 'count' => number of pages, 'items' => list of pages, each
     *               page being a list of at most MAX_ENTITIES_BY_PAGE rows
     *               (object_id, object_type)
     */
    private function generatePages(): array
    {
        $this->debug('Creation des pages' . PHP_EOL);
        $query = 'SELECT * FROM ' . $this->temporaryTable;
        $result = pmb_mysql_query($query);

        $entities = [];
        if ($result) {
            while ($row = pmb_mysql_fetch_assoc($result)) {
                $entities[] = $row;
            }
            // Released even when the table turned out to be empty
            pmb_mysql_free_result($result);
        }

        $pages = array_chunk($entities, self::MAX_ENTITIES_BY_PAGE);
        return [
            'count' => count($pages),
            'items' => $pages,
        ];
    }

    /**
     * Set the output mode.
     *
     * @param int $mode One of the MODE_* constants
     * @return void
     */
    public function mode(int $mode): void
    {
        $this->mode = $mode;
    }

    /**
     * Print a progress message, in CLI mode only.
     *
     * @param string $message
     * @return void
     */
    protected function debug(string $message): void
    {
        if (self::MODE_CLI === $this->mode) {
            print $message;
        }
    }
}