<?php
// +-------------------------------------------------+
//  2002-2004 PMB Services / www.sigb.net pmb@sigb.net et contributeurs (voir www.sigb.net)
// +-------------------------------------------------+
// $Id: MetadataBuilder.php,v 1.1.2.2 2026/01/09 15:16:00 rtigero Exp $
namespace Pmb\SEO\Library;

// Refuse direct HTTP access: abort when the requested URI contains this
// file's own name. REQUEST_URI is absent when the file is loaded outside a
// web request (CLI, cron), so fall back to an empty string instead of
// triggering an undefined-key warning and passing null to stristr().
if (stristr($_SERVER['REQUEST_URI'] ?? '', basename(__FILE__))) {
    die("no access");
}

use Pmb\Common\Helper\HTML;

/**
 * Static helper that injects SEO metadata (robots meta tag and canonical
 * link) into an HTML document and builds canonical URLs with selected query
 * parameters stripped.
 */
class MetadataBuilder
{
    /**
     * Character encoding used for the DOM document and for HTML escaping.
     * Empty until set explicitly or lazily read from the global $charset.
     *
     * @var string
     */
    protected static $encoding = "";

    /**
     * Version string handed to the DOMDocument constructor.
     *
     * @var string
     */
    protected static $version = "1.0";

    /**
     * GET parameters removed from the canonical URL.
     *
     * @var array
     */
    protected static $excludedParams = [
        "nb_per_page",
        "nb_per_page_custom",
        "page",
        "nbr_lignes",
        "l_typdoc"
    ];

    /**
     * Reads the global $charset as a string.
     *
     * Returns an empty string when the global is undefined or not a string,
     * so callers with a string return type never receive null.
     *
     * @return string
     */
    private static function getGlobalCharset(): string
    {
        global $charset;
        return is_string($charset) ? $charset : '';
    }

    /**
     * Returns the encoding, lazily initialised from the global $charset
     * when no encoding has been set yet.
     *
     * @return string
     */
    public static function getEncoding(): string
    {
        if (empty(self::$encoding)) {
            self::$encoding = self::getGlobalCharset();
        }
        return (string) self::$encoding;
    }

    /**
     * Sets the encoding; an empty value falls back to the global $charset.
     *
     * @param string $encoding
     * @return void
     */
    public static function setEncoding(string $encoding = ""): void
    {
        if (empty($encoding)) {
            $encoding = self::getGlobalCharset();
        }
        self::$encoding = $encoding;
    }

    /**
     * Returns the document version.
     *
     * @return string
     */
    public static function getVersion(): string
    {
        return self::$version;
    }

    /**
     * Sets the document version.
     *
     * @param string $version
     * @return void
     */
    public static function setVersion(string $version): void
    {
        self::$version = $version;
    }

    /**
     * Creates a DOMDocument configured with the current version and
     * encoding, with formatted output enabled.
     *
     * @return \DOMDocument
     */
    protected static function createDocument(): \DOMDocument
    {
        $document = new \DOMDocument(self::getVersion(), self::getEncoding());
        $document->formatOutput = true;
        return $document;
    }

    /**
     * Parses the HTML and adds/updates the robots meta tag and, optionally,
     * the canonical link.
     *
     * An input that is empty() is returned untouched.
     *
     * @param string $html
     * @param bool $indexable
     * @param bool $followable
     * @param bool $addCanonical
     * @param string|null $canonicalUrl Canonical URL (current request URL when null)
     * @return string
     * @throws \Exception When the HTML cannot be loaded into the DOM
     */
    public static function parseHTML(
        string $html,
        bool $indexable = true,
        bool $followable = true,
        bool $addCanonical = true,
        ?string $canonicalUrl = null
    ): string {
        if (empty($html)) {
            return $html;
        }

        $document = self::createDocument();
        $html = HTML::cleanHTML($html, self::getEncoding());

        // DOMDocument::loadHTML() throws a ValueError on an empty string in
        // PHP 8; report it through the documented exception instead.
        if ($html === '') {
            throw new \Exception("HTML could not be loaded");
        }

        // Collect libxml parse warnings internally rather than silencing
        // everything with "@", then restore the caller's libxml setting.
        $previousSetting = libxml_use_internal_errors(true);
        try {
            $loaded = $document->loadHTML($html);
        } finally {
            if (!$previousSetting) {
                // We enabled buffering ourselves: drop what we collected.
                libxml_clear_errors();
            }
            libxml_use_internal_errors($previousSetting);
        }

        if (!$loaded) {
            throw new \Exception("HTML could not be loaded");
        }

        self::addOrUpdateMetaRobots($document, $indexable, $followable);

        if ($addCanonical) {
            self::addOrUpdateCanonical($document, $canonicalUrl);
        }

        return $document->saveHTML();
    }

    /**
     * Returns the first <head> element of the document, or null if none.
     *
     * @param \DOMDocument $document
     * @return \DOMElement|null
     */
    private static function findHead(\DOMDocument $document): ?\DOMElement
    {
        return $document->getElementsByTagName('head')->item(0);
    }

    /**
     * Tells whether a <link> element carries the "canonical" relation.
     *
     * The rel attribute is a whitespace-separated, case-insensitive token
     * list, so each token is compared individually.
     *
     * @param \DOMElement $link
     * @return bool
     */
    private static function isCanonicalLink(\DOMElement $link): bool
    {
        $tokens = preg_split('/\s+/', $link->getAttribute('rel'), -1, PREG_SPLIT_NO_EMPTY) ?: [];
        foreach ($tokens as $token) {
            if (strcasecmp($token, 'canonical') === 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Adds or updates the robots meta tag in the head.
     *
     * The first existing robots meta (name matched case-insensitively) has
     * its content replaced; otherwise a new one is inserted as the first
     * child of <head>. Nothing is added when the document has no <head>.
     *
     * @param \DOMDocument $document
     * @param bool $indexable
     * @param bool $followable
     * @return void
     */
    protected static function addOrUpdateMetaRobots(\DOMDocument $document, bool $indexable, bool $followable): void
    {
        $robotsContent = self::getRobotsContent($indexable, $followable);

        // Look for an existing robots meta; strcasecmp keeps the comparison
        // ASCII case-insensitive regardless of the current locale.
        foreach ($document->getElementsByTagName('meta') as $metaNode) {
            if (strcasecmp(trim($metaNode->getAttribute('name')), 'robots') === 0) {
                $metaNode->setAttribute('content', $robotsContent);
                return;
            }
        }

        // No robots meta yet: create one.
        $head = self::findHead($document);
        if ($head === null) {
            return;
        }

        $metaRobots = $document->createElement('meta');
        $metaRobots->setAttribute('name', 'robots');
        $metaRobots->setAttribute('content', $robotsContent);

        // Insert at the start of the head; insertBefore() appends when the
        // reference node is null (empty head).
        $head->insertBefore($metaRobots, $head->firstChild);
    }

    /**
     * Builds the content value of the robots meta tag.
     *
     * @param bool $indexable
     * @param bool $followable
     * @return string e.g. "index, follow" or "noindex, nofollow"
     */
    public static function getRobotsContent(bool $indexable, bool $followable): string
    {
        $index = $indexable ? 'index' : 'noindex';
        $follow = $followable ? 'follow' : 'nofollow';

        return "{$index}, {$follow}";
    }

    /**
     * Returns the excluded parameters.
     *
     * @return array
     */
    public static function getExcludedParams(): array
    {
        return self::$excludedParams;
    }

    /**
     * Replaces the list of excluded parameters.
     *
     * @param array $params
     * @return void
     */
    public static function setExcludedParams(array $params): void
    {
        self::$excludedParams = $params;
    }

    /**
     * Adds parameters to the exclusion list, without duplicates.
     *
     * @param array $params
     * @return void
     */
    public static function addExcludedParams(array $params): void
    {
        // array_unique() preserves keys and leaves gaps; re-index so the
        // stored value stays a sequential list.
        self::$excludedParams = array_values(array_unique(array_merge(self::$excludedParams, $params)));
    }

    /**
     * Cleans a URL by removing the excluded query parameters.
     *
     * A URL without a query string is returned unchanged. Otherwise the URL
     * is rebuilt from scheme, host, port, path, remaining query parameters
     * and fragment; the query is re-encoded by http_build_query().
     *
     * @param string $url
     * @return string
     */
    public static function cleanUrl(string $url): string
    {
        $parsedUrl = parse_url($url);

        if (!isset($parsedUrl['query'])) {
            return $url;
        }

        parse_str($parsedUrl['query'], $queryParams);

        // Drop the excluded parameters.
        foreach (self::$excludedParams as $param) {
            unset($queryParams[$param]);
        }

        // Rebuild the URL piece by piece.
        $cleanUrl = '';

        if (isset($parsedUrl['scheme'])) {
            $cleanUrl .= $parsedUrl['scheme'] . '://';
        }

        if (isset($parsedUrl['host'])) {
            $cleanUrl .= $parsedUrl['host'];
        }

        if (isset($parsedUrl['port'])) {
            $cleanUrl .= ':' . $parsedUrl['port'];
        }

        if (isset($parsedUrl['path'])) {
            $cleanUrl .= $parsedUrl['path'];
        }

        if (!empty($queryParams)) {
            // Force "&" so the result does not depend on the
            // arg_separator.output ini setting.
            $cleanUrl .= '?' . http_build_query($queryParams, '', '&');
        }

        if (isset($parsedUrl['fragment'])) {
            $cleanUrl .= '#' . $parsedUrl['fragment'];
        }

        return $cleanUrl;
    }

    /**
     * Returns the canonical URL of the current request, built from $_SERVER
     * and passed through cleanUrl().
     *
     * NOTE(review): HTTP_HOST comes from the client's Host header; if the
     * web server does not restrict accepted host names, the canonical URL
     * can be influenced by the requester. Confirm server configuration.
     *
     * @return string
     */
    public static function getCanonicalUrl(): string
    {
        $scheme = (!empty($_SERVER['HTTPS']) && $_SERVER['HTTPS'] !== 'off') ? 'https' : 'http';
        $host = $_SERVER['HTTP_HOST'] ?? $_SERVER['SERVER_NAME'] ?? 'localhost';
        $uri = $_SERVER['REQUEST_URI'] ?? '/';

        $fullUrl = $scheme . '://' . $host . $uri;

        return self::cleanUrl($fullUrl);
    }

    /**
     * Builds the canonical link tag.
     *
     * The URL is escaped with the configured encoding (not a hard-coded
     * UTF-8) so non-UTF-8 installations do not end up with an empty href;
     * ENT_SUBSTITUTE replaces invalid byte sequences instead of discarding
     * the whole value.
     *
     * @param string|null $url Canonical URL (current request URL when null)
     * @return string
     */
    public static function getCanonicalTag(?string $url = null): string
    {
        $canonicalUrl = $url ?? self::getCanonicalUrl();
        $escapedUrl = htmlspecialchars($canonicalUrl, ENT_QUOTES | ENT_SUBSTITUTE, self::getEncoding());

        return '<link rel="canonical" href="' . $escapedUrl . '">';
    }

    /**
     * Adds or updates the canonical link in the head.
     *
     * The first existing canonical link has its href replaced; otherwise a
     * new link is appended to <head>. Nothing is added when the document
     * has no <head>.
     *
     * @param \DOMDocument $document
     * @param string|null $url Canonical URL (current request URL when null)
     * @return void
     */
    protected static function addOrUpdateCanonical(\DOMDocument $document, ?string $url = null): void
    {
        $canonicalUrl = $url ?? self::getCanonicalUrl();

        // Look for an existing canonical link.
        foreach ($document->getElementsByTagName('link') as $linkNode) {
            if (self::isCanonicalLink($linkNode)) {
                $linkNode->setAttribute('href', $canonicalUrl);
                return;
            }
        }

        // No canonical link yet: create one.
        $head = self::findHead($document);
        if ($head === null) {
            return;
        }

        $linkCanonical = $document->createElement('link');
        $linkCanonical->setAttribute('rel', 'canonical');
        $linkCanonical->setAttribute('href', $canonicalUrl);
        $head->appendChild($linkCanonical);
    }
}
