<?php
// +-------------------------------------------------+
//  2002-2004 PMB Services / www.sigb.net pmb@sigb.net et contributeurs (voir www.sigb.net)
// +-------------------------------------------------+
// $Id: ChunkXMLRDF.php,v 1.3.2.2 2025/06/27 14:20:43 rtigero Exp $

namespace Pmb\ImportExport\Models\Chunks\ChunkXMLRDF;

use XMLReader;
use DOMDocument;
use DOMXPath;
use Pmb\ImportExport\Models\Chunks\Chunk;

/**
 * Splits an RDF/XML stream into chunks, one chunk per configured entity element.
 *
 * The document is walked with XMLReader (streaming), so the whole file is never
 * loaded in memory. Each element whose tag name or absolute path is listed in the
 * "entitiesElements" parameter is emitted as a standalone XML string prefixed by
 * the configured XML header.
 */
class ChunkXMLRDF extends Chunk
{

    /**
     * Root element
     * @var string
     */
    protected $rootElement = "rdf:RDF";

    /**
     * List of entities to read; each entry is either a tag name
     * (e.g. "rdf:Description") or an absolute path (e.g. "/rdf:RDF/rdf:Description")
     * @var array
     */
    protected $entities = [];

    /**
     * XMLReader instance
     * @var XMLReader
     */
    protected $xmlReader = null;

    /**
     * Path of the current node, as a stack of element names from the root
     * @var array
     */
    protected $currentNodePath = [];

    /**
     * Default XML header prepended to every emitted chunk
     * @var string
     */
    public const DEFAULT_XML_HEADER = '<?xml version="1.0" encoding="utf-8"?>' . "\n";

    /**
     * XML header actually prepended to every emitted chunk
     * (overridable through the "xmlHeader" parameter)
     * @var string
     */
    protected $xmlHeader = self::DEFAULT_XML_HEADER;


    /**
     * One-time setup: prepares the stream context, creates the XMLReader and
     * loads the entity list and the XML header from the parameters.
     *
     * @return void
     */
    protected function initialize()
    {
        if ($this->isInitialized) {
            return;
        }
        $this->isInitialized = true;

        $this->initializeStream();
        if ($this->context) {
            libxml_set_streams_context($this->context);
        }

        $this->xmlReader = new XMLReader();

        // The parameter may be absent or not iterable: treat it as "no entity configured".
        $entitiesElements = $this->parameters['entitiesElements'] ?? [];
        if (is_iterable($entitiesElements)) {
            foreach ($entitiesElements as $v) {
                if (isset($v['value'])) {
                    // Stored as strings so that matching can use strict comparison.
                    $this->entities[] = (string) $v['value'];
                }
            }
        }
        $this->xmlHeader = $this->parameters["xmlHeader"] ?? static::DEFAULT_XML_HEADER;
    }


    /**
     * Generator yielding one XML string per matching entity element.
     *
     * An element matches when its tag name or its absolute path (names joined
     * with "/", starting with "/") is listed in the configured entities. An
     * element matching on both criteria is yielded only once.
     *
     * If the source cannot be opened, a single null is yielded and the generator ends.
     *
     * @return \Generator
     */
    public function next()
    {
        $this->initialize();

        $opened = $this->xmlReader->open($this->uri, null, LIBXML_NOBLANKS);
        if (!$opened) {
            yield null;
            // Reading from a reader that failed to open is an error: stop here.
            return;
        }

        // Start from a clean path in case a previous run was interrupted mid-document.
        $this->currentNodePath = [];

        try {
            while ($this->xmlReader->read()) {
                $nodeType = $this->xmlReader->nodeType;

                if ($nodeType === XMLReader::END_ELEMENT) {
                    array_pop($this->currentNodePath);
                    continue;
                }
                if ($nodeType !== XMLReader::ELEMENT) {
                    continue;
                }

                $name = $this->xmlReader->name;
                // Self-closing elements never produce an END_ELEMENT event,
                // so they must be popped from the path manually below.
                $isEmptyElement = $this->xmlReader->isEmptyElement;

                $this->currentNodePath[] = $name;
                $path = '/' . implode('/', $this->currentNodePath);

                if (in_array($name, $this->entities, true) || in_array($path, $this->entities, true)) {
                    yield $this->xmlHeader . $this->xmlReader->readOuterXml();
                }

                if ($isEmptyElement) {
                    array_pop($this->currentNodePath);
                }
            }
        } finally {
            // Also runs when the consumer abandons the generator before the end.
            $this->xmlReader->close();
        }
    }

    /**
     * Strips HTML tags found in the text of the XML content.
     *
     * Only elements whose single child is a text node are processed: their text
     * is HTML-entity-decoded, then passed through strip_tags(). The XML
     * declaration line is removed from the returned string.
     *
     * An empty input returns an empty string; content that cannot be parsed as
     * XML is returned unchanged.
     *
     * @param string $content
     * @return string
     */
    protected function cleanHTML($content)
    {
        // DOMDocument::loadXML() refuses an empty source (ValueError on PHP 8).
        if ($content === null || $content === '') {
            return '';
        }

        $dom = new DOMDocument();
        if (!$dom->loadXML($content)) {
            // Not well-formed: nothing can be cleaned safely, keep the content as is.
            return $content;
        }
        $xpath = new DOMXPath($dom);

        foreach ($xpath->query('//*') as $node) {
            $onlyChild = $node->firstChild;
            if ($node->childNodes->length === 1 && $onlyChild->nodeType === XML_TEXT_NODE) {
                // Write to the text node rather than the element: assigning to an
                // element's nodeValue can interpret "&" as the start of an entity
                // reference and lose the text when the decoded value contains one.
                $onlyChild->nodeValue = strip_tags(html_entity_decode($onlyChild->nodeValue, ENT_QUOTES, "UTF-8"));
            }
        }

        // Drop the first line, which is the XML declaration added by saveXML().
        $lines = explode("\n", $dom->saveXML());
        array_shift($lines);
        return implode("\n", $lines);
    }
}
