<?php
// +-------------------------------------------------+
// (c) 2002-2004 PMB Services / www.sigb.net pmb@sigb.net et contributeurs (voir www.sigb.net)
// +-------------------------------------------------+
// $Id: OCRMyPdf.php,v 1.1.2.1 2025/10/21 08:22:46 qvarin Exp $

namespace Pmb\Ocr\Library\Ocr;

use encoding_normalize;
use explnum;
use RuntimeException;

/**
 * Thin wrapper around the `ocrmypdf` command line tool.
 *
 * The executable and the "service" switch are read from the global
 * `$pmb_ocr_config` JSON string; the feature as a whole is gated by the
 * global `$pmb_ocr_active`. A PDF is always processed "in place": OCRmyPDF
 * writes to a temporary sibling file which is then copied over the input.
 */
class OCRMyPdf
{
    /**
     * Path of the PDF currently being processed (input of the command).
     *
     * @var string
     */
    protected $filepath = "";

    /**
     * Path of the temporary file OCRmyPDF writes its result to.
     *
     * @var string
     */
    protected $outputFilepath = "";

    /**
     * Value passed to `-l` (omitted when empty).
     *
     * @var string
     */
    public $language = "";

    /**
     * When true, `--force-ocr` is added to the command.
     *
     * @var bool
     */
    public $forceOverwrite = false;

    /**
     * When true, `--optimize 0` is added to the command.
     *
     * @var bool
     */
    public $disableOptimization = false;

    /**
     * When true, `--output-type pdf` is added to the command.
     *
     * @var bool
     */
    public $disablePdfA = false;

    /**
     * When true, `--fast-web-view 999999` is added to the command.
     *
     * @var bool
     */
    public $disableFastWebView = false;

    /**
     * Path passed to `--sidecar` (omitted when empty).
     *
     * @var string
     */
    public $textFile = "";

    /**
     * Is the server active.
     *
     * NOTE(review): only written by getConfig(); nothing in this class reads
     * it. isActive() relies on the configuration and the global switch.
     *
     * @var bool
     */
    private $active = false;

    /**
     * Decoded configuration, or null while it has not been loaded yet.
     *
     * @var array|null
     */
    protected $config = null;

    /**
     * Number of trailing command output lines appended to a failure message.
     */
    private const MAX_ERROR_OUTPUT_LINES = 10;

    public function __construct()
    {
        // Load the configuration eagerly so that later calls work on a cached array.
        $this->getConfig();
    }

    /**
     * Check if the OCR service is enabled.
     *
     * Requires both the global `$pmb_ocr_active` switch and a `service`
     * configuration entry loosely equal to 1.
     *
     * @return bool
     */
    public function isActive(): bool
    {
        global $pmb_ocr_active;

        if (!$pmb_ocr_active || empty($this->config['service'])) {
            return false;
        }

        // Loose comparison on purpose: the decoded JSON value may be 1, "1" or true.
        return $this->config['service'] == 1;
    }

    /**
     * Get the configuration, decoding the global `$pmb_ocr_config` on first use.
     *
     * @return array Decoded configuration, or an empty array when the JSON
     *               is missing or does not decode to an array.
     */
    public function getConfig(): array
    {
        if ($this->config === null) {
            global $pmb_ocr_config;

            // Anything that is not an array (null, false, scalar JSON...) is
            // normalised to [] so the declared return type always holds.
            $decoded = encoding_normalize::json_decode($pmb_ocr_config, true);
            $this->config = is_array($decoded) ? $decoded : [];
        }

        if (empty($this->config['path'])) {
            $this->active = false;
        }

        return $this->config;
    }

    /**
     * Run the OCR on a digital document (explnum).
     *
     * Only documents whose mimetype is `application/pdf` are processed.
     * Documents stored in the database go through processBinary() and are
     * saved afterwards; documents stored in an upload directory are
     * processed directly on disk. Any RuntimeException is reported as an
     * E_USER_WARNING and turned into a `false` return value.
     *
     * @param explnum $explnum
     * @return bool True when the document content was replaced.
     */
    public function processExplnum($explnum)
    {
        try {
            if ($explnum->isEnBase() && $explnum->explnum_mimetype === 'application/pdf') {
                $oldExplnumData = $explnum->explnum_data;
                $explnum->explnum_data = $this->processBinary($explnum->explnum_data);
                $explnum->save();

                return $oldExplnumData != $explnum->explnum_data;
            }

            if ($explnum->isEnUpload() && $explnum->explnum_mimetype === 'application/pdf') {
                $this->filepath = $explnum->explnum_rep_path . $explnum->explnum_nomfichier;
                $this->outputFilepath = $this->buildOutputFilepath($this->filepath);

                return $this->runOcrInPlace();
            }

            return false;
        } catch (RuntimeException $e) {
            trigger_error($e->getMessage(), E_USER_WARNING);
            return false;
        }
    }

    /**
     * Run the OCR on the PDF file given as parameter, overwriting it on success.
     *
     * Failures of the OCR itself are reported as an E_USER_WARNING and do
     * not propagate; only a missing input file raises an exception.
     *
     * @param string $filepath
     * @return void
     * @throws RuntimeException When the input file does not exist.
     */
    public function processFile(string $filepath)
    {
        if (!file_exists($filepath)) {
            // Report the path that was actually checked: $this->filepath is
            // not assigned yet at this point.
            throw new RuntimeException("Input file does not exist : $filepath");
        }

        $this->filepath = $filepath;
        $this->outputFilepath = $this->buildOutputFilepath($filepath);

        try {
            $this->runOcrInPlace();
        } catch (RuntimeException $e) {
            trigger_error($e->getMessage(), E_USER_WARNING);
        }
    }

    /**
     * Run the OCR on a PDF binary given as parameter.
     *
     * The binary is written to a temporary file under `$base_path/temp/`,
     * processed with processFile() and read back. The temporary file is
     * always removed, whatever the outcome.
     *
     * @param string $binary
     * @return string The processed binary, or the input binary unchanged
     *                when nothing could be written, processed or read back.
     */
    public function processBinary(string $binary)
    {
        global $base_path;

        // uniqid() keeps two calls made within the same second from sharing a file.
        $filename = $base_path . '/temp/' . uniqid(time() . '_', true) . '_ocr_process.pdf';

        try {
            // A false or 0 result (write failure / empty binary) leaves nothing to process.
            if (file_put_contents($filename, $binary)) {
                $this->processFile($filename);

                $newBinary = file_get_contents($filename);
                if (false !== $newBinary) {
                    $binary = $newBinary;
                }
            }
        } catch (RuntimeException $e) {
            trigger_error($e->getMessage(), E_USER_WARNING);
        } finally {
            if (is_file($filename)) {
                unlink($filename);
            }
        }

        return $binary;
    }

    /**
     * Run OCRmyPDF on `$this->filepath`, writing to `$this->outputFilepath`.
     *
     * @return string Output of the command (stdout and stderr merged).
     * @throws RuntimeException When the service is inactive, the input file
     *                          is missing or the command exits with a
     *                          non-zero status.
     */
    protected function processPdf(): string
    {
        if (!$this->isActive()) {
            throw new RuntimeException("OCRMyPDF is not active");
        }

        if (!file_exists($this->filepath)) {
            throw new RuntimeException("Input file does not exist : $this->filepath");
        }

        // Lift the PHP execution time limit for the duration of the external command.
        set_time_limit(0);

        $output = [];
        $returnVar = 0;
        exec($this->getCommand($this->filepath), $output, $returnVar);

        if ($returnVar !== 0) {
            $message = "OCRMyPDF command failed with exit code : $returnVar";

            // stderr is merged into $output by getCommand(); keep its tail
            // so the reason of the failure is not lost.
            $tail = array_slice($output, -self::MAX_ERROR_OUTPUT_LINES);
            if (!empty($tail)) {
                $message .= "\n" . implode("\n", $tail);
            }

            throw new RuntimeException($message);
        }

        return implode("\n", $output);
    }

    /**
     * Build the ocrmypdf command line for the current options.
     *
     * @param string $file Path to the input PDF file.
     * @return string The full command, with stderr redirected to stdout.
     * @throws RuntimeException When no executable path is configured.
     */
    protected function getCommand(string $file): string
    {
        $config = $this->getConfig();
        if (empty($config['path'])) {
            throw new RuntimeException("OCRMyPDF config is empty");
        }

        // Used verbatim: the configured value comes from the administrator
        // and is not shell-escaped here.
        $command = $config['path'];

        if ($this->forceOverwrite) {
            $command .= ' --force-ocr';
        }

        // Cast first so that a null language is treated like an empty one.
        if ((string) $this->language !== "") {
            $command .= " -l " . escapeshellarg((string) $this->language);
        }

        if ($this->disableOptimization) {
            $command .= ' --optimize 0';
        }

        if ($this->disablePdfA) {
            $command .= ' --output-type pdf';
        }

        if ($this->disableFastWebView) {
            $command .= ' --fast-web-view 999999';
        }

        if ($this->textFile != "") {
            $command .= " --sidecar " . escapeshellarg($this->textFile);
        }

        // Input and output files come last.
        $command .= " " . escapeshellarg($file) . " " . escapeshellarg($this->outputFilepath);

        return $command . ' 2>&1';
    }

    /**
     * Build the path of the temporary output file, next to the input file.
     *
     * Only the final path component is prefixed; directory names are left
     * untouched even when they contain the file name.
     *
     * @param string $filepath Path of the input PDF.
     * @return string Path of the form `<dir>/<timestamp>_ocr_<basename>`.
     */
    private function buildOutputFilepath(string $filepath): string
    {
        // rtrim() avoids a doubled separator when the file sits at a filesystem root.
        $directory = rtrim(dirname($filepath), '/\\');

        return $directory . DIRECTORY_SEPARATOR . time() . '_ocr_' . basename($filepath);
    }

    /**
     * Run the OCR and overwrite the input file with the result.
     *
     * The temporary output file is removed in every case (success, copy
     * failure or exception) so no `*_ocr_*` file is left behind.
     *
     * @return bool True when the input file was overwritten.
     * @throws RuntimeException Propagated from processPdf().
     */
    private function runOcrInPlace(): bool
    {
        try {
            $this->processPdf();

            // Copy rather than rename: the original file entry is kept and
            // only its content is replaced.
            return copy($this->outputFilepath, $this->filepath);
        } finally {
            if ($this->outputFilepath !== "" && is_file($this->outputFilepath)) {
                unlink($this->outputFilepath);
            }
        }
    }
}
