xref: /webtrees/app/GedcomFilters/GedcomEncodingFilter.php (revision 73f6d2b56f07f42ca02190f0dfc40653a1e91834)
11c6adce8SGreg Roach<?php
21c6adce8SGreg Roach
31c6adce8SGreg Roach/**
41c6adce8SGreg Roach * @copyright 2021 Greg Roach <greg@subaqua.co.uk>
51c6adce8SGreg Roach * @license   GPLv3+
61c6adce8SGreg Roach */
71c6adce8SGreg Roach
81c6adce8SGreg Roachdeclare(strict_types=1);
91c6adce8SGreg Roach
101c6adce8SGreg Roachnamespace Fisharebest\Webtrees\GedcomFilters;
111c6adce8SGreg Roach
121c6adce8SGreg Roachuse Fisharebest\Webtrees\Encodings\EncodingInterface;
131c6adce8SGreg Roachuse Fisharebest\Webtrees\Registry;
141c6adce8SGreg Roachuse php_user_filter;
151c6adce8SGreg Roach
161c6adce8SGreg Roachuse function stream_bucket_append;
171c6adce8SGreg Roachuse function stream_bucket_make_writeable;
181c6adce8SGreg Roachuse function stream_bucket_new;
191c6adce8SGreg Roachuse function substr;
201c6adce8SGreg Roach
211c6adce8SGreg Roachuse const PSFS_FEED_ME;
221c6adce8SGreg Roachuse const PSFS_PASS_ON;
231c6adce8SGreg Roach
/**
 * Filter a GEDCOM data stream, converting it from a source encoding to a destination encoding.
 *
 * Filter parameters (supplied when the filter is attached to a stream):
 *  - src_encoding: name of the input encoding. When missing or empty, the encoding is detected from the data.
 *  - dst_encoding: name of the output encoding. Defaults to UTF-8.
 *
 * These properties are created after the class is instantiated.
 *
 * @property string               $filtername
 * @property array<string,string> $params
 * @property resource             $stream
 */
class GedcomEncodingFilter extends php_user_filter
{
    // Bytes received so far that have not yet been converted and passed on.
    private string $data = '';

    // Encoding of the incoming data. Null until it is configured or detected.
    private EncodingInterface|null $src_encoding = null;

    // Encoding of the outgoing data. Set in onCreate().
    private EncodingInterface|null $dst_encoding = null;

    /**
     * Initialization: read the filter parameters and create the encoders.
     *
     * @return bool Always true.
     */
    public function onCreate(): bool
    {
        parent::onCreate();

        $src_encoding = $this->params['src_encoding'] ?? '';
        $dst_encoding = $this->params['dst_encoding'] ?? 'UTF-8';

        // An empty source encoding means "detect it from the data" - see filter().
        if ($src_encoding !== '') {
            $this->src_encoding = Registry::encodingFactory()->make($src_encoding);
        }

        $this->dst_encoding = Registry::encodingFactory()->make($dst_encoding);

        return true;
    }

    /**
     * Filter some data.
     *
     * @param resource $in       Read from this input stream
     * @param resource $out      Write to this output stream
     * @param int      $consumed Count of bytes processed
     * @param bool     $closing  Is the input about to end?
     *
     * @return int PSFS_PASS_ON / PSFS_FEED_ME / PSFS_ERR_FATAL
     */
    public function filter($in, $out, &$consumed, $closing): int
    {
        $return = PSFS_FEED_ME;

        // While input data is available, continue to read it.
        while ($bucket_in = stream_bucket_make_writeable($in)) {
            $this->data .= $bucket_in->data;
            $consumed   += $bucket_in->datalen;

            // Without a configured source encoding, try to detect one from everything buffered so far.
            $this->src_encoding ??= Registry::encodingFactory()->detect($this->data);

            if ($this->src_encoding instanceof EncodingInterface) {
                // Convert only the leading bytes that the source encoding reports as convertible;
                // the remainder stays buffered until more data arrives.
                $bytes      = $this->src_encoding->convertibleBytes($this->data);
                $data_out   = $this->transcode($this->src_encoding, substr($this->data, 0, $bytes));
                $bucket_out = stream_bucket_new($this->stream, $data_out);
                $this->data = substr($this->data, $bytes);
                $return     = PSFS_PASS_ON;
                stream_bucket_append($out, $bucket_out);
            }
        }

        // Process the final record.
        if ($closing && $this->data !== '') {
            // If no encoding was configured or detected, treat the data as UTF-8.
            $this->src_encoding ??= Registry::encodingFactory()->make('UTF-8');
            $data_out           = $this->transcode($this->src_encoding, $this->data);
            $bucket_out         = stream_bucket_new($this->stream, $data_out);

            // The buffer has now been emitted. Empty it, so that a further closing flush
            // (e.g. EOF followed by stream_filter_remove()) cannot emit the same bytes again.
            $this->data = '';
            $return     = PSFS_PASS_ON;
            stream_bucket_append($out, $bucket_out);
        }

        return $return;
    }

    /**
     * Convert data from the source encoding to the destination encoding, via UTF-8.
     *
     * @param EncodingInterface $src_encoding Encoding of $data
     * @param string            $data         Bytes to convert
     *
     * @return string The converted bytes
     */
    private function transcode(EncodingInterface $src_encoding, string $data): string
    {
        return $this->dst_encoding->fromUtf8($src_encoding->toUtf8($data));
    }
}
104