xref: /webtrees/app/GedcomFilters/GedcomEncodingFilter.php (revision e873f434551745f888937263ff89e80db3b0f785)
1<?php
2
3/**
4 * @copyright 2021 Greg Roach <greg@subaqua.co.uk>
5 * @license   GPLv3+
6 */
7
8declare(strict_types=1);
9
10namespace Fisharebest\Webtrees\GedcomFilters;
11
12use Fisharebest\Webtrees\Encodings\EncodingInterface;
13use Fisharebest\Webtrees\Registry;
14use php_user_filter;
15
16use function stream_bucket_append;
17use function stream_bucket_make_writeable;
18use function stream_bucket_new;
19use function substr;
20
21use const PSFS_FEED_ME;
22use const PSFS_PASS_ON;
23
24/**
25 * Filter a GEDCOM data stream, converting to UTF-8.
26 *
27 * These properties are created after the class is instantiated.
28 *
29 * @property string               $filtername
30 * @property array<string,string> $params
31 * @property resource             $stream
32 */
class GedcomEncodingFilter extends php_user_filter
{
    /**
     * Input bytes received but not yet converted; carried over between calls to filter().
     */
    private string $data = '';

    /**
     * Encoding of the incoming data. Null until it is supplied as a filter
     * parameter or detected from the data itself.
     */
    private EncodingInterface|null $src_encoding = null;

    /**
     * Encoding of the outgoing data. Assigned in onCreate().
     */
    private EncodingInterface|null $dst_encoding = null;

    /**
     * Initialization
     *
     * Reads the optional "src_encoding" and "dst_encoding" filter parameters.
     * An absent or empty "src_encoding" leaves the source encoding unset, so
     * that filter() tries to detect it from the data.  An absent
     * "dst_encoding" defaults to UTF-8.
     *
     * @return bool Always true
     */
    public function onCreate(): bool
    {
        parent::onCreate();

        $src_encoding = $this->params['src_encoding'] ?? '';
        $dst_encoding = $this->params['dst_encoding'] ?? 'UTF-8';

        if ($src_encoding !== '') {
            $this->src_encoding = Registry::encodingFactory()->make($src_encoding);
        }

        $this->dst_encoding = Registry::encodingFactory()->make($dst_encoding);

        return true;
    }

    /**
     * Filter some data.
     *
     * Incoming buckets are accumulated in an internal buffer.  Once the source
     * encoding is known, the leading part of the buffer that the encoding
     * reports as convertible is converted and passed on; the remainder is kept
     * for the next call.  When the stream is closing, whatever is left in the
     * buffer is converted and passed on, assuming UTF-8 if no source encoding
     * was ever specified or detected.
     *
     * @param resource $in       Read from this input stream
     * @param resource $out      Write to this output stream
     * @param int      $consumed Count of bytes processed
     * @param bool     $closing  Is the input about to end?
     *
     * @return int PSFS_PASS_ON if any bucket was appended to $out, otherwise PSFS_FEED_ME
     */
    public function filter($in, $out, &$consumed, $closing): int
    {
        $return = PSFS_FEED_ME;

        // While input data is available, continue to read it.
        while ($bucket_in = stream_bucket_make_writeable($in)) {
            $this->data .= $bucket_in->data;
            $consumed   += $bucket_in->datalen;

            // Keep trying to detect the encoding until there is enough data to decide.
            $this->src_encoding ??= Registry::encodingFactory()->detect($this->data);

            if ($this->src_encoding instanceof EncodingInterface) {
                // Only convert the prefix that the encoding reports as convertible -
                // presumably to avoid splitting a multi-byte sequence across buckets.
                $bytes = $this->src_encoding->convertibleBytes($this->data);

                $this->emit($out, $this->src_encoding, substr($this->data, 0, $bytes));

                $this->data = substr($this->data, $bytes);
                $return     = PSFS_PASS_ON;
            }
        }

        // Process the final record.
        if ($closing && $this->data !== '') {
            $this->src_encoding ??= Registry::encodingFactory()->make('UTF-8');

            $this->emit($out, $this->src_encoding, $this->data);

            // The buffer has been flushed.  Empty it, so that another closing
            // call cannot emit the same bytes a second time.
            $this->data = '';
            $return     = PSFS_PASS_ON;
        }

        return $return;
    }

    /**
     * Convert some data from the source encoding to the destination encoding
     * (via UTF-8) and append it to the output as a new bucket.
     *
     * @param resource          $out          Write to this output stream
     * @param EncodingInterface $src_encoding Encoding of $data
     * @param string            $data         Bytes to convert
     */
    private function emit($out, EncodingInterface $src_encoding, string $data): void
    {
        $data_out   = $this->dst_encoding->fromUtf8($src_encoding->toUtf8($data));
        $bucket_out = stream_bucket_new($this->stream, $data_out);

        stream_bucket_append($out, $bucket_out);
    }
}
104