<?php

/**
 * @copyright 2021 Greg Roach <greg@subaqua.co.uk>
 * @license   GPLv3+
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\GedcomFilters;

use Fisharebest\Webtrees\Encodings\EncodingInterface;
use Fisharebest\Webtrees\Registry;
use php_user_filter;

use function stream_bucket_append;
use function stream_bucket_make_writeable;
use function stream_bucket_new;
use function substr;

use const PSFS_FEED_ME;
use const PSFS_PASS_ON;

/**
 * Filter a GEDCOM data stream, converting to UTF-8.
 *
 * The destination encoding defaults to UTF-8 but can be overridden with the
 * 'dst_encoding' filter parameter. The source encoding is taken from the
 * 'src_encoding' filter parameter when given; otherwise it is detected from
 * the data as it arrives.
 *
 * These properties are created after the class is instantiated.
 *
 * @property string               $filtername
 * @property array<string,string> $params
 * @property resource             $stream
 */
class GedcomEncodingFilter extends php_user_filter
{
    // Input bytes received but not yet converted and passed on.
    private string $data = '';

    // Encoding of the incoming data; null until it is configured or detected.
    private EncodingInterface|null $src_encoding = null;

    // Encoding of the outgoing data; assigned in onCreate().
    private EncodingInterface|null $dst_encoding = null;

    /**
     * Initialization
     *
     * Reads the optional 'src_encoding' and 'dst_encoding' filter parameters.
     * An absent or empty 'src_encoding' leaves the source encoding unset, so
     * that filter() detects it from the data.
     *
     * @return bool Always true
     */
    public function onCreate(): bool
    {
        parent::onCreate();

        $src_encoding = $this->params['src_encoding'] ?? '';
        $dst_encoding = $this->params['dst_encoding'] ?? 'UTF-8';

        if ($src_encoding !== '') {
            $this->src_encoding = Registry::encodingFactory()->make($src_encoding);
        }

        $this->dst_encoding = Registry::encodingFactory()->make($dst_encoding);

        return true;
    }

    /**
     * Filter some data.
     *
     * @param resource $in       Read from this input stream
     * @param resource $out      Write to this output stream
     * @param int      $consumed Count of bytes processed
     * @param bool     $closing  Is the input about to end?
     *
     * @return int PSFS_PASS_ON / PSFS_FEED_ME / PSFS_ERR_FATAL
     */
    public function filter($in, $out, &$consumed, $closing): int
    {
        $return = PSFS_FEED_ME;

        // While input data is available, continue to read it.
        while ($bucket_in = stream_bucket_make_writeable($in)) {
            $this->data .= $bucket_in->data;
            $consumed   += $bucket_in->datalen;

            // No configured encoding: try to detect one from everything buffered so far.
            $this->src_encoding ??= Registry::encodingFactory()->detect($this->data);

            // Until an encoding is known, keep buffering and ask for more input.
            if ($this->src_encoding instanceof EncodingInterface) {
                // Convert only the leading bytes reported as convertible;
                // the remainder stays buffered for the next bucket.
                $bytes      = $this->src_encoding->convertibleBytes($this->data);
                $data_in    = substr($this->data, 0, $bytes);
                $data_out   = $this->dst_encoding->fromUtf8($this->src_encoding->toUtf8($data_in));
                $bucket_out = stream_bucket_new($this->stream, $data_out);
                $this->data = substr($this->data, $bytes);
                $return     = PSFS_PASS_ON;
                stream_bucket_append($out, $bucket_out);
            }
        }

        // Process the final record.
        if ($closing && $this->data !== '') {
            // Nothing configured and nothing detected: treat the remainder as UTF-8.
            $this->src_encoding ??= Registry::encodingFactory()->make('UTF-8');
            $data_out   = $this->dst_encoding->fromUtf8($this->src_encoding->toUtf8($this->data));
            $bucket_out = stream_bucket_new($this->stream, $data_out);
            // Empty the buffer so that a further closing flush cannot emit
            // the same trailing bytes a second time.
            $this->data = '';
            $return     = PSFS_PASS_ON;
            stream_bucket_append($out, $bucket_out);
        }

        return $return;
    }
}