<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Encodings;

use function array_flip;
use function array_map;
use function implode;
use function ord;
use function preg_match_all;
use function preg_split;
use function strlen;
use function strrpos;
use function strtr;

use const PREG_SPLIT_NO_EMPTY;

/**
 * Convert between an encoding and UTF-8.
 *
 * Concrete encodings supply the TO_UTF8 translation table (and, optionally,
 * their own REPLACEMENT_CHARACTER); the conversions below are driven
 * entirely by those two constants.
 */
abstract class AbstractEncoding implements EncodingInterface
{
    /** Emitted by fromUtf8() for any non-ASCII character that has no entry in TO_UTF8. */
    protected const REPLACEMENT_CHARACTER = '?';

    /** @var array<string,string> Encoded character => utf8 character */
    protected const TO_UTF8 = [];

    /**
     * Convert a string from UTF-8 to another encoding.
     *
     * Bytes below 128 are copied through unchanged. Every other character is
     * looked up in the inverse of TO_UTF8; characters with no mapping become
     * REPLACEMENT_CHARACTER. Input that is not well-formed UTF-8 is still
     * converted: well-formed sequences are translated as usual and each
     * stray byte is replaced by REPLACEMENT_CHARACTER.
     *
     * @param string $text
     *
     * @return string
     */
    public function fromUtf8(string $text): string
    {
        $replacement = static::REPLACEMENT_CHARACTER;

        // Invert the table: UTF-8 character => encoded character.
        $utf8 = array_flip(static::TO_UTF8);
        $utf8[UTF8::REPLACEMENT_CHARACTER] = $replacement;

        $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);

        if ($chars === false) {
            // preg_split() with the /u modifier fails on malformed UTF-8, and
            // array_map() would then throw a TypeError. Fall back to a
            // byte-oriented pattern that keeps UTF-8 shaped sequences together
            // and isolates every other byte so that it can be replaced below.
            $pattern = '/[\x00-\x7F]|[\xC2-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF4][\x80-\xBF]{3}|./s';
            $matches = [];
            preg_match_all($pattern, $text, $matches);
            $chars = $matches[0] ?? [];
        }

        $chars = array_map(static function (string $char) use ($utf8, $replacement): string {
            // ASCII is passed through without consulting the table.
            if (ord($char) < 128) {
                return $char;
            }

            return $utf8[$char] ?? $replacement;
        }, $chars);

        return implode('', $chars);
    }

    /**
     * Convert a string from another encoding to UTF-8.
     *
     * Each key of TO_UTF8 found in the text is substituted by its UTF-8
     * value; anything not listed in the table is left as it is.
     *
     * @param string $text
     *
     * @return string
     */
    public function toUtf8(string $text): string
    {
        return strtr($text, static::TO_UTF8);
    }

    /**
     * When reading multi-byte encodings using a stream, we must avoid incomplete characters.
     *
     * Looks for a "safe" character - line feed, then carriage return, then
     * space, each in its encoded form - and returns the offset just past the
     * last occurrence of the first one that is present in the text.
     *
     * @param string $text
     *
     * @return int The number of leading bytes that can be converted, or 0 if no safe character was found.
     */
    public function convertibleBytes(string $text): int
    {
        $safe_chars = [
            $this->fromUtf8("\n"),
            $this->fromUtf8("\r"),
            $this->fromUtf8(' '),
        ];

        foreach ($safe_chars as $char) {
            $pos = strrpos($text, $char);

            if ($pos !== false) {
                // Include the safe character itself in the convertible prefix.
                return $pos + strlen($char);
            }
        }

        return 0;
    }
}