<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Encodings;

use function array_flip;
use function array_map;
use function implode;
use function ord;
use function preg_match_all;
use function preg_split;
use function strlen;
use function strrpos;
use function strtr;

use const PREG_SPLIT_NO_EMPTY;

/**
 * Convert between an encoding and UTF-8.
 *
 * The conversion is table driven: the TO_UTF8 constant maps each encoded
 * character to its UTF-8 equivalent, and the reverse mapping is derived
 * from it on demand. The table is empty here and is read via late static
 * binding, so a subclass can supply its own.
 */
abstract class AbstractEncoding implements EncodingInterface
{
    /** Emitted by fromUtf8() for any non-ASCII character that has no entry in the table. */
    protected const string REPLACEMENT_CHARACTER = '?';

    /** @var array<string,string> Encoded character => utf8 character */
    protected const array TO_UTF8 = [];

    /**
     * Byte-mode (no /u modifier) pattern matching either one well-formed UTF-8
     * sequence or, failing that, one stray byte in the range 0x80-0xFF.
     * The well-formed alternatives must come first so that they take priority.
     */
    private const string UTF8_OR_STRAY_BYTE =
        '/[\x00-\x7F]'
        . '|[\xC2-\xDF][\x80-\xBF]'
        . '|\xE0[\xA0-\xBF][\x80-\xBF]'
        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'
        . '|\xED[\x80-\x9F][\x80-\xBF]'
        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'
        . '|[\xF1-\xF3][\x80-\xBF]{3}'
        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'
        . '|[\x80-\xFF]/';

    /**
     * Convert a string from UTF-8 to another encoding.
     *
     * Characters whose first byte is below 128 are copied unchanged. Every other
     * character is looked up in the inverse of TO_UTF8, and replaced with
     * REPLACEMENT_CHARACTER when it has no entry. Bytes that do not form
     * well-formed UTF-8 are each treated as one unknown character.
     *
     * @param string $text
     *
     * @return string
     */
    public function fromUtf8(string $text): string
    {
        $lookup = array_flip(static::TO_UTF8);

        // The UTF-8 replacement character always maps to this encoding's replacement character.
        $lookup[UTF8::REPLACEMENT_CHARACTER] = static::REPLACEMENT_CHARACTER;

        $replacement = static::REPLACEMENT_CHARACTER;

        $chars = array_map(
            static fn (string $char): string => ord($char) < 128 ? $char : ($lookup[$char] ?? $replacement),
            self::splitUtf8Characters($text),
        );

        return implode('', $chars);
    }

    /**
     * Convert a string from another encoding to UTF-8.
     *
     * Any sequence listed in TO_UTF8 is replaced by its UTF-8 equivalent;
     * everything else is passed through unchanged.
     *
     * @param string $text
     *
     * @return string
     */
    public function toUtf8(string $text): string
    {
        return strtr($text, static::TO_UTF8);
    }

    /**
     * When reading multi-byte encodings using a stream, we must avoid incomplete characters.
     *
     * The candidates are tried in the order: line-feed, carriage-return, space.
     * The first candidate that occurs anywhere in the text decides the result,
     * even if a later candidate occurs further along.
     *
     * @param string $text
     *
     * @return int The offset just after the last occurrence of the chosen
     *             character, or zero when none of the candidates is present.
     */
    public function convertibleBytes(string $text): int
    {
        // Encode the candidates so that they are searched for in this encoding's byte form.
        $safe_chars = [
            $this->fromUtf8("\n"),
            $this->fromUtf8("\r"),
            $this->fromUtf8(' '),
        ];

        foreach ($safe_chars as $char) {
            $pos = strrpos($text, $char);

            if ($pos !== false) {
                return $pos + strlen($char);
            }
        }

        return 0;
    }

    /**
     * Split text into UTF-8 characters, tolerating malformed input.
     *
     * @param string $text
     *
     * @return array<int,string> Well-formed characters, plus one element for each stray byte.
     */
    private static function splitUtf8Characters(string $text): array
    {
        $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);

        if ($chars !== false) {
            return $chars;
        }

        // preg_split() fails outright on malformed UTF-8, so scan in byte mode instead.
        // This keeps the well-formed characters and isolates each offending byte.
        preg_match_all(self::UTF8_OR_STRAY_BYTE, $text, $matches);

        return $matches[0] ?? [];
    }
}