xref: /webtrees/app/Encodings/AbstractEncoding.php (revision 81b514b4672980e5db010e9d89b55eaf131e798f)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2023 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Encodings;
21
use function array_flip;
use function array_map;
use function implode;
use function ord;
use function preg_match_all;
use function preg_split;
use function strlen;
use function strrpos;
use function strtr;

use const PREG_SPLIT_NO_EMPTY;
32
/**
 * Convert between an encoding and UTF-8.
 *
 * Both directions are driven by the TO_UTF8 lookup table, which is empty here
 * and is read through late static binding, so concrete encodings are expected
 * to override it with their own character table.
 */
abstract class AbstractEncoding implements EncodingInterface
{
    /** @var string Written in place of any UTF-8 character that has no entry in the lookup table */
    protected const REPLACEMENT_CHARACTER = '?';

    /** @var array<string,string> Encoded character => utf8 character */
    protected const TO_UTF8 = [];

    /**
     * Byte-oriented pattern (deliberately without the "u" modifier) that matches
     * one well-formed UTF-8 character or, failing that, one single byte.
     * It is used to tokenise text that PCRE's UTF-8 mode rejects as malformed.
     *
     * @var string
     */
    private const UTF8_CHARACTER_OR_BYTE =
        '/[\x00-\x7F]'
        . '|[\xC2-\xDF][\x80-\xBF]'
        . '|\xE0[\xA0-\xBF][\x80-\xBF]'
        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'
        . '|\xED[\x80-\x9F][\x80-\xBF]'
        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'
        . '|[\xF1-\xF3][\x80-\xBF]{3}'
        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'
        . '|./s';

    /**
     * Convert a string from UTF-8 to another encoding.
     *
     * Characters whose first byte is below 128 are copied unchanged. Every other
     * character is looked up in the reversed TO_UTF8 table; characters with no
     * entry become REPLACEMENT_CHARACTER. Bytes that do not form well-formed
     * UTF-8 are treated as unknown characters and are also replaced.
     *
     * @param string $text
     *
     * @return string
     */
    public function fromUtf8(string $text): string
    {
        // Reverse the table: utf8 character => encoded character.
        $utf8  = array_flip(static::TO_UTF8);
        $utf8[UTF8::REPLACEMENT_CHARACTER] = static::REPLACEMENT_CHARACTER;

        $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);

        if ($chars === false) {
            // preg_split() gives false (not an array) when $text is not valid UTF-8.
            // Tokenise at byte level instead: well-formed characters stay whole and
            // each stray byte becomes its own item, which the mapping below replaces.
            preg_match_all(self::UTF8_CHARACTER_OR_BYTE, $text, $matches);
            $chars = $matches[0];
        }

        $chars = array_map(static function (string $char) use ($utf8): string {
            // ord() inspects only the first byte, so this is true for single-byte characters.
            if (ord($char) < 128) {
                return $char;
            }

            return $utf8[$char] ?? static::REPLACEMENT_CHARACTER;
        }, $chars);

        return implode('', $chars);
    }

    /**
     * Convert a string from another encoding to UTF-8.
     *
     * Every occurrence of a TO_UTF8 key is substituted by its UTF-8 value;
     * anything that is not a key is left untouched.
     *
     * @param string $text
     *
     * @return string
     */
    public function toUtf8(string $text): string
    {
        return strtr($text, static::TO_UTF8);
    }

    /**
     * When reading multi-byte encodings using a stream, we must avoid incomplete characters.
     *
     * Looks for the encoded form of a line-feed, then a carriage-return, then a
     * space. For the first of these that occurs anywhere in the text, the result
     * is the offset just past its last occurrence. The candidates are tried in
     * that fixed order, so a later carriage-return or space is not considered
     * once a line-feed has been found.
     *
     * @param string $text
     *
     * @return int Number of leading bytes that end on a safe boundary, or 0 if none of the characters occur
     */
    public function convertibleBytes(string $text): int
    {
        $safe_chars = [
            $this->fromUtf8("\n"),
            $this->fromUtf8("\r"),
            $this->fromUtf8(' '),
        ];

        foreach ($safe_chars as $char) {
            $pos = strrpos($text, $char);

            if ($pos !== false) {
                // Include the safe character itself in the convertible prefix.
                return $pos + strlen($char);
            }
        }

        return 0;
    }
}
105