xref: /webtrees/app/Encodings/AbstractEncoding.php (revision 1c6adce825f16611bd8b75a22114302de4b41cfe)
1*1c6adce8SGreg Roach<?php
2*1c6adce8SGreg Roach
/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
17*1c6adce8SGreg Roach
18*1c6adce8SGreg Roachdeclare(strict_types=1);
19*1c6adce8SGreg Roach
20*1c6adce8SGreg Roachnamespace Fisharebest\Webtrees\Encodings;
21*1c6adce8SGreg Roach
22*1c6adce8SGreg Roachuse function array_flip;
23*1c6adce8SGreg Roachuse function array_map;
24*1c6adce8SGreg Roachuse function implode;
25*1c6adce8SGreg Roachuse function ord;
26*1c6adce8SGreg Roachuse function preg_split;
27*1c6adce8SGreg Roachuse function strlen;
28*1c6adce8SGreg Roachuse function strrpos;
29*1c6adce8SGreg Roachuse function strtr;
30*1c6adce8SGreg Roach
31*1c6adce8SGreg Roachuse const PREG_SPLIT_NO_EMPTY;
32*1c6adce8SGreg Roach
/**
 * Convert between an encoding and UTF-8.
 *
 * Both conversions are driven by the TO_UTF8 lookup table, which is empty here
 * and is read through late static binding so that subclasses can supply their own.
 */
abstract class AbstractEncoding implements EncodingInterface
{
    /** @var string Emitted by fromUtf8() for any non-ASCII character that is missing from the lookup table */
    protected const REPLACEMENT_CHARACTER = '?';

    /** @var array<string,string> Encoded character => utf8 character */
    protected const TO_UTF8 = [];

    /**
     * Convert a string from UTF-8 to another encoding.
     *
     * ASCII characters are passed through unchanged.  Any other character is
     * looked up in the reversed TO_UTF8 table; characters that are not found
     * become REPLACEMENT_CHARACTER.
     *
     * If $text is not well-formed UTF-8 it cannot be split into characters.
     * In that case it is processed byte by byte, so ASCII is preserved and
     * every other byte becomes REPLACEMENT_CHARACTER.
     *
     * @param string $text
     *
     * @return string
     */
    public function fromUtf8(string $text): string
    {
        // Reverse the table.  If two encoded characters share a UTF-8 value, the later entry wins.
        $from_utf8 = array_flip(static::TO_UTF8);
        $from_utf8[UTF8::REPLACEMENT_CHARACTER] = static::REPLACEMENT_CHARACTER;

        $replacement = static::REPLACEMENT_CHARACTER;

        $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);

        if ($chars === false) {
            // The "u" modifier makes preg_split() fail on malformed UTF-8.
            // Passing false to array_map() would throw a TypeError, so split into bytes instead.
            $chars = preg_split('//', $text, -1, PREG_SPLIT_NO_EMPTY) ?: [];
        }

        $encoded = array_map(static function (string $char) use ($from_utf8, $replacement): string {
            // ord() inspects the first byte only; below 128 means a single-byte ASCII character.
            if (ord($char) < 128) {
                return $char;
            }

            return $from_utf8[$char] ?? $replacement;
        }, $chars);

        return implode('', $encoded);
    }

    /**
     * Convert a string from another encoding to UTF-8.
     *
     * Every occurrence of a TO_UTF8 key is replaced by its value; text that
     * matches no key is left unchanged.
     *
     * @param string $text
     *
     * @return string
     */
    public function toUtf8(string $text): string
    {
        return strtr($text, static::TO_UTF8);
    }

    /**
     * When reading multi-byte encodings using a stream, we must avoid incomplete characters.
     *
     * Returns the number of leading bytes of $text that end on a known
     * boundary: just after the last line-feed or, if there is none, the last
     * carriage-return or, failing that, the last space.  Returns 0 when $text
     * contains none of these.
     *
     * @param string $text
     *
     * @return int
     */
    public function convertibleBytes(string $text): int
    {
        // In priority order.  Encoded via fromUtf8() so that a subclass which overrides it is respected.
        $safe_chars = [
            $this->fromUtf8("\n"),
            $this->fromUtf8("\r"),
            $this->fromUtf8(' '),
        ];

        foreach ($safe_chars as $char) {
            $pos = strrpos($text, $char);

            if ($pos !== false) {
                // Include the boundary character itself.
                return $pos + strlen($char);
            }
        }

        return 0;
    }
}
105