xref: /webtrees/app/I18N.php (revision e57829396e448d173bb4be808a8b9fd89548228b)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees;
21
22use Closure;
23use Collator;
24use Exception;
25use Fisharebest\Localization\Locale;
26use Fisharebest\Localization\Locale\LocaleEnUs;
27use Fisharebest\Localization\Locale\LocaleInterface;
28use Fisharebest\Localization\Translation;
29use Fisharebest\Localization\Translator;
30use Fisharebest\Webtrees\Module\ModuleCustomInterface;
31use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
32use Fisharebest\Webtrees\Services\ModuleService;
33use Illuminate\Support\Collection;
34
35use function array_merge;
36use function class_exists;
37use function html_entity_decode;
38use function in_array;
39use function mb_strtolower;
40use function mb_strtoupper;
41use function mb_substr;
42use function ord;
43use function sprintf;
44use function str_contains;
45use function str_replace;
46use function strcmp;
47use function strip_tags;
48use function strlen;
49use function strtr;
50use function var_export;
51
52/**
53 * Internationalization (i18n) and localization (l10n).
54 */
55class I18N
56{
57    // MO files use special characters for plurals and context.
58    public const PLURAL  = "\x00";
59    public const CONTEXT = "\x04";
60
61    // Digits are always rendered LTR, even in RTL text.
62    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';
63
64    // These locales need special handling for the dotless letter I.
65    private const DOTLESS_I_LOCALES = [
66        'az',
67        'tr',
68    ];
69
70    private const DOTLESS_I_TOLOWER = [
71        'I' => 'ı',
72        'İ' => 'i',
73    ];
74
75    private const DOTLESS_I_TOUPPER = [
76        'ı' => 'I',
77        'i' => 'İ',
78    ];
79
80    // The ranges of characters used by each script.
81    private const SCRIPT_CHARACTER_RANGES = [
82        [
83            'Latn',
84            0x0041,
85            0x005A,
86        ],
87        [
88            'Latn',
89            0x0061,
90            0x007A,
91        ],
92        [
93            'Latn',
94            0x0100,
95            0x02AF,
96        ],
97        [
98            'Grek',
99            0x0370,
100            0x03FF,
101        ],
102        [
103            'Cyrl',
104            0x0400,
105            0x052F,
106        ],
107        [
108            'Hebr',
109            0x0590,
110            0x05FF,
111        ],
112        [
113            'Arab',
114            0x0600,
115            0x06FF,
116        ],
117        [
118            'Arab',
119            0x0750,
120            0x077F,
121        ],
122        [
123            'Arab',
124            0x08A0,
125            0x08FF,
126        ],
127        [
128            'Deva',
129            0x0900,
130            0x097F,
131        ],
132        [
133            'Taml',
134            0x0B80,
135            0x0BFF,
136        ],
137        [
138            'Sinh',
139            0x0D80,
140            0x0DFF,
141        ],
142        [
143            'Thai',
144            0x0E00,
145            0x0E7F,
146        ],
147        [
148            'Geor',
149            0x10A0,
150            0x10FF,
151        ],
152        [
153            'Grek',
154            0x1F00,
155            0x1FFF,
156        ],
157        [
158            'Deva',
159            0xA8E0,
160            0xA8FF,
161        ],
162        [
163            'Hans',
164            0x3000,
165            0x303F,
166        ],
167        // Mixed CJK, not just Hans
168        [
169            'Hans',
170            0x3400,
171            0xFAFF,
172        ],
173        // Mixed CJK, not just Hans
174        [
175            'Hans',
176            0x20000,
177            0x2FA1F,
178        ],
179        // Mixed CJK, not just Hans
180    ];
181
182    // Characters that are displayed in mirror form in RTL text.
183    private const MIRROR_CHARACTERS = [
184        '('  => ')',
185        ')'  => '(',
186        '['  => ']',
187        ']'  => '[',
188        '{'  => '}',
189        '}'  => '{',
190        '<'  => '>',
191        '>'  => '<',
192        '‹ ' => '›',
193        '› ' => '‹',
194        '«'  => '»',
195        '»'  => '«',
196        '﴾ ' => '﴿',
197        '﴿ ' => '﴾',
198        '“ ' => '”',
199        '” ' => '“',
200        '‘ ' => '’',
201        '’ ' => '‘',
202    ];
203
204    // Punctuation used to separate list items, typically a comma
205    public static string $list_separator;
206
207    private static ?ModuleLanguageInterface $language;
208
209    private static LocaleInterface $locale;
210
211    private static Translator $translator;
212
213    private static ?Collator $collator;
214
215    /**
216     * The preferred locales for this site, or a default list if no preference.
217     *
218     * @return array<LocaleInterface>
219     */
220    public static function activeLocales(): array
221    {
222        $locales = app(ModuleService::class)
223            ->findByInterface(ModuleLanguageInterface::class, false, true)
224            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
225                return $module->locale();
226            });
227
228        if ($locales->isEmpty()) {
229            return [new LocaleEnUs()];
230        }
231
232        return $locales->all();
233    }
234
235    /**
236     * Which MySQL collation should be used for this locale?
237     *
238     * @return string
239     */
240    public static function collation(): string
241    {
242        $collation = self::$locale->collation();
243        switch ($collation) {
244            case 'croatian_ci':
245            case 'german2_ci':
246            case 'vietnamese_ci':
247                // Only available in MySQL 5.6
248                return 'utf8_unicode_ci';
249            default:
250                return 'utf8_' . $collation;
251        }
252    }
253
254    /**
255     * What format is used to display dates in the current locale?
256     *
257     * @return string
258     */
259    public static function dateFormat(): string
260    {
261        /* I18N: This is the format string for full dates. See https://php.net/date for codes */
262        return self::$translator->translate('%j %F %Y');
263    }
264
265    /**
266     * Convert the digits 0-9 into the local script
267     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
268     *
269     * @param string|int $n
270     *
271     * @return string
272     */
273    public static function digits($n): string
274    {
275        return self::$locale->digits((string) $n);
276    }
277
278    /**
279     * What is the direction of the current locale
280     *
281     * @return string "ltr" or "rtl"
282     */
283    public static function direction(): string
284    {
285        return self::$locale->direction();
286    }
287
288    /**
289     * Initialise the translation adapter with a locale setting.
290     *
291     * @param string $code
292     * @param bool   $setup
293     *
294     * @return void
295     */
296    public static function init(string $code, bool $setup = false): void
297    {
298        self::$locale = Locale::create($code);
299
300        // Load the translation file
301        $translation_file = __DIR__ . '/../resources/lang/' . self::$locale->languageTag() . '/messages.php';
302
303        try {
304            $translation  = new Translation($translation_file);
305            $translations = $translation->asArray();
306        } catch (Exception $ex) {
307            // The translations files are created during the build process, and are
308            // not included in the source code.
309            // Assuming we are using dev code, and build (or rebuild) the files.
310            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
311            $translation  = new Translation($po_file);
312            $translations = $translation->asArray();
313            file_put_contents($translation_file, "<?php\n\nreturn " . var_export($translations, true) . ";\n");
314        }
315
316        // Add translations from custom modules (but not during setup, as we have no database/modules)
317        if (!$setup) {
318            $module_service = app(ModuleService::class);
319
320            $translations = $module_service
321                ->findByInterface(ModuleCustomInterface::class)
322                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
323                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
324                }, $translations);
325
326            self::$language = $module_service
327                ->findByInterface(ModuleLanguageInterface::class)
328                ->first(fn (ModuleLanguageInterface $module): bool => $module->locale()->languageTag() === $code);
329        }
330
331        // Create a translator
332        self::$translator = new Translator($translations, self::$locale->pluralRule());
333
334        /* I18N: This punctuation is used to separate lists of items */
335        self::$list_separator = self::translate(', ');
336
337        // Create a collator
338        try {
339            if (class_exists('Collator')) {
340                // Symfony provides a very incomplete polyfill - which cannot be used.
341                self::$collator = new Collator(self::$locale->code());
342                // Ignore upper/lower case differences
343                self::$collator->setStrength(Collator::SECONDARY);
344            }
345        } catch (Exception $ex) {
346            // PHP-INTL is not installed?  We'll use a fallback later.
347            self::$collator = null;
348        }
349    }
350
351    /**
352     * Translate a string, and then substitute placeholders
353     * echo I18N::translate('Hello World!');
354     * echo I18N::translate('The %s sat on the mat', 'cat');
355     *
356     * @param string $message
357     * @param string ...$args
358     *
359     * @return string
360     */
361    public static function translate(string $message, ...$args): string
362    {
363        $message = self::$translator->translate($message);
364
365        return sprintf($message, ...$args);
366    }
367
368    /**
369     * @return string
370     */
371    public static function languageTag(): string
372    {
373        return self::$locale->languageTag();
374    }
375
376    /**
377     * @return LocaleInterface
378     */
379    public static function locale(): LocaleInterface
380    {
381        return self::$locale;
382    }
383
384    /**
385     * @return ModuleLanguageInterface
386     */
387    public static function language(): ModuleLanguageInterface
388    {
389        return self::$language;
390    }
391
392    /**
393     * Translate a number into the local representation.
394     * e.g. 12345.67 becomes
395     * en: 12,345.67
396     * fr: 12 345,67
397     * de: 12.345,67
398     *
399     * @param float $n
400     * @param int   $precision
401     *
402     * @return string
403     */
404    public static function number(float $n, int $precision = 0): string
405    {
406        return self::$locale->number(round($n, $precision));
407    }
408
409    /**
410     * Translate a fraction into a percentage.
411     * e.g. 0.123 becomes
412     * en: 12.3%
413     * fr: 12,3 %
414     * de: 12,3%
415     *
416     * @param float $n
417     * @param int   $precision
418     *
419     * @return string
420     */
421    public static function percentage(float $n, int $precision = 0): string
422    {
423        return self::$locale->percent(round($n, $precision + 2));
424    }
425
426    /**
427     * Translate a plural string
428     * echo self::plural('There is an error', 'There are errors', $num_errors);
429     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
430     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
431     *
432     * @param string $singular
433     * @param string $plural
434     * @param int    $count
435     * @param string ...$args
436     *
437     * @return string
438     */
439    public static function plural(string $singular, string $plural, int $count, ...$args): string
440    {
441        $message = self::$translator->translatePlural($singular, $plural, $count);
442
443        return sprintf($message, ...$args);
444    }
445
446    /**
447     * UTF8 version of PHP::strrev()
448     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
449     * These do not support UTF8 text direction, so we must mimic it for them.
450     * Numbers are always rendered LTR, even in RTL text.
451     * The visual direction of characters such as parentheses should be reversed.
452     *
453     * @param string $text Text to be reversed
454     *
455     * @return string
456     */
457    public static function reverseText(string $text): string
458    {
459        // Remove HTML markup - we can't display it and it is LTR.
460        $text = strip_tags($text);
461        // Remove HTML entities.
462        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
463
464        // LTR text doesn't need reversing
465        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
466            return $text;
467        }
468
469        // Mirrored characters
470        $text = strtr($text, self::MIRROR_CHARACTERS);
471
472        $reversed = '';
473        $digits   = '';
474        while ($text !== '') {
475            $letter = mb_substr($text, 0, 1);
476            $text   = mb_substr($text, 1);
477            if (str_contains(self::DIGITS, $letter)) {
478                $digits .= $letter;
479            } else {
480                $reversed = $letter . $digits . $reversed;
481                $digits   = '';
482            }
483        }
484
485        return $digits . $reversed;
486    }
487
488    /**
489     * Return the direction (ltr or rtl) for a given script
490     * The PHP/intl library does not provde this information, so we need
491     * our own lookup table.
492     *
493     * @param string $script
494     *
495     * @return string
496     */
497    public static function scriptDirection(string $script): string
498    {
499        switch ($script) {
500            case 'Arab':
501            case 'Hebr':
502            case 'Mong':
503            case 'Thaa':
504                return 'rtl';
505            default:
506                return 'ltr';
507        }
508    }
509
510    /**
511     * Identify the script used for a piece of text
512     *
513     * @param string $string
514     *
515     * @return string
516     */
517    public static function textScript(string $string): string
518    {
519        $string = strip_tags($string); // otherwise HTML tags show up as latin
520        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
521        $string = str_replace([
522            Individual::NOMEN_NESCIO,
523            Individual::PRAENOMEN_NESCIO,
524        ], '', $string);
525        $pos    = 0;
526        $strlen = strlen($string);
527        while ($pos < $strlen) {
528            // get the Unicode Code Point for the character at position $pos
529            $byte1 = ord($string[$pos]);
530            if ($byte1 < 0x80) {
531                $code_point = $byte1;
532                $chrlen     = 1;
533            } elseif ($byte1 < 0xC0) {
534                // Invalid continuation character
535                return 'Latn';
536            } elseif ($byte1 < 0xE0) {
537                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
538                $chrlen     = 2;
539            } elseif ($byte1 < 0xF0) {
540                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
541                $chrlen     = 3;
542            } elseif ($byte1 < 0xF8) {
543                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
544                $chrlen     = 3;
545            } else {
546                // Invalid UTF
547                return 'Latn';
548            }
549
550            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
551                if ($code_point >= $range[1] && $code_point <= $range[2]) {
552                    return $range[0];
553                }
554            }
555            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
556            $pos += $chrlen;
557        }
558
559        return 'Latn';
560    }
561
562    /**
563     * A closure which will compare strings using local collation rules.
564     *
565     * @return Closure
566     */
567    public static function comparator(): Closure
568    {
569        if (self::$collator instanceof Collator) {
570            return static function (string $x, string $y): int {
571                return (int) self::$collator->compare($x, $y);
572            };
573        }
574
575        return static function (string $x, string $y): int {
576            return strcmp(self::strtolower($x), self::strtolower($y));
577        };
578    }
579
580
581
582    /**
583     * Convert a string to lower case.
584     *
585     * @param string $string
586     *
587     * @return string
588     */
589    public static function strtolower(string $string): string
590    {
591        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
592            $string = strtr($string, self::DOTLESS_I_TOLOWER);
593        }
594
595        return mb_strtolower($string);
596    }
597
598    /**
599     * Convert a string to upper case.
600     *
601     * @param string $string
602     *
603     * @return string
604     */
605    public static function strtoupper(string $string): string
606    {
607        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
608            $string = strtr($string, self::DOTLESS_I_TOUPPER);
609        }
610
611        return mb_strtoupper($string);
612    }
613
614    /**
615     * What format is used to display dates in the current locale?
616     *
617     * @return string
618     */
619    public static function timeFormat(): string
620    {
621        /* I18N: This is the format string for the time-of-day. See https://php.net/date for codes */
622        return self::$translator->translate('%H:%i:%s');
623    }
624
625    /**
626     * Context sensitive version of translate.
627     * echo I18N::translateContext('NOMINATIVE', 'January');
628     * echo I18N::translateContext('GENITIVE', 'January');
629     *
630     * @param string $context
631     * @param string $message
632     * @param string ...$args
633     *
634     * @return string
635     */
636    public static function translateContext(string $context, string $message, ...$args): string
637    {
638        $message = self::$translator->translateContext($context, $message);
639
640        return sprintf($message, ...$args);
641    }
642}
643