xref: /webtrees/app/I18N.php (revision aa27872dda4a16c6164a650a3155df6bebf99ca4)
1<?php
2
/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees;
21
22use Closure;
23use Collator;
24use Exception;
25use Fisharebest\Localization\Locale;
26use Fisharebest\Localization\Locale\LocaleEnUs;
27use Fisharebest\Localization\Locale\LocaleInterface;
28use Fisharebest\Localization\Translation;
29use Fisharebest\Localization\Translator;
30use Fisharebest\Webtrees\Module\ModuleCustomInterface;
31use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
32use Fisharebest\Webtrees\Services\ModuleService;
33use Illuminate\Support\Collection;
34
35use function array_merge;
36use function class_exists;
37use function html_entity_decode;
38use function in_array;
39use function mb_strtolower;
40use function mb_strtoupper;
41use function mb_substr;
42use function ord;
43use function sprintf;
44use function str_contains;
45use function str_replace;
46use function strcmp;
47use function strip_tags;
48use function strlen;
49use function strtr;
50use function var_export;
51
/**
 * Internationalization (i18n) and localization (l10n).
 *
 * A static facade over the current locale, translator and collator.
 * I18N::init() must be called before any method that reads the locale
 * or the translator.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // The list is searched in order and the first match wins, so narrow ranges
    // must come before any wider range that contains them.
    private const SCRIPT_CHARACTER_RANGES = [
        [
            'Latn',
            0x0041,
            0x005A,
        ],
        [
            'Latn',
            0x0061,
            0x007A,
        ],
        [
            'Latn',
            0x0100,
            0x02AF,
        ],
        [
            'Grek',
            0x0370,
            0x03FF,
        ],
        [
            'Cyrl',
            0x0400,
            0x052F,
        ],
        [
            'Hebr',
            0x0590,
            0x05FF,
        ],
        [
            'Arab',
            0x0600,
            0x06FF,
        ],
        [
            'Arab',
            0x0750,
            0x077F,
        ],
        [
            'Arab',
            0x08A0,
            0x08FF,
        ],
        [
            'Deva',
            0x0900,
            0x097F,
        ],
        [
            'Taml',
            0x0B80,
            0x0BFF,
        ],
        [
            'Sinh',
            0x0D80,
            0x0DFF,
        ],
        [
            'Thai',
            0x0E00,
            0x0E7F,
        ],
        [
            'Geor',
            0x10A0,
            0x10FF,
        ],
        [
            'Grek',
            0x1F00,
            0x1FFF,
        ],
        [
            'Deva',
            0xA8E0,
            0xA8FF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3000,
            0x303F,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3400,
            0xFAFF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x20000,
            0x2FA1F,
        ],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Every key must be exactly one character (no padding inside the quotes),
    // otherwise strtr() only matches it when followed by that padding.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        /** @var Collection $locales */
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        // No language modules found: fall back to US English.
        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        // Call the translator directly: the "%" codes are date codes, not sprintf() placeholders.
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * Sets the current locale, loads its translations (adding those from
     * custom modules unless $setup is true), sets the list separator and
     * creates a collator when the Collator class is available.
     *
     * @param string $code  The locale code or language tag to activate
     * @param bool   $setup True during setup, when custom modules are not consulted
     *
     * @return void
     */
    public static function init(string $code, bool $setup = false): void
    {
        self::$locale = Locale::create($code);

        // Load the translation file
        $translation_file = __DIR__ . '/../resources/lang/' . self::$locale->languageTag() . '/messages.php';

        try {
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();
        } catch (Exception $ex) {
            // The translations files are created during the build process, and are
            // not included in the source code.
            // Assuming we are using dev code, and build (or rebuild) the files.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
            $translation  = new Translation($po_file);
            $translations = $translation->asArray();
            file_put_contents($translation_file, "<?php\n\nreturn " . var_export($translations, true) . ";\n");
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed?  We'll use a fallback later.
            self::$collator = null;
        }
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * The language tag of the current locale.
     *
     * @return string
     */
    public static function languageTag(): string
    {
        return self::$locale->languageTag();
    }

    /**
     * The current locale, as set by init().
     *
     * @return LocaleInterface
     */
    public static function locale(): LocaleInterface
    {
        return self::$locale;
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision Number of decimal places to round to
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision Number of decimal places in the percentage
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The fraction needs two more decimal places than the percentage it represents.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string The text with markup removed; reversed only if its script is RTL
     */
    public static function reverseText(string $text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (str_contains(self::DIGITS, $letter)) {
                // Collect a run of digits, so that it keeps its LTR order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // Any digits still pending were at the end of the text, so they now come first.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string "ltr" or "rtl"
     */
    public static function scriptDirection(string $script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Identify the script used for a piece of text
     *
     * The script of the first character that falls in a known range is
     * returned. Text with no such character, and text that is not valid
     * UTF-8, is reported as "Latn".
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript(string $string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            Individual::NOMEN_NESCIO,
            Individual::PRAENOMEN_NESCIO,
        ], '', $string);
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The first byte gives the length of the sequence and the high bits of the code point.
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $code_point = $byte1;
                $chrlen     = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $code_point = $byte1 & 0x1F;
                $chrlen     = 2;
            } elseif ($byte1 < 0xF0) {
                $code_point = $byte1 & 0x0F;
                $chrlen     = 3;
            } elseif ($byte1 < 0xF8) {
                $code_point = $byte1 & 0x07;
                $chrlen     = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence
                return 'Latn';
            }

            // Each continuation byte contributes its low six bits.
            for ($i = 1; $i < $chrlen; $i++) {
                $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * A closure which will compare strings using local collation rules.
     *
     * Uses the collator created by init() when there is one; otherwise
     * compares the lower-case forms of the two strings byte by byte.
     *
     * @return Closure
     */
    public static function comparator(): Closure
    {
        if (self::$collator instanceof Collator) {
            return static function (string $x, string $y): int {
                return (int) self::$collator->compare($x, $y);
            };
        }

        return static function (string $x, string $y): int {
            return strcmp(self::strtolower($x), self::strtolower($y));
        };
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower(string $string): string
    {
        // Map the dotted/dotless I first, as mb_strtolower() does not know the current locale.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper(string $string): string
    {
        // Map the dotted/dotless I first, as mb_strtoupper() does not know the current locale.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * What format is used to display times in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        // Call the translator directly: the "%" codes are time codes, not sprintf() placeholders.
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}
625