xref: /webtrees/app/I18N.php (revision a5f003cf4c42de438f568dff311c4fbb7495e541)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees;
21
22use Closure;
23use Collator;
24use Exception;
25use Fisharebest\Localization\Locale;
26use Fisharebest\Localization\Locale\LocaleEnUs;
27use Fisharebest\Localization\Locale\LocaleInterface;
28use Fisharebest\Localization\Translation;
29use Fisharebest\Localization\Translator;
30use Fisharebest\Webtrees\Module\ModuleCustomInterface;
31use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
32use Fisharebest\Webtrees\Services\ModuleService;
33
34use function array_merge;
35use function class_exists;
36use function html_entity_decode;
37use function in_array;
38use function mb_strtolower;
39use function mb_strtoupper;
40use function mb_substr;
41use function ord;
42use function sprintf;
43use function str_contains;
44use function str_replace;
45use function strcmp;
46use function strip_tags;
47use function strlen;
48use function strtr;
49use function var_export;
50
51/**
52 * Internationalization (i18n) and localization (l10n).
53 */
54class I18N
55{
    // MO files use special characters for plurals and context.
    // PLURAL is the NUL byte (0x00) and CONTEXT is the byte 0x04.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    // Digits are always rendered LTR, even in RTL text.
    // reverseText() keeps any run of these characters in its original order.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';
62
63    // These locales need special handling for the dotless letter I.
64    private const DOTLESS_I_LOCALES = [
65        'az',
66        'tr',
67    ];
68
69    private const DOTLESS_I_TOLOWER = [
70        'I' => 'ı',
71        'İ' => 'i',
72    ];
73
74    private const DOTLESS_I_TOUPPER = [
75        'ı' => 'I',
76        'i' => 'İ',
77    ];
78
79    // The ranges of characters used by each script.
80    private const SCRIPT_CHARACTER_RANGES = [
81        [
82            'Latn',
83            0x0041,
84            0x005A,
85        ],
86        [
87            'Latn',
88            0x0061,
89            0x007A,
90        ],
91        [
92            'Latn',
93            0x0100,
94            0x02AF,
95        ],
96        [
97            'Grek',
98            0x0370,
99            0x03FF,
100        ],
101        [
102            'Cyrl',
103            0x0400,
104            0x052F,
105        ],
106        [
107            'Hebr',
108            0x0590,
109            0x05FF,
110        ],
111        [
112            'Arab',
113            0x0600,
114            0x06FF,
115        ],
116        [
117            'Arab',
118            0x0750,
119            0x077F,
120        ],
121        [
122            'Arab',
123            0x08A0,
124            0x08FF,
125        ],
126        [
127            'Deva',
128            0x0900,
129            0x097F,
130        ],
131        [
132            'Taml',
133            0x0B80,
134            0x0BFF,
135        ],
136        [
137            'Sinh',
138            0x0D80,
139            0x0DFF,
140        ],
141        [
142            'Thai',
143            0x0E00,
144            0x0E7F,
145        ],
146        [
147            'Geor',
148            0x10A0,
149            0x10FF,
150        ],
151        [
152            'Grek',
153            0x1F00,
154            0x1FFF,
155        ],
156        [
157            'Deva',
158            0xA8E0,
159            0xA8FF,
160        ],
161        [
162            'Hans',
163            0x3000,
164            0x303F,
165        ],
166        // Mixed CJK, not just Hans
167        [
168            'Hans',
169            0x3400,
170            0xFAFF,
171        ],
172        // Mixed CJK, not just Hans
173        [
174            'Hans',
175            0x20000,
176            0x2FA1F,
177        ],
178        // Mixed CJK, not just Hans
179    ];
180
181    // Characters that are displayed in mirror form in RTL text.
182    private const MIRROR_CHARACTERS = [
183        '('  => ')',
184        ')'  => '(',
185        '['  => ']',
186        ']'  => '[',
187        '{'  => '}',
188        '}'  => '{',
189        '<'  => '>',
190        '>'  => '<',
191        '‹ ' => '›',
192        '› ' => '‹',
193        '«'  => '»',
194        '»'  => '«',
195        '﴾ ' => '﴿',
196        '﴿ ' => '﴾',
197        '“ ' => '”',
198        '” ' => '“',
199        '‘ ' => '’',
200        '’ ' => '‘',
201    ];
202
    // Punctuation used to separate list items, typically a comma
    // Assigned by init() from the translation of ', '.
    public static string $list_separator;

    // The language module whose locale matches the code given to init().
    // Only assigned when init() is called with $setup = false.
    private static ModuleLanguageInterface $language;

    // The current locale, created by init().
    private static LocaleInterface $locale;

    // The translator for the current locale, created by init().
    private static Translator $translator;

    // Collator for the current locale; stays null unless init() creates one.
    private static ?Collator $collator = null;
213
214    /**
215     * The preferred locales for this site, or a default list if no preference.
216     *
217     * @return array<LocaleInterface>
218     */
219    public static function activeLocales(): array
220    {
221        $locales = app(ModuleService::class)
222            ->findByInterface(ModuleLanguageInterface::class, false, true)
223            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
224                return $module->locale();
225            });
226
227        if ($locales->isEmpty()) {
228            return [new LocaleEnUs()];
229        }
230
231        return $locales->all();
232    }
233
234    /**
235     * Which MySQL collation should be used for this locale?
236     *
237     * @return string
238     */
239    public static function collation(): string
240    {
241        $collation = self::$locale->collation();
242        switch ($collation) {
243            case 'croatian_ci':
244            case 'german2_ci':
245            case 'vietnamese_ci':
246                // Only available in MySQL 5.6
247                return 'utf8_unicode_ci';
248            default:
249                return 'utf8_' . $collation;
250        }
251    }
252
253    /**
254     * What format is used to display dates in the current locale?
255     *
256     * @return string
257     */
258    public static function dateFormat(): string
259    {
260        /* I18N: This is the format string for full dates. See https://php.net/date for codes */
261        return self::$translator->translate('%j %F %Y');
262    }
263
264    /**
265     * Convert the digits 0-9 into the local script
266     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
267     *
268     * @param string|int $n
269     *
270     * @return string
271     */
272    public static function digits(string|int $n): string
273    {
274        return self::$locale->digits((string) $n);
275    }
276
277    /**
278     * What is the direction of the current locale
279     *
280     * @return string "ltr" or "rtl"
281     */
282    public static function direction(): string
283    {
284        return self::$locale->direction();
285    }
286
287    /**
288     * Initialise the translation adapter with a locale setting.
289     *
290     * @param string $code
291     * @param bool   $setup
292     *
293     * @return void
294     */
295    public static function init(string $code, bool $setup = false): void
296    {
297        self::$locale = Locale::create($code);
298
299        // Load the translation file
300        $translation_file = __DIR__ . '/../resources/lang/' . self::$locale->languageTag() . '/messages.php';
301
302        try {
303            $translation  = new Translation($translation_file);
304            $translations = $translation->asArray();
305        } catch (Exception) {
306            // The translations files are created during the build process, and are
307            // not included in the source code.
308            // Assuming we are using dev code, and build (or rebuild) the files.
309            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
310            $translation  = new Translation($po_file);
311            $translations = $translation->asArray();
312            file_put_contents($translation_file, "<?php\n\nreturn " . var_export($translations, true) . ";\n");
313        }
314
315        // Add translations from custom modules (but not during setup, as we have no database/modules)
316        if (!$setup) {
317            $module_service = app(ModuleService::class);
318
319            $translations = $module_service
320                ->findByInterface(ModuleCustomInterface::class)
321                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
322                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
323                }, $translations);
324
325            self::$language = $module_service
326                ->findByInterface(ModuleLanguageInterface::class, true)
327                ->first(fn (ModuleLanguageInterface $module): bool => $module->locale()->languageTag() === $code);
328        }
329
330        // Create a translator
331        self::$translator = new Translator($translations, self::$locale->pluralRule());
332
333        /* I18N: This punctuation is used to separate lists of items */
334        self::$list_separator = self::translate(', ');
335
336        // Create a collator
337        try {
338            // Symfony provides a very incomplete polyfill - which cannot be used.
339            if (class_exists('Collator')) {
340                // Need phonebook collation rules for German Ä, Ö and Ü.
341                if (str_contains(self::$locale->code(), '@')) {
342                    self::$collator = new Collator(self::$locale->code() . ';collation=phonebook');
343                } else {
344                    self::$collator = new Collator(self::$locale->code() . '@collation=phonebook');
345                }
346                // Ignore upper/lower case differences
347                self::$collator->setStrength(Collator::SECONDARY);
348            }
349        } catch (Exception) {
350            // PHP-INTL is not installed?  We'll use a fallback later.
351        }
352    }
353
354    /**
355     * Translate a string, and then substitute placeholders
356     * echo I18N::translate('Hello World!');
357     * echo I18N::translate('The %s sat on the mat', 'cat');
358     *
359     * @param string $message
360     * @param string ...$args
361     *
362     * @return string
363     */
364    public static function translate(string $message, ...$args): string
365    {
366        $message = self::$translator->translate($message);
367
368        return sprintf($message, ...$args);
369    }
370
    /**
     * The language tag of the current locale.
     *
     * @return string
     */
    public static function languageTag(): string
    {
        return self::$locale->languageTag();
    }
378
    /**
     * The current locale, as created by init().
     *
     * @return LocaleInterface
     */
    public static function locale(): LocaleInterface
    {
        return self::$locale;
    }
386
    /**
     * The language module for the current locale.
     *
     * NOTE: init() only assigns this when it is called with $setup = false.
     *
     * @return ModuleLanguageInterface
     */
    public static function language(): ModuleLanguageInterface
    {
        return self::$language;
    }
394
395    /**
396     * Translate a number into the local representation.
397     * e.g. 12345.67 becomes
398     * en: 12,345.67
399     * fr: 12 345,67
400     * de: 12.345,67
401     *
402     * @param float $n
403     * @param int   $precision
404     *
405     * @return string
406     */
407    public static function number(float $n, int $precision = 0): string
408    {
409        return self::$locale->number(round($n, $precision));
410    }
411
412    /**
413     * Translate a fraction into a percentage.
414     * e.g. 0.123 becomes
415     * en: 12.3%
416     * fr: 12,3 %
417     * de: 12,3%
418     *
419     * @param float $n
420     * @param int   $precision
421     *
422     * @return string
423     */
424    public static function percentage(float $n, int $precision = 0): string
425    {
426        return self::$locale->percent(round($n, $precision + 2));
427    }
428
429    /**
430     * Translate a plural string
431     * echo self::plural('There is an error', 'There are errors', $num_errors);
432     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
433     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
434     *
435     * @param string $singular
436     * @param string $plural
437     * @param int    $count
438     * @param string ...$args
439     *
440     * @return string
441     */
442    public static function plural(string $singular, string $plural, int $count, ...$args): string
443    {
444        $message = self::$translator->translatePlural($singular, $plural, $count);
445
446        return sprintf($message, ...$args);
447    }
448
449    /**
450     * UTF8 version of PHP::strrev()
451     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
452     * These do not support UTF8 text direction, so we must mimic it for them.
453     * Numbers are always rendered LTR, even in RTL text.
454     * The visual direction of characters such as parentheses should be reversed.
455     *
456     * @param string $text Text to be reversed
457     *
458     * @return string
459     */
460    public static function reverseText(string $text): string
461    {
462        // Remove HTML markup - we can't display it and it is LTR.
463        $text = strip_tags($text);
464        // Remove HTML entities.
465        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
466
467        // LTR text doesn't need reversing
468        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
469            return $text;
470        }
471
472        // Mirrored characters
473        $text = strtr($text, self::MIRROR_CHARACTERS);
474
475        $reversed = '';
476        $digits   = '';
477        while ($text !== '') {
478            $letter = mb_substr($text, 0, 1);
479            $text   = mb_substr($text, 1);
480            if (str_contains(self::DIGITS, $letter)) {
481                $digits .= $letter;
482            } else {
483                $reversed = $letter . $digits . $reversed;
484                $digits   = '';
485            }
486        }
487
488        return $digits . $reversed;
489    }
490
491    /**
492     * Return the direction (ltr or rtl) for a given script
493     * The PHP/intl library does not provde this information, so we need
494     * our own lookup table.
495     *
496     * @param string $script
497     *
498     * @return string
499     */
500    public static function scriptDirection(string $script): string
501    {
502        switch ($script) {
503            case 'Arab':
504            case 'Hebr':
505            case 'Mong':
506            case 'Thaa':
507                return 'rtl';
508            default:
509                return 'ltr';
510        }
511    }
512
513    /**
514     * Identify the script used for a piece of text
515     *
516     * @param string $string
517     *
518     * @return string
519     */
520    public static function textScript(string $string): string
521    {
522        $string = strip_tags($string); // otherwise HTML tags show up as latin
523        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
524        $string = str_replace([
525            Individual::NOMEN_NESCIO,
526            Individual::PRAENOMEN_NESCIO,
527        ], '', $string);
528        $pos    = 0;
529        $strlen = strlen($string);
530        while ($pos < $strlen) {
531            // get the Unicode Code Point for the character at position $pos
532            $byte1 = ord($string[$pos]);
533            if ($byte1 < 0x80) {
534                $code_point = $byte1;
535                $chrlen     = 1;
536            } elseif ($byte1 < 0xC0) {
537                // Invalid continuation character
538                return 'Latn';
539            } elseif ($byte1 < 0xE0) {
540                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
541                $chrlen     = 2;
542            } elseif ($byte1 < 0xF0) {
543                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
544                $chrlen     = 3;
545            } elseif ($byte1 < 0xF8) {
546                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
547                $chrlen     = 3;
548            } else {
549                // Invalid UTF
550                return 'Latn';
551            }
552
553            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
554                if ($code_point >= $range[1] && $code_point <= $range[2]) {
555                    return $range[0];
556                }
557            }
558            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
559            $pos += $chrlen;
560        }
561
562        return 'Latn';
563    }
564
565    /**
566     * A closure which will compare strings using local collation rules.
567     *
568     * @return Closure
569     */
570    public static function comparator(): Closure
571    {
572        $collator = self::$collator;
573
574        if ($collator instanceof Collator) {
575            return static fn (string $x, string $y): int => (int) $collator->compare($x, $y);
576        }
577
578        return static fn (string $x, string $y): int => strcmp(self::strtolower($x), self::strtolower($y));
579    }
580
581
582
583    /**
584     * Convert a string to lower case.
585     *
586     * @param string $string
587     *
588     * @return string
589     */
590    public static function strtolower(string $string): string
591    {
592        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
593            $string = strtr($string, self::DOTLESS_I_TOLOWER);
594        }
595
596        return mb_strtolower($string);
597    }
598
599    /**
600     * Convert a string to upper case.
601     *
602     * @param string $string
603     *
604     * @return string
605     */
606    public static function strtoupper(string $string): string
607    {
608        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
609            $string = strtr($string, self::DOTLESS_I_TOUPPER);
610        }
611
612        return mb_strtoupper($string);
613    }
614
615    /**
616     * What format is used to display dates in the current locale?
617     *
618     * @return string
619     */
620    public static function timeFormat(): string
621    {
622        /* I18N: This is the format string for the time-of-day. See https://php.net/date for codes */
623        return self::$translator->translate('%H:%i:%s');
624    }
625
626    /**
627     * Context sensitive version of translate.
628     * echo I18N::translateContext('NOMINATIVE', 'January');
629     * echo I18N::translateContext('GENITIVE', 'January');
630     *
631     * @param string $context
632     * @param string $message
633     * @param string ...$args
634     *
635     * @return string
636     */
637    public static function translateContext(string $context, string $message, ...$args): string
638    {
639        $message = self::$translator->translateContext($context, $message);
640
641        return sprintf($message, ...$args);
642    }
643}
644