xref: /webtrees/app/I18N.php (revision af7b1f136d0947b42fda7a736f4b48319cc835dd)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees;
21
22use Closure;
23use Collator;
24use Exception;
25use Fisharebest\Localization\Locale;
26use Fisharebest\Localization\Locale\LocaleEnUs;
27use Fisharebest\Localization\Locale\LocaleInterface;
28use Fisharebest\Localization\Translation;
29use Fisharebest\Localization\Translator;
30use Fisharebest\Webtrees\Module\ModuleCustomInterface;
31use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
32use Fisharebest\Webtrees\Services\ModuleService;
33use Illuminate\Support\Collection;
34
35use function array_merge;
36use function class_exists;
37use function html_entity_decode;
38use function in_array;
39use function mb_strtolower;
40use function mb_strtoupper;
41use function mb_substr;
42use function ord;
43use function sprintf;
44use function str_contains;
45use function str_replace;
46use function strcmp;
47use function strip_tags;
48use function strlen;
49use function strtr;
50use function var_export;
51
52/**
53 * Internationalization (i18n) and localization (l10n).
54 */
55class I18N
56{
57    // MO files use special characters for plurals and context.
58    public const PLURAL  = "\x00";
59    public const CONTEXT = "\x04";
60
61    // Digits are always rendered LTR, even in RTL text.
62    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';
63
64    // These locales need special handling for the dotless letter I.
65    private const DOTLESS_I_LOCALES = [
66        'az',
67        'tr',
68    ];
69
70    private const DOTLESS_I_TOLOWER = [
71        'I' => 'ı',
72        'İ' => 'i',
73    ];
74
75    private const DOTLESS_I_TOUPPER = [
76        'ı' => 'I',
77        'i' => 'İ',
78    ];
79
80    // The ranges of characters used by each script.
81    private const SCRIPT_CHARACTER_RANGES = [
82        [
83            'Latn',
84            0x0041,
85            0x005A,
86        ],
87        [
88            'Latn',
89            0x0061,
90            0x007A,
91        ],
92        [
93            'Latn',
94            0x0100,
95            0x02AF,
96        ],
97        [
98            'Grek',
99            0x0370,
100            0x03FF,
101        ],
102        [
103            'Cyrl',
104            0x0400,
105            0x052F,
106        ],
107        [
108            'Hebr',
109            0x0590,
110            0x05FF,
111        ],
112        [
113            'Arab',
114            0x0600,
115            0x06FF,
116        ],
117        [
118            'Arab',
119            0x0750,
120            0x077F,
121        ],
122        [
123            'Arab',
124            0x08A0,
125            0x08FF,
126        ],
127        [
128            'Deva',
129            0x0900,
130            0x097F,
131        ],
132        [
133            'Taml',
134            0x0B80,
135            0x0BFF,
136        ],
137        [
138            'Sinh',
139            0x0D80,
140            0x0DFF,
141        ],
142        [
143            'Thai',
144            0x0E00,
145            0x0E7F,
146        ],
147        [
148            'Geor',
149            0x10A0,
150            0x10FF,
151        ],
152        [
153            'Grek',
154            0x1F00,
155            0x1FFF,
156        ],
157        [
158            'Deva',
159            0xA8E0,
160            0xA8FF,
161        ],
162        [
163            'Hans',
164            0x3000,
165            0x303F,
166        ],
167        // Mixed CJK, not just Hans
168        [
169            'Hans',
170            0x3400,
171            0xFAFF,
172        ],
173        // Mixed CJK, not just Hans
174        [
175            'Hans',
176            0x20000,
177            0x2FA1F,
178        ],
179        // Mixed CJK, not just Hans
180    ];
181
182    // Characters that are displayed in mirror form in RTL text.
183    private const MIRROR_CHARACTERS = [
184        '('  => ')',
185        ')'  => '(',
186        '['  => ']',
187        ']'  => '[',
188        '{'  => '}',
189        '}'  => '{',
190        '<'  => '>',
191        '>'  => '<',
192        '‹ ' => '›',
193        '› ' => '‹',
194        '«'  => '»',
195        '»'  => '«',
196        '﴾ ' => '﴿',
197        '﴿ ' => '﴾',
198        '“ ' => '”',
199        '” ' => '“',
200        '‘ ' => '’',
201        '’ ' => '‘',
202    ];
203
204    // Punctuation used to separate list items, typically a comma
205    public static string $list_separator;
206
207    private static ?ModuleLanguageInterface $language;
208
209    private static LocaleInterface $locale;
210
211    private static Translator $translator;
212
213    private static ?Collator $collator;
214
215    /**
216     * The preferred locales for this site, or a default list if no preference.
217     *
218     * @return LocaleInterface[]
219     */
220    public static function activeLocales(): array
221    {
222        /** @var Collection $locales */
223        $locales = app(ModuleService::class)
224            ->findByInterface(ModuleLanguageInterface::class, false, true)
225            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
226                return $module->locale();
227            });
228
229        if ($locales->isEmpty()) {
230            return [new LocaleEnUs()];
231        }
232
233        return $locales->all();
234    }
235
236    /**
237     * Which MySQL collation should be used for this locale?
238     *
239     * @return string
240     */
241    public static function collation(): string
242    {
243        $collation = self::$locale->collation();
244        switch ($collation) {
245            case 'croatian_ci':
246            case 'german2_ci':
247            case 'vietnamese_ci':
248                // Only available in MySQL 5.6
249                return 'utf8_unicode_ci';
250            default:
251                return 'utf8_' . $collation;
252        }
253    }
254
255    /**
256     * What format is used to display dates in the current locale?
257     *
258     * @return string
259     */
260    public static function dateFormat(): string
261    {
262        /* I18N: This is the format string for full dates. See https://php.net/date for codes */
263        return self::$translator->translate('%j %F %Y');
264    }
265
266    /**
267     * Convert the digits 0-9 into the local script
268     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
269     *
270     * @param string|int $n
271     *
272     * @return string
273     */
274    public static function digits($n): string
275    {
276        return self::$locale->digits((string) $n);
277    }
278
279    /**
280     * What is the direction of the current locale
281     *
282     * @return string "ltr" or "rtl"
283     */
284    public static function direction(): string
285    {
286        return self::$locale->direction();
287    }
288
289    /**
290     * Initialise the translation adapter with a locale setting.
291     *
292     * @param string $code
293     * @param bool   $setup
294     *
295     * @return void
296     */
297    public static function init(string $code, bool $setup = false): void
298    {
299        self::$locale = Locale::create($code);
300
301        // Load the translation file
302        $translation_file = __DIR__ . '/../resources/lang/' . self::$locale->languageTag() . '/messages.php';
303
304        try {
305            $translation  = new Translation($translation_file);
306            $translations = $translation->asArray();
307        } catch (Exception $ex) {
308            // The translations files are created during the build process, and are
309            // not included in the source code.
310            // Assuming we are using dev code, and build (or rebuild) the files.
311            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
312            $translation  = new Translation($po_file);
313            $translations = $translation->asArray();
314            file_put_contents($translation_file, "<?php\n\nreturn " . var_export($translations, true) . ";\n");
315        }
316
317        // Add translations from custom modules (but not during setup, as we have no database/modules)
318        if (!$setup) {
319            $module_service = app(ModuleService::class);
320
321            $translations = $module_service
322                ->findByInterface(ModuleCustomInterface::class)
323                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
324                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
325                }, $translations);
326
327            self::$language = $module_service
328                ->findByInterface(ModuleLanguageInterface::class)
329                ->first(fn (ModuleLanguageInterface $module): bool => $module->locale()->languageTag() === $code);
330        }
331
332        // Create a translator
333        self::$translator = new Translator($translations, self::$locale->pluralRule());
334
335        /* I18N: This punctuation is used to separate lists of items */
336        self::$list_separator = self::translate(', ');
337
338        // Create a collator
339        try {
340            if (class_exists('Collator')) {
341                // Symfony provides a very incomplete polyfill - which cannot be used.
342                self::$collator = new Collator(self::$locale->code());
343                // Ignore upper/lower case differences
344                self::$collator->setStrength(Collator::SECONDARY);
345            }
346        } catch (Exception $ex) {
347            // PHP-INTL is not installed?  We'll use a fallback later.
348            self::$collator = null;
349        }
350    }
351
352    /**
353     * Translate a string, and then substitute placeholders
354     * echo I18N::translate('Hello World!');
355     * echo I18N::translate('The %s sat on the mat', 'cat');
356     *
357     * @param string $message
358     * @param string ...$args
359     *
360     * @return string
361     */
362    public static function translate(string $message, ...$args): string
363    {
364        $message = self::$translator->translate($message);
365
366        return sprintf($message, ...$args);
367    }
368
369    /**
370     * @return string
371     */
372    public static function languageTag(): string
373    {
374        return self::$locale->languageTag();
375    }
376
377    /**
378     * @return LocaleInterface
379     */
380    public static function locale(): LocaleInterface
381    {
382        return self::$locale;
383    }
384
385    /**
386     * @return ModuleLanguageInterface
387     */
388    public static function language(): ModuleLanguageInterface
389    {
390        return self::$language;
391    }
392
393    /**
394     * Translate a number into the local representation.
395     * e.g. 12345.67 becomes
396     * en: 12,345.67
397     * fr: 12 345,67
398     * de: 12.345,67
399     *
400     * @param float $n
401     * @param int   $precision
402     *
403     * @return string
404     */
405    public static function number(float $n, int $precision = 0): string
406    {
407        return self::$locale->number(round($n, $precision));
408    }
409
410    /**
411     * Translate a fraction into a percentage.
412     * e.g. 0.123 becomes
413     * en: 12.3%
414     * fr: 12,3 %
415     * de: 12,3%
416     *
417     * @param float $n
418     * @param int   $precision
419     *
420     * @return string
421     */
422    public static function percentage(float $n, int $precision = 0): string
423    {
424        return self::$locale->percent(round($n, $precision + 2));
425    }
426
427    /**
428     * Translate a plural string
429     * echo self::plural('There is an error', 'There are errors', $num_errors);
430     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
431     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
432     *
433     * @param string $singular
434     * @param string $plural
435     * @param int    $count
436     * @param string ...$args
437     *
438     * @return string
439     */
440    public static function plural(string $singular, string $plural, int $count, ...$args): string
441    {
442        $message = self::$translator->translatePlural($singular, $plural, $count);
443
444        return sprintf($message, ...$args);
445    }
446
447    /**
448     * UTF8 version of PHP::strrev()
449     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
450     * These do not support UTF8 text direction, so we must mimic it for them.
451     * Numbers are always rendered LTR, even in RTL text.
452     * The visual direction of characters such as parentheses should be reversed.
453     *
454     * @param string $text Text to be reversed
455     *
456     * @return string
457     */
458    public static function reverseText(string $text): string
459    {
460        // Remove HTML markup - we can't display it and it is LTR.
461        $text = strip_tags($text);
462        // Remove HTML entities.
463        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
464
465        // LTR text doesn't need reversing
466        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
467            return $text;
468        }
469
470        // Mirrored characters
471        $text = strtr($text, self::MIRROR_CHARACTERS);
472
473        $reversed = '';
474        $digits   = '';
475        while ($text !== '') {
476            $letter = mb_substr($text, 0, 1);
477            $text   = mb_substr($text, 1);
478            if (str_contains(self::DIGITS, $letter)) {
479                $digits .= $letter;
480            } else {
481                $reversed = $letter . $digits . $reversed;
482                $digits   = '';
483            }
484        }
485
486        return $digits . $reversed;
487    }
488
489    /**
490     * Return the direction (ltr or rtl) for a given script
491     * The PHP/intl library does not provde this information, so we need
492     * our own lookup table.
493     *
494     * @param string $script
495     *
496     * @return string
497     */
498    public static function scriptDirection(string $script): string
499    {
500        switch ($script) {
501            case 'Arab':
502            case 'Hebr':
503            case 'Mong':
504            case 'Thaa':
505                return 'rtl';
506            default:
507                return 'ltr';
508        }
509    }
510
511    /**
512     * Identify the script used for a piece of text
513     *
514     * @param string $string
515     *
516     * @return string
517     */
518    public static function textScript(string $string): string
519    {
520        $string = strip_tags($string); // otherwise HTML tags show up as latin
521        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
522        $string = str_replace([
523            Individual::NOMEN_NESCIO,
524            Individual::PRAENOMEN_NESCIO,
525        ], '', $string);
526        $pos    = 0;
527        $strlen = strlen($string);
528        while ($pos < $strlen) {
529            // get the Unicode Code Point for the character at position $pos
530            $byte1 = ord($string[$pos]);
531            if ($byte1 < 0x80) {
532                $code_point = $byte1;
533                $chrlen     = 1;
534            } elseif ($byte1 < 0xC0) {
535                // Invalid continuation character
536                return 'Latn';
537            } elseif ($byte1 < 0xE0) {
538                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
539                $chrlen     = 2;
540            } elseif ($byte1 < 0xF0) {
541                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
542                $chrlen     = 3;
543            } elseif ($byte1 < 0xF8) {
544                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
545                $chrlen     = 3;
546            } else {
547                // Invalid UTF
548                return 'Latn';
549            }
550
551            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
552                if ($code_point >= $range[1] && $code_point <= $range[2]) {
553                    return $range[0];
554                }
555            }
556            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
557            $pos += $chrlen;
558        }
559
560        return 'Latn';
561    }
562
563    /**
564     * A closure which will compare strings using local collation rules.
565     *
566     * @return Closure
567     */
568    public static function comparator(): Closure
569    {
570        if (self::$collator instanceof Collator) {
571            return static function (string $x, string $y): int {
572                return (int) self::$collator->compare($x, $y);
573            };
574        }
575
576        return static function (string $x, string $y): int {
577            return strcmp(self::strtolower($x), self::strtolower($y));
578        };
579    }
580
581
582
583    /**
584     * Convert a string to lower case.
585     *
586     * @param string $string
587     *
588     * @return string
589     */
590    public static function strtolower(string $string): string
591    {
592        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
593            $string = strtr($string, self::DOTLESS_I_TOLOWER);
594        }
595
596        return mb_strtolower($string);
597    }
598
599    /**
600     * Convert a string to upper case.
601     *
602     * @param string $string
603     *
604     * @return string
605     */
606    public static function strtoupper(string $string): string
607    {
608        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
609            $string = strtr($string, self::DOTLESS_I_TOUPPER);
610        }
611
612        return mb_strtoupper($string);
613    }
614
615    /**
616     * What format is used to display dates in the current locale?
617     *
618     * @return string
619     */
620    public static function timeFormat(): string
621    {
622        /* I18N: This is the format string for the time-of-day. See https://php.net/date for codes */
623        return self::$translator->translate('%H:%i:%s');
624    }
625
626    /**
627     * Context sensitive version of translate.
628     * echo I18N::translateContext('NOMINATIVE', 'January');
629     * echo I18N::translateContext('GENITIVE', 'January');
630     *
631     * @param string $context
632     * @param string $message
633     * @param string ...$args
634     *
635     * @return string
636     */
637    public static function translateContext(string $context, string $message, ...$args): string
638    {
639        $message = self::$translator->translateContext($context, $message);
640
641        return sprintf($message, ...$args);
642    }
643}
644