xref: /webtrees/app/I18N.php (revision 3976b4703df669696105ed6b024b96d433c8fbdb)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2019 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17declare(strict_types=1);
18
19namespace Fisharebest\Webtrees;
20
21use Collator;
22use Exception;
23use Fisharebest\Localization\Locale;
24use Fisharebest\Localization\Locale\LocaleEnUs;
25use Fisharebest\Localization\Locale\LocaleInterface;
26use Fisharebest\Localization\Translation;
27use Fisharebest\Localization\Translator;
28use Fisharebest\Webtrees\Module\ModuleCustomInterface;
29use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
30use Fisharebest\Webtrees\Services\ModuleService;
31use Illuminate\Support\Collection;
32
33use function array_merge;
34use function class_exists;
35use function html_entity_decode;
36use function in_array;
37use function mb_strtolower;
38use function mb_strtoupper;
39use function mb_substr;
40use function ord;
41use function sprintf;
42use function str_replace;
43use function strcmp;
44use function strip_tags;
45use function strlen;
46use function strpos;
47use function strtr;
48
49/**
50 * Internationalization (i18n) and localization (l10n).
51 */
52class I18N
53{
54    // MO files use special characters for plurals and context.
55    public const PLURAL  = "\x00";
56    public const CONTEXT = "\x04";
57    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';
58    private const DOTLESS_I_LOCALES = [
59        'az',
60        'tr',
61    ];
62    private const DOTLESS_I_TOLOWER = [
63        'I' => 'ı',
64        'İ' => 'i',
65    ];
66
67    // Digits are always rendered LTR, even in RTL text.
68    private const DOTLESS_I_TOUPPER = [
69        'ı' => 'I',
70        'i' => 'İ',
71    ];
72
73    // These locales need special handling for the dotless letter I.
74    private const SCRIPT_CHARACTER_RANGES = [
75        [
76            'Latn',
77            0x0041,
78            0x005A,
79        ],
80        [
81            'Latn',
82            0x0061,
83            0x007A,
84        ],
85        [
86            'Latn',
87            0x0100,
88            0x02AF,
89        ],
90        [
91            'Grek',
92            0x0370,
93            0x03FF,
94        ],
95        [
96            'Cyrl',
97            0x0400,
98            0x052F,
99        ],
100        [
101            'Hebr',
102            0x0590,
103            0x05FF,
104        ],
105        [
106            'Arab',
107            0x0600,
108            0x06FF,
109        ],
110        [
111            'Arab',
112            0x0750,
113            0x077F,
114        ],
115        [
116            'Arab',
117            0x08A0,
118            0x08FF,
119        ],
120        [
121            'Deva',
122            0x0900,
123            0x097F,
124        ],
125        [
126            'Taml',
127            0x0B80,
128            0x0BFF,
129        ],
130        [
131            'Sinh',
132            0x0D80,
133            0x0DFF,
134        ],
135        [
136            'Thai',
137            0x0E00,
138            0x0E7F,
139        ],
140        [
141            'Geor',
142            0x10A0,
143            0x10FF,
144        ],
145        [
146            'Grek',
147            0x1F00,
148            0x1FFF,
149        ],
150        [
151            'Deva',
152            0xA8E0,
153            0xA8FF,
154        ],
155        [
156            'Hans',
157            0x3000,
158            0x303F,
159        ],
160        // Mixed CJK, not just Hans
161        [
162            'Hans',
163            0x3400,
164            0xFAFF,
165        ],
166        // Mixed CJK, not just Hans
167        [
168            'Hans',
169            0x20000,
170            0x2FA1F,
171        ],
172        // Mixed CJK, not just Hans
173    ];
174    private const MIRROR_CHARACTERS = [
175        '('  => ')',
176        ')'  => '(',
177        '['  => ']',
178        ']'  => '[',
179        '{'  => '}',
180        '}'  => '{',
181        '<'  => '>',
182        '>'  => '<',
183        '‹ ' => '›',
184        '› ' => '‹',
185        '«'  => '»',
186        '»'  => '«',
187        '﴾ ' => '﴿',
188        '﴿ ' => '﴾',
189        '“ ' => '”',
190        '” ' => '“',
191        '‘ ' => '’',
192        '’ ' => '‘',
193    ];
194    /** @var string Punctuation used to separate list items, typically a comma */
195    public static $list_separator;
196
197    // The ranges of characters used by each script.
198    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
199    private static $locale;
200
201    // Characters that are displayed in mirror form in RTL text.
202    /** @var Translator An object that performs translation */
203    private static $translator;
204    /** @var  Collator|null From the php-intl library */
205    private static $collator;
206
207    /**
208     * The preferred locales for this site, or a default list if no preference.
209     *
210     * @return LocaleInterface[]
211     */
212    public static function activeLocales(): array
213    {
214        /** @var Collection $locales */
215        $locales = app(ModuleService::class)
216            ->findByInterface(ModuleLanguageInterface::class, false, true)
217            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
218                return $module->locale();
219            });
220
221        if ($locales->isEmpty()) {
222            return [new LocaleEnUs()];
223        }
224
225        return $locales->all();
226    }
227
228    /**
229     * Which MySQL collation should be used for this locale?
230     *
231     * @return string
232     */
233    public static function collation(): string
234    {
235        $collation = self::$locale->collation();
236        switch ($collation) {
237            case 'croatian_ci':
238            case 'german2_ci':
239            case 'vietnamese_ci':
240                // Only available in MySQL 5.6
241                return 'utf8_unicode_ci';
242            default:
243                return 'utf8_' . $collation;
244        }
245    }
246
247    /**
248     * What format is used to display dates in the current locale?
249     *
250     * @return string
251     */
252    public static function dateFormat(): string
253    {
254        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
255        return self::$translator->translate('%j %F %Y');
256    }
257
258    /**
259     * Convert the digits 0-9 into the local script
260     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
261     *
262     * @param string|int $n
263     *
264     * @return string
265     */
266    public static function digits($n): string
267    {
268        return self::$locale->digits((string) $n);
269    }
270
271    /**
272     * What is the direction of the current locale
273     *
274     * @return string "ltr" or "rtl"
275     */
276    public static function direction(): string
277    {
278        return self::$locale->direction();
279    }
280
281    /**
282     * What is the first day of the week.
283     *
284     * @return int Sunday=0, Monday=1, etc.
285     */
286    public static function firstDay(): int
287    {
288        return self::$locale->territory()->firstDay();
289    }
290
291    /**
292     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
293     *
294     * @return string
295     */
296    public static function htmlAttributes(): string
297    {
298        return self::$locale->htmlAttributes();
299    }
300
301    /**
302     * Initialise the translation adapter with a locale setting.
303     *
304     * @param string    $code  Use this locale/language code, or choose one automatically
305     * @param Tree|null $tree
306     * @param bool      $setup During setup, we cannot access the database.
307     *
308     * @return string $string
309     */
310    public static function init(string $code = '', Tree $tree = null, $setup = false): string
311    {
312        if ($code !== '') {
313            // Create the specified locale
314            self::$locale = Locale::create($code);
315        } elseif (Session::has('language')) {
316            // Select a previously used locale
317            self::$locale = Locale::create(Session::get('language'));
318        } else {
319            if ($tree instanceof Tree) {
320                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
321            } else {
322                $default_locale = new LocaleEnUs();
323            }
324
325            // Negotiate with the browser.
326            // Search engines don't negotiate.  They get the default locale of the tree.
327            if ($setup) {
328                $installed_locales = app(ModuleService::class)->setupLanguages()
329                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
330                        return $module->locale();
331                    });
332            } else {
333                $installed_locales = self::installedLocales();
334            }
335
336            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
337        }
338
339        // Load the translation file
340        $translation_file = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.php';
341
342        try {
343            $translation  = new Translation($translation_file);
344            $translations = $translation->asArray();
345        } catch (Exception $ex) {
346            // The translations files are created during the build process, and are
347            // not included in the source code.
348            // Assuming we are using dev code, and build (or rebuild) the files.
349            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
350            $translation  = new Translation($po_file);
351            $translations = $translation->asArray();
352            file_put_contents($translation_file, '<?php return ' . var_export($translations, true) . ';');
353        }
354
355        // Add translations from custom modules (but not during setup, as we have no database/modules)
356        if (!$setup) {
357            $translations = app(ModuleService::class)
358                ->findByInterface(ModuleCustomInterface::class)
359                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
360                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
361                }, $translations);
362        }
363
364        // Create a translator
365        self::$translator = new Translator($translations, self::$locale->pluralRule());
366
367        /* I18N: This punctuation is used to separate lists of items */
368        self::$list_separator = self::translate(', ');
369
370        // Create a collator
371        try {
372            if (class_exists('Collator')) {
373                // Symfony provides a very incomplete polyfill - which cannot be used.
374                self::$collator = new Collator(self::$locale->code());
375                // Ignore upper/lower case differences
376                self::$collator->setStrength(Collator::SECONDARY);
377            }
378        } catch (Exception $ex) {
379            // PHP-INTL is not installed?  We'll use a fallback later.
380            self::$collator = null;
381        }
382
383        return self::$locale->languageTag();
384    }
385
386    /**
387     * All locales for which a translation file exists.
388     *
389     * @return Collection
390     */
391    public static function installedLocales(): Collection
392    {
393        return app(ModuleService::class)
394            ->findByInterface(ModuleLanguageInterface::class, true)
395            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
396                return $module->locale();
397            });
398    }
399
400    /**
401     * Translate a string, and then substitute placeholders
402     * echo I18N::translate('Hello World!');
403     * echo I18N::translate('The %s sat on the mat', 'cat');
404     *
405     * @param string $message
406     * @param string ...$args
407     *
408     * @return string
409     */
410    public static function translate(string $message, ...$args): string
411    {
412        $message = self::$translator->translate($message);
413
414        return sprintf($message, ...$args);
415    }
416
417    /**
418     * Return the endonym for a given language - as per http://cldr.unicode.org/
419     *
420     * @param string $locale
421     *
422     * @return string
423     */
424    public static function languageName(string $locale): string
425    {
426        return Locale::create($locale)->endonym();
427    }
428
429    /**
430     * Return the script used by a given language
431     *
432     * @param string $locale
433     *
434     * @return string
435     */
436    public static function languageScript(string $locale): string
437    {
438        return Locale::create($locale)->script()->code();
439    }
440
441    /**
442     * Translate a number into the local representation.
443     * e.g. 12345.67 becomes
444     * en: 12,345.67
445     * fr: 12 345,67
446     * de: 12.345,67
447     *
448     * @param float $n
449     * @param int   $precision
450     *
451     * @return string
452     */
453    public static function number(float $n, int $precision = 0): string
454    {
455        return self::$locale->number(round($n, $precision));
456    }
457
458    /**
459     * Translate a fraction into a percentage.
460     * e.g. 0.123 becomes
461     * en: 12.3%
462     * fr: 12,3 %
463     * de: 12,3%
464     *
465     * @param float $n
466     * @param int   $precision
467     *
468     * @return string
469     */
470    public static function percentage(float $n, int $precision = 0): string
471    {
472        return self::$locale->percent(round($n, $precision + 2));
473    }
474
475    /**
476     * Translate a plural string
477     * echo self::plural('There is an error', 'There are errors', $num_errors);
478     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
479     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
480     *
481     * @param string $singular
482     * @param string $plural
483     * @param int    $count
484     * @param string ...$args
485     *
486     * @return string
487     */
488    public static function plural(string $singular, string $plural, int $count, ...$args): string
489    {
490        $message = self::$translator->translatePlural($singular, $plural, $count);
491
492        return sprintf($message, ...$args);
493    }
494
495    /**
496     * UTF8 version of PHP::strrev()
497     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
498     * These do not support UTF8 text direction, so we must mimic it for them.
499     * Numbers are always rendered LTR, even in RTL text.
500     * The visual direction of characters such as parentheses should be reversed.
501     *
502     * @param string $text Text to be reversed
503     *
504     * @return string
505     */
506    public static function reverseText($text): string
507    {
508        // Remove HTML markup - we can't display it and it is LTR.
509        $text = strip_tags($text);
510        // Remove HTML entities.
511        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
512
513        // LTR text doesn't need reversing
514        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
515            return $text;
516        }
517
518        // Mirrored characters
519        $text = strtr($text, self::MIRROR_CHARACTERS);
520
521        $reversed = '';
522        $digits   = '';
523        while ($text !== '') {
524            $letter = mb_substr($text, 0, 1);
525            $text   = mb_substr($text, 1);
526            if (strpos(self::DIGITS, $letter) !== false) {
527                $digits .= $letter;
528            } else {
529                $reversed = $letter . $digits . $reversed;
530                $digits   = '';
531            }
532        }
533
534        return $digits . $reversed;
535    }
536
537    /**
538     * Return the direction (ltr or rtl) for a given script
539     * The PHP/intl library does not provde this information, so we need
540     * our own lookup table.
541     *
542     * @param string $script
543     *
544     * @return string
545     */
546    public static function scriptDirection($script): string
547    {
548        switch ($script) {
549            case 'Arab':
550            case 'Hebr':
551            case 'Mong':
552            case 'Thaa':
553                return 'rtl';
554            default:
555                return 'ltr';
556        }
557    }
558
559    /**
560     * Identify the script used for a piece of text
561     *
562     * @param string $string
563     *
564     * @return string
565     */
566    public static function textScript($string): string
567    {
568        $string = strip_tags($string); // otherwise HTML tags show up as latin
569        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
570        $string = str_replace([
571            '@N.N.',
572            '@P.N.',
573        ], '', $string); // otherwise unknown names show up as latin
574        $pos    = 0;
575        $strlen = strlen($string);
576        while ($pos < $strlen) {
577            // get the Unicode Code Point for the character at position $pos
578            $byte1 = ord($string[$pos]);
579            if ($byte1 < 0x80) {
580                $code_point = $byte1;
581                $chrlen     = 1;
582            } elseif ($byte1 < 0xC0) {
583                // Invalid continuation character
584                return 'Latn';
585            } elseif ($byte1 < 0xE0) {
586                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
587                $chrlen     = 2;
588            } elseif ($byte1 < 0xF0) {
589                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
590                $chrlen     = 3;
591            } elseif ($byte1 < 0xF8) {
592                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
593                $chrlen     = 3;
594            } else {
595                // Invalid UTF
596                return 'Latn';
597            }
598
599            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
600                if ($code_point >= $range[1] && $code_point <= $range[2]) {
601                    return $range[0];
602                }
603            }
604            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
605            $pos += $chrlen;
606        }
607
608        return 'Latn';
609    }
610
611    /**
612     * Perform a case-insensitive comparison of two strings.
613     *
614     * @param string $string1
615     * @param string $string2
616     *
617     * @return int
618     */
619    public static function strcasecmp($string1, $string2): int
620    {
621        if (self::$collator instanceof Collator) {
622            return self::$collator->compare($string1, $string2);
623        }
624
625        return strcmp(self::strtolower($string1), self::strtolower($string2));
626    }
627
628    /**
629     * Convert a string to lower case.
630     *
631     * @param string $string
632     *
633     * @return string
634     */
635    public static function strtolower($string): string
636    {
637        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
638            $string = strtr($string, self::DOTLESS_I_TOLOWER);
639        }
640
641        return mb_strtolower($string);
642    }
643
644    /**
645     * Convert a string to upper case.
646     *
647     * @param string $string
648     *
649     * @return string
650     */
651    public static function strtoupper($string): string
652    {
653        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
654            $string = strtr($string, self::DOTLESS_I_TOUPPER);
655        }
656
657        return mb_strtoupper($string);
658    }
659
660    /**
661     * What format is used to display dates in the current locale?
662     *
663     * @return string
664     */
665    public static function timeFormat(): string
666    {
667        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
668        return self::$translator->translate('%H:%i:%s');
669    }
670
671    /**
672     * Context sensitive version of translate.
673     * echo I18N::translateContext('NOMINATIVE', 'January');
674     * echo I18N::translateContext('GENITIVE', 'January');
675     *
676     * @param string $context
677     * @param string $message
678     * @param string ...$args
679     *
680     * @return string
681     */
682    public static function translateContext(string $context, string $message, ...$args): string
683    {
684        $message = self::$translator->translateContext($context, $message);
685
686        return sprintf($message, ...$args);
687    }
688}
689