xref: /webtrees/app/I18N.php (revision c06384d0dc00d3271940d4c4da1c38f13ddb0cba)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2019 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees;
21
22use Collator;
23use Exception;
24use Fisharebest\Localization\Locale;
25use Fisharebest\Localization\Locale\LocaleEnUs;
26use Fisharebest\Localization\Locale\LocaleInterface;
27use Fisharebest\Localization\Translation;
28use Fisharebest\Localization\Translator;
29use Fisharebest\Webtrees\Module\ModuleCustomInterface;
30use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
31use Fisharebest\Webtrees\Services\ModuleService;
32use Illuminate\Support\Collection;
33
34use function array_merge;
35use function class_exists;
36use function html_entity_decode;
37use function in_array;
38use function mb_strtolower;
39use function mb_strtoupper;
40use function mb_substr;
41use function ord;
42use function sprintf;
43use function str_replace;
44use function strcmp;
45use function strip_tags;
46use function strlen;
47use function strpos;
48use function strtr;
49
50/**
51 * Internationalization (i18n) and localization (l10n).
52 */
53class I18N
54{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";
    // Digits are always rendered LTR, even in RTL text.
    // The list holds the ASCII digits, then the Arabic-Indic digits, then the
    // extended Arabic-Indic digits. reverseText() keeps runs of these in order.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';
    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    // Substitutions made by strtolower() for the dotless-I locales, before the
    // generic multi-byte conversion is applied.
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];

    // Substitutions made by strtoupper() for the dotless-I locales, before the
    // generic multi-byte conversion is applied.
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];
73
    // The ranges of characters used by each script.
    // Each entry is [script code, first code point, last code point], and both
    // ends of the range are inclusive.
    // textScript() returns the script of the FIRST range that contains a code
    // point, so the order matters where ranges overlap: the Deva range
    // 0xA8E0-0xA8FF must stay ahead of the Hans range 0x3400-0xFAFF.
    private const SCRIPT_CHARACTER_RANGES = [
        [
            'Latn',
            0x0041,
            0x005A,
        ],
        [
            'Latn',
            0x0061,
            0x007A,
        ],
        [
            'Latn',
            0x0100,
            0x02AF,
        ],
        [
            'Grek',
            0x0370,
            0x03FF,
        ],
        [
            'Cyrl',
            0x0400,
            0x052F,
        ],
        [
            'Hebr',
            0x0590,
            0x05FF,
        ],
        [
            'Arab',
            0x0600,
            0x06FF,
        ],
        [
            'Arab',
            0x0750,
            0x077F,
        ],
        [
            'Arab',
            0x08A0,
            0x08FF,
        ],
        [
            'Deva',
            0x0900,
            0x097F,
        ],
        [
            'Taml',
            0x0B80,
            0x0BFF,
        ],
        [
            'Sinh',
            0x0D80,
            0x0DFF,
        ],
        [
            'Thai',
            0x0E00,
            0x0E7F,
        ],
        [
            'Geor',
            0x10A0,
            0x10FF,
        ],
        [
            'Grek',
            0x1F00,
            0x1FFF,
        ],
        [
            'Deva',
            0xA8E0,
            0xA8FF,
        ],
        [
            'Hans',
            0x3000,
            0x303F,
        ],
        // The range above is mixed CJK, not just Hans
        [
            'Hans',
            0x3400,
            0xFAFF,
        ],
        // The range above is mixed CJK, not just Hans
        [
            'Hans',
            0x20000,
            0x2FA1F,
        ],
        // The range above is mixed CJK, not just Hans
    ];
175    private const MIRROR_CHARACTERS = [
176        '('  => ')',
177        ')'  => '(',
178        '['  => ']',
179        ']'  => '[',
180        '{'  => '}',
181        '}'  => '{',
182        '<'  => '>',
183        '>'  => '<',
184        '‹ ' => '›',
185        '› ' => '‹',
186        '«'  => '»',
187        '»'  => '«',
188        '﴾ ' => '﴿',
189        '﴿ ' => '﴾',
190        '“ ' => '”',
191        '” ' => '“',
192        '‘ ' => '’',
193        '’ ' => '‘',
194    ];
    /** @var string Punctuation used to separate list items, typically a comma. Set by init(). */
    public static $list_separator;

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb). Set by init(). */
    private static $locale;

    /** @var Translator An object that performs translation. Created by init(). */
    private static $translator;
    /** @var  Collator|null From the php-intl library. When it is not set, strcasecmp() falls back to comparing lower-cased strings. */
    private static $collator;
207
208    /**
209     * The preferred locales for this site, or a default list if no preference.
210     *
211     * @return LocaleInterface[]
212     */
213    public static function activeLocales(): array
214    {
215        /** @var Collection $locales */
216        $locales = app(ModuleService::class)
217            ->findByInterface(ModuleLanguageInterface::class, false, true)
218            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
219                return $module->locale();
220            });
221
222        if ($locales->isEmpty()) {
223            return [new LocaleEnUs()];
224        }
225
226        return $locales->all();
227    }
228
229    /**
230     * Which MySQL collation should be used for this locale?
231     *
232     * @return string
233     */
234    public static function collation(): string
235    {
236        $collation = self::$locale->collation();
237        switch ($collation) {
238            case 'croatian_ci':
239            case 'german2_ci':
240            case 'vietnamese_ci':
241                // Only available in MySQL 5.6
242                return 'utf8_unicode_ci';
243            default:
244                return 'utf8_' . $collation;
245        }
246    }
247
248    /**
249     * What format is used to display dates in the current locale?
250     *
251     * @return string
252     */
253    public static function dateFormat(): string
254    {
255        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
256        return self::$translator->translate('%j %F %Y');
257    }
258
259    /**
260     * Convert the digits 0-9 into the local script
261     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
262     *
263     * @param string|int $n
264     *
265     * @return string
266     */
267    public static function digits($n): string
268    {
269        return self::$locale->digits((string) $n);
270    }
271
272    /**
273     * What is the direction of the current locale
274     *
275     * @return string "ltr" or "rtl"
276     */
277    public static function direction(): string
278    {
279        return self::$locale->direction();
280    }
281
282    /**
283     * What is the first day of the week.
284     *
285     * @return int Sunday=0, Monday=1, etc.
286     */
287    public static function firstDay(): int
288    {
289        return self::$locale->territory()->firstDay();
290    }
291
292    /**
293     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
294     *
295     * @return string
296     */
297    public static function htmlAttributes(): string
298    {
299        return self::$locale->htmlAttributes();
300    }
301
    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen in this order: the given code, the language stored
     * in the session, or negotiation with the browser (falling back to the
     * tree's LANGUAGE preference, or to US English when there is no tree).
     *
     * As side effects, this sets the current locale, the translator, the list
     * separator and (when available) the collator. It may also write a
     * messages.php file next to the locale's messages.po file.
     *
     * @param string    $code  Use this locale/language code, or choose one automatically
     * @param Tree|null $tree  Supplies the default language when negotiating with the browser
     * @param bool      $setup During setup, we cannot access the database.
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', Tree $tree = null, $setup = false): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('language')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('language'));
        } else {
            // The default is used when negotiation does not find a match.
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate.  They get the default locale of the tree.
            if ($setup) {
                // During setup, ask the module service for the setup languages
                // instead of using installedLocales().
                $installed_locales = app(ModuleService::class)->setupLanguages()
                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                        return $module->locale();
                    });
            } else {
                $installed_locales = self::installedLocales();
            }

            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
        }

        // Load the translation file
        $translation_file = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.php';

        try {
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();
        } catch (Exception $ex) {
            // The translations files are created during the build process, and are
            // not included in the source code.
            // Assume we are running development code, and build (or rebuild) the
            // messages.php file from the messages.po file for this locale.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
            $translation  = new Translation($po_file);
            $translations = $translation->asArray();
            // Store the translations as a PHP file that returns the array.
            file_put_contents($translation_file, '<?php return ' . var_export($translations, true) . ';');
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            // array_merge() lets a later module's entry replace an earlier one with the same key.
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed?  We'll use a fallback later (see strcasecmp()).
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }
386
387    /**
388     * All locales for which a translation file exists.
389     *
390     * @return Collection
391     */
392    public static function installedLocales(): Collection
393    {
394        return app(ModuleService::class)
395            ->findByInterface(ModuleLanguageInterface::class, true)
396            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
397                return $module->locale();
398            });
399    }
400
401    /**
402     * Translate a string, and then substitute placeholders
403     * echo I18N::translate('Hello World!');
404     * echo I18N::translate('The %s sat on the mat', 'cat');
405     *
406     * @param string $message
407     * @param string ...$args
408     *
409     * @return string
410     */
411    public static function translate(string $message, ...$args): string
412    {
413        $message = self::$translator->translate($message);
414
415        return sprintf($message, ...$args);
416    }
417
418    /**
419     * Return the endonym for a given language - as per http://cldr.unicode.org/
420     *
421     * @param string $locale
422     *
423     * @return string
424     */
425    public static function languageName(string $locale): string
426    {
427        return Locale::create($locale)->endonym();
428    }
429
430    /**
431     * @return string
432     */
433    public static function languageTag(): string
434    {
435        return self::$locale->languageTag();
436    }
437
438    /**
439     * Return the script used by a given language
440     *
441     * @param string $locale
442     *
443     * @return string
444     */
445    public static function languageScript(string $locale): string
446    {
447        return Locale::create($locale)->script()->code();
448    }
449
450    /**
451     * Translate a number into the local representation.
452     * e.g. 12345.67 becomes
453     * en: 12,345.67
454     * fr: 12 345,67
455     * de: 12.345,67
456     *
457     * @param float $n
458     * @param int   $precision
459     *
460     * @return string
461     */
462    public static function number(float $n, int $precision = 0): string
463    {
464        return self::$locale->number(round($n, $precision));
465    }
466
467    /**
468     * Translate a fraction into a percentage.
469     * e.g. 0.123 becomes
470     * en: 12.3%
471     * fr: 12,3 %
472     * de: 12,3%
473     *
474     * @param float $n
475     * @param int   $precision
476     *
477     * @return string
478     */
479    public static function percentage(float $n, int $precision = 0): string
480    {
481        return self::$locale->percent(round($n, $precision + 2));
482    }
483
484    /**
485     * Translate a plural string
486     * echo self::plural('There is an error', 'There are errors', $num_errors);
487     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
488     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
489     *
490     * @param string $singular
491     * @param string $plural
492     * @param int    $count
493     * @param string ...$args
494     *
495     * @return string
496     */
497    public static function plural(string $singular, string $plural, int $count, ...$args): string
498    {
499        $message = self::$translator->translatePlural($singular, $plural, $count);
500
501        return sprintf($message, ...$args);
502    }
503
504    /**
505     * UTF8 version of PHP::strrev()
506     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
507     * These do not support UTF8 text direction, so we must mimic it for them.
508     * Numbers are always rendered LTR, even in RTL text.
509     * The visual direction of characters such as parentheses should be reversed.
510     *
511     * @param string $text Text to be reversed
512     *
513     * @return string
514     */
515    public static function reverseText($text): string
516    {
517        // Remove HTML markup - we can't display it and it is LTR.
518        $text = strip_tags($text);
519        // Remove HTML entities.
520        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
521
522        // LTR text doesn't need reversing
523        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
524            return $text;
525        }
526
527        // Mirrored characters
528        $text = strtr($text, self::MIRROR_CHARACTERS);
529
530        $reversed = '';
531        $digits   = '';
532        while ($text !== '') {
533            $letter = mb_substr($text, 0, 1);
534            $text   = mb_substr($text, 1);
535            if (strpos(self::DIGITS, $letter) !== false) {
536                $digits .= $letter;
537            } else {
538                $reversed = $letter . $digits . $reversed;
539                $digits   = '';
540            }
541        }
542
543        return $digits . $reversed;
544    }
545
546    /**
547     * Return the direction (ltr or rtl) for a given script
548     * The PHP/intl library does not provde this information, so we need
549     * our own lookup table.
550     *
551     * @param string $script
552     *
553     * @return string
554     */
555    public static function scriptDirection($script): string
556    {
557        switch ($script) {
558            case 'Arab':
559            case 'Hebr':
560            case 'Mong':
561            case 'Thaa':
562                return 'rtl';
563            default:
564                return 'ltr';
565        }
566    }
567
568    /**
569     * Identify the script used for a piece of text
570     *
571     * @param string $string
572     *
573     * @return string
574     */
575    public static function textScript($string): string
576    {
577        $string = strip_tags($string); // otherwise HTML tags show up as latin
578        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
579        $string = str_replace([
580            '@N.N.',
581            '@P.N.',
582        ], '', $string); // otherwise unknown names show up as latin
583        $pos    = 0;
584        $strlen = strlen($string);
585        while ($pos < $strlen) {
586            // get the Unicode Code Point for the character at position $pos
587            $byte1 = ord($string[$pos]);
588            if ($byte1 < 0x80) {
589                $code_point = $byte1;
590                $chrlen     = 1;
591            } elseif ($byte1 < 0xC0) {
592                // Invalid continuation character
593                return 'Latn';
594            } elseif ($byte1 < 0xE0) {
595                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
596                $chrlen     = 2;
597            } elseif ($byte1 < 0xF0) {
598                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
599                $chrlen     = 3;
600            } elseif ($byte1 < 0xF8) {
601                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
602                $chrlen     = 3;
603            } else {
604                // Invalid UTF
605                return 'Latn';
606            }
607
608            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
609                if ($code_point >= $range[1] && $code_point <= $range[2]) {
610                    return $range[0];
611                }
612            }
613            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
614            $pos += $chrlen;
615        }
616
617        return 'Latn';
618    }
619
620    /**
621     * Perform a case-insensitive comparison of two strings.
622     *
623     * @param string $string1
624     * @param string $string2
625     *
626     * @return int
627     */
628    public static function strcasecmp($string1, $string2): int
629    {
630        if (self::$collator instanceof Collator) {
631            return self::$collator->compare($string1, $string2);
632        }
633
634        return strcmp(self::strtolower($string1), self::strtolower($string2));
635    }
636
637    /**
638     * Convert a string to lower case.
639     *
640     * @param string $string
641     *
642     * @return string
643     */
644    public static function strtolower($string): string
645    {
646        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
647            $string = strtr($string, self::DOTLESS_I_TOLOWER);
648        }
649
650        return mb_strtolower($string);
651    }
652
653    /**
654     * Convert a string to upper case.
655     *
656     * @param string $string
657     *
658     * @return string
659     */
660    public static function strtoupper($string): string
661    {
662        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
663            $string = strtr($string, self::DOTLESS_I_TOUPPER);
664        }
665
666        return mb_strtoupper($string);
667    }
668
669    /**
670     * What format is used to display dates in the current locale?
671     *
672     * @return string
673     */
674    public static function timeFormat(): string
675    {
676        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
677        return self::$translator->translate('%H:%i:%s');
678    }
679
680    /**
681     * Context sensitive version of translate.
682     * echo I18N::translateContext('NOMINATIVE', 'January');
683     * echo I18N::translateContext('GENITIVE', 'January');
684     *
685     * @param string $context
686     * @param string $message
687     * @param string ...$args
688     *
689     * @return string
690     */
691    public static function translateContext(string $context, string $message, ...$args): string
692    {
693        $message = self::$translator->translateContext($context, $message);
694
695        return sprintf($message, ...$args);
696    }
697}
698