xref: /webtrees/app/I18N.php (revision b2c011d7d8ff495bd3636357907da539925bd5f9)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use Exception;
22use Fisharebest\Localization\Locale;
23use Fisharebest\Localization\Locale\LocaleEnUs;
24use Fisharebest\Localization\Locale\LocaleInterface;
25use Fisharebest\Localization\Translation;
26use Fisharebest\Localization\Translator;
27use Fisharebest\Webtrees\Module\ModuleCustomInterface;
28use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
29use Fisharebest\Webtrees\Services\ModuleService;
30use Illuminate\Support\Collection;
31use function array_merge;
32use function class_exists;
33use function filemtime;
34use function file_exists;
35use function html_entity_decode;
36use function in_array;
37use function mb_strtolower;
38use function mb_strtoupper;
39use function mb_substr;
40use function ord;
41use function sprintf;
42use function str_replace;
43use function strcmp;
44use function strip_tags;
45use function strlen;
46use function strpos;
47use function strtr;
48
49/**
50 * Internationalization (i18n) and localization (l10n).
51 */
52class I18N
53{
54    // MO files use special characters for plurals and context.
55    public const PLURAL  = "\x00";
56    public const CONTEXT = "\x04";
57
58    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
59    private static $locale;
60
61    /** @var Translator An object that performs translation */
62    private static $translator;
63
64    /** @var  Collator|null From the php-intl library */
65    private static $collator;
66
67    // Digits are always rendered LTR, even in RTL text.
68    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';
69
70    // These locales need special handling for the dotless letter I.
71    private const DOTLESS_I_LOCALES = [
72        'az',
73        'tr',
74    ];
75    private const DOTLESS_I_TOLOWER = [
76        'I' => 'ı',
77        'İ' => 'i',
78    ];
79    private const DOTLESS_I_TOUPPER = [
80        'ı' => 'I',
81        'i' => 'İ',
82    ];
83
84    // The ranges of characters used by each script.
85    private const SCRIPT_CHARACTER_RANGES = [
86        [
87            'Latn',
88            0x0041,
89            0x005A,
90        ],
91        [
92            'Latn',
93            0x0061,
94            0x007A,
95        ],
96        [
97            'Latn',
98            0x0100,
99            0x02AF,
100        ],
101        [
102            'Grek',
103            0x0370,
104            0x03FF,
105        ],
106        [
107            'Cyrl',
108            0x0400,
109            0x052F,
110        ],
111        [
112            'Hebr',
113            0x0590,
114            0x05FF,
115        ],
116        [
117            'Arab',
118            0x0600,
119            0x06FF,
120        ],
121        [
122            'Arab',
123            0x0750,
124            0x077F,
125        ],
126        [
127            'Arab',
128            0x08A0,
129            0x08FF,
130        ],
131        [
132            'Deva',
133            0x0900,
134            0x097F,
135        ],
136        [
137            'Taml',
138            0x0B80,
139            0x0BFF,
140        ],
141        [
142            'Sinh',
143            0x0D80,
144            0x0DFF,
145        ],
146        [
147            'Thai',
148            0x0E00,
149            0x0E7F,
150        ],
151        [
152            'Geor',
153            0x10A0,
154            0x10FF,
155        ],
156        [
157            'Grek',
158            0x1F00,
159            0x1FFF,
160        ],
161        [
162            'Deva',
163            0xA8E0,
164            0xA8FF,
165        ],
166        [
167            'Hans',
168            0x3000,
169            0x303F,
170        ],
171        // Mixed CJK, not just Hans
172        [
173            'Hans',
174            0x3400,
175            0xFAFF,
176        ],
177        // Mixed CJK, not just Hans
178        [
179            'Hans',
180            0x20000,
181            0x2FA1F,
182        ],
183        // Mixed CJK, not just Hans
184    ];
185
186    // Characters that are displayed in mirror form in RTL text.
187    private const MIRROR_CHARACTERS = [
188        '('  => ')',
189        ')'  => '(',
190        '['  => ']',
191        ']'  => '[',
192        '{'  => '}',
193        '}'  => '{',
194        '<'  => '>',
195        '>'  => '<',
196        '‹ ' => '›',
197        '› ' => '‹',
198        '«'  => '»',
199        '»'  => '«',
200        '﴾ ' => '﴿',
201        '﴿ ' => '﴾',
202        '“ ' => '”',
203        '” ' => '“',
204        '‘ ' => '’',
205        '’ ' => '‘',
206    ];
207
208    /** @var string Punctuation used to separate list items, typically a comma */
209    public static $list_separator;
210
211    /**
212     * The preferred locales for this site, or a default list if no preference.
213     *
214     * @return LocaleInterface[]
215     */
216    public static function activeLocales(): array
217    {
218        $locales = app(ModuleService::class)
219            ->findByInterface(ModuleLanguageInterface::class, false, true)
220            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
221                return $module->locale();
222            });
223
224        if ($locales->isEmpty()) {
225            return [new LocaleEnUs()];
226        }
227
228        return $locales->all();
229    }
230
231    /**
232     * Which MySQL collation should be used for this locale?
233     *
234     * @return string
235     */
236    public static function collation(): string
237    {
238        $collation = self::$locale->collation();
239        switch ($collation) {
240            case 'croatian_ci':
241            case 'german2_ci':
242            case 'vietnamese_ci':
243                // Only available in MySQL 5.6
244                return 'utf8_unicode_ci';
245            default:
246                return 'utf8_' . $collation;
247        }
248    }
249
250    /**
251     * What format is used to display dates in the current locale?
252     *
253     * @return string
254     */
255    public static function dateFormat(): string
256    {
257        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
258        return self::$translator->translate('%j %F %Y');
259    }
260
261    /**
262     * Convert the digits 0-9 into the local script
263     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
264     *
265     * @param string|int $n
266     *
267     * @return string
268     */
269    public static function digits($n): string
270    {
271        return self::$locale->digits((string) $n);
272    }
273
274    /**
275     * What is the direction of the current locale
276     *
277     * @return string "ltr" or "rtl"
278     */
279    public static function direction(): string
280    {
281        return self::$locale->direction();
282    }
283
284    /**
285     * What is the first day of the week.
286     *
287     * @return int Sunday=0, Monday=1, etc.
288     */
289    public static function firstDay(): int
290    {
291        return self::$locale->territory()->firstDay();
292    }
293
294    /**
295     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
296     *
297     * @return string
298     */
299    public static function htmlAttributes(): string
300    {
301        return self::$locale->htmlAttributes();
302    }
303
304    /**
305     * Initialise the translation adapter with a locale setting.
306     *
307     * @param string    $code  Use this locale/language code, or choose one automatically
308     * @param Tree|null $tree
309     * @param bool      $setup During setup, we cannot access the database.
310     *
311     * @return string $string
312     */
313    public static function init(string $code = '', Tree $tree = null, $setup = false): string
314    {
315        if ($code !== '') {
316            // Create the specified locale
317            self::$locale = Locale::create($code);
318        } elseif (Session::has('language') && file_exists(Webtrees::ROOT_DIR . 'resources/lang/' . Session::get('language') . '/messages.mo')) {
319            // Select a previously used locale
320            self::$locale = Locale::create(Session::get('language'));
321        } else {
322            if ($tree instanceof Tree) {
323                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
324            } else {
325                $default_locale = new LocaleEnUs();
326            }
327
328            // Negotiate with the browser.
329            // Search engines don't negotiate.  They get the default locale of the tree.
330            if ($setup) {
331                $installed_locales = app(ModuleService::class)->setupLanguages()
332                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
333                        return $module->locale();
334                    });
335            } else {
336                $installed_locales = self::installedLocales();
337            }
338
339            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
340        }
341
342        $cache_dir  = WT_DATA_DIR . 'cache/';
343        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
344        if (file_exists($cache_file)) {
345            $filemtime = filemtime($cache_file);
346        } else {
347            $filemtime = 0;
348        }
349
350        // Load the translation file
351        $translation_file = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo';
352
353        if (!file_exists($translation_file)) {
354            // Test and dev environments may not have the compiled translations
355            $translations = [];
356        } elseif (filemtime($translation_file) > $filemtime) {
357            $translation  = new Translation($translation_file);
358            $translations = $translation->asArray();
359
360            try {
361                File::mkdir($cache_dir);
362                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
363            } catch (Exception $ex) {
364                // During setup, we may not have been able to create it.
365            }
366        } else {
367            $translations = include $cache_file;
368        }
369
370        // Add translations from custom modules (but not during setup, as we have no database/modules)
371        if (!$setup) {
372            $translations = app(ModuleService::class)
373                ->findByInterface(ModuleCustomInterface::class)
374                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
375                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
376                }, $translations);
377        }
378
379        // Create a translator
380        self::$translator = new Translator($translations, self::$locale->pluralRule());
381
382        /* I18N: This punctuation is used to separate lists of items */
383        self::$list_separator = self::translate(', ');
384
385        // Create a collator
386        try {
387            if (class_exists('Collator')) {
388                // Symfony provides a very incomplete polyfill - which cannot be used.
389                self::$collator = new Collator(self::$locale->code());
390                // Ignore upper/lower case differences
391                self::$collator->setStrength(Collator::SECONDARY);
392            }
393        } catch (Exception $ex) {
394            // PHP-INTL is not installed?  We'll use a fallback later.
395            self::$collator = null;
396        }
397
398        return self::$locale->languageTag();
399    }
400
401    /**
402     * All locales for which a translation file exists.
403     *
404     * @return Collection
405     * @return LocaleInterface[]
406     */
407    public static function installedLocales(): Collection
408    {
409        return app(ModuleService::class)
410            ->findByInterface(ModuleLanguageInterface::class, true)
411            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
412                return $module->locale();
413            });
414    }
415
416    /**
417     * Return the endonym for a given language - as per http://cldr.unicode.org/
418     *
419     * @param string $locale
420     *
421     * @return string
422     */
423    public static function languageName(string $locale): string
424    {
425        return Locale::create($locale)->endonym();
426    }
427
428    /**
429     * Return the script used by a given language
430     *
431     * @param string $locale
432     *
433     * @return string
434     */
435    public static function languageScript(string $locale): string
436    {
437        return Locale::create($locale)->script()->code();
438    }
439
440    /**
441     * Translate a number into the local representation.
442     * e.g. 12345.67 becomes
443     * en: 12,345.67
444     * fr: 12 345,67
445     * de: 12.345,67
446     *
447     * @param float $n
448     * @param int   $precision
449     *
450     * @return string
451     */
452    public static function number(float $n, int $precision = 0): string
453    {
454        return self::$locale->number(round($n, $precision));
455    }
456
457    /**
458     * Translate a fraction into a percentage.
459     * e.g. 0.123 becomes
460     * en: 12.3%
461     * fr: 12,3 %
462     * de: 12,3%
463     *
464     * @param float $n
465     * @param int   $precision
466     *
467     * @return string
468     */
469    public static function percentage(float $n, int $precision = 0): string
470    {
471        return self::$locale->percent(round($n, $precision + 2));
472    }
473
474    /**
475     * Translate a plural string
476     * echo self::plural('There is an error', 'There are errors', $num_errors);
477     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
478     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
479     *
480     * @param string $singular
481     * @param string $plural
482     * @param int    $count
483     * @param string ...$args
484     *
485     * @return string
486     */
487    public static function plural(string $singular, string $plural, int $count, ...$args): string
488    {
489        $message = self::$translator->translatePlural($singular, $plural, $count);
490
491        return sprintf($message, ...$args);
492    }
493
494    /**
495     * UTF8 version of PHP::strrev()
496     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
497     * These do not support UTF8 text direction, so we must mimic it for them.
498     * Numbers are always rendered LTR, even in RTL text.
499     * The visual direction of characters such as parentheses should be reversed.
500     *
501     * @param string $text Text to be reversed
502     *
503     * @return string
504     */
505    public static function reverseText($text): string
506    {
507        // Remove HTML markup - we can't display it and it is LTR.
508        $text = strip_tags($text);
509        // Remove HTML entities.
510        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
511
512        // LTR text doesn't need reversing
513        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
514            return $text;
515        }
516
517        // Mirrored characters
518        $text = strtr($text, self::MIRROR_CHARACTERS);
519
520        $reversed = '';
521        $digits   = '';
522        while ($text !== '') {
523            $letter = mb_substr($text, 0, 1);
524            $text   = mb_substr($text, 1);
525            if (strpos(self::DIGITS, $letter) !== false) {
526                $digits .= $letter;
527            } else {
528                $reversed = $letter . $digits . $reversed;
529                $digits   = '';
530            }
531        }
532
533        return $digits . $reversed;
534    }
535
536    /**
537     * Return the direction (ltr or rtl) for a given script
538     * The PHP/intl library does not provde this information, so we need
539     * our own lookup table.
540     *
541     * @param string $script
542     *
543     * @return string
544     */
545    public static function scriptDirection($script): string
546    {
547        switch ($script) {
548            case 'Arab':
549            case 'Hebr':
550            case 'Mong':
551            case 'Thaa':
552                return 'rtl';
553            default:
554                return 'ltr';
555        }
556    }
557
558    /**
559     * Perform a case-insensitive comparison of two strings.
560     *
561     * @param string $string1
562     * @param string $string2
563     *
564     * @return int
565     */
566    public static function strcasecmp($string1, $string2): int
567    {
568        if (self::$collator instanceof Collator) {
569            return self::$collator->compare($string1, $string2);
570        }
571
572        return strcmp(self::strtolower($string1), self::strtolower($string2));
573    }
574
575    /**
576     * Convert a string to lower case.
577     *
578     * @param string $string
579     *
580     * @return string
581     */
582    public static function strtolower($string): string
583    {
584        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
585            $string = strtr($string, self::DOTLESS_I_TOLOWER);
586        }
587
588        return mb_strtolower($string);
589    }
590
591    /**
592     * Convert a string to upper case.
593     *
594     * @param string $string
595     *
596     * @return string
597     */
598    public static function strtoupper($string): string
599    {
600        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
601            $string = strtr($string, self::DOTLESS_I_TOUPPER);
602        }
603
604        return mb_strtoupper($string);
605    }
606
607    /**
608     * Identify the script used for a piece of text
609     *
610     * @param string $string
611     *
612     * @return string
613     */
614    public static function textScript($string): string
615    {
616        $string = strip_tags($string); // otherwise HTML tags show up as latin
617        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
618        $string = str_replace([
619            '@N.N.',
620            '@P.N.',
621        ], '', $string); // otherwise unknown names show up as latin
622        $pos    = 0;
623        $strlen = strlen($string);
624        while ($pos < $strlen) {
625            // get the Unicode Code Point for the character at position $pos
626            $byte1 = ord($string[$pos]);
627            if ($byte1 < 0x80) {
628                $code_point = $byte1;
629                $chrlen     = 1;
630            } elseif ($byte1 < 0xC0) {
631                // Invalid continuation character
632                return 'Latn';
633            } elseif ($byte1 < 0xE0) {
634                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
635                $chrlen     = 2;
636            } elseif ($byte1 < 0xF0) {
637                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
638                $chrlen     = 3;
639            } elseif ($byte1 < 0xF8) {
640                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
641                $chrlen     = 3;
642            } else {
643                // Invalid UTF
644                return 'Latn';
645            }
646
647            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
648                if ($code_point >= $range[1] && $code_point <= $range[2]) {
649                    return $range[0];
650                }
651            }
652            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
653            $pos += $chrlen;
654        }
655
656        return 'Latn';
657    }
658
659    /**
660     * What format is used to display dates in the current locale?
661     *
662     * @return string
663     */
664    public static function timeFormat(): string
665    {
666        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
667        return self::$translator->translate('%H:%i:%s');
668    }
669
670    /**
671     * Translate a string, and then substitute placeholders
672     * echo I18N::translate('Hello World!');
673     * echo I18N::translate('The %s sat on the mat', 'cat');
674     *
675     * @param string $message
676     * @param string ...$args
677     *
678     * @return string
679     */
680    public static function translate(string $message, ...$args): string
681    {
682        $message = self::$translator->translate($message);
683
684        return sprintf($message, ...$args);
685    }
686
687    /**
688     * Context sensitive version of translate.
689     * echo I18N::translateContext('NOMINATIVE', 'January');
690     * echo I18N::translateContext('GENITIVE', 'January');
691     *
692     * @param string $context
693     * @param string $message
694     * @param string ...$args
695     *
696     * @return string
697     */
698    public static function translateContext(string $context, string $message, ...$args): string
699    {
700        $message = self::$translator->translateContext($context, $message);
701
702        return sprintf($message, ...$args);
703    }
704}
705