xref: /webtrees/app/I18N.php (revision b6c326d8b8798b83b744c4d4a669df5aa9f3e0c2)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use Exception;
22use Fisharebest\Localization\Locale;
23use Fisharebest\Localization\Locale\LocaleEnUs;
24use Fisharebest\Localization\Locale\LocaleInterface;
25use Fisharebest\Localization\Translation;
26use Fisharebest\Localization\Translator;
27use Fisharebest\Webtrees\Module\ModuleCustomInterface;
28use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
29use Fisharebest\Webtrees\Services\ModuleService;
30use Illuminate\Support\Collection;
31
32/**
33 * Internationalization (i18n) and localization (l10n).
34 */
35class I18N
36{
37    // MO files use special characters for plurals and context.
38    public const PLURAL  = '\x00';
39    public const CONTEXT = '\x04';
40
41    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
42    private static $locale;
43
44    /** @var Translator An object that performs translation */
45    private static $translator;
46
47    /** @var  Collator|null From the php-intl library */
48    private static $collator;
49
50    // Digits are always rendered LTR, even in RTL text.
51    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';
52
53    // These locales need special handling for the dotless letter I.
54    private const DOTLESS_I_LOCALES = [
55        'az',
56        'tr',
57    ];
58    private const DOTLESS_I_TOLOWER = [
59        'I' => 'ı',
60        'İ' => 'i',
61    ];
62    private const DOTLESS_I_TOUPPER = [
63        'ı' => 'I',
64        'i' => 'İ',
65    ];
66
67    // The ranges of characters used by each script.
68    private const SCRIPT_CHARACTER_RANGES = [
69        [
70            'Latn',
71            0x0041,
72            0x005A,
73        ],
74        [
75            'Latn',
76            0x0061,
77            0x007A,
78        ],
79        [
80            'Latn',
81            0x0100,
82            0x02AF,
83        ],
84        [
85            'Grek',
86            0x0370,
87            0x03FF,
88        ],
89        [
90            'Cyrl',
91            0x0400,
92            0x052F,
93        ],
94        [
95            'Hebr',
96            0x0590,
97            0x05FF,
98        ],
99        [
100            'Arab',
101            0x0600,
102            0x06FF,
103        ],
104        [
105            'Arab',
106            0x0750,
107            0x077F,
108        ],
109        [
110            'Arab',
111            0x08A0,
112            0x08FF,
113        ],
114        [
115            'Deva',
116            0x0900,
117            0x097F,
118        ],
119        [
120            'Taml',
121            0x0B80,
122            0x0BFF,
123        ],
124        [
125            'Sinh',
126            0x0D80,
127            0x0DFF,
128        ],
129        [
130            'Thai',
131            0x0E00,
132            0x0E7F,
133        ],
134        [
135            'Geor',
136            0x10A0,
137            0x10FF,
138        ],
139        [
140            'Grek',
141            0x1F00,
142            0x1FFF,
143        ],
144        [
145            'Deva',
146            0xA8E0,
147            0xA8FF,
148        ],
149        [
150            'Hans',
151            0x3000,
152            0x303F,
153        ],
154        // Mixed CJK, not just Hans
155        [
156            'Hans',
157            0x3400,
158            0xFAFF,
159        ],
160        // Mixed CJK, not just Hans
161        [
162            'Hans',
163            0x20000,
164            0x2FA1F,
165        ],
166        // Mixed CJK, not just Hans
167    ];
168
169    // Characters that are displayed in mirror form in RTL text.
170    private const MIRROR_CHARACTERS = [
171        '('  => ')',
172        ')'  => '(',
173        '['  => ']',
174        ']'  => '[',
175        '{'  => '}',
176        '}'  => '{',
177        '<'  => '>',
178        '>'  => '<',
179        '‹ ' => '›',
180        '› ' => '‹',
181        '«'  => '»',
182        '»'  => '«',
183        '﴾ ' => '﴿',
184        '﴿ ' => '﴾',
185        '“ ' => '”',
186        '” ' => '“',
187        '‘ ' => '’',
188        '’ ' => '‘',
189    ];
190
191    /** @var string Punctuation used to separate list items, typically a comma */
192    public static $list_separator;
193
194    /**
195     * The preferred locales for this site, or a default list if no preference.
196     *
197     * @return LocaleInterface[]
198     */
199    public static function activeLocales(): array
200    {
201        $locales = app(ModuleService::class)
202            ->findByInterface(ModuleLanguageInterface::class, false, true)
203            ->map(function (ModuleLanguageInterface $module): LocaleInterface {
204                return $module->locale();
205            });
206
207        if ($locales->isEmpty()) {
208            return [new LocaleEnUs()];
209        }
210
211        return $locales->all();
212    }
213
214    /**
215     * Which MySQL collation should be used for this locale?
216     *
217     * @return string
218     */
219    public static function collation(): string
220    {
221        $collation = self::$locale->collation();
222        switch ($collation) {
223            case 'croatian_ci':
224            case 'german2_ci':
225            case 'vietnamese_ci':
226                // Only available in MySQL 5.6
227                return 'utf8_unicode_ci';
228            default:
229                return 'utf8_' . $collation;
230        }
231    }
232
233    /**
234     * What format is used to display dates in the current locale?
235     *
236     * @return string
237     */
238    public static function dateFormat(): string
239    {
240        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
241        return self::$translator->translate('%j %F %Y');
242    }
243
244    /**
245     * Convert the digits 0-9 into the local script
246     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
247     *
248     * @param string|int $n
249     *
250     * @return string
251     */
252    public static function digits($n): string
253    {
254        return self::$locale->digits((string) $n);
255    }
256
257    /**
258     * What is the direction of the current locale
259     *
260     * @return string "ltr" or "rtl"
261     */
262    public static function direction(): string
263    {
264        return self::$locale->direction();
265    }
266
267    /**
268     * What is the first day of the week.
269     *
270     * @return int Sunday=0, Monday=1, etc.
271     */
272    public static function firstDay(): int
273    {
274        return self::$locale->territory()->firstDay();
275    }
276
277    /**
278     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
279     *
280     * @return string
281     */
282    public static function htmlAttributes(): string
283    {
284        return self::$locale->htmlAttributes();
285    }
286
287    /**
288     * Initialise the translation adapter with a locale setting.
289     *
290     * @param string    $code  Use this locale/language code, or choose one automatically
291     * @param Tree|null $tree
292     * @param bool      $setup During setup, we cannot access the database.
293     *
294     * @return string $string
295     */
296    public static function init(string $code = '', Tree $tree = null, $setup = false): string
297    {
298        if ($code !== '') {
299            // Create the specified locale
300            self::$locale = Locale::create($code);
301        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
302            // Select a previously used locale
303            self::$locale = Locale::create(Session::get('locale'));
304        } else {
305            if ($tree instanceof Tree) {
306                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
307            } else {
308                $default_locale = new LocaleEnUs();
309            }
310
311            // Negotiate with the browser.
312            // Search engines don't negotiate.  They get the default locale of the tree.
313            if ($setup) {
314                $installed_locales = app(ModuleService::class)->setupLanguages()
315                    ->map(function (ModuleLanguageInterface $module): LocaleInterface {
316                        return $module->locale();
317                    });
318            } else {
319                $installed_locales = self::installedLocales();
320            }
321
322            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
323        }
324
325        $cache_dir  = WT_DATA_DIR . 'cache/';
326        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
327        if (file_exists($cache_file)) {
328            $filemtime = filemtime($cache_file);
329        } else {
330            $filemtime = 0;
331        }
332
333        // Load the translation file(s)
334        $translation_files = [
335            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
336        ];
337
338        // Rebuild files after one hour
339        $rebuild_cache = time() > $filemtime + 3600;
340        // Rebuild files if any translation file has been updated
341        foreach ($translation_files as $translation_file) {
342            if (filemtime($translation_file) > $filemtime) {
343                $rebuild_cache = true;
344                break;
345            }
346        }
347
348        if ($rebuild_cache) {
349            $translations = [];
350            foreach ($translation_files as $translation_file) {
351                $translation  = new Translation($translation_file);
352                $translations = array_merge($translations, $translation->asArray());
353            }
354            try {
355                File::mkdir($cache_dir);
356                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
357            } catch (Exception $ex) {
358                // During setup, we may not have been able to create it.
359            }
360        } else {
361            $translations = include $cache_file;
362        }
363
364        // Add translations from custom modules (but not during setup)
365        if (!$setup) {
366            $custom_modules = app(ModuleService::class)
367                ->findByInterface(ModuleCustomInterface::class);
368
369            foreach ($custom_modules as $custom_module) {
370                $custom_translations = $custom_module->customTranslations(self::$locale->languageTag());
371                $translations        = array_merge($translations, $custom_translations);
372            }
373        }
374
375        // Create a translator
376        self::$translator = new Translator($translations, self::$locale->pluralRule());
377
378        /* I18N: This punctuation is used to separate lists of items */
379        self::$list_separator = self::translate(', ');
380
381        // Create a collator
382        try {
383            if (class_exists('Collator')) {
384                // Symfony provides a very incomplete polyfill - which cannot be used.
385                self::$collator = new Collator(self::$locale->code());
386                // Ignore upper/lower case differences
387                self::$collator->setStrength(Collator::SECONDARY);
388            }
389        } catch (Exception $ex) {
390            // PHP-INTL is not installed?  We'll use a fallback later.
391            self::$collator = null;
392        }
393
394        return self::$locale->languageTag();
395    }
396
397    /**
398     * All locales for which a translation file exists.
399     *
400     * @return Collection
401     * @return LocaleInterface[]
402     */
403    public static function installedLocales(): Collection
404    {
405        return app(ModuleService::class)
406            ->findByInterface(ModuleLanguageInterface::class, true)
407            ->map(function (ModuleLanguageInterface $module): LocaleInterface {
408                return $module->locale();
409            });
410    }
411
412    /**
413     * Return the endonym for a given language - as per http://cldr.unicode.org/
414     *
415     * @param string $locale
416     *
417     * @return string
418     */
419    public static function languageName(string $locale): string
420    {
421        return Locale::create($locale)->endonym();
422    }
423
424    /**
425     * Return the script used by a given language
426     *
427     * @param string $locale
428     *
429     * @return string
430     */
431    public static function languageScript(string $locale): string
432    {
433        return Locale::create($locale)->script()->code();
434    }
435
436    /**
437     * Translate a number into the local representation.
438     * e.g. 12345.67 becomes
439     * en: 12,345.67
440     * fr: 12 345,67
441     * de: 12.345,67
442     *
443     * @param float $n
444     * @param int   $precision
445     *
446     * @return string
447     */
448    public static function number(float $n, int $precision = 0): string
449    {
450        return self::$locale->number(round($n, $precision));
451    }
452
453    /**
454     * Translate a fraction into a percentage.
455     * e.g. 0.123 becomes
456     * en: 12.3%
457     * fr: 12,3 %
458     * de: 12,3%
459     *
460     * @param float $n
461     * @param int   $precision
462     *
463     * @return string
464     */
465    public static function percentage(float $n, int $precision = 0): string
466    {
467        return self::$locale->percent(round($n, $precision + 2));
468    }
469
470    /**
471     * Translate a plural string
472     * echo self::plural('There is an error', 'There are errors', $num_errors);
473     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
474     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
475     *
476     * @param string $singular
477     * @param string $plural
478     * @param int    $count
479     * @param string ...$args
480     *
481     * @return string
482     */
483    public static function plural(string $singular, string $plural, int $count, ...$args): string
484    {
485        $message = self::$translator->translatePlural($singular, $plural, $count);
486
487        return sprintf($message, ...$args);
488    }
489
490    /**
491     * UTF8 version of PHP::strrev()
492     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
493     * These do not support UTF8 text direction, so we must mimic it for them.
494     * Numbers are always rendered LTR, even in RTL text.
495     * The visual direction of characters such as parentheses should be reversed.
496     *
497     * @param string $text Text to be reversed
498     *
499     * @return string
500     */
501    public static function reverseText($text): string
502    {
503        // Remove HTML markup - we can't display it and it is LTR.
504        $text = strip_tags($text);
505        // Remove HTML entities.
506        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
507
508        // LTR text doesn't need reversing
509        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
510            return $text;
511        }
512
513        // Mirrored characters
514        $text = strtr($text, self::MIRROR_CHARACTERS);
515
516        $reversed = '';
517        $digits   = '';
518        while ($text !== '') {
519            $letter = mb_substr($text, 0, 1);
520            $text   = mb_substr($text, 1);
521            if (strpos(self::DIGITS, $letter) !== false) {
522                $digits .= $letter;
523            } else {
524                $reversed = $letter . $digits . $reversed;
525                $digits   = '';
526            }
527        }
528
529        return $digits . $reversed;
530    }
531
532    /**
533     * Return the direction (ltr or rtl) for a given script
534     * The PHP/intl library does not provde this information, so we need
535     * our own lookup table.
536     *
537     * @param string $script
538     *
539     * @return string
540     */
541    public static function scriptDirection($script): string
542    {
543        switch ($script) {
544            case 'Arab':
545            case 'Hebr':
546            case 'Mong':
547            case 'Thaa':
548                return 'rtl';
549            default:
550                return 'ltr';
551        }
552    }
553
554    /**
555     * Perform a case-insensitive comparison of two strings.
556     *
557     * @param string $string1
558     * @param string $string2
559     *
560     * @return int
561     */
562    public static function strcasecmp($string1, $string2): int
563    {
564        if (self::$collator instanceof Collator) {
565            return self::$collator->compare($string1, $string2);
566        }
567
568        return strcmp(self::strtolower($string1), self::strtolower($string2));
569    }
570
571    /**
572     * Convert a string to lower case.
573     *
574     * @param string $string
575     *
576     * @return string
577     */
578    public static function strtolower($string): string
579    {
580        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
581            $string = strtr($string, self::DOTLESS_I_TOLOWER);
582        }
583
584        return mb_strtolower($string);
585    }
586
587    /**
588     * Convert a string to upper case.
589     *
590     * @param string $string
591     *
592     * @return string
593     */
594    public static function strtoupper($string): string
595    {
596        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
597            $string = strtr($string, self::DOTLESS_I_TOUPPER);
598        }
599
600        return mb_strtoupper($string);
601    }
602
603    /**
604     * Identify the script used for a piece of text
605     *
606     * @param string $string
607     *
608     * @return string
609     */
610    public static function textScript($string): string
611    {
612        $string = strip_tags($string); // otherwise HTML tags show up as latin
613        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
614        $string = str_replace([
615            '@N.N.',
616            '@P.N.',
617        ], '', $string); // otherwise unknown names show up as latin
618        $pos    = 0;
619        $strlen = strlen($string);
620        while ($pos < $strlen) {
621            // get the Unicode Code Point for the character at position $pos
622            $byte1 = ord($string[$pos]);
623            if ($byte1 < 0x80) {
624                $code_point = $byte1;
625                $chrlen     = 1;
626            } elseif ($byte1 < 0xC0) {
627                // Invalid continuation character
628                return 'Latn';
629            } elseif ($byte1 < 0xE0) {
630                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
631                $chrlen     = 2;
632            } elseif ($byte1 < 0xF0) {
633                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
634                $chrlen     = 3;
635            } elseif ($byte1 < 0xF8) {
636                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
637                $chrlen     = 3;
638            } else {
639                // Invalid UTF
640                return 'Latn';
641            }
642
643            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
644                if ($code_point >= $range[1] && $code_point <= $range[2]) {
645                    return $range[0];
646                }
647            }
648            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
649            $pos += $chrlen;
650        }
651
652        return 'Latn';
653    }
654
655    /**
656     * Convert a number of seconds into a relative time. For example, 630 => "10 hours, 30 minutes ago"
657     *
658     * @param int $seconds
659     *
660     * @return string
661     */
662    public static function timeAgo($seconds): string
663    {
664        $minute = 60;
665        $hour   = 60 * $minute;
666        $day    = 24 * $hour;
667        $month  = 30 * $day;
668        $year   = 365 * $day;
669
670        if ($seconds > $year) {
671            $years = intdiv($seconds, $year);
672
673            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
674        }
675
676        if ($seconds > $month) {
677            $months = intdiv($seconds, $month);
678
679            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
680        }
681
682        if ($seconds > $day) {
683            $days = intdiv($seconds, $day);
684
685            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
686        }
687
688        if ($seconds > $hour) {
689            $hours = intdiv($seconds, $hour);
690
691            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
692        }
693
694        if ($seconds > $minute) {
695            $minutes = intdiv($seconds, $minute);
696
697            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
698        }
699
700        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
701    }
702
703    /**
704     * What format is used to display dates in the current locale?
705     *
706     * @return string
707     */
708    public static function timeFormat(): string
709    {
710        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
711        return self::$translator->translate('%H:%i:%s');
712    }
713
714    /**
715     * Translate a string, and then substitute placeholders
716     * echo I18N::translate('Hello World!');
717     * echo I18N::translate('The %s sat on the mat', 'cat');
718     *
719     * @param string $message
720     * @param string ...$args
721     *
722     * @return string
723     */
724    public static function translate(string $message, ...$args): string
725    {
726        $message = self::$translator->translate($message);
727
728        return sprintf($message, ...$args);
729    }
730
731    /**
732     * Context sensitive version of translate.
733     * echo I18N::translateContext('NOMINATIVE', 'January');
734     * echo I18N::translateContext('GENITIVE', 'January');
735     *
736     * @param string $context
737     * @param string $message
738     * @param string ...$args
739     *
740     * @return string
741     */
742    public static function translateContext(string $context, string $message, ...$args): string
743    {
744        $message = self::$translator->translateContext($context, $message);
745
746        return sprintf($message, ...$args);
747    }
748}
749