xref: /webtrees/app/I18N.php (revision 8898179db9ee9ab76e7912468223d7b1fdcddfbc)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use DomainException;
22use Exception;
23use Fisharebest\Localization\Locale;
24use Fisharebest\Localization\Locale\LocaleEnUs;
25use Fisharebest\Localization\Locale\LocaleInterface;
26use Fisharebest\Localization\Translation;
27use Fisharebest\Localization\Translator;
28use Fisharebest\Webtrees\Functions\FunctionsEdit;
29
30/**
31 * Internationalization (i18n) and localization (l10n).
32 */
33class I18N
34{
    /** @var LocaleInterface The current locale (e.g. LocaleEnGb). Assigned by init(). */
    private static $locale;

    /** @var Translator An object that performs translation. Assigned by init(). */
    private static $translator;

    /** @var  Collator|null From the php-intl library. Created by init(); null when no collator could be created. */
    private static $collator;
43
    // Digits are always rendered LTR, even in RTL text.
    // The list holds the ASCII digits, then the Arabic-Indic digits (U+0660 to U+0669)
    // and the Extended Arabic-Indic digits (U+06F0 to U+06F9).
    // reverseText() uses it to recognise runs of digits that must keep their order.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    // strtolower() and strtoupper() compare the current language code against this list.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    // Replacements applied (with strtr) before mb_strtolower() for the locales above.
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    // Replacements applied (with strtr) before mb_strtoupper() for the locales above.
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];
60
    // The ranges of characters used by each script.
    // Each entry is [script code, first code point, last code point], both ends inclusive.
    // textScript() walks this list in order and returns the script of the first entry
    // that contains a character, so where ranges overlap the earlier entry wins
    // (e.g. Deva 0xA8E0-0xA8FF is listed before the broad 0x3400-0xFAFF CJK range).
    private const SCRIPT_CHARACTER_RANGES = [
        [
            'Latn',
            0x0041,
            0x005A,
        ],
        [
            'Latn',
            0x0061,
            0x007A,
        ],
        [
            'Latn',
            0x0100,
            0x02AF,
        ],
        [
            'Grek',
            0x0370,
            0x03FF,
        ],
        [
            'Cyrl',
            0x0400,
            0x052F,
        ],
        [
            'Hebr',
            0x0590,
            0x05FF,
        ],
        [
            'Arab',
            0x0600,
            0x06FF,
        ],
        [
            'Arab',
            0x0750,
            0x077F,
        ],
        [
            'Arab',
            0x08A0,
            0x08FF,
        ],
        [
            'Deva',
            0x0900,
            0x097F,
        ],
        [
            'Taml',
            0x0B80,
            0x0BFF,
        ],
        [
            'Sinh',
            0x0D80,
            0x0DFF,
        ],
        [
            'Thai',
            0x0E00,
            0x0E7F,
        ],
        [
            'Geor',
            0x10A0,
            0x10FF,
        ],
        [
            'Grek',
            0x1F00,
            0x1FFF,
        ],
        [
            'Deva',
            0xA8E0,
            0xA8FF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3000,
            0x303F,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3400,
            0xFAFF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x20000,
            0x2FA1F,
        ],
    ];
162
163    // Characters that are displayed in mirror form in RTL text.
164    private const MIRROR_CHARACTERS = [
165        '('  => ')',
166        ')'  => '(',
167        '['  => ']',
168        ']'  => '[',
169        '{'  => '}',
170        '}'  => '{',
171        '<'  => '>',
172        '>'  => '<',
173        '‹ ' => '›',
174        '› ' => '‹',
175        '«'  => '»',
176        '»'  => '«',
177        '﴾ ' => '﴿',
178        '﴿ ' => '﴾',
179        '“ ' => '”',
180        '” ' => '“',
181        '‘ ' => '’',
182        '’ ' => '‘',
183    ];
184
    // Default list of locales to show in the menu.
    // activeLocales() uses this list when the site's LANGUAGES preference is empty.
    private const DEFAULT_LOCALES = [
        'ar',
        'bg',
        'bs',
        'ca',
        'cs',
        'da',
        'de',
        'el',
        'en-GB',
        'en-US',
        'es',
        'et',
        'fi',
        'fr',
        'he',
        'hr',
        'hu',
        'is',
        'it',
        'ka',
        'kk',
        'lt',
        'mr',
        'nb',
        'nl',
        'nn',
        'pl',
        'pt',
        'ru',
        'sk',
        'sv',
        'tr',
        'uk',
        'vi',
        'zh-Hans',
    ];
223
    /** @var string Punctuation used to separate list items, typically a comma. Set by init() to the translation of ', '. */
    public static $list_separator;
226
227    /**
228     * The prefered locales for this site, or a default list if no preference.
229     *
230     * @return LocaleInterface[]
231     */
232    public static function activeLocales(): array
233    {
234        $code_list = Site::getPreference('LANGUAGES');
235
236        if ($code_list === '') {
237            $codes = self::DEFAULT_LOCALES;
238        } else {
239            $codes = explode(',', $code_list);
240        }
241
242        $locales = [];
243        foreach ($codes as $code) {
244            if (file_exists(WT_ROOT . 'resources/lang/' . $code . '/messages.mo')) {
245                try {
246                    $locales[] = Locale::create($code);
247                } catch (Exception $ex) {
248                    // No such locale exists?
249                }
250            }
251        }
252
253        usort($locales, '\Fisharebest\Localization\Locale::compare');
254
255        return $locales;
256    }
257
258    /**
259     * Which MySQL collation should be used for this locale?
260     *
261     * @return string
262     */
263    public static function collation()
264    {
265        $collation = self::$locale->collation();
266        switch ($collation) {
267            case 'croatian_ci':
268            case 'german2_ci':
269            case 'vietnamese_ci':
270                // Only available in MySQL 5.6
271                return 'utf8_unicode_ci';
272            default:
273                return 'utf8_' . $collation;
274        }
275    }
276
277    /**
278     * What format is used to display dates in the current locale?
279     *
280     * @return string
281     */
282    public static function dateFormat(): string
283    {
284        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
285        return self::$translator->translate('%j %F %Y');
286    }
287
288    /**
289     * Generate consistent I18N for datatables.js
290     *
291     * @param int[] $lengths An optional array of page lengths
292     *
293     * @return string
294     */
295    public static function datatablesI18N(array $lengths = [
296        10,
297        20,
298        30,
299        50,
300        100,
301        -1,
302    ]): string
303    {
304        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');
305
306        return
307            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
308            '"language": {' .
309            ' "paginate": {' .
310            '  "first":    "' . self::translate('first') . '",' .
311            '  "last":     "' . self::translate('last') . '",' .
312            '  "next":     "' . self::translate('next') . '",' .
313            '  "previous": "' . self::translate('previous') . '"' .
314            ' },' .
315            ' "emptyTable":     "' . self::translate('No records to display') . '",' .
316            ' "info":           "' . /* I18N: %s are placeholders for numbers */
317            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
318            ' "infoEmpty":      "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
319            ' "infoFiltered":   "' . /* I18N: %s is a placeholder for a number */
320            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
321            ' "lengthMenu":     "' . /* I18N: %s is a number of records per page */
322            self::translate('Display %s', addslashes($length_options)) . '",' .
323            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
324            ' "processing":     "' . self::translate('Loading…') . '",' .
325            ' "search":         "' . self::translate('Filter') . '",' .
326            ' "zeroRecords":    "' . self::translate('No records to display') . '"' .
327            '}';
328    }
329
330    /**
331     * Convert the digits 0-9 into the local script
332     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
333     *
334     * @param string|int $n
335     *
336     * @return string
337     */
338    public static function digits($n): string
339    {
340        return self::$locale->digits((string) $n);
341    }
342
343    /**
344     * What is the direction of the current locale
345     *
346     * @return string "ltr" or "rtl"
347     */
348    public static function direction(): string
349    {
350        return self::$locale->direction();
351    }
352
353    /**
354     * What is the first day of the week.
355     *
356     * @return int Sunday=0, Monday=1, etc.
357     */
358    public static function firstDay(): int
359    {
360        return self::$locale->territory()->firstDay();
361    }
362
363    /**
364     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
365     *
366     * @return string
367     */
368    public static function htmlAttributes(): string
369    {
370        return self::$locale->htmlAttributes();
371    }
372
373    /**
374     * Initialise the translation adapter with a locale setting.
375     *
376     * @param string    $code Use this locale/language code, or choose one automatically
377     * @param Tree|null $tree
378     *
379     * @return string $string
380     */
381    public static function init(string $code = '', Tree $tree = null): string
382    {
383        mb_internal_encoding('UTF-8');
384
385        if ($code !== '') {
386            // Create the specified locale
387            self::$locale = Locale::create($code);
388        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
389            // Select a previously used locale
390            self::$locale = Locale::create(Session::get('locale'));
391        } else {
392            if ($tree instanceof Tree) {
393                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
394            } else {
395                $default_locale = new LocaleEnUs();
396            }
397
398            // Negotiate with the browser.
399            // Search engines don't negotiate.  They get the default locale of the tree.
400            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
401        }
402
403        $cache_dir  = WT_DATA_DIR . 'cache/';
404        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
405        if (file_exists($cache_file)) {
406            $filemtime = filemtime($cache_file);
407        } else {
408            $filemtime = 0;
409        }
410
411        // Load the translation file(s)
412        $translation_files = [
413            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
414        ];
415
416        // Rebuild files after one hour
417        $rebuild_cache = time() > $filemtime + 3600;
418        // Rebuild files if any translation file has been updated
419        foreach ($translation_files as $translation_file) {
420            if (filemtime($translation_file) > $filemtime) {
421                $rebuild_cache = true;
422                break;
423            }
424        }
425
426        if ($rebuild_cache) {
427            $translations = [];
428            foreach ($translation_files as $translation_file) {
429                $translation  = new Translation($translation_file);
430                $translations = array_merge($translations, $translation->asArray());
431            }
432            try {
433                File::mkdir($cache_dir);
434                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
435            } catch (Exception $ex) {
436                // During setup, we may not have been able to create it.
437            }
438        } else {
439            $translations = include $cache_file;
440        }
441
442        // Create a translator
443        self::$translator = new Translator($translations, self::$locale->pluralRule());
444
445        /* I18N: This punctuation is used to separate lists of items */
446        self::$list_separator = self::translate(', ');
447
448        // Create a collator
449        try {
450            if (class_exists('Collator')) {
451                // Symfony provides a very incomplete polyfill - which cannot be used.
452                self::$collator = new Collator(self::$locale->code());
453                // Ignore upper/lower case differences
454                self::$collator->setStrength(Collator::SECONDARY);
455            }
456        } catch (Exception $ex) {
457            // PHP-INTL is not installed?  We'll use a fallback later.
458            self::$collator = null;
459        }
460
461        return self::$locale->languageTag();
462    }
463
464    /**
465     * All locales for which a translation file exists.
466     *
467     * @return LocaleInterface[]
468     */
469    public static function installedLocales(): array
470    {
471        $locales = [];
472
473        foreach (glob(WT_ROOT . 'resources/lang/*/messages.mo') as $file) {
474            try {
475                $locales[] = Locale::create(basename($file, '.mo'));
476            } catch (DomainException $ex) {
477                // Not a recognised locale
478            }
479        }
480        usort($locales, '\Fisharebest\Localization\Locale::compare');
481
482        return $locales;
483    }
484
485    /**
486     * Return the endonym for a given language - as per http://cldr.unicode.org/
487     *
488     * @param string $locale
489     *
490     * @return string
491     */
492    public static function languageName(string $locale): string
493    {
494        return Locale::create($locale)->endonym();
495    }
496
497    /**
498     * Return the script used by a given language
499     *
500     * @param string $locale
501     *
502     * @return string
503     */
504    public static function languageScript(string $locale): string
505    {
506        return Locale::create($locale)->script()->code();
507    }
508
509    /**
510     * Translate a number into the local representation.
511     * e.g. 12345.67 becomes
512     * en: 12,345.67
513     * fr: 12 345,67
514     * de: 12.345,67
515     *
516     * @param float $n
517     * @param int   $precision
518     *
519     * @return string
520     */
521    public static function number(float $n, int $precision = 0): string
522    {
523        return self::$locale->number(round($n, $precision));
524    }
525
526    /**
527     * Translate a fraction into a percentage.
528     * e.g. 0.123 becomes
529     * en: 12.3%
530     * fr: 12,3 %
531     * de: 12,3%
532     *
533     * @param float $n
534     * @param int   $precision
535     *
536     * @return string
537     */
538    public static function percentage(float $n, int $precision = 0): string
539    {
540        return self::$locale->percent(round($n, $precision + 2));
541    }
542
543    /**
544     * Translate a plural string
545     * echo self::plural('There is an error', 'There are errors', $num_errors);
546     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
547     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
548     *
549     * @param string $singular
550     * @param string $plural
551     * @param int    $count
552     * @param string ...$args
553     *
554     * @return string
555     */
556    public static function plural(string $singular, string $plural, int $count, ...$args): string
557    {
558        $message = self::$translator->translatePlural($singular, $plural, $count);
559
560        return sprintf($message, ...$args);
561    }
562
563    /**
564     * UTF8 version of PHP::strrev()
565     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
566     * These do not support UTF8 text direction, so we must mimic it for them.
567     * Numbers are always rendered LTR, even in RTL text.
568     * The visual direction of characters such as parentheses should be reversed.
569     *
570     * @param string $text Text to be reversed
571     *
572     * @return string
573     */
574    public static function reverseText($text): string
575    {
576        // Remove HTML markup - we can't display it and it is LTR.
577        $text = strip_tags($text);
578        // Remove HTML entities.
579        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
580
581        // LTR text doesn't need reversing
582        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
583            return $text;
584        }
585
586        // Mirrored characters
587        $text = strtr($text, self::MIRROR_CHARACTERS);
588
589        $reversed = '';
590        $digits   = '';
591        while ($text != '') {
592            $letter = mb_substr($text, 0, 1);
593            $text   = mb_substr($text, 1);
594            if (strpos(self::DIGITS, $letter) !== false) {
595                $digits .= $letter;
596            } else {
597                $reversed = $letter . $digits . $reversed;
598                $digits   = '';
599            }
600        }
601
602        return $digits . $reversed;
603    }
604
605    /**
606     * Return the direction (ltr or rtl) for a given script
607     * The PHP/intl library does not provde this information, so we need
608     * our own lookup table.
609     *
610     * @param string $script
611     *
612     * @return string
613     */
614    public static function scriptDirection($script)
615    {
616        switch ($script) {
617            case 'Arab':
618            case 'Hebr':
619            case 'Mong':
620            case 'Thaa':
621                return 'rtl';
622            default:
623                return 'ltr';
624        }
625    }
626
627    /**
628     * Perform a case-insensitive comparison of two strings.
629     *
630     * @param string $string1
631     * @param string $string2
632     *
633     * @return int
634     */
635    public static function strcasecmp($string1, $string2)
636    {
637        if (self::$collator instanceof Collator) {
638            return self::$collator->compare($string1, $string2);
639        } else {
640            return strcmp(self::strtolower($string1), self::strtolower($string2));
641        }
642    }
643
644    /**
645     * Convert a string to lower case.
646     *
647     * @param string $string
648     *
649     * @return string
650     */
651    public static function strtolower($string): string
652    {
653        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
654            $string = strtr($string, self::DOTLESS_I_TOLOWER);
655        }
656
657        return mb_strtolower($string);
658    }
659
660    /**
661     * Convert a string to upper case.
662     *
663     * @param string $string
664     *
665     * @return string
666     */
667    public static function strtoupper($string): string
668    {
669        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
670            $string = strtr($string, self::DOTLESS_I_TOUPPER);
671        }
672
673        return mb_strtoupper($string);
674    }
675
676    /**
677     * Identify the script used for a piece of text
678     *
679     * @param string $string
680     *
681     * @return string
682     */
683    public static function textScript($string): string
684    {
685        $string = strip_tags($string); // otherwise HTML tags show up as latin
686        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
687        $string = str_replace([
688            '@N.N.',
689            '@P.N.',
690        ], '', $string); // otherwise unknown names show up as latin
691        $pos    = 0;
692        $strlen = strlen($string);
693        while ($pos < $strlen) {
694            // get the Unicode Code Point for the character at position $pos
695            $byte1 = ord($string[$pos]);
696            if ($byte1 < 0x80) {
697                $code_point = $byte1;
698                $chrlen     = 1;
699            } elseif ($byte1 < 0xC0) {
700                // Invalid continuation character
701                return 'Latn';
702            } elseif ($byte1 < 0xE0) {
703                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
704                $chrlen     = 2;
705            } elseif ($byte1 < 0xF0) {
706                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
707                $chrlen     = 3;
708            } elseif ($byte1 < 0xF8) {
709                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
710                $chrlen     = 3;
711            } else {
712                // Invalid UTF
713                return 'Latn';
714            }
715
716            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
717                if ($code_point >= $range[1] && $code_point <= $range[2]) {
718                    return $range[0];
719                }
720            }
721            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
722            $pos += $chrlen;
723        }
724
725        return 'Latn';
726    }
727
728    /**
729     * Convert a number of seconds into a relative time. For example, 630 => "10 hours, 30 minutes ago"
730     *
731     * @param int $seconds
732     *
733     * @return string
734     */
735    public static function timeAgo($seconds)
736    {
737        $minute = 60;
738        $hour   = 60 * $minute;
739        $day    = 24 * $hour;
740        $month  = 30 * $day;
741        $year   = 365 * $day;
742
743        if ($seconds > $year) {
744            $years = intdiv($seconds, $year);
745
746            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
747        }
748
749        if ($seconds > $month) {
750            $months = intdiv($seconds, $month);
751
752            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
753        }
754
755        if ($seconds > $day) {
756            $days = intdiv($seconds, $day);
757
758            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
759        }
760
761        if ($seconds > $hour) {
762            $hours = intdiv($seconds, $hour);
763
764            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
765        }
766
767        if ($seconds > $minute) {
768            $minutes = intdiv($seconds, $minute);
769
770            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
771        }
772
773        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
774    }
775
776    /**
777     * What format is used to display dates in the current locale?
778     *
779     * @return string
780     */
781    public static function timeFormat(): string
782    {
783        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
784        return self::$translator->translate('%H:%i:%s');
785    }
786
787    /**
788     * Translate a string, and then substitute placeholders
789     * echo I18N::translate('Hello World!');
790     * echo I18N::translate('The %s sat on the mat', 'cat');
791     *
792     * @param string $message
793     * @param string ...$args
794     *
795     * @return string
796     */
797    public static function translate(string $message, ...$args): string
798    {
799        $message = self::$translator->translate($message);
800
801        return sprintf($message, ...$args);
802    }
803
804    /**
805     * Context sensitive version of translate.
806     * echo I18N::translateContext('NOMINATIVE', 'January');
807     * echo I18N::translateContext('GENITIVE', 'January');
808     *
809     * @param string $context
810     * @param string $message
811     * @param string ...$args
812     *
813     * @return string
814     */
815    public static function translateContext(string $context, string $message, ...$args): string
816    {
817        $message = self::$translator->translateContext($context, $message);
818
819        return sprintf($message, ...$args);
820    }
821}
822