xref: /webtrees/app/I18N.php (revision 376123c414a36e5151c2e51777a7c6eb53564481)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use DomainException;
22use Exception;
23use Fisharebest\Localization\Locale;
24use Fisharebest\Localization\Locale\LocaleEnUs;
25use Fisharebest\Localization\Locale\LocaleInterface;
26use Fisharebest\Localization\Translation;
27use Fisharebest\Localization\Translator;
28use Fisharebest\Webtrees\Functions\FunctionsEdit;
29
30/**
31 * Internationalization (i18n) and localization (l10n).
32 */
33class I18N
34{
    /** @var LocaleInterface The current locale (e.g. LocaleEnGb); assigned by init() */
    private static $locale;

    /** @var Translator An object that performs translation; assigned by init() */
    private static $translator;

    /** @var Collator|null From the php-intl library; null until init() creates one, or if creating it failed */
    private static $collator;
43
    // Digits are always rendered LTR, even in RTL text.
    // Used by reverseText() to recognise runs of digits: ASCII digits followed by
    // the Arabic-Indic and Extended Arabic-Indic digit sets.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    // The language codes are compared against the current locale's language code.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    // Substitutions applied by strtolower() before mb_strtolower() for the locales above.
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    // Substitutions applied by strtoupper() before mb_strtoupper() for the locales above.
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];
60
    // The ranges of characters used by each script.
    // Each entry is [script code, first code point, last code point] (both ends inclusive).
    // textScript() returns the script of the FIRST entry containing a code point, so the
    // order matters where ranges overlap: Deva 0xA8E0-0xA8FF lies inside the broad
    // 0x3400-0xFAFF range and must stay ahead of it.
    private const SCRIPT_CHARACTER_RANGES = [
        [
            'Latn',
            0x0041,
            0x005A,
        ],
        [
            'Latn',
            0x0061,
            0x007A,
        ],
        [
            'Latn',
            0x0100,
            0x02AF,
        ],
        [
            'Grek',
            0x0370,
            0x03FF,
        ],
        [
            'Cyrl',
            0x0400,
            0x052F,
        ],
        [
            'Hebr',
            0x0590,
            0x05FF,
        ],
        [
            'Arab',
            0x0600,
            0x06FF,
        ],
        [
            'Arab',
            0x0750,
            0x077F,
        ],
        [
            'Arab',
            0x08A0,
            0x08FF,
        ],
        [
            'Deva',
            0x0900,
            0x097F,
        ],
        [
            'Taml',
            0x0B80,
            0x0BFF,
        ],
        [
            'Sinh',
            0x0D80,
            0x0DFF,
        ],
        [
            'Thai',
            0x0E00,
            0x0E7F,
        ],
        [
            'Geor',
            0x10A0,
            0x10FF,
        ],
        [
            'Grek',
            0x1F00,
            0x1FFF,
        ],
        [
            'Deva',
            0xA8E0,
            0xA8FF,
        ],
        // The next three ranges are mixed CJK, not just Hans.
        [
            'Hans',
            0x3000,
            0x303F,
        ],
        [
            'Hans',
            0x3400,
            0xFAFF,
        ],
        [
            'Hans',
            0x20000,
            0x2FA1F,
        ],
    ];
162
163    // Characters that are displayed in mirror form in RTL text.
164    private const MIRROR_CHARACTERS = [
165        '('  => ')',
166        ')'  => '(',
167        '['  => ']',
168        ']'  => '[',
169        '{'  => '}',
170        '}'  => '{',
171        '<'  => '>',
172        '>'  => '<',
173        '‹ ' => '›',
174        '› ' => '‹',
175        '«'  => '»',
176        '»'  => '«',
177        '﴾ ' => '﴿',
178        '﴿ ' => '﴾',
179        '“ ' => '”',
180        '” ' => '“',
181        '‘ ' => '’',
182        '’ ' => '‘',
183    ];
184
    // Default list of locales to show in the menu.
    // activeLocales() falls back to this list when the LANGUAGES site preference is empty.
    private const DEFAULT_LOCALES = [
        'ar',
        'bg',
        'bs',
        'ca',
        'cs',
        'da',
        'de',
        'el',
        'en-GB',
        'en-US',
        'es',
        'et',
        'fi',
        'fr',
        'he',
        'hr',
        'hu',
        'is',
        'it',
        'ka',
        'kk',
        'lt',
        'mr',
        'nb',
        'nl',
        'nn',
        'pl',
        'pt',
        'ru',
        'sk',
        'sv',
        'tr',
        'uk',
        'vi',
        'zh-Hans',
    ];
223
    /** @var string Punctuation used to separate list items, typically a comma; set by init() from the translation of ', ' */
    public static $list_separator;
226
227    /**
228     * The prefered locales for this site, or a default list if no preference.
229     *
230     * @return LocaleInterface[]
231     */
232    public static function activeLocales(): array
233    {
234        $code_list = Site::getPreference('LANGUAGES');
235
236        if ($code_list === '') {
237            $codes = self::DEFAULT_LOCALES;
238        } else {
239            $codes = explode(',', $code_list);
240        }
241
242        $locales = [];
243        foreach ($codes as $code) {
244            if (file_exists(WT_ROOT . 'resources/lang/' . $code . '/messages.mo')) {
245                try {
246                    $locales[] = Locale::create($code);
247                } catch (Exception $ex) {
248                    // No such locale exists?
249                }
250            }
251        }
252
253        usort($locales, '\Fisharebest\Localization\Locale::compare');
254
255        return $locales;
256    }
257
258    /**
259     * Which MySQL collation should be used for this locale?
260     *
261     * @return string
262     */
263    public static function collation(): string
264    {
265        $collation = self::$locale->collation();
266        switch ($collation) {
267            case 'croatian_ci':
268            case 'german2_ci':
269            case 'vietnamese_ci':
270                // Only available in MySQL 5.6
271                return 'utf8_unicode_ci';
272            default:
273                return 'utf8_' . $collation;
274        }
275    }
276
277    /**
278     * What format is used to display dates in the current locale?
279     *
280     * @return string
281     */
282    public static function dateFormat(): string
283    {
284        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
285        return self::$translator->translate('%j %F %Y');
286    }
287
288    /**
289     * Generate consistent I18N for datatables.js
290     *
291     * @param int[] $lengths An optional array of page lengths
292     *
293     * @return string
294     */
295    public static function datatablesI18N(array $lengths = [
296        10,
297        20,
298        30,
299        50,
300        100,
301        -1,
302    ]): string
303    {
304        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');
305
306        return
307            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
308            '"language": {' .
309            ' "paginate": {' .
310            '  "first":    "' . self::translate('first') . '",' .
311            '  "last":     "' . self::translate('last') . '",' .
312            '  "next":     "' . self::translate('next') . '",' .
313            '  "previous": "' . self::translate('previous') . '"' .
314            ' },' .
315            ' "emptyTable":     "' . self::translate('No records to display') . '",' .
316            ' "info":           "' . /* I18N: %s are placeholders for numbers */
317            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
318            ' "infoEmpty":      "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
319            ' "infoFiltered":   "' . /* I18N: %s is a placeholder for a number */
320            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
321            ' "lengthMenu":     "' . /* I18N: %s is a number of records per page */
322            self::translate('Display %s', addslashes($length_options)) . '",' .
323            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
324            ' "processing":     "' . self::translate('Loading…') . '",' .
325            ' "search":         "' . self::translate('Filter') . '",' .
326            ' "zeroRecords":    "' . self::translate('No records to display') . '"' .
327            '}';
328    }
329
330    /**
331     * Convert the digits 0-9 into the local script
332     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
333     *
334     * @param string|int $n
335     *
336     * @return string
337     */
338    public static function digits($n): string
339    {
340        return self::$locale->digits((string) $n);
341    }
342
343    /**
344     * What is the direction of the current locale
345     *
346     * @return string "ltr" or "rtl"
347     */
348    public static function direction(): string
349    {
350        return self::$locale->direction();
351    }
352
353    /**
354     * What is the first day of the week.
355     *
356     * @return int Sunday=0, Monday=1, etc.
357     */
358    public static function firstDay(): int
359    {
360        return self::$locale->territory()->firstDay();
361    }
362
363    /**
364     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
365     *
366     * @return string
367     */
368    public static function htmlAttributes(): string
369    {
370        return self::$locale->htmlAttributes();
371    }
372
373    /**
374     * Initialise the translation adapter with a locale setting.
375     *
376     * @param string    $code Use this locale/language code, or choose one automatically
377     * @param Tree|null $tree
378     *
379     * @return string $string
380     */
381    public static function init(string $code = '', Tree $tree = null): string
382    {
383        if ($code !== '') {
384            // Create the specified locale
385            self::$locale = Locale::create($code);
386        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
387            // Select a previously used locale
388            self::$locale = Locale::create(Session::get('locale'));
389        } else {
390            if ($tree instanceof Tree) {
391                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
392            } else {
393                $default_locale = new LocaleEnUs();
394            }
395
396            // Negotiate with the browser.
397            // Search engines don't negotiate.  They get the default locale of the tree.
398            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
399        }
400
401        $cache_dir  = WT_DATA_DIR . 'cache/';
402        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
403        if (file_exists($cache_file)) {
404            $filemtime = filemtime($cache_file);
405        } else {
406            $filemtime = 0;
407        }
408
409        // Load the translation file(s)
410        $translation_files = [
411            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
412        ];
413
414        // Rebuild files after one hour
415        $rebuild_cache = time() > $filemtime + 3600;
416        // Rebuild files if any translation file has been updated
417        foreach ($translation_files as $translation_file) {
418            if (filemtime($translation_file) > $filemtime) {
419                $rebuild_cache = true;
420                break;
421            }
422        }
423
424        if ($rebuild_cache) {
425            $translations = [];
426            foreach ($translation_files as $translation_file) {
427                $translation  = new Translation($translation_file);
428                $translations = array_merge($translations, $translation->asArray());
429            }
430            try {
431                File::mkdir($cache_dir);
432                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
433            } catch (Exception $ex) {
434                // During setup, we may not have been able to create it.
435            }
436        } else {
437            $translations = include $cache_file;
438        }
439
440        // Create a translator
441        self::$translator = new Translator($translations, self::$locale->pluralRule());
442
443        /* I18N: This punctuation is used to separate lists of items */
444        self::$list_separator = self::translate(', ');
445
446        // Create a collator
447        try {
448            if (class_exists('Collator')) {
449                // Symfony provides a very incomplete polyfill - which cannot be used.
450                self::$collator = new Collator(self::$locale->code());
451                // Ignore upper/lower case differences
452                self::$collator->setStrength(Collator::SECONDARY);
453            }
454        } catch (Exception $ex) {
455            // PHP-INTL is not installed?  We'll use a fallback later.
456            self::$collator = null;
457        }
458
459        return self::$locale->languageTag();
460    }
461
462    /**
463     * All locales for which a translation file exists.
464     *
465     * @return LocaleInterface[]
466     */
467    public static function installedLocales(): array
468    {
469        $locales = [];
470
471        foreach (glob(WT_ROOT . 'resources/lang/*/messages.mo') as $file) {
472            try {
473                $locales[] = Locale::create(basename(dirname($file)));
474            } catch (DomainException $ex) {
475                // Not a recognised locale
476            }
477        }
478        usort($locales, '\Fisharebest\Localization\Locale::compare');
479
480        return $locales;
481    }
482
483    /**
484     * Return the endonym for a given language - as per http://cldr.unicode.org/
485     *
486     * @param string $locale
487     *
488     * @return string
489     */
490    public static function languageName(string $locale): string
491    {
492        return Locale::create($locale)->endonym();
493    }
494
495    /**
496     * Return the script used by a given language
497     *
498     * @param string $locale
499     *
500     * @return string
501     */
502    public static function languageScript(string $locale): string
503    {
504        return Locale::create($locale)->script()->code();
505    }
506
507    /**
508     * Translate a number into the local representation.
509     * e.g. 12345.67 becomes
510     * en: 12,345.67
511     * fr: 12 345,67
512     * de: 12.345,67
513     *
514     * @param float $n
515     * @param int   $precision
516     *
517     * @return string
518     */
519    public static function number(float $n, int $precision = 0): string
520    {
521        return self::$locale->number(round($n, $precision));
522    }
523
524    /**
525     * Translate a fraction into a percentage.
526     * e.g. 0.123 becomes
527     * en: 12.3%
528     * fr: 12,3 %
529     * de: 12,3%
530     *
531     * @param float $n
532     * @param int   $precision
533     *
534     * @return string
535     */
536    public static function percentage(float $n, int $precision = 0): string
537    {
538        return self::$locale->percent(round($n, $precision + 2));
539    }
540
541    /**
542     * Translate a plural string
543     * echo self::plural('There is an error', 'There are errors', $num_errors);
544     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
545     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
546     *
547     * @param string $singular
548     * @param string $plural
549     * @param int    $count
550     * @param string ...$args
551     *
552     * @return string
553     */
554    public static function plural(string $singular, string $plural, int $count, ...$args): string
555    {
556        $message = self::$translator->translatePlural($singular, $plural, $count);
557
558        return sprintf($message, ...$args);
559    }
560
561    /**
562     * UTF8 version of PHP::strrev()
563     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
564     * These do not support UTF8 text direction, so we must mimic it for them.
565     * Numbers are always rendered LTR, even in RTL text.
566     * The visual direction of characters such as parentheses should be reversed.
567     *
568     * @param string $text Text to be reversed
569     *
570     * @return string
571     */
572    public static function reverseText($text): string
573    {
574        // Remove HTML markup - we can't display it and it is LTR.
575        $text = strip_tags($text);
576        // Remove HTML entities.
577        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
578
579        // LTR text doesn't need reversing
580        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
581            return $text;
582        }
583
584        // Mirrored characters
585        $text = strtr($text, self::MIRROR_CHARACTERS);
586
587        $reversed = '';
588        $digits   = '';
589        while ($text !== '') {
590            $letter = mb_substr($text, 0, 1);
591            $text   = mb_substr($text, 1);
592            if (strpos(self::DIGITS, $letter) !== false) {
593                $digits .= $letter;
594            } else {
595                $reversed = $letter . $digits . $reversed;
596                $digits   = '';
597            }
598        }
599
600        return $digits . $reversed;
601    }
602
603    /**
604     * Return the direction (ltr or rtl) for a given script
605     * The PHP/intl library does not provde this information, so we need
606     * our own lookup table.
607     *
608     * @param string $script
609     *
610     * @return string
611     */
612    public static function scriptDirection($script): string
613    {
614        switch ($script) {
615            case 'Arab':
616            case 'Hebr':
617            case 'Mong':
618            case 'Thaa':
619                return 'rtl';
620            default:
621                return 'ltr';
622        }
623    }
624
625    /**
626     * Perform a case-insensitive comparison of two strings.
627     *
628     * @param string $string1
629     * @param string $string2
630     *
631     * @return int
632     */
633    public static function strcasecmp($string1, $string2): int
634    {
635        if (self::$collator instanceof Collator) {
636            return self::$collator->compare($string1, $string2);
637        }
638
639        return strcmp(self::strtolower($string1), self::strtolower($string2));
640    }
641
642    /**
643     * Convert a string to lower case.
644     *
645     * @param string $string
646     *
647     * @return string
648     */
649    public static function strtolower($string): string
650    {
651        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
652            $string = strtr($string, self::DOTLESS_I_TOLOWER);
653        }
654
655        return mb_strtolower($string);
656    }
657
658    /**
659     * Convert a string to upper case.
660     *
661     * @param string $string
662     *
663     * @return string
664     */
665    public static function strtoupper($string): string
666    {
667        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
668            $string = strtr($string, self::DOTLESS_I_TOUPPER);
669        }
670
671        return mb_strtoupper($string);
672    }
673
674    /**
675     * Identify the script used for a piece of text
676     *
677     * @param string $string
678     *
679     * @return string
680     */
681    public static function textScript($string): string
682    {
683        $string = strip_tags($string); // otherwise HTML tags show up as latin
684        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
685        $string = str_replace([
686            '@N.N.',
687            '@P.N.',
688        ], '', $string); // otherwise unknown names show up as latin
689        $pos    = 0;
690        $strlen = strlen($string);
691        while ($pos < $strlen) {
692            // get the Unicode Code Point for the character at position $pos
693            $byte1 = ord($string[$pos]);
694            if ($byte1 < 0x80) {
695                $code_point = $byte1;
696                $chrlen     = 1;
697            } elseif ($byte1 < 0xC0) {
698                // Invalid continuation character
699                return 'Latn';
700            } elseif ($byte1 < 0xE0) {
701                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
702                $chrlen     = 2;
703            } elseif ($byte1 < 0xF0) {
704                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
705                $chrlen     = 3;
706            } elseif ($byte1 < 0xF8) {
707                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
708                $chrlen     = 3;
709            } else {
710                // Invalid UTF
711                return 'Latn';
712            }
713
714            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
715                if ($code_point >= $range[1] && $code_point <= $range[2]) {
716                    return $range[0];
717                }
718            }
719            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
720            $pos += $chrlen;
721        }
722
723        return 'Latn';
724    }
725
726    /**
727     * Convert a number of seconds into a relative time. For example, 630 => "10 hours, 30 minutes ago"
728     *
729     * @param int $seconds
730     *
731     * @return string
732     */
733    public static function timeAgo($seconds): string
734    {
735        $minute = 60;
736        $hour   = 60 * $minute;
737        $day    = 24 * $hour;
738        $month  = 30 * $day;
739        $year   = 365 * $day;
740
741        if ($seconds > $year) {
742            $years = intdiv($seconds, $year);
743
744            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
745        }
746
747        if ($seconds > $month) {
748            $months = intdiv($seconds, $month);
749
750            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
751        }
752
753        if ($seconds > $day) {
754            $days = intdiv($seconds, $day);
755
756            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
757        }
758
759        if ($seconds > $hour) {
760            $hours = intdiv($seconds, $hour);
761
762            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
763        }
764
765        if ($seconds > $minute) {
766            $minutes = intdiv($seconds, $minute);
767
768            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
769        }
770
771        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
772    }
773
774    /**
775     * What format is used to display dates in the current locale?
776     *
777     * @return string
778     */
779    public static function timeFormat(): string
780    {
781        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
782        return self::$translator->translate('%H:%i:%s');
783    }
784
785    /**
786     * Translate a string, and then substitute placeholders
787     * echo I18N::translate('Hello World!');
788     * echo I18N::translate('The %s sat on the mat', 'cat');
789     *
790     * @param string $message
791     * @param string ...$args
792     *
793     * @return string
794     */
795    public static function translate(string $message, ...$args): string
796    {
797        $message = self::$translator->translate($message);
798
799        return sprintf($message, ...$args);
800    }
801
802    /**
803     * Context sensitive version of translate.
804     * echo I18N::translateContext('NOMINATIVE', 'January');
805     * echo I18N::translateContext('GENITIVE', 'January');
806     *
807     * @param string $context
808     * @param string $message
809     * @param string ...$args
810     *
811     * @return string
812     */
813    public static function translateContext(string $context, string $message, ...$args): string
814    {
815        $message = self::$translator->translateContext($context, $message);
816
817        return sprintf($message, ...$args);
818    }
819}
820