xref: /webtrees/app/I18N.php (revision e9e853987811e8bd423dccf26f6ef57571f393eb)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use Exception;
22use Fisharebest\Localization\Locale;
23use Fisharebest\Localization\Locale\LocaleEnUs;
24use Fisharebest\Localization\Locale\LocaleInterface;
25use Fisharebest\Localization\Translation;
26use Fisharebest\Localization\Translator;
27use Fisharebest\Webtrees\Functions\FunctionsEdit;
28
29/**
30 * Internationalization (i18n) and localization (l10n).
31 */
32class I18N
33{
34    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
35    private static $locale;
36
37    /** @var Translator An object that performs translation */
38    private static $translator;
39
40    /** @var  Collator|null From the php-intl library */
41    private static $collator;
42
43    // Digits are always rendered LTR, even in RTL text.
44    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';
45
46    // These locales need special handling for the dotless letter I.
47    private const DOTLESS_I_LOCALES = [
48        'az',
49        'tr',
50    ];
51    private const DOTLESS_I_TOLOWER = [
52        'I' => 'ı',
53        'İ' => 'i',
54    ];
55    private const DOTLESS_I_TOUPPER = [
56        'ı' => 'I',
57        'i' => 'İ',
58    ];
59
60    // The ranges of characters used by each script.
61    private const SCRIPT_CHARACTER_RANGES = [
62        [
63            'Latn',
64            0x0041,
65            0x005A,
66        ],
67        [
68            'Latn',
69            0x0061,
70            0x007A,
71        ],
72        [
73            'Latn',
74            0x0100,
75            0x02AF,
76        ],
77        [
78            'Grek',
79            0x0370,
80            0x03FF,
81        ],
82        [
83            'Cyrl',
84            0x0400,
85            0x052F,
86        ],
87        [
88            'Hebr',
89            0x0590,
90            0x05FF,
91        ],
92        [
93            'Arab',
94            0x0600,
95            0x06FF,
96        ],
97        [
98            'Arab',
99            0x0750,
100            0x077F,
101        ],
102        [
103            'Arab',
104            0x08A0,
105            0x08FF,
106        ],
107        [
108            'Deva',
109            0x0900,
110            0x097F,
111        ],
112        [
113            'Taml',
114            0x0B80,
115            0x0BFF,
116        ],
117        [
118            'Sinh',
119            0x0D80,
120            0x0DFF,
121        ],
122        [
123            'Thai',
124            0x0E00,
125            0x0E7F,
126        ],
127        [
128            'Geor',
129            0x10A0,
130            0x10FF,
131        ],
132        [
133            'Grek',
134            0x1F00,
135            0x1FFF,
136        ],
137        [
138            'Deva',
139            0xA8E0,
140            0xA8FF,
141        ],
142        [
143            'Hans',
144            0x3000,
145            0x303F,
146        ],
147        // Mixed CJK, not just Hans
148        [
149            'Hans',
150            0x3400,
151            0xFAFF,
152        ],
153        // Mixed CJK, not just Hans
154        [
155            'Hans',
156            0x20000,
157            0x2FA1F,
158        ],
159        // Mixed CJK, not just Hans
160    ];
161
162    // Characters that are displayed in mirror form in RTL text.
163    private const MIRROR_CHARACTERS = [
164        '('  => ')',
165        ')'  => '(',
166        '['  => ']',
167        ']'  => '[',
168        '{'  => '}',
169        '}'  => '{',
170        '<'  => '>',
171        '>'  => '<',
172        '‹ ' => '›',
173        '› ' => '‹',
174        '«'  => '»',
175        '»'  => '«',
176        '﴾ ' => '﴿',
177        '﴿ ' => '﴾',
178        '“ ' => '”',
179        '” ' => '“',
180        '‘ ' => '’',
181        '’ ' => '‘',
182    ];
183
184    // Default list of locales to show in the menu.
185    private const DEFAULT_LOCALES = [
186        'ar',
187        'bg',
188        'bs',
189        'ca',
190        'cs',
191        'da',
192        'de',
193        'el',
194        'en-GB',
195        'en-US',
196        'es',
197        'et',
198        'fi',
199        'fr',
200        'he',
201        'hr',
202        'hu',
203        'is',
204        'it',
205        'ka',
206        'kk',
207        'lt',
208        'mr',
209        'nb',
210        'nl',
211        'nn',
212        'pl',
213        'pt',
214        'ru',
215        'sk',
216        'sv',
217        'tr',
218        'uk',
219        'vi',
220        'zh-Hans',
221    ];
222
223    /** @var string Punctuation used to separate list items, typically a comma */
224    public static $list_separator;
225
226    /**
227     * The prefered locales for this site, or a default list if no preference.
228     *
229     * @return LocaleInterface[]
230     */
231    public static function activeLocales(): array
232    {
233        $code_list = Site::getPreference('LANGUAGES');
234
235        if ($code_list === '') {
236            $codes = self::DEFAULT_LOCALES;
237        } else {
238            $codes = explode(',', $code_list);
239        }
240
241        $locales = [];
242        foreach ($codes as $code) {
243            if (file_exists(WT_ROOT . 'language/' . $code . '.mo')) {
244                try {
245                    $locales[] = Locale::create($code);
246                } catch (Exception $ex) {
247                    // No such locale exists?
248                }
249            }
250        }
251        usort($locales, '\Fisharebest\Localization\Locale::compare');
252
253        return $locales;
254    }
255
256    /**
257     * Which MySQL collation should be used for this locale?
258     *
259     * @return string
260     */
261    public static function collation()
262    {
263        $collation = self::$locale->collation();
264        switch ($collation) {
265            case 'croatian_ci':
266            case 'german2_ci':
267            case 'vietnamese_ci':
268                // Only available in MySQL 5.6
269                return 'utf8_unicode_ci';
270            default:
271                return 'utf8_' . $collation;
272        }
273    }
274
275    /**
276     * What format is used to display dates in the current locale?
277     *
278     * @return string
279     */
280    public static function dateFormat(): string
281    {
282        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
283        return self::$translator->translate('%j %F %Y');
284    }
285
286    /**
287     * Generate consistent I18N for datatables.js
288     *
289     * @param int[] $lengths An optional array of page lengths
290     *
291     * @return string
292     */
293    public static function datatablesI18N(array $lengths = [
294        10,
295        20,
296        30,
297        50,
298        100,
299        -1,
300    ]): string
301    {
302        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');
303
304        return
305            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
306            '"language": {' .
307            ' "paginate": {' .
308            '  "first":    "' . self::translate('first') . '",' .
309            '  "last":     "' . self::translate('last') . '",' .
310            '  "next":     "' . self::translate('next') . '",' .
311            '  "previous": "' . self::translate('previous') . '"' .
312            ' },' .
313            ' "emptyTable":     "' . self::translate('No records to display') . '",' .
314            ' "info":           "' . /* I18N: %s are placeholders for numbers */
315            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
316            ' "infoEmpty":      "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
317            ' "infoFiltered":   "' . /* I18N: %s is a placeholder for a number */
318            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
319            ' "lengthMenu":     "' . /* I18N: %s is a number of records per page */
320            self::translate('Display %s', addslashes($length_options)) . '",' .
321            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
322            ' "processing":     "' . self::translate('Loading…') . '",' .
323            ' "search":         "' . self::translate('Filter') . '",' .
324            ' "zeroRecords":    "' . self::translate('No records to display') . '"' .
325            '}';
326    }
327
328    /**
329     * Convert the digits 0-9 into the local script
330     *
331     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
332     *
333     * @param string|int $n
334     *
335     * @return string
336     */
337    public static function digits($n): string
338    {
339        return self::$locale->digits((string) $n);
340    }
341
342    /**
343     * What is the direction of the current locale
344     *
345     * @return string "ltr" or "rtl"
346     */
347    public static function direction(): string
348    {
349        return self::$locale->direction();
350    }
351
352    /**
353     * What is the first day of the week.
354     *
355     * @return int Sunday=0, Monday=1, etc.
356     */
357    public static function firstDay(): int
358    {
359        return self::$locale->territory()->firstDay();
360    }
361
362    /**
363     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
364     *
365     * @return string
366     */
367    public static function htmlAttributes(): string
368    {
369        return self::$locale->htmlAttributes();
370    }
371
372    /**
373     * Initialise the translation adapter with a locale setting.
374     *
375     * @param string    $code Use this locale/language code, or choose one automatically
376     * @param Tree|null $tree
377     *
378     * @return string $string
379     */
380    public static function init(string $code = '', Tree $tree = null): string
381    {
382        mb_internal_encoding('UTF-8');
383
384        if ($code !== '') {
385            // Create the specified locale
386            self::$locale = Locale::create($code);
387        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'language/' . Session::get('locale') . '.mo')) {
388            // Select a previously used locale
389            self::$locale = Locale::create(Session::get('locale'));
390        } else {
391            if ($tree instanceof Tree) {
392                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
393            } else {
394                $default_locale = new LocaleEnUs();
395            }
396
397            // Negotiate with the browser.
398            // Search engines don't negotiate.  They get the default locale of the tree.
399            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
400        }
401
402        $cache_dir  = WT_DATA_DIR . 'cache/';
403        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
404        if (file_exists($cache_file)) {
405            $filemtime = filemtime($cache_file);
406        } else {
407            $filemtime = 0;
408        }
409
410        // Load the translation file(s)
411        // Note that glob() returns false instead of an empty array when open_basedir_restriction
412        // is in force and no files are found. See PHP bug #47358.
413        if (defined('GLOB_BRACE')) {
414            $translation_files = array_merge(
415                [WT_ROOT . 'language/' . self::$locale->languageTag() . '.mo'],
416                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.{csv,php,mo}', GLOB_BRACE) ?: [],
417                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.{csv,php,mo}', GLOB_BRACE) ?: []
418            );
419        } else {
420            // Some servers do not have GLOB_BRACE - see http://php.net/manual/en/function.glob.php
421            $translation_files = array_merge(
422                [WT_ROOT . 'language/' . self::$locale->languageTag() . '.mo'],
423                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.csv') ?: [],
424                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.php') ?: [],
425                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.mo') ?: [],
426                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.csv') ?: [],
427                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.php') ?: [],
428                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.mo') ?: []
429            );
430        }
431        // Rebuild files after one hour
432        $rebuild_cache = time() > $filemtime + 3600;
433        // Rebuild files if any translation file has been updated
434        foreach ($translation_files as $translation_file) {
435            if (filemtime($translation_file) > $filemtime) {
436                $rebuild_cache = true;
437                break;
438            }
439        }
440
441        if ($rebuild_cache) {
442            $translations = [];
443            foreach ($translation_files as $translation_file) {
444                $translation  = new Translation($translation_file);
445                $translations = array_merge($translations, $translation->asArray());
446            }
447            try {
448                File::mkdir($cache_dir);
449                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
450            } catch (Exception $ex) {
451                // During setup, we may not have been able to create it.
452            }
453        } else {
454            $translations = include $cache_file;
455        }
456
457        // Create a translator
458        self::$translator = new Translator($translations, self::$locale->pluralRule());
459
460        /* I18N: This punctuation is used to separate lists of items */
461        self::$list_separator = self::translate(', ');
462
463        // Create a collator
464        try {
465            if (class_exists('Collator')) {
466                // Symfony provides a very incomplete polyfill - which cannot be used.
467                self::$collator = new Collator(self::$locale->code());
468                // Ignore upper/lower case differences
469                self::$collator->setStrength(Collator::SECONDARY);
470            }
471        } catch (Exception $ex) {
472            // PHP-INTL is not installed?  We'll use a fallback later.
473            self::$collator = null;
474        }
475
476        return self::$locale->languageTag();
477    }
478
479    /**
480     * All locales for which a translation file exists.
481     *
482     * @return LocaleInterface[]
483     */
484    public static function installedLocales(): array
485    {
486        $locales = [];
487        foreach (glob(WT_ROOT . 'language/*.mo') as $file) {
488            try {
489                $locales[] = Locale::create(basename($file, '.mo'));
490            } catch (Exception $ex) {
491                // Not a recognised locale
492            }
493        }
494        usort($locales, '\Fisharebest\Localization\Locale::compare');
495
496        return $locales;
497    }
498
499    /**
500     * Return the endonym for a given language - as per http://cldr.unicode.org/
501     *
502     * @param string $locale
503     *
504     * @return string
505     */
506    public static function languageName(string $locale): string
507    {
508        return Locale::create($locale)->endonym();
509    }
510
511    /**
512     * Return the script used by a given language
513     *
514     * @param string $locale
515     *
516     * @return string
517     */
518    public static function languageScript(string $locale): string
519    {
520        return Locale::create($locale)->script()->code();
521    }
522
523    /**
524     * Translate a number into the local representation.
525     *
526     * e.g. 12345.67 becomes
527     * en: 12,345.67
528     * fr: 12 345,67
529     * de: 12.345,67
530     *
531     * @param float $n
532     * @param int   $precision
533     *
534     * @return string
535     */
536    public static function number(float $n, int $precision = 0): string
537    {
538        return self::$locale->number(round($n, $precision));
539    }
540
541    /**
542     * Translate a fraction into a percentage.
543     *
544     * e.g. 0.123 becomes
545     * en: 12.3%
546     * fr: 12,3 %
547     * de: 12,3%
548     *
549     * @param float $n
550     * @param int   $precision
551     *
552     * @return string
553     */
554    public static function percentage(float $n, int $precision = 0): string
555    {
556        return self::$locale->percent(round($n, $precision + 2));
557    }
558
559    /**
560     * Translate a plural string
561     * echo self::plural('There is an error', 'There are errors', $num_errors);
562     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
563     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
564     *
565     * @param string $singular
566     * @param string $plural
567     * @param int    $count
568     * @param string ...$args
569     *
570     * @return string
571     */
572    public static function plural(string $singular, string $plural, int $count, ...$args): string
573    {
574        $message = self::$translator->translatePlural($singular, $plural, $count);
575
576        return sprintf($message, ...$args);
577    }
578
579    /**
580     * UTF8 version of PHP::strrev()
581     *
582     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
583     *
584     * These do not support UTF8 text direction, so we must mimic it for them.
585     *
586     * Numbers are always rendered LTR, even in RTL text.
587     * The visual direction of characters such as parentheses should be reversed.
588     *
589     * @param string $text Text to be reversed
590     *
591     * @return string
592     */
593    public static function reverseText($text): string
594    {
595        // Remove HTML markup - we can't display it and it is LTR.
596        $text = strip_tags($text);
597        // Remove HTML entities.
598        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
599
600        // LTR text doesn't need reversing
601        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
602            return $text;
603        }
604
605        // Mirrored characters
606        $text = strtr($text, self::MIRROR_CHARACTERS);
607
608        $reversed = '';
609        $digits   = '';
610        while ($text != '') {
611            $letter = mb_substr($text, 0, 1);
612            $text   = mb_substr($text, 1);
613            if (strpos(self::DIGITS, $letter) !== false) {
614                $digits .= $letter;
615            } else {
616                $reversed = $letter . $digits . $reversed;
617                $digits   = '';
618            }
619        }
620
621        return $digits . $reversed;
622    }
623
624    /**
625     * Return the direction (ltr or rtl) for a given script
626     *
627     * The PHP/intl library does not provde this information, so we need
628     * our own lookup table.
629     *
630     * @param string $script
631     *
632     * @return string
633     */
634    public static function scriptDirection($script)
635    {
636        switch ($script) {
637            case 'Arab':
638            case 'Hebr':
639            case 'Mong':
640            case 'Thaa':
641                return 'rtl';
642            default:
643                return 'ltr';
644        }
645    }
646
647    /**
648     * Perform a case-insensitive comparison of two strings.
649     *
650     * @param string $string1
651     * @param string $string2
652     *
653     * @return int
654     */
655    public static function strcasecmp($string1, $string2)
656    {
657        if (self::$collator instanceof Collator) {
658            return self::$collator->compare($string1, $string2);
659        } else {
660            return strcmp(self::strtolower($string1), self::strtolower($string2));
661        }
662    }
663
664    /**
665     * Convert a string to lower case.
666     *
667     * @param string $string
668     *
669     * @return string
670     */
671    public static function strtolower($string): string
672    {
673        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
674            $string = strtr($string, self::DOTLESS_I_TOLOWER);
675        }
676
677        return mb_strtolower($string);
678    }
679
680    /**
681     * Convert a string to upper case.
682     *
683     * @param string $string
684     *
685     * @return string
686     */
687    public static function strtoupper($string): string
688    {
689        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
690            $string = strtr($string, self::DOTLESS_I_TOUPPER);
691        }
692
693        return mb_strtoupper($string);
694    }
695
696    /**
697     * Identify the script used for a piece of text
698     *
699     * @param string $string
700     *
701     * @return string
702     */
703    public static function textScript($string): string
704    {
705        $string = strip_tags($string); // otherwise HTML tags show up as latin
706        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
707        $string = str_replace([
708            '@N.N.',
709            '@P.N.',
710        ], '', $string); // otherwise unknown names show up as latin
711        $pos    = 0;
712        $strlen = strlen($string);
713        while ($pos < $strlen) {
714            // get the Unicode Code Point for the character at position $pos
715            $byte1 = ord($string[$pos]);
716            if ($byte1 < 0x80) {
717                $code_point = $byte1;
718                $chrlen     = 1;
719            } elseif ($byte1 < 0xC0) {
720                // Invalid continuation character
721                return 'Latn';
722            } elseif ($byte1 < 0xE0) {
723                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
724                $chrlen     = 2;
725            } elseif ($byte1 < 0xF0) {
726                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
727                $chrlen     = 3;
728            } elseif ($byte1 < 0xF8) {
729                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
730                $chrlen     = 3;
731            } else {
732                // Invalid UTF
733                return 'Latn';
734            }
735
736            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
737                if ($code_point >= $range[1] && $code_point <= $range[2]) {
738                    return $range[0];
739                }
740            }
741            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
742            $pos += $chrlen;
743        }
744
745        return 'Latn';
746    }
747
748    /**
749     * Convert a number of seconds into a relative time. For example, 630 => "10 hours, 30 minutes ago"
750     *
751     * @param int $seconds
752     *
753     * @return string
754     */
755    public static function timeAgo($seconds)
756    {
757        $minute = 60;
758        $hour   = 60 * $minute;
759        $day    = 24 * $hour;
760        $month  = 30 * $day;
761        $year   = 365 * $day;
762
763        if ($seconds > $year) {
764            $years = intdiv($seconds, $year);
765
766            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
767        }
768
769        if ($seconds > $month) {
770            $months = intdiv($seconds, $month);
771
772            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
773        }
774
775        if ($seconds > $day) {
776            $days = intdiv($seconds, $day);
777
778            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
779        }
780
781        if ($seconds > $hour) {
782            $hours = intdiv($seconds, $hour);
783
784            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
785        }
786
787        if ($seconds > $minute) {
788            $minutes = intdiv($seconds, $minute);
789
790            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
791        }
792
793        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
794    }
795
796    /**
797     * What format is used to display dates in the current locale?
798     *
799     * @return string
800     */
801    public static function timeFormat(): string
802    {
803        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
804        return self::$translator->translate('%H:%i:%s');
805    }
806
807    /**
808     * Translate a string, and then substitute placeholders
809     *
810     * echo I18N::translate('Hello World!');
811     * echo I18N::translate('The %s sat on the mat', 'cat');
812     *
813     * @param string $message
814     * @param string ...$args
815     *
816     * @return string
817     */
818    public static function translate(string $message, ...$args): string
819    {
820        $message = self::$translator->translate($message);
821
822        return sprintf($message, ...$args);
823    }
824
825    /**
826     * Context sensitive version of translate.
827     * echo I18N::translateContext('NOMINATIVE', 'January');
828     * echo I18N::translateContext('GENITIVE', 'January');
829     *
830     * @param string $context
831     * @param string $message
832     * @param string ...$args
833     *
834     * @return string
835     */
836    public static function translateContext(string $context, string $message, ...$args): string
837    {
838        $message = self::$translator->translateContext($context, $message);
839
840        return sprintf($message, ...$args);
841    }
842}
843