xref: /webtrees/app/I18N.php (revision 8354ceb31bee4736b78590afb62d679c4f9389df)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use DomainException;
22use Exception;
23use Fisharebest\Localization\Locale;
24use Fisharebest\Localization\Locale\LocaleEnUs;
25use Fisharebest\Localization\Locale\LocaleInterface;
26use Fisharebest\Localization\Translation;
27use Fisharebest\Localization\Translator;
28use Fisharebest\Webtrees\Functions\FunctionsEdit;
29use const GLOB_NOSORT;
30
/**
 * Internationalization (i18n) and localization (l10n).
 *
 * A static facade around the current locale, its translator and (when the
 * php-intl extension is available) its collator. init() must be called
 * before any method that reads the current locale or translator.
 */
class I18N
{
    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // The list is searched in order and the first matching range wins, so the
    // narrow 'Deva' extension block must stay ahead of the wide CJK range that contains it.
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        // The three ranges below are mixed CJK, not just Hans.
        ['Hans', 0x3000, 0x303F],
        ['Hans', 0x3400, 0xFAFF],
        ['Hans', 0x20000, 0x2FA1F],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Keys must be the bare character: a trailing space would make strtr()
    // match only "character + space" and then drop the space.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    // Default list of locales to show in the menu.
    private const DEFAULT_LOCALES = [
        'ar',
        'bg',
        'bs',
        'ca',
        'cs',
        'da',
        'de',
        'el',
        'en-GB',
        'en-US',
        'es',
        'et',
        'fi',
        'fr',
        'he',
        'hr',
        'hu',
        'is',
        'it',
        'ka',
        'kk',
        'lt',
        'mr',
        'nb',
        'nl',
        'nn',
        'pl',
        'pt',
        'ru',
        'sk',
        'sv',
        'tr',
        'uk',
        'vi',
        'zh-Hans',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * Only locales that have a messages.mo file under resources/lang/ are
     * returned; codes that cannot be turned into a locale are skipped.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $code_list = Site::getPreference('LANGUAGES');

        if ($code_list === '') {
            $codes = self::DEFAULT_LOCALES;
        } else {
            $codes = explode(',', $code_list);
        }

        $locales = [];
        foreach ($codes as $code) {
            if (file_exists(WT_ROOT . 'resources/lang/' . $code . '/messages.mo')) {
                try {
                    $locales[] = Locale::create($code);
                } catch (Exception $ex) {
                    // No such locale exists?
                }
            }
        }

        usort($locales, '\Fisharebest\Localization\Locale::compare');

        return $locales;
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6, so fall back to the generic collation.
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Generate consistent I18N for datatables.js
     *
     * The result is a fragment of a JavaScript object literal (no surrounding braces).
     *
     * NOTE(review): translated strings are interpolated into double-quoted
     * JavaScript strings without escaping; a translation containing a double
     * quote or backslash would break the generated script - confirm that the
     * translation files never contain such characters.
     *
     * @param int[] $lengths An optional array of page lengths
     *
     * @return string
     */
    public static function datatablesI18N(array $lengths = [
        10,
        20,
        30,
        50,
        100,
        -1,
    ]): string
    {
        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');

        return
            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
            '"language": {' .
            ' "paginate": {' .
            '  "first":    "' . self::translate('first') . '",' .
            '  "last":     "' . self::translate('last') . '",' .
            '  "next":     "' . self::translate('next') . '",' .
            '  "previous": "' . self::translate('previous') . '"' .
            ' },' .
            ' "emptyTable":     "' . self::translate('No records to display') . '",' .
            ' "info":           "' . /* I18N: %s are placeholders for numbers */
            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
            ' "infoEmpty":      "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
            ' "infoFiltered":   "' . /* I18N: %s is a placeholder for a number */
            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
            ' "lengthMenu":     "' . /* I18N: %s is a number of records per page */
            self::translate('Display %s', addslashes($length_options)) . '",' .
            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
            ' "processing":     "' . self::translate('Loading…') . '",' .
            ' "search":         "' . self::translate('Filter') . '",' .
            ' "zeroRecords":    "' . self::translate('No records to display') . '"' .
            '}';
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen in this order: the explicit $code; the locale
     * stored in the session (if its translation file exists); otherwise a
     * negotiation between the browser's Accept-Language header and the
     * installed locales, defaulting to the tree's LANGUAGE preference or en-US.
     *
     * Translations are cached as a PHP file in the data directory. The cache
     * is rebuilt when it is more than an hour old or older than a translation file.
     *
     * @param string    $code Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate.  They get the default locale of the tree.
            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            // A missing cache is treated as infinitely old, which forces a rebuild below.
            $filemtime = 0;
        }

        // Load the translation file(s)
        $translation_files = [
            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
        ];

        // Rebuild files after one hour
        $rebuild_cache = time() > $filemtime + 3600;
        // Rebuild files if any translation file has been updated
        foreach ($translation_files as $translation_file) {
            if (filemtime($translation_file) > $filemtime) {
                $rebuild_cache = true;
                break;
            }
        }

        if ($rebuild_cache) {
            $translations = [];
            foreach ($translation_files as $translation_file) {
                $translation  = new Translation($translation_file);
                $translations = array_merge($translations, $translation->asArray());
            }
            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            $translations = include $cache_file;
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed?  We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return LocaleInterface[]
     */
    public static function installedLocales(): array
    {
        $locales = [];

        // glob() returns false on error; treat that the same as "no files found".
        $files = glob(WT_ROOT . 'resources/lang/*/messages.mo', GLOB_NOSORT) ?: [];

        foreach ($files as $file) {
            try {
                // The directory name is the locale code.
                $locales[] = Locale::create(basename(dirname($file)));
            } catch (DomainException $ex) {
                // Not a recognised locale
            }
        }
        usort($locales, '\Fisharebest\Localization\Locale::compare');

        return $locales;
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision Number of decimal places to round to
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision Number of decimal places in the percentage
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The fraction needs two more decimal places than the percentage it becomes.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count   Selects the plural form; it is not substituted into the message
     * @param string ...$args Values for the sprintf() placeholders
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits in reading order...
                $digits .= $letter;
            } else {
                // ...and emit it unreversed, just after the next non-digit.
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // Any digits still pending were at the end of the input.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string "ltr" or "rtl"
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the locale's collator when one was created by init(); otherwise,
     * or if the collator reports a failure, compares the lower-cased strings.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on failure, which must not
            // be returned from a function declared to return int.
            if (is_int($result)) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time and the script of the
     * first character that falls in a known range is returned. Characters
     * outside every range (digits, punctuation, spaces, emoji...) are skipped.
     * Empty, unrecognised or malformed text is reported as 'Latn'.
     *
     * @param string $string
     *
     * @return string A four-letter script code, e.g. 'Latn' or 'Hebr'
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin

        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The lead byte gives the sequence length and the high bits of the code point.
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $code_point = $byte1;
                $chrlen     = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $code_point = $byte1 & 0x1F;
                $chrlen     = 2;
            } elseif ($byte1 < 0xF0) {
                $code_point = $byte1 & 0x0F;
                $chrlen     = 3;
            } elseif ($byte1 < 0xF8) {
                $code_point = $byte1 & 0x07;
                $chrlen     = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - also invalid UTF
                return 'Latn';
            }

            // Each continuation byte contributes its low six bits.
            for ($i = 1; $i < $chrlen; $i++) {
                $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }

            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time, using only the
     * largest whole unit. For example, 630 => "10 minutes ago"
     *
     * A month is taken to be 30 days and a year 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds): string
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        // Use >= so that exactly one unit is shown as that unit
        // (e.g. 60 seconds is "1 minute ago", not "60 seconds ago").
        if ($seconds >= $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds >= $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds >= $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds >= $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds >= $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args Values for the sprintf() placeholders
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args Values for the sprintf() placeholders
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}
821