xref: /webtrees/app/I18N.php (revision 71239cb694d278d044f33328daaa60c8ed7431e9)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2018 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use Exception;
22use Fisharebest\Localization\Locale;
23use Fisharebest\Localization\Locale\LocaleEnUs;
24use Fisharebest\Localization\Locale\LocaleInterface;
25use Fisharebest\Localization\Translation;
26use Fisharebest\Localization\Translator;
27use Fisharebest\Webtrees\Functions\FunctionsEdit;
28
29/**
30 * Internationalization (i18n) and localization (l10n).
31 */
32class I18N
33{
    /** @var LocaleInterface The current locale (e.g. LocaleEnGb). Assigned by init(). */
    private static $locale;

    /** @var Translator An object that performs translation. Assigned by init(). */
    private static $translator;

    /** @var Collator|null From the php-intl library. When this is not a Collator, strcasecmp() falls back to strcmp(). */
    private static $collator;
42
    // Digits are always rendered LTR, even in RTL text.
    // The list holds the ASCII digits followed by two sets of Arabic-script digits.
    // reverseText() keeps runs of these characters in their original order.
    const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    // The codes are compared against the language code of the current locale.
    const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    // Replacements applied with strtr() before mb_strtolower() in these locales.
    const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    // Replacements applied with strtr() before mb_strtoupper() in these locales.
    const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];
59
60    // The ranges of characters used by each script.
61    const SCRIPT_CHARACTER_RANGES = [
62        [
63            'Latn',
64            0x0041,
65            0x005A,
66        ],
67        [
68            'Latn',
69            0x0061,
70            0x007A,
71        ],
72        [
73            'Latn',
74            0x0100,
75            0x02AF,
76        ],
77        [
78            'Grek',
79            0x0370,
80            0x03FF,
81        ],
82        [
83            'Cyrl',
84            0x0400,
85            0x052F,
86        ],
87        [
88            'Hebr',
89            0x0590,
90            0x05FF,
91        ],
92        [
93            'Arab',
94            0x0600,
95            0x06FF,
96        ],
97        [
98            'Arab',
99            0x0750,
100            0x077F,
101        ],
102        [
103            'Arab',
104            0x08A0,
105            0x08FF,
106        ],
107        [
108            'Deva',
109            0x0900,
110            0x097F,
111        ],
112        [
113            'Taml',
114            0x0B80,
115            0x0BFF,
116        ],
117        [
118            'Sinh',
119            0x0D80,
120            0x0DFF,
121        ],
122        [
123            'Thai',
124            0x0E00,
125            0x0E7F,
126        ],
127        [
128            'Geor',
129            0x10A0,
130            0x10FF,
131        ],
132        [
133            'Grek',
134            0x1F00,
135            0x1FFF,
136        ],
137        [
138            'Deva',
139            0xA8E0,
140            0xA8FF,
141        ],
142        [
143            'Hans',
144            0x3000,
145            0x303F,
146        ],
147        // Mixed CJK, not just Hans
148        [
149            'Hans',
150            0x3400,
151            0xFAFF,
152        ],
153        // Mixed CJK, not just Hans
154        [
155            'Hans',
156            0x20000,
157            0x2FA1F,
158        ],
159        // Mixed CJK, not just Hans
160    ];
161
162    // Characters that are displayed in mirror form in RTL text.
163    const MIRROR_CHARACTERS = [
164        '('  => ')',
165        ')'  => '(',
166        '['  => ']',
167        ']'  => '[',
168        '{'  => '}',
169        '}'  => '{',
170        '<'  => '>',
171        '>'  => '<',
172        '‹ ' => '›',
173        '› ' => '‹',
174        '«'  => '»',
175        '»'  => '«',
176        '﴾ ' => '﴿',
177        '﴿ ' => '﴾',
178        '“ ' => '”',
179        '” ' => '“',
180        '‘ ' => '’',
181        '’ ' => '‘',
182    ];
183
    // Default list of locales to show in the menu.
    // activeLocales() uses this list when the site preference "LANGUAGES" is empty.
    const DEFAULT_LOCALES = [
        'ar',
        'bg',
        'bs',
        'ca',
        'cs',
        'da',
        'de',
        'el',
        'en-GB',
        'en-US',
        'es',
        'et',
        'fi',
        'fr',
        'he',
        'hr',
        'hu',
        'is',
        'it',
        'ka',
        'kk',
        'lt',
        'mr',
        'nb',
        'nl',
        'nn',
        'pl',
        'pt',
        'ru',
        'sk',
        'sv',
        'tr',
        'uk',
        'vi',
        'zh-Hans',
    ];
222
    /** @var string Punctuation used to separate list items, typically a comma. Set by init() to the translation of ", ". */
    public static $list_separator;
225
226    /**
227     * The prefered locales for this site, or a default list if no preference.
228     *
229     * @return LocaleInterface[]
230     */
231    public static function activeLocales(): array
232    {
233        $code_list = Site::getPreference('LANGUAGES');
234
235        if ($code_list === '') {
236            $codes = self::DEFAULT_LOCALES;
237        } else {
238            $codes = explode(',', $code_list);
239        }
240
241        $locales = [];
242        foreach ($codes as $code) {
243            if (file_exists(WT_ROOT . 'language/' . $code . '.mo')) {
244                try {
245                    $locales[] = Locale::create($code);
246                } catch (\Exception $ex) {
247                    DebugBar::addThrowable($ex);
248
249                    // No such locale exists?
250                }
251            }
252        }
253        usort($locales, '\Fisharebest\Localization\Locale::compare');
254
255        return $locales;
256    }
257
258    /**
259     * Which MySQL collation should be used for this locale?
260     *
261     * @return string
262     */
263    public static function collation()
264    {
265        $collation = self::$locale->collation();
266        switch ($collation) {
267            case 'croatian_ci':
268            case 'german2_ci':
269            case 'vietnamese_ci':
270                // Only available in MySQL 5.6
271                return 'utf8_unicode_ci';
272            default:
273                return 'utf8_' . $collation;
274        }
275    }
276
    /**
     * What format is used to display dates in the current locale?
     *
     * The format string is itself a translatable message, so each translation
     * can supply its own arrangement of day, month and year.
     *
     * @return string The translation of "%j %F %Y"
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }
287
    /**
     * Generate consistent I18N for datatables.js
     *
     * The result is a fragment of a JavaScript object literal: a "formatNumber"
     * function that maps the digits 0-9 to the digits of the current locale,
     * followed by a "language" object of translated labels. The fragment has no
     * enclosing braces.
     *
     * NOTE(review): only the page-length selector is passed through addslashes();
     * the translated labels are inserted between double quotes as-is.
     *
     * @param int[] $lengths An optional array of page lengths
     *
     * @return string
     */
    public static function datatablesI18N(array $lengths = [
        10,
        20,
        30,
        50,
        100,
        -1,
    ]): string
    {
        // The selector offered in the "lengthMenu" label, with '10' passed as the second argument.
        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');

        return
            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
            '"language": {' .
            ' "paginate": {' .
            '  "first":    "' . self::translate('first') . '",' .
            '  "last":     "' . self::translate('last') . '",' .
            '  "next":     "' . self::translate('next') . '",' .
            '  "previous": "' . self::translate('previous') . '"' .
            ' },' .
            ' "emptyTable":     "' . self::translate('No records to display') . '",' .
            ' "info":           "' . /* I18N: %s are placeholders for numbers */
            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
            ' "infoEmpty":      "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
            ' "infoFiltered":   "' . /* I18N: %s is a placeholder for a number */
            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
            ' "lengthMenu":     "' . /* I18N: %s is a number of records per page */
            self::translate('Display %s', addslashes($length_options)) . '",' .
            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
            ' "processing":     "' . self::translate('Loading…') . '",' .
            ' "search":         "' . self::translate('Filter') . '",' .
            ' "zeroRecords":    "' . self::translate('No records to display') . '"' .
            '}';
    }
329
    /**
     * Convert the digits 0-9 into the local script
     *
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     * The value is cast to a string before it is handed to the current locale.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }
343
    /**
     * What is the text direction of the current locale?
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }
353
    /**
     * What is the first day of the week?
     *
     * The answer comes from the territory of the current locale.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }
363
    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * The markup is produced by the current locale.
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }
373
    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen in this order: the given $code; the locale stored
     * in the session (if its .mo file exists); otherwise a negotiation against
     * the request headers, with the tree's LANGUAGE preference (or en-US when
     * there is no tree) as the default.
     *
     * Translations are merged from the core .mo file plus any csv/php/mo files
     * found in the module folders and in the data folder. Later files override
     * earlier ones. The merged array is cached as a PHP file in the data folder.
     *
     * As well as the locale, this sets the translator, the list separator and
     * (when the Collator class exists) the collator.
     *
     * @param string    $code Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', Tree $tree = null): string
    {
        mb_internal_encoding('UTF-8');

        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'language/' . Session::get('locale') . '.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate.  They get the default locale of the tree.
            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        // A missing cache file is treated as infinitely old, which forces a rebuild below.
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            $filemtime = 0;
        }

        // Load the translation file(s)
        // Note that glob() returns false instead of an empty array when open_basedir_restriction
        // is in force and no files are found. See PHP bug #47358.
        // NOTE(review): the core .mo file is always listed, without checking that it exists.
        if (defined('GLOB_BRACE')) {
            $translation_files = array_merge(
                [WT_ROOT . 'language/' . self::$locale->languageTag() . '.mo'],
                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.{csv,php,mo}', GLOB_BRACE) ?: [],
                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.{csv,php,mo}', GLOB_BRACE) ?: []
            );
        } else {
            // Some servers do not have GLOB_BRACE - see http://php.net/manual/en/function.glob.php
            $translation_files = array_merge(
                [WT_ROOT . 'language/' . self::$locale->languageTag() . '.mo'],
                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.csv') ?: [],
                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.php') ?: [],
                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.mo') ?: [],
                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.csv') ?: [],
                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.php') ?: [],
                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.mo') ?: []
            );
        }
        // Rebuild files after one hour
        $rebuild_cache = time() > $filemtime + 3600;
        // Rebuild files if any translation file has been updated
        foreach ($translation_files as $translation_file) {
            if (filemtime($translation_file) > $filemtime) {
                $rebuild_cache = true;
                break;
            }
        }

        if ($rebuild_cache) {
            // Merge in listing order, so that later files override earlier ones.
            $translations = [];
            foreach ($translation_files as $translation_file) {
                $translation  = new Translation($translation_file);
                $translations = array_merge($translations, $translation->asArray());
            }
            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                DebugBar::addThrowable($ex);

                // During setup, we may not have been able to create it.
            }
        } else {
            // The cache file is a PHP script that returns the merged array.
            $translations = include $cache_file;
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed?  We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }
482
483    /**
484     * All locales for which a translation file exists.
485     *
486     * @return LocaleInterface[]
487     */
488    public static function installedLocales(): array
489    {
490        $locales = [];
491        foreach (glob(WT_ROOT . 'language/*.mo') as $file) {
492            try {
493                $locales[] = Locale::create(basename($file, '.mo'));
494            } catch (\Exception $ex) {
495                DebugBar::addThrowable($ex);
496
497                // Not a recognised locale
498            }
499        }
500        usort($locales, '\Fisharebest\Localization\Locale::compare');
501
502        return $locales;
503    }
504
    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * A locale is created from the given code; this does not use or change the current locale.
     *
     * @param string $locale A locale code accepted by Locale::create()
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }
516
    /**
     * Return the code of the script used by a given language
     *
     * A locale is created from the given code; this does not use or change the current locale.
     *
     * @param string $locale A locale code accepted by Locale::create()
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }
528
    /**
     * Translate a number into the local representation.
     *
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * The value is rounded to $precision decimal places before the current
     * locale formats it, so the default precision of 0 drops the fraction.
     *
     * @param float $n
     * @param int   $precision Number of decimal places to round to
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }
546
    /**
     * Translate a fraction into a percentage.
     *
     * e.g. 0.123 (with a precision of 1) becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * The fraction is rounded to $precision + 2 decimal places before the
     * current locale formats it.
     *
     * @param float $n         A fraction, e.g. 0.5 for one half
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        return self::$locale->percent(round($n, $precision + 2));
    }
564
    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * $count only selects the plural form. It is not substituted into the
     * message; values to display must be passed again in $args.
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args Values for the sprintf() placeholders in the translated message
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }
584
585    /**
586     * UTF8 version of PHP::strrev()
587     *
588     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
589     *
590     * These do not support UTF8 text direction, so we must mimic it for them.
591     *
592     * Numbers are always rendered LTR, even in RTL text.
593     * The visual direction of characters such as parentheses should be reversed.
594     *
595     * @param string $text Text to be reversed
596     *
597     * @return string
598     */
599    public static function reverseText($text): string
600    {
601        // Remove HTML markup - we can't display it and it is LTR.
602        $text = strip_tags($text);
603        // Remove HTML entities.
604        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
605
606        // LTR text doesn't need reversing
607        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
608            return $text;
609        }
610
611        // Mirrored characters
612        $text = strtr($text, self::MIRROR_CHARACTERS);
613
614        $reversed = '';
615        $digits   = '';
616        while ($text != '') {
617            $letter = mb_substr($text, 0, 1);
618            $text   = mb_substr($text, 1);
619            if (strpos(self::DIGITS, $letter) !== false) {
620                $digits .= $letter;
621            } else {
622                $reversed = $letter . $digits . $reversed;
623                $digits   = '';
624            }
625        }
626
627        return $digits . $reversed;
628    }
629
630    /**
631     * Return the direction (ltr or rtl) for a given script
632     *
633     * The PHP/intl library does not provde this information, so we need
634     * our own lookup table.
635     *
636     * @param string $script
637     *
638     * @return string
639     */
640    public static function scriptDirection($script)
641    {
642        switch ($script) {
643            case 'Arab':
644            case 'Hebr':
645            case 'Mong':
646            case 'Thaa':
647                return 'rtl';
648            default:
649                return 'ltr';
650        }
651    }
652
653    /**
654     * Perform a case-insensitive comparison of two strings.
655     *
656     * @param string $string1
657     * @param string $string2
658     *
659     * @return int
660     */
661    public static function strcasecmp($string1, $string2)
662    {
663        if (self::$collator instanceof Collator) {
664            return self::$collator->compare($string1, $string2);
665        } else {
666            return strcmp(self::strtolower($string1), self::strtolower($string2));
667        }
668    }
669
670    /**
671     * Convert a string to lower case.
672     *
673     * @param string $string
674     *
675     * @return string
676     */
677    public static function strtolower($string): string
678    {
679        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
680            $string = strtr($string, self::DOTLESS_I_TOLOWER);
681        }
682
683        return mb_strtolower($string);
684    }
685
686    /**
687     * Convert a string to upper case.
688     *
689     * @param string $string
690     *
691     * @return string
692     */
693    public static function strtoupper($string): string
694    {
695        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
696            $string = strtr($string, self::DOTLESS_I_TOUPPER);
697        }
698
699        return mb_strtoupper($string);
700    }
701
702    /**
703     * Identify the script used for a piece of text
704     *
705     * @param string $string
706     *
707     * @return string
708     */
709    public static function textScript($string): string
710    {
711        $string = strip_tags($string); // otherwise HTML tags show up as latin
712        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
713        $string = str_replace([
714            '@N.N.',
715            '@P.N.',
716        ], '', $string); // otherwise unknown names show up as latin
717        $pos    = 0;
718        $strlen = strlen($string);
719        while ($pos < $strlen) {
720            // get the Unicode Code Point for the character at position $pos
721            $byte1 = ord($string[$pos]);
722            if ($byte1 < 0x80) {
723                $code_point = $byte1;
724                $chrlen     = 1;
725            } elseif ($byte1 < 0xC0) {
726                // Invalid continuation character
727                return 'Latn';
728            } elseif ($byte1 < 0xE0) {
729                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
730                $chrlen     = 2;
731            } elseif ($byte1 < 0xF0) {
732                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
733                $chrlen     = 3;
734            } elseif ($byte1 < 0xF8) {
735                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
736                $chrlen     = 3;
737            } else {
738                // Invalid UTF
739                return 'Latn';
740            }
741
742            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
743                if ($code_point >= $range[1] && $code_point <= $range[2]) {
744                    return $range[0];
745                }
746            }
747            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
748            $pos += $chrlen;
749        }
750
751        return 'Latn';
752    }
753
754    /**
755     * Convert a number of seconds into a relative time. For example, 630 => "10 hours, 30 minutes ago"
756     *
757     * @param int $seconds
758     *
759     * @return string
760     */
761    public static function timeAgo($seconds)
762    {
763        $minute = 60;
764        $hour   = 60 * $minute;
765        $day    = 24 * $hour;
766        $month  = 30 * $day;
767        $year   = 365 * $day;
768
769        if ($seconds > $year) {
770            $years = intdiv($seconds, $year);
771
772            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
773        }
774
775        if ($seconds > $month) {
776            $months = intdiv($seconds, $month);
777
778            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
779        }
780
781        if ($seconds > $day) {
782            $days = intdiv($seconds, $day);
783
784            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
785        }
786
787        if ($seconds > $hour) {
788            $hours = intdiv($seconds, $hour);
789
790            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
791        }
792
793        if ($seconds > $minute) {
794            $minutes = intdiv($seconds, $minute);
795
796            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
797        }
798
799        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
800    }
801
    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * The format string is itself a translatable message, so each translation
     * can supply its own arrangement of hours, minutes and seconds.
     *
     * @return string The translation of "%H:%i:%s"
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }
812
    /**
     * Translate a string, and then substitute placeholders
     *
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * The translated message is always passed through sprintf(), even when
     * no arguments are given, so a literal percent sign must be written as %%.
     *
     * @param string $message
     * @param string ...$args Values for the sprintf() placeholders in the translated message
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }
830
    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * The translated message is always passed through sprintf(), even when
     * no arguments are given, so a literal percent sign must be written as %%.
     *
     * @param string $context
     * @param string $message
     * @param string ...$args Values for the sprintf() placeholders in the translated message
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
848}
849