xref: /webtrees/app/I18N.php (revision 4459dc9a6d0c27769f8135175f3569e4fa287451)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use Exception;
22use Fisharebest\Localization\Locale;
23use Fisharebest\Localization\Locale\LocaleEnUs;
24use Fisharebest\Localization\Locale\LocaleInterface;
25use Fisharebest\Localization\Translation;
26use Fisharebest\Localization\Translator;
27use Fisharebest\Webtrees\Functions\FunctionsEdit;
28
29/**
30 * Internationalization (i18n) and localization (l10n).
31 */
32class I18N
33{
34    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
35    private static $locale;
36
37    /** @var Translator An object that performs translation */
38    private static $translator;
39
40    /** @var  Collator|null From the php-intl library */
41    private static $collator;
42
43    // Digits are always rendered LTR, even in RTL text.
44    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';
45
46    // These locales need special handling for the dotless letter I.
47    private const DOTLESS_I_LOCALES = [
48        'az',
49        'tr',
50    ];
51    private const DOTLESS_I_TOLOWER = [
52        'I' => 'ı',
53        'İ' => 'i',
54    ];
55    private const DOTLESS_I_TOUPPER = [
56        'ı' => 'I',
57        'i' => 'İ',
58    ];
59
60    // The ranges of characters used by each script.
61    private const SCRIPT_CHARACTER_RANGES = [
62        [
63            'Latn',
64            0x0041,
65            0x005A,
66        ],
67        [
68            'Latn',
69            0x0061,
70            0x007A,
71        ],
72        [
73            'Latn',
74            0x0100,
75            0x02AF,
76        ],
77        [
78            'Grek',
79            0x0370,
80            0x03FF,
81        ],
82        [
83            'Cyrl',
84            0x0400,
85            0x052F,
86        ],
87        [
88            'Hebr',
89            0x0590,
90            0x05FF,
91        ],
92        [
93            'Arab',
94            0x0600,
95            0x06FF,
96        ],
97        [
98            'Arab',
99            0x0750,
100            0x077F,
101        ],
102        [
103            'Arab',
104            0x08A0,
105            0x08FF,
106        ],
107        [
108            'Deva',
109            0x0900,
110            0x097F,
111        ],
112        [
113            'Taml',
114            0x0B80,
115            0x0BFF,
116        ],
117        [
118            'Sinh',
119            0x0D80,
120            0x0DFF,
121        ],
122        [
123            'Thai',
124            0x0E00,
125            0x0E7F,
126        ],
127        [
128            'Geor',
129            0x10A0,
130            0x10FF,
131        ],
132        [
133            'Grek',
134            0x1F00,
135            0x1FFF,
136        ],
137        [
138            'Deva',
139            0xA8E0,
140            0xA8FF,
141        ],
142        [
143            'Hans',
144            0x3000,
145            0x303F,
146        ],
147        // Mixed CJK, not just Hans
148        [
149            'Hans',
150            0x3400,
151            0xFAFF,
152        ],
153        // Mixed CJK, not just Hans
154        [
155            'Hans',
156            0x20000,
157            0x2FA1F,
158        ],
159        // Mixed CJK, not just Hans
160    ];
161
162    // Characters that are displayed in mirror form in RTL text.
163    private const MIRROR_CHARACTERS = [
164        '('  => ')',
165        ')'  => '(',
166        '['  => ']',
167        ']'  => '[',
168        '{'  => '}',
169        '}'  => '{',
170        '<'  => '>',
171        '>'  => '<',
172        '‹ ' => '›',
173        '› ' => '‹',
174        '«'  => '»',
175        '»'  => '«',
176        '﴾ ' => '﴿',
177        '﴿ ' => '﴾',
178        '“ ' => '”',
179        '” ' => '“',
180        '‘ ' => '’',
181        '’ ' => '‘',
182    ];
183
184    // Default list of locales to show in the menu.
185    private const DEFAULT_LOCALES = [
186        'ar',
187        'bg',
188        'bs',
189        'ca',
190        'cs',
191        'da',
192        'de',
193        'el',
194        'en-GB',
195        'en-US',
196        'es',
197        'et',
198        'fi',
199        'fr',
200        'he',
201        'hr',
202        'hu',
203        'is',
204        'it',
205        'ka',
206        'kk',
207        'lt',
208        'mr',
209        'nb',
210        'nl',
211        'nn',
212        'pl',
213        'pt',
214        'ru',
215        'sk',
216        'sv',
217        'tr',
218        'uk',
219        'vi',
220        'zh-Hans',
221    ];
222
223    /** @var string Punctuation used to separate list items, typically a comma */
224    public static $list_separator;
225
226    /**
227     * The prefered locales for this site, or a default list if no preference.
228     *
229     * @return LocaleInterface[]
230     */
231    public static function activeLocales(): array
232    {
233        $code_list = Site::getPreference('LANGUAGES');
234
235        if ($code_list === '') {
236            $codes = self::DEFAULT_LOCALES;
237        } else {
238            $codes = explode(',', $code_list);
239        }
240
241        $locales = [];
242        foreach ($codes as $code) {
243            if (file_exists(WT_ROOT . 'language/' . $code . '.mo')) {
244                try {
245                    $locales[] = Locale::create($code);
246                } catch (Exception $ex) {
247                    // No such locale exists?
248                }
249            }
250        }
251        usort($locales, '\Fisharebest\Localization\Locale::compare');
252
253        return $locales;
254    }
255
256    /**
257     * Which MySQL collation should be used for this locale?
258     *
259     * @return string
260     */
261    public static function collation()
262    {
263        $collation = self::$locale->collation();
264        switch ($collation) {
265            case 'croatian_ci':
266            case 'german2_ci':
267            case 'vietnamese_ci':
268                // Only available in MySQL 5.6
269                return 'utf8_unicode_ci';
270            default:
271                return 'utf8_' . $collation;
272        }
273    }
274
275    /**
276     * What format is used to display dates in the current locale?
277     *
278     * @return string
279     */
280    public static function dateFormat(): string
281    {
282        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
283        return self::$translator->translate('%j %F %Y');
284    }
285
286    /**
287     * Generate consistent I18N for datatables.js
288     *
289     * @param int[] $lengths An optional array of page lengths
290     *
291     * @return string
292     */
293    public static function datatablesI18N(array $lengths = [
294        10,
295        20,
296        30,
297        50,
298        100,
299        -1,
300    ]): string
301    {
302        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');
303
304        return
305            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
306            '"language": {' .
307            ' "paginate": {' .
308            '  "first":    "' . self::translate('first') . '",' .
309            '  "last":     "' . self::translate('last') . '",' .
310            '  "next":     "' . self::translate('next') . '",' .
311            '  "previous": "' . self::translate('previous') . '"' .
312            ' },' .
313            ' "emptyTable":     "' . self::translate('No records to display') . '",' .
314            ' "info":           "' . /* I18N: %s are placeholders for numbers */
315            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
316            ' "infoEmpty":      "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
317            ' "infoFiltered":   "' . /* I18N: %s is a placeholder for a number */
318            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
319            ' "lengthMenu":     "' . /* I18N: %s is a number of records per page */
320            self::translate('Display %s', addslashes($length_options)) . '",' .
321            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
322            ' "processing":     "' . self::translate('Loading…') . '",' .
323            ' "search":         "' . self::translate('Filter') . '",' .
324            ' "zeroRecords":    "' . self::translate('No records to display') . '"' .
325            '}';
326    }
327
328    /**
329     * Convert the digits 0-9 into the local script
330     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
331     *
332     * @param string|int $n
333     *
334     * @return string
335     */
336    public static function digits($n): string
337    {
338        return self::$locale->digits((string) $n);
339    }
340
341    /**
342     * What is the direction of the current locale
343     *
344     * @return string "ltr" or "rtl"
345     */
346    public static function direction(): string
347    {
348        return self::$locale->direction();
349    }
350
351    /**
352     * What is the first day of the week.
353     *
354     * @return int Sunday=0, Monday=1, etc.
355     */
356    public static function firstDay(): int
357    {
358        return self::$locale->territory()->firstDay();
359    }
360
361    /**
362     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
363     *
364     * @return string
365     */
366    public static function htmlAttributes(): string
367    {
368        return self::$locale->htmlAttributes();
369    }
370
371    /**
372     * Initialise the translation adapter with a locale setting.
373     *
374     * @param string    $code Use this locale/language code, or choose one automatically
375     * @param Tree|null $tree
376     *
377     * @return string $string
378     */
379    public static function init(string $code = '', Tree $tree = null): string
380    {
381        mb_internal_encoding('UTF-8');
382
383        if ($code !== '') {
384            // Create the specified locale
385            self::$locale = Locale::create($code);
386        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'language/' . Session::get('locale') . '.mo')) {
387            // Select a previously used locale
388            self::$locale = Locale::create(Session::get('locale'));
389        } else {
390            if ($tree instanceof Tree) {
391                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
392            } else {
393                $default_locale = new LocaleEnUs();
394            }
395
396            // Negotiate with the browser.
397            // Search engines don't negotiate.  They get the default locale of the tree.
398            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
399        }
400
401        $cache_dir  = WT_DATA_DIR . 'cache/';
402        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
403        if (file_exists($cache_file)) {
404            $filemtime = filemtime($cache_file);
405        } else {
406            $filemtime = 0;
407        }
408
409        // Load the translation file(s)
410        // Note that glob() returns false instead of an empty array when open_basedir_restriction
411        // is in force and no files are found. See PHP bug #47358.
412        if (defined('GLOB_BRACE')) {
413            $translation_files = array_merge(
414                [WT_ROOT . 'language/' . self::$locale->languageTag() . '.mo'],
415                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.{csv,php,mo}', GLOB_BRACE) ?: [],
416                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.{csv,php,mo}', GLOB_BRACE) ?: []
417            );
418        } else {
419            // Some servers do not have GLOB_BRACE - see http://php.net/manual/en/function.glob.php
420            $translation_files = array_merge(
421                [WT_ROOT . 'language/' . self::$locale->languageTag() . '.mo'],
422                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.csv') ?: [],
423                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.php') ?: [],
424                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.mo') ?: [],
425                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.csv') ?: [],
426                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.php') ?: [],
427                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.mo') ?: []
428            );
429        }
430        // Rebuild files after one hour
431        $rebuild_cache = time() > $filemtime + 3600;
432        // Rebuild files if any translation file has been updated
433        foreach ($translation_files as $translation_file) {
434            if (filemtime($translation_file) > $filemtime) {
435                $rebuild_cache = true;
436                break;
437            }
438        }
439
440        if ($rebuild_cache) {
441            $translations = [];
442            foreach ($translation_files as $translation_file) {
443                $translation  = new Translation($translation_file);
444                $translations = array_merge($translations, $translation->asArray());
445            }
446            try {
447                File::mkdir($cache_dir);
448                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
449            } catch (Exception $ex) {
450                // During setup, we may not have been able to create it.
451            }
452        } else {
453            $translations = include $cache_file;
454        }
455
456        // Create a translator
457        self::$translator = new Translator($translations, self::$locale->pluralRule());
458
459        /* I18N: This punctuation is used to separate lists of items */
460        self::$list_separator = self::translate(', ');
461
462        // Create a collator
463        try {
464            if (class_exists('Collator')) {
465                // Symfony provides a very incomplete polyfill - which cannot be used.
466                self::$collator = new Collator(self::$locale->code());
467                // Ignore upper/lower case differences
468                self::$collator->setStrength(Collator::SECONDARY);
469            }
470        } catch (Exception $ex) {
471            // PHP-INTL is not installed?  We'll use a fallback later.
472            self::$collator = null;
473        }
474
475        return self::$locale->languageTag();
476    }
477
478    /**
479     * All locales for which a translation file exists.
480     *
481     * @return LocaleInterface[]
482     */
483    public static function installedLocales(): array
484    {
485        $locales = [];
486        foreach (glob(WT_ROOT . 'language/*.mo') as $file) {
487            try {
488                $locales[] = Locale::create(basename($file, '.mo'));
489            } catch (Exception $ex) {
490                // Not a recognised locale
491            }
492        }
493        usort($locales, '\Fisharebest\Localization\Locale::compare');
494
495        return $locales;
496    }
497
498    /**
499     * Return the endonym for a given language - as per http://cldr.unicode.org/
500     *
501     * @param string $locale
502     *
503     * @return string
504     */
505    public static function languageName(string $locale): string
506    {
507        return Locale::create($locale)->endonym();
508    }
509
510    /**
511     * Return the script used by a given language
512     *
513     * @param string $locale
514     *
515     * @return string
516     */
517    public static function languageScript(string $locale): string
518    {
519        return Locale::create($locale)->script()->code();
520    }
521
522    /**
523     * Translate a number into the local representation.
524     * e.g. 12345.67 becomes
525     * en: 12,345.67
526     * fr: 12 345,67
527     * de: 12.345,67
528     *
529     * @param float $n
530     * @param int   $precision
531     *
532     * @return string
533     */
534    public static function number(float $n, int $precision = 0): string
535    {
536        return self::$locale->number(round($n, $precision));
537    }
538
539    /**
540     * Translate a fraction into a percentage.
541     * e.g. 0.123 becomes
542     * en: 12.3%
543     * fr: 12,3 %
544     * de: 12,3%
545     *
546     * @param float $n
547     * @param int   $precision
548     *
549     * @return string
550     */
551    public static function percentage(float $n, int $precision = 0): string
552    {
553        return self::$locale->percent(round($n, $precision + 2));
554    }
555
556    /**
557     * Translate a plural string
558     * echo self::plural('There is an error', 'There are errors', $num_errors);
559     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
560     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
561     *
562     * @param string $singular
563     * @param string $plural
564     * @param int    $count
565     * @param string ...$args
566     *
567     * @return string
568     */
569    public static function plural(string $singular, string $plural, int $count, ...$args): string
570    {
571        $message = self::$translator->translatePlural($singular, $plural, $count);
572
573        return sprintf($message, ...$args);
574    }
575
576    /**
577     * UTF8 version of PHP::strrev()
578     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
579     * These do not support UTF8 text direction, so we must mimic it for them.
580     * Numbers are always rendered LTR, even in RTL text.
581     * The visual direction of characters such as parentheses should be reversed.
582     *
583     * @param string $text Text to be reversed
584     *
585     * @return string
586     */
587    public static function reverseText($text): string
588    {
589        // Remove HTML markup - we can't display it and it is LTR.
590        $text = strip_tags($text);
591        // Remove HTML entities.
592        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
593
594        // LTR text doesn't need reversing
595        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
596            return $text;
597        }
598
599        // Mirrored characters
600        $text = strtr($text, self::MIRROR_CHARACTERS);
601
602        $reversed = '';
603        $digits   = '';
604        while ($text != '') {
605            $letter = mb_substr($text, 0, 1);
606            $text   = mb_substr($text, 1);
607            if (strpos(self::DIGITS, $letter) !== false) {
608                $digits .= $letter;
609            } else {
610                $reversed = $letter . $digits . $reversed;
611                $digits   = '';
612            }
613        }
614
615        return $digits . $reversed;
616    }
617
618    /**
619     * Return the direction (ltr or rtl) for a given script
620     * The PHP/intl library does not provde this information, so we need
621     * our own lookup table.
622     *
623     * @param string $script
624     *
625     * @return string
626     */
627    public static function scriptDirection($script)
628    {
629        switch ($script) {
630            case 'Arab':
631            case 'Hebr':
632            case 'Mong':
633            case 'Thaa':
634                return 'rtl';
635            default:
636                return 'ltr';
637        }
638    }
639
640    /**
641     * Perform a case-insensitive comparison of two strings.
642     *
643     * @param string $string1
644     * @param string $string2
645     *
646     * @return int
647     */
648    public static function strcasecmp($string1, $string2)
649    {
650        if (self::$collator instanceof Collator) {
651            return self::$collator->compare($string1, $string2);
652        } else {
653            return strcmp(self::strtolower($string1), self::strtolower($string2));
654        }
655    }
656
657    /**
658     * Convert a string to lower case.
659     *
660     * @param string $string
661     *
662     * @return string
663     */
664    public static function strtolower($string): string
665    {
666        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
667            $string = strtr($string, self::DOTLESS_I_TOLOWER);
668        }
669
670        return mb_strtolower($string);
671    }
672
673    /**
674     * Convert a string to upper case.
675     *
676     * @param string $string
677     *
678     * @return string
679     */
680    public static function strtoupper($string): string
681    {
682        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
683            $string = strtr($string, self::DOTLESS_I_TOUPPER);
684        }
685
686        return mb_strtoupper($string);
687    }
688
689    /**
690     * Identify the script used for a piece of text
691     *
692     * @param string $string
693     *
694     * @return string
695     */
696    public static function textScript($string): string
697    {
698        $string = strip_tags($string); // otherwise HTML tags show up as latin
699        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
700        $string = str_replace([
701            '@N.N.',
702            '@P.N.',
703        ], '', $string); // otherwise unknown names show up as latin
704        $pos    = 0;
705        $strlen = strlen($string);
706        while ($pos < $strlen) {
707            // get the Unicode Code Point for the character at position $pos
708            $byte1 = ord($string[$pos]);
709            if ($byte1 < 0x80) {
710                $code_point = $byte1;
711                $chrlen     = 1;
712            } elseif ($byte1 < 0xC0) {
713                // Invalid continuation character
714                return 'Latn';
715            } elseif ($byte1 < 0xE0) {
716                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
717                $chrlen     = 2;
718            } elseif ($byte1 < 0xF0) {
719                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
720                $chrlen     = 3;
721            } elseif ($byte1 < 0xF8) {
722                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
723                $chrlen     = 3;
724            } else {
725                // Invalid UTF
726                return 'Latn';
727            }
728
729            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
730                if ($code_point >= $range[1] && $code_point <= $range[2]) {
731                    return $range[0];
732                }
733            }
734            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
735            $pos += $chrlen;
736        }
737
738        return 'Latn';
739    }
740
741    /**
742     * Convert a number of seconds into a relative time. For example, 630 => "10 hours, 30 minutes ago"
743     *
744     * @param int $seconds
745     *
746     * @return string
747     */
748    public static function timeAgo($seconds)
749    {
750        $minute = 60;
751        $hour   = 60 * $minute;
752        $day    = 24 * $hour;
753        $month  = 30 * $day;
754        $year   = 365 * $day;
755
756        if ($seconds > $year) {
757            $years = intdiv($seconds, $year);
758
759            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
760        }
761
762        if ($seconds > $month) {
763            $months = intdiv($seconds, $month);
764
765            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
766        }
767
768        if ($seconds > $day) {
769            $days = intdiv($seconds, $day);
770
771            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
772        }
773
774        if ($seconds > $hour) {
775            $hours = intdiv($seconds, $hour);
776
777            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
778        }
779
780        if ($seconds > $minute) {
781            $minutes = intdiv($seconds, $minute);
782
783            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
784        }
785
786        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
787    }
788
789    /**
790     * What format is used to display dates in the current locale?
791     *
792     * @return string
793     */
794    public static function timeFormat(): string
795    {
796        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
797        return self::$translator->translate('%H:%i:%s');
798    }
799
800    /**
801     * Translate a string, and then substitute placeholders
802     * echo I18N::translate('Hello World!');
803     * echo I18N::translate('The %s sat on the mat', 'cat');
804     *
805     * @param string $message
806     * @param string ...$args
807     *
808     * @return string
809     */
810    public static function translate(string $message, ...$args): string
811    {
812        $message = self::$translator->translate($message);
813
814        return sprintf($message, ...$args);
815    }
816
817    /**
818     * Context sensitive version of translate.
819     * echo I18N::translateContext('NOMINATIVE', 'January');
820     * echo I18N::translateContext('GENITIVE', 'January');
821     *
822     * @param string $context
823     * @param string $message
824     * @param string ...$args
825     *
826     * @return string
827     */
828    public static function translateContext(string $context, string $message, ...$args): string
829    {
830        $message = self::$translator->translateContext($context, $message);
831
832        return sprintf($message, ...$args);
833    }
834}
835