xref: /webtrees/app/I18N.php (revision d37db671e2f4b9f27d817b54a435ecf154a67a6b)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use DomainException;
22use Exception;
23use Fisharebest\Localization\Locale;
24use Fisharebest\Localization\Locale\LocaleEnUs;
25use Fisharebest\Localization\Locale\LocaleInterface;
26use Fisharebest\Localization\Translation;
27use Fisharebest\Localization\Translator;
28use Fisharebest\Webtrees\Functions\FunctionsEdit;
29use Fisharebest\Webtrees\Module\ModuleCustomInterface;
30use Fisharebest\Webtrees\Services\ModuleService;
31use const GLOB_NOSORT;
32
33/**
34 * Internationalization (i18n) and localization (l10n).
35 */
36class I18N
37{
38    // MO files use special characters for plurals and context.
39    public const PLURAL  = '\x00';
40    public const CONTEXT = '\x04';
41
42    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
43    private static $locale;
44
45    /** @var Translator An object that performs translation */
46    private static $translator;
47
48    /** @var  Collator|null From the php-intl library */
49    private static $collator;
50
51    // Digits are always rendered LTR, even in RTL text.
52    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';
53
54    // These locales need special handling for the dotless letter I.
55    private const DOTLESS_I_LOCALES = [
56        'az',
57        'tr',
58    ];
59    private const DOTLESS_I_TOLOWER = [
60        'I' => 'ı',
61        'İ' => 'i',
62    ];
63    private const DOTLESS_I_TOUPPER = [
64        'ı' => 'I',
65        'i' => 'İ',
66    ];
67
68    // The ranges of characters used by each script.
69    private const SCRIPT_CHARACTER_RANGES = [
70        [
71            'Latn',
72            0x0041,
73            0x005A,
74        ],
75        [
76            'Latn',
77            0x0061,
78            0x007A,
79        ],
80        [
81            'Latn',
82            0x0100,
83            0x02AF,
84        ],
85        [
86            'Grek',
87            0x0370,
88            0x03FF,
89        ],
90        [
91            'Cyrl',
92            0x0400,
93            0x052F,
94        ],
95        [
96            'Hebr',
97            0x0590,
98            0x05FF,
99        ],
100        [
101            'Arab',
102            0x0600,
103            0x06FF,
104        ],
105        [
106            'Arab',
107            0x0750,
108            0x077F,
109        ],
110        [
111            'Arab',
112            0x08A0,
113            0x08FF,
114        ],
115        [
116            'Deva',
117            0x0900,
118            0x097F,
119        ],
120        [
121            'Taml',
122            0x0B80,
123            0x0BFF,
124        ],
125        [
126            'Sinh',
127            0x0D80,
128            0x0DFF,
129        ],
130        [
131            'Thai',
132            0x0E00,
133            0x0E7F,
134        ],
135        [
136            'Geor',
137            0x10A0,
138            0x10FF,
139        ],
140        [
141            'Grek',
142            0x1F00,
143            0x1FFF,
144        ],
145        [
146            'Deva',
147            0xA8E0,
148            0xA8FF,
149        ],
150        [
151            'Hans',
152            0x3000,
153            0x303F,
154        ],
155        // Mixed CJK, not just Hans
156        [
157            'Hans',
158            0x3400,
159            0xFAFF,
160        ],
161        // Mixed CJK, not just Hans
162        [
163            'Hans',
164            0x20000,
165            0x2FA1F,
166        ],
167        // Mixed CJK, not just Hans
168    ];
169
170    // Characters that are displayed in mirror form in RTL text.
171    private const MIRROR_CHARACTERS = [
172        '('  => ')',
173        ')'  => '(',
174        '['  => ']',
175        ']'  => '[',
176        '{'  => '}',
177        '}'  => '{',
178        '<'  => '>',
179        '>'  => '<',
180        '‹ ' => '›',
181        '› ' => '‹',
182        '«'  => '»',
183        '»'  => '«',
184        '﴾ ' => '﴿',
185        '﴿ ' => '﴾',
186        '“ ' => '”',
187        '” ' => '“',
188        '‘ ' => '’',
189        '’ ' => '‘',
190    ];
191
192    // Default list of locales to show in the menu.
193    private const DEFAULT_LOCALES = [
194        'ar',
195        'bg',
196        'bs',
197        'ca',
198        'cs',
199        'da',
200        'de',
201        'el',
202        'en-GB',
203        'en-US',
204        'es',
205        'et',
206        'fi',
207        'fr',
208        'he',
209        'hr',
210        'hu',
211        'is',
212        'it',
213        'ka',
214        'kk',
215        'lt',
216        'mr',
217        'nb',
218        'nl',
219        'nn',
220        'pl',
221        'pt',
222        'ru',
223        'sk',
224        'sv',
225        'tr',
226        'uk',
227        'vi',
228        'zh-Hans',
229    ];
230
231    /** @var string Punctuation used to separate list items, typically a comma */
232    public static $list_separator;
233
234    /**
235     * The prefered locales for this site, or a default list if no preference.
236     *
237     * @return LocaleInterface[]
238     */
239    public static function activeLocales(): array
240    {
241        $code_list = Site::getPreference('LANGUAGES');
242
243        if ($code_list === '') {
244            $codes = self::DEFAULT_LOCALES;
245        } else {
246            $codes = explode(',', $code_list);
247        }
248
249        $locales = [];
250        foreach ($codes as $code) {
251            if (file_exists(WT_ROOT . 'resources/lang/' . $code . '/messages.mo')) {
252                try {
253                    $locales[] = Locale::create($code);
254                } catch (Exception $ex) {
255                    // No such locale exists?
256                }
257            }
258        }
259
260        usort($locales, '\Fisharebest\Localization\Locale::compare');
261
262        return $locales;
263    }
264
265    /**
266     * Which MySQL collation should be used for this locale?
267     *
268     * @return string
269     */
270    public static function collation(): string
271    {
272        $collation = self::$locale->collation();
273        switch ($collation) {
274            case 'croatian_ci':
275            case 'german2_ci':
276            case 'vietnamese_ci':
277                // Only available in MySQL 5.6
278                return 'utf8_unicode_ci';
279            default:
280                return 'utf8_' . $collation;
281        }
282    }
283
284    /**
285     * What format is used to display dates in the current locale?
286     *
287     * @return string
288     */
289    public static function dateFormat(): string
290    {
291        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
292        return self::$translator->translate('%j %F %Y');
293    }
294
295    /**
296     * Generate consistent I18N for datatables.js
297     *
298     * @param int[] $lengths An optional array of page lengths
299     *
300     * @return string
301     */
302    public static function datatablesI18N(array $lengths = [
303        10,
304        20,
305        30,
306        50,
307        100,
308        -1,
309    ]): string
310    {
311        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');
312
313        return
314            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
315            '"language": {' .
316            ' "paginate": {' .
317            '  "first":    "' . self::translate('first') . '",' .
318            '  "last":     "' . self::translate('last') . '",' .
319            '  "next":     "' . self::translate('next') . '",' .
320            '  "previous": "' . self::translate('previous') . '"' .
321            ' },' .
322            ' "emptyTable":     "' . self::translate('No records to display') . '",' .
323            ' "info":           "' . /* I18N: %s are placeholders for numbers */
324            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
325            ' "infoEmpty":      "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
326            ' "infoFiltered":   "' . /* I18N: %s is a placeholder for a number */
327            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
328            ' "lengthMenu":     "' . /* I18N: %s is a number of records per page */
329            self::translate('Display %s', addslashes($length_options)) . '",' .
330            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
331            ' "processing":     "' . self::translate('Loading…') . '",' .
332            ' "search":         "' . self::translate('Filter') . '",' .
333            ' "zeroRecords":    "' . self::translate('No records to display') . '"' .
334            '}';
335    }
336
337    /**
338     * Convert the digits 0-9 into the local script
339     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
340     *
341     * @param string|int $n
342     *
343     * @return string
344     */
345    public static function digits($n): string
346    {
347        return self::$locale->digits((string) $n);
348    }
349
350    /**
351     * What is the direction of the current locale
352     *
353     * @return string "ltr" or "rtl"
354     */
355    public static function direction(): string
356    {
357        return self::$locale->direction();
358    }
359
360    /**
361     * What is the first day of the week.
362     *
363     * @return int Sunday=0, Monday=1, etc.
364     */
365    public static function firstDay(): int
366    {
367        return self::$locale->territory()->firstDay();
368    }
369
370    /**
371     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
372     *
373     * @return string
374     */
375    public static function htmlAttributes(): string
376    {
377        return self::$locale->htmlAttributes();
378    }
379
380    /**
381     * Initialise the translation adapter with a locale setting.
382     *
383     * @param string    $code Use this locale/language code, or choose one automatically
384     * @param Tree|null $tree
385     * @param bool      $custom Load custom translations
386     *
387     * @return string $string
388     */
389    public static function init(string $code = '', Tree $tree = null, $custom = true): string
390    {
391        if ($code !== '') {
392            // Create the specified locale
393            self::$locale = Locale::create($code);
394        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
395            // Select a previously used locale
396            self::$locale = Locale::create(Session::get('locale'));
397        } else {
398            if ($tree instanceof Tree) {
399                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
400            } else {
401                $default_locale = new LocaleEnUs();
402            }
403
404            // Negotiate with the browser.
405            // Search engines don't negotiate.  They get the default locale of the tree.
406            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
407        }
408
409        $cache_dir  = WT_DATA_DIR . 'cache/';
410        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
411        if (file_exists($cache_file)) {
412            $filemtime = filemtime($cache_file);
413        } else {
414            $filemtime = 0;
415        }
416
417        // Load the translation file(s)
418        $translation_files = [
419            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
420        ];
421
422        // Rebuild files after one hour
423        $rebuild_cache = time() > $filemtime + 3600;
424        // Rebuild files if any translation file has been updated
425        foreach ($translation_files as $translation_file) {
426            if (filemtime($translation_file) > $filemtime) {
427                $rebuild_cache = true;
428                break;
429            }
430        }
431
432        if ($rebuild_cache) {
433            $translations = [];
434            foreach ($translation_files as $translation_file) {
435                $translation  = new Translation($translation_file);
436                $translations = array_merge($translations, $translation->asArray());
437            }
438            try {
439                File::mkdir($cache_dir);
440                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
441            } catch (Exception $ex) {
442                // During setup, we may not have been able to create it.
443            }
444        } else {
445            $translations = include $cache_file;
446        }
447
448        // Add translations from custom modules (but not during setup)
449        if ($custom) {
450            $custom_modules = app(ModuleService::class)->findByInterface(ModuleCustomInterface::class);
451
452            foreach ($custom_modules as $custom_module) {
453                $custom_translations = $custom_module->customTranslations(self::$locale->languageTag());
454                $translations        = array_merge($translations, $custom_translations);
455            }
456        }
457
458        // Create a translator
459        self::$translator = new Translator($translations, self::$locale->pluralRule());
460
461        /* I18N: This punctuation is used to separate lists of items */
462        self::$list_separator = self::translate(', ');
463
464        // Create a collator
465        try {
466            if (class_exists('Collator')) {
467                // Symfony provides a very incomplete polyfill - which cannot be used.
468                self::$collator = new Collator(self::$locale->code());
469                // Ignore upper/lower case differences
470                self::$collator->setStrength(Collator::SECONDARY);
471            }
472        } catch (Exception $ex) {
473            // PHP-INTL is not installed?  We'll use a fallback later.
474            self::$collator = null;
475        }
476
477        return self::$locale->languageTag();
478    }
479
480    /**
481     * All locales for which a translation file exists.
482     *
483     * @return LocaleInterface[]
484     */
485    public static function installedLocales(): array
486    {
487        $locales = [];
488
489        foreach (glob(WT_ROOT . 'resources/lang/*/messages.mo', GLOB_NOSORT) as $file) {
490            try {
491                $locales[] = Locale::create(basename(dirname($file)));
492            } catch (DomainException $ex) {
493                // Not a recognised locale
494            }
495        }
496        usort($locales, '\Fisharebest\Localization\Locale::compare');
497
498        return $locales;
499    }
500
501    /**
502     * Return the endonym for a given language - as per http://cldr.unicode.org/
503     *
504     * @param string $locale
505     *
506     * @return string
507     */
508    public static function languageName(string $locale): string
509    {
510        return Locale::create($locale)->endonym();
511    }
512
513    /**
514     * Return the script used by a given language
515     *
516     * @param string $locale
517     *
518     * @return string
519     */
520    public static function languageScript(string $locale): string
521    {
522        return Locale::create($locale)->script()->code();
523    }
524
525    /**
526     * Translate a number into the local representation.
527     * e.g. 12345.67 becomes
528     * en: 12,345.67
529     * fr: 12 345,67
530     * de: 12.345,67
531     *
532     * @param float $n
533     * @param int   $precision
534     *
535     * @return string
536     */
537    public static function number(float $n, int $precision = 0): string
538    {
539        return self::$locale->number(round($n, $precision));
540    }
541
542    /**
543     * Translate a fraction into a percentage.
544     * e.g. 0.123 becomes
545     * en: 12.3%
546     * fr: 12,3 %
547     * de: 12,3%
548     *
549     * @param float $n
550     * @param int   $precision
551     *
552     * @return string
553     */
554    public static function percentage(float $n, int $precision = 0): string
555    {
556        return self::$locale->percent(round($n, $precision + 2));
557    }
558
559    /**
560     * Translate a plural string
561     * echo self::plural('There is an error', 'There are errors', $num_errors);
562     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
563     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
564     *
565     * @param string $singular
566     * @param string $plural
567     * @param int    $count
568     * @param string ...$args
569     *
570     * @return string
571     */
572    public static function plural(string $singular, string $plural, int $count, ...$args): string
573    {
574        $message = self::$translator->translatePlural($singular, $plural, $count);
575
576        return sprintf($message, ...$args);
577    }
578
579    /**
580     * UTF8 version of PHP::strrev()
581     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
582     * These do not support UTF8 text direction, so we must mimic it for them.
583     * Numbers are always rendered LTR, even in RTL text.
584     * The visual direction of characters such as parentheses should be reversed.
585     *
586     * @param string $text Text to be reversed
587     *
588     * @return string
589     */
590    public static function reverseText($text): string
591    {
592        // Remove HTML markup - we can't display it and it is LTR.
593        $text = strip_tags($text);
594        // Remove HTML entities.
595        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
596
597        // LTR text doesn't need reversing
598        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
599            return $text;
600        }
601
602        // Mirrored characters
603        $text = strtr($text, self::MIRROR_CHARACTERS);
604
605        $reversed = '';
606        $digits   = '';
607        while ($text !== '') {
608            $letter = mb_substr($text, 0, 1);
609            $text   = mb_substr($text, 1);
610            if (strpos(self::DIGITS, $letter) !== false) {
611                $digits .= $letter;
612            } else {
613                $reversed = $letter . $digits . $reversed;
614                $digits   = '';
615            }
616        }
617
618        return $digits . $reversed;
619    }
620
621    /**
622     * Return the direction (ltr or rtl) for a given script
623     * The PHP/intl library does not provde this information, so we need
624     * our own lookup table.
625     *
626     * @param string $script
627     *
628     * @return string
629     */
630    public static function scriptDirection($script): string
631    {
632        switch ($script) {
633            case 'Arab':
634            case 'Hebr':
635            case 'Mong':
636            case 'Thaa':
637                return 'rtl';
638            default:
639                return 'ltr';
640        }
641    }
642
643    /**
644     * Perform a case-insensitive comparison of two strings.
645     *
646     * @param string $string1
647     * @param string $string2
648     *
649     * @return int
650     */
651    public static function strcasecmp($string1, $string2): int
652    {
653        if (self::$collator instanceof Collator) {
654            return self::$collator->compare($string1, $string2);
655        }
656
657        return strcmp(self::strtolower($string1), self::strtolower($string2));
658    }
659
660    /**
661     * Convert a string to lower case.
662     *
663     * @param string $string
664     *
665     * @return string
666     */
667    public static function strtolower($string): string
668    {
669        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
670            $string = strtr($string, self::DOTLESS_I_TOLOWER);
671        }
672
673        return mb_strtolower($string);
674    }
675
676    /**
677     * Convert a string to upper case.
678     *
679     * @param string $string
680     *
681     * @return string
682     */
683    public static function strtoupper($string): string
684    {
685        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES)) {
686            $string = strtr($string, self::DOTLESS_I_TOUPPER);
687        }
688
689        return mb_strtoupper($string);
690    }
691
692    /**
693     * Identify the script used for a piece of text
694     *
695     * @param string $string
696     *
697     * @return string
698     */
699    public static function textScript($string): string
700    {
701        $string = strip_tags($string); // otherwise HTML tags show up as latin
702        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
703        $string = str_replace([
704            '@N.N.',
705            '@P.N.',
706        ], '', $string); // otherwise unknown names show up as latin
707        $pos    = 0;
708        $strlen = strlen($string);
709        while ($pos < $strlen) {
710            // get the Unicode Code Point for the character at position $pos
711            $byte1 = ord($string[$pos]);
712            if ($byte1 < 0x80) {
713                $code_point = $byte1;
714                $chrlen     = 1;
715            } elseif ($byte1 < 0xC0) {
716                // Invalid continuation character
717                return 'Latn';
718            } elseif ($byte1 < 0xE0) {
719                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
720                $chrlen     = 2;
721            } elseif ($byte1 < 0xF0) {
722                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
723                $chrlen     = 3;
724            } elseif ($byte1 < 0xF8) {
725                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
726                $chrlen     = 3;
727            } else {
728                // Invalid UTF
729                return 'Latn';
730            }
731
732            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
733                if ($code_point >= $range[1] && $code_point <= $range[2]) {
734                    return $range[0];
735                }
736            }
737            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
738            $pos += $chrlen;
739        }
740
741        return 'Latn';
742    }
743
744    /**
745     * Convert a number of seconds into a relative time. For example, 630 => "10 hours, 30 minutes ago"
746     *
747     * @param int $seconds
748     *
749     * @return string
750     */
751    public static function timeAgo($seconds): string
752    {
753        $minute = 60;
754        $hour   = 60 * $minute;
755        $day    = 24 * $hour;
756        $month  = 30 * $day;
757        $year   = 365 * $day;
758
759        if ($seconds > $year) {
760            $years = intdiv($seconds, $year);
761
762            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
763        }
764
765        if ($seconds > $month) {
766            $months = intdiv($seconds, $month);
767
768            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
769        }
770
771        if ($seconds > $day) {
772            $days = intdiv($seconds, $day);
773
774            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
775        }
776
777        if ($seconds > $hour) {
778            $hours = intdiv($seconds, $hour);
779
780            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
781        }
782
783        if ($seconds > $minute) {
784            $minutes = intdiv($seconds, $minute);
785
786            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
787        }
788
789        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
790    }
791
792    /**
793     * What format is used to display dates in the current locale?
794     *
795     * @return string
796     */
797    public static function timeFormat(): string
798    {
799        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
800        return self::$translator->translate('%H:%i:%s');
801    }
802
803    /**
804     * Translate a string, and then substitute placeholders
805     * echo I18N::translate('Hello World!');
806     * echo I18N::translate('The %s sat on the mat', 'cat');
807     *
808     * @param string $message
809     * @param string ...$args
810     *
811     * @return string
812     */
813    public static function translate(string $message, ...$args): string
814    {
815        $message = self::$translator->translate($message);
816
817        return sprintf($message, ...$args);
818    }
819
820    /**
821     * Context sensitive version of translate.
822     * echo I18N::translateContext('NOMINATIVE', 'January');
823     * echo I18N::translateContext('GENITIVE', 'January');
824     *
825     * @param string $context
826     * @param string $message
827     * @param string ...$args
828     *
829     * @return string
830     */
831    public static function translateContext(string $context, string $message, ...$args): string
832    {
833        $message = self::$translator->translateContext($context, $message);
834
835        return sprintf($message, ...$args);
836    }
837}
838