xref: /webtrees/app/I18N.php (revision af14d23858d0cfa8a319c6c50a5f28fbc7754829)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use DomainException;
22use Exception;
23use Fisharebest\Localization\Locale;
24use Fisharebest\Localization\Locale\LocaleEnUs;
25use Fisharebest\Localization\Locale\LocaleInterface;
26use Fisharebest\Localization\Translation;
27use Fisharebest\Localization\Translator;
28use Fisharebest\Webtrees\Functions\FunctionsEdit;
29use Fisharebest\Webtrees\Module\LanguageEnglishUnitedStates;
30use Fisharebest\Webtrees\Module\ModuleCustomInterface;
31use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
32use Fisharebest\Webtrees\Services\ModuleService;
33use const GLOB_NOSORT;
34use Illuminate\Support\Collection;
35
36/**
37 * Internationalization (i18n) and localization (l10n).
38 */
39class I18N
40{
41    // MO files use special characters for plurals and context.
42    public const PLURAL  = '\x00';
43    public const CONTEXT = '\x04';
44
    /** @var LocaleInterface The current locale (e.g. LocaleEnGb). Assigned by init(). */
    private static $locale;

    /** @var Translator An object that performs translation. Created by init(). */
    private static $translator;

    /** @var Collator|null From the php-intl library. Null when init() could not create one; strcasecmp() then uses a fallback. */
    private static $collator;
53
    // Digits are always rendered LTR, even in RTL text.
    // Holds the ASCII digits followed by the Arabic-Indic (U+0660-U+0669) and
    // Extended Arabic-Indic (U+06F0-U+06F9) digits. reverseText() keeps runs of
    // these characters in their original order.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    // Language codes compared against the current locale's language code.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    // Substitutions applied by strtolower() before calling mb_strtolower().
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    // Substitutions applied by strtoupper() before calling mb_strtoupper().
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];
70
71    // The ranges of characters used by each script.
72    private const SCRIPT_CHARACTER_RANGES = [
73        [
74            'Latn',
75            0x0041,
76            0x005A,
77        ],
78        [
79            'Latn',
80            0x0061,
81            0x007A,
82        ],
83        [
84            'Latn',
85            0x0100,
86            0x02AF,
87        ],
88        [
89            'Grek',
90            0x0370,
91            0x03FF,
92        ],
93        [
94            'Cyrl',
95            0x0400,
96            0x052F,
97        ],
98        [
99            'Hebr',
100            0x0590,
101            0x05FF,
102        ],
103        [
104            'Arab',
105            0x0600,
106            0x06FF,
107        ],
108        [
109            'Arab',
110            0x0750,
111            0x077F,
112        ],
113        [
114            'Arab',
115            0x08A0,
116            0x08FF,
117        ],
118        [
119            'Deva',
120            0x0900,
121            0x097F,
122        ],
123        [
124            'Taml',
125            0x0B80,
126            0x0BFF,
127        ],
128        [
129            'Sinh',
130            0x0D80,
131            0x0DFF,
132        ],
133        [
134            'Thai',
135            0x0E00,
136            0x0E7F,
137        ],
138        [
139            'Geor',
140            0x10A0,
141            0x10FF,
142        ],
143        [
144            'Grek',
145            0x1F00,
146            0x1FFF,
147        ],
148        [
149            'Deva',
150            0xA8E0,
151            0xA8FF,
152        ],
153        [
154            'Hans',
155            0x3000,
156            0x303F,
157        ],
158        // Mixed CJK, not just Hans
159        [
160            'Hans',
161            0x3400,
162            0xFAFF,
163        ],
164        // Mixed CJK, not just Hans
165        [
166            'Hans',
167            0x20000,
168            0x2FA1F,
169        ],
170        // Mixed CJK, not just Hans
171    ];
172
173    // Characters that are displayed in mirror form in RTL text.
174    private const MIRROR_CHARACTERS = [
175        '('  => ')',
176        ')'  => '(',
177        '['  => ']',
178        ']'  => '[',
179        '{'  => '}',
180        '}'  => '{',
181        '<'  => '>',
182        '>'  => '<',
183        '‹ ' => '›',
184        '› ' => '‹',
185        '«'  => '»',
186        '»'  => '«',
187        '﴾ ' => '﴿',
188        '﴿ ' => '﴾',
189        '“ ' => '”',
190        '” ' => '“',
191        '‘ ' => '’',
192        '’ ' => '‘',
193    ];
194
195    /** @var string Punctuation used to separate list items, typically a comma */
196    public static $list_separator;
197
198    /**
199     * The preferred locales for this site, or a default list if no preference.
200     *
201     * @return LocaleInterface[]
202     */
203    public static function activeLocales(): array
204    {
205        $locales = app(ModuleService::class)
206            ->findByInterface(ModuleLanguageInterface::class)
207            ->map(function (ModuleLanguageInterface $module): LocaleInterface {
208                return $module->locale();
209            });
210
211        if ($locales->isEmpty()) {
212            return [new LocaleEnUs()];
213        }
214
215        return $locales->all();
216    }
217
218    /**
219     * Which MySQL collation should be used for this locale?
220     *
221     * @return string
222     */
223    public static function collation(): string
224    {
225        $collation = self::$locale->collation();
226        switch ($collation) {
227            case 'croatian_ci':
228            case 'german2_ci':
229            case 'vietnamese_ci':
230                // Only available in MySQL 5.6
231                return 'utf8_unicode_ci';
232            default:
233                return 'utf8_' . $collation;
234        }
235    }
236
237    /**
238     * What format is used to display dates in the current locale?
239     *
240     * @return string
241     */
242    public static function dateFormat(): string
243    {
244        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
245        return self::$translator->translate('%j %F %Y');
246    }
247
248    /**
249     * Generate consistent I18N for datatables.js
250     *
251     * @param int[] $lengths An optional array of page lengths
252     *
253     * @return string
254     */
255    public static function datatablesI18N(array $lengths = [
256        10,
257        20,
258        30,
259        50,
260        100,
261        -1,
262    ]): string
263    {
264        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');
265
266        return
267            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
268            '"language": {' .
269            ' "paginate": {' .
270            '  "first":    "' . self::translate('first') . '",' .
271            '  "last":     "' . self::translate('last') . '",' .
272            '  "next":     "' . self::translate('next') . '",' .
273            '  "previous": "' . self::translate('previous') . '"' .
274            ' },' .
275            ' "emptyTable":     "' . self::translate('No records to display') . '",' .
276            ' "info":           "' . /* I18N: %s are placeholders for numbers */
277            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
278            ' "infoEmpty":      "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
279            ' "infoFiltered":   "' . /* I18N: %s is a placeholder for a number */
280            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
281            ' "lengthMenu":     "' . /* I18N: %s is a number of records per page */
282            self::translate('Display %s', addslashes($length_options)) . '",' .
283            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
284            ' "processing":     "' . self::translate('Loading…') . '",' .
285            ' "search":         "' . self::translate('Filter') . '",' .
286            ' "zeroRecords":    "' . self::translate('No records to display') . '"' .
287            '}';
288    }
289
290    /**
291     * Convert the digits 0-9 into the local script
292     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
293     *
294     * @param string|int $n
295     *
296     * @return string
297     */
298    public static function digits($n): string
299    {
300        return self::$locale->digits((string) $n);
301    }
302
303    /**
304     * What is the direction of the current locale
305     *
306     * @return string "ltr" or "rtl"
307     */
308    public static function direction(): string
309    {
310        return self::$locale->direction();
311    }
312
313    /**
314     * What is the first day of the week.
315     *
316     * @return int Sunday=0, Monday=1, etc.
317     */
318    public static function firstDay(): int
319    {
320        return self::$locale->territory()->firstDay();
321    }
322
323    /**
324     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
325     *
326     * @return string
327     */
328    public static function htmlAttributes(): string
329    {
330        return self::$locale->htmlAttributes();
331    }
332
333    /**
334     * Initialise the translation adapter with a locale setting.
335     *
336     * @param string    $code  Use this locale/language code, or choose one automatically
337     * @param Tree|null $tree
338     * @param bool      $setup During setup, we cannot access the database.
339     *
340     * @return string $string
341     */
342    public static function init(string $code = '', Tree $tree = null, $setup = true): string
343    {
344        if ($code !== '') {
345            // Create the specified locale
346            self::$locale = Locale::create($code);
347        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
348            // Select a previously used locale
349            self::$locale = Locale::create(Session::get('locale'));
350        } else {
351            if ($tree instanceof Tree) {
352                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
353            } else {
354                $default_locale = new LocaleEnUs();
355            }
356
357            // Negotiate with the browser.
358            // Search engines don't negotiate.  They get the default locale of the tree.
359            if ($setup) {
360                $installed_locales = app(ModuleService::class)->setupLanguages()
361                    ->map(function (ModuleLanguageInterface $module): LocaleInterface {
362                        return $module->locale();
363                    });
364            } else {
365                $installed_locales = self::installedLocales();
366            }
367
368            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
369        }
370
371        $cache_dir  = WT_DATA_DIR . 'cache/';
372        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
373        if (file_exists($cache_file)) {
374            $filemtime = filemtime($cache_file);
375        } else {
376            $filemtime = 0;
377        }
378
379        // Load the translation file(s)
380        $translation_files = [
381            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
382        ];
383
384        // Rebuild files after one hour
385        $rebuild_cache = time() > $filemtime + 3600;
386        // Rebuild files if any translation file has been updated
387        foreach ($translation_files as $translation_file) {
388            if (filemtime($translation_file) > $filemtime) {
389                $rebuild_cache = true;
390                break;
391            }
392        }
393
394        if ($rebuild_cache) {
395            $translations = [];
396            foreach ($translation_files as $translation_file) {
397                $translation  = new Translation($translation_file);
398                $translations = array_merge($translations, $translation->asArray());
399            }
400            try {
401                File::mkdir($cache_dir);
402                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
403            } catch (Exception $ex) {
404                // During setup, we may not have been able to create it.
405            }
406        } else {
407            $translations = include $cache_file;
408        }
409
410        // Add translations from custom modules (but not during setup)
411        if (!$setup) {
412            $custom_modules = app(ModuleService::class)
413                ->findByInterface(ModuleCustomInterface::class);
414
415            foreach ($custom_modules as $custom_module) {
416                $custom_translations = $custom_module->customTranslations(self::$locale->languageTag());
417                $translations        = array_merge($translations, $custom_translations);
418            }
419        }
420
421        // Create a translator
422        self::$translator = new Translator($translations, self::$locale->pluralRule());
423
424        /* I18N: This punctuation is used to separate lists of items */
425        self::$list_separator = self::translate(', ');
426
427        // Create a collator
428        try {
429            if (class_exists('Collator')) {
430                // Symfony provides a very incomplete polyfill - which cannot be used.
431                self::$collator = new Collator(self::$locale->code());
432                // Ignore upper/lower case differences
433                self::$collator->setStrength(Collator::SECONDARY);
434            }
435        } catch (Exception $ex) {
436            // PHP-INTL is not installed?  We'll use a fallback later.
437            self::$collator = null;
438        }
439
440        return self::$locale->languageTag();
441    }
442
443    /**
444     * All locales for which a translation file exists.
445     *
446     * @return Collection
447     * @return LocaleInterface[]
448     */
449    public static function installedLocales(): Collection
450    {
451        return app(ModuleService::class)
452            ->findByInterface(ModuleLanguageInterface::class, true)
453            ->map(function (ModuleLanguageInterface $module): LocaleInterface {
454                return $module->locale();
455            });
456    }
457
458    /**
459     * Return the endonym for a given language - as per http://cldr.unicode.org/
460     *
461     * @param string $locale
462     *
463     * @return string
464     */
465    public static function languageName(string $locale): string
466    {
467        return Locale::create($locale)->endonym();
468    }
469
470    /**
471     * Return the script used by a given language
472     *
473     * @param string $locale
474     *
475     * @return string
476     */
477    public static function languageScript(string $locale): string
478    {
479        return Locale::create($locale)->script()->code();
480    }
481
482    /**
483     * Translate a number into the local representation.
484     * e.g. 12345.67 becomes
485     * en: 12,345.67
486     * fr: 12 345,67
487     * de: 12.345,67
488     *
489     * @param float $n
490     * @param int   $precision
491     *
492     * @return string
493     */
494    public static function number(float $n, int $precision = 0): string
495    {
496        return self::$locale->number(round($n, $precision));
497    }
498
499    /**
500     * Translate a fraction into a percentage.
501     * e.g. 0.123 becomes
502     * en: 12.3%
503     * fr: 12,3 %
504     * de: 12,3%
505     *
506     * @param float $n
507     * @param int   $precision
508     *
509     * @return string
510     */
511    public static function percentage(float $n, int $precision = 0): string
512    {
513        return self::$locale->percent(round($n, $precision + 2));
514    }
515
516    /**
517     * Translate a plural string
518     * echo self::plural('There is an error', 'There are errors', $num_errors);
519     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
520     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
521     *
522     * @param string $singular
523     * @param string $plural
524     * @param int    $count
525     * @param string ...$args
526     *
527     * @return string
528     */
529    public static function plural(string $singular, string $plural, int $count, ...$args): string
530    {
531        $message = self::$translator->translatePlural($singular, $plural, $count);
532
533        return sprintf($message, ...$args);
534    }
535
536    /**
537     * UTF8 version of PHP::strrev()
538     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
539     * These do not support UTF8 text direction, so we must mimic it for them.
540     * Numbers are always rendered LTR, even in RTL text.
541     * The visual direction of characters such as parentheses should be reversed.
542     *
543     * @param string $text Text to be reversed
544     *
545     * @return string
546     */
547    public static function reverseText($text): string
548    {
549        // Remove HTML markup - we can't display it and it is LTR.
550        $text = strip_tags($text);
551        // Remove HTML entities.
552        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
553
554        // LTR text doesn't need reversing
555        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
556            return $text;
557        }
558
559        // Mirrored characters
560        $text = strtr($text, self::MIRROR_CHARACTERS);
561
562        $reversed = '';
563        $digits   = '';
564        while ($text !== '') {
565            $letter = mb_substr($text, 0, 1);
566            $text   = mb_substr($text, 1);
567            if (strpos(self::DIGITS, $letter) !== false) {
568                $digits .= $letter;
569            } else {
570                $reversed = $letter . $digits . $reversed;
571                $digits   = '';
572            }
573        }
574
575        return $digits . $reversed;
576    }
577
578    /**
579     * Return the direction (ltr or rtl) for a given script
580     * The PHP/intl library does not provde this information, so we need
581     * our own lookup table.
582     *
583     * @param string $script
584     *
585     * @return string
586     */
587    public static function scriptDirection($script): string
588    {
589        switch ($script) {
590            case 'Arab':
591            case 'Hebr':
592            case 'Mong':
593            case 'Thaa':
594                return 'rtl';
595            default:
596                return 'ltr';
597        }
598    }
599
600    /**
601     * Perform a case-insensitive comparison of two strings.
602     *
603     * @param string $string1
604     * @param string $string2
605     *
606     * @return int
607     */
608    public static function strcasecmp($string1, $string2): int
609    {
610        if (self::$collator instanceof Collator) {
611            return self::$collator->compare($string1, $string2);
612        }
613
614        return strcmp(self::strtolower($string1), self::strtolower($string2));
615    }
616
617    /**
618     * Convert a string to lower case.
619     *
620     * @param string $string
621     *
622     * @return string
623     */
624    public static function strtolower($string): string
625    {
626        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
627            $string = strtr($string, self::DOTLESS_I_TOLOWER);
628        }
629
630        return mb_strtolower($string);
631    }
632
633    /**
634     * Convert a string to upper case.
635     *
636     * @param string $string
637     *
638     * @return string
639     */
640    public static function strtoupper($string): string
641    {
642        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
643            $string = strtr($string, self::DOTLESS_I_TOUPPER);
644        }
645
646        return mb_strtoupper($string);
647    }
648
649    /**
650     * Identify the script used for a piece of text
651     *
652     * @param string $string
653     *
654     * @return string
655     */
656    public static function textScript($string): string
657    {
658        $string = strip_tags($string); // otherwise HTML tags show up as latin
659        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
660        $string = str_replace([
661            '@N.N.',
662            '@P.N.',
663        ], '', $string); // otherwise unknown names show up as latin
664        $pos    = 0;
665        $strlen = strlen($string);
666        while ($pos < $strlen) {
667            // get the Unicode Code Point for the character at position $pos
668            $byte1 = ord($string[$pos]);
669            if ($byte1 < 0x80) {
670                $code_point = $byte1;
671                $chrlen     = 1;
672            } elseif ($byte1 < 0xC0) {
673                // Invalid continuation character
674                return 'Latn';
675            } elseif ($byte1 < 0xE0) {
676                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
677                $chrlen     = 2;
678            } elseif ($byte1 < 0xF0) {
679                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
680                $chrlen     = 3;
681            } elseif ($byte1 < 0xF8) {
682                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
683                $chrlen     = 3;
684            } else {
685                // Invalid UTF
686                return 'Latn';
687            }
688
689            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
690                if ($code_point >= $range[1] && $code_point <= $range[2]) {
691                    return $range[0];
692                }
693            }
694            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
695            $pos += $chrlen;
696        }
697
698        return 'Latn';
699    }
700
701    /**
702     * Convert a number of seconds into a relative time. For example, 630 => "10 hours, 30 minutes ago"
703     *
704     * @param int $seconds
705     *
706     * @return string
707     */
708    public static function timeAgo($seconds): string
709    {
710        $minute = 60;
711        $hour   = 60 * $minute;
712        $day    = 24 * $hour;
713        $month  = 30 * $day;
714        $year   = 365 * $day;
715
716        if ($seconds > $year) {
717            $years = intdiv($seconds, $year);
718
719            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
720        }
721
722        if ($seconds > $month) {
723            $months = intdiv($seconds, $month);
724
725            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
726        }
727
728        if ($seconds > $day) {
729            $days = intdiv($seconds, $day);
730
731            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
732        }
733
734        if ($seconds > $hour) {
735            $hours = intdiv($seconds, $hour);
736
737            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
738        }
739
740        if ($seconds > $minute) {
741            $minutes = intdiv($seconds, $minute);
742
743            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
744        }
745
746        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
747    }
748
749    /**
750     * What format is used to display dates in the current locale?
751     *
752     * @return string
753     */
754    public static function timeFormat(): string
755    {
756        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
757        return self::$translator->translate('%H:%i:%s');
758    }
759
760    /**
761     * Translate a string, and then substitute placeholders
762     * echo I18N::translate('Hello World!');
763     * echo I18N::translate('The %s sat on the mat', 'cat');
764     *
765     * @param string $message
766     * @param string ...$args
767     *
768     * @return string
769     */
770    public static function translate(string $message, ...$args): string
771    {
772        $message = self::$translator->translate($message);
773
774        return sprintf($message, ...$args);
775    }
776
777    /**
778     * Context sensitive version of translate.
779     * echo I18N::translateContext('NOMINATIVE', 'January');
780     * echo I18N::translateContext('GENITIVE', 'January');
781     *
782     * @param string $context
783     * @param string $message
784     * @param string ...$args
785     *
786     * @return string
787     */
788    public static function translateContext(string $context, string $message, ...$args): string
789    {
790        $message = self::$translator->translateContext($context, $message);
791
792        return sprintf($message, ...$args);
793    }
794}
795