xref: /webtrees/app/I18N.php (revision 02086832f6c85aab8102c1a336d227314e040441)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use DomainException;
22use Exception;
23use Fisharebest\Localization\Locale;
24use Fisharebest\Localization\Locale\LocaleEnUs;
25use Fisharebest\Localization\Locale\LocaleInterface;
26use Fisharebest\Localization\Translation;
27use Fisharebest\Localization\Translator;
28use Fisharebest\Webtrees\Functions\FunctionsEdit;
29use Fisharebest\Webtrees\Module\LanguageEnglishUnitedStates;
30use Fisharebest\Webtrees\Module\ModuleCustomInterface;
31use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
32use Fisharebest\Webtrees\Services\ModuleService;
33use const GLOB_NOSORT;
34
/**
 * Internationalization (i18n) and localization (l10n).
 *
 * A static facade around the current locale, its translator and (when the
 * php-intl extension is available) its collator. The locale, translator and
 * collator are only assigned by init(), so init() must be called before any
 * method that reads them.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    // These must be double-quoted: inside single quotes PHP does not interpret
    // "\x.." escapes, and the constant would be four literal characters.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var  Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script.
    // Each entry is [script code, first code point, last code point].
    // textScript() returns the first entry that matches, so order matters
    // where ranges overlap (e.g. Deva A8E0-A8FF lies inside the CJK range).
    private const SCRIPT_CHARACTER_RANGES = [
        [
            'Latn',
            0x0041,
            0x005A,
        ],
        [
            'Latn',
            0x0061,
            0x007A,
        ],
        [
            'Latn',
            0x0100,
            0x02AF,
        ],
        [
            'Grek',
            0x0370,
            0x03FF,
        ],
        [
            'Cyrl',
            0x0400,
            0x052F,
        ],
        [
            'Hebr',
            0x0590,
            0x05FF,
        ],
        [
            'Arab',
            0x0600,
            0x06FF,
        ],
        [
            'Arab',
            0x0750,
            0x077F,
        ],
        [
            'Arab',
            0x08A0,
            0x08FF,
        ],
        [
            'Deva',
            0x0900,
            0x097F,
        ],
        [
            'Taml',
            0x0B80,
            0x0BFF,
        ],
        [
            'Sinh',
            0x0D80,
            0x0DFF,
        ],
        [
            'Thai',
            0x0E00,
            0x0E7F,
        ],
        [
            'Geor',
            0x10A0,
            0x10FF,
        ],
        [
            'Grek',
            0x1F00,
            0x1FFF,
        ],
        [
            'Deva',
            0xA8E0,
            0xA8FF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3000,
            0x303F,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3400,
            0xFAFF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x20000,
            0x2FA1F,
        ],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Every key is exactly one character: a stray trailing space in a key
    // would make strtr() match only "character + space" and swallow the space.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        // No language modules found: fall back to US English.
        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Generate consistent I18N for datatables.js
     *
     * The result is a fragment of a JavaScript object literal (without the
     * surrounding braces), built by string concatenation.
     *
     * @param int[] $lengths An optional array of page lengths
     *
     * @return string
     */
    public static function datatablesI18N(array $lengths = [
        10,
        20,
        30,
        50,
        100,
        -1,
    ]): string
    {
        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');

        return
            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
            '"language": {' .
            ' "paginate": {' .
            '  "first":    "' . self::translate('first') . '",' .
            '  "last":     "' . self::translate('last') . '",' .
            '  "next":     "' . self::translate('next') . '",' .
            '  "previous": "' . self::translate('previous') . '"' .
            ' },' .
            ' "emptyTable":     "' . self::translate('No records to display') . '",' .
            ' "info":           "' . /* I18N: %s are placeholders for numbers */
            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
            ' "infoEmpty":      "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
            ' "infoFiltered":   "' . /* I18N: %s is a placeholder for a number */
            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
            ' "lengthMenu":     "' . /* I18N: %s is a number of records per page */
            self::translate('Display %s', addslashes($length_options)) . '",' .
            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
            ' "processing":     "' . self::translate('Loading…') . '",' .
            ' "search":         "' . self::translate('Filter') . '",' .
            ' "zeroRecords":    "' . self::translate('No records to display') . '"' .
            '}';
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen in this order: the explicit $code; the locale
     * stored in the session (if its translation file exists); otherwise a
     * negotiation between the browser's Accept-Language header, the installed
     * locales and a default (the tree's LANGUAGE preference, or en-US).
     *
     * Translations are read from the locale's messages.mo file and cached as
     * a PHP file in the data directory. The cache is rebuilt when it is more
     * than an hour old or older than the translation file.
     *
     * @param string    $code   Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     * @param bool      $custom Load custom translations
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null, $custom = true): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate.  They get the default locale of the tree.
            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            // A missing cache is treated as infinitely old, forcing a rebuild.
            $filemtime = 0;
        }

        // Load the translation file(s)
        $translation_files = [
            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
        ];

        // Rebuild files after one hour
        $rebuild_cache = time() > $filemtime + 3600;
        // Rebuild files if any translation file has been updated
        foreach ($translation_files as $translation_file) {
            if (filemtime($translation_file) > $filemtime) {
                $rebuild_cache = true;
                break;
            }
        }

        if ($rebuild_cache) {
            $translations = [];
            foreach ($translation_files as $translation_file) {
                $translation  = new Translation($translation_file);
                $translations = array_merge($translations, $translation->asArray());
            }
            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
                // The freshly loaded translations are still used for this request.
            }
        } else {
            $translations = include $cache_file;
        }

        // Add translations from custom modules (but not during setup)
        if ($custom) {
            $custom_modules = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class);

            foreach ($custom_modules as $custom_module) {
                $custom_translations = $custom_module->customTranslations(self::$locale->languageTag());
                // Later entries win, so custom modules override core translations.
                $translations        = array_merge($translations, $custom_translations);
            }
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator.
        // Reset first, so that a repeated call to init() can never keep a
        // collator that was built for the previous locale.
        self::$collator = null;
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed?  We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return LocaleInterface[]
     */
    public static function installedLocales(): array
    {
        return app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            })
            ->all();
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The fraction has two more decimal places than the percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits in its original (LTR) order.
                $digits .= $letter;
            } else {
                // Prepend the character, followed by any pending digit run.
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // Any trailing digit run ends up at the start of the reversed text.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator created by init() when there is one, otherwise a
     * byte-wise comparison of the lower-cased strings.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on failure, which would
            // violate the int return type. Use the fallback instead.
            if ($result !== false) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        // Map the dotted/dotless I explicitly, before the generic conversion.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        // Map the dotted/dotless I explicitly, before the generic conversion.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time. The script of the
     * first character that falls inside one of SCRIPT_CHARACTER_RANGES is
     * returned. Invalid or truncated UTF-8, and text with no recognised
     * characters, is reported as "Latn".
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The first byte tells us how many bytes make up this character.
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - do not read past the end.
                return 'Latn';
            }

            // get the Unicode Code Point for the character at position $pos
            switch ($chrlen) {
                case 1:
                    $code_point = $byte1;
                    break;
                case 2:
                    $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
                    break;
                case 3:
                    $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
                    break;
                default:
                    // Four bytes carry 3 + 6 + 6 + 6 bits, so the lead byte is shifted by 18.
                    $code_point = (($byte1 & 0x07) << 18) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
                    break;
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time. For example, 630 => "10 minutes ago"
     *
     * Only the largest whole unit is shown. A month is taken as 30 days and
     * a year as 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds): string
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        // Each comparison is inclusive, so that exactly one unit is reported
        // as "1 hour ago" rather than "60 minutes ago".
        if ($seconds >= $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds >= $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds >= $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds >= $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds >= $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}
785