xref: /webtrees/app/I18N.php (revision a22c26f33a8f68ef90ed22483c6c4afd351939be)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use Exception;
22use Fisharebest\Localization\Locale;
23use Fisharebest\Localization\Locale\LocaleEnUs;
24use Fisharebest\Localization\Locale\LocaleInterface;
25use Fisharebest\Localization\Translation;
26use Fisharebest\Localization\Translator;
27use Fisharebest\Webtrees\Module\ModuleCustomInterface;
28use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
29use Fisharebest\Webtrees\Services\ModuleService;
30use Illuminate\Support\Collection;
31use function array_merge;
32use function class_exists;
33use function filemtime;
34use function file_exists;
35use function html_entity_decode;
36use function in_array;
37use function intdiv;
38use function mb_strtolower;
39use function mb_strtoupper;
40use function mb_substr;
41use function ord;
42use function sprintf;
43use function str_replace;
44use function strcmp;
45use function strip_tags;
46use function strlen;
47use function strpos;
48use function strtr;
49
/**
 * Internationalization (i18n) and localization (l10n).
 *
 * A static facade around the current locale, the message translator and an
 * optional php-intl collator. Call init() before any method that reads the
 * locale, the translator or the collator.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    // ASCII digits, Arabic-Indic digits and extended Arabic-Indic digits.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // The list is searched in order and the first match wins, so the order is significant
    // (e.g. the extended Devanagari block lies inside the broad CJK range and must precede it).
    private const SCRIPT_CHARACTER_RANGES = [
        [
            'Latn',
            0x0041,
            0x005A,
        ],
        [
            'Latn',
            0x0061,
            0x007A,
        ],
        [
            'Latn',
            0x0100,
            0x02AF,
        ],
        [
            'Grek',
            0x0370,
            0x03FF,
        ],
        [
            'Cyrl',
            0x0400,
            0x052F,
        ],
        [
            'Hebr',
            0x0590,
            0x05FF,
        ],
        [
            'Arab',
            0x0600,
            0x06FF,
        ],
        [
            'Arab',
            0x0750,
            0x077F,
        ],
        [
            'Arab',
            0x08A0,
            0x08FF,
        ],
        [
            'Deva',
            0x0900,
            0x097F,
        ],
        [
            'Taml',
            0x0B80,
            0x0BFF,
        ],
        [
            'Sinh',
            0x0D80,
            0x0DFF,
        ],
        [
            'Thai',
            0x0E00,
            0x0E7F,
        ],
        [
            'Geor',
            0x10A0,
            0x10FF,
        ],
        [
            'Grek',
            0x1F00,
            0x1FFF,
        ],
        [
            'Deva',
            0xA8E0,
            0xA8FF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3000,
            0x303F,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3400,
            0xFAFF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x20000,
            0x2FA1F,
        ],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Non-ASCII characters are written as escapes so that stray whitespace
    // cannot hide inside a key.
    private const MIRROR_CHARACTERS = [
        '('        => ')',
        ')'        => '(',
        '['        => ']',
        ']'        => '[',
        '{'        => '}',
        '}'        => '{',
        '<'        => '>',
        '>'        => '<',
        // Single angle quotation marks
        "\u{2039}" => "\u{203A}",
        "\u{203A}" => "\u{2039}",
        // Double angle quotation marks (guillemets)
        "\u{00AB}" => "\u{00BB}",
        "\u{00BB}" => "\u{00AB}",
        // Ornate parentheses
        "\u{FD3E}" => "\u{FD3F}",
        "\u{FD3F}" => "\u{FD3E}",
        // Double curly quotation marks
        "\u{201C}" => "\u{201D}",
        "\u{201D}" => "\u{201C}",
        // Single curly quotation marks
        "\u{2018}" => "\u{2019}",
        "\u{2019}" => "\u{2018}",
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * Falls back to a single en-US locale when no language module is found.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string The locale's collation prefixed with "utf8_", or
     *                "utf8_unicode_ci" for collations that need MySQL 5.6
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen from, in order: the explicit $code, the language
     * stored in the session (if its translation file exists), or negotiation
     * with the browser's Accept-Language header, defaulting to the tree's
     * LANGUAGE preference or en-US.
     *
     * Translations are read from the compiled .mo file and cached as a PHP
     * array in the data folder; the cache is rebuilt when the .mo file is newer.
     *
     * @param string    $code  Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     * @param bool      $setup During setup, we cannot access the database.
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null, $setup = false): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('language') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('language') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('language'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate.  They get the default locale of the tree.
            if ($setup) {
                $installed_locales = app(ModuleService::class)->setupLanguages()
                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                        return $module->locale();
                    });
            } else {
                $installed_locales = self::installedLocales();
            }

            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            // No cache yet: any existing translation file counts as newer.
            $filemtime = 0;
        }

        // Load the translation file
        $translation_file = WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo';

        if (!file_exists($translation_file)) {
            // Test and dev environments may not have the compiled translations
            $translations = [];
        } elseif (filemtime($translation_file) > $filemtime) {
            // The cache is missing or stale: parse the .mo file and rebuild it.
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();

            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            $translations = include $cache_file;
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed?  We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return Collection A collection of LocaleInterface objects
     */
    public static function installedLocales(): Collection
    {
        return app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision Number of decimal places to round to
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision Number of decimal places in the percentage
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The fraction needs two extra decimal places to give $precision places as a percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count    Selects the plural form
     * @param string ...$args  Values substituted into the translated message by sprintf()
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * HTML tags are stripped and HTML entities decoded before the text is
     * examined. Text whose detected script is LTR is returned in that cleaned
     * form, without reversing.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits, so that it keeps its LTR order.
                $digits .= $letter;
            } else {
                // Prepend the character, followed by any digit run that preceded it.
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script A script code, such as "Latn" or "Arab"
     *
     * @return string "rtl" for Arab, Hebr, Mong and Thaa; otherwise "ltr"
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the php-intl collator when one was created by init(). Otherwise,
     * or when the collator reports a failure, compares the lower-cased strings
     * byte by byte.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int Negative, zero or positive, as for strcmp()
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $comparison = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on failure, which is not a valid int result.
            if ($comparison !== false) {
                return $comparison;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * For the dotless-I locales, the letters I and İ are converted first,
     * using that language's own mapping.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * For the dotless-I locales, the letters ı and i are converted first,
     * using that language's own mapping.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of
     * the first character that falls inside a known range is returned.
     * Characters outside every range (punctuation, spaces, etc.) are skipped.
     *
     * @param string $string
     *
     * @return string A script code, such as "Latn" or "Hebr". "Latn" is also
     *                returned for invalid UTF-8 and for text with no recognised characters.
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The lead byte gives the length of the sequence and its payload bits.
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $code_point = $byte1;
                $chrlen     = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $code_point = $byte1 & 0x1F;
                $chrlen     = 2;
            } elseif ($byte1 < 0xF0) {
                $code_point = $byte1 & 0x0F;
                $chrlen     = 3;
            } elseif ($byte1 < 0xF8) {
                $code_point = $byte1 & 0x07;
                $chrlen     = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - invalid UTF
                return 'Latn';
            }

            // Each continuation byte contributes its low six bits.
            for ($offset = 1; $offset < $chrlen; $offset++) {
                $code_point = ($code_point << 6) + (ord($string[$pos + $offset]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time. For example, 630 => "10 minutes ago"
     *
     * Only the largest whole unit is shown. A month is counted as 30 days
     * and a year as 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds): string
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        // Use >= so that exactly one unit is shown as "1 minute ago", not "60 seconds ago".
        if ($seconds >= $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds >= $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds >= $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds >= $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds >= $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args Values substituted into the translated message by sprintf()
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args Values substituted into the translated message by sprintf()
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}
754