xref: /webtrees/app/I18N.php (revision 4f194b97d4dda944b41f9edd5e8f7da11656c545)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use Exception;
22use Fisharebest\Localization\Locale;
23use Fisharebest\Localization\Locale\LocaleEnUs;
24use Fisharebest\Localization\Locale\LocaleInterface;
25use Fisharebest\Localization\Translation;
26use Fisharebest\Localization\Translator;
27use Fisharebest\Webtrees\Module\ModuleCustomInterface;
28use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
29use Fisharebest\Webtrees\Services\ModuleService;
30use Illuminate\Support\Collection;
31use function array_merge;
32use function filemtime;
33
34/**
35 * Internationalization (i18n) and localization (l10n).
36 */
37class I18N
38{
39    // MO files use special characters for plurals and context.
40    public const PLURAL  = "\x00";
41    public const CONTEXT = "\x04";
42
43    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
44    private static $locale;
45
46    /** @var Translator An object that performs translation */
47    private static $translator;
48
49    /** @var  Collator|null From the php-intl library */
50    private static $collator;
51
52    // Digits are always rendered LTR, even in RTL text.
53    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';
54
55    // These locales need special handling for the dotless letter I.
56    private const DOTLESS_I_LOCALES = [
57        'az',
58        'tr',
59    ];
60    private const DOTLESS_I_TOLOWER = [
61        'I' => 'ı',
62        'İ' => 'i',
63    ];
64    private const DOTLESS_I_TOUPPER = [
65        'ı' => 'I',
66        'i' => 'İ',
67    ];
68
69    // The ranges of characters used by each script.
70    private const SCRIPT_CHARACTER_RANGES = [
71        [
72            'Latn',
73            0x0041,
74            0x005A,
75        ],
76        [
77            'Latn',
78            0x0061,
79            0x007A,
80        ],
81        [
82            'Latn',
83            0x0100,
84            0x02AF,
85        ],
86        [
87            'Grek',
88            0x0370,
89            0x03FF,
90        ],
91        [
92            'Cyrl',
93            0x0400,
94            0x052F,
95        ],
96        [
97            'Hebr',
98            0x0590,
99            0x05FF,
100        ],
101        [
102            'Arab',
103            0x0600,
104            0x06FF,
105        ],
106        [
107            'Arab',
108            0x0750,
109            0x077F,
110        ],
111        [
112            'Arab',
113            0x08A0,
114            0x08FF,
115        ],
116        [
117            'Deva',
118            0x0900,
119            0x097F,
120        ],
121        [
122            'Taml',
123            0x0B80,
124            0x0BFF,
125        ],
126        [
127            'Sinh',
128            0x0D80,
129            0x0DFF,
130        ],
131        [
132            'Thai',
133            0x0E00,
134            0x0E7F,
135        ],
136        [
137            'Geor',
138            0x10A0,
139            0x10FF,
140        ],
141        [
142            'Grek',
143            0x1F00,
144            0x1FFF,
145        ],
146        [
147            'Deva',
148            0xA8E0,
149            0xA8FF,
150        ],
151        [
152            'Hans',
153            0x3000,
154            0x303F,
155        ],
156        // Mixed CJK, not just Hans
157        [
158            'Hans',
159            0x3400,
160            0xFAFF,
161        ],
162        // Mixed CJK, not just Hans
163        [
164            'Hans',
165            0x20000,
166            0x2FA1F,
167        ],
168        // Mixed CJK, not just Hans
169    ];
170
171    // Characters that are displayed in mirror form in RTL text.
172    private const MIRROR_CHARACTERS = [
173        '('  => ')',
174        ')'  => '(',
175        '['  => ']',
176        ']'  => '[',
177        '{'  => '}',
178        '}'  => '{',
179        '<'  => '>',
180        '>'  => '<',
181        '‹ ' => '›',
182        '› ' => '‹',
183        '«'  => '»',
184        '»'  => '«',
185        '﴾ ' => '﴿',
186        '﴿ ' => '﴾',
187        '“ ' => '”',
188        '” ' => '“',
189        '‘ ' => '’',
190        '’ ' => '‘',
191    ];
192
193    /** @var string Punctuation used to separate list items, typically a comma */
194    public static $list_separator;
195
196    /**
197     * The preferred locales for this site, or a default list if no preference.
198     *
199     * @return LocaleInterface[]
200     */
201    public static function activeLocales(): array
202    {
203        $locales = app(ModuleService::class)
204            ->findByInterface(ModuleLanguageInterface::class, false, true)
205            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
206                return $module->locale();
207            });
208
209        if ($locales->isEmpty()) {
210            return [new LocaleEnUs()];
211        }
212
213        return $locales->all();
214    }
215
216    /**
217     * Which MySQL collation should be used for this locale?
218     *
219     * @return string
220     */
221    public static function collation(): string
222    {
223        $collation = self::$locale->collation();
224        switch ($collation) {
225            case 'croatian_ci':
226            case 'german2_ci':
227            case 'vietnamese_ci':
228                // Only available in MySQL 5.6
229                return 'utf8_unicode_ci';
230            default:
231                return 'utf8_' . $collation;
232        }
233    }
234
235    /**
236     * What format is used to display dates in the current locale?
237     *
238     * @return string
239     */
240    public static function dateFormat(): string
241    {
242        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
243        return self::$translator->translate('%j %F %Y');
244    }
245
246    /**
247     * Convert the digits 0-9 into the local script
248     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
249     *
250     * @param string|int $n
251     *
252     * @return string
253     */
254    public static function digits($n): string
255    {
256        return self::$locale->digits((string) $n);
257    }
258
259    /**
260     * What is the direction of the current locale
261     *
262     * @return string "ltr" or "rtl"
263     */
264    public static function direction(): string
265    {
266        return self::$locale->direction();
267    }
268
269    /**
270     * What is the first day of the week.
271     *
272     * @return int Sunday=0, Monday=1, etc.
273     */
274    public static function firstDay(): int
275    {
276        return self::$locale->territory()->firstDay();
277    }
278
279    /**
280     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
281     *
282     * @return string
283     */
284    public static function htmlAttributes(): string
285    {
286        return self::$locale->htmlAttributes();
287    }
288
289    /**
290     * Initialise the translation adapter with a locale setting.
291     *
292     * @param string    $code  Use this locale/language code, or choose one automatically
293     * @param Tree|null $tree
294     * @param bool      $setup During setup, we cannot access the database.
295     *
296     * @return string $string
297     */
298    public static function init(string $code = '', Tree $tree = null, $setup = false): string
299    {
300        if ($code !== '') {
301            // Create the specified locale
302            self::$locale = Locale::create($code);
303        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
304            // Select a previously used locale
305            self::$locale = Locale::create(Session::get('locale'));
306        } else {
307            if ($tree instanceof Tree) {
308                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
309            } else {
310                $default_locale = new LocaleEnUs();
311            }
312
313            // Negotiate with the browser.
314            // Search engines don't negotiate.  They get the default locale of the tree.
315            if ($setup) {
316                $installed_locales = app(ModuleService::class)->setupLanguages()
317                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
318                        return $module->locale();
319                    });
320            } else {
321                $installed_locales = self::installedLocales();
322            }
323
324            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
325        }
326
327        $cache_dir  = WT_DATA_DIR . 'cache/';
328        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
329        if (file_exists($cache_file)) {
330            $filemtime = filemtime($cache_file);
331        } else {
332            $filemtime = 0;
333        }
334
335        // Load the translation file
336        $translation_file = WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo';
337
338        // Rebuild files if the translation file has been updated
339        if (filemtime($translation_file) > $filemtime) {
340            $translation  = new Translation($translation_file);
341            $translations = $translation->asArray();
342
343            try {
344                File::mkdir($cache_dir);
345                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
346            } catch (Exception $ex) {
347                // During setup, we may not have been able to create it.
348            }
349        } else {
350            $translations = include $cache_file;
351        }
352
353        // Add translations from custom modules (but not during setup, as we have no database/modules)
354        if (!$setup) {
355            $translations = app(ModuleService::class)
356                ->findByInterface(ModuleCustomInterface::class)
357                ->reduce(function (array $carry, ModuleCustomInterface $item): array {
358                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
359                }, $translations);
360        }
361
362        // Create a translator
363        self::$translator = new Translator($translations, self::$locale->pluralRule());
364
365        /* I18N: This punctuation is used to separate lists of items */
366        self::$list_separator = self::translate(', ');
367
368        // Create a collator
369        try {
370            if (class_exists('Collator')) {
371                // Symfony provides a very incomplete polyfill - which cannot be used.
372                self::$collator = new Collator(self::$locale->code());
373                // Ignore upper/lower case differences
374                self::$collator->setStrength(Collator::SECONDARY);
375            }
376        } catch (Exception $ex) {
377            // PHP-INTL is not installed?  We'll use a fallback later.
378            self::$collator = null;
379        }
380
381        return self::$locale->languageTag();
382    }
383
384    /**
385     * All locales for which a translation file exists.
386     *
387     * @return Collection
388     * @return LocaleInterface[]
389     */
390    public static function installedLocales(): Collection
391    {
392        return app(ModuleService::class)
393            ->findByInterface(ModuleLanguageInterface::class, true)
394            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
395                return $module->locale();
396            });
397    }
398
399    /**
400     * Return the endonym for a given language - as per http://cldr.unicode.org/
401     *
402     * @param string $locale
403     *
404     * @return string
405     */
406    public static function languageName(string $locale): string
407    {
408        return Locale::create($locale)->endonym();
409    }
410
411    /**
412     * Return the script used by a given language
413     *
414     * @param string $locale
415     *
416     * @return string
417     */
418    public static function languageScript(string $locale): string
419    {
420        return Locale::create($locale)->script()->code();
421    }
422
423    /**
424     * Translate a number into the local representation.
425     * e.g. 12345.67 becomes
426     * en: 12,345.67
427     * fr: 12 345,67
428     * de: 12.345,67
429     *
430     * @param float $n
431     * @param int   $precision
432     *
433     * @return string
434     */
435    public static function number(float $n, int $precision = 0): string
436    {
437        return self::$locale->number(round($n, $precision));
438    }
439
440    /**
441     * Translate a fraction into a percentage.
442     * e.g. 0.123 becomes
443     * en: 12.3%
444     * fr: 12,3 %
445     * de: 12,3%
446     *
447     * @param float $n
448     * @param int   $precision
449     *
450     * @return string
451     */
452    public static function percentage(float $n, int $precision = 0): string
453    {
454        return self::$locale->percent(round($n, $precision + 2));
455    }
456
457    /**
458     * Translate a plural string
459     * echo self::plural('There is an error', 'There are errors', $num_errors);
460     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
461     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
462     *
463     * @param string $singular
464     * @param string $plural
465     * @param int    $count
466     * @param string ...$args
467     *
468     * @return string
469     */
470    public static function plural(string $singular, string $plural, int $count, ...$args): string
471    {
472        $message = self::$translator->translatePlural($singular, $plural, $count);
473
474        return sprintf($message, ...$args);
475    }
476
477    /**
478     * UTF8 version of PHP::strrev()
479     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
480     * These do not support UTF8 text direction, so we must mimic it for them.
481     * Numbers are always rendered LTR, even in RTL text.
482     * The visual direction of characters such as parentheses should be reversed.
483     *
484     * @param string $text Text to be reversed
485     *
486     * @return string
487     */
488    public static function reverseText($text): string
489    {
490        // Remove HTML markup - we can't display it and it is LTR.
491        $text = strip_tags($text);
492        // Remove HTML entities.
493        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
494
495        // LTR text doesn't need reversing
496        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
497            return $text;
498        }
499
500        // Mirrored characters
501        $text = strtr($text, self::MIRROR_CHARACTERS);
502
503        $reversed = '';
504        $digits   = '';
505        while ($text !== '') {
506            $letter = mb_substr($text, 0, 1);
507            $text   = mb_substr($text, 1);
508            if (strpos(self::DIGITS, $letter) !== false) {
509                $digits .= $letter;
510            } else {
511                $reversed = $letter . $digits . $reversed;
512                $digits   = '';
513            }
514        }
515
516        return $digits . $reversed;
517    }
518
519    /**
520     * Return the direction (ltr or rtl) for a given script
521     * The PHP/intl library does not provde this information, so we need
522     * our own lookup table.
523     *
524     * @param string $script
525     *
526     * @return string
527     */
528    public static function scriptDirection($script): string
529    {
530        switch ($script) {
531            case 'Arab':
532            case 'Hebr':
533            case 'Mong':
534            case 'Thaa':
535                return 'rtl';
536            default:
537                return 'ltr';
538        }
539    }
540
541    /**
542     * Perform a case-insensitive comparison of two strings.
543     *
544     * @param string $string1
545     * @param string $string2
546     *
547     * @return int
548     */
549    public static function strcasecmp($string1, $string2): int
550    {
551        if (self::$collator instanceof Collator) {
552            return self::$collator->compare($string1, $string2);
553        }
554
555        return strcmp(self::strtolower($string1), self::strtolower($string2));
556    }
557
558    /**
559     * Convert a string to lower case.
560     *
561     * @param string $string
562     *
563     * @return string
564     */
565    public static function strtolower($string): string
566    {
567        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
568            $string = strtr($string, self::DOTLESS_I_TOLOWER);
569        }
570
571        return mb_strtolower($string);
572    }
573
574    /**
575     * Convert a string to upper case.
576     *
577     * @param string $string
578     *
579     * @return string
580     */
581    public static function strtoupper($string): string
582    {
583        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
584            $string = strtr($string, self::DOTLESS_I_TOUPPER);
585        }
586
587        return mb_strtoupper($string);
588    }
589
590    /**
591     * Identify the script used for a piece of text
592     *
593     * @param string $string
594     *
595     * @return string
596     */
597    public static function textScript($string): string
598    {
599        $string = strip_tags($string); // otherwise HTML tags show up as latin
600        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
601        $string = str_replace([
602            '@N.N.',
603            '@P.N.',
604        ], '', $string); // otherwise unknown names show up as latin
605        $pos    = 0;
606        $strlen = strlen($string);
607        while ($pos < $strlen) {
608            // get the Unicode Code Point for the character at position $pos
609            $byte1 = ord($string[$pos]);
610            if ($byte1 < 0x80) {
611                $code_point = $byte1;
612                $chrlen     = 1;
613            } elseif ($byte1 < 0xC0) {
614                // Invalid continuation character
615                return 'Latn';
616            } elseif ($byte1 < 0xE0) {
617                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
618                $chrlen     = 2;
619            } elseif ($byte1 < 0xF0) {
620                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
621                $chrlen     = 3;
622            } elseif ($byte1 < 0xF8) {
623                $code_point = (($byte1 & 0x07) << 24) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
624                $chrlen     = 3;
625            } else {
626                // Invalid UTF
627                return 'Latn';
628            }
629
630            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
631                if ($code_point >= $range[1] && $code_point <= $range[2]) {
632                    return $range[0];
633                }
634            }
635            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
636            $pos += $chrlen;
637        }
638
639        return 'Latn';
640    }
641
642    /**
643     * Convert a number of seconds into a relative time. For example, 630 => "10 hours, 30 minutes ago"
644     *
645     * @param int $seconds
646     *
647     * @return string
648     */
649    public static function timeAgo($seconds): string
650    {
651        $minute = 60;
652        $hour   = 60 * $minute;
653        $day    = 24 * $hour;
654        $month  = 30 * $day;
655        $year   = 365 * $day;
656
657        if ($seconds > $year) {
658            $years = intdiv($seconds, $year);
659
660            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
661        }
662
663        if ($seconds > $month) {
664            $months = intdiv($seconds, $month);
665
666            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
667        }
668
669        if ($seconds > $day) {
670            $days = intdiv($seconds, $day);
671
672            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
673        }
674
675        if ($seconds > $hour) {
676            $hours = intdiv($seconds, $hour);
677
678            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
679        }
680
681        if ($seconds > $minute) {
682            $minutes = intdiv($seconds, $minute);
683
684            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
685        }
686
687        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
688    }
689
690    /**
691     * What format is used to display dates in the current locale?
692     *
693     * @return string
694     */
695    public static function timeFormat(): string
696    {
697        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
698        return self::$translator->translate('%H:%i:%s');
699    }
700
701    /**
702     * Translate a string, and then substitute placeholders
703     * echo I18N::translate('Hello World!');
704     * echo I18N::translate('The %s sat on the mat', 'cat');
705     *
706     * @param string $message
707     * @param string ...$args
708     *
709     * @return string
710     */
711    public static function translate(string $message, ...$args): string
712    {
713        $message = self::$translator->translate($message);
714
715        return sprintf($message, ...$args);
716    }
717
718    /**
719     * Context sensitive version of translate.
720     * echo I18N::translateContext('NOMINATIVE', 'January');
721     * echo I18N::translateContext('GENITIVE', 'January');
722     *
723     * @param string $context
724     * @param string $message
725     * @param string ...$args
726     *
727     * @return string
728     */
729    public static function translateContext(string $context, string $message, ...$args): string
730    {
731        $message = self::$translator->translateContext($context, $message);
732
733        return sprintf($message, ...$args);
734    }
735}
736