xref: /webtrees/app/I18N.php (revision 57ab22314b2599feb432b1a1ed71643cfc2f0452)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20use Collator;
21use Exception;
22use Fisharebest\Localization\Locale;
23use Fisharebest\Localization\Locale\LocaleEnUs;
24use Fisharebest\Localization\Locale\LocaleInterface;
25use Fisharebest\Localization\Translation;
26use Fisharebest\Localization\Translator;
27use Fisharebest\Webtrees\Module\ModuleCustomInterface;
28use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
29use Fisharebest\Webtrees\Services\ModuleService;
30use Illuminate\Support\Collection;
31use function array_merge;
32use function class_exists;
33use function html_entity_decode;
34use function in_array;
35use function mb_strtolower;
36use function mb_strtoupper;
37use function mb_substr;
38use function ord;
39use function sprintf;
40use function str_replace;
41use function strcmp;
42use function strip_tags;
43use function strlen;
44use function strpos;
45use function strtr;
46
/**
 * Internationalization (i18n) and localization (l10n).
 *
 * This class is a static facade. init() selects a locale, loads its
 * translations and creates the translator (and, when available, a collator).
 * The methods that read self::$locale or self::$translator therefore
 * require init() to have been called first.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    // Digits are always rendered LTR, even in RTL text.
    // (ASCII digits, Arabic-Indic digits and extended Arabic-Indic digits.)
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];

    // Applied before mb_strtolower() for the locales listed above.
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];

    // Applied before mb_strtoupper() for the locales listed above.
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script.
    // Each entry is [script code, first code point, last code point].
    // The list is searched in order and the first match wins, so the
    // order of overlapping ranges (e.g. Deva 0xA8E0 vs. Hans 0x3400) matters.
    private const SCRIPT_CHARACTER_RANGES = [
        [
            'Latn',
            0x0041,
            0x005A,
        ],
        [
            'Latn',
            0x0061,
            0x007A,
        ],
        [
            'Latn',
            0x0100,
            0x02AF,
        ],
        [
            'Grek',
            0x0370,
            0x03FF,
        ],
        [
            'Cyrl',
            0x0400,
            0x052F,
        ],
        [
            'Hebr',
            0x0590,
            0x05FF,
        ],
        [
            'Arab',
            0x0600,
            0x06FF,
        ],
        [
            'Arab',
            0x0750,
            0x077F,
        ],
        [
            'Arab',
            0x08A0,
            0x08FF,
        ],
        [
            'Deva',
            0x0900,
            0x097F,
        ],
        [
            'Taml',
            0x0B80,
            0x0BFF,
        ],
        [
            'Sinh',
            0x0D80,
            0x0DFF,
        ],
        [
            'Thai',
            0x0E00,
            0x0E7F,
        ],
        [
            'Geor',
            0x10A0,
            0x10FF,
        ],
        [
            'Grek',
            0x1F00,
            0x1FFF,
        ],
        [
            'Deva',
            0xA8E0,
            0xA8FF,
        ],
        // The three ranges below are mixed CJK, not just Hans.
        [
            'Hans',
            0x3000,
            0x303F,
        ],
        [
            'Hans',
            0x3400,
            0xFAFF,
        ],
        [
            'Hans',
            0x20000,
            0x2FA1F,
        ],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Keys must be the bare character (no padding inside the quotes),
    // otherwise strtr() only matches the character when a space follows it.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * The locales are taken from the language modules returned by the module
     * service; when that list is empty, a single en-US locale is returned.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        /** @var Collection $locales */
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string The locale's collation, prefixed with "utf8_"
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6 - use a generic collation instead.
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen from, in order of priority: the $code argument,
     * the "language" value stored in the session, or negotiation with the
     * browser (falling back to the tree's LANGUAGE preference, or en-US).
     *
     * @param string    $code  Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     * @param bool      $setup During setup, we cannot access the database.
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null, $setup = false): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('language')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('language'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate.  They get the default locale of the tree.
            if ($setup) {
                $installed_locales = app(ModuleService::class)->setupLanguages()
                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                        return $module->locale();
                    });
            } else {
                $installed_locales = self::installedLocales();
            }

            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
        }

        // Load the translation file
        $translation_file = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.php';

        try {
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();
        } catch (Exception $ex) {
            // The translation files are created during the build process, and are
            // not included in the source code.
            // Assume we are using dev code, and build (or rebuild) the file
            // from the corresponding .po file.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
            $translation  = new Translation($po_file);
            $translations = $translation->asArray();
            // Best effort: the return value is deliberately not checked.
            file_put_contents($translation_file, '<?php return ' . var_export($translations, true) . ';');
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    // array_merge: later modules override earlier translations with the same key.
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed?  We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return Collection
     */
    public static function installedLocales(): Collection
    {
        return app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args Values for the sprintf() placeholders in the translated message
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision Number of decimal places to round to
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n         A fraction, e.g. 0.123 for 12.3%
     * @param int   $precision Number of decimal places in the percentage
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are kept
        // to give $precision decimal places in the resulting percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count   Selects the plural form; it is NOT substituted automatically
     * @param string ...$args Values for the sprintf() placeholders in the translated message
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string The text with markup removed; reversed only if its script is RTL
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect runs of digits, so that they keep their LTR order.
                $digits .= $letter;
            } else {
                // Prepend the character, followed by any digits that preceded it.
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // Any digits still pending were at the end of the text.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script A script code, e.g. "Latn" or "Arab"
     *
     * @return string "rtl" for the scripts listed below, otherwise "ltr"
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned from the start, and the script of the first
     * character that falls in one of the known ranges is returned.
     * Text with no such character, or with invalid UTF-8, is reported
     * as "Latn".
     *
     * @param string $string
     *
     * @return string A script code, e.g. "Latn" or "Hebr"
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The first byte tells us how many bytes encode this character.
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - also invalid UTF
                return 'Latn';
            }

            // get the Unicode Code Point for the character at position $pos
            if ($chrlen === 1) {
                $code_point = $byte1;
            } else {
                // The lead byte holds 5, 4 or 3 payload bits (for 2, 3 or 4 byte
                // sequences); each following byte adds another 6 bits.
                $code_point = $byte1 & (0xFF >> ($chrlen + 1));
                for ($i = 1; $i < $chrlen; $i++) {
                    $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
                }
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator created by init() when there is one; otherwise
     * (or if the collator reports a failure) compares the lower-cased strings.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int Negative, zero or positive, as for strcmp()
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on failure, which must not
            // be returned from a function declared to return int.
            if ($result !== false) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            // Map the dotted/dotless letter I before the generic conversion.
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            // Map the dotted/dotless letter I before the generic conversion.
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args Values for the sprintf() placeholders in the translated message
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}
687