xref: /webtrees/app/I18N.php (revision e3c147d0d53873311b7c137c41b4439e01d4189e)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2019 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees;
21
22use Collator;
23use Exception;
24use Fisharebest\Localization\Locale;
25use Fisharebest\Localization\Locale\LocaleEnUs;
26use Fisharebest\Localization\Locale\LocaleInterface;
27use Fisharebest\Localization\Translation;
28use Fisharebest\Localization\Translator;
29use Fisharebest\Webtrees\Module\ModuleCustomInterface;
30use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
31use Fisharebest\Webtrees\Services\ModuleService;
32use Illuminate\Support\Collection;
33
34use function array_merge;
35use function class_exists;
36use function html_entity_decode;
37use function in_array;
38use function mb_strtolower;
39use function mb_strtoupper;
40use function mb_substr;
41use function ord;
42use function sprintf;
43use function str_replace;
44use function strcmp;
45use function strip_tags;
46use function strlen;
47use function strpos;
48use function strtr;
49use function var_export;
50
/**
 * Internationalization (i18n) and localization (l10n).
 *
 * A static facade around the current locale, its translator and (when the
 * PHP intl extension is available) its collator. The locale and translator
 * are only assigned by init(), so init() must be called before any method
 * that relies on them.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];

    // Applied before mb_strtolower() in the dotless-I locales.
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];

    // Applied before mb_strtoupper() in the dotless-I locales.
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // The list is searched in order and the first matching range wins, so a
    // narrow range must be listed before any wider range that overlaps it.
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        // Mixed CJK, not just Hans
        ['Hans', 0x3000, 0x303F],
        // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF],
        // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Every key is exactly one character: a key with trailing whitespace
    // would only match when the character is followed by a space.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * The locales are taken from the language modules reported by the module
     * service. When there are none, a single en-US locale is returned.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        // NOTE(review): the two boolean flags are defined by ModuleService::findByInterface() - confirm their meaning there.
        /** @var Collection $locales */
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string The locale's collation with a "utf8_" prefix, or "utf8_unicode_ci" for the collations listed below.
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * Sets the current locale, loads its translations (building the cached
     * PHP file from the .po file when loading fails), merges in translations
     * from custom modules, and creates the translator, the list separator
     * and - when possible - the collator.
     *
     * @param string $code  Locale code, passed to Locale::create()
     * @param bool   $setup True during setup, when there is no database and therefore no modules
     *
     * @return void
     */
    public static function init(string $code, bool $setup = false): void
    {
        self::$locale = Locale::create($code);

        // Load the translation file
        $translation_file = __DIR__ . '/../resources/lang/' . self::$locale->languageTag() . '/messages.php';

        try {
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();
        } catch (Exception $ex) {
            // The translations files are created during the build process, and are
            // not included in the source code.
            // Assuming we are using dev code, and build (or rebuild) the files.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
            $translation  = new Translation($po_file);
            $translations = $translation->asArray();
            file_put_contents($translation_file, "<?php\n\nreturn " . var_export($translations, true) . ";\n");
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    // array_merge() lets later modules override earlier entries with the same key.
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator. Start from "no collator" so that strcasecmp()
        // uses its fallback whenever one cannot be created.
        self::$collator = null;

        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed?  We'll use a fallback later.
            self::$collator = null;
        }
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args Values for the sprintf() placeholders in the translated message
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * The language tag of the current locale.
     *
     * @return string
     */
    public static function languageTag(): string
    {
        return self::$locale->languageTag();
    }

    /**
     * The current locale, as set by init().
     *
     * @return LocaleInterface
     */
    public static function locale(): LocaleInterface
    {
        return self::$locale;
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision Number of decimal places to round to
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n         A fraction (0.5 means 50%)
     * @param int   $precision Number of decimal places in the displayed percentage
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are kept when rounding.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count   Selects the plural form; it is NOT substituted into the message automatically
     * @param string ...$args Values for the sprintf() placeholders in the translated message
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string The text with markup and entities removed; reversed only if its script is RTL
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits, so that it can be emitted in its original (LTR) order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // Any digits still pending were at the end of the input, so they go at the start of the output.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script A four-letter script code, e.g. "Latn" or "Arab"
     *
     * @return string "rtl" for the scripts listed below, otherwise "ltr"
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of
     * the first character that falls in a known range is returned.
     * Characters outside every range (punctuation, spaces, emoji, ...) are
     * skipped. Invalid or truncated UTF-8, and text with no recognised
     * characters, are reported as "Latn".
     *
     * @param string $string
     *
     * @return string A four-letter script code, e.g. "Latn" or "Hebr"
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The lead byte gives the length of the sequence and the high bits of the code point.
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen     = 1;
                $code_point = $byte1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen     = 2;
                $code_point = $byte1 & 0x1F;
            } elseif ($byte1 < 0xF0) {
                $chrlen     = 3;
                $code_point = $byte1 & 0x0F;
            } elseif ($byte1 < 0xF8) {
                $chrlen     = 4;
                $code_point = $byte1 & 0x07;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated sequence - invalid UTF
                return 'Latn';
            }

            // Each continuation byte contributes its low six bits.
            for ($i = 1; $i < $chrlen; $i++) {
                $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator created by init() when there is one. Without a
     * collator, or when the collator cannot compare the strings, the
     * lower-cased strings are compared byte-by-byte.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int Less than, equal to, or greater than zero
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on failure, which is not a valid return value here.
            if ($result !== false) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * In the dotless-I locales, "I" and "İ" are converted explicitly first.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * In the dotless-I locales, "ı" and "i" are converted explicitly first.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args Values for the sprintf() placeholders in the translated message
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}
621