<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use Illuminate\Support\Collection;
use function array_merge;
use function class_exists;
use function filemtime;
use function file_exists;
use function html_entity_decode;
use function in_array;
use function mb_strtolower;
use function mb_strtoupper;
use function mb_substr;
use function ord;
use function sprintf;
use function str_replace;
use function strcmp;
use function strip_tags;
use function strlen;
use function strpos;
use function strtr;

/**
 * Internationalization (i18n) and localization (l10n).
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // textScript() returns the FIRST matching row, so the order matters where ranges
    // overlap (e.g. the Deva block 0xA8E0-0xA8FF lies inside the wide CJK range below it).
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        // The following three ranges are mixed CJK, not just Hans
        ['Hans', 0x3000, 0x303F],
        ['Hans', 0x3400, 0xFAFF],
        ['Hans', 0x20000, 0x2FA1F],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Every key must be exactly one character: a stray trailing space in a key
    // would stop it matching unless followed by a space, and would swallow that space.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        // No language modules found: fall back to US English.
        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * Selects the locale (explicit code, then session, then browser negotiation),
     * loads its translations (using a PHP cache file when it is newer than the
     * .mo file), merges translations from custom modules, and creates the
     * translator and collator used by the other methods of this class.
     *
     * @param string    $code  Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     * @param bool      $setup During setup, we cannot access the database.
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null, $setup = false): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('language') && file_exists(Webtrees::ROOT_DIR . 'resources/lang/' . Session::get('language') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('language'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            if ($setup) {
                $installed_locales = app(ModuleService::class)->setupLanguages()
                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                        return $module->locale();
                    });
            } else {
                $installed_locales = self::installedLocales();
            }

            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            // A missing cache counts as older than any translation file.
            $filemtime = 0;
        }

        // Load the translation file
        $translation_file = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo';

        if (!file_exists($translation_file)) {
            // Test and dev environments may not have the compiled translations
            $translations = [];
        } elseif (filemtime($translation_file) > $filemtime) {
            // The .mo file is newer than the cache: parse it and rebuild the cache.
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();

            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
                // Deliberately best-effort: the translations are already in memory.
            }
        } else {
            $translations = include $cache_file;
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return Collection<LocaleInterface>
     */
    public static function installedLocales(): Collection
    {
        return app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are needed
        // to keep $precision places once it is shown as a percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits, so that it keeps its LTR order.
                $digits .= $letter;
            } else {
                // Prepend the character, preceded (logically followed) by any pending digits.
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // Any digits still pending were at the end of the text, so they now come first.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            return self::$collator->compare($string1, $string2);
        }

        // No collator available: compare the lower-cased strings byte by byte.
        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of the
     * first character that falls in a known range is returned. Text with no
     * recognised characters, and text that is not valid UTF-8 (stray continuation
     * byte, invalid lead byte or truncated sequence), is reported as 'Latn'.
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The lead byte tells us how many bytes make up this character
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - invalid UTF
                return 'Latn';
            }

            // get the Unicode Code Point for the character at position $pos
            if ($chrlen === 1) {
                $code_point = $byte1;
            } elseif ($chrlen === 2) {
                $code_point = (($byte1 & 0x1F) << 6)
                    + (ord($string[$pos + 1]) & 0x3F);
            } elseif ($chrlen === 3) {
                $code_point = (($byte1 & 0x0F) << 12)
                    + ((ord($string[$pos + 1]) & 0x3F) << 6)
                    + (ord($string[$pos + 2]) & 0x3F);
            } else {
                // Four bytes carry 3 + 6 + 6 + 6 bits, so the lead byte is shifted by 18
                $code_point = (($byte1 & 0x07) << 18)
                    + ((ord($string[$pos + 1]) & 0x3F) << 12)
                    + ((ord($string[$pos + 2]) & 0x3F) << 6)
                    + (ord($string[$pos + 3]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * What format is used to display times in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}