<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use Illuminate\Support\Collection;
use function array_merge;
use function class_exists;
use function filemtime;
use function file_exists;
use function html_entity_decode;
use function in_array;
use function intdiv;
use function mb_strtolower;
use function mb_strtoupper;
use function mb_substr;
use function ord;
use function sprintf;
use function str_replace;
use function strcmp;
use function strip_tags;
use function strlen;
use function strpos;
use function strtr;

/**
 * Internationalization (i18n) and localization (l10n).
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script code, first code point, last code point].
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        ['Hans', 0x3000, 0x303F],
        // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF],
        // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F],
        // Mixed CJK, not just Hans
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Every key must be exactly one character: a key with a trailing space would
    // only match when the character is followed by a space, and would swallow it.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen in this order: the explicit $code, the language stored
     * in the session (if its translation file exists), then negotiation with the
     * browser, falling back to the tree's LANGUAGE preference or en-US.
     *
     * @param string    $code  Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     * @param bool      $setup During setup, we cannot access the database.
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null, $setup = false): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('language') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('language') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('language'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            if ($setup) {
                $installed_locales = app(ModuleService::class)->setupLanguages()
                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                        return $module->locale();
                    });
            } else {
                $installed_locales = self::installedLocales();
            }

            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            // No cache yet: any existing translation file counts as newer.
            $filemtime = 0;
        }

        // Load the translation file
        $translation_file = WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo';

        if (!file_exists($translation_file)) {
            // Test and dev environments may not have the compiled translations
            $translations = [];
        } elseif (filemtime($translation_file) > $filemtime) {
            // The translation file is newer than the cache: parse it and rebuild the cache.
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();

            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            $translations = include $cache_file;
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return Collection A collection of LocaleInterface objects
     */
    public static function installedLocales(): Collection
    {
        return app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are kept when rounding.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits in reading order, so that the number is not reversed.
                $digits .= $letter;
            } else {
                // Prepend the character, followed by any pending run of digits.
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // The text may have ended with digits that were not yet flushed.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator when one was created by init(); otherwise, or when the
     * collator reports a failure, compares the lower-cased strings byte-wise.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on failure, which is not a valid
            // return value here. Fall through to the byte-wise comparison instead.
            if ($result !== false) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        // Apply the dotted/dotless I mapping first for the locales that need it.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        // Apply the dotted/dotless I mapping first for the locales that need it.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned character by character; the script of the first
     * character that falls in one of the known ranges is returned. Invalid or
     * truncated UTF-8, and text with no recognised characters, yield "Latn".
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // Work out the length of the UTF-8 sequence from its first byte
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - invalid UTF
                return 'Latn';
            }

            // get the Unicode Code Point for the character at position $pos.
            // The first byte carries 7, 5, 4 or 3 payload bits (for 1-4 byte sequences);
            // each following byte carries 6 more.
            $code_point = $chrlen === 1 ? $byte1 : $byte1 & (0xFF >> ($chrlen + 1));
            for ($i = 1; $i < $chrlen; $i++) {
                $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time. For example, 630 => "10 minutes ago"
     *
     * Only the largest applicable unit is reported. A month is counted as 30 days
     * and a year as 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds): string
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        if ($seconds > $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds > $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds > $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds > $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds > $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}