<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use Illuminate\Support\Collection;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * All members are static. Most methods read the locale, translator and
 * collator that init() stores on the class, so init() must be called first.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    // These must be double-quoted: in single quotes, PHP would keep the
    // backslash sequences as four literal characters instead of one control byte.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // textScript() returns the script of the first range that matches.
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        ['Hans', 0x3000, 0x303F], // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF], // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F], // Mixed CJK, not just Hans
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Every key must be exactly one character: strtr() replaces the whole key,
    // so a key with a trailing space would only match before a space - and would delete it.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        // No language modules found - fall back to US English.
        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string The locale's collation, prefixed with "utf8_"
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();

        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen in this order: the given code, the locale stored in
     * the session (if its translation file exists), or a negotiation between
     * the browser's request headers and the installed locales.
     * Translations are then loaded (using a PHP cache file where it is fresh),
     * and the translator, list separator and collator are set up.
     *
     * @param string    $code  Use this locale/language code, or choose one automatically
     * @param Tree|null $tree  Supplies the default language when negotiating
     * @param bool      $setup During setup, we cannot access the database.
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null, $setup = false): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            if ($setup) {
                $installed_locales = app(ModuleService::class)->setupLanguages()
                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                        return $module->locale();
                    });
            } else {
                $installed_locales = self::installedLocales();
            }

            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            // A missing cache counts as infinitely old, which forces a rebuild below.
            $filemtime = 0;
        }

        // Load the translation file(s)
        $translation_files = [
            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
        ];

        // Rebuild files after one hour
        $rebuild_cache = time() > $filemtime + 3600;
        // Rebuild files if any translation file has been updated
        foreach ($translation_files as $translation_file) {
            if (filemtime($translation_file) > $filemtime) {
                $rebuild_cache = true;
                break;
            }
        }

        if ($rebuild_cache) {
            $translations = [];
            foreach ($translation_files as $translation_file) {
                $translation  = new Translation($translation_file);
                $translations = array_merge($translations, $translation->asArray());
            }
            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            $translations = include $cache_file;
        }

        // Add translations from custom modules (but not during setup)
        if (!$setup) {
            $custom_modules = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class);

            foreach ($custom_modules as $custom_module) {
                $custom_translations = $custom_module->customTranslations(self::$locale->languageTag());
                $translations        = array_merge($translations, $custom_translations);
            }
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return Collection A collection of LocaleInterface objects, one per language module
     */
    public static function installedLocales(): Collection
    {
        return app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision Number of decimal places to round to
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision Number of decimal places in the displayed percentage
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so it needs two more decimal places than the percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count   Selects the plural form
     * @param string ...$args Values for the sprintf() placeholders
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string The text with markup removed; reversed only if its script is RTL
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect runs of digits in their original order.
                $digits .= $letter;
            } else {
                // Prepending each character reverses the text; a pending run of digits stays intact.
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script A script code, such as "Latn" or "Arab"
     *
     * @return string "ltr" or "rtl"
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int Negative, zero or positive - as per strcmp()
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // compare() returns false on failure. Returning that from an
            // int function would be a TypeError, so use the fallback instead.
            if (is_int($result)) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        // Dotted/dotless I must be mapped before the generic conversion.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        // Dotted/dotless I must be mapped before the generic conversion.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time. The script of the
     * first character found in SCRIPT_CHARACTER_RANGES is returned.
     * Invalid UTF-8, or text with no recognised characters, gives "Latn".
     *
     * @param string $string
     *
     * @return string A script code, such as "Latn" or "Hebr"
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin

        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The first byte tells us how many bytes encode this character
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Invalid UTF - the final character is truncated
                return 'Latn';
            }

            // get the Unicode Code Point for the character at position $pos
            if ($chrlen === 1) {
                $code_point = $byte1;
            } else {
                // The first byte of an N-byte sequence carries 7-N payload bits;
                // each continuation byte carries a further 6 bits.
                $code_point = $byte1 & (0xFF >> ($chrlen + 1));
                for ($i = 1; $i < $chrlen; $i++) {
                    $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
                }
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }

            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time. For example, 630 => "10 minutes ago"
     *
     * Only the largest whole unit is shown. A month is counted as 30 days
     * and a year as 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds): string
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        // Use >= so that an exact unit (e.g. 60 seconds) is shown as "1 minute", not "60 seconds".
        if ($seconds >= $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds >= $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds >= $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds >= $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds >= $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args Values for the sprintf() placeholders
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args Values for the sprintf() placeholders
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}