<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use DomainException;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Functions\FunctionsEdit;
use Fisharebest\Webtrees\Module\LanguageEnglishUnitedStates;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use const GLOB_NOSORT;
use Illuminate\Support\Collection;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * All state is static: init() must be called before any method that reads
 * the current locale, translator or collator.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    // These must be double-quoted: in single quotes, '\x00' is the four
    // characters backslash, "x", "0", "0" rather than the NUL byte.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];

    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];

    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // textScript() returns the FIRST matching entry, so the order of overlapping ranges matters
    // (e.g. the extended Devanagari block is listed before the wide CJK range that contains it).
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        ['Hans', 0x3000, 0x303F], // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF], // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F], // Mixed CJK, not just Hans
    ];

    // Characters that are displayed in mirror form in RTL text.
    // NOTE(review): several keys below contain a trailing space inside the quotes, so strtr()
    // only matches the character when it is followed by a space. This looks unintentional,
    // but it is kept byte-for-byte here - confirm before changing.
    private const MIRROR_CHARACTERS = [
        '('  => ')',
        ')'  => '(',
        '['  => ']',
        ']'  => '[',
        '{'  => '}',
        '}'  => '{',
        '<'  => '>',
        '>'  => '<',
        '‹ ' => '›',
        '› ' => '‹',
        '«'  => '»',
        '»'  => '«',
        '﴾ ' => '﴿',
        '﴿ ' => '﴾',
        '“ ' => '”',
        '” ' => '“',
        '‘ ' => '’',
        '’ ' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * The locales are taken from the language modules returned by the module service.
     * When that list is empty, a single en-US locale is returned.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * The locale's collation is prefixed with "utf8_". Collations that are not
     * available on all supported servers fall back to "utf8_unicode_ci".
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();

        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Generate consistent I18N for datatables.js
     *
     * The result is a fragment of a JavaScript object literal (a "formatNumber" callback
     * and a "language" object), not a complete JSON document.
     *
     * @param int[] $lengths An optional array of page lengths
     *
     * @return string
     */
    public static function datatablesI18N(array $lengths = [
        10,
        20,
        30,
        50,
        100,
        -1,
    ]): string
    {
        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');

        return
            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
            '"language": {' .
            ' "paginate": {' .
            ' "first": "' . self::translate('first') . '",' .
            ' "last": "' . self::translate('last') . '",' .
            ' "next": "' . self::translate('next') . '",' .
            ' "previous": "' . self::translate('previous') . '"' .
            ' },' .
            ' "emptyTable": "' . self::translate('No records to display') . '",' .
            ' "info": "' . /* I18N: %s are placeholders for numbers */
            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
            ' "infoEmpty": "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
            ' "infoFiltered": "' . /* I18N: %s is a placeholder for a number */
            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
            ' "lengthMenu": "' . /* I18N: %s is a number of records per page */
            self::translate('Display %s', addslashes($length_options)) . '",' .
            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
            ' "processing": "' . self::translate('Loading…') . '",' .
            ' "search": "' . self::translate('Filter') . '",' .
            ' "zeroRecords": "' . self::translate('No records to display') . '"' .
            '}';
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen from, in order: the explicit $code, the locale stored in the
     * session (if its translation file exists), or negotiation with the browser's
     * Accept-Language header, falling back to the tree's LANGUAGE preference or en-US.
     * The translations are then loaded (via a PHP cache file in the data directory), the
     * translator, list separator and collator are set up.
     *
     * @param string    $code  Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     * @param bool      $setup During setup, we cannot access the database.
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null, $setup = false): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            if ($setup) {
                $installed_locales = app(ModuleService::class)->setupLanguages()
                    ->map(function (ModuleLanguageInterface $module): LocaleInterface {
                        return $module->locale();
                    });
            } else {
                $installed_locales = self::installedLocales();
            }

            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';

        // A missing cache file is treated as infinitely old, which forces a rebuild below.
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            $filemtime = 0;
        }

        // Load the translation file(s)
        $translation_files = [
            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
        ];

        // Rebuild files after one hour
        $rebuild_cache = time() > $filemtime + 3600;

        // Rebuild files if any translation file has been updated
        foreach ($translation_files as $translation_file) {
            if (filemtime($translation_file) > $filemtime) {
                $rebuild_cache = true;
                break;
            }
        }

        if ($rebuild_cache) {
            $translations = [];
            foreach ($translation_files as $translation_file) {
                $translation  = new Translation($translation_file);
                $translations = array_merge($translations, $translation->asArray());
            }

            // Writing the cache is best-effort: the in-memory translations are used either way.
            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            $translations = include $cache_file;
        }

        // Add translations from custom modules (but not during setup)
        if (!$setup) {
            $custom_modules = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class);

            foreach ($custom_modules as $custom_module) {
                $custom_translations = $custom_module->customTranslations(self::$locale->languageTag());
                $translations        = array_merge($translations, $custom_translations);
            }
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return Collection A collection of LocaleInterface objects, one per language module
     */
    public static function installedLocales(): Collection
    {
        return app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, true)
            ->map(function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision The number of decimal places to round to
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision The number of decimal places in the displayed percentage
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The fraction needs two more decimal places than the percentage it represents.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count   Selects the plural form; it is NOT substituted into the message
     * @param string ...$args Values for the sprintf() placeholders in the translated message
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';

        // Consume one character at a time, prepending it to the result. Runs of digits are
        // buffered and emitted as a unit, so that they keep their left-to-right order.
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string "ltr" or "rtl"
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator created by init() when one is available, otherwise a
     * byte-wise comparison of the lower-cased strings.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            return self::$collator->compare($string1, $string2);
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        // Map the dotted/dotless I first; mb_strtolower() does not apply these locale rules.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        // Map the dotted/dotless I first; mb_strtoupper() does not apply these locale rules.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of the first
     * character that falls in a known range is returned. Characters outside every
     * range (punctuation, spaces, emoji, etc.) are skipped. Text with no recognised
     * character, and text containing invalid or truncated UTF-8, is reported as "Latn".
     *
     * @param string $string
     *
     * @return string A four-letter script code, e.g. "Latn", "Hebr", "Arab"
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin

        $pos    = 0;
        $strlen = strlen($string);

        while ($pos < $strlen) {
            // The lead byte tells us how many bytes make up the character at position $pos
            $byte1 = ord($string[$pos]);

            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - do not read past the end of the string
                return 'Latn';
            }

            // Get the Unicode Code Point: the payload bits of the lead byte, followed by
            // six payload bits from each continuation byte.
            switch ($chrlen) {
                case 1:
                    $code_point = $byte1;
                    break;
                case 2:
                    $code_point = (($byte1 & 0x1F) << 6)
                        + (ord($string[$pos + 1]) & 0x3F);
                    break;
                case 3:
                    $code_point = (($byte1 & 0x0F) << 12)
                        + ((ord($string[$pos + 1]) & 0x3F) << 6)
                        + (ord($string[$pos + 2]) & 0x3F);
                    break;
                default:
                    $code_point = (($byte1 & 0x07) << 18)
                        + ((ord($string[$pos + 1]) & 0x3F) << 12)
                        + ((ord($string[$pos + 2]) & 0x3F) << 6)
                        + (ord($string[$pos + 3]) & 0x3F);
                    break;
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }

            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time. For example, 630 => "10 minutes ago"
     *
     * Only the largest applicable unit is shown, rounded down. A month is taken as
     * 30 days and a year as 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds): string
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        if ($seconds > $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds > $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds > $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds > $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds > $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args Values for the sprintf() placeholders in the translated message
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args Values for the sprintf() placeholders in the translated message
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}