<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use DomainException;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Functions\FunctionsEdit;
use Fisharebest\Webtrees\Module\LanguageEnglishUnitedStates;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use const GLOB_NOSORT;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * A static facade around the current locale, its translator and its collator.
 * init() must be called before any method that reads the current locale or
 * translator; textScript(), scriptDirection() and reverseText() do not need it.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    // These must be double-quoted: in single quotes, \x00 would be four literal characters.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script.
    // Each entry is [script code, first code point, last code point].
    private const SCRIPT_CHARACTER_RANGES = [
        [
            'Latn',
            0x0041,
            0x005A,
        ],
        [
            'Latn',
            0x0061,
            0x007A,
        ],
        [
            'Latn',
            0x0100,
            0x02AF,
        ],
        [
            'Grek',
            0x0370,
            0x03FF,
        ],
        [
            'Cyrl',
            0x0400,
            0x052F,
        ],
        [
            'Hebr',
            0x0590,
            0x05FF,
        ],
        [
            'Arab',
            0x0600,
            0x06FF,
        ],
        [
            'Arab',
            0x0750,
            0x077F,
        ],
        [
            'Arab',
            0x08A0,
            0x08FF,
        ],
        [
            'Deva',
            0x0900,
            0x097F,
        ],
        [
            'Taml',
            0x0B80,
            0x0BFF,
        ],
        [
            'Sinh',
            0x0D80,
            0x0DFF,
        ],
        [
            'Thai',
            0x0E00,
            0x0E7F,
        ],
        [
            'Geor',
            0x10A0,
            0x10FF,
        ],
        [
            'Grek',
            0x1F00,
            0x1FFF,
        ],
        [
            'Deva',
            0xA8E0,
            0xA8FF,
        ],
        [
            'Hans',
            0x3000,
            0x303F,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3400,
            0xFAFF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x20000,
            0x2FA1F,
        ],
        // Mixed CJK, not just Hans
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Keys must be the bare character: a trailing space inside the key would
    // only match (and swallow) the character when it is followed by a space.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class)
            ->map(function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Generate consistent I18N for datatables.js
     *
     * The result is a fragment of a JavaScript object literal (no enclosing braces).
     *
     * NOTE(review): translated strings are interpolated into double-quoted
     * JavaScript strings without escaping, so a translation containing a
     * double quote or backslash would produce invalid JavaScript.
     *
     * @param int[] $lengths An optional array of page lengths
     *
     * @return string
     */
    public static function datatablesI18N(array $lengths = [
        10,
        20,
        30,
        50,
        100,
        -1,
    ]): string
    {
        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');

        return
            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
            '"language": {' .
            ' "paginate": {' .
            ' "first": "' . self::translate('first') . '",' .
            ' "last": "' . self::translate('last') . '",' .
            ' "next": "' . self::translate('next') . '",' .
            ' "previous": "' . self::translate('previous') . '"' .
            ' },' .
            ' "emptyTable": "' . self::translate('No records to display') . '",' .
            ' "info": "' . /* I18N: %s are placeholders for numbers */
            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
            ' "infoEmpty": "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
            ' "infoFiltered": "' . /* I18N: %s is a placeholder for a number */
            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
            ' "lengthMenu": "' . /* I18N: %s is a number of records per page */
            self::translate('Display %s', addslashes($length_options)) . '",' .
            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
            ' "processing": "' . self::translate('Loading…') . '",' .
            ' "search": "' . self::translate('Filter') . '",' .
            ' "zeroRecords": "' . self::translate('No records to display') . '"' .
            '}';
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen from, in order: the $code argument, the locale
     * stored in the session (if its translation file exists), or negotiation
     * with the browser using the tree's default as a fallback.
     * Translations are cached as a PHP file in the data folder.
     *
     * @param string    $code   Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     * @param bool      $custom Load custom translations
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null, $custom = true): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            $filemtime = 0;
        }

        // Load the translation file(s)
        $translation_files = [
            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
        ];

        // Rebuild files after one hour
        $rebuild_cache = time() > $filemtime + 3600;
        // Rebuild files if any translation file has been updated
        foreach ($translation_files as $translation_file) {
            // A missing file cannot be newer than the cache (and filemtime() would emit a warning).
            if (file_exists($translation_file) && filemtime($translation_file) > $filemtime) {
                $rebuild_cache = true;
                break;
            }
        }

        if ($rebuild_cache) {
            $translations = [];
            foreach ($translation_files as $translation_file) {
                $translation  = new Translation($translation_file);
                $translations = array_merge($translations, $translation->asArray());
            }
            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            // $rebuild_cache can only be false when the cache file exists and is recent.
            $translations = include $cache_file;
        }

        // Add translations from custom modules (but not during setup)
        if ($custom) {
            $custom_modules = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class);

            foreach ($custom_modules as $custom_module) {
                $custom_translations = $custom_module->customTranslations(self::$locale->languageTag());
                $translations        = array_merge($translations, $custom_translations);
            }
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return LocaleInterface[]
     */
    public static function installedLocales(): array
    {
        return app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, true)
            ->map(function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            })
            ->all();
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The fraction needs two more decimal places than the displayed percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect runs of digits, so that they keep their LTR order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator of the current locale when php-intl is available,
     * otherwise (or if the collator reports an error) a byte-wise comparison
     * of the lower-cased strings.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $comparison = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on error, which is not a valid int result.
            if ($comparison !== false) {
                return $comparison;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The result is the script of the first character that falls in one of
     * the known ranges. Text with no such character, or with invalid or
     * truncated UTF-8, is reported as 'Latn'.
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The first byte tells us how many bytes are in the character at position $pos
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte character
                return 'Latn';
            }

            // Get the Unicode Code Point: the payload bits of the first byte
            // (5, 4 or 3 bits for 2, 3 or 4 byte characters), followed by
            // six bits from each continuation byte.
            if ($chrlen === 1) {
                $code_point = $byte1;
            } else {
                $code_point = $byte1 & (0xFF >> ($chrlen + 1));
                for ($offset = 1; $offset < $chrlen; $offset++) {
                    $code_point = ($code_point << 6) | (ord($string[$pos + $offset]) & 0x3F);
                }
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time. For example, 630 => "10 minutes ago"
     *
     * Only the largest applicable unit is shown, and the value is rounded down.
     * Months are counted as 30 days and years as 365 days.
     * A unit is only used when the interval is strictly greater than one of
     * that unit, so exactly 60 seconds is reported as 60 seconds.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds): string
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        if ($seconds > $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds > $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds > $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds > $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds > $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display times in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}