<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use DomainException;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Functions\FunctionsEdit;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use const GLOB_NOSORT;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * A static facade around the current locale, translator and collator.
 * Most methods read state that is set up by init(), so init() must be
 * called before any locale-dependent method is used.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    // These must be double-quoted: in single quotes, '\x00' is four literal
    // characters (backslash, x, 0, 0) rather than the single control byte.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // Ranges are tested in order and the first match wins, so the narrow
    // 'Deva' range at 0xA8E0 must stay ahead of the wide 'Hans' range that contains it.
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        ['Hans', 0x3000, 0x303F], // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF], // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F], // Mixed CJK, not just Hans
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Every key is exactly one character; a trailing space in a key would
    // stop it matching unless the character happened to be followed by a space.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    // Default list of locales to show in the menu.
    private const DEFAULT_LOCALES = [
        'ar',
        'bg',
        'bs',
        'ca',
        'cs',
        'da',
        'de',
        'el',
        'en-GB',
        'en-US',
        'es',
        'et',
        'fi',
        'fr',
        'he',
        'hr',
        'hu',
        'is',
        'it',
        'ka',
        'kk',
        'lt',
        'mr',
        'nb',
        'nl',
        'nn',
        'pl',
        'pt',
        'ru',
        'sk',
        'sv',
        'tr',
        'uk',
        'vi',
        'zh-Hans',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * Only locales that have a messages.mo file under resources/lang/ are returned.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $code_list = Site::getPreference('LANGUAGES');

        if ($code_list === '') {
            $codes = self::DEFAULT_LOCALES;
        } else {
            $codes = explode(',', $code_list);
        }

        $locales = [];
        foreach ($codes as $code) {
            if (file_exists(WT_ROOT . 'resources/lang/' . $code . '/messages.mo')) {
                try {
                    $locales[] = Locale::create($code);
                } catch (Exception $ex) {
                    // No such locale exists?
                }
            }
        }

        usort($locales, '\Fisharebest\Localization\Locale::compare');

        return $locales;
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * The format string is passed straight to the translator (not through
     * sprintf), so its "%" codes are returned untouched.
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Generate consistent I18N for datatables.js
     *
     * Returns a fragment of a JavaScript object literal (no enclosing braces)
     * containing a "formatNumber" callback and a "language" object.
     *
     * @param int[] $lengths An optional array of page lengths
     *
     * @return string
     */
    public static function datatablesI18N(array $lengths = [
        10,
        20,
        30,
        50,
        100,
        -1,
    ]): string
    {
        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');

        return
            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
            '"language": {' .
            ' "paginate": {' .
            ' "first": "' . self::translate('first') . '",' .
            ' "last": "' . self::translate('last') . '",' .
            ' "next": "' . self::translate('next') . '",' .
            ' "previous": "' . self::translate('previous') . '"' .
            ' },' .
            ' "emptyTable": "' . self::translate('No records to display') . '",' .
            ' "info": "' . /* I18N: %s are placeholders for numbers */
            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
            ' "infoEmpty": "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
            ' "infoFiltered": "' . /* I18N: %s is a placeholder for a number */
            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
            ' "lengthMenu": "' . /* I18N: %s is a number of records per page */
            self::translate('Display %s', addslashes($length_options)) . '",' .
            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
            ' "processing": "' . self::translate('Loading…') . '",' .
            ' "search": "' . self::translate('Filter') . '",' .
            ' "zeroRecords": "' . self::translate('No records to display') . '"' .
            '}';
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen, in order of preference, from: the $code argument,
     * the locale stored in the session (if its translation file exists), or
     * negotiation with the browser (falling back to the tree's default or en-US).
     * Translations are cached as a PHP file in the data directory.
     *
     * @param string    $code   Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     * @param bool      $custom Load custom translations
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null, $custom = true): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            // A missing cache file is treated as infinitely old.
            $filemtime = 0;
        }

        // Load the translation file(s)
        $translation_files = [
            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
        ];

        // Rebuild files after one hour
        $rebuild_cache = time() > $filemtime + 3600;
        // Rebuild files if any translation file has been updated
        foreach ($translation_files as $translation_file) {
            if (filemtime($translation_file) > $filemtime) {
                $rebuild_cache = true;
                break;
            }
        }

        if ($rebuild_cache) {
            $translations = [];
            foreach ($translation_files as $translation_file) {
                $translation  = new Translation($translation_file);
                $translations = array_merge($translations, $translation->asArray());
            }
            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            $translations = include $cache_file;
        }

        // Add translations from custom modules (but not during setup)
        if ($custom) {
            $custom_modules = app(ModuleService::class)->findByInterface(ModuleCustomInterface::class);

            foreach ($custom_modules as $custom_module) {
                $custom_translations = $custom_module->customTranslations(self::$locale->languageTag());
                $translations        = array_merge($translations, $custom_translations);
            }
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return LocaleInterface[]
     */
    public static function installedLocales(): array
    {
        $locales = [];

        // glob() returns false on error (and, on some systems, when nothing matches).
        $files = glob(WT_ROOT . 'resources/lang/*/messages.mo', GLOB_NOSORT) ?: [];

        foreach ($files as $file) {
            try {
                // The locale code is the name of the directory containing messages.mo
                $locales[] = Locale::create(basename(dirname($file)));
            } catch (DomainException $ex) {
                // Not a recognised locale
            }
        }
        usort($locales, '\Fisharebest\Localization\Locale::compare');

        return $locales;
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision Number of decimal places to round to
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision Number of decimal places in the displayed percentage
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so it needs two more decimal places than the percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits, so that it can be emitted in its original order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // Any digits left over were at the end of the text, so go at the start.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator created by init() when there is one, otherwise
     * falls back to a byte-wise comparison of the lower-cased strings.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            return self::$collator->compare($string1, $string2);
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        // In the dotless-I locales, "I" and "İ" do not follow the default case mapping.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        // In the dotless-I locales, "ı" and "i" do not follow the default case mapping.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of the
     * first character that falls in a known range is returned. Text with no
     * such character, or with invalid UTF-8, is reported as "Latn".
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin

        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The lead byte tells us how many bytes are in this character.
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Invalid UTF - the final character is truncated
                return 'Latn';
            }

            // Get the Unicode Code Point for the character at position $pos.
            // The lead byte of an N-byte sequence (N > 1) carries 7 - N payload bits,
            // and each continuation byte carries a further 6 bits.
            if ($chrlen === 1) {
                $code_point = $byte1;
            } else {
                $code_point = $byte1 & (0xFF >> ($chrlen + 1));
                for ($i = 1; $i < $chrlen; $i++) {
                    $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
                }
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time. For example, 630 => "10 minutes ago"
     *
     * Only the largest whole unit is shown. A month is taken to be 30 days
     * and a year to be 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds): string
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        // Use ">=" so that exactly one unit is shown as "1 minute ago" rather than "60 seconds ago".
        if ($seconds >= $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds >= $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds >= $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds >= $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds >= $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display times in the current locale?
     *
     * The format string is passed straight to the translator (not through
     * sprintf), so its "%" codes are returned untouched.
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}