<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use DomainException;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Functions\FunctionsEdit;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * All state is static: init() must be called before any method that reads
 * the current locale, translator or collator.
 */
class I18N
{
    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // The list is searched in order and the first match wins, so a narrow range
    // (e.g. Deva 0xA8E0-0xA8FF) must appear before a wider range that contains it.
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        ['Hans', 0x3000, 0x303F], // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF], // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F], // Mixed CJK, not just Hans
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Each key must be exactly one character: this table is passed to strtr(),
    // so any stray whitespace in a key would prevent the character from matching.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    // Default list of locales to show in the menu.
    private const DEFAULT_LOCALES = [
        'ar',
        'bg',
        'bs',
        'ca',
        'cs',
        'da',
        'de',
        'el',
        'en-GB',
        'en-US',
        'es',
        'et',
        'fi',
        'fr',
        'he',
        'hr',
        'hu',
        'is',
        'it',
        'ka',
        'kk',
        'lt',
        'mr',
        'nb',
        'nl',
        'nn',
        'pl',
        'pt',
        'ru',
        'sk',
        'sv',
        'tr',
        'uk',
        'vi',
        'zh-Hans',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * Only locales that have a translation file on disk are returned.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $code_list = Site::getPreference('LANGUAGES');

        if ($code_list === '') {
            $codes = self::DEFAULT_LOCALES;
        } else {
            $codes = explode(',', $code_list);
        }

        $locales = [];
        foreach ($codes as $code) {
            if (file_exists(WT_ROOT . 'resources/lang/' . $code . '/messages.mo')) {
                try {
                    $locales[] = Locale::create($code);
                } catch (Exception $ex) {
                    // No such locale exists?
                }
            }
        }

        usort($locales, [Locale::class, 'compare']);

        return $locales;
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Generate consistent I18N for datatables.js
     *
     * The result is a fragment of a JavaScript object literal (no enclosing braces).
     *
     * NOTE(review): translated strings are placed inside double-quoted JS string
     * literals without escaping; only the length menu markup is passed through
     * addslashes(). A translation containing a double quote would break the
     * generated script - confirm whether translations are guaranteed to be safe.
     *
     * @param int[] $lengths An optional array of page lengths
     *
     * @return string
     */
    public static function datatablesI18N(array $lengths = [
        10,
        20,
        30,
        50,
        100,
        -1,
    ]): string
    {
        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');

        return
            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
            '"language": {' .
            ' "paginate": {' .
            ' "first": "' . self::translate('first') . '",' .
            ' "last": "' . self::translate('last') . '",' .
            ' "next": "' . self::translate('next') . '",' .
            ' "previous": "' . self::translate('previous') . '"' .
            ' },' .
            ' "emptyTable": "' . self::translate('No records to display') . '",' .
            ' "info": "' . /* I18N: %s are placeholders for numbers */
            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
            ' "infoEmpty": "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
            ' "infoFiltered": "' . /* I18N: %s is a placeholder for a number */
            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
            ' "lengthMenu": "' . /* I18N: %s is a number of records per page */
            self::translate('Display %s', addslashes($length_options)) . '",' .
            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
            ' "processing": "' . self::translate('Loading…') . '",' .
            ' "search": "' . self::translate('Filter') . '",' .
            ' "zeroRecords": "' . self::translate('No records to display') . '"' .
            '}';
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * Selects the locale (explicit code, then session, then browser negotiation
     * against the installed locales), loads the translations - from a PHP cache
     * file when it is fresh, otherwise from the .mo file - and creates the
     * translator, list separator and (if php-intl is available) collator.
     *
     * @param string    $code Use this locale/language code, or choose one automatically
     * @param Tree|null $tree Supplies the default language when negotiating with the browser
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            // A missing cache file is treated as infinitely old, forcing a rebuild.
            $filemtime = 0;
        }

        // Load the translation file(s)
        $translation_files = [
            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
        ];

        // Rebuild files after one hour
        $rebuild_cache = time() > $filemtime + 3600;
        // Rebuild files if any translation file has been updated
        foreach ($translation_files as $translation_file) {
            if (filemtime($translation_file) > $filemtime) {
                $rebuild_cache = true;
                break;
            }
        }

        if ($rebuild_cache) {
            $translations = [];
            foreach ($translation_files as $translation_file) {
                $translation  = new Translation($translation_file);
                $translations = array_merge($translations, $translation->asArray());
            }
            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            $translations = include $cache_file;
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return LocaleInterface[]
     */
    public static function installedLocales(): array
    {
        $locales = [];

        // glob() returns false on error; treat that the same as "nothing installed".
        $files = glob(WT_ROOT . 'resources/lang/*/messages.mo') ?: [];

        foreach ($files as $file) {
            try {
                $locales[] = Locale::create(basename(dirname($file)));
            } catch (DomainException $ex) {
                // Not a recognised locale
            }
        }
        usort($locales, [Locale::class, 'compare']);

        return $locales;
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The fraction needs two more decimal places than the percentage shown.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Accumulate a run of digits in its original (LTR) order.
                $digits .= $letter;
            } else {
                // Prepend the letter, followed by any digit run that preceded it.
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // Any digits left over were at the end of the text, so go at the start.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the locale's collator when available, otherwise a lower-cased
     * byte-wise comparison.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on error; use the fallback instead.
            if ($result !== false) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time and the script of the
     * first character that falls in a known range is returned. Text with no
     * recognised characters, or with invalid UTF-8, is reported as 'Latn'.
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The first byte tells us how many bytes make up this character.
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - invalid UTF
                return 'Latn';
            }

            // get the Unicode Code Point for the character at position $pos
            switch ($chrlen) {
                case 1:
                    $code_point = $byte1;
                    break;
                case 2:
                    $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
                    break;
                case 3:
                    $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
                    break;
                default:
                    // Four bytes: 3 + 6 + 6 + 6 payload bits.
                    $code_point = (($byte1 & 0x07) << 18) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
                    break;
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time. For example, 630 => "10 minutes ago"
     *
     * Only the largest applicable unit is shown, rounded down. A month is
     * taken to be 30 days and a year 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds): string
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        // Boundaries are inclusive, so that exactly 60 seconds is "1 minute ago".
        if ($seconds >= $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds >= $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds >= $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds >= $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds >= $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}