<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use DomainException;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Functions\FunctionsEdit;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * All state is static: init() must be called before any method that reads
 * the current locale, translator or collator.
 */
class I18N
{
    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script, as [script, first code point, last code point].
    // The list is searched in order and the first match wins, so narrower
    // ranges must be listed before any wider range that contains them.
    private const SCRIPT_CHARACTER_RANGES = [
        [
            'Latn',
            0x0041,
            0x005A,
        ],
        [
            'Latn',
            0x0061,
            0x007A,
        ],
        [
            'Latn',
            0x0100,
            0x02AF,
        ],
        [
            'Grek',
            0x0370,
            0x03FF,
        ],
        [
            'Cyrl',
            0x0400,
            0x052F,
        ],
        [
            'Hebr',
            0x0590,
            0x05FF,
        ],
        [
            'Arab',
            0x0600,
            0x06FF,
        ],
        [
            'Arab',
            0x0750,
            0x077F,
        ],
        [
            'Arab',
            0x08A0,
            0x08FF,
        ],
        [
            'Deva',
            0x0900,
            0x097F,
        ],
        [
            'Taml',
            0x0B80,
            0x0BFF,
        ],
        [
            'Sinh',
            0x0D80,
            0x0DFF,
        ],
        [
            'Thai',
            0x0E00,
            0x0E7F,
        ],
        [
            'Geor',
            0x10A0,
            0x10FF,
        ],
        [
            'Grek',
            0x1F00,
            0x1FFF,
        ],
        [
            'Deva',
            0xA8E0,
            0xA8FF,
        ],
        [
            'Hans',
            0x3000,
            0x303F,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3400,
            0xFAFF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x20000,
            0x2FA1F,
        ],
        // Mixed CJK, not just Hans
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Keys must be the bare character: a trailing space in a key would make
    // strtr() match only "character + space" and swallow the space.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    // Default list of locales to show in the menu.
    private const DEFAULT_LOCALES = [
        'ar',
        'bg',
        'bs',
        'ca',
        'cs',
        'da',
        'de',
        'el',
        'en-GB',
        'en-US',
        'es',
        'et',
        'fi',
        'fr',
        'he',
        'hr',
        'hu',
        'is',
        'it',
        'ka',
        'kk',
        'lt',
        'mr',
        'nb',
        'nl',
        'nn',
        'pl',
        'pt',
        'ru',
        'sk',
        'sv',
        'tr',
        'uk',
        'vi',
        'zh-Hans',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * Only locales with a translation file on disk are returned.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $code_list = Site::getPreference('LANGUAGES');

        if ($code_list === '') {
            $codes = self::DEFAULT_LOCALES;
        } else {
            $codes = explode(',', $code_list);
        }

        $locales = [];
        foreach ($codes as $code) {
            if (file_exists(WT_ROOT . 'resources/lang/' . $code . '/messages.mo')) {
                try {
                    $locales[] = Locale::create($code);
                } catch (Exception $ex) {
                    // No such locale exists?
                }
            }
        }

        usort($locales, [Locale::class, 'compare']);

        return $locales;
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Generate consistent I18N for datatables.js
     *
     * The result is a fragment of a JavaScript object literal (no enclosing braces).
     * NOTE(review): translated strings are interpolated into double-quoted JS
     * strings without escaping - a translation containing a double quote would break the output.
     *
     * @param int[] $lengths An optional array of page lengths
     *
     * @return string
     */
    public static function datatablesI18N(array $lengths = [
        10,
        20,
        30,
        50,
        100,
        -1,
    ]): string
    {
        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');

        return
            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
            '"language": {' .
            ' "paginate": {' .
            ' "first": "' . self::translate('first') . '",' .
            ' "last": "' . self::translate('last') . '",' .
            ' "next": "' . self::translate('next') . '",' .
            ' "previous": "' . self::translate('previous') . '"' .
            ' },' .
            ' "emptyTable": "' . self::translate('No records to display') . '",' .
            ' "info": "' . /* I18N: %s are placeholders for numbers */
            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
            ' "infoEmpty": "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
            ' "infoFiltered": "' . /* I18N: %s is a placeholder for a number */
            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
            ' "lengthMenu": "' . /* I18N: %s is a number of records per page */
            self::translate('Display %s', addslashes($length_options)) . '",' .
            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
            ' "processing": "' . self::translate('Loading…') . '",' .
            ' "search": "' . self::translate('Filter') . '",' .
            ' "zeroRecords": "' . self::translate('No records to display') . '"' .
            '}';
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen from, in order: the given code, the locale stored
     * in the session (if its translation file exists), or negotiation with
     * the browser, falling back to the tree's language or en-US.
     * Translations are cached as a PHP file in the data folder.
     *
     * @param string    $code Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null): string
    {
        mb_internal_encoding('UTF-8');

        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            // A missing cache file is treated as infinitely old, which forces a rebuild.
            $filemtime = 0;
        }

        // Load the translation file(s)
        $translation_files = [
            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
        ];

        // Rebuild files after one hour
        $rebuild_cache = time() > $filemtime + 3600;
        // Rebuild files if any translation file has been updated
        foreach ($translation_files as $translation_file) {
            if (filemtime($translation_file) > $filemtime) {
                $rebuild_cache = true;
                break;
            }
        }

        if ($rebuild_cache) {
            $translations = [];
            foreach ($translation_files as $translation_file) {
                $translation  = new Translation($translation_file);
                $translations = array_merge($translations, $translation->asArray());
            }
            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            $translations = include $cache_file;
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * Translation files are stored as resources/lang/<locale-code>/messages.mo,
     * so the locale code is the name of the folder containing the file.
     *
     * @return LocaleInterface[]
     */
    public static function installedLocales(): array
    {
        $locales = [];

        // glob() may return false on error - treat that as "nothing installed".
        $files = glob(WT_ROOT . 'resources/lang/*/messages.mo') ?: [];

        foreach ($files as $file) {
            try {
                // The file is always called "messages.mo"; the folder name is the locale code.
                $locales[] = Locale::create(basename(dirname($file)));
            } catch (DomainException $ex) {
                // Not a recognised locale
            }
        }
        usort($locales, [Locale::class, 'compare']);

        return $locales;
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision Decimal places of the percentage (not of the fraction)
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so it needs two more decimal places than the percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits, so that it can be emitted in its original (LTR) order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // Any digits still pending were at the end of the text, so go at the start of the result.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator created by init() when available, otherwise
     * falls back to a byte-wise comparison of the lower-cased strings.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            return self::$collator->compare($string1, $string2);
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of the
     * first character that belongs to a known range is returned. Characters
     * outside every range (punctuation, spaces, symbols) are skipped.
     * Malformed or truncated UTF-8, and text with no recognised characters, gives 'Latn'.
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The lead byte gives the length of the sequence and the high bits of the code point.
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $code_point = $byte1;
                $chrlen     = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $code_point = $byte1 & 0x1F;
                $chrlen     = 2;
            } elseif ($byte1 < 0xF0) {
                $code_point = $byte1 & 0x0F;
                $chrlen     = 3;
            } elseif ($byte1 < 0xF8) {
                $code_point = $byte1 & 0x07;
                $chrlen     = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - do not read past the end of the string.
                return 'Latn';
            }

            // Each continuation byte contributes its low six bits.
            for ($i = 1; $i < $chrlen; $i++) {
                $code_point = ($code_point << 6) + (ord($string[$pos + $i]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time, using the largest
     * whole unit. For example, 630 => "10 minutes ago" and 7200 => "2 hours ago".
     *
     * A month is taken as 30 days and a year as 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds): string
    {
        // intdiv() needs integers, and this file uses strict types.
        $seconds = (int) $seconds;

        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        // Use ">=" so that an exact unit (e.g. 60 seconds) is shown as "1 minute", not "60 seconds".
        if ($seconds >= $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds >= $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds >= $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds >= $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds >= $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}