<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use DomainException;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Functions\FunctionsEdit;
use const GLOB_NOSORT;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * All state is static: init() must be called before any method that reads
 * the current locale or translator.
 */
class I18N
{
    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // Ranges are tested in order, so an earlier entry wins where ranges overlap.
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        ['Hans', 0x3000, 0x303F], // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF], // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F], // Mixed CJK, not just Hans
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Each key must be exactly one character: strtr() matches keys literally.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    // Default list of locales to show in the menu.
    private const DEFAULT_LOCALES = [
        'ar',
        'bg',
        'bs',
        'ca',
        'cs',
        'da',
        'de',
        'el',
        'en-GB',
        'en-US',
        'es',
        'et',
        'fi',
        'fr',
        'he',
        'hr',
        'hu',
        'is',
        'it',
        'ka',
        'kk',
        'lt',
        'mr',
        'nb',
        'nl',
        'nn',
        'pl',
        'pt',
        'ru',
        'sk',
        'sv',
        'tr',
        'uk',
        'vi',
        'zh-Hans',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * Only locales that have a translation file on disk are returned.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $code_list = Site::getPreference('LANGUAGES');

        if ($code_list === '') {
            $codes = self::DEFAULT_LOCALES;
        } else {
            $codes = explode(',', $code_list);
        }

        $locales = [];
        foreach ($codes as $code) {
            if (file_exists(WT_ROOT . 'resources/lang/' . $code . '/messages.mo')) {
                try {
                    $locales[] = Locale::create($code);
                } catch (Exception $ex) {
                    // No such locale exists?
                }
            }
        }

        usort($locales, [Locale::class, 'compare']);

        return $locales;
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Generate consistent I18N for datatables.js
     *
     * NOTE(review): translated strings are inserted into JavaScript string
     * literals without escaping (only the length menu goes through
     * addslashes()), so a translation containing a double quote would
     * produce invalid output.
     *
     * @param int[] $lengths An optional array of page lengths
     *
     * @return string A fragment of a JavaScript object literal (no enclosing braces)
     */
    public static function datatablesI18N(array $lengths = [
        10,
        20,
        30,
        50,
        100,
        -1,
    ]): string
    {
        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');

        return
            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
            '"language": {' .
            ' "paginate": {' .
            ' "first": "' . self::translate('first') . '",' .
            ' "last": "' . self::translate('last') . '",' .
            ' "next": "' . self::translate('next') . '",' .
            ' "previous": "' . self::translate('previous') . '"' .
            ' },' .
            ' "emptyTable": "' . self::translate('No records to display') . '",' .
            ' "info": "' . /* I18N: %s are placeholders for numbers */
            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
            ' "infoEmpty": "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
            ' "infoFiltered": "' . /* I18N: %s is a placeholder for a number */
            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
            ' "lengthMenu": "' . /* I18N: %s is a number of records per page */
            self::translate('Display %s', addslashes($length_options)) . '",' .
            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
            ' "processing": "' . self::translate('Loading…') . '",' .
            ' "search": "' . self::translate('Filter') . '",' .
            ' "zeroRecords": "' . self::translate('No records to display') . '"' .
            '}';
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * Selects a locale (explicit code, then session, then browser
     * negotiation), loads its translations (from a PHP cache file where
     * possible) and sets up the translator, list separator and collator.
     *
     * @param string    $code Use this locale/language code, or choose one automatically
     * @param Tree|null $tree Supplies the default language when negotiating with the browser
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            // A missing cache file is treated as infinitely old, forcing a rebuild.
            $filemtime = 0;
        }

        // Load the translation file(s)
        $translation_files = [
            WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo',
        ];

        // Rebuild files after one hour
        $rebuild_cache = time() > $filemtime + 3600;
        // Rebuild files if any translation file has been updated
        foreach ($translation_files as $translation_file) {
            if (filemtime($translation_file) > $filemtime) {
                $rebuild_cache = true;
                break;
            }
        }

        if ($rebuild_cache) {
            $translations = [];
            foreach ($translation_files as $translation_file) {
                $translation  = new Translation($translation_file);
                $translations = array_merge($translations, $translation->asArray());
            }
            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            $translations = include $cache_file;
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return LocaleInterface[]
     */
    public static function installedLocales(): array
    {
        $locales = [];

        // glob() returns false on error; treat that as "no translation files".
        $files = glob(WT_ROOT . 'resources/lang/*/messages.mo', GLOB_NOSORT) ?: [];

        foreach ($files as $file) {
            try {
                $locales[] = Locale::create(basename(dirname($file)));
            } catch (DomainException $ex) {
                // Not a recognised locale
            }
        }
        usort($locales, [Locale::class, 'compare']);

        return $locales;
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are needed
        // to keep $precision places once it is shown as a percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Accumulate a run of digits so that it keeps its LTR order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator created by init() when there is one, otherwise
     * compares the lower-cased strings byte by byte.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on error; use the fallback then.
            if ($result !== false) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * Returns the script of the first character that falls in one of the
     * known ranges. Text with no such character, and text that is not
     * valid UTF-8, is reported as 'Latn'.
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin

        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The lead byte tells us how many bytes encode this character.
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - invalid UTF
                return 'Latn';
            }

            // Get the Unicode code point for the character at position $pos.
            if ($chrlen === 1) {
                $code_point = $byte1;
            } else {
                // The lead byte holds (7 - $chrlen) payload bits;
                // every continuation byte adds six more.
                $code_point = $byte1 & (0xFF >> ($chrlen + 1));
                for ($i = 1; $i < $chrlen; $i++) {
                    $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
                }
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time. For example, 630 => "10 minutes ago"
     *
     * Only the largest whole unit is shown. Months are counted as 30 days
     * and years as 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds): string
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        if ($seconds >= $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds >= $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds >= $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds >= $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds >= $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}