<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Functions\FunctionsEdit;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * All state is static. Most methods read self::$locale and/or
 * self::$translator, which are only assigned by init(); call init() first.
 * textScript(), scriptDirection() and reverseText() use only the class
 * constants and can be called without initialisation.
 */
class I18N
{
    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    // Contains the ASCII, Arabic-Indic and Extended Arabic-Indic digits.
    const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script.
    // Each entry is [script code, first code point, last code point].
    // textScript() returns the FIRST matching entry, so order matters where
    // ranges overlap (e.g. the Deva block at A8E0 lies inside a "Hans" range).
    const SCRIPT_CHARACTER_RANGES = [
        [
            'Latn',
            0x0041,
            0x005A,
        ],
        [
            'Latn',
            0x0061,
            0x007A,
        ],
        [
            'Latn',
            0x0100,
            0x02AF,
        ],
        [
            'Grek',
            0x0370,
            0x03FF,
        ],
        [
            'Cyrl',
            0x0400,
            0x052F,
        ],
        [
            'Hebr',
            0x0590,
            0x05FF,
        ],
        [
            'Arab',
            0x0600,
            0x06FF,
        ],
        [
            'Arab',
            0x0750,
            0x077F,
        ],
        [
            'Arab',
            0x08A0,
            0x08FF,
        ],
        [
            'Deva',
            0x0900,
            0x097F,
        ],
        [
            'Taml',
            0x0B80,
            0x0BFF,
        ],
        [
            'Sinh',
            0x0D80,
            0x0DFF,
        ],
        [
            'Thai',
            0x0E00,
            0x0E7F,
        ],
        [
            'Geor',
            0x10A0,
            0x10FF,
        ],
        [
            'Grek',
            0x1F00,
            0x1FFF,
        ],
        [
            'Deva',
            0xA8E0,
            0xA8FF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3000,
            0x303F,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3400,
            0xFAFF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x20000,
            0x2FA1F,
        ],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Keys must be exactly one character: a stray trailing space in a key
    // would restrict the match to "character + space" and swallow the space.
    const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    // Default list of locales to show in the menu.
    const DEFAULT_LOCALES = [
        'ar',
        'bg',
        'bs',
        'ca',
        'cs',
        'da',
        'de',
        'el',
        'en-GB',
        'en-US',
        'es',
        'et',
        'fi',
        'fr',
        'he',
        'hr',
        'hu',
        'is',
        'it',
        'ka',
        'kk',
        'lt',
        'mr',
        'nb',
        'nl',
        'nn',
        'pl',
        'pt',
        'ru',
        'sk',
        'sv',
        'tr',
        'uk',
        'vi',
        'zh-Hans',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * Codes without a matching "language/<code>.mo" file, and codes that
     * Locale::create() rejects, are silently skipped.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $code_list = Site::getPreference('LANGUAGES');

        if ($code_list === '') {
            $codes = self::DEFAULT_LOCALES;
        } else {
            $codes = explode(',', $code_list);
        }

        $locales = [];
        foreach ($codes as $code) {
            if (file_exists(WT_ROOT . 'language/' . $code . '.mo')) {
                try {
                    $locales[] = Locale::create($code);
                } catch (Exception $ex) {
                    // No such locale exists?
                }
            }
        }
        usort($locales, '\Fisharebest\Localization\Locale::compare');

        return $locales;
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation()
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Generate consistent I18N for datatables.js
     *
     * The result is a fragment of a JavaScript object literal (no enclosing
     * braces). Translations are inserted into double-quoted JS strings as-is;
     * only the page-length <select> markup is passed through addslashes().
     *
     * @param int[] $lengths An optional array of page lengths
     *
     * @return string
     */
    public static function datatablesI18N(array $lengths = [
        10,
        20,
        30,
        50,
        100,
        -1,
    ]): string
    {
        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');

        return
            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
            '"language": {' .
            ' "paginate": {' .
            ' "first": "' . self::translate('first') . '",' .
            ' "last": "' . self::translate('last') . '",' .
            ' "next": "' . self::translate('next') . '",' .
            ' "previous": "' . self::translate('previous') . '"' .
            ' },' .
            ' "emptyTable": "' . self::translate('No records to display') . '",' .
            ' "info": "' . /* I18N: %s are placeholders for numbers */
            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
            ' "infoEmpty": "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
            ' "infoFiltered": "' . /* I18N: %s is a placeholder for a number */
            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
            ' "lengthMenu": "' . /* I18N: %s is a number of records per page */
            self::translate('Display %s', addslashes($length_options)) . '",' .
            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
            ' "processing": "' . self::translate('Loading…') . '",' .
            ' "search": "' . self::translate('Filter') . '",' .
            ' "zeroRecords": "' . self::translate('No records to display') . '"' .
            '}';
    }

    /**
     * Convert the digits 0-9 into the local script
     *
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * Locale selection order: the explicit $code; else the locale stored in
     * the session (if its .mo file exists); else negotiation with the
     * browser, falling back to the tree's LANGUAGE preference or en-US.
     *
     * Translations are merged from the core, module and data-dir language
     * files and cached as a PHP file under WT_DATA_DIR/cache/. The cache is
     * rebuilt when it is more than one hour old or older than any source file.
     *
     * @param string    $code Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', Tree $tree = null): string
    {
        mb_internal_encoding('UTF-8');

        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'language/' . Session::get('locale') . '.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
        }

        $language_tag = self::$locale->languageTag();

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . $language_tag . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            $filemtime = 0;
        }

        // Load the translation file(s)
        // Note that glob() returns false instead of an empty array when open_basedir_restriction
        // is in force and no files are found. See PHP bug #47358.
        if (defined('GLOB_BRACE')) {
            $translation_files = array_merge(
                [WT_ROOT . 'language/' . $language_tag . '.mo'],
                glob(Webtrees::MODULES_PATH . '*/language/' . $language_tag . '.{csv,php,mo}', GLOB_BRACE) ?: [],
                glob(WT_DATA_DIR . 'language/' . $language_tag . '.{csv,php,mo}', GLOB_BRACE) ?: []
            );
        } else {
            // Some servers do not have GLOB_BRACE - see http://php.net/manual/en/function.glob.php
            $translation_files = array_merge(
                [WT_ROOT . 'language/' . $language_tag . '.mo'],
                glob(Webtrees::MODULES_PATH . '*/language/' . $language_tag . '.csv') ?: [],
                glob(Webtrees::MODULES_PATH . '*/language/' . $language_tag . '.php') ?: [],
                glob(Webtrees::MODULES_PATH . '*/language/' . $language_tag . '.mo') ?: [],
                glob(WT_DATA_DIR . 'language/' . $language_tag . '.csv') ?: [],
                glob(WT_DATA_DIR . 'language/' . $language_tag . '.php') ?: [],
                glob(WT_DATA_DIR . 'language/' . $language_tag . '.mo') ?: []
            );
        }
        // Rebuild files after one hour
        $rebuild_cache = time() > $filemtime + 3600;
        // Rebuild files if any translation file has been updated
        foreach ($translation_files as $translation_file) {
            if (filemtime($translation_file) > $filemtime) {
                $rebuild_cache = true;
                break;
            }
        }

        if ($rebuild_cache) {
            // Later files override earlier ones: core, then modules, then data dir.
            $translations = [];
            foreach ($translation_files as $translation_file) {
                $translation  = new Translation($translation_file);
                $translations = array_merge($translations, $translation->asArray());
            }
            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            $translations = include $cache_file;
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator.
        // Reset first, so that a collator from an earlier call to init()
        // (for a different locale) is never reused by mistake.
        self::$collator = null;
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return LocaleInterface[]
     */
    public static function installedLocales(): array
    {
        // glob() can return false rather than an empty array (PHP bug #47358).
        $files = glob(WT_ROOT . 'language/*.mo') ?: [];

        $locales = [];
        foreach ($files as $file) {
            try {
                $locales[] = Locale::create(basename($file, '.mo'));
            } catch (Exception $ex) {
                // Not a recognised locale
            }
        }
        usort($locales, '\Fisharebest\Localization\Locale::compare');

        return $locales;
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     *
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     *
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The fraction needs two more decimal places than the percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     *
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     *
     * These do not support UTF8 text direction, so we must mimic it for them.
     *
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits, so that it keeps its LTR order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     *
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script)
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the locale's collator when php-intl is available, and a
     * byte-wise comparison of the lower-cased strings otherwise.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2)
    {
        if (self::$collator instanceof Collator) {
            return self::$collator->compare($string1, $string2);
        } else {
            return strcmp(self::strtolower($string1), self::strtolower($string2));
        }
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned character by character. The script of the first
     * character that falls inside one of SCRIPT_CHARACTER_RANGES is returned.
     * Empty text, text with no recognised characters, and invalid or
     * truncated UTF-8 all yield "Latn".
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The lead byte tells us how many bytes encode this character.
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - invalid UTF
                return 'Latn';
            }

            // get the Unicode Code Point for the character at position $pos
            switch ($chrlen) {
                case 1:
                    $code_point = $byte1;
                    break;
                case 2:
                    $code_point = (($byte1 & 0x1F) << 6)
                        + (ord($string[$pos + 1]) & 0x3F);
                    break;
                case 3:
                    $code_point = (($byte1 & 0x0F) << 12)
                        + ((ord($string[$pos + 1]) & 0x3F) << 6)
                        + (ord($string[$pos + 2]) & 0x3F);
                    break;
                default:
                    // Four bytes: 3 + 6 + 6 + 6 payload bits, so the lead byte shifts by 18.
                    $code_point = (($byte1 & 0x07) << 18)
                        + ((ord($string[$pos + 1]) & 0x3F) << 12)
                        + ((ord($string[$pos + 2]) & 0x3F) << 6)
                        + (ord($string[$pos + 3]) & 0x3F);
                    break;
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time. For example, 630 => "10 minutes ago"
     *
     * Only the largest applicable unit is shown, rounded down. A month is
     * taken as 30 days and a year as 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds)
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        // Use >= so that exactly one unit is shown as "1 unit ago",
        // and not as 60 (or 24, 30, 12) of the next smaller unit.
        if ($seconds >= $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds >= $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds >= $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds >= $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds >= $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     *
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}