<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Functions\FunctionsEdit;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * All state is static: init() selects the locale and builds the translator
 * and collator; the other methods read that state.
 */
class I18N
{
    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // textScript() returns the FIRST matching entry, so order matters where ranges overlap
    // (e.g. the second 'Deva' range lies inside the large mixed-CJK range that follows it).
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        // Mixed CJK, not just Hans
        ['Hans', 0x3000, 0x303F],
        // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF],
        // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Every key must be exactly one character: strtr() matches keys literally, so a
    // stray trailing space would restrict the match and swallow the space.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    // Default list of locales to show in the menu.
    private const DEFAULT_LOCALES = [
        'ar',
        'bg',
        'bs',
        'ca',
        'cs',
        'da',
        'de',
        'el',
        'en-GB',
        'en-US',
        'es',
        'et',
        'fi',
        'fr',
        'he',
        'hr',
        'hu',
        'is',
        'it',
        'ka',
        'kk',
        'lt',
        'mr',
        'nb',
        'nl',
        'nn',
        'pl',
        'pt',
        'ru',
        'sk',
        'sv',
        'tr',
        'uk',
        'vi',
        'zh-Hans',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * Only locales that have a translation file in the language/ folder are returned.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $code_list = Site::getPreference('LANGUAGES');

        if ($code_list === '') {
            $codes = self::DEFAULT_LOCALES;
        } else {
            $codes = explode(',', $code_list);
        }

        $locales = [];
        foreach ($codes as $code) {
            if (file_exists(WT_ROOT . 'language/' . $code . '.mo')) {
                try {
                    $locales[] = Locale::create($code);
                } catch (Exception $ex) {
                    // No such locale exists?
                }
            }
        }
        usort($locales, '\Fisharebest\Localization\Locale::compare');

        return $locales;
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation()
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Generate consistent I18N for datatables.js
     *
     * The result is a fragment of a JavaScript object literal (no surrounding braces).
     *
     * @param int[] $lengths An optional array of page lengths
     *
     * @return string
     */
    public static function datatablesI18N(array $lengths = [
        10,
        20,
        30,
        50,
        100,
        -1,
    ]): string
    {
        $length_options = Bootstrap4::select(FunctionsEdit::numericOptions($lengths), '10');

        return
            '"formatNumber": function(n) { return String(n).replace(/[0-9]/g, function(w) { return ("' . self::$locale->digits('0123456789') . '")[+w]; }); },' .
            '"language": {' .
            ' "paginate": {' .
            ' "first": "' . self::translate('first') . '",' .
            ' "last": "' . self::translate('last') . '",' .
            ' "next": "' . self::translate('next') . '",' .
            ' "previous": "' . self::translate('previous') . '"' .
            ' },' .
            ' "emptyTable": "' . self::translate('No records to display') . '",' .
            ' "info": "' . /* I18N: %s are placeholders for numbers */
            self::translate('Showing %1$s to %2$s of %3$s', '_START_', '_END_', '_TOTAL_') . '",' .
            ' "infoEmpty": "' . self::translate('Showing %1$s to %2$s of %3$s', self::$locale->digits('0'), self::$locale->digits('0'), self::$locale->digits('0')) . '",' .
            ' "infoFiltered": "' . /* I18N: %s is a placeholder for a number */
            self::translate('(filtered from %s total entries)', '_MAX_') . '",' .
            ' "lengthMenu": "' . /* I18N: %s is a number of records per page */
            self::translate('Display %s', addslashes($length_options)) . '",' .
            ' "loadingRecords": "' . self::translate('Loading…') . '",' .
            ' "processing": "' . self::translate('Loading…') . '",' .
            ' "search": "' . self::translate('Filter') . '",' .
            ' "zeroRecords": "' . self::translate('No records to display') . '"' .
            '}';
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen, in order of preference, from: the $code argument, the
     * locale stored in the session (if its translation file still exists), or
     * negotiation with the browser using the tree's default (or en-US) as fallback.
     * Translations are merged from the core, module and data-folder language files,
     * and cached as a PHP file in the data folder.
     *
     * @param string    $code Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null): string
    {
        mb_internal_encoding('UTF-8');

        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'language/' . Session::get('locale') . '.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            self::$locale = Locale::httpAcceptLanguage($_SERVER, self::installedLocales(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            $filemtime = 0;
        }

        // Load the translation file(s)
        // Note that glob() returns false instead of an empty array when open_basedir_restriction
        // is in force and no files are found. See PHP bug #47358.
        if (defined('GLOB_BRACE')) {
            $translation_files = array_merge(
                [WT_ROOT . 'language/' . self::$locale->languageTag() . '.mo'],
                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.{csv,php,mo}', GLOB_BRACE) ?: [],
                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.{csv,php,mo}', GLOB_BRACE) ?: []
            );
        } else {
            // Some servers do not have GLOB_BRACE - see http://php.net/manual/en/function.glob.php
            $translation_files = array_merge(
                [WT_ROOT . 'language/' . self::$locale->languageTag() . '.mo'],
                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.csv') ?: [],
                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.php') ?: [],
                glob(Webtrees::MODULES_PATH . '*/language/' . self::$locale->languageTag() . '.mo') ?: [],
                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.csv') ?: [],
                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.php') ?: [],
                glob(WT_DATA_DIR . 'language/' . self::$locale->languageTag() . '.mo') ?: []
            );
        }
        // Rebuild files after one hour
        $rebuild_cache = time() > $filemtime + 3600;
        // Rebuild files if any translation file has been updated
        foreach ($translation_files as $translation_file) {
            if (filemtime($translation_file) > $filemtime) {
                $rebuild_cache = true;
                break;
            }
        }

        if ($rebuild_cache) {
            $translations = [];
            foreach ($translation_files as $translation_file) {
                $translation  = new Translation($translation_file);
                $translations = array_merge($translations, $translation->asArray());
            }
            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            // Reaching here implies the cache file exists: its mtime is within the last hour.
            $translations = include $cache_file;
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return LocaleInterface[]
     */
    public static function installedLocales(): array
    {
        // glob() may return false rather than an empty array (see PHP bug #47358).
        $files = glob(WT_ROOT . 'language/*.mo') ?: [];

        $locales = [];
        foreach ($files as $file) {
            try {
                $locales[] = Locale::create(basename($file, '.mo'));
            } catch (Exception $ex) {
                // Not a recognised locale
            }
        }
        usort($locales, '\Fisharebest\Localization\Locale::compare');

        return $locales;
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are kept when rounding.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits, so that it can be emitted in its original order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script)
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator created by init() when there is one; otherwise compares
     * the lower-cased strings byte by byte.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2)
    {
        if (self::$collator instanceof Collator) {
            return self::$collator->compare($string1, $string2);
        } else {
            return strcmp(self::strtolower($string1), self::strtolower($string2));
        }
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of the first
     * character that falls in one of SCRIPT_CHARACTER_RANGES is returned. Text that is
     * empty, invalid UTF-8, or contains no recognised character is reported as 'Latn'.
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The first byte tells us how many bytes are in this character
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - invalid UTF
                return 'Latn';
            }

            // get the Unicode Code Point for the character at position $pos
            if ($chrlen === 1) {
                $code_point = $byte1;
            } elseif ($chrlen === 2) {
                $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
            } elseif ($chrlen === 3) {
                $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
            } else {
                // Three payload bits in the first byte, then six in each of the other three.
                $code_point = (($byte1 & 0x07) << 18) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time. For example, 630 => "10 minutes ago"
     *
     * Only the largest applicable unit is shown. A month is taken as 30 days and a
     * year as 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds)
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        if ($seconds > $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds > $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds > $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds > $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds > $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display times in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}