<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use Illuminate\Support\Collection;
use function array_merge;
use function filemtime;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * All members are static. Most methods read the locale/translator that is
 * set up by init(), so init() must be called before they are used. The
 * script helpers (textScript(), scriptDirection(), reverseText()) do not
 * depend on the current locale.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // The list is searched in order and the first matching range wins.
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        ['Hans', 0x3000, 0x303F], // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF], // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F], // Mixed CJK, not just Hans
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Every key is a single character, so that strtr() swaps it wherever it occurs.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen from (in order): the $code argument, the locale
     * stored in the session (if its translation file exists), or negotiation
     * against the request headers with the tree's LANGUAGE preference (or
     * en-US) as the default.
     *
     * @param string    $code  Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     * @param bool      $setup During setup, we cannot access the database.
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null, $setup = false): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('locale') && file_exists(WT_ROOT . 'resources/lang/' . Session::get('locale') . '/messages.mo')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('locale'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            if ($setup) {
                $installed_locales = app(ModuleService::class)->setupLanguages()
                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                        return $module->locale();
                    });
            } else {
                $installed_locales = self::installedLocales();
            }

            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
        }

        $cache_dir  = WT_DATA_DIR . 'cache/';
        $cache_file = $cache_dir . 'language-' . self::$locale->languageTag() . '-cache.php';
        if (file_exists($cache_file)) {
            $filemtime = filemtime($cache_file);
        } else {
            // A missing cache is treated as older than any translation file.
            $filemtime = 0;
        }

        // Load the translation file
        $translation_file = WT_ROOT . 'resources/lang/' . self::$locale->languageTag() . '/messages.mo';

        // Rebuild files if the translation file has been updated
        if (filemtime($translation_file) > $filemtime) {
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();

            try {
                File::mkdir($cache_dir);
                file_put_contents($cache_file, '<?php return ' . var_export($translations, true) . ';');
            } catch (Exception $ex) {
                // During setup, we may not have been able to create it.
            }
        } else {
            $translations = include $cache_file;
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return Collection|LocaleInterface[]
     */
    public static function installedLocales(): Collection
    {
        return app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are needed
        // to keep $precision places in the resulting percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits, so that it can be emitted in its original (LTR) order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator created by init() when one is available, and a
     * lower-case byte comparison otherwise.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on failure, which must not
            // be returned from a function declared to return int.
            if ($result !== false) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of
     * the first character that falls in a known range is returned.
     * Invalid or truncated UTF-8, and text with no recognised characters,
     * is reported as "Latn".
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // get the Unicode Code Point for the character at position $pos
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen     = 1;
                $code_point = $byte1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen     = 2;
                $code_point = $byte1 & 0x1F;
            } elseif ($byte1 < 0xF0) {
                $chrlen     = 3;
                $code_point = $byte1 & 0x0F;
            } elseif ($byte1 < 0xF8) {
                $chrlen     = 4;
                $code_point = $byte1 & 0x07;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - invalid UTF
                return 'Latn';
            }

            // Each continuation byte contributes its low six bits.
            for ($i = 1; $i < $chrlen; $i++) {
                $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Convert a number of seconds into a relative time. For example, 630 => "10 minutes ago"
     *
     * Only the largest applicable unit is shown. A month is counted as
     * 30 days and a year as 365 days.
     *
     * @param int $seconds
     *
     * @return string
     */
    public static function timeAgo($seconds): string
    {
        $minute = 60;
        $hour   = 60 * $minute;
        $day    = 24 * $hour;
        $month  = 30 * $day;
        $year   = 365 * $day;

        if ($seconds > $year) {
            $years = intdiv($seconds, $year);

            return self::plural('%s year ago', '%s years ago', $years, self::number($years));
        }

        if ($seconds > $month) {
            $months = intdiv($seconds, $month);

            return self::plural('%s month ago', '%s months ago', $months, self::number($months));
        }

        if ($seconds > $day) {
            $days = intdiv($seconds, $day);

            return self::plural('%s day ago', '%s days ago', $days, self::number($days));
        }

        if ($seconds > $hour) {
            $hours = intdiv($seconds, $hour);

            return self::plural('%s hour ago', '%s hours ago', $hours, self::number($hours));
        }

        if ($seconds > $minute) {
            $minutes = intdiv($seconds, $minute);

            return self::plural('%s minute ago', '%s minutes ago', $minutes, self::number($minutes));
        }

        return self::plural('%s second ago', '%s seconds ago', $seconds, self::number($seconds));
    }

    /**
     * What format is used to display times in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}