<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use Illuminate\Support\Collection;

use function array_merge;
use function class_exists;
use function html_entity_decode;
use function in_array;
use function mb_strtolower;
use function mb_strtoupper;
use function mb_substr;
use function ord;
use function sprintf;
use function str_replace;
use function strcmp;
use function strip_tags;
use function strlen;
use function strpos;
use function strtr;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * A static facade around the current locale, translator and collator.
 * Call init() before using any method that depends on the current locale.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];

    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];

    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script code, first code point, last code point].
    // The list is searched in order and the first matching range wins, so the order matters
    // where ranges overlap (e.g. extended Devanagari lies inside the large CJK range).
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        ['Hans', 0x3000, 0x303F], // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF], // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F], // Mixed CJK, not just Hans
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Each key must be exactly one character: a stray trailing space would mean the
    // character is only mirrored when followed by a space (and the space would be lost).
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        /** @var Collection $locales */
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen from, in order of priority: the $code parameter, the
     * language stored in the session, or negotiation with the browser (falling
     * back to the tree's LANGUAGE preference, or en-US when there is no tree).
     *
     * As side effects, this sets the current locale, translator, list separator
     * and (when php-intl is available) collator. It may also write a compiled
     * messages.php file when only the messages.po file exists.
     *
     * @param string    $code  Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     * @param bool      $setup During setup, we cannot access the database.
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null, $setup = false): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('language')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('language'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            if ($setup) {
                $installed_locales = app(ModuleService::class)->setupLanguages()
                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                        return $module->locale();
                    });
            } else {
                $installed_locales = self::installedLocales();
            }

            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
        }

        // Load the translation file
        $translation_file = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.php';

        try {
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();
        } catch (Exception $ex) {
            // The translations files are created during the build process, and are
            // not included in the source code.
            // Assuming we are using dev code, and build (or rebuild) the files.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
            $translation  = new Translation($po_file);
            $translations = $translation->asArray();
            file_put_contents($translation_file, '<?php return ' . var_export($translations, true) . ';');
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return Collection
     */
    public static function installedLocales(): Collection
    {
        return app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * The language tag of the current locale.
     *
     * @return string
     */
    public static function languageTag(): string
    {
        return self::$locale->languageTag();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are needed
        // to keep $precision decimal places in the percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect runs of digits, so that they keep their LTR order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of the
     * first character that belongs to a known script range is returned.
     * Text with no recognised characters, and invalid UTF-8, is reported as "Latn".
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // The first byte tells us how many bytes are used by the character at position $pos
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $chrlen = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - invalid UTF
                return 'Latn';
            }

            // get the Unicode Code Point for the character at position $pos
            switch ($chrlen) {
                case 1:
                    $code_point = $byte1;
                    break;
                case 2:
                    $code_point = (($byte1 & 0x1F) << 6) + (ord($string[$pos + 1]) & 0x3F);
                    break;
                case 3:
                    $code_point = (($byte1 & 0x0F) << 12) + ((ord($string[$pos + 1]) & 0x3F) << 6) + (ord($string[$pos + 2]) & 0x3F);
                    break;
                default:
                    // Four bytes: 3 + 6 + 6 + 6 bits, so the first byte is shifted by 18 bits.
                    $code_point = (($byte1 & 0x07) << 18) + ((ord($string[$pos + 1]) & 0x3F) << 12) + ((ord($string[$pos + 2]) & 0x3F) << 6) + (ord($string[$pos + 3]) & 0x3F);
                    break;
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator for the current locale when php-intl is available,
     * otherwise a byte-wise comparison of the lower-case strings.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $comparison = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on failure, which is not a valid
            // return value for this function. Use the fallback comparison instead.
            if ($comparison !== false) {
                return $comparison;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * What format is used to display times in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}