<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use Illuminate\Support\Collection;

use function array_merge;
use function class_exists;
use function html_entity_decode;
use function in_array;
use function mb_strtolower;
use function mb_strtoupper;
use function mb_substr;
use function ord;
use function sprintf;
use function str_replace;
use function strcmp;
use function strip_tags;
use function strlen;
use function strpos;
use function strtr;

/**
 * Internationalization (i18n) and localization (l10n).
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];

    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];

    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // Ranges are tested in order and the first match wins, so a narrow range
    // that lies inside a wider one must be listed before it.
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        // Mixed CJK, not just Hans
        ['Hans', 0x3000, 0x303F],
        // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF],
        // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Keys must be the bare character (no padding), otherwise strtr() will
    // only match the character when it is followed by a space.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        /** @var Collection $locales */
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen, in order of preference, from: the given code,
     * the "language" value stored in the session, or negotiation with the
     * browser (falling back to the tree's LANGUAGE preference, or en-US).
     *
     * @param string    $code  Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     * @param bool      $setup During setup, we cannot access the database.
     *
     * @return string The language tag of the locale that was selected
     */
    public static function init(string $code = '', ?Tree $tree = null, $setup = false): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('language')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('language'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            if ($setup) {
                $installed_locales = app(ModuleService::class)->setupLanguages()
                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                        return $module->locale();
                    });
            } else {
                $installed_locales = self::installedLocales();
            }

            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
        }

        // Load the translation file
        $translation_file = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.php';

        try {
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();
        } catch (Exception $ex) {
            // The translations files are created during the build process, and are
            // not included in the source code.
            // Assuming we are using dev code, and build (or rebuild) the files.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
            $translation  = new Translation($po_file);
            $translations = $translation->asArray();
            file_put_contents($translation_file, '<?php return ' . var_export($translations, true) . ';');
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return Collection
     */
    public static function installedLocales(): Collection
    {
        return app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are needed
        // to keep $precision places once it is expressed as a percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string The text with markup removed; reversed only if its script is RTL
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits, so that it can be emitted in its original order.
                $digits .= $letter;
            } else {
                // Prepending each letter reverses the text; the pending run of
                // digits is placed after the letter, which keeps it LTR.
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned from the start, and the script of the first
     * character that falls in a known range is returned. Characters outside
     * every known range (punctuation, spaces, emoji, etc.) are skipped.
     *
     * @param string $string
     *
     * @return string A script code such as "Latn" or "Hebr". "Latn" is returned
     *                when no script is recognised or the text is not valid UTF-8.
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // Get the Unicode Code Point for the character at position $pos.
            // The lead byte gives the length of the sequence and the high bits.
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $code_point = $byte1;
                $chrlen     = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $code_point = $byte1 & 0x1F;
                $chrlen     = 2;
            } elseif ($byte1 < 0xF0) {
                $code_point = $byte1 & 0x0F;
                $chrlen     = 3;
            } elseif ($byte1 < 0xF8) {
                $code_point = $byte1 & 0x07;
                $chrlen     = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - invalid UTF
                return 'Latn';
            }

            // Each continuation byte contributes its low six bits.
            for ($i = 1; $i < $chrlen; $i++) {
                $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator for the current locale when one is available,
     * otherwise compares the lower-case forms of the strings byte by byte.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on error. Use the fallback
            // rather than returning a non-integer.
            if ($result !== false) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * What format is used to display times in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}