<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use Illuminate\Support\Collection;

use function array_merge;
use function class_exists;
use function html_entity_decode;
use function in_array;
use function mb_strtolower;
use function mb_strtoupper;
use function mb_substr;
use function ord;
use function sprintf;
use function str_replace;
use function strcmp;
use function strip_tags;
use function strlen;
use function strpos;
use function strtr;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * All members are static. Most methods read the locale/translator state that
 * init() sets up, so init() must be called before them. The pure text helpers
 * (reverseText(), scriptDirection(), textScript()) only use class constants.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];

    // Applied before mb_strtolower() in the dotless-I locales.
    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];

    // Applied before mb_strtoupper() in the dotless-I locales.
    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // Ranges are tested in this order and the first match wins, so the order is significant
    // (e.g. the Deva block at 0xA8E0 must precede the wide CJK block that contains it).
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        // The three ranges below are mixed CJK, not just Hans.
        ['Hans', 0x3000, 0x303F],
        ['Hans', 0x3400, 0xFAFF],
        ['Hans', 0x20000, 0x2FA1F],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Every key must be exactly one character: strtr() matches keys literally, so a
    // stray trailing space would stop the key matching unless a space followed,
    // and would then swallow that space.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        /** @var Collection $locales */
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        // No language modules found: fall back to US English.
        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();

        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * What is the first day of the week.
     *
     * @return int Sunday=0, Monday=1, etc.
     */
    public static function firstDay(): int
    {
        return self::$locale->territory()->firstDay();
    }

    /**
     * Generate i18n markup for the <html> tag, e.g. lang="ar" dir="rtl"
     *
     * @return string
     */
    public static function htmlAttributes(): string
    {
        return self::$locale->htmlAttributes();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * The locale is chosen, in order of preference, from: the given code, the
     * 'language' value stored in the session, or negotiation with the browser
     * (falling back to the tree's LANGUAGE preference, or en-US without a tree).
     * The translator, list separator and collator are then (re)created.
     *
     * @param string    $code  Use this locale/language code, or choose one automatically
     * @param Tree|null $tree
     * @param bool      $setup During setup, we cannot access the database.
     *
     * @return string The language tag of the selected locale
     */
    public static function init(string $code = '', ?Tree $tree = null, $setup = false): string
    {
        if ($code !== '') {
            // Create the specified locale
            self::$locale = Locale::create($code);
        } elseif (Session::has('language')) {
            // Select a previously used locale
            self::$locale = Locale::create(Session::get('language'));
        } else {
            if ($tree instanceof Tree) {
                $default_locale = Locale::create($tree->getPreference('LANGUAGE', 'en-US'));
            } else {
                $default_locale = new LocaleEnUs();
            }

            // Negotiate with the browser.
            // Search engines don't negotiate. They get the default locale of the tree.
            if ($setup) {
                $installed_locales = app(ModuleService::class)->setupLanguages()
                    ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                        return $module->locale();
                    });
            } else {
                $installed_locales = self::installedLocales();
            }

            self::$locale = Locale::httpAcceptLanguage($_SERVER, $installed_locales->all(), $default_locale);
        }

        // Load the translation file
        $translation_file = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.php';

        try {
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();
        } catch (Exception $ex) {
            // The translations files are created during the build process, and are
            // not included in the source code.
            // Assuming we are using dev code, and build (or rebuild) the files.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
            $translation  = new Translation($po_file);
            $translations = $translation->asArray();
            file_put_contents($translation_file, '<?php return ' . var_export($translations, true) . ';');
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }

        return self::$locale->languageTag();
    }

    /**
     * All locales for which a translation file exists.
     *
     * @return Collection
     */
    public static function installedLocales(): Collection
    {
        return app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * Return the endonym for a given language - as per http://cldr.unicode.org/
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageName(string $locale): string
    {
        return Locale::create($locale)->endonym();
    }

    /**
     * Return the script used by a given language
     *
     * @param string $locale
     *
     * @return string
     */
    public static function languageScript(string $locale): string
    {
        return Locale::create($locale)->script()->code();
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are kept when rounding.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';

        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);

            if (strpos(self::DIGITS, $letter) !== false) {
                // Collect a run of digits in reading order, so the number itself stays LTR.
                $digits .= $letter;
            } else {
                // Prepend the character, followed by any digit run that preceded it.
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // A digit run at the very end of the input becomes the start of the output.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of the
     * first character that falls inside a known range is returned. Characters
     * outside every range (punctuation, spaces, emoji, etc.) are skipped.
     * Invalid or truncated UTF-8, and text with no recognised character, give 'Latn'.
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin

        $pos    = 0;
        $strlen = strlen($string);

        while ($pos < $strlen) {
            // The lead byte gives the sequence length and the top bits of the code point.
            $byte1 = ord($string[$pos]);

            if ($byte1 < 0x80) {
                $code_point = $byte1;
                $chrlen     = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $code_point = $byte1 & 0x1F;
                $chrlen     = 2;
            } elseif ($byte1 < 0xF0) {
                $code_point = $byte1 & 0x0F;
                $chrlen     = 3;
            } elseif ($byte1 < 0xF8) {
                $code_point = $byte1 & 0x07;
                $chrlen     = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            // Truncated multi-byte sequence at the end of the string - treat as invalid UTF
            if ($pos + $chrlen > $strlen) {
                return 'Latn';
            }

            // Each continuation byte contributes its low six bits.
            for ($i = 1; $i < $chrlen; $i++) {
                $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }

            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator created by init() when there is one; otherwise (or if the
     * collator reports an error) compares the lower-cased strings byte-wise.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on failure, which is not a valid int result.
            if ($result !== false) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}