<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2022 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Closure;
use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;

use function array_merge;
use function class_exists;
use function file_put_contents;
use function html_entity_decode;
use function in_array;
use function mb_strtolower;
use function mb_strtoupper;
use function mb_substr;
use function ord;
use function round;
use function sprintf;
use function str_contains;
use function str_replace;
use function strcmp;
use function strip_tags;
use function strlen;
use function strtr;
use function var_export;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * All state is static: init() must be called before any method that
 * reads the current locale or translator.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];

    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];

    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // The list is searched in order and the first matching range wins, so the
    // narrow 'Deva' range at 0xA8E0 must stay ahead of the wide CJK range that contains it.
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        // The following three ranges are mixed CJK, not just Hans.
        ['Hans', 0x3000, 0x303F],
        ['Hans', 0x3400, 0xFAFF],
        ['Hans', 0x20000, 0x2FA1F],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Every key must be exactly one character: strtr() matches keys literally,
    // so any padding inside a key would become part of the text to match.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    // Punctuation used to separate list items, typically a comma
    public static string $list_separator;

    private static ModuleLanguageInterface $language;

    private static LocaleInterface $locale;

    private static Translator $translator;

    private static ?Collator $collator = null;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return array<LocaleInterface>
     */
    public static function activeLocales(): array
    {
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static fn (ModuleLanguageInterface $module): LocaleInterface => $module->locale());

        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();

        return match ($collation) {
            // Only available in MySQL 5.6
            'croatian_ci', 'german2_ci', 'vietnamese_ci' => 'utf8_unicode_ci',
            default                                      => 'utf8_' . $collation,
        };
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See https://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits(string|int $n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * Sets the current locale, loads (or builds) its translations, creates the
     * translator, sets the list separator and, when possible, creates a collator.
     *
     * @param string $code  Locale code, passed to Locale::create()
     * @param bool   $setup True during setup, when custom/language modules are not consulted
     *
     * @return void
     */
    public static function init(string $code, bool $setup = false): void
    {
        self::$locale = Locale::create($code);

        // Load the translation file
        $translation_file = __DIR__ . '/../resources/lang/' . self::$locale->languageTag() . '/messages.php';

        try {
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();
        } catch (Exception) {
            // The translations files are created during the build process, and are
            // not included in the source code.
            // Assuming we are using dev code, and build (or rebuild) the files.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
            $translation  = new Translation($po_file);
            $translations = $translation->asArray();
            file_put_contents($translation_file, "<?php\n\nreturn " . var_export($translations, true) . ";\n");
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $module_service = app(ModuleService::class);

            $translations = $module_service
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);

            self::$language = $module_service
                ->findByInterface(ModuleLanguageInterface::class, true)
                ->first(fn (ModuleLanguageInterface $module): bool => $module->locale()->languageTag() === $code);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            // Symfony provides a very incomplete polyfill - which cannot be used.
            if (class_exists('Collator')) {
                // Need phonebook collation rules for German Ä, Ö and Ü.
                // A locale code that already contains "@" gets the keyword appended with ";".
                if (str_contains(self::$locale->code(), '@')) {
                    self::$collator = new Collator(self::$locale->code() . ';collation=phonebook');
                } else {
                    self::$collator = new Collator(self::$locale->code() . '@collation=phonebook');
                }
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception) {
            // PHP-INTL is not installed? We'll use a fallback later.
        }
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * The language tag of the current locale.
     *
     * @return string
     */
    public static function languageTag(): string
    {
        return self::$locale->languageTag();
    }

    /**
     * The current locale, as set by init().
     *
     * @return LocaleInterface
     */
    public static function locale(): LocaleInterface
    {
        return self::$locale;
    }

    /**
     * The language module selected by init().
     * This is only assigned when init() is called with $setup = false.
     *
     * @return ModuleLanguageInterface
     */
    public static function language(): ModuleLanguageInterface
    {
        return self::$language;
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are kept when rounding.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText(string $text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (str_contains(self::DIGITS, $letter)) {
                // Collect a run of digits so that it can be emitted in its original order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string "ltr" or "rtl"
     */
    public static function scriptDirection(string $script): string
    {
        return match ($script) {
            'Arab', 'Hebr', 'Mong', 'Thaa' => 'rtl',
            default                        => 'ltr',
        };
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of the
     * first character that falls in a known range is returned. Text with no such
     * character, or with invalid UTF-8 before one is found, is reported as 'Latn'.
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript(string $string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            Individual::NOMEN_NESCIO,
            Individual::PRAENOMEN_NESCIO,
        ], '', $string);

        $pos    = 0;
        $strlen = strlen($string);

        while ($pos < $strlen) {
            // The lead byte gives the sequence length and the high bits of the code point.
            // A length of zero marks a byte that cannot start a character.
            $byte1 = ord($string[$pos]);

            [$chrlen, $code_point] = match (true) {
                $byte1 < 0x80 => [1, $byte1],
                $byte1 < 0xC0 => [0, 0], // Invalid continuation character
                $byte1 < 0xE0 => [2, $byte1 & 0x1F],
                $byte1 < 0xF0 => [3, $byte1 & 0x0F],
                $byte1 < 0xF8 => [4, $byte1 & 0x07],
                default       => [0, 0], // Invalid UTF
            };

            // Invalid lead byte, or a sequence that is cut off by the end of the string.
            if ($chrlen === 0 || $pos + $chrlen > $strlen) {
                return 'Latn';
            }

            // Each continuation byte contributes its low six bits.
            for ($offset = 1; $offset < $chrlen; $offset++) {
                $code_point = ($code_point << 6) | (ord($string[$pos + $offset]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as [$script, $first, $last]) {
                if ($code_point >= $first && $code_point <= $last) {
                    return $script;
                }
            }

            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * A closure which will compare strings using local collation rules.
     *
     * Uses the collator created by init() when there is one; otherwise falls back
     * to a byte-wise comparison of the lower-cased strings.
     *
     * @return Closure
     */
    public static function comparator(): Closure
    {
        $collator = self::$collator;

        if ($collator instanceof Collator) {
            return static fn (string $x, string $y): int => (int) $collator->compare($x, $y);
        }

        return static fn (string $x, string $y): int => strcmp(self::strtolower($x), self::strtolower($y));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower(string $string): string
    {
        // Handle dotted/dotless I before the generic conversion.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper(string $string): string
    {
        // Handle dotted/dotless I before the generic conversion.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * What format is used to display times in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See https://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}