<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Closure;
use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use Illuminate\Support\Collection;

use function array_merge;
use function class_exists;
use function file_put_contents;
use function html_entity_decode;
use function in_array;
use function mb_strtolower;
use function mb_strtoupper;
use function mb_substr;
use function ord;
use function round;
use function sprintf;
use function str_contains;
use function str_replace;
use function strcmp;
use function strip_tags;
use function strlen;
use function strtr;
use function var_export;

/**
 * Internationalization (i18n) and localization (l10n).
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];

    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];

    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // textScript() returns the FIRST matching entry, so order matters where ranges
    // overlap (e.g. the Deva extension 0xA8E0-0xA8FF lies inside the wide CJK range).
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        // Mixed CJK, not just Hans
        ['Hans', 0x3000, 0x303F],
        // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF],
        // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Keys must be the bare character (no padding), otherwise strtr() only
    // matches when the character happens to be followed by that padding.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    // Punctuation used to separate list items, typically a comma
    public static string $list_separator;

    // Only assigned by init() when it is called with $setup = false.
    private static ?ModuleLanguageInterface $language = null;

    private static LocaleInterface $locale;

    private static Translator $translator;

    // Null when no usable Collator is available; comparator() then falls back to strcmp().
    private static ?Collator $collator = null;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        /** @var Collection $locales */
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();

        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See https://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * Sets the current locale, translator, list separator and collator. The
     * language module and custom-module translations are only loaded when
     * $setup is false.
     *
     * @param string $code
     * @param bool   $setup
     *
     * @return void
     */
    public static function init(string $code, bool $setup = false): void
    {
        self::$locale = Locale::create($code);

        // Load the translation file
        $translation_file = __DIR__ . '/../resources/lang/' . self::$locale->languageTag() . '/messages.php';

        try {
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();
        } catch (Exception $ex) {
            // The translations files are created during the build process, and are
            // not included in the source code.
            // Assuming we are using dev code, and build (or rebuild) the files.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
            $translation  = new Translation($po_file);
            $translations = $translation->asArray();
            file_put_contents($translation_file, "<?php\n\nreturn " . var_export($translations, true) . ";\n");
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $module_service = app(ModuleService::class);

            $translations = $module_service
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);

            self::$language = $module_service
                ->findByInterface(ModuleLanguageInterface::class)
                ->first(fn (ModuleLanguageInterface $module): bool => $module->locale()->languageTag() === $code);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator. Start from a known state, so that a missing Collator
        // class leaves the property null rather than unset or stale from a
        // previous call.
        self::$collator = null;

        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * The language tag of the current locale.
     *
     * @return string
     */
    public static function languageTag(): string
    {
        return self::$locale->languageTag();
    }

    /**
     * The current locale, as set by init().
     *
     * @return LocaleInterface
     */
    public static function locale(): LocaleInterface
    {
        return self::$locale;
    }

    /**
     * The language module whose locale matches the code given to init().
     * This is only assigned when init() was called with $setup = false.
     *
     * @return ModuleLanguageInterface
     */
    public static function language(): ModuleLanguageInterface
    {
        return self::$language;
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The fraction is rounded to two more decimal places than the requested precision.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText(string $text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';

        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);

            if (str_contains(self::DIGITS, $letter)) {
                // Collect a run of digits, so that it can be emitted in its original (LTR) order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection(string $script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of the
     * first character that falls inside SCRIPT_CHARACTER_RANGES is returned.
     * Text with no recognised characters, or with invalid/truncated UTF-8,
     * is reported as "Latn".
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript(string $string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            Individual::NOMEN_NESCIO,
            Individual::PRAENOMEN_NESCIO,
        ], '', $string);

        $pos    = 0;
        $strlen = strlen($string);

        while ($pos < $strlen) {
            // The lead byte tells us the sequence length, and which of its bits carry data.
            $byte1 = ord($string[$pos]);

            if ($byte1 < 0x80) {
                $chrlen = 1;
                $mask   = 0x7F;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $chrlen = 2;
                $mask   = 0x1F;
            } elseif ($byte1 < 0xF0) {
                $chrlen = 3;
                $mask   = 0x0F;
            } elseif ($byte1 < 0xF8) {
                $chrlen = 4;
                $mask   = 0x07;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - do not read past the end of the string.
                return 'Latn';
            }

            // Get the Unicode code point: each continuation byte contributes six bits.
            $code_point = $byte1 & $mask;

            for ($i = 1; $i < $chrlen; $i++) {
                $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }

            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * A closure which will compare strings using local collation rules.
     *
     * When no collator is available, strings are compared byte-wise after
     * converting both to lower case.
     *
     * @return Closure
     */
    public static function comparator(): Closure
    {
        if (self::$collator instanceof Collator) {
            return static function (string $x, string $y): int {
                return (int) self::$collator->compare($x, $y);
            };
        }

        return static function (string $x, string $y): int {
            return strcmp(self::strtolower($x), self::strtolower($y));
        };
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower(string $string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper(string $string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * What format is used to display times in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See https://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}