<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2020 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use Illuminate\Support\Collection;

use function array_merge;
use function class_exists;
use function file_put_contents;
use function html_entity_decode;
use function in_array;
use function mb_strtolower;
use function mb_strtoupper;
use function mb_substr;
use function ord;
use function round;
use function sprintf;
use function str_contains;
use function str_replace;
use function strcmp;
use function strip_tags;
use function strlen;
use function strtr;
use function var_export;

/**
 * Internationalization (i18n) and localization (l10n).
 *
 * All state is static: init() must be called before any method that reads
 * the current locale, translator or collator.
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];

    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];

    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script.
    // Each entry is [script code, first code point, last code point].
    private const SCRIPT_CHARACTER_RANGES = [
        [
            'Latn',
            0x0041,
            0x005A,
        ],
        [
            'Latn',
            0x0061,
            0x007A,
        ],
        [
            'Latn',
            0x0100,
            0x02AF,
        ],
        [
            'Grek',
            0x0370,
            0x03FF,
        ],
        [
            'Cyrl',
            0x0400,
            0x052F,
        ],
        [
            'Hebr',
            0x0590,
            0x05FF,
        ],
        [
            'Arab',
            0x0600,
            0x06FF,
        ],
        [
            'Arab',
            0x0750,
            0x077F,
        ],
        [
            'Arab',
            0x08A0,
            0x08FF,
        ],
        [
            'Deva',
            0x0900,
            0x097F,
        ],
        [
            'Taml',
            0x0B80,
            0x0BFF,
        ],
        [
            'Sinh',
            0x0D80,
            0x0DFF,
        ],
        [
            'Thai',
            0x0E00,
            0x0E7F,
        ],
        [
            'Geor',
            0x10A0,
            0x10FF,
        ],
        [
            'Grek',
            0x1F00,
            0x1FFF,
        ],
        [
            'Deva',
            0xA8E0,
            0xA8FF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3000,
            0x303F,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x3400,
            0xFAFF,
        ],
        // Mixed CJK, not just Hans
        [
            'Hans',
            0x20000,
            0x2FA1F,
        ],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Keys must be exactly one character: a stray trailing space in a key
    // would make strtr() match only "character + space" and drop the space.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        /** @var Collection $locales */
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();
        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * Sets the current locale, loads (or builds) its translations, merges in
     * translations from custom modules, and creates the translator and the
     * optional collator.
     *
     * @param string $code
     * @param bool   $setup Skip custom-module translations (no database/modules during setup)
     *
     * @return void
     */
    public static function init(string $code, bool $setup = false): void
    {
        self::$locale = Locale::create($code);

        // Load the translation file
        $translation_file = __DIR__ . '/../resources/lang/' . self::$locale->languageTag() . '/messages.php';

        try {
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();
        } catch (Exception $ex) {
            // The translations files are created during the build process, and are
            // not included in the source code.
            // Assuming we are using dev code, and build (or rebuild) the files.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
            $translation  = new Translation($po_file);
            $translations = $translation->asArray();
            file_put_contents($translation_file, "<?php\n\nreturn " . var_export($translations, true) . ";\n");
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * The language tag of the current locale.
     *
     * @return string
     */
    public static function languageTag(): string
    {
        return self::$locale->languageTag();
    }

    /**
     * The current locale, as set by init().
     *
     * @return LocaleInterface
     */
    public static function locale(): LocaleInterface
    {
        return self::$locale;
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are kept
        // to give $precision places once expressed as a percentage.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';
        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);
            if (str_contains(self::DIGITS, $letter)) {
                // Accumulate a run of digits, so that it keeps its LTR order.
                $digits .= $letter;
            } else {
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // Any digits still pending were at the end of the input.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Identify the script used for a piece of text
     *
     * The result is the script of the first character that falls inside one
     * of SCRIPT_CHARACTER_RANGES. Text with no such character, and text that
     * is not valid UTF-8, is reported as 'Latn'.
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin
        $pos    = 0;
        $strlen = strlen($string);
        while ($pos < $strlen) {
            // get the Unicode Code Point for the character at position $pos
            $byte1 = ord($string[$pos]);
            if ($byte1 < 0x80) {
                $code_point = $byte1;
                $chrlen     = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $code_point = $byte1 & 0x1F;
                $chrlen     = 2;
            } elseif ($byte1 < 0xF0) {
                $code_point = $byte1 & 0x0F;
                $chrlen     = 3;
            } elseif ($byte1 < 0xF8) {
                $code_point = $byte1 & 0x07;
                $chrlen     = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - invalid UTF
                return 'Latn';
            }

            // Each continuation byte contributes its low six bits.
            for ($i = 1; $i < $chrlen; $i++) {
                $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }
            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator created by init() when there is one, and otherwise
     * (or if the collator reports an error) compares the lower-cased strings.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on error, which is not a valid int result.
            if ($result !== false) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * What format is used to display the time-of-day in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}