<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2020 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees;

use Collator;
use Exception;
use Fisharebest\Localization\Locale;
use Fisharebest\Localization\Locale\LocaleEnUs;
use Fisharebest\Localization\Locale\LocaleInterface;
use Fisharebest\Localization\Translation;
use Fisharebest\Localization\Translator;
use Fisharebest\Webtrees\Module\ModuleCustomInterface;
use Fisharebest\Webtrees\Module\ModuleLanguageInterface;
use Fisharebest\Webtrees\Services\ModuleService;
use Illuminate\Support\Collection;

use function array_merge;
use function class_exists;
use function file_put_contents;
use function html_entity_decode;
use function in_array;
use function mb_strtolower;
use function mb_strtoupper;
use function mb_substr;
use function ord;
use function round;
use function sprintf;
use function str_contains;
use function str_replace;
use function strcmp;
use function strip_tags;
use function strlen;
use function strpos;
use function strtr;
use function var_export;

/**
 * Internationalization (i18n) and localization (l10n).
 */
class I18N
{
    // MO files use special characters for plurals and context.
    public const PLURAL  = "\x00";
    public const CONTEXT = "\x04";

    // Digits are always rendered LTR, even in RTL text.
    private const DIGITS = '0123456789٠١٢٣٤٥٦٧٨٩۰۱۲۳۴۵۶۷۸۹';

    // These locales need special handling for the dotless letter I.
    private const DOTLESS_I_LOCALES = [
        'az',
        'tr',
    ];

    private const DOTLESS_I_TOLOWER = [
        'I' => 'ı',
        'İ' => 'i',
    ];

    private const DOTLESS_I_TOUPPER = [
        'ı' => 'I',
        'i' => 'İ',
    ];

    // The ranges of characters used by each script: [script, first code point, last code point].
    // textScript() returns the FIRST matching entry, so the order matters where ranges overlap
    // (e.g. the Deva block 0xA8E0-0xA8FF must precede the wide CJK block 0x3400-0xFAFF).
    private const SCRIPT_CHARACTER_RANGES = [
        ['Latn', 0x0041, 0x005A],
        ['Latn', 0x0061, 0x007A],
        ['Latn', 0x0100, 0x02AF],
        ['Grek', 0x0370, 0x03FF],
        ['Cyrl', 0x0400, 0x052F],
        ['Hebr', 0x0590, 0x05FF],
        ['Arab', 0x0600, 0x06FF],
        ['Arab', 0x0750, 0x077F],
        ['Arab', 0x08A0, 0x08FF],
        ['Deva', 0x0900, 0x097F],
        ['Taml', 0x0B80, 0x0BFF],
        ['Sinh', 0x0D80, 0x0DFF],
        ['Thai', 0x0E00, 0x0E7F],
        ['Geor', 0x10A0, 0x10FF],
        ['Grek', 0x1F00, 0x1FFF],
        ['Deva', 0xA8E0, 0xA8FF],
        // Mixed CJK, not just Hans
        ['Hans', 0x3000, 0x303F],
        // Mixed CJK, not just Hans
        ['Hans', 0x3400, 0xFAFF],
        // Mixed CJK, not just Hans
        ['Hans', 0x20000, 0x2FA1F],
    ];

    // Characters that are displayed in mirror form in RTL text.
    // Every key must be exactly one character: strtr() matches keys literally, so a
    // stray trailing space would both prevent the match and swallow the space.
    private const MIRROR_CHARACTERS = [
        '(' => ')',
        ')' => '(',
        '[' => ']',
        ']' => '[',
        '{' => '}',
        '}' => '{',
        '<' => '>',
        '>' => '<',
        '‹' => '›',
        '›' => '‹',
        '«' => '»',
        '»' => '«',
        '﴾' => '﴿',
        '﴿' => '﴾',
        '“' => '”',
        '”' => '“',
        '‘' => '’',
        '’' => '‘',
    ];

    /** @var string Punctuation used to separate list items, typically a comma */
    public static $list_separator;

    /** @var LocaleInterface The current locale (e.g. LocaleEnGb) */
    private static $locale;

    /** @var Translator An object that performs translation */
    private static $translator;

    /** @var Collator|null From the php-intl library */
    private static $collator;

    /**
     * The preferred locales for this site, or a default list if no preference.
     *
     * @return LocaleInterface[]
     */
    public static function activeLocales(): array
    {
        /** @var Collection $locales */
        $locales = app(ModuleService::class)
            ->findByInterface(ModuleLanguageInterface::class, false, true)
            ->map(static function (ModuleLanguageInterface $module): LocaleInterface {
                return $module->locale();
            });

        // No language modules found - fall back to a single default locale.
        if ($locales->isEmpty()) {
            return [new LocaleEnUs()];
        }

        return $locales->all();
    }

    /**
     * Which MySQL collation should be used for this locale?
     *
     * @return string
     */
    public static function collation(): string
    {
        $collation = self::$locale->collation();

        switch ($collation) {
            case 'croatian_ci':
            case 'german2_ci':
            case 'vietnamese_ci':
                // Only available in MySQL 5.6
                return 'utf8_unicode_ci';
            default:
                return 'utf8_' . $collation;
        }
    }

    /**
     * What format is used to display dates in the current locale?
     *
     * @return string
     */
    public static function dateFormat(): string
    {
        /* I18N: This is the format string for full dates. See http://php.net/date for codes */
        return self::$translator->translate('%j %F %Y');
    }

    /**
     * Convert the digits 0-9 into the local script
     * Used for years, etc., where we do not want thousands-separators, decimals, etc.
     *
     * @param string|int $n
     *
     * @return string
     */
    public static function digits($n): string
    {
        return self::$locale->digits((string) $n);
    }

    /**
     * What is the direction of the current locale
     *
     * @return string "ltr" or "rtl"
     */
    public static function direction(): string
    {
        return self::$locale->direction();
    }

    /**
     * Initialise the translation adapter with a locale setting.
     *
     * Sets the current locale, loads its translations (building the compiled
     * messages.php from messages.po if loading fails), merges translations from
     * custom modules, and creates the translator, list separator and collator.
     *
     * @param string $code
     * @param bool   $setup True during setup, when custom modules are not consulted
     *
     * @return void
     */
    public static function init(string $code, bool $setup = false): void
    {
        self::$locale = Locale::create($code);

        // Load the translation file
        $translation_file = __DIR__ . '/../resources/lang/' . self::$locale->languageTag() . '/messages.php';

        try {
            $translation  = new Translation($translation_file);
            $translations = $translation->asArray();
        } catch (Exception $ex) {
            // The translations files are created during the build process, and are
            // not included in the source code.
            // Assuming we are using dev code, and build (or rebuild) the files.
            $po_file      = Webtrees::ROOT_DIR . 'resources/lang/' . self::$locale->languageTag() . '/messages.po';
            $translation  = new Translation($po_file);
            $translations = $translation->asArray();
            file_put_contents($translation_file, "<?php\n\nreturn " . var_export($translations, true) . ";\n");
        }

        // Add translations from custom modules (but not during setup, as we have no database/modules)
        if (!$setup) {
            $translations = app(ModuleService::class)
                ->findByInterface(ModuleCustomInterface::class)
                ->reduce(static function (array $carry, ModuleCustomInterface $item): array {
                    return array_merge($carry, $item->customTranslations(self::$locale->languageTag()));
                }, $translations);
        }

        // Create a translator
        self::$translator = new Translator($translations, self::$locale->pluralRule());

        /* I18N: This punctuation is used to separate lists of items */
        self::$list_separator = self::translate(', ');

        // Create a collator
        try {
            if (class_exists('Collator')) {
                // Symfony provides a very incomplete polyfill - which cannot be used.
                self::$collator = new Collator(self::$locale->code());
                // Ignore upper/lower case differences
                self::$collator->setStrength(Collator::SECONDARY);
            }
        } catch (Exception $ex) {
            // PHP-INTL is not installed? We'll use a fallback later.
            self::$collator = null;
        }
    }

    /**
     * Translate a string, and then substitute placeholders
     * echo I18N::translate('Hello World!');
     * echo I18N::translate('The %s sat on the mat', 'cat');
     *
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translate(string $message, ...$args): string
    {
        $message = self::$translator->translate($message);

        return sprintf($message, ...$args);
    }

    /**
     * The language tag of the current locale.
     *
     * @return string
     */
    public static function languageTag(): string
    {
        return self::$locale->languageTag();
    }

    /**
     * The current locale.
     *
     * @return LocaleInterface
     */
    public static function locale(): LocaleInterface
    {
        return self::$locale;
    }

    /**
     * Translate a number into the local representation.
     * e.g. 12345.67 becomes
     * en: 12,345.67
     * fr: 12 345,67
     * de: 12.345,67
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function number(float $n, int $precision = 0): string
    {
        return self::$locale->number(round($n, $precision));
    }

    /**
     * Translate a fraction into a percentage.
     * e.g. 0.123 becomes
     * en: 12.3%
     * fr: 12,3 %
     * de: 12,3%
     *
     * @param float $n
     * @param int   $precision
     *
     * @return string
     */
    public static function percentage(float $n, int $precision = 0): string
    {
        // The input is a fraction, so two extra decimal places are kept before formatting.
        return self::$locale->percent(round($n, $precision + 2));
    }

    /**
     * Translate a plural string
     * echo self::plural('There is an error', 'There are errors', $num_errors);
     * echo self::plural('There is one error', 'There are %s errors', $num_errors);
     * echo self::plural('There is %1$s %2$s cat', 'There are %1$s %2$s cats', $num, $num, $colour);
     *
     * @param string $singular
     * @param string $plural
     * @param int    $count
     * @param string ...$args
     *
     * @return string
     */
    public static function plural(string $singular, string $plural, int $count, ...$args): string
    {
        $message = self::$translator->translatePlural($singular, $plural, $count);

        return sprintf($message, ...$args);
    }

    /**
     * UTF8 version of PHP::strrev()
     * Reverse RTL text for third-party libraries such as GD2 and googlechart.
     * These do not support UTF8 text direction, so we must mimic it for them.
     * Numbers are always rendered LTR, even in RTL text.
     * The visual direction of characters such as parentheses should be reversed.
     *
     * @param string $text Text to be reversed
     *
     * @return string
     */
    public static function reverseText($text): string
    {
        // Remove HTML markup - we can't display it and it is LTR.
        $text = strip_tags($text);
        // Remove HTML entities.
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // LTR text doesn't need reversing
        if (self::scriptDirection(self::textScript($text)) === 'ltr') {
            return $text;
        }

        // Mirrored characters
        $text = strtr($text, self::MIRROR_CHARACTERS);

        $reversed = '';
        $digits   = '';

        while ($text !== '') {
            $letter = mb_substr($text, 0, 1);
            $text   = mb_substr($text, 1);

            if (str_contains(self::DIGITS, $letter)) {
                // Collect runs of digits, so that they keep their LTR order.
                $digits .= $letter;
            } else {
                // Prepend the letter, followed by any digits that preceded it.
                $reversed = $letter . $digits . $reversed;
                $digits   = '';
            }
        }

        // Any digits still pending were at the end of the text, so come first.
        return $digits . $reversed;
    }

    /**
     * Return the direction (ltr or rtl) for a given script
     * The PHP/intl library does not provide this information, so we need
     * our own lookup table.
     *
     * @param string $script
     *
     * @return string
     */
    public static function scriptDirection($script): string
    {
        switch ($script) {
            case 'Arab':
            case 'Hebr':
            case 'Mong':
            case 'Thaa':
                return 'rtl';
            default:
                return 'ltr';
        }
    }

    /**
     * Identify the script used for a piece of text
     *
     * The text is scanned one UTF-8 character at a time, and the script of the
     * first character found in SCRIPT_CHARACTER_RANGES is returned. Invalid or
     * truncated UTF-8, and text with no recognised characters, give 'Latn'.
     *
     * @param string $string
     *
     * @return string
     */
    public static function textScript($string): string
    {
        $string = strip_tags($string); // otherwise HTML tags show up as latin
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); // otherwise HTML entities show up as latin
        $string = str_replace([
            '@N.N.',
            '@P.N.',
        ], '', $string); // otherwise unknown names show up as latin

        $pos    = 0;
        $strlen = strlen($string);

        while ($pos < $strlen) {
            // The lead byte gives the length of the sequence and the high bits of the code point.
            $byte1 = ord($string[$pos]);

            if ($byte1 < 0x80) {
                $code_point = $byte1;
                $chrlen     = 1;
            } elseif ($byte1 < 0xC0) {
                // Invalid continuation character
                return 'Latn';
            } elseif ($byte1 < 0xE0) {
                $code_point = $byte1 & 0x1F;
                $chrlen     = 2;
            } elseif ($byte1 < 0xF0) {
                $code_point = $byte1 & 0x0F;
                $chrlen     = 3;
            } elseif ($byte1 < 0xF8) {
                $code_point = $byte1 & 0x07;
                $chrlen     = 4;
            } else {
                // Invalid UTF
                return 'Latn';
            }

            if ($pos + $chrlen > $strlen) {
                // Truncated multi-byte sequence - invalid UTF
                return 'Latn';
            }

            // Each continuation byte contributes its low six bits.
            for ($i = 1; $i < $chrlen; $i++) {
                $code_point = ($code_point << 6) | (ord($string[$pos + $i]) & 0x3F);
            }

            foreach (self::SCRIPT_CHARACTER_RANGES as $range) {
                if ($code_point >= $range[1] && $code_point <= $range[2]) {
                    return $range[0];
                }
            }

            // Not a recognised script. Maybe punctuation, spacing, etc. Keep looking.
            $pos += $chrlen;
        }

        return 'Latn';
    }

    /**
     * Perform a case-insensitive comparison of two strings.
     *
     * Uses the collator created by init(), when there is one. Otherwise (or if
     * the collator reports a failure) compares the lower-case forms byte-wise.
     *
     * @param string $string1
     * @param string $string2
     *
     * @return int
     */
    public static function strcasecmp($string1, $string2): int
    {
        if (self::$collator instanceof Collator) {
            $result = self::$collator->compare($string1, $string2);

            // Collator::compare() returns false on failure, which is not a valid int result.
            if ($result !== false) {
                return $result;
            }
        }

        return strcmp(self::strtolower($string1), self::strtolower($string2));
    }

    /**
     * Convert a string to lower case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtolower($string): string
    {
        // Apply the dotted/dotless I mapping first, for the languages that need it.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOLOWER);
        }

        return mb_strtolower($string);
    }

    /**
     * Convert a string to upper case.
     *
     * @param string $string
     *
     * @return string
     */
    public static function strtoupper($string): string
    {
        // Apply the dotted/dotless I mapping first, for the languages that need it.
        if (in_array(self::$locale->language()->code(), self::DOTLESS_I_LOCALES, true)) {
            $string = strtr($string, self::DOTLESS_I_TOUPPER);
        }

        return mb_strtoupper($string);
    }

    /**
     * What format is used to display times in the current locale?
     *
     * @return string
     */
    public static function timeFormat(): string
    {
        /* I18N: This is the format string for the time-of-day. See http://php.net/date for codes */
        return self::$translator->translate('%H:%i:%s');
    }

    /**
     * Context sensitive version of translate.
     * echo I18N::translateContext('NOMINATIVE', 'January');
     * echo I18N::translateContext('GENITIVE', 'January');
     *
     * @param string $context
     * @param string $message
     * @param string ...$args
     *
     * @return string
     */
    public static function translateContext(string $context, string $message, ...$args): string
    {
        $message = self::$translator->translateContext($context, $message);

        return sprintf($message, ...$args);
    }
}