1<?php 2 3/** 4 * webtrees: online genealogy 5 * 'Copyright (C) 2023 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees\Report; 21 22use Fisharebest\Webtrees\I18N; 23 24use function ord; 25use function preg_replace; 26use function str_contains; 27use function str_pad; 28use function str_replace; 29use function strlen; 30use function strpos; 31use function strrpos; 32use function strtolower; 33use function strtoupper; 34use function substr; 35 36use const STR_PAD_LEFT; 37use const STR_PAD_RIGHT; 38 39/** 40 * RTL Functions for use in the PDF reports 41 */ 42class RightToLeftSupport 43{ 44 private const UTF8_LRM = "\xE2\x80\x8E"; // U+200E (Left to Right mark: zero-width character with LTR directionality) 45 private const UTF8_RLM = "\xE2\x80\x8F"; // U+200F (Right to Left mark: zero-width character with RTL directionality) 46 private const UTF8_LRO = "\xE2\x80\xAD"; // U+202D (Left to Right override: force everything following to LTR mode) 47 private const UTF8_RLO = "\xE2\x80\xAE"; // U+202E (Right to Left override: force everything following to RTL mode) 48 private const UTF8_LRE = "\xE2\x80\xAA"; // U+202A (Left to Right embedding: treat everything following as LTR text) 49 private const UTF8_RLE = "\xE2\x80\xAB"; // U+202B (Right to Left embedding: treat everything following as RTL text) 50 private const UTF8_PDF = "\xE2\x80\xAC"; // U+202C (Pop directional formatting: restore state prior to last LRO, RLO, LRE, RLE) 51 52 private const OPEN_PARENTHESES = '([{'; 53 54 private const CLOSE_PARENTHESES = ')]}'; 55 56 private const NUMBERS = '0123456789'; 57 58 private const NUMBER_PREFIX = '+-'; // Treat these like numbers when at beginning or end of numeric strings 59 60 private const NUMBER_PUNCTUATION = '- ,.:/'; // Treat these like numbers when inside numeric strings 61 62 private const PUNCTUATION = ',.:;?!'; 63 64 // Markup 65 private const START_LTR = '<LTR>'; 66 private const END_LTR = '</LTR>'; 67 private const START_RTL = '<RTL>'; 68 private const END_RTL = '</RTL>'; 69 private const LENGTH_START = 5; 70 private const LENGTH_END = 6; 71 72 /* Were we previously processing LTR or RTL. */ 73 private static string $previousState; 74 75 /* Are we currently processing LTR or RTL. */ 76 private static string $currentState; 77 78 /* Text waiting to be processed. */ 79 private static string $waitingText; 80 81 /* Offset into the text. */ 82 private static int $posSpanStart; 83 84 /** 85 * This function strips ‎ and ‏ from the input string. It should be used for all 86 * text that has been passed through the PrintReady() function before that text is stored 87 * in the database. The database should NEVER contain these characters. 88 * 89 * @param string $inputText The string from which the ‎ and ‏ characters should be stripped 90 * 91 * @return string The input string, with ‎ and ‏ stripped 92 */ 93 private static function stripLrmRlm(string $inputText): string 94 { 95 return str_replace([ 96 self::UTF8_LRM, 97 self::UTF8_RLM, 98 self::UTF8_LRO, 99 self::UTF8_RLO, 100 self::UTF8_LRE, 101 self::UTF8_RLE, 102 self::UTF8_PDF, 103 '‎', 104 '‏', 105 '&LRM;', 106 '&RLM;', 107 ], '', $inputText); 108 } 109 110 /** 111 * This function encapsulates all texts in the input with <span dir='xxx'> and </span> 112 * according to the directionality specified. 113 * 114 * @param string $inputText Raw input 115 * 116 * @return string The string with all texts encapsulated as required 117 */ 118 public static function spanLtrRtl(string $inputText): string 119 { 120 if ($inputText === '') { 121 // Nothing to do 122 return ''; 123 } 124 125 $workingText = str_replace("\n", '<br>', $inputText); 126 $workingText = str_replace([ 127 '<span class="starredname"><br>', 128 '<span<br>class="starredname">', 129 ], '<br><span class="starredname">', $workingText); // Reposition some incorrectly placed line breaks 130 $workingText = self::stripLrmRlm($workingText); // Get rid of any existing UTF8 control codes 131 132 self::$previousState = ''; 133 self::$currentState = strtoupper(I18N::direction()); 134 $numberState = false; // Set when we're inside a numeric string 135 $result = ''; 136 self::$waitingText = ''; 137 $openParDirection = []; 138 139 self::beginCurrentSpan($result); 140 141 while ($workingText !== '') { 142 $charArray = self::getChar($workingText, 0); // Get the next ASCII or UTF-8 character 143 $currentLetter = $charArray['letter']; 144 $currentLen = $charArray['length']; 145 146 $openParIndex = strpos(self::OPEN_PARENTHESES, $currentLetter); // Which opening parenthesis is this? 147 $closeParIndex = strpos(self::CLOSE_PARENTHESES, $currentLetter); // Which closing parenthesis is this? 148 149 switch ($currentLetter) { 150 case '<': 151 // Assume this '<' starts an HTML element 152 $endPos = strpos($workingText, '>'); // look for the terminating '>' 153 if ($endPos === false) { 154 $endPos = 0; 155 } 156 $currentLen += $endPos; 157 $element = substr($workingText, 0, $currentLen); 158 $temp = strtolower(substr($element, 0, 3)); 159 if (strlen($element) < 7 && $temp === '<br') { 160 if ($numberState) { 161 $numberState = false; 162 if (self::$currentState === 'RTL') { 163 self::$waitingText .= self::UTF8_PDF; 164 } 165 } 166 self::breakCurrentSpan($result); 167 } elseif (self::$waitingText === '') { 168 $result .= $element; 169 } else { 170 self::$waitingText .= $element; 171 } 172 $workingText = substr($workingText, $currentLen); 173 break; 174 case '&': 175 // Assume this '&' starts an HTML entity 176 $endPos = strpos($workingText, ';'); // look for the terminating ';' 177 if ($endPos === false) { 178 $endPos = 0; 179 } 180 $currentLen += $endPos; 181 $entity = substr($workingText, 0, $currentLen); 182 if (strtolower($entity) === ' ') { 183 $entity = ' '; // Ensure consistent case for this entity 184 } 185 if (self::$waitingText === '') { 186 $result .= $entity; 187 } else { 188 self::$waitingText .= $entity; 189 } 190 $workingText = substr($workingText, $currentLen); 191 break; 192 case '{': 193 if (substr($workingText, 1, 1) === '{') { 194 // Assume this '{{' starts a TCPDF directive 195 $endPos = strpos($workingText, '}}'); // look for the terminating '}}' 196 if ($endPos === false) { 197 $endPos = 0; 198 } 199 $currentLen = $endPos + 2; 200 $directive = substr($workingText, 0, $currentLen); 201 $workingText = substr($workingText, $currentLen); 202 $result .= self::$waitingText . $directive; 203 self::$waitingText = ''; 204 break; 205 } 206 // no break 207 default: 208 // Look for strings of numbers with optional leading or trailing + or - 209 // and with optional embedded numeric punctuation 210 if ($numberState) { 211 // If we're inside a numeric string, look for reasons to end it 212 $offset = 0; // Be sure to look at the current character first 213 $charArray = self::getChar($workingText . "\n", $offset); 214 if (!str_contains(self::NUMBERS, $charArray['letter'])) { 215 // This is not a digit. Is it numeric punctuation? 216 if (substr($workingText . "\n", $offset, 6) === ' ') { 217 $offset += 6; // This could be numeric punctuation 218 } elseif (str_contains(self::NUMBER_PUNCTUATION, $charArray['letter'])) { 219 $offset += $charArray['length']; // This could be numeric punctuation 220 } 221 // If the next character is a digit, the current character is numeric punctuation 222 $charArray = self::getChar($workingText . "\n", $offset); 223 if (!str_contains(self::NUMBERS, $charArray['letter'])) { 224 // This is not a digit. End the run of digits and punctuation. 225 $numberState = false; 226 if (self::$currentState === 'RTL') { 227 if (!str_contains(self::NUMBER_PREFIX, $currentLetter)) { 228 $currentLetter = self::UTF8_PDF . $currentLetter; 229 } else { 230 $currentLetter .= self::UTF8_PDF; // Include a trailing + or - in the run 231 } 232 } 233 } 234 } 235 } elseif (str_contains(self::NUMBER_PREFIX, $currentLetter)) { 236 // If we're outside a numeric string, look for reasons to start it 237 // This might be a number lead-in 238 $offset = $currentLen; 239 $nextChar = substr($workingText . "\n", $offset, 1); 240 if (str_contains(self::NUMBERS, $nextChar)) { 241 $numberState = true; // We found a digit: the lead-in is therefore numeric 242 if (self::$currentState === 'RTL') { 243 $currentLetter = self::UTF8_LRE . $currentLetter; 244 } 245 } 246 } elseif (str_contains(self::NUMBERS, $currentLetter)) { 247 $numberState = true; // The current letter is a digit 248 if (self::$currentState === 'RTL') { 249 $currentLetter = self::UTF8_LRE . $currentLetter; 250 } 251 } 252 253 // Determine the directionality of the current UTF-8 character 254 $newState = self::$currentState; 255 256 while (true) { 257 if (I18N::scriptDirection(I18N::textScript($currentLetter)) === 'rtl') { 258 if (self::$currentState === '') { 259 $newState = 'RTL'; 260 break; 261 } 262 263 if (self::$currentState === 'RTL') { 264 break; 265 } 266 // Switch to RTL only if this isn't a solitary RTL letter 267 $tempText = substr($workingText, $currentLen); 268 while ($tempText !== '') { 269 $nextCharArray = self::getChar($tempText, 0); 270 $nextLetter = $nextCharArray['letter']; 271 $nextLen = $nextCharArray['length']; 272 $tempText = substr($tempText, $nextLen); 273 274 if (I18N::scriptDirection(I18N::textScript($nextLetter)) === 'rtl') { 275 $newState = 'RTL'; 276 break 2; 277 } 278 279 if (str_contains(self::PUNCTUATION, $nextLetter) || str_contains(self::OPEN_PARENTHESES, $nextLetter)) { 280 $newState = 'RTL'; 281 break 2; 282 } 283 284 if ($nextLetter === ' ') { 285 break; 286 } 287 $nextLetter .= substr($tempText . "\n", 0, 5); 288 if ($nextLetter === ' ') { 289 break; 290 } 291 } 292 // This is a solitary RTL letter : wrap it in UTF8 control codes to force LTR directionality 293 $currentLetter = self::UTF8_LRO . $currentLetter . self::UTF8_PDF; 294 $newState = 'LTR'; 295 break; 296 } 297 if ($currentLen !== 1 || $currentLetter >= 'A' && $currentLetter <= 'Z' || $currentLetter >= 'a' && $currentLetter <= 'z') { 298 // Since it’s neither Hebrew nor Arabic, this UTF-8 character or ASCII letter must be LTR 299 $newState = 'LTR'; 300 break; 301 } 302 if ($closeParIndex !== false) { 303 // This closing parenthesis has to inherit the matching opening parenthesis' directionality 304 if (!empty($openParDirection[$closeParIndex]) && $openParDirection[$closeParIndex] !== '?') { 305 $newState = $openParDirection[$closeParIndex]; 306 } 307 $openParDirection[$closeParIndex] = ''; 308 break; 309 } 310 self::$waitingText .= $currentLetter; 311 $workingText = substr($workingText, $currentLen); 312 if ($openParIndex !== false) { 313 // Opening parentheses always inherit the following directionality 314 while (true) { 315 if ($workingText === '') { 316 break; 317 } 318 if (str_starts_with($workingText, ' ')) { 319 // Spaces following this left parenthesis inherit the following directionality too 320 self::$waitingText .= ' '; 321 $workingText = substr($workingText, 1); 322 continue; 323 } 324 if (str_starts_with($workingText, ' ')) { 325 // Spaces following this left parenthesis inherit the following directionality too 326 self::$waitingText .= ' '; 327 $workingText = substr($workingText, 6); 328 continue; 329 } 330 break; 331 } 332 $openParDirection[$openParIndex] = '?'; 333 break 2; // double break because we're waiting for more information 334 } 335 336 // We have a digit or a "normal" special character. 337 // 338 // When this character is not at the start of the input string, it inherits the preceding directionality; 339 // at the start of the input string, it assumes the following directionality. 340 // 341 // Exceptions to this rule will be handled later during final clean-up. 342 // 343 if (self::$currentState !== '') { 344 $result .= self::$waitingText; 345 self::$waitingText = ''; 346 } 347 break 2; // double break because we're waiting for more information 348 } 349 if ($newState !== self::$currentState) { 350 // A direction change has occurred 351 self::finishCurrentSpan($result); 352 self::$previousState = self::$currentState; 353 self::$currentState = $newState; 354 self::beginCurrentSpan($result); 355 } 356 self::$waitingText .= $currentLetter; 357 $workingText = substr($workingText, $currentLen); 358 $result .= self::$waitingText; 359 self::$waitingText = ''; 360 361 foreach ($openParDirection as $index => $value) { 362 // Since we now know the proper direction, remember it for all waiting opening parentheses 363 if ($value === '?') { 364 $openParDirection[$index] = self::$currentState; 365 } 366 } 367 368 break; 369 } 370 } 371 372 // We're done. Finish last <span> if necessary 373 if ($numberState) { 374 if (self::$waitingText === '') { 375 if (self::$currentState === 'RTL') { 376 $result .= self::UTF8_PDF; 377 } 378 } elseif (self::$currentState === 'RTL') { 379 self::$waitingText .= self::UTF8_PDF; 380 } 381 } 382 self::finishCurrentSpan($result, true); 383 384 // Get rid of any waiting text 385 if (self::$waitingText !== '') { 386 if (I18N::direction() === 'rtl' && self::$currentState === 'LTR') { 387 $result .= self::START_RTL; 388 $result .= self::$waitingText; 389 $result .= self::END_RTL; 390 } else { 391 $result .= self::START_LTR; 392 $result .= self::$waitingText; 393 $result .= self::END_LTR; 394 } 395 self::$waitingText = ''; 396 } 397 398 // Lastly, do some more cleanups 399 400 // Move leading RTL numeric strings to following LTR text 401 // (this happens when the page direction is RTL and the original text begins with a number and is followed by LTR text) 402 while (substr($result, 0, self::LENGTH_START + 3) === self::START_RTL . self::UTF8_LRE) { 403 $spanEnd = strpos($result, self::END_RTL . self::START_LTR); 404 if ($spanEnd === false) { 405 break; 406 } 407 $textSpan = self::stripLrmRlm(substr($result, self::LENGTH_START + 3, $spanEnd - self::LENGTH_START - 3)); 408 if (I18N::scriptDirection(I18N::textScript($textSpan)) === 'rtl') { 409 break; 410 } 411 $result = self::START_LTR . substr($result, self::LENGTH_START, $spanEnd - self::LENGTH_START) . substr($result, $spanEnd + self::LENGTH_START + self::LENGTH_END); 412 break; 413 } 414 415 // On RTL pages, put trailing "." in RTL numeric strings into its own RTL span 416 if (I18N::direction() === 'rtl') { 417 $result = str_replace(self::UTF8_PDF . '.' . self::END_RTL, self::UTF8_PDF . self::END_RTL . self::START_RTL . '.' . self::END_RTL, $result); 418 } 419 420 // Trim trailing blanks preceding <br> in LTR text 421 while (self::$previousState !== 'RTL') { 422 if (str_contains($result, ' <LTRbr>')) { 423 $result = str_replace(' <LTRbr>', '<LTRbr>', $result); 424 continue; 425 } 426 if (str_contains($result, ' <LTRbr>')) { 427 $result = str_replace(' <LTRbr>', '<LTRbr>', $result); 428 continue; 429 } 430 if (str_contains($result, ' <br>')) { 431 $result = str_replace(' <br>', '<br>', $result); 432 continue; 433 } 434 if (str_contains($result, ' <br>')) { 435 $result = str_replace(' <br>', '<br>', $result); 436 continue; 437 } 438 break; // Neither space nor : we're done 439 } 440 441 // Trim trailing blanks preceding <br> in RTL text 442 while (true) { 443 if (str_contains($result, ' <RTLbr>')) { 444 $result = str_replace(' <RTLbr>', '<RTLbr>', $result); 445 continue; 446 } 447 if (str_contains($result, ' <RTLbr>')) { 448 $result = str_replace(' <RTLbr>', '<RTLbr>', $result); 449 continue; 450 } 451 break; // Neither space nor : we're done 452 } 453 454 // Convert '<LTRbr>' and '<RTLbr' 455 $result = str_replace([ 456 '<LTRbr>', 457 '<RTLbr>', 458 ], [ 459 self::END_LTR . '<br>' . self::START_LTR, 460 self::END_RTL . '<br>' . self::START_RTL, 461 ], $result); 462 463 // Include leading indeterminate directional text in whatever follows 464 if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL && !str_starts_with($result . "\n", '<br>')) { 465 $leadingText = ''; 466 while (true) { 467 if ($result === '') { 468 $result = $leadingText; 469 break; 470 } 471 if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL) { 472 $leadingText .= substr($result, 0, 1); 473 $result = substr($result, 1); 474 continue; 475 } 476 $result = substr($result, 0, self::LENGTH_START) . $leadingText . substr($result, self::LENGTH_START); 477 break; 478 } 479 } 480 481 // Include solitary "-" and "+" in surrounding RTL text 482 $result = str_replace([ 483 self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL, 484 self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL, 485 ], [ 486 '-', 487 '+', 488 ], $result); 489 490 //$result = strtr($result, [ 491 // self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL => '-', 492 // self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL => '+', 493 //]); 494 495 // Remove empty spans 496 $result = str_replace([ 497 self::START_LTR . self::END_LTR, 498 self::START_RTL . self::END_RTL, 499 ], '', $result); 500 501 // Finally, correct '<LTR>', '</LTR>', '<RTL>', and '</RTL>' 502 // LTR text: <span dir="ltr"> text </span> 503 // RTL text: <span dir="rtl"> text </span> 504 505 $result = str_replace([ 506 self::START_LTR, 507 self::END_LTR, 508 self::START_RTL, 509 self::END_RTL, 510 ], [ 511 '<span dir="ltr">', 512 '</span>', 513 '<span dir="rtl">', 514 '</span>', 515 ], $result); 516 517 return $result; 518 } 519 520 /** 521 * Wrap words that have an asterisk suffix in <u> and </u> tags. 522 * This should underline starred names to show the preferred name. 523 * 524 * @param string $textSpan 525 * @param string $direction 526 * 527 * @return string 528 */ 529 private static function starredName(string $textSpan, string $direction): string 530 { 531 // To avoid a TCPDF bug that mixes up the word order, insert those <u> and </u> tags 532 // only when page and span directions are identical. 533 if ($direction === strtoupper(I18N::direction())) { 534 while (true) { 535 $starPos = strpos($textSpan, '*'); 536 if ($starPos === false) { 537 break; 538 } 539 $trailingText = substr($textSpan, $starPos + 1); 540 $textSpan = substr($textSpan, 0, $starPos); 541 $wordStart = strrpos($textSpan, ' '); // Find the start of the word 542 if ($wordStart !== false) { 543 $leadingText = substr($textSpan, 0, $wordStart + 1); 544 $wordText = substr($textSpan, $wordStart + 1); 545 } else { 546 $leadingText = ''; 547 $wordText = $textSpan; 548 } 549 $textSpan = $leadingText . '<u>' . $wordText . '</u>' . $trailingText; 550 } 551 $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '<u>\1</u>', $textSpan); 552 // The is a work-around for a TCPDF bug eating blanks. 553 $textSpan = str_replace([ 554 ' <u>', 555 '</u> ', 556 ], [ 557 ' <u>', 558 '</u> ', 559 ], $textSpan); 560 } else { 561 // Text and page directions differ: remove the <span> and </span> 562 $textSpan = preg_replace('~(.*)\*~', '\1', $textSpan); 563 $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '\1', $textSpan); 564 } 565 566 return $textSpan; 567 } 568 569 /** 570 * Get the next character from an input string 571 * 572 * @param string $text 573 * @param int $offset 574 * 575 * @return array{letter:string,length:int} 576 */ 577 private static function getChar(string $text, int $offset): array 578 { 579 if ($text === '') { 580 return [ 581 'letter' => '', 582 'length' => 0, 583 ]; 584 } 585 586 $char = substr($text, $offset, 1); 587 $length = 1; 588 if ((ord($char) & 0xE0) === 0xC0) { 589 $length = 2; 590 } 591 if ((ord($char) & 0xF0) === 0xE0) { 592 $length = 3; 593 } 594 if ((ord($char) & 0xF8) === 0xF0) { 595 $length = 4; 596 } 597 $letter = substr($text, $offset, $length); 598 599 return [ 600 'letter' => $letter, 601 'length' => $length, 602 ]; 603 } 604 605 /** 606 * Insert <br> into current span 607 * 608 * @param string $result 609 * 610 * @return void 611 */ 612 private static function breakCurrentSpan(string &$result): void 613 { 614 // Interrupt the current span, insert that <br>, and then continue the current span 615 $result .= self::$waitingText; 616 self::$waitingText = ''; 617 618 $breakString = '<' . self::$currentState . 'br>'; 619 $result .= $breakString; 620 } 621 622 /** 623 * Begin current span 624 * 625 * @param string $result 626 * 627 * @return void 628 */ 629 private static function beginCurrentSpan(string &$result): void 630 { 631 if (self::$currentState === 'LTR') { 632 $result .= self::START_LTR; 633 } 634 if (self::$currentState === 'RTL') { 635 $result .= self::START_RTL; 636 } 637 638 self::$posSpanStart = strlen($result); 639 } 640 641 /** 642 * Finish current span 643 * 644 * @param string $result 645 * @param bool $theEnd 646 * 647 * @return void 648 */ 649 private static function finishCurrentSpan(string &$result, bool $theEnd = false): void 650 { 651 $textSpan = substr($result, self::$posSpanStart); 652 $result = substr($result, 0, self::$posSpanStart); 653 654 // Get rid of empty spans, so that our check for presence of RTL will work 655 $result = str_replace([ 656 self::START_LTR . self::END_LTR, 657 self::START_RTL . self::END_RTL, 658 ], '', $result); 659 660 // Look for numeric strings that are times (hh:mm:ss). These have to be separated from surrounding numbers. 661 $tempResult = ''; 662 while ($textSpan !== '') { 663 $posColon = strpos($textSpan, ':'); 664 if ($posColon === false) { 665 break; 666 } // No more possible time strings 667 $posLRE = strpos($textSpan, self::UTF8_LRE); 668 if ($posLRE === false) { 669 break; 670 } // No more numeric strings 671 $posPDF = strpos($textSpan, self::UTF8_PDF, $posLRE); 672 if ($posPDF === false) { 673 break; 674 } // No more numeric strings 675 676 $tempResult .= substr($textSpan, 0, $posLRE + 3); // Copy everything preceding the numeric string 677 $numericString = substr($textSpan, $posLRE + 3, $posPDF - $posLRE); // Separate the entire numeric string 678 $textSpan = substr($textSpan, $posPDF + 3); 679 $posColon = strpos($numericString, ':'); 680 if ($posColon === false) { 681 // Nothing that looks like a time here 682 $tempResult .= $numericString; 683 continue; 684 } 685 $posBlank = strpos($numericString . ' ', ' '); 686 $posNbsp = strpos($numericString . ' ', ' '); 687 if ($posBlank < $posNbsp) { 688 $posSeparator = $posBlank; 689 $lengthSeparator = 1; 690 } else { 691 $posSeparator = $posNbsp; 692 $lengthSeparator = 6; 693 } 694 if ($posColon > $posSeparator) { 695 // We have a time string preceded by a blank: Exclude that blank from the numeric string 696 $tempResult .= substr($numericString, 0, $posSeparator); 697 $tempResult .= self::UTF8_PDF; 698 $tempResult .= substr($numericString, $posSeparator, $lengthSeparator); 699 $tempResult .= self::UTF8_LRE; 700 $numericString = substr($numericString, $posSeparator + $lengthSeparator); 701 } 702 703 $posBlank = strpos($numericString, ' '); 704 $posNbsp = strpos($numericString, ' '); 705 if ($posBlank === false && $posNbsp === false) { 706 // The time string isn't followed by a blank 707 $textSpan = $numericString . $textSpan; 708 continue; 709 } 710 711 // We have a time string followed by a blank: Exclude that blank from the numeric string 712 if ($posBlank === false) { 713 $posSeparator = $posNbsp; 714 $lengthSeparator = 6; 715 } elseif ($posNbsp === false) { 716 $posSeparator = $posBlank; 717 $lengthSeparator = 1; 718 } elseif ($posBlank < $posNbsp) { 719 $posSeparator = $posBlank; 720 $lengthSeparator = 1; 721 } else { 722 $posSeparator = $posNbsp; 723 $lengthSeparator = 6; 724 } 725 $tempResult .= substr($numericString, 0, $posSeparator); 726 $tempResult .= self::UTF8_PDF; 727 $tempResult .= substr($numericString, $posSeparator, $lengthSeparator); 728 $posSeparator += $lengthSeparator; 729 $numericString = substr($numericString, $posSeparator); 730 $textSpan = self::UTF8_LRE . $numericString . $textSpan; 731 } 732 $textSpan = $tempResult . $textSpan; 733 $trailingBlanks = ''; 734 $trailingBreaks = ''; 735 736 /* ****************************** LTR text handling ******************************** */ 737 738 if (self::$currentState === 'LTR') { 739 // Move trailing numeric strings to the following RTL text. Include any blanks preceding or following the numeric text too. 740 if (I18N::direction() === 'rtl' && self::$previousState === 'RTL' && !$theEnd) { 741 $trailingString = ''; 742 $savedSpan = $textSpan; 743 while ($textSpan !== '') { 744 // Look for trailing spaces and tentatively move them 745 if (str_ends_with($textSpan, ' ')) { 746 $trailingString = ' ' . $trailingString; 747 $textSpan = substr($textSpan, 0, -1); 748 continue; 749 } 750 if (str_ends_with($textSpan, ' ')) { 751 $trailingString = ' ' . $trailingString; 752 $textSpan = substr($textSpan, 0, -1); 753 continue; 754 } 755 if (substr($textSpan, -3) !== self::UTF8_PDF) { 756 // There is no trailing numeric string 757 $textSpan = $savedSpan; 758 break; 759 } 760 761 // We have a numeric string 762 $posStartNumber = strrpos($textSpan, self::UTF8_LRE); 763 if ($posStartNumber === false) { 764 $posStartNumber = 0; 765 } 766 $trailingString = substr($textSpan, $posStartNumber) . $trailingString; 767 $textSpan = substr($textSpan, 0, $posStartNumber); 768 769 // Look for more spaces and move them too 770 while ($textSpan !== '') { 771 if (str_ends_with($textSpan, ' ')) { 772 $trailingString = ' ' . $trailingString; 773 $textSpan = substr($textSpan, 0, -1); 774 continue; 775 } 776 if (str_ends_with($textSpan, ' ')) { 777 $trailingString = ' ' . $trailingString; 778 $textSpan = substr($textSpan, 0, -1); 779 continue; 780 } 781 break; 782 } 783 784 self::$waitingText = $trailingString . self::$waitingText; 785 break; 786 } 787 } 788 789 $savedSpan = $textSpan; 790 // Move any trailing <br>, optionally preceded or followed by blanks, outside this LTR span 791 while ($textSpan !== '') { 792 if (str_ends_with($textSpan, ' ')) { 793 $trailingBlanks = ' ' . $trailingBlanks; 794 $textSpan = substr($textSpan, 0, -1); 795 continue; 796 } 797 if (str_ends_with('......' . $textSpan, ' ')) { 798 $trailingBlanks = ' ' . $trailingBlanks; 799 $textSpan = substr($textSpan, 0, -6); 800 continue; 801 } 802 break; 803 } 804 while (str_ends_with($textSpan, '<LTRbr>')) { 805 $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span 806 $textSpan = substr($textSpan, 0, -7); 807 } 808 if ($trailingBreaks !== '') { 809 while ($textSpan !== '') { 810 if (str_ends_with($textSpan, ' ')) { 811 $trailingBreaks = ' ' . $trailingBreaks; 812 $textSpan = substr($textSpan, 0, -1); 813 continue; 814 } 815 if (str_ends_with($textSpan, ' ')) { 816 $trailingBreaks = ' ' . $trailingBreaks; 817 $textSpan = substr($textSpan, 0, -6); 818 continue; 819 } 820 break; 821 } 822 self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span 823 } else { 824 $textSpan = $savedSpan; 825 } 826 827 $trailingBlanks = ''; 828 $trailingPunctuation = ''; 829 $trailingID = ''; 830 $trailingSeparator = ''; 831 $leadingSeparator = ''; 832 833 while (I18N::direction() === 'rtl') { 834 if (str_contains($result, self::START_RTL)) { 835 // Remove trailing blanks for inclusion in a separate LTR span 836 while ($textSpan !== '') { 837 if (str_ends_with($textSpan, ' ')) { 838 $trailingBlanks = ' ' . $trailingBlanks; 839 $textSpan = substr($textSpan, 0, -1); 840 continue; 841 } 842 if (str_ends_with($textSpan, ' ')) { 843 $trailingBlanks = ' ' . $trailingBlanks; 844 $textSpan = substr($textSpan, 0, -1); 845 continue; 846 } 847 break; 848 } 849 850 // Remove trailing punctuation for inclusion in a separate LTR span 851 if ($textSpan === '') { 852 $trailingChar = "\n"; 853 } else { 854 $trailingChar = substr($textSpan, -1); 855 } 856 if (str_contains(self::PUNCTUATION, $trailingChar)) { 857 $trailingPunctuation = $trailingChar; 858 $textSpan = substr($textSpan, 0, -1); 859 } 860 } 861 862 // Remove trailing ID numbers that look like "(xnnn)" for inclusion in a separate LTR span 863 while (true) { 864 if (!str_ends_with($textSpan, ')')) { 865 break; 866 } // There is no trailing ')' 867 $posLeftParen = strrpos($textSpan, '('); 868 if ($posLeftParen === false) { 869 break; 870 } // There is no leading '(' 871 $temp = self::stripLrmRlm(substr($textSpan, $posLeftParen)); // Get rid of UTF8 control codes 872 873 // If the parenthesized text doesn't look like an ID number, 874 // we don't want to touch it. 875 // This check won’t work if somebody uses ID numbers with an unusual format. 876 $offset = 1; 877 $charArray = self::getChar($temp, $offset); // Get 1st character of parenthesized text 878 if (str_contains(self::NUMBERS, $charArray['letter'])) { 879 break; 880 } 881 $offset += $charArray['length']; // Point at 2nd character of parenthesized text 882 if (!str_contains(self::NUMBERS, substr($temp, $offset, 1))) { 883 break; 884 } 885 // 1st character of parenthesized text is alpha, 2nd character is a digit; last has to be a digit too 886 if (!str_contains(self::NUMBERS, substr($temp, -2, 1))) { 887 break; 888 } 889 890 $trailingID = substr($textSpan, $posLeftParen); 891 $textSpan = substr($textSpan, 0, $posLeftParen); 892 break; 893 } 894 895 // Look for " - " or blank preceding the ID number and remove it for inclusion in a separate LTR span 896 if ($trailingID !== '') { 897 while ($textSpan !== '') { 898 if (str_ends_with($textSpan, ' ')) { 899 $trailingSeparator = ' ' . $trailingSeparator; 900 $textSpan = substr($textSpan, 0, -1); 901 continue; 902 } 903 if (str_ends_with($textSpan, ' ')) { 904 $trailingSeparator = ' ' . $trailingSeparator; 905 $textSpan = substr($textSpan, 0, -6); 906 continue; 907 } 908 if (str_ends_with($textSpan, '-')) { 909 $trailingSeparator = '-' . $trailingSeparator; 910 $textSpan = substr($textSpan, 0, -1); 911 continue; 912 } 913 break; 914 } 915 } 916 917 // Look for " - " preceding the text and remove it for inclusion in a separate LTR span 918 $foundSeparator = false; 919 $savedSpan = $textSpan; 920 while ($textSpan !== '') { 921 if (str_starts_with($textSpan, ' ')) { 922 $leadingSeparator = ' ' . $leadingSeparator; 923 $textSpan = substr($textSpan, 1); 924 continue; 925 } 926 if (str_starts_with($textSpan, ' ')) { 927 $leadingSeparator = ' ' . $leadingSeparator; 928 $textSpan = substr($textSpan, 6); 929 continue; 930 } 931 if (str_starts_with($textSpan, '-')) { 932 $leadingSeparator = '-' . $leadingSeparator; 933 $textSpan = substr($textSpan, 1); 934 $foundSeparator = true; 935 continue; 936 } 937 break; 938 } 939 if (!$foundSeparator) { 940 $textSpan = $savedSpan; 941 $leadingSeparator = ''; 942 } 943 break; 944 } 945 946 // We're done: finish the span 947 $textSpan = self::starredName($textSpan, 'LTR'); // Wrap starred name in <u> and </u> tags 948 while (true) { 949 // Remove blanks that precede <LTRbr> 950 if (str_contains($textSpan, ' <LTRbr>')) { 951 $textSpan = str_replace(' <LTRbr>', '<LTRbr>', $textSpan); 952 continue; 953 } 954 if (str_contains($textSpan, ' <LTRbr>')) { 955 $textSpan = str_replace(' <LTRbr>', '<LTRbr>', $textSpan); 956 continue; 957 } 958 break; 959 } 960 if ($leadingSeparator !== '') { 961 $result .= self::START_LTR . $leadingSeparator . self::END_LTR; 962 } 963 $result .= $textSpan . self::END_LTR; 964 if ($trailingSeparator !== '') { 965 $result .= self::START_LTR . $trailingSeparator . self::END_LTR; 966 } 967 if ($trailingID !== '') { 968 $result .= self::START_LTR . $trailingID . self::END_LTR; 969 } 970 if ($trailingPunctuation !== '') { 971 $result .= self::START_LTR . $trailingPunctuation . self::END_LTR; 972 } 973 if ($trailingBlanks !== '') { 974 $result .= self::START_LTR . $trailingBlanks . self::END_LTR; 975 } 976 } 977 978 /* ****************************** RTL text handling ******************************** */ 979 980 if (self::$currentState === 'RTL') { 981 $savedSpan = $textSpan; 982 983 // Move any trailing <br>, optionally followed by blanks, outside this RTL span 984 while ($textSpan !== '') { 985 if (str_ends_with($textSpan, ' ')) { 986 $trailingBlanks = ' ' . $trailingBlanks; 987 $textSpan = substr($textSpan, 0, -1); 988 continue; 989 } 990 if (str_ends_with('......' . $textSpan, ' ')) { 991 $trailingBlanks = ' ' . $trailingBlanks; 992 $textSpan = substr($textSpan, 0, -6); 993 continue; 994 } 995 break; 996 } 997 while (str_ends_with($textSpan, '<RTLbr>')) { 998 $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span 999 $textSpan = substr($textSpan, 0, -7); 1000 } 1001 if ($trailingBreaks !== '') { 1002 self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span 1003 } else { 1004 $textSpan = $savedSpan; 1005 } 1006 1007 // Move trailing numeric strings to the following LTR text. Include any blanks preceding or following the numeric text too. 1008 if (!$theEnd && I18N::direction() !== 'rtl') { 1009 $trailingString = ''; 1010 $savedSpan = $textSpan; 1011 while ($textSpan !== '') { 1012 // Look for trailing spaces and tentatively move them 1013 if (str_ends_with($textSpan, ' ')) { 1014 $trailingString = ' ' . $trailingString; 1015 $textSpan = substr($textSpan, 0, -1); 1016 continue; 1017 } 1018 if (str_ends_with($textSpan, ' ')) { 1019 $trailingString = ' ' . $trailingString; 1020 $textSpan = substr($textSpan, 0, -1); 1021 continue; 1022 } 1023 if (substr($textSpan, -3) !== self::UTF8_PDF) { 1024 // There is no trailing numeric string 1025 $textSpan = $savedSpan; 1026 break; 1027 } 1028 1029 // We have a numeric string 1030 $posStartNumber = strrpos($textSpan, self::UTF8_LRE); 1031 if ($posStartNumber === false) { 1032 $posStartNumber = 0; 1033 } 1034 $trailingString = substr($textSpan, $posStartNumber) . $trailingString; 1035 $textSpan = substr($textSpan, 0, $posStartNumber); 1036 1037 // Look for more spaces and move them too 1038 while ($textSpan !== '') { 1039 if (str_ends_with($textSpan, ' ')) { 1040 $trailingString = ' ' . $trailingString; 1041 $textSpan = substr($textSpan, 0, -1); 1042 continue; 1043 } 1044 if (str_ends_with($textSpan, ' ')) { 1045 $trailingString = ' ' . $trailingString; 1046 $textSpan = substr($textSpan, 0, -1); 1047 continue; 1048 } 1049 break; 1050 } 1051 1052 self::$waitingText = $trailingString . self::$waitingText; 1053 break; 1054 } 1055 } 1056 1057 // Trailing " - " needs to be prefixed to the following span 1058 if (!$theEnd && str_ends_with('...' . $textSpan, ' - ')) { 1059 $textSpan = substr($textSpan, 0, -3); 1060 self::$waitingText = ' - ' . self::$waitingText; 1061 } 1062 1063 while (I18N::direction() === 'rtl') { 1064 // Look for " - " preceding <RTLbr> and relocate it to the front of the string 1065 $posDashString = strpos($textSpan, ' - <RTLbr>'); 1066 if ($posDashString === false) { 1067 break; 1068 } 1069 $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>'); 1070 if ($posStringStart === false) { 1071 $posStringStart = 0; 1072 } else { 1073 $posStringStart += 9; 1074 } // Point to the first char following the last <RTLbr> 1075 1076 $textSpan = substr($textSpan, 0, $posStringStart) . ' - ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 3); 1077 } 1078 1079 // Strip leading spaces from the RTL text 1080 $countLeadingSpaces = 0; 1081 while ($textSpan !== '') { 1082 if (str_starts_with($textSpan, ' ')) { 1083 $countLeadingSpaces++; 1084 $textSpan = substr($textSpan, 1); 1085 continue; 1086 } 1087 if (str_starts_with($textSpan, ' ')) { 1088 $countLeadingSpaces++; 1089 $textSpan = substr($textSpan, 6); 1090 continue; 1091 } 1092 break; 1093 } 1094 1095 // Strip trailing spaces from the RTL text 1096 $countTrailingSpaces = 0; 1097 while ($textSpan !== '') { 1098 if (str_ends_with($textSpan, ' ')) { 1099 $countTrailingSpaces++; 1100 $textSpan = substr($textSpan, 0, -1); 1101 continue; 1102 } 1103 if (str_ends_with($textSpan, ' ')) { 1104 $countTrailingSpaces++; 1105 $textSpan = substr($textSpan, 0, -6); 1106 continue; 1107 } 1108 break; 1109 } 1110 1111 // Look for trailing " -", reverse it, and relocate it to the front of the string 1112 if (str_ends_with($textSpan, ' -')) { 1113 $posDashString = strlen($textSpan) - 2; 1114 $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>'); 1115 if ($posStringStart === false) { 1116 $posStringStart = 0; 1117 } else { 1118 $posStringStart += 9; 1119 } // Point to the first char following the last <RTLbr> 1120 1121 $textSpan = substr($textSpan, 0, $posStringStart) . '- ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 2); 1122 } 1123 1124 if ($countLeadingSpaces !== 0) { 1125 $newLength = strlen($textSpan) + $countLeadingSpaces; 1126 $textSpan = str_pad($textSpan, $newLength, ' ', I18N::direction() === 'rtl' ? STR_PAD_LEFT : STR_PAD_RIGHT); 1127 } 1128 if ($countTrailingSpaces !== 0) { 1129 if (I18N::direction() === 'ltr') { 1130 if ($trailingBreaks === '') { 1131 // Move trailing RTL spaces to front of following LTR span 1132 $newLength = strlen(self::$waitingText) + $countTrailingSpaces; 1133 self::$waitingText = str_pad(self::$waitingText, $newLength, ' ', STR_PAD_LEFT); 1134 } 1135 } else { 1136 $newLength = strlen($textSpan) + $countTrailingSpaces; 1137 $textSpan = str_pad($textSpan, $newLength); 1138 } 1139 } 1140 1141 // We're done: finish the span 1142 $textSpan = self::starredName($textSpan, 'RTL'); // Wrap starred name in <u> and </u> tags 1143 $result .= $textSpan . self::END_RTL; 1144 } 1145 1146 if (self::$currentState !== 'LTR' && self::$currentState !== 'RTL') { 1147 $result .= $textSpan; 1148 } 1149 1150 $result .= $trailingBreaks; // Get rid of any waiting <br> 1151 } 1152} 1153