1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2022 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees\Report; 21 22use Fisharebest\Webtrees\I18N; 23 24use function ord; 25use function preg_replace; 26use function str_contains; 27use function str_pad; 28use function str_replace; 29use function strlen; 30use function strpos; 31use function strrpos; 32use function strtolower; 33use function strtoupper; 34use function substr; 35 36use const STR_PAD_LEFT; 37use const STR_PAD_RIGHT; 38 39/** 40 * RTL Functions for use in the PDF reports 41 */ 42class RightToLeftSupport 43{ 44 private const UTF8_LRM = "\xE2\x80\x8E"; // U+200E (Left to Right mark: zero-width character with LTR directionality) 45 private const UTF8_RLM = "\xE2\x80\x8F"; // U+200F (Right to Left mark: zero-width character with RTL directionality) 46 private const UTF8_LRO = "\xE2\x80\xAD"; // U+202D (Left to Right override: force everything following to LTR mode) 47 private const UTF8_RLO = "\xE2\x80\xAE"; // U+202E (Right to Left override: force everything following to RTL mode) 48 private const UTF8_LRE = "\xE2\x80\xAA"; // U+202A (Left to Right embedding: treat everything following as LTR text) 49 private const UTF8_RLE = "\xE2\x80\xAB"; // U+202B (Right to Left embedding: treat everything following 
as RTL text) 50 private const UTF8_PDF = "\xE2\x80\xAC"; // U+202C (Pop directional formatting: restore state prior to last LRO, RLO, LRE, RLE) 51 52 private const OPEN_PARENTHESES = '([{'; 53 54 private const CLOSE_PARENTHESES = ')]}'; 55 56 private const NUMBERS = '0123456789'; 57 58 private const NUMBER_PREFIX = '+-'; // Treat these like numbers when at beginning or end of numeric strings 59 60 private const NUMBER_PUNCTUATION = '- ,.:/'; // Treat these like numbers when inside numeric strings 61 62 private const PUNCTUATION = ',.:;?!'; 63 64 // Markup 65 private const START_LTR = '<LTR>'; 66 private const END_LTR = '</LTR>'; 67 private const START_RTL = '<RTL>'; 68 private const END_RTL = '</RTL>'; 69 private const LENGTH_START = 5; 70 private const LENGTH_END = 6; 71 72 /* Were we previously processing LTR or RTL. */ 73 private static string $previousState; 74 75 /* Are we currently processing LTR or RTL. */ 76 private static string $currentState; 77 78 /* Text waiting to be processed. */ 79 private static string $waitingText; 80 81 /* Offset into the text. */ 82 private static int $posSpanStart; 83 84 /** 85 * This function strips ‎ and ‏ from the input string. It should be used for all 86 * text that has been passed through the PrintReady() function before that text is stored 87 * in the database. The database should NEVER contain these characters. 
*
     * Besides the LRM and RLM marks, the embedding (LRE, RLE), override (LRO, RLO) and
     * pop (PDF) control codes are removed as well, together with the textual entities
     * in lower and upper case.
     *
     * @param string $inputText The string from which the &lrm; and &rlm; characters should be stripped
     *
     * @return string The input string, with &lrm; and &rlm; stripped
     */
    private static function stripLrmRlm(string $inputText): string
    {
        return str_replace([
            self::UTF8_LRM,
            self::UTF8_RLM,
            self::UTF8_LRO,
            self::UTF8_RLO,
            self::UTF8_LRE,
            self::UTF8_RLE,
            self::UTF8_PDF,
            '&lrm;',
            '&rlm;',
            '&LRM;',
            '&RLM;',
        ], '', $inputText);
    }

    /**
     * This function encapsulates all texts in the input with <span dir='xxx'> and </span>
     * according to the directionality specified.
     *
     * The input is scanned one character at a time. HTML elements, HTML entities and
     * TCPDF "{{...}}" directives are copied through as a unit. Runs of digits (with optional
     * sign and embedded numeric punctuation) are bracketed with LRE/PDF control codes while
     * the current span is RTL. Every change of directionality closes the current span and
     * opens a new one, using the temporary <LTR>/<RTL> markers that are converted to real
     * <span dir="..."> elements in the final clean-up.
     *
     * The method uses (and overwrites) the static properties $previousState, $currentState,
     * $waitingText and $posSpanStart as working storage.
     *
     * @param string $inputText Raw input
     *
     * @return string The string with all texts encapsulated as required
     */
    public static function spanLtrRtl(string $inputText): string
    {
        if ($inputText === '') {
            // Nothing to do
            return '';
        }

        $workingText = str_replace("\n", '<br>', $inputText);
        $workingText = str_replace([
            '<span class="starredname"><br>',
            '<span<br>class="starredname">',
        ], '<br><span class="starredname">', $workingText); // Reposition some incorrectly placed line breaks
        $workingText = self::stripLrmRlm($workingText); // Get rid of any existing UTF8 control codes

        self::$previousState = '';
        self::$currentState = strtoupper(I18N::direction());
        $numberState = false; // Set when we're inside a numeric string
        $result = '';
        self::$waitingText = '';
        $openParDirection = [];

        self::beginCurrentSpan($result);

        while ($workingText !== '') {
            $charArray = self::getChar($workingText, 0); // Get the next ASCII or UTF-8 character
            $currentLetter = $charArray['letter'];
            $currentLen = $charArray['length'];

            $openParIndex = strpos(self::OPEN_PARENTHESES, $currentLetter); // Which opening parenthesis is this?
            $closeParIndex = strpos(self::CLOSE_PARENTHESES, $currentLetter); // Which closing parenthesis is this?

            switch ($currentLetter) {
                case '<':
                    // Assume this '<' starts an HTML element
                    $endPos = strpos($workingText, '>'); // look for the terminating '>'
                    if ($endPos === false) {
                        $endPos = 0;
                    }
                    $currentLen += $endPos;
                    $element = substr($workingText, 0, $currentLen);
                    $temp = strtolower(substr($element, 0, 3));
                    if (strlen($element) < 7 && $temp === '<br') {
                        // A line break ends any numeric string that is in progress
                        if ($numberState) {
                            $numberState = false;
                            if (self::$currentState === 'RTL') {
                                self::$waitingText .= self::UTF8_PDF;
                            }
                        }
                        self::breakCurrentSpan($result);
                    } elseif (self::$waitingText === '') {
                        $result .= $element;
                    } else {
                        self::$waitingText .= $element;
                    }
                    $workingText = substr($workingText, $currentLen);
                    break;
                case '&':
                    // Assume this '&' starts an HTML entity
                    $endPos = strpos($workingText, ';'); // look for the terminating ';'
                    if ($endPos === false) {
                        $endPos = 0;
                    }
                    $currentLen += $endPos;
                    $entity = substr($workingText, 0, $currentLen);
                    if (strtolower($entity) === '&nbsp;') {
                        $entity = '&nbsp;'; // Ensure consistent case for this entity
                    }
                    if (self::$waitingText === '') {
                        $result .= $entity;
                    } else {
                        self::$waitingText .= $entity;
                    }
                    $workingText = substr($workingText, $currentLen);
                    break;
                case '{':
                    if (substr($workingText, 1, 1) === '{') {
                        // Assume this '{{' starts a TCPDF directive
                        $endPos = strpos($workingText, '}}'); // look for the terminating '}}'
                        if ($endPos === false) {
                            $endPos = 0;
                        }
                        $currentLen = $endPos + 2;
                        $directive = substr($workingText, 0, $currentLen);
                        $workingText = substr($workingText, $currentLen);
                        $result .= self::$waitingText . $directive;
                        self::$waitingText = '';
                        break;
                    }
                    // no break
                default:
                    // Look for strings of numbers with optional leading or trailing + or -
                    // and with optional embedded numeric punctuation
                    if ($numberState) {
                        // If we're inside a numeric string, look for reasons to end it
                        $offset = 0; // Be sure to look at the current character first
                        $charArray = self::getChar($workingText . "\n", $offset);
                        if (!str_contains(self::NUMBERS, $charArray['letter'])) {
                            // This is not a digit. Is it numeric punctuation?
                            if (substr($workingText . "\n", $offset, 6) === '&nbsp;') {
                                $offset += 6; // This could be numeric punctuation
                            } elseif (str_contains(self::NUMBER_PUNCTUATION, $charArray['letter'])) {
                                $offset += $charArray['length']; // This could be numeric punctuation
                            }
                            // If the next character is a digit, the current character is numeric punctuation
                            $charArray = self::getChar($workingText . "\n", $offset);
                            if (!str_contains(self::NUMBERS, $charArray['letter'])) {
                                // This is not a digit. End the run of digits and punctuation.
                                $numberState = false;
                                if (self::$currentState === 'RTL') {
                                    if (!str_contains(self::NUMBER_PREFIX, $currentLetter)) {
                                        $currentLetter = self::UTF8_PDF . $currentLetter;
                                    } else {
                                        $currentLetter .= self::UTF8_PDF; // Include a trailing + or - in the run
                                    }
                                }
                            }
                        }
                    } elseif (str_contains(self::NUMBER_PREFIX, $currentLetter)) {
                        // If we're outside a numeric string, look for reasons to start it
                        // This might be a number lead-in
                        $offset = $currentLen;
                        $nextChar = substr($workingText . "\n", $offset, 1);
                        if (str_contains(self::NUMBERS, $nextChar)) {
                            $numberState = true; // We found a digit: the lead-in is therefore numeric
                            if (self::$currentState === 'RTL') {
                                $currentLetter = self::UTF8_LRE . $currentLetter;
                            }
                        }
                    } elseif (str_contains(self::NUMBERS, $currentLetter)) {
                        $numberState = true; // The current letter is a digit
                        if (self::$currentState === 'RTL') {
                            $currentLetter = self::UTF8_LRE . $currentLetter;
                        }
                    }

                    // Determine the directionality of the current UTF-8 character
                    $newState = self::$currentState;

                    // This loop runs at most once; it only exists so that "break" can be used
                    // to jump to the common code that follows it, and "break 2" to skip that code.
                    while (true) {
                        if (I18N::scriptDirection(I18N::textScript($currentLetter)) === 'rtl') {
                            if (self::$currentState === '') {
                                $newState = 'RTL';
                                break;
                            }

                            if (self::$currentState === 'RTL') {
                                break;
                            }
                            // Switch to RTL only if this isn't a solitary RTL letter
                            $tempText = substr($workingText, $currentLen);
                            while ($tempText !== '') {
                                $nextCharArray = self::getChar($tempText, 0);
                                $nextLetter = $nextCharArray['letter'];
                                $nextLen = $nextCharArray['length'];
                                $tempText = substr($tempText, $nextLen);

                                if (I18N::scriptDirection(I18N::textScript($nextLetter)) === 'rtl') {
                                    $newState = 'RTL';
                                    break 2;
                                }

                                if (str_contains(self::PUNCTUATION, $nextLetter) || str_contains(self::OPEN_PARENTHESES, $nextLetter)) {
                                    $newState = 'RTL';
                                    break 2;
                                }

                                if ($nextLetter === ' ') {
                                    break;
                                }
                                // An '&' followed by 'nbsp;' counts as a blank too
                                $nextLetter .= substr($tempText . "\n", 0, 5);
                                if ($nextLetter === '&nbsp;') {
                                    break;
                                }
                            }
                            // This is a solitary RTL letter : wrap it in UTF8 control codes to force LTR directionality
                            $currentLetter = self::UTF8_LRO . $currentLetter . self::UTF8_PDF;
                            $newState = 'LTR';
                            break;
                        }
                        if ($currentLen !== 1 || ($currentLetter >= 'A' && $currentLetter <= 'Z') || ($currentLetter >= 'a' && $currentLetter <= 'z')) {
                            // Since it’s neither Hebrew nor Arabic, this UTF-8 character or ASCII letter must be LTR
                            $newState = 'LTR';
                            break;
                        }
                        if ($closeParIndex !== false) {
                            // This closing parenthesis has to inherit the matching opening parenthesis' directionality
                            if (!empty($openParDirection[$closeParIndex]) && $openParDirection[$closeParIndex] !== '?') {
                                $newState = $openParDirection[$closeParIndex];
                            }
                            $openParDirection[$closeParIndex] = '';
                            break;
                        }
                        if ($openParIndex !== false) {
                            // Opening parentheses always inherit the following directionality
                            self::$waitingText .= $currentLetter;
                            $workingText = substr($workingText, $currentLen);
                            while (true) {
                                if ($workingText === '') {
                                    break;
                                }
                                if (str_starts_with($workingText, ' ')) {
                                    // Spaces following this left parenthesis inherit the following directionality too
                                    self::$waitingText .= ' ';
                                    $workingText = substr($workingText, 1);
                                    continue;
                                }
                                if (str_starts_with($workingText, '&nbsp;')) {
                                    // Spaces following this left parenthesis inherit the following directionality too
                                    self::$waitingText .= '&nbsp;';
                                    $workingText = substr($workingText, 6);
                                    continue;
                                }
                                break;
                            }
                            $openParDirection[$openParIndex] = '?';
                            break 2; // double break because we're waiting for more information
                        }

                        // We have a digit or a "normal" special character.
                        //
                        // When this character is not at the start of the input string, it inherits the preceding directionality;
                        // at the start of the input string, it assumes the following directionality.
                        //
                        // Exceptions to this rule will be handled later during final clean-up.
                        //
                        self::$waitingText .= $currentLetter;
                        $workingText = substr($workingText, $currentLen);
                        if (self::$currentState !== '') {
                            $result .= self::$waitingText;
                            self::$waitingText = '';
                        }
                        break 2; // double break because we're waiting for more information
                    }
                    if ($newState !== self::$currentState) {
                        // A direction change has occurred
                        self::finishCurrentSpan($result);
                        self::$previousState = self::$currentState;
                        self::$currentState = $newState;
                        self::beginCurrentSpan($result);
                    }
                    self::$waitingText .= $currentLetter;
                    $workingText = substr($workingText, $currentLen);
                    $result .= self::$waitingText;
                    self::$waitingText = '';

                    foreach ($openParDirection as $index => $value) {
                        // Since we now know the proper direction, remember it for all waiting opening parentheses
                        if ($value === '?') {
                            $openParDirection[$index] = self::$currentState;
                        }
                    }

                    break;
            }
        }

        // We're done. Finish last <span> if necessary
        if ($numberState) {
            if (self::$waitingText === '') {
                if (self::$currentState === 'RTL') {
                    $result .= self::UTF8_PDF;
                }
            } elseif (self::$currentState === 'RTL') {
                self::$waitingText .= self::UTF8_PDF;
            }
        }
        self::finishCurrentSpan($result, true);

        // Get rid of any waiting text
        if (self::$waitingText !== '') {
            if (I18N::direction() === 'rtl' && self::$currentState === 'LTR') {
                $result .= self::START_RTL;
                $result .= self::$waitingText;
                $result .= self::END_RTL;
            } else {
                $result .= self::START_LTR;
                $result .= self::$waitingText;
                $result .= self::END_LTR;
            }
            self::$waitingText = '';
        }

        // Lastly, do some more cleanups

        // Move leading RTL numeric strings to following LTR text
        // (this happens when the page direction is RTL and the original text begins with a number and is followed by LTR text)
        // Every path through the body ends in "break", so this runs at most once.
        while (substr($result, 0, self::LENGTH_START + 3) === self::START_RTL . self::UTF8_LRE) {
            $spanEnd = strpos($result, self::END_RTL . self::START_LTR);
            if ($spanEnd === false) {
                break;
            }
            $textSpan = self::stripLrmRlm(substr($result, self::LENGTH_START + 3, $spanEnd - self::LENGTH_START - 3));
            if (I18N::scriptDirection(I18N::textScript($textSpan)) === 'rtl') {
                break;
            }
            $result = self::START_LTR . substr($result, self::LENGTH_START, $spanEnd - self::LENGTH_START) . substr($result, $spanEnd + self::LENGTH_START + self::LENGTH_END);
            break;
        }

        // On RTL pages, put trailing "." in RTL numeric strings into its own RTL span
        if (I18N::direction() === 'rtl') {
            $result = str_replace(self::UTF8_PDF . '.' . self::END_RTL, self::UTF8_PDF . self::END_RTL . self::START_RTL . '.' . self::END_RTL, $result);
        }

        // Trim trailing blanks preceding <br> in LTR text
        while (self::$previousState !== 'RTL') {
            if (str_contains($result, ' <LTRbr>')) {
                $result = str_replace(' <LTRbr>', '<LTRbr>', $result);
                continue;
            }
            if (str_contains($result, '&nbsp;<LTRbr>')) {
                $result = str_replace('&nbsp;<LTRbr>', '<LTRbr>', $result);
                continue;
            }
            if (str_contains($result, ' <br>')) {
                $result = str_replace(' <br>', '<br>', $result);
                continue;
            }
            if (str_contains($result, '&nbsp;<br>')) {
                $result = str_replace('&nbsp;<br>', '<br>', $result);
                continue;
            }
            break; // Neither space nor &nbsp; : we're done
        }

        // Trim trailing blanks preceding <br> in RTL text
        while (true) {
            if (str_contains($result, ' <RTLbr>')) {
                $result = str_replace(' <RTLbr>', '<RTLbr>', $result);
                continue;
            }
            if (str_contains($result, '&nbsp;<RTLbr>')) {
                $result = str_replace('&nbsp;<RTLbr>', '<RTLbr>', $result);
                continue;
            }
            break; // Neither space nor &nbsp; : we're done
        }

        // Convert '<LTRbr>' and '<RTLbr>'
        $result = str_replace([
            '<LTRbr>',
            '<RTLbr>',
        ], [
            self::END_LTR . '<br>' . self::START_LTR,
            self::END_RTL . '<br>' . self::START_RTL,
        ], $result);

        // Include leading indeterminate directional text in whatever follows
        if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL && !str_starts_with($result . "\n", '<br>')) {
            $leadingText = '';
            while (true) {
                if ($result === '') {
                    // No span marker was found at all: keep the text as it was
                    $result = $leadingText;
                    break;
                }
                if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL) {
                    $leadingText .= substr($result, 0, 1);
                    $result = substr($result, 1);
                    continue;
                }
                // Re-insert the collected text just behind the first span marker
                $result = substr($result, 0, self::LENGTH_START) . $leadingText . substr($result, self::LENGTH_START);
                break;
            }
        }

        // Include solitary "-" and "+" in surrounding RTL text
        $result = str_replace([
            self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL,
            self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL,
        ], [
            '-',
            '+',
        ], $result);

        // Remove empty spans
        $result = str_replace([
            self::START_LTR . self::END_LTR,
            self::START_RTL . self::END_RTL,
        ], '', $result);

        // Finally, correct '<LTR>', '</LTR>', '<RTL>', and '</RTL>'
        // LTR text: <span dir="ltr"> text </span>
        // RTL text: <span dir="rtl"> text </span>

        $result = str_replace([
            self::START_LTR,
            self::END_LTR,
            self::START_RTL,
            self::END_RTL,
        ], [
            '<span dir="ltr">',
            '</span>',
            '<span dir="rtl">',
            '</span>',
        ], $result);

        return $result;
    }

    /**
     * Wrap words that have an asterisk suffix in <u> and </u> tags.
* This should underline starred names to show the preferred name.
     *
     * When the span direction differs from the page direction, no <u> tags are
     * inserted; the asterisk and the starredname <span> wrapper are removed instead.
     *
     * @param string $textSpan  The text of the span being finished
     * @param string $direction Direction of that span: 'LTR' or 'RTL'
     *
     * @return string
     */
    private static function starredName(string $textSpan, string $direction): string
    {
        // To avoid a TCPDF bug that mixes up the word order, insert those <u> and </u> tags
        // only when page and span directions are identical.
        if ($direction === strtoupper(I18N::direction())) {
            while (true) {
                $starPos = strpos($textSpan, '*');
                if ($starPos === false) {
                    break;
                }
                $trailingText = substr($textSpan, $starPos + 1);
                $textSpan = substr($textSpan, 0, $starPos);
                $wordStart = strrpos($textSpan, ' '); // Find the start of the word
                if ($wordStart !== false) {
                    $leadingText = substr($textSpan, 0, $wordStart + 1);
                    $wordText = substr($textSpan, $wordStart + 1);
                } else {
                    $leadingText = '';
                    $wordText = $textSpan;
                }
                $textSpan = $leadingText . '<u>' . $wordText . '</u>' . $trailingText;
            }
            // preg_replace() returns null on a PCRE error: keep the text unchanged in that case
            $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '<u>\1</u>', $textSpan) ?? $textSpan;
            // The &nbsp; is a work-around for a TCPDF bug eating blanks.
            $textSpan = str_replace([
                ' <u>',
                '</u> ',
            ], [
                '&nbsp;<u>',
                '</u>&nbsp;',
            ], $textSpan);
        } else {
            // Text and page directions differ: remove the <span> and </span>
            // NOTE(review): the greedy (.*) means only the last '*' on a line is removed - confirm this is intended.
            $textSpan = preg_replace('~(.*)\*~', '\1', $textSpan) ?? $textSpan;
            $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '\1', $textSpan) ?? $textSpan;
        }

        return $textSpan;
    }

    /**
     * Get the next character from an input string
     *
     * The length is derived from the lead byte found at $offset (1 to 4 bytes).
     * It is not checked that this many bytes are actually available, so 'letter'
     * can be shorter than 'length' when the text ends early.
     *
     * @param string $text
     * @param int    $offset Byte offset of the character within $text
     *
     * @return array{letter:string,length:int}
     */
    private static function getChar(string $text, int $offset): array
    {
        if ($text === '') {
            return [
                'letter' => '',
                'length' => 0,
            ];
        }

        $leadByte = ord(substr($text, $offset, 1));
        $length = 1;
        if (($leadByte & 0xE0) === 0xC0) {
            $length = 2; // 110xxxxx: two-byte sequence
        }
        if (($leadByte & 0xF0) === 0xE0) {
            $length = 3; // 1110xxxx: three-byte sequence
        }
        if (($leadByte & 0xF8) === 0xF0) {
            $length = 4; // 11110xxx: four-byte sequence
        }
        $letter = substr($text, $offset, $length);

        return [
            'letter' => $letter,
            'length' => $length,
        ];
    }

    /**
     * Insert <br> into current span
     *
     * Any waiting text is flushed to the result first. The break is written as the
     * temporary marker '<LTRbr>' or '<RTLbr>' (or '<br>' when there is no current state).
     *
     * @param string $result The output built so far; appended to in place
     *
     * @return void
     */
    private static function breakCurrentSpan(string &$result): void
    {
        // Interrupt the current span, insert that <br>, and then continue the current span
        $result .= self::$waitingText;
        self::$waitingText = '';

        $breakString = '<' . self::$currentState . 'br>';
        $result .= $breakString;
    }

    /**
     * Begin current span
     *
     * Appends the start marker that matches the current state (nothing when the state
     * is neither 'LTR' nor 'RTL') and records where the text of the span begins.
     *
     * @param string $result The output built so far; appended to in place
     *
     * @return void
     */
    private static function beginCurrentSpan(string &$result): void
    {
        if (self::$currentState === 'LTR') {
            $result .= self::START_LTR;
        }
        if (self::$currentState === 'RTL') {
            $result .= self::START_RTL;
        }

        self::$posSpanStart = strlen($result);
    }

    /**
     * Finish current span
     *
     * @param string $result
     * @param bool   $theEnd
     *
     * @return void
     */
    private static function finishCurrentSpan(string &$result, bool $theEnd = false): void
    {
        $textSpan = substr($result, self::$posSpanStart);
        $result = substr($result, 0, self::$posSpanStart);

        // Get rid of empty spans, so that our check for presence of RTL will work
        $result = str_replace([
            self::START_LTR . self::END_LTR,
            self::START_RTL . self::END_RTL,
        ], '', $result);

        // Look for numeric strings that are times (hh:mm:ss). These have to be separated from surrounding numbers.
        $tempResult = '';
        while ($textSpan !== '') {
            $posColon = strpos($textSpan, ':');
            if ($posColon === false) {
                break;
            } // No more possible time strings
            $posLRE = strpos($textSpan, self::UTF8_LRE);
            if ($posLRE === false) {
                break;
            } // No more numeric strings
            $posPDF = strpos($textSpan, self::UTF8_PDF, $posLRE);
            if ($posPDF === false) {
                break;
            } // No more numeric strings

            $tempResult .= substr($textSpan, 0, $posLRE + 3); // Copy everything preceding the numeric string
            $numericString = substr($textSpan, $posLRE + 3, $posPDF - $posLRE); // Separate the entire numeric string
            $textSpan = substr($textSpan, $posPDF + 3);
            $posColon = strpos($numericString, ':');
            if ($posColon === false) {
                // Nothing that looks like a time here
                $tempResult .= $numericString;
                continue;
            }
            $posBlank = strpos($numericString .
' ', ' '); 688 $posNbsp = strpos($numericString . ' ', ' '); 689 if ($posBlank < $posNbsp) { 690 $posSeparator = $posBlank; 691 $lengthSeparator = 1; 692 } else { 693 $posSeparator = $posNbsp; 694 $lengthSeparator = 6; 695 } 696 if ($posColon > $posSeparator) { 697 // We have a time string preceded by a blank: Exclude that blank from the numeric string 698 $tempResult .= substr($numericString, 0, $posSeparator); 699 $tempResult .= self::UTF8_PDF; 700 $tempResult .= substr($numericString, $posSeparator, $lengthSeparator); 701 $tempResult .= self::UTF8_LRE; 702 $numericString = substr($numericString, $posSeparator + $lengthSeparator); 703 } 704 705 $posBlank = strpos($numericString, ' '); 706 $posNbsp = strpos($numericString, ' '); 707 if ($posBlank === false && $posNbsp === false) { 708 // The time string isn't followed by a blank 709 $textSpan = $numericString . $textSpan; 710 continue; 711 } 712 713 // We have a time string followed by a blank: Exclude that blank from the numeric string 714 if ($posBlank === false) { 715 $posSeparator = $posNbsp; 716 $lengthSeparator = 6; 717 } elseif ($posNbsp === false) { 718 $posSeparator = $posBlank; 719 $lengthSeparator = 1; 720 } elseif ($posBlank < $posNbsp) { 721 $posSeparator = $posBlank; 722 $lengthSeparator = 1; 723 } else { 724 $posSeparator = $posNbsp; 725 $lengthSeparator = 6; 726 } 727 $tempResult .= substr($numericString, 0, $posSeparator); 728 $tempResult .= self::UTF8_PDF; 729 $tempResult .= substr($numericString, $posSeparator, $lengthSeparator); 730 $posSeparator += $lengthSeparator; 731 $numericString = substr($numericString, $posSeparator); 732 $textSpan = self::UTF8_LRE . $numericString . $textSpan; 733 } 734 $textSpan = $tempResult . $textSpan; 735 $trailingBlanks = ''; 736 $trailingBreaks = ''; 737 738 /* ****************************** LTR text handling ******************************** */ 739 740 if (self::$currentState === 'LTR') { 741 // Move trailing numeric strings to the following RTL text. 
Include any blanks preceding or following the numeric text too. 742 if (I18N::direction() === 'rtl' && self::$previousState === 'RTL' && !$theEnd) { 743 $trailingString = ''; 744 $savedSpan = $textSpan; 745 while ($textSpan !== '') { 746 // Look for trailing spaces and tentatively move them 747 if (str_ends_with($textSpan, ' ')) { 748 $trailingString = ' ' . $trailingString; 749 $textSpan = substr($textSpan, 0, -1); 750 continue; 751 } 752 if (str_ends_with($textSpan, ' ')) { 753 $trailingString = ' ' . $trailingString; 754 $textSpan = substr($textSpan, 0, -1); 755 continue; 756 } 757 if (substr($textSpan, -3) !== self::UTF8_PDF) { 758 // There is no trailing numeric string 759 $textSpan = $savedSpan; 760 break; 761 } 762 763 // We have a numeric string 764 $posStartNumber = strrpos($textSpan, self::UTF8_LRE); 765 if ($posStartNumber === false) { 766 $posStartNumber = 0; 767 } 768 $trailingString = substr($textSpan, $posStartNumber) . $trailingString; 769 $textSpan = substr($textSpan, 0, $posStartNumber); 770 771 // Look for more spaces and move them too 772 while ($textSpan !== '') { 773 if (str_ends_with($textSpan, ' ')) { 774 $trailingString = ' ' . $trailingString; 775 $textSpan = substr($textSpan, 0, -1); 776 continue; 777 } 778 if (str_ends_with($textSpan, ' ')) { 779 $trailingString = ' ' . $trailingString; 780 $textSpan = substr($textSpan, 0, -1); 781 continue; 782 } 783 break; 784 } 785 786 self::$waitingText = $trailingString . self::$waitingText; 787 break; 788 } 789 } 790 791 $savedSpan = $textSpan; 792 // Move any trailing <br>, optionally preceded or followed by blanks, outside this LTR span 793 while ($textSpan !== '') { 794 if (str_ends_with($textSpan, ' ')) { 795 $trailingBlanks = ' ' . $trailingBlanks; 796 $textSpan = substr($textSpan, 0, -1); 797 continue; 798 } 799 if (str_ends_with('......' . $textSpan, ' ')) { 800 $trailingBlanks = ' ' . 
$trailingBlanks; 801 $textSpan = substr($textSpan, 0, -6); 802 continue; 803 } 804 break; 805 } 806 while (str_ends_with($textSpan, '<LTRbr>')) { 807 $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span 808 $textSpan = substr($textSpan, 0, -7); 809 } 810 if ($trailingBreaks !== '') { 811 while ($textSpan !== '') { 812 if (str_ends_with($textSpan, ' ')) { 813 $trailingBreaks = ' ' . $trailingBreaks; 814 $textSpan = substr($textSpan, 0, -1); 815 continue; 816 } 817 if (str_ends_with($textSpan, ' ')) { 818 $trailingBreaks = ' ' . $trailingBreaks; 819 $textSpan = substr($textSpan, 0, -6); 820 continue; 821 } 822 break; 823 } 824 self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span 825 } else { 826 $textSpan = $savedSpan; 827 } 828 829 $trailingBlanks = ''; 830 $trailingPunctuation = ''; 831 $trailingID = ''; 832 $trailingSeparator = ''; 833 $leadingSeparator = ''; 834 835 while (I18N::direction() === 'rtl') { 836 if (str_contains($result, self::START_RTL)) { 837 // Remove trailing blanks for inclusion in a separate LTR span 838 while ($textSpan !== '') { 839 if (str_ends_with($textSpan, ' ')) { 840 $trailingBlanks = ' ' . $trailingBlanks; 841 $textSpan = substr($textSpan, 0, -1); 842 continue; 843 } 844 if (str_ends_with($textSpan, ' ')) { 845 $trailingBlanks = ' ' . 
$trailingBlanks; 846 $textSpan = substr($textSpan, 0, -1); 847 continue; 848 } 849 break; 850 } 851 852 // Remove trailing punctuation for inclusion in a separate LTR span 853 if ($textSpan === '') { 854 $trailingChar = "\n"; 855 } else { 856 $trailingChar = substr($textSpan, -1); 857 } 858 if (str_contains(self::PUNCTUATION, $trailingChar)) { 859 $trailingPunctuation = $trailingChar; 860 $textSpan = substr($textSpan, 0, -1); 861 } 862 } 863 864 // Remove trailing ID numbers that look like "(xnnn)" for inclusion in a separate LTR span 865 while (true) { 866 if (!str_ends_with($textSpan, ')')) { 867 break; 868 } // There is no trailing ')' 869 $posLeftParen = strrpos($textSpan, '('); 870 if ($posLeftParen === false) { 871 break; 872 } // There is no leading '(' 873 $temp = self::stripLrmRlm(substr($textSpan, $posLeftParen)); // Get rid of UTF8 control codes 874 875 // If the parenthesized text doesn't look like an ID number, 876 // we don't want to touch it. 877 // This check won’t work if somebody uses ID numbers with an unusual format. 878 $offset = 1; 879 $charArray = self::getChar($temp, $offset); // Get 1st character of parenthesized text 880 if (str_contains(self::NUMBERS, $charArray['letter'])) { 881 break; 882 } 883 $offset += $charArray['length']; // Point at 2nd character of parenthesized text 884 if (!str_contains(self::NUMBERS, substr($temp, $offset, 1))) { 885 break; 886 } 887 // 1st character of parenthesized text is alpha, 2nd character is a digit; last has to be a digit too 888 if (!str_contains(self::NUMBERS, substr($temp, -2, 1))) { 889 break; 890 } 891 892 $trailingID = substr($textSpan, $posLeftParen); 893 $textSpan = substr($textSpan, 0, $posLeftParen); 894 break; 895 } 896 897 // Look for " - " or blank preceding the ID number and remove it for inclusion in a separate LTR span 898 if ($trailingID !== '') { 899 while ($textSpan !== '') { 900 if (str_ends_with($textSpan, ' ')) { 901 $trailingSeparator = ' ' . 
$trailingSeparator; 902 $textSpan = substr($textSpan, 0, -1); 903 continue; 904 } 905 if (str_ends_with($textSpan, ' ')) { 906 $trailingSeparator = ' ' . $trailingSeparator; 907 $textSpan = substr($textSpan, 0, -6); 908 continue; 909 } 910 if (str_ends_with($textSpan, '-')) { 911 $trailingSeparator = '-' . $trailingSeparator; 912 $textSpan = substr($textSpan, 0, -1); 913 continue; 914 } 915 break; 916 } 917 } 918 919 // Look for " - " preceding the text and remove it for inclusion in a separate LTR span 920 $foundSeparator = false; 921 $savedSpan = $textSpan; 922 while ($textSpan !== '') { 923 if (str_starts_with($textSpan, ' ')) { 924 $leadingSeparator = ' ' . $leadingSeparator; 925 $textSpan = substr($textSpan, 1); 926 continue; 927 } 928 if (str_starts_with($textSpan, ' ')) { 929 $leadingSeparator = ' ' . $leadingSeparator; 930 $textSpan = substr($textSpan, 6); 931 continue; 932 } 933 if (str_starts_with($textSpan, '-')) { 934 $leadingSeparator = '-' . $leadingSeparator; 935 $textSpan = substr($textSpan, 1); 936 $foundSeparator = true; 937 continue; 938 } 939 break; 940 } 941 if (!$foundSeparator) { 942 $textSpan = $savedSpan; 943 $leadingSeparator = ''; 944 } 945 break; 946 } 947 948 // We're done: finish the span 949 $textSpan = self::starredName($textSpan, 'LTR'); // Wrap starred name in <u> and </u> tags 950 while (true) { 951 // Remove blanks that precede <LTRbr> 952 if (str_contains($textSpan, ' <LTRbr>')) { 953 $textSpan = str_replace(' <LTRbr>', '<LTRbr>', $textSpan); 954 continue; 955 } 956 if (str_contains($textSpan, ' <LTRbr>')) { 957 $textSpan = str_replace(' <LTRbr>', '<LTRbr>', $textSpan); 958 continue; 959 } 960 break; 961 } 962 if ($leadingSeparator !== '') { 963 $result .= self::START_LTR . $leadingSeparator . self::END_LTR; 964 } 965 $result .= $textSpan . self::END_LTR; 966 if ($trailingSeparator !== '') { 967 $result .= self::START_LTR . $trailingSeparator . self::END_LTR; 968 } 969 if ($trailingID !== '') { 970 $result .= self::START_LTR . 
$trailingID . self::END_LTR; 971 } 972 if ($trailingPunctuation !== '') { 973 $result .= self::START_LTR . $trailingPunctuation . self::END_LTR; 974 } 975 if ($trailingBlanks !== '') { 976 $result .= self::START_LTR . $trailingBlanks . self::END_LTR; 977 } 978 } 979 980 /* ****************************** RTL text handling ******************************** */ 981 982 if (self::$currentState === 'RTL') { 983 $savedSpan = $textSpan; 984 985 // Move any trailing <br>, optionally followed by blanks, outside this RTL span 986 while ($textSpan !== '') { 987 if (str_ends_with($textSpan, ' ')) { 988 $trailingBlanks = ' ' . $trailingBlanks; 989 $textSpan = substr($textSpan, 0, -1); 990 continue; 991 } 992 if (str_ends_with('......' . $textSpan, ' ')) { 993 $trailingBlanks = ' ' . $trailingBlanks; 994 $textSpan = substr($textSpan, 0, -6); 995 continue; 996 } 997 break; 998 } 999 while (str_ends_with($textSpan, '<RTLbr>')) { 1000 $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span 1001 $textSpan = substr($textSpan, 0, -7); 1002 } 1003 if ($trailingBreaks !== '') { 1004 self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span 1005 } else { 1006 $textSpan = $savedSpan; 1007 } 1008 1009 // Move trailing numeric strings to the following LTR text. Include any blanks preceding or following the numeric text too. 1010 if (!$theEnd && I18N::direction() !== 'rtl') { 1011 $trailingString = ''; 1012 $savedSpan = $textSpan; 1013 while ($textSpan !== '') { 1014 // Look for trailing spaces and tentatively move them 1015 if (str_ends_with($textSpan, ' ')) { 1016 $trailingString = ' ' . $trailingString; 1017 $textSpan = substr($textSpan, 0, -1); 1018 continue; 1019 } 1020 if (str_ends_with($textSpan, ' ')) { 1021 $trailingString = ' ' . 
$trailingString; 1022 $textSpan = substr($textSpan, 0, -1); 1023 continue; 1024 } 1025 if (substr($textSpan, -3) !== self::UTF8_PDF) { 1026 // There is no trailing numeric string 1027 $textSpan = $savedSpan; 1028 break; 1029 } 1030 1031 // We have a numeric string 1032 $posStartNumber = strrpos($textSpan, self::UTF8_LRE); 1033 if ($posStartNumber === false) { 1034 $posStartNumber = 0; 1035 } 1036 $trailingString = substr($textSpan, $posStartNumber) . $trailingString; 1037 $textSpan = substr($textSpan, 0, $posStartNumber); 1038 1039 // Look for more spaces and move them too 1040 while ($textSpan !== '') { 1041 if (str_ends_with($textSpan, ' ')) { 1042 $trailingString = ' ' . $trailingString; 1043 $textSpan = substr($textSpan, 0, -1); 1044 continue; 1045 } 1046 if (str_ends_with($textSpan, ' ')) { 1047 $trailingString = ' ' . $trailingString; 1048 $textSpan = substr($textSpan, 0, -1); 1049 continue; 1050 } 1051 break; 1052 } 1053 1054 self::$waitingText = $trailingString . self::$waitingText; 1055 break; 1056 } 1057 } 1058 1059 // Trailing " - " needs to be prefixed to the following span 1060 if (!$theEnd && str_ends_with('...' . $textSpan, ' - ')) { 1061 $textSpan = substr($textSpan, 0, -3); 1062 self::$waitingText = ' - ' . self::$waitingText; 1063 } 1064 1065 while (I18N::direction() === 'rtl') { 1066 // Look for " - " preceding <RTLbr> and relocate it to the front of the string 1067 $posDashString = strpos($textSpan, ' - <RTLbr>'); 1068 if ($posDashString === false) { 1069 break; 1070 } 1071 $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>'); 1072 if ($posStringStart === false) { 1073 $posStringStart = 0; 1074 } else { 1075 $posStringStart += 9; 1076 } // Point to the first char following the last <RTLbr> 1077 1078 $textSpan = substr($textSpan, 0, $posStringStart) . ' - ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . 
substr($textSpan, $posDashString + 3); 1079 } 1080 1081 // Strip leading spaces from the RTL text 1082 $countLeadingSpaces = 0; 1083 while ($textSpan !== '') { 1084 if (str_starts_with($textSpan, ' ')) { 1085 $countLeadingSpaces++; 1086 $textSpan = substr($textSpan, 1); 1087 continue; 1088 } 1089 if (str_starts_with($textSpan, ' ')) { 1090 $countLeadingSpaces++; 1091 $textSpan = substr($textSpan, 6); 1092 continue; 1093 } 1094 break; 1095 } 1096 1097 // Strip trailing spaces from the RTL text 1098 $countTrailingSpaces = 0; 1099 while ($textSpan !== '') { 1100 if (str_ends_with($textSpan, ' ')) { 1101 $countTrailingSpaces++; 1102 $textSpan = substr($textSpan, 0, -1); 1103 continue; 1104 } 1105 if (str_ends_with($textSpan, ' ')) { 1106 $countTrailingSpaces++; 1107 $textSpan = substr($textSpan, 0, -6); 1108 continue; 1109 } 1110 break; 1111 } 1112 1113 // Look for trailing " -", reverse it, and relocate it to the front of the string 1114 if (str_ends_with($textSpan, ' -')) { 1115 $posDashString = strlen($textSpan) - 2; 1116 $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>'); 1117 if ($posStringStart === false) { 1118 $posStringStart = 0; 1119 } else { 1120 $posStringStart += 9; 1121 } // Point to the first char following the last <RTLbr> 1122 1123 $textSpan = substr($textSpan, 0, $posStringStart) . '- ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 2); 1124 } 1125 1126 if ($countLeadingSpaces !== 0) { 1127 $newLength = strlen($textSpan) + $countLeadingSpaces; 1128 $textSpan = str_pad($textSpan, $newLength, ' ', I18N::direction() === 'rtl' ? 
STR_PAD_LEFT : STR_PAD_RIGHT); 1129 } 1130 if ($countTrailingSpaces !== 0) { 1131 if (I18N::direction() === 'ltr') { 1132 if ($trailingBreaks === '') { 1133 // Move trailing RTL spaces to front of following LTR span 1134 $newLength = strlen(self::$waitingText) + $countTrailingSpaces; 1135 self::$waitingText = str_pad(self::$waitingText, $newLength, ' ', STR_PAD_LEFT); 1136 } 1137 } else { 1138 $newLength = strlen($textSpan) + $countTrailingSpaces; 1139 $textSpan = str_pad($textSpan, $newLength); 1140 } 1141 } 1142 1143 // We're done: finish the span 1144 $textSpan = self::starredName($textSpan, 'RTL'); // Wrap starred name in <u> and </u> tags 1145 $result .= $textSpan . self::END_RTL; 1146 } 1147 1148 if (self::$currentState !== 'LTR' && self::$currentState !== 'RTL') { 1149 $result .= $textSpan; 1150 } 1151 1152 $result .= $trailingBreaks; // Get rid of any waiting <br> 1153 } 1154} 1155