1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2021 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees\Report; 21 22use Fisharebest\Webtrees\I18N; 23 24use function str_contains; 25 26/** 27 * RTL Functions for use in the PDF reports 28 */ 29class RightToLeftSupport 30{ 31 private const UTF8_LRM = "\xE2\x80\x8E"; // U+200E (Left to Right mark: zero-width character with LTR directionality) 32 private const UTF8_RLM = "\xE2\x80\x8F"; // U+200F (Right to Left mark: zero-width character with RTL directionality) 33 private const UTF8_LRO = "\xE2\x80\xAD"; // U+202D (Left to Right override: force everything following to LTR mode) 34 private const UTF8_RLO = "\xE2\x80\xAE"; // U+202E (Right to Left override: force everything following to RTL mode) 35 private const UTF8_LRE = "\xE2\x80\xAA"; // U+202A (Left to Right embedding: treat everything following as LTR text) 36 private const UTF8_RLE = "\xE2\x80\xAB"; // U+202B (Right to Left embedding: treat everything following as RTL text) 37 private const UTF8_PDF = "\xE2\x80\xAC"; // U+202C (Pop directional formatting: restore state prior to last LRO, RLO, LRE, RLE) 38 39 private const OPEN_PARENTHESES = '([{'; 40 41 private const CLOSE_PARENTHESES = ')]}'; 42 43 private const NUMBERS = '0123456789'; 44 45 private const 
NUMBER_PREFIX = '+-'; // Treat these like numbers when at beginning or end of numeric strings

    private const NUMBER_PUNCTUATION = '- ,.:/'; // Treat these like numbers when inside numeric strings

    private const PUNCTUATION = ',.:;?!';

    // Markup
    private const START_LTR    = '<LTR>';
    private const END_LTR      = '</LTR>';
    private const START_RTL    = '<RTL>';
    private const END_RTL      = '</RTL>';
    private const LENGTH_START = 5;
    private const LENGTH_END   = 6;

    /* Were we previously processing LTR or RTL. */
    private static string $previousState;

    /* Are we currently processing LTR or RTL. */
    private static string $currentState;

    /* Text waiting to be processed. */
    private static string $waitingText;

    /* Offset into the text. */
    private static int $posSpanStart;

    /**
     * Remove every directional control code from a string.
     *
     * The UTF-8 encoded LRM, RLM, LRO, RLO, LRE, RLE and PDF characters are removed, together
     * with the HTML entity spellings "&lrm;", "&rlm;", "&LRM;" and "&RLM;". Matching is
     * case-sensitive, so only those exact entity spellings are recognised.
     *
     * @param string $inputText The string from which the control codes should be stripped
     *
     * @return string The input string without directional control codes
     */
    private static function stripLrmRlm(string $inputText): string
    {
        return str_replace([
            self::UTF8_LRM,
            self::UTF8_RLM,
            self::UTF8_LRO,
            self::UTF8_RLO,
            self::UTF8_LRE,
            self::UTF8_RLE,
            self::UTF8_PDF,
            // Entity forms. These must be spelled out as text: the raw characters are
            // already covered by UTF8_LRM and UTF8_RLM above.
            '&lrm;',
            '&rlm;',
            '&LRM;',
            '&RLM;',
        ], '', $inputText);
    }

    /**
     * This function encapsulates all texts in the input with <span dir='xxx'> and </span>
     * according to the directionality specified.
*
     * Newlines are first turned into <br>, and any directional control codes already present
     * are stripped. The text is then scanned one UTF-8 character at a time:
     *  - HTML elements and HTML entities are copied through (a short "<br" element also breaks
     *    the current span),
     *  - TCPDF directives of the form "{{...}}" are copied through,
     *  - runs of digits (with optional sign and embedded numeric punctuation) are wrapped in
     *    LRE ... PDF control codes while the current direction is RTL,
     *  - every other character decides whether a new LTR or RTL span has to be started.
     *
     * Blanks are recognised either as a plain space or as the 6-byte entity "&nbsp;".
     *
     * This method uses the static state of this class ($previousState, $currentState,
     * $waitingText, $posSpanStart), so it is not re-entrant.
     *
     * @param string $inputText Raw input
     *
     * @return string The string with all texts encapsulated as required
     */
    public static function spanLtrRtl(string $inputText): string
    {
        if ($inputText === '') {
            // Nothing to do
            return '';
        }

        $workingText = str_replace("\n", '<br>', $inputText);
        $workingText = str_replace([
            '<span class="starredname"><br>',
            '<span<br>class="starredname">',
        ], '<br><span class="starredname">', $workingText); // Reposition some incorrectly placed line breaks
        $workingText = self::stripLrmRlm($workingText); // Get rid of any existing UTF8 control codes

        self::$previousState = '';
        self::$currentState  = strtoupper(I18N::direction());
        $numberState         = false; // Set when we're inside a numeric string
        $result              = '';
        self::$waitingText   = '';
        $openParDirection    = [];

        self::beginCurrentSpan($result);

        while ($workingText !== '') {
            $charArray     = self::getChar($workingText, 0); // Get the next ASCII or UTF-8 character
            $currentLetter = $charArray['letter'];
            $currentLen    = $charArray['length'];

            $openParIndex  = strpos(self::OPEN_PARENTHESES, $currentLetter); // Which opening parenthesis is this?
            $closeParIndex = strpos(self::CLOSE_PARENTHESES, $currentLetter); // Which closing parenthesis is this?

            switch ($currentLetter) {
                case '<':
                    // Assume this '<' starts an HTML element
                    $endPos = strpos($workingText, '>'); // look for the terminating '>'
                    if ($endPos === false) {
                        $endPos = 0;
                    }
                    $currentLen += $endPos;
                    $element    = substr($workingText, 0, $currentLen);
                    $temp       = strtolower(substr($element, 0, 3));
                    if (strlen($element) < 7 && $temp === '<br') {
                        if ($numberState) {
                            $numberState = false;
                            if (self::$currentState === 'RTL') {
                                self::$waitingText .= self::UTF8_PDF;
                            }
                        }
                        self::breakCurrentSpan($result);
                    } elseif (self::$waitingText === '') {
                        $result .= $element;
                    } else {
                        self::$waitingText .= $element;
                    }
                    $workingText = substr($workingText, $currentLen);
                    break;
                case '&':
                    // Assume this '&' starts an HTML entity
                    $endPos = strpos($workingText, ';'); // look for the terminating ';'
                    if ($endPos === false) {
                        $endPos = 0;
                    }
                    $currentLen += $endPos;
                    $entity     = substr($workingText, 0, $currentLen);
                    if (strtolower($entity) === '&nbsp;') {
                        $entity = '&nbsp;'; // Ensure consistent case for this entity
                    }
                    if (self::$waitingText === '') {
                        $result .= $entity;
                    } else {
                        self::$waitingText .= $entity;
                    }
                    $workingText = substr($workingText, $currentLen);
                    break;
                case '{':
                    if (substr($workingText, 1, 1) === '{') {
                        // Assume this '{{' starts a TCPDF directive
                        $endPos = strpos($workingText, '}}'); // look for the terminating '}}'
                        if ($endPos === false) {
                            $endPos = 0;
                        }
                        $currentLen        = $endPos + 2;
                        $directive         = substr($workingText, 0, $currentLen);
                        $workingText       = substr($workingText, $currentLen);
                        $result            .= self::$waitingText . $directive;
                        self::$waitingText = '';
                        break;
                    }
                    // no break
                default:
                    // Look for strings of numbers with optional leading or trailing + or -
                    // and with optional embedded numeric punctuation
                    if ($numberState) {
                        // If we're inside a numeric string, look for reasons to end it
                        $offset    = 0; // Be sure to look at the current character first
                        $charArray = self::getChar($workingText . "\n", $offset);
                        if (!str_contains(self::NUMBERS, $charArray['letter'])) {
                            // This is not a digit. Is it numeric punctuation?
                            if (substr($workingText . "\n", $offset, 6) === '&nbsp;') {
                                $offset += 6; // This could be numeric punctuation
                            } elseif (str_contains(self::NUMBER_PUNCTUATION, $charArray['letter'])) {
                                $offset += $charArray['length']; // This could be numeric punctuation
                            }
                            // If the next character is a digit, the current character is numeric punctuation
                            $charArray = self::getChar($workingText . "\n", $offset);
                            if (!str_contains(self::NUMBERS, $charArray['letter'])) {
                                // This is not a digit. End the run of digits and punctuation.
                                $numberState = false;
                                if (self::$currentState === 'RTL') {
                                    if (!str_contains(self::NUMBER_PREFIX, $currentLetter)) {
                                        $currentLetter = self::UTF8_PDF . $currentLetter;
                                    } else {
                                        $currentLetter .= self::UTF8_PDF; // Include a trailing + or - in the run
                                    }
                                }
                            }
                        }
                    } else {
                        // If we're outside a numeric string, look for reasons to start it
                        if (str_contains(self::NUMBER_PREFIX, $currentLetter)) {
                            // This might be a number lead-in
                            $offset   = $currentLen;
                            $nextChar = substr($workingText . "\n", $offset, 1);
                            if (str_contains(self::NUMBERS, $nextChar)) {
                                $numberState = true; // We found a digit: the lead-in is therefore numeric
                                if (self::$currentState === 'RTL') {
                                    $currentLetter = self::UTF8_LRE . $currentLetter;
                                }
                            }
                        } elseif (str_contains(self::NUMBERS, $currentLetter)) {
                            $numberState = true; // The current letter is a digit
                            if (self::$currentState === 'RTL') {
                                $currentLetter = self::UTF8_LRE . $currentLetter;
                            }
                        }
                    }

                    // Determine the directionality of the current UTF-8 character
                    $newState = self::$currentState;

                    while (true) {
                        if (I18N::scriptDirection(I18N::textScript($currentLetter)) === 'rtl') {
                            if (self::$currentState === '') {
                                $newState = 'RTL';
                                break;
                            }

                            if (self::$currentState === 'RTL') {
                                break;
                            }
                            // Switch to RTL only if this isn't a solitary RTL letter
                            $tempText = substr($workingText, $currentLen);
                            while ($tempText !== '') {
                                $nextCharArray = self::getChar($tempText, 0);
                                $nextLetter    = $nextCharArray['letter'];
                                $nextLen       = $nextCharArray['length'];
                                $tempText      = substr($tempText, $nextLen);

                                if (I18N::scriptDirection(I18N::textScript($nextLetter)) === 'rtl') {
                                    $newState = 'RTL';
                                    break 2;
                                }

                                if (str_contains(self::PUNCTUATION, $nextLetter) || str_contains(self::OPEN_PARENTHESES, $nextLetter)) {
                                    $newState = 'RTL';
                                    break 2;
                                }

                                if ($nextLetter === ' ') {
                                    break;
                                }
                                // '&' plus the next five bytes: is this the "&nbsp;" entity?
                                $nextLetter .= substr($tempText . "\n", 0, 5);
                                if ($nextLetter === '&nbsp;') {
                                    break;
                                }
                            }
                            // This is a solitary RTL letter : wrap it in UTF8 control codes to force LTR directionality
                            $currentLetter = self::UTF8_LRO . $currentLetter . self::UTF8_PDF;
                            $newState      = 'LTR';
                            break;
                        }
                        if ($currentLen !== 1 || ($currentLetter >= 'A' && $currentLetter <= 'Z') || ($currentLetter >= 'a' && $currentLetter <= 'z')) {
                            // Since it's neither Hebrew nor Arabic, this UTF-8 character or ASCII letter must be LTR
                            $newState = 'LTR';
                            break;
                        }
                        if ($closeParIndex !== false) {
                            // This closing parenthesis has to inherit the matching opening parenthesis' directionality
                            if (!empty($openParDirection[$closeParIndex]) && $openParDirection[$closeParIndex] !== '?') {
                                $newState = $openParDirection[$closeParIndex];
                            }
                            $openParDirection[$closeParIndex] = '';
                            break;
                        }
                        if ($openParIndex !== false) {
                            // Opening parentheses always inherit the following directionality
                            self::$waitingText .= $currentLetter;
                            $workingText       = substr($workingText, $currentLen);
                            while (true) {
                                if ($workingText === '') {
                                    break;
                                }
                                if (substr($workingText, 0, 1) === ' ') {
                                    // Spaces following this left parenthesis inherit the following directionality too
                                    self::$waitingText .= ' ';
                                    $workingText       = substr($workingText, 1);
                                    continue;
                                }
                                if (substr($workingText, 0, 6) === '&nbsp;') {
                                    // Spaces following this left parenthesis inherit the following directionality too
                                    self::$waitingText .= '&nbsp;';
                                    $workingText       = substr($workingText, 6);
                                    continue;
                                }
                                break;
                            }
                            $openParDirection[$openParIndex] = '?';
                            break 2; // double break because we're waiting for more information
                        }

                        // We have a digit or a "normal" special character.
                        //
                        // When this character is not at the start of the input string, it inherits the preceding directionality;
                        // at the start of the input string, it assumes the following directionality.
                        //
                        // Exceptions to this rule will be handled later during final clean-up.
                        //
                        self::$waitingText .= $currentLetter;
                        $workingText       = substr($workingText, $currentLen);
                        if (self::$currentState !== '') {
                            $result            .= self::$waitingText;
                            self::$waitingText = '';
                        }
                        break 2; // double break because we're waiting for more information
                    }
                    if ($newState !== self::$currentState) {
                        // A direction change has occurred
                        self::finishCurrentSpan($result);
                        self::$previousState = self::$currentState;
                        self::$currentState  = $newState;
                        self::beginCurrentSpan($result);
                    }
                    self::$waitingText .= $currentLetter;
                    $workingText       = substr($workingText, $currentLen);
                    $result            .= self::$waitingText;
                    self::$waitingText = '';

                    foreach ($openParDirection as $index => $value) {
                        // Since we now know the proper direction, remember it for all waiting opening parentheses
                        if ($value === '?') {
                            $openParDirection[$index] = self::$currentState;
                        }
                    }

                    break;
            }
        }

        // We're done. Close a numeric run that is still open (only RTL runs carry control codes)
        if ($numberState && self::$currentState === 'RTL') {
            if (self::$waitingText === '') {
                $result .= self::UTF8_PDF;
            } else {
                self::$waitingText .= self::UTF8_PDF;
            }
        }
        self::finishCurrentSpan($result, true);

        // Get rid of any waiting text
        if (self::$waitingText !== '') {
            if (I18N::direction() === 'rtl' && self::$currentState === 'LTR') {
                $result .= self::START_RTL;
                $result .= self::$waitingText;
                $result .= self::END_RTL;
            } else {
                $result .= self::START_LTR;
                $result .= self::$waitingText;
                $result .= self::END_LTR;
            }
            self::$waitingText = '';
        }

        // Lastly, do some more cleanups

        // Move leading RTL numeric strings to following LTR text
        // (this happens when the page direction is RTL and the original text begins with a number and is followed by LTR text)
        if (substr($result, 0, self::LENGTH_START + 3) === self::START_RTL . self::UTF8_LRE) {
            $spanEnd = strpos($result, self::END_RTL . self::START_LTR);
            if ($spanEnd !== false) {
                $textSpan = self::stripLrmRlm(substr($result, self::LENGTH_START + 3, $spanEnd - self::LENGTH_START - 3));
                if (I18N::scriptDirection(I18N::textScript($textSpan)) !== 'rtl') {
                    $result = self::START_LTR . substr($result, self::LENGTH_START, $spanEnd - self::LENGTH_START) . substr($result, $spanEnd + self::LENGTH_START + self::LENGTH_END);
                }
            }
        }

        // On RTL pages, put trailing "." in RTL numeric strings into its own RTL span
        if (I18N::direction() === 'rtl') {
            $result = str_replace(self::UTF8_PDF . '.' . self::END_RTL, self::UTF8_PDF . self::END_RTL . self::START_RTL . '.' . self::END_RTL, $result);
        }

        // Trim trailing blanks preceding <br> in LTR text
        while (self::$previousState !== 'RTL') {
            if (str_contains($result, ' <LTRbr>')) {
                $result = str_replace(' <LTRbr>', '<LTRbr>', $result);
                continue;
            }
            if (str_contains($result, '&nbsp;<LTRbr>')) {
                $result = str_replace('&nbsp;<LTRbr>', '<LTRbr>', $result);
                continue;
            }
            if (str_contains($result, ' <br>')) {
                $result = str_replace(' <br>', '<br>', $result);
                continue;
            }
            if (str_contains($result, '&nbsp;<br>')) {
                $result = str_replace('&nbsp;<br>', '<br>', $result);
                continue;
            }
            break; // Neither space nor &nbsp; : we're done
        }

        // Trim trailing blanks preceding <br> in RTL text
        while (true) {
            if (str_contains($result, ' <RTLbr>')) {
                $result = str_replace(' <RTLbr>', '<RTLbr>', $result);
                continue;
            }
            if (str_contains($result, '&nbsp;<RTLbr>')) {
                $result = str_replace('&nbsp;<RTLbr>', '<RTLbr>', $result);
                continue;
            }
            break; // Neither space nor &nbsp; : we're done
        }

        // Convert '<LTRbr>' and '<RTLbr>'
        $result = str_replace([
            '<LTRbr>',
            '<RTLbr>',
        ], [
            self::END_LTR . '<br>' . self::START_LTR,
            self::END_RTL . '<br>' . self::START_RTL,
        ], $result);

        // Include leading indeterminate directional text in whatever follows
        if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL && substr($result . "\n", 0, 4) !== '<br>') {
            $leadingText = '';
            while (true) {
                if ($result === '') {
                    $result = $leadingText;
                    break;
                }
                if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL) {
                    $leadingText .= substr($result, 0, 1);
                    $result      = substr($result, 1);
                    continue;
                }
                $result = substr($result, 0, self::LENGTH_START) . $leadingText . substr($result, self::LENGTH_START);
                break;
            }
        }

        // Include solitary "-" and "+" in surrounding RTL text
        $result = str_replace([
            self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL,
            self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL,
        ], [
            '-',
            '+',
        ], $result);

        // Remove empty spans
        $result = str_replace([
            self::START_LTR . self::END_LTR,
            self::START_RTL . self::END_RTL,
        ], '', $result);

        // Finally, correct '<LTR>', '</LTR>', '<RTL>', and '</RTL>'
        // LTR text: <span dir="ltr"> text </span>
        // RTL text: <span dir="rtl"> text </span>

        $result = str_replace([
            self::START_LTR,
            self::END_LTR,
            self::START_RTL,
            self::END_RTL,
        ], [
            '<span dir="ltr">',
            '</span>',
            '<span dir="rtl">',
            '</span>',
        ], $result);

        return $result;
    }

    /**
     * Wrap words that have an asterisk suffix in <u> and </u> tags.
* This should underline starred names to show the preferred name.
     *
     * When $direction equals the upper-cased page direction, every word followed by "*" is
     * wrapped in <u>...</u> (the asterisk itself is dropped), a <span class="starredname">
     * element is turned into <u>...</u>, and blanks adjacent to the <u> tags are replaced by
     * "&nbsp;". Otherwise the markup is removed without adding any underlining.
     *
     * @param string $textSpan  Text of the span being finished
     * @param string $direction Direction of that span; compared with strtoupper(I18N::direction())
     *
     * @return string
     */
    private static function starredName(string $textSpan, string $direction): string
    {
        // To avoid a TCPDF bug that mixes up the word order, insert those <u> and </u> tags
        // only when page and span directions are identical.
        if ($direction === strtoupper(I18N::direction())) {
            while (true) {
                $starPos = strpos($textSpan, '*');
                if ($starPos === false) {
                    break;
                }
                $trailingText = substr($textSpan, $starPos + 1);
                $textSpan     = substr($textSpan, 0, $starPos);
                $wordStart    = strrpos($textSpan, ' '); // Find the start of the word
                if ($wordStart !== false) {
                    $leadingText = substr($textSpan, 0, $wordStart + 1);
                    $wordText    = substr($textSpan, $wordStart + 1);
                } else {
                    $leadingText = '';
                    $wordText    = $textSpan;
                }
                $textSpan = $leadingText . '<u>' . $wordText . '</u>' . $trailingText;
            }
            // preg_replace() returns null on a PCRE error: keep the text unchanged in that case
            $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '<u>\1</u>', $textSpan) ?? $textSpan;
            // The &nbsp; is a work-around for a TCPDF bug eating blanks.
            $textSpan = str_replace([
                ' <u>',
                '</u> ',
            ], [
                '&nbsp;<u>',
                '</u>&nbsp;',
            ], $textSpan);
        } else {
            // Text and page directions differ: remove the <span> and </span>
            // NOTE(review): "(.*)" is greedy, so only the last "*" on each line is removed here,
            // whereas the branch above handles every "*" - confirm that this is intended.
            $textSpan = preg_replace('~(.*)\*~', '\1', $textSpan) ?? $textSpan;
            $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '\1', $textSpan) ?? $textSpan;
        }

        return $textSpan;
    }

    /**
     * Get the next character from an input string
     *
     * The byte at $offset is treated as the lead byte of a UTF-8 sequence and decides how many
     * bytes (1 to 4) are returned. The sequence itself is not validated; 'letter' can be
     * shorter than 'length' when the string ends early.
     *
     * @param string $text   Text to read from
     * @param int    $offset Byte offset of the character within $text
     *
     * @return array{'letter':string,'length':int}
     */
    private static function getChar(string $text, int $offset): array
    {
        if ($text === '') {
            return [
                'letter' => '',
                'length' => 0,
            ];
        }

        $leadByte = ord(substr($text, $offset, 1));

        if (($leadByte & 0xF8) === 0xF0) {
            $length = 4; // 11110xxx
        } elseif (($leadByte & 0xF0) === 0xE0) {
            $length = 3; // 1110xxxx
        } elseif (($leadByte & 0xE0) === 0xC0) {
            $length = 2; // 110xxxxx
        } else {
            $length = 1; // ASCII, or a byte that does not start a multi-byte sequence
        }

        return [
            'letter' => substr($text, $offset, $length),
            'length' => $length,
        ];
    }

    /**
     * Insert <br> into current span
     *
     * Any waiting text is flushed to $result first. The break is written as the placeholder
     * "<" . $currentState . "br>" (for example "<LTRbr>" or "<RTLbr>"), not as a real <br>.
     *
     * @param string $result Output built so far; modified in place
     *
     * @return void
     */
    private static function breakCurrentSpan(string &$result): void
    {
        // Interrupt the current span, insert that <br>, and then continue the current span
        $result            .= self::$waitingText . '<' . self::$currentState . 'br>';
        self::$waitingText = '';
    }

    /**
     * Begin current span
     *
     * Appends the opening marker for the current state (nothing when the state is neither
     * 'LTR' nor 'RTL') and remembers where the text of the new span starts.
     *
     * @param string $result Output built so far; modified in place
     *
     * @return void
     */
    private static function beginCurrentSpan(string &$result): void
    {
        if (self::$currentState === 'LTR') {
            $result .= self::START_LTR;
        } elseif (self::$currentState === 'RTL') {
            $result .= self::START_RTL;
        }

        self::$posSpanStart = strlen($result);
    }

    /**
     * Finish current span
     *
     * @param string $result
     * @param bool   $theEnd
     *
     * @return void
     */
    private static function finishCurrentSpan(string &$result, bool $theEnd = false): void
    {
        $textSpan = substr($result, self::$posSpanStart);
        $result   = substr($result, 0, self::$posSpanStart);

        // Get rid of empty spans, so that our check for presence of RTL will work
        $result = str_replace([
            self::START_LTR . self::END_LTR,
            self::START_RTL . self::END_RTL,
        ], '', $result);

        // Look for numeric strings that are times (hh:mm:ss). These have to be separated from surrounding numbers.
        $tempResult = '';
        while ($textSpan !== '') {
            $posColon = strpos($textSpan, ':');
            if ($posColon === false) {
                break;
            } // No more possible time strings
            $posLRE = strpos($textSpan, self::UTF8_LRE);
            if ($posLRE === false) {
                break;
            } // No more numeric strings
            $posPDF = strpos($textSpan, self::UTF8_PDF, $posLRE);
            if ($posPDF === false) {
                break;
            } // No more numeric strings

            $tempResult    .= substr($textSpan, 0, $posLRE + 3); // Copy everything preceding the numeric string
            $numericString = substr($textSpan, $posLRE + 3, $posPDF - $posLRE); // Separate the entire numeric string
            $textSpan      = substr($textSpan, $posPDF + 3);
            $posColon      = strpos($numericString, ':');
            if ($posColon === false) {
                // Nothing that looks like a time here
                $tempResult .= $numericString;
                continue;
            }
            $posBlank = strpos($numericString .
' ', ' '); 679 $posNbsp = strpos($numericString . ' ', ' '); 680 if ($posBlank < $posNbsp) { 681 $posSeparator = $posBlank; 682 $lengthSeparator = 1; 683 } else { 684 $posSeparator = $posNbsp; 685 $lengthSeparator = 6; 686 } 687 if ($posColon > $posSeparator) { 688 // We have a time string preceded by a blank: Exclude that blank from the numeric string 689 $tempResult .= substr($numericString, 0, $posSeparator); 690 $tempResult .= self::UTF8_PDF; 691 $tempResult .= substr($numericString, $posSeparator, $lengthSeparator); 692 $tempResult .= self::UTF8_LRE; 693 $numericString = substr($numericString, $posSeparator + $lengthSeparator); 694 } 695 696 $posBlank = strpos($numericString, ' '); 697 $posNbsp = strpos($numericString, ' '); 698 if ($posBlank === false && $posNbsp === false) { 699 // The time string isn't followed by a blank 700 $textSpan = $numericString . $textSpan; 701 continue; 702 } 703 704 // We have a time string followed by a blank: Exclude that blank from the numeric string 705 if ($posBlank === false) { 706 $posSeparator = $posNbsp; 707 $lengthSeparator = 6; 708 } elseif ($posNbsp === false) { 709 $posSeparator = $posBlank; 710 $lengthSeparator = 1; 711 } elseif ($posBlank < $posNbsp) { 712 $posSeparator = $posBlank; 713 $lengthSeparator = 1; 714 } else { 715 $posSeparator = $posNbsp; 716 $lengthSeparator = 6; 717 } 718 $tempResult .= substr($numericString, 0, $posSeparator); 719 $tempResult .= self::UTF8_PDF; 720 $tempResult .= substr($numericString, $posSeparator, $lengthSeparator); 721 $posSeparator += $lengthSeparator; 722 $numericString = substr($numericString, $posSeparator); 723 $textSpan = self::UTF8_LRE . $numericString . $textSpan; 724 } 725 $textSpan = $tempResult . $textSpan; 726 $trailingBlanks = ''; 727 $trailingBreaks = ''; 728 729 /* ****************************** LTR text handling ******************************** */ 730 731 if (self::$currentState === 'LTR') { 732 // Move trailing numeric strings to the following RTL text. 
Include any blanks preceding or following the numeric text too. 733 if (I18N::direction() === 'rtl' && self::$previousState === 'RTL' && !$theEnd) { 734 $trailingString = ''; 735 $savedSpan = $textSpan; 736 while ($textSpan !== '') { 737 // Look for trailing spaces and tentatively move them 738 if (substr($textSpan, -1) === ' ') { 739 $trailingString = ' ' . $trailingString; 740 $textSpan = substr($textSpan, 0, -1); 741 continue; 742 } 743 if (substr($textSpan, -6) === ' ') { 744 $trailingString = ' ' . $trailingString; 745 $textSpan = substr($textSpan, 0, -1); 746 continue; 747 } 748 if (substr($textSpan, -3) !== self::UTF8_PDF) { 749 // There is no trailing numeric string 750 $textSpan = $savedSpan; 751 break; 752 } 753 754 // We have a numeric string 755 $posStartNumber = strrpos($textSpan, self::UTF8_LRE); 756 if ($posStartNumber === false) { 757 $posStartNumber = 0; 758 } 759 $trailingString = substr($textSpan, $posStartNumber) . $trailingString; 760 $textSpan = substr($textSpan, 0, $posStartNumber); 761 762 // Look for more spaces and move them too 763 while ($textSpan !== '') { 764 if (substr($textSpan, -1) === ' ') { 765 $trailingString = ' ' . $trailingString; 766 $textSpan = substr($textSpan, 0, -1); 767 continue; 768 } 769 if (substr($textSpan, -6) === ' ') { 770 $trailingString = ' ' . $trailingString; 771 $textSpan = substr($textSpan, 0, -1); 772 continue; 773 } 774 break; 775 } 776 777 self::$waitingText = $trailingString . self::$waitingText; 778 break; 779 } 780 } 781 782 $savedSpan = $textSpan; 783 // Move any trailing <br>, optionally preceded or followed by blanks, outside this LTR span 784 while ($textSpan !== '') { 785 if (substr($textSpan, -1) === ' ') { 786 $trailingBlanks = ' ' . $trailingBlanks; 787 $textSpan = substr($textSpan, 0, -1); 788 continue; 789 } 790 if (substr('......' . $textSpan, -6) === ' ') { 791 $trailingBlanks = ' ' . 
$trailingBlanks; 792 $textSpan = substr($textSpan, 0, -6); 793 continue; 794 } 795 break; 796 } 797 while (substr($textSpan, -7) === '<LTRbr>') { 798 $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span 799 $textSpan = substr($textSpan, 0, -7); 800 } 801 if ($trailingBreaks !== '') { 802 while ($textSpan !== '') { 803 if (substr($textSpan, -1) === ' ') { 804 $trailingBreaks = ' ' . $trailingBreaks; 805 $textSpan = substr($textSpan, 0, -1); 806 continue; 807 } 808 if (substr($textSpan, -6) === ' ') { 809 $trailingBreaks = ' ' . $trailingBreaks; 810 $textSpan = substr($textSpan, 0, -6); 811 continue; 812 } 813 break; 814 } 815 self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span 816 } else { 817 $textSpan = $savedSpan; 818 } 819 820 $trailingBlanks = ''; 821 $trailingPunctuation = ''; 822 $trailingID = ''; 823 $trailingSeparator = ''; 824 $leadingSeparator = ''; 825 826 while (I18N::direction() === 'rtl') { 827 if (str_contains($result, self::START_RTL)) { 828 // Remove trailing blanks for inclusion in a separate LTR span 829 while ($textSpan !== '') { 830 if (substr($textSpan, -1) === ' ') { 831 $trailingBlanks = ' ' . $trailingBlanks; 832 $textSpan = substr($textSpan, 0, -1); 833 continue; 834 } 835 if (substr($textSpan, -6) === ' ') { 836 $trailingBlanks = ' ' . 
$trailingBlanks; 837 $textSpan = substr($textSpan, 0, -1); 838 continue; 839 } 840 break; 841 } 842 843 // Remove trailing punctuation for inclusion in a separate LTR span 844 if ($textSpan === '') { 845 $trailingChar = "\n"; 846 } else { 847 $trailingChar = substr($textSpan, -1); 848 } 849 if (str_contains(self::PUNCTUATION, $trailingChar)) { 850 $trailingPunctuation = $trailingChar; 851 $textSpan = substr($textSpan, 0, -1); 852 } 853 } 854 855 // Remove trailing ID numbers that look like "(xnnn)" for inclusion in a separate LTR span 856 while (true) { 857 if (substr($textSpan, -1) !== ')') { 858 break; 859 } // There is no trailing ')' 860 $posLeftParen = strrpos($textSpan, '('); 861 if ($posLeftParen === false) { 862 break; 863 } // There is no leading '(' 864 $temp = self::stripLrmRlm(substr($textSpan, $posLeftParen)); // Get rid of UTF8 control codes 865 866 // If the parenthesized text doesn't look like an ID number, 867 // we don't want to touch it. 868 // This check won’t work if somebody uses ID numbers with an unusual format. 869 $offset = 1; 870 $charArray = self::getChar($temp, $offset); // Get 1st character of parenthesized text 871 if (str_contains(self::NUMBERS, $charArray['letter'])) { 872 break; 873 } 874 $offset += $charArray['length']; // Point at 2nd character of parenthesized text 875 if (!str_contains(self::NUMBERS, substr($temp, $offset, 1))) { 876 break; 877 } 878 // 1st character of parenthesized text is alpha, 2nd character is a digit; last has to be a digit too 879 if (!str_contains(self::NUMBERS, substr($temp, -2, 1))) { 880 break; 881 } 882 883 $trailingID = substr($textSpan, $posLeftParen); 884 $textSpan = substr($textSpan, 0, $posLeftParen); 885 break; 886 } 887 888 // Look for " - " or blank preceding the ID number and remove it for inclusion in a separate LTR span 889 if ($trailingID !== '') { 890 while ($textSpan !== '') { 891 if (substr($textSpan, -1) === ' ') { 892 $trailingSeparator = ' ' . 
$trailingSeparator; 893 $textSpan = substr($textSpan, 0, -1); 894 continue; 895 } 896 if (substr($textSpan, -6) === ' ') { 897 $trailingSeparator = ' ' . $trailingSeparator; 898 $textSpan = substr($textSpan, 0, -6); 899 continue; 900 } 901 if (substr($textSpan, -1) === '-') { 902 $trailingSeparator = '-' . $trailingSeparator; 903 $textSpan = substr($textSpan, 0, -1); 904 continue; 905 } 906 break; 907 } 908 } 909 910 // Look for " - " preceding the text and remove it for inclusion in a separate LTR span 911 $foundSeparator = false; 912 $savedSpan = $textSpan; 913 while ($textSpan !== '') { 914 if (substr($textSpan, 0, 1) === ' ') { 915 $leadingSeparator = ' ' . $leadingSeparator; 916 $textSpan = substr($textSpan, 1); 917 continue; 918 } 919 if (substr($textSpan, 0, 6) === ' ') { 920 $leadingSeparator = ' ' . $leadingSeparator; 921 $textSpan = substr($textSpan, 6); 922 continue; 923 } 924 if (substr($textSpan, 0, 1) === '-') { 925 $leadingSeparator = '-' . $leadingSeparator; 926 $textSpan = substr($textSpan, 1); 927 $foundSeparator = true; 928 continue; 929 } 930 break; 931 } 932 if (!$foundSeparator) { 933 $textSpan = $savedSpan; 934 $leadingSeparator = ''; 935 } 936 break; 937 } 938 939 // We're done: finish the span 940 $textSpan = self::starredName($textSpan, 'LTR'); // Wrap starred name in <u> and </u> tags 941 while (true) { 942 // Remove blanks that precede <LTRbr> 943 if (str_contains($textSpan, ' <LTRbr>')) { 944 $textSpan = str_replace(' <LTRbr>', '<LTRbr>', $textSpan); 945 continue; 946 } 947 if (str_contains($textSpan, ' <LTRbr>')) { 948 $textSpan = str_replace(' <LTRbr>', '<LTRbr>', $textSpan); 949 continue; 950 } 951 break; 952 } 953 if ($leadingSeparator !== '') { 954 $result .= self::START_LTR . $leadingSeparator . self::END_LTR; 955 } 956 $result .= $textSpan . self::END_LTR; 957 if ($trailingSeparator !== '') { 958 $result .= self::START_LTR . $trailingSeparator . self::END_LTR; 959 } 960 if ($trailingID !== '') { 961 $result .= self::START_LTR . 
$trailingID . self::END_LTR; 962 } 963 if ($trailingPunctuation !== '') { 964 $result .= self::START_LTR . $trailingPunctuation . self::END_LTR; 965 } 966 if ($trailingBlanks !== '') { 967 $result .= self::START_LTR . $trailingBlanks . self::END_LTR; 968 } 969 } 970 971 /* ****************************** RTL text handling ******************************** */ 972 973 if (self::$currentState === 'RTL') { 974 $savedSpan = $textSpan; 975 976 // Move any trailing <br>, optionally followed by blanks, outside this RTL span 977 while ($textSpan !== '') { 978 if (substr($textSpan, -1) === ' ') { 979 $trailingBlanks = ' ' . $trailingBlanks; 980 $textSpan = substr($textSpan, 0, -1); 981 continue; 982 } 983 if (substr('......' . $textSpan, -6) === ' ') { 984 $trailingBlanks = ' ' . $trailingBlanks; 985 $textSpan = substr($textSpan, 0, -6); 986 continue; 987 } 988 break; 989 } 990 while (substr($textSpan, -7) === '<RTLbr>') { 991 $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span 992 $textSpan = substr($textSpan, 0, -7); 993 } 994 if ($trailingBreaks !== '') { 995 self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span 996 } else { 997 $textSpan = $savedSpan; 998 } 999 1000 // Move trailing numeric strings to the following LTR text. Include any blanks preceding or following the numeric text too. 1001 if (!$theEnd && I18N::direction() !== 'rtl') { 1002 $trailingString = ''; 1003 $savedSpan = $textSpan; 1004 while ($textSpan !== '') { 1005 // Look for trailing spaces and tentatively move them 1006 if (substr($textSpan, -1) === ' ') { 1007 $trailingString = ' ' . $trailingString; 1008 $textSpan = substr($textSpan, 0, -1); 1009 continue; 1010 } 1011 if (substr($textSpan, -6) === ' ') { 1012 $trailingString = ' ' . 
$trailingString; 1013 $textSpan = substr($textSpan, 0, -1); 1014 continue; 1015 } 1016 if (substr($textSpan, -3) !== self::UTF8_PDF) { 1017 // There is no trailing numeric string 1018 $textSpan = $savedSpan; 1019 break; 1020 } 1021 1022 // We have a numeric string 1023 $posStartNumber = strrpos($textSpan, self::UTF8_LRE); 1024 if ($posStartNumber === false) { 1025 $posStartNumber = 0; 1026 } 1027 $trailingString = substr($textSpan, $posStartNumber) . $trailingString; 1028 $textSpan = substr($textSpan, 0, $posStartNumber); 1029 1030 // Look for more spaces and move them too 1031 while ($textSpan !== '') { 1032 if (substr($textSpan, -1) === ' ') { 1033 $trailingString = ' ' . $trailingString; 1034 $textSpan = substr($textSpan, 0, -1); 1035 continue; 1036 } 1037 if (substr($textSpan, -6) === ' ') { 1038 $trailingString = ' ' . $trailingString; 1039 $textSpan = substr($textSpan, 0, -1); 1040 continue; 1041 } 1042 break; 1043 } 1044 1045 self::$waitingText = $trailingString . self::$waitingText; 1046 break; 1047 } 1048 } 1049 1050 // Trailing " - " needs to be prefixed to the following span 1051 if (!$theEnd && substr('...' . $textSpan, -3) === ' - ') { 1052 $textSpan = substr($textSpan, 0, -3); 1053 self::$waitingText = ' - ' . self::$waitingText; 1054 } 1055 1056 while (I18N::direction() === 'rtl') { 1057 // Look for " - " preceding <RTLbr> and relocate it to the front of the string 1058 $posDashString = strpos($textSpan, ' - <RTLbr>'); 1059 if ($posDashString === false) { 1060 break; 1061 } 1062 $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>'); 1063 if ($posStringStart === false) { 1064 $posStringStart = 0; 1065 } else { 1066 $posStringStart += 9; 1067 } // Point to the first char following the last <RTLbr> 1068 1069 $textSpan = substr($textSpan, 0, $posStringStart) . ' - ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . 
substr($textSpan, $posDashString + 3); 1070 } 1071 1072 // Strip leading spaces from the RTL text 1073 $countLeadingSpaces = 0; 1074 while ($textSpan !== '') { 1075 if (substr($textSpan, 0, 1) === ' ') { 1076 $countLeadingSpaces++; 1077 $textSpan = substr($textSpan, 1); 1078 continue; 1079 } 1080 if (substr($textSpan, 0, 6) === ' ') { 1081 $countLeadingSpaces++; 1082 $textSpan = substr($textSpan, 6); 1083 continue; 1084 } 1085 break; 1086 } 1087 1088 // Strip trailing spaces from the RTL text 1089 $countTrailingSpaces = 0; 1090 while ($textSpan !== '') { 1091 if (substr($textSpan, -1) === ' ') { 1092 $countTrailingSpaces++; 1093 $textSpan = substr($textSpan, 0, -1); 1094 continue; 1095 } 1096 if (substr($textSpan, -6) === ' ') { 1097 $countTrailingSpaces++; 1098 $textSpan = substr($textSpan, 0, -6); 1099 continue; 1100 } 1101 break; 1102 } 1103 1104 // Look for trailing " -", reverse it, and relocate it to the front of the string 1105 if (substr($textSpan, -2) === ' -') { 1106 $posDashString = strlen($textSpan) - 2; 1107 $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>'); 1108 if ($posStringStart === false) { 1109 $posStringStart = 0; 1110 } else { 1111 $posStringStart += 9; 1112 } // Point to the first char following the last <RTLbr> 1113 1114 $textSpan = substr($textSpan, 0, $posStringStart) . '- ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 2); 1115 } 1116 1117 if ($countLeadingSpaces !== 0) { 1118 $newLength = strlen($textSpan) + $countLeadingSpaces; 1119 $textSpan = str_pad($textSpan, $newLength, ' ', I18N::direction() === 'rtl' ? 
STR_PAD_LEFT : STR_PAD_RIGHT); 1120 } 1121 if ($countTrailingSpaces !== 0) { 1122 if (I18N::direction() === 'ltr') { 1123 if ($trailingBreaks === '') { 1124 // Move trailing RTL spaces to front of following LTR span 1125 $newLength = strlen(self::$waitingText) + $countTrailingSpaces; 1126 self::$waitingText = str_pad(self::$waitingText, $newLength, ' ', STR_PAD_LEFT); 1127 } 1128 } else { 1129 $newLength = strlen($textSpan) + $countTrailingSpaces; 1130 $textSpan = str_pad($textSpan, $newLength); 1131 } 1132 } 1133 1134 // We're done: finish the span 1135 $textSpan = self::starredName($textSpan, 'RTL'); // Wrap starred name in <u> and </u> tags 1136 $result .= $textSpan . self::END_RTL; 1137 } 1138 1139 if (self::$currentState !== 'LTR' && self::$currentState !== 'RTL') { 1140 $result .= $textSpan; 1141 } 1142 1143 $result .= $trailingBreaks; // Get rid of any waiting <br> 1144 } 1145} 1146