xref: /webtrees/app/Report/RightToLeftSupport.php (revision 6930e9b42b9925bfc3a874fc2aaa59aabd0d2418)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Report;
21
22use Fisharebest\Webtrees\I18N;
23
24use function ord;
25use function preg_replace;
26use function str_contains;
27use function str_pad;
28use function str_replace;
29use function strlen;
30use function strpos;
31use function strrpos;
32use function strtolower;
33use function strtoupper;
34use function substr;
35
36use const STR_PAD_LEFT;
37use const STR_PAD_RIGHT;
38
39/**
40 * RTL Functions for use in the PDF reports
41 */
42class RightToLeftSupport
43{
44    private const UTF8_LRM = "\xE2\x80\x8E"; // U+200E (Left to Right mark:  zero-width character with LTR directionality)
45    private const UTF8_RLM = "\xE2\x80\x8F"; // U+200F (Right to Left mark:  zero-width character with RTL directionality)
46    private const UTF8_LRO = "\xE2\x80\xAD"; // U+202D (Left to Right override: force everything following to LTR mode)
47    private const UTF8_RLO = "\xE2\x80\xAE"; // U+202E (Right to Left override: force everything following to RTL mode)
48    private const UTF8_LRE = "\xE2\x80\xAA"; // U+202A (Left to Right embedding: treat everything following as LTR text)
49    private const UTF8_RLE = "\xE2\x80\xAB"; // U+202B (Right to Left embedding: treat everything following as RTL text)
50    private const UTF8_PDF = "\xE2\x80\xAC"; // U+202C (Pop directional formatting: restore state prior to last LRO, RLO, LRE, RLE)
51
52    private const OPEN_PARENTHESES = '([{';
53
54    private const CLOSE_PARENTHESES = ')]}';
55
56    private const NUMBERS = '0123456789';
57
58    private const NUMBER_PREFIX = '+-'; // Treat these like numbers when at beginning or end of numeric strings
59
60    private const NUMBER_PUNCTUATION = '- ,.:/'; // Treat these like numbers when inside numeric strings
61
62    private const PUNCTUATION = ',.:;?!';
63
64    // Markup
65    private const START_LTR    = '<LTR>';
66    private const END_LTR      = '</LTR>';
67    private const START_RTL    = '<RTL>';
68    private const END_RTL      = '</RTL>';
69    private const LENGTH_START = 5;
70    private const LENGTH_END   = 6;
71
72    /* Were we previously processing LTR or RTL. */
73    private static string $previousState;
74
75    /* Are we currently processing LTR or RTL. */
76    private static string $currentState;
77
78    /* Text waiting to be processed. */
79    private static string $waitingText;
80
81    /* Offset into the text. */
82    private static int $posSpanStart;
83
84    /**
85     * This function strips &lrm; and &rlm; from the input string. It should be used for all
86     * text that has been passed through the PrintReady() function before that text is stored
87     * in the database. The database should NEVER contain these characters.
88     *
89     * @param string $inputText The string from which the &lrm; and &rlm; characters should be stripped
90     *
91     * @return string The input string, with &lrm; and &rlm; stripped
92     */
93    private static function stripLrmRlm(string $inputText): string
94    {
95        return str_replace([
96            self::UTF8_LRM,
97            self::UTF8_RLM,
98            self::UTF8_LRO,
99            self::UTF8_RLO,
100            self::UTF8_LRE,
101            self::UTF8_RLE,
102            self::UTF8_PDF,
103            '&lrm;',
104            '&rlm;',
105            '&LRM;',
106            '&RLM;',
107        ], '', $inputText);
108    }
109
110    /**
111     * This function encapsulates all texts in the input with <span dir='xxx'> and </span>
112     * according to the directionality specified.
113     *
114     * @param string $inputText Raw input
115     *
116     * @return string The string with all texts encapsulated as required
117     */
118    public static function spanLtrRtl(string $inputText): string
119    {
120        if ($inputText === '') {
121            // Nothing to do
122            return '';
123        }
124
125        $workingText = str_replace("\n", '<br>', $inputText);
126        $workingText = str_replace([
127            '<span class="starredname"><br>',
128            '<span<br>class="starredname">',
129        ], '<br><span class="starredname">', $workingText); // Reposition some incorrectly placed line breaks
130        $workingText = self::stripLrmRlm($workingText); // Get rid of any existing UTF8 control codes
131
132        self::$previousState = '';
133        self::$currentState  = strtoupper(I18N::direction());
134        $numberState         = false; // Set when we're inside a numeric string
135        $result              = '';
136        self::$waitingText   = '';
137        $openParDirection    = [];
138
139        self::beginCurrentSpan($result);
140
141        while ($workingText !== '') {
142            $charArray     = self::getChar($workingText, 0); // Get the next ASCII or UTF-8 character
143            $currentLetter = $charArray['letter'];
144            $currentLen    = $charArray['length'];
145
146            $openParIndex  = strpos(self::OPEN_PARENTHESES, $currentLetter); // Which opening parenthesis is this?
147            $closeParIndex = strpos(self::CLOSE_PARENTHESES, $currentLetter); // Which closing parenthesis is this?
148
149            switch ($currentLetter) {
150                case '<':
151                    // Assume this '<' starts an HTML element
152                    $endPos = strpos($workingText, '>'); // look for the terminating '>'
153                    if ($endPos === false) {
154                        $endPos = 0;
155                    }
156                    $currentLen += $endPos;
157                    $element    = substr($workingText, 0, $currentLen);
158                    $temp       = strtolower(substr($element, 0, 3));
159                    if (strlen($element) < 7 && $temp === '<br') {
160                        if ($numberState) {
161                            $numberState = false;
162                            if (self::$currentState === 'RTL') {
163                                self::$waitingText .= self::UTF8_PDF;
164                            }
165                        }
166                        self::breakCurrentSpan($result);
167                    } elseif (self::$waitingText === '') {
168                        $result .= $element;
169                    } else {
170                        self::$waitingText .= $element;
171                    }
172                    $workingText = substr($workingText, $currentLen);
173                    break;
174                case '&':
175                    // Assume this '&' starts an HTML entity
176                    $endPos = strpos($workingText, ';'); // look for the terminating ';'
177                    if ($endPos === false) {
178                        $endPos = 0;
179                    }
180                    $currentLen += $endPos;
181                    $entity     = substr($workingText, 0, $currentLen);
182                    if (strtolower($entity) === '&nbsp;') {
183                        $entity = '&nbsp;'; // Ensure consistent case for this entity
184                    }
185                    if (self::$waitingText === '') {
186                        $result .= $entity;
187                    } else {
188                        self::$waitingText .= $entity;
189                    }
190                    $workingText = substr($workingText, $currentLen);
191                    break;
192                case '{':
193                    if (substr($workingText, 1, 1) === '{') {
194                        // Assume this '{{' starts a TCPDF directive
195                        $endPos = strpos($workingText, '}}'); // look for the terminating '}}'
196                        if ($endPos === false) {
197                            $endPos = 0;
198                        }
199                        $currentLen        = $endPos + 2;
200                        $directive         = substr($workingText, 0, $currentLen);
201                        $workingText       = substr($workingText, $currentLen);
202                        $result            .= self::$waitingText . $directive;
203                        self::$waitingText = '';
204                        break;
205                    }
206                // no break
207                default:
208                    // Look for strings of numbers with optional leading or trailing + or -
209                    // and with optional embedded numeric punctuation
210                    if ($numberState) {
211                        // If we're inside a numeric string, look for reasons to end it
212                        $offset    = 0; // Be sure to look at the current character first
213                        $charArray = self::getChar($workingText . "\n", $offset);
214                        if (!str_contains(self::NUMBERS, $charArray['letter'])) {
215                            // This is not a digit. Is it numeric punctuation?
216                            if (substr($workingText . "\n", $offset, 6) === '&nbsp;') {
217                                $offset += 6; // This could be numeric punctuation
218                            } elseif (str_contains(self::NUMBER_PUNCTUATION, $charArray['letter'])) {
219                                $offset += $charArray['length']; // This could be numeric punctuation
220                            }
221                            // If the next character is a digit, the current character is numeric punctuation
222                            $charArray = self::getChar($workingText . "\n", $offset);
223                            if (!str_contains(self::NUMBERS, $charArray['letter'])) {
224                                // This is not a digit. End the run of digits and punctuation.
225                                $numberState = false;
226                                if (self::$currentState === 'RTL') {
227                                    if (!str_contains(self::NUMBER_PREFIX, $currentLetter)) {
228                                        $currentLetter = self::UTF8_PDF . $currentLetter;
229                                    } else {
230                                        $currentLetter .= self::UTF8_PDF; // Include a trailing + or - in the run
231                                    }
232                                }
233                            }
234                        }
235                    } elseif (str_contains(self::NUMBER_PREFIX, $currentLetter)) {
236                        // If we're outside a numeric string, look for reasons to start it
237                        // This might be a number lead-in
238                        $offset   = $currentLen;
239                        $nextChar = substr($workingText . "\n", $offset, 1);
240                        if (str_contains(self::NUMBERS, $nextChar)) {
241                            $numberState = true; // We found a digit: the lead-in is therefore numeric
242                            if (self::$currentState === 'RTL') {
243                                $currentLetter = self::UTF8_LRE . $currentLetter;
244                            }
245                        }
246                    } elseif (str_contains(self::NUMBERS, $currentLetter)) {
247                        $numberState = true; // The current letter is a digit
248                        if (self::$currentState === 'RTL') {
249                            $currentLetter = self::UTF8_LRE . $currentLetter;
250                        }
251                    }
252
253                    // Determine the directionality of the current UTF-8 character
254                    $newState = self::$currentState;
255
256                    while (true) {
257                        if (I18N::scriptDirection(I18N::textScript($currentLetter)) === 'rtl') {
258                            if (self::$currentState === '') {
259                                $newState = 'RTL';
260                                break;
261                            }
262
263                            if (self::$currentState === 'RTL') {
264                                break;
265                            }
266                            // Switch to RTL only if this isn't a solitary RTL letter
267                            $tempText = substr($workingText, $currentLen);
268                            while ($tempText !== '') {
269                                $nextCharArray = self::getChar($tempText, 0);
270                                $nextLetter    = $nextCharArray['letter'];
271                                $nextLen       = $nextCharArray['length'];
272                                $tempText      = substr($tempText, $nextLen);
273
274                                if (I18N::scriptDirection(I18N::textScript($nextLetter)) === 'rtl') {
275                                    $newState = 'RTL';
276                                    break 2;
277                                }
278
279                                if (str_contains(self::PUNCTUATION, $nextLetter) || str_contains(self::OPEN_PARENTHESES, $nextLetter)) {
280                                    $newState = 'RTL';
281                                    break 2;
282                                }
283
284                                if ($nextLetter === ' ') {
285                                    break;
286                                }
287                                $nextLetter .= substr($tempText . "\n", 0, 5);
288                                if ($nextLetter === '&nbsp;') {
289                                    break;
290                                }
291                            }
292                            // This is a solitary RTL letter : wrap it in UTF8 control codes to force LTR directionality
293                            $currentLetter = self::UTF8_LRO . $currentLetter . self::UTF8_PDF;
294                            $newState      = 'LTR';
295                            break;
296                        }
297                        if ($currentLen !== 1 || $currentLetter >= 'A' && $currentLetter <= 'Z' || $currentLetter >= 'a' && $currentLetter <= 'z') {
298                            // Since it’s neither Hebrew nor Arabic, this UTF-8 character or ASCII letter must be LTR
299                            $newState = 'LTR';
300                            break;
301                        }
302                        if ($closeParIndex !== false) {
303                            // This closing parenthesis has to inherit the matching opening parenthesis' directionality
304                            if (!empty($openParDirection[$closeParIndex]) && $openParDirection[$closeParIndex] !== '?') {
305                                $newState = $openParDirection[$closeParIndex];
306                            }
307                            $openParDirection[$closeParIndex] = '';
308                            break;
309                        }
310                        if ($openParIndex !== false) {
311                            // Opening parentheses always inherit the following directionality
312                            self::$waitingText .= $currentLetter;
313                            $workingText       = substr($workingText, $currentLen);
314                            while (true) {
315                                if ($workingText === '') {
316                                    break;
317                                }
318                                if (str_starts_with($workingText, ' ')) {
319                                    // Spaces following this left parenthesis inherit the following directionality too
320                                    self::$waitingText .= ' ';
321                                    $workingText       = substr($workingText, 1);
322                                    continue;
323                                }
324                                if (str_starts_with($workingText, '&nbsp;')) {
325                                    // Spaces following this left parenthesis inherit the following directionality too
326                                    self::$waitingText .= '&nbsp;';
327                                    $workingText       = substr($workingText, 6);
328                                    continue;
329                                }
330                                break;
331                            }
332                            $openParDirection[$openParIndex] = '?';
333                            break 2; // double break because we're waiting for more information
334                        }
335
336                        // We have a digit or a "normal" special character.
337                        //
338                        // When this character is not at the start of the input string, it inherits the preceding directionality;
339                        // at the start of the input string, it assumes the following directionality.
340                        //
341                        // Exceptions to this rule will be handled later during final clean-up.
342                        //
343                        self::$waitingText .= $currentLetter;
344                        $workingText       = substr($workingText, $currentLen);
345                        if (self::$currentState !== '') {
346                            $result            .= self::$waitingText;
347                            self::$waitingText = '';
348                        }
349                        break 2; // double break because we're waiting for more information
350                    }
351                    if ($newState !== self::$currentState) {
352                        // A direction change has occurred
353                        self::finishCurrentSpan($result);
354                        self::$previousState = self::$currentState;
355                        self::$currentState  = $newState;
356                        self::beginCurrentSpan($result);
357                    }
358                    self::$waitingText .= $currentLetter;
359                    $workingText       = substr($workingText, $currentLen);
360                    $result            .= self::$waitingText;
361                    self::$waitingText = '';
362
363                    foreach ($openParDirection as $index => $value) {
364                        // Since we now know the proper direction, remember it for all waiting opening parentheses
365                        if ($value === '?') {
366                            $openParDirection[$index] = self::$currentState;
367                        }
368                    }
369
370                    break;
371            }
372        }
373
374        // We're done. Finish last <span> if necessary
375        if ($numberState) {
376            if (self::$waitingText === '') {
377                if (self::$currentState === 'RTL') {
378                    $result .= self::UTF8_PDF;
379                }
380            } elseif (self::$currentState === 'RTL') {
381                self::$waitingText .= self::UTF8_PDF;
382            }
383        }
384        self::finishCurrentSpan($result, true);
385
386        // Get rid of any waiting text
387        if (self::$waitingText !== '') {
388            if (I18N::direction() === 'rtl' && self::$currentState === 'LTR') {
389                $result .= self::START_RTL;
390                $result .= self::$waitingText;
391                $result .= self::END_RTL;
392            } else {
393                $result .= self::START_LTR;
394                $result .= self::$waitingText;
395                $result .= self::END_LTR;
396            }
397            self::$waitingText = '';
398        }
399
400        // Lastly, do some more cleanups
401
402        // Move leading RTL numeric strings to following LTR text
403        // (this happens when the page direction is RTL and the original text begins with a number and is followed by LTR text)
404        while (substr($result, 0, self::LENGTH_START + 3) === self::START_RTL . self::UTF8_LRE) {
405            $spanEnd = strpos($result, self::END_RTL . self::START_LTR);
406            if ($spanEnd === false) {
407                break;
408            }
409            $textSpan = self::stripLrmRlm(substr($result, self::LENGTH_START + 3, $spanEnd - self::LENGTH_START - 3));
410            if (I18N::scriptDirection(I18N::textScript($textSpan)) === 'rtl') {
411                break;
412            }
413            $result = self::START_LTR . substr($result, self::LENGTH_START, $spanEnd - self::LENGTH_START) . substr($result, $spanEnd + self::LENGTH_START + self::LENGTH_END);
414            break;
415        }
416
417        // On RTL pages, put trailing "." in RTL numeric strings into its own RTL span
418        if (I18N::direction() === 'rtl') {
419            $result = str_replace(self::UTF8_PDF . '.' . self::END_RTL, self::UTF8_PDF . self::END_RTL . self::START_RTL . '.' . self::END_RTL, $result);
420        }
421
422        // Trim trailing blanks preceding <br> in LTR text
423        while (self::$previousState !== 'RTL') {
424            if (str_contains($result, ' <LTRbr>')) {
425                $result = str_replace(' <LTRbr>', '<LTRbr>', $result);
426                continue;
427            }
428            if (str_contains($result, '&nbsp;<LTRbr>')) {
429                $result = str_replace('&nbsp;<LTRbr>', '<LTRbr>', $result);
430                continue;
431            }
432            if (str_contains($result, ' <br>')) {
433                $result = str_replace(' <br>', '<br>', $result);
434                continue;
435            }
436            if (str_contains($result, '&nbsp;<br>')) {
437                $result = str_replace('&nbsp;<br>', '<br>', $result);
438                continue;
439            }
440            break; // Neither space nor &nbsp; : we're done
441        }
442
443        // Trim trailing blanks preceding <br> in RTL text
444        while (true) {
445            if (str_contains($result, ' <RTLbr>')) {
446                $result = str_replace(' <RTLbr>', '<RTLbr>', $result);
447                continue;
448            }
449            if (str_contains($result, '&nbsp;<RTLbr>')) {
450                $result = str_replace('&nbsp;<RTLbr>', '<RTLbr>', $result);
451                continue;
452            }
453            break; // Neither space nor &nbsp; : we're done
454        }
455
456        // Convert '<LTRbr>' and '<RTLbr'
457        $result = str_replace([
458            '<LTRbr>',
459            '<RTLbr>',
460        ], [
461            self::END_LTR . '<br>' . self::START_LTR,
462            self::END_RTL . '<br>' . self::START_RTL,
463        ], $result);
464
465        // Include leading indeterminate directional text in whatever follows
466        if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL && !str_starts_with($result . "\n", '<br>')) {
467            $leadingText = '';
468            while (true) {
469                if ($result === '') {
470                    $result = $leadingText;
471                    break;
472                }
473                if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL) {
474                    $leadingText .= substr($result, 0, 1);
475                    $result      = substr($result, 1);
476                    continue;
477                }
478                $result = substr($result, 0, self::LENGTH_START) . $leadingText . substr($result, self::LENGTH_START);
479                break;
480            }
481        }
482
483        // Include solitary "-" and "+" in surrounding RTL text
484        $result = str_replace([
485            self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL,
486            self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL,
487        ], [
488            '-',
489            '+',
490        ], $result);
491
492        //$result = strtr($result, [
493        //    self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL => '-',
494        //    self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL => '+',
495        //]);
496
497        // Remove empty spans
498        $result = str_replace([
499            self::START_LTR . self::END_LTR,
500            self::START_RTL . self::END_RTL,
501        ], '', $result);
502
503        // Finally, correct '<LTR>', '</LTR>', '<RTL>', and '</RTL>'
504        // LTR text: <span dir="ltr"> text </span>
505        // RTL text: <span dir="rtl"> text </span>
506
507        $result = str_replace([
508            self::START_LTR,
509            self::END_LTR,
510            self::START_RTL,
511            self::END_RTL,
512        ], [
513            '<span dir="ltr">',
514            '</span>',
515            '<span dir="rtl">',
516            '</span>',
517        ], $result);
518
519        return $result;
520    }
521
522    /**
523     * Wrap words that have an asterisk suffix in <u> and </u> tags.
524     * This should underline starred names to show the preferred name.
525     *
526     * @param string $textSpan
527     * @param string $direction
528     *
529     * @return string
530     */
531    private static function starredName(string $textSpan, string $direction): string
532    {
533        // To avoid a TCPDF bug that mixes up the word order, insert those <u> and </u> tags
534        // only when page and span directions are identical.
535        if ($direction === strtoupper(I18N::direction())) {
536            while (true) {
537                $starPos = strpos($textSpan, '*');
538                if ($starPos === false) {
539                    break;
540                }
541                $trailingText = substr($textSpan, $starPos + 1);
542                $textSpan     = substr($textSpan, 0, $starPos);
543                $wordStart    = strrpos($textSpan, ' '); // Find the start of the word
544                if ($wordStart !== false) {
545                    $leadingText = substr($textSpan, 0, $wordStart + 1);
546                    $wordText    = substr($textSpan, $wordStart + 1);
547                } else {
548                    $leadingText = '';
549                    $wordText    = $textSpan;
550                }
551                $textSpan = $leadingText . '<u>' . $wordText . '</u>' . $trailingText;
552            }
553            $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '<u>\1</u>', $textSpan);
554            // The &nbsp; is a work-around for a TCPDF bug eating blanks.
555            $textSpan = str_replace([
556                ' <u>',
557                '</u> ',
558            ], [
559                '&nbsp;<u>',
560                '</u>&nbsp;',
561            ], $textSpan);
562        } else {
563            // Text and page directions differ:  remove the <span> and </span>
564            $textSpan = preg_replace('~(.*)\*~', '\1', $textSpan);
565            $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '\1', $textSpan);
566        }
567
568        return $textSpan;
569    }
570
571    /**
572     * Get the next character from an input string
573     *
574     * @param string $text
575     * @param int    $offset
576     *
577     * @return array{'letter':string,'length':int}
578     */
579    private static function getChar(string $text, int $offset): array
580    {
581        if ($text === '') {
582            return [
583                'letter' => '',
584                'length' => 0,
585            ];
586        }
587
588        $char   = substr($text, $offset, 1);
589        $length = 1;
590        if ((ord($char) & 0xE0) === 0xC0) {
591            $length = 2;
592        }
593        if ((ord($char) & 0xF0) === 0xE0) {
594            $length = 3;
595        }
596        if ((ord($char) & 0xF8) === 0xF0) {
597            $length = 4;
598        }
599        $letter = substr($text, $offset, $length);
600
601        return [
602            'letter' => $letter,
603            'length' => $length,
604        ];
605    }
606
607    /**
608     * Insert <br> into current span
609     *
610     * @param string $result
611     *
612     * @return void
613     */
614    private static function breakCurrentSpan(string &$result): void
615    {
616        // Interrupt the current span, insert that <br>, and then continue the current span
617        $result            .= self::$waitingText;
618        self::$waitingText = '';
619
620        $breakString = '<' . self::$currentState . 'br>';
621        $result      .= $breakString;
622    }
623
624    /**
625     * Begin current span
626     *
627     * @param string $result
628     *
629     * @return void
630     */
631    private static function beginCurrentSpan(string &$result): void
632    {
633        if (self::$currentState === 'LTR') {
634            $result .= self::START_LTR;
635        }
636        if (self::$currentState === 'RTL') {
637            $result .= self::START_RTL;
638        }
639
640        self::$posSpanStart = strlen($result);
641    }
642
643    /**
644     * Finish current span
645     *
646     * @param string $result
647     * @param bool   $theEnd
648     *
649     * @return void
650     */
651    private static function finishCurrentSpan(string &$result, bool $theEnd = false): void
652    {
653        $textSpan = substr($result, self::$posSpanStart);
654        $result   = substr($result, 0, self::$posSpanStart);
655
656        // Get rid of empty spans, so that our check for presence of RTL will work
657        $result = str_replace([
658            self::START_LTR . self::END_LTR,
659            self::START_RTL . self::END_RTL,
660        ], '', $result);
661
662        // Look for numeric strings that are times (hh:mm:ss). These have to be separated from surrounding numbers.
663        $tempResult = '';
664        while ($textSpan !== '') {
665            $posColon = strpos($textSpan, ':');
666            if ($posColon === false) {
667                break;
668            } // No more possible time strings
669            $posLRE = strpos($textSpan, self::UTF8_LRE);
670            if ($posLRE === false) {
671                break;
672            } // No more numeric strings
673            $posPDF = strpos($textSpan, self::UTF8_PDF, $posLRE);
674            if ($posPDF === false) {
675                break;
676            } // No more numeric strings
677
678            $tempResult    .= substr($textSpan, 0, $posLRE + 3); // Copy everything preceding the numeric string
679            $numericString = substr($textSpan, $posLRE + 3, $posPDF - $posLRE); // Separate the entire numeric string
680            $textSpan      = substr($textSpan, $posPDF + 3);
681            $posColon      = strpos($numericString, ':');
682            if ($posColon === false) {
683                // Nothing that looks like a time here
684                $tempResult .= $numericString;
685                continue;
686            }
687            $posBlank = strpos($numericString . ' ', ' ');
688            $posNbsp  = strpos($numericString . '&nbsp;', '&nbsp;');
689            if ($posBlank < $posNbsp) {
690                $posSeparator    = $posBlank;
691                $lengthSeparator = 1;
692            } else {
693                $posSeparator    = $posNbsp;
694                $lengthSeparator = 6;
695            }
696            if ($posColon > $posSeparator) {
697                // We have a time string preceded by a blank: Exclude that blank from the numeric string
698                $tempResult    .= substr($numericString, 0, $posSeparator);
699                $tempResult    .= self::UTF8_PDF;
700                $tempResult    .= substr($numericString, $posSeparator, $lengthSeparator);
701                $tempResult    .= self::UTF8_LRE;
702                $numericString = substr($numericString, $posSeparator + $lengthSeparator);
703            }
704
705            $posBlank = strpos($numericString, ' ');
706            $posNbsp  = strpos($numericString, '&nbsp;');
707            if ($posBlank === false && $posNbsp === false) {
708                // The time string isn't followed by a blank
709                $textSpan = $numericString . $textSpan;
710                continue;
711            }
712
713            // We have a time string followed by a blank: Exclude that blank from the numeric string
714            if ($posBlank === false) {
715                $posSeparator    = $posNbsp;
716                $lengthSeparator = 6;
717            } elseif ($posNbsp === false) {
718                $posSeparator    = $posBlank;
719                $lengthSeparator = 1;
720            } elseif ($posBlank < $posNbsp) {
721                $posSeparator    = $posBlank;
722                $lengthSeparator = 1;
723            } else {
724                $posSeparator    = $posNbsp;
725                $lengthSeparator = 6;
726            }
727            $tempResult    .= substr($numericString, 0, $posSeparator);
728            $tempResult    .= self::UTF8_PDF;
729            $tempResult    .= substr($numericString, $posSeparator, $lengthSeparator);
730            $posSeparator  += $lengthSeparator;
731            $numericString = substr($numericString, $posSeparator);
732            $textSpan      = self::UTF8_LRE . $numericString . $textSpan;
733        }
734        $textSpan       = $tempResult . $textSpan;
735        $trailingBlanks = '';
736        $trailingBreaks = '';
737
738        /* ****************************** LTR text handling ******************************** */
739
740        if (self::$currentState === 'LTR') {
741            // Move trailing numeric strings to the following RTL text. Include any blanks preceding or following the numeric text too.
742            if (I18N::direction() === 'rtl' && self::$previousState === 'RTL' && !$theEnd) {
743                $trailingString = '';
744                $savedSpan      = $textSpan;
745                while ($textSpan !== '') {
746                    // Look for trailing spaces and tentatively move them
747                    if (str_ends_with($textSpan, ' ')) {
748                        $trailingString = ' ' . $trailingString;
749                        $textSpan       = substr($textSpan, 0, -1);
750                        continue;
751                    }
752                    if (str_ends_with($textSpan, '&nbsp;')) {
753                        $trailingString = '&nbsp;' . $trailingString;
754                        $textSpan       = substr($textSpan, 0, -1);
755                        continue;
756                    }
757                    if (substr($textSpan, -3) !== self::UTF8_PDF) {
758                        // There is no trailing numeric string
759                        $textSpan = $savedSpan;
760                        break;
761                    }
762
763                    // We have a numeric string
764                    $posStartNumber = strrpos($textSpan, self::UTF8_LRE);
765                    if ($posStartNumber === false) {
766                        $posStartNumber = 0;
767                    }
768                    $trailingString = substr($textSpan, $posStartNumber) . $trailingString;
769                    $textSpan       = substr($textSpan, 0, $posStartNumber);
770
771                    // Look for more spaces and move them too
772                    while ($textSpan !== '') {
773                        if (str_ends_with($textSpan, ' ')) {
774                            $trailingString = ' ' . $trailingString;
775                            $textSpan       = substr($textSpan, 0, -1);
776                            continue;
777                        }
778                        if (str_ends_with($textSpan, '&nbsp;')) {
779                            $trailingString = '&nbsp;' . $trailingString;
780                            $textSpan       = substr($textSpan, 0, -1);
781                            continue;
782                        }
783                        break;
784                    }
785
786                    self::$waitingText = $trailingString . self::$waitingText;
787                    break;
788                }
789            }
790
791            $savedSpan = $textSpan;
792            // Move any trailing <br>, optionally preceded or followed by blanks, outside this LTR span
793            while ($textSpan !== '') {
794                if (str_ends_with($textSpan, ' ')) {
795                    $trailingBlanks = ' ' . $trailingBlanks;
796                    $textSpan       = substr($textSpan, 0, -1);
797                    continue;
798                }
799                if (str_ends_with('......' . $textSpan, '&nbsp;')) {
800                    $trailingBlanks = '&nbsp;' . $trailingBlanks;
801                    $textSpan       = substr($textSpan, 0, -6);
802                    continue;
803                }
804                break;
805            }
806            while (str_ends_with($textSpan, '<LTRbr>')) {
807                $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span
808                $textSpan       = substr($textSpan, 0, -7);
809            }
810            if ($trailingBreaks !== '') {
811                while ($textSpan !== '') {
812                    if (str_ends_with($textSpan, ' ')) {
813                        $trailingBreaks = ' ' . $trailingBreaks;
814                        $textSpan       = substr($textSpan, 0, -1);
815                        continue;
816                    }
817                    if (str_ends_with($textSpan, '&nbsp;')) {
818                        $trailingBreaks = '&nbsp;' . $trailingBreaks;
819                        $textSpan       = substr($textSpan, 0, -6);
820                        continue;
821                    }
822                    break;
823                }
824                self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span
825            } else {
826                $textSpan = $savedSpan;
827            }
828
829            $trailingBlanks      = '';
830            $trailingPunctuation = '';
831            $trailingID          = '';
832            $trailingSeparator   = '';
833            $leadingSeparator    = '';
834
835            while (I18N::direction() === 'rtl') {
836                if (str_contains($result, self::START_RTL)) {
837                    // Remove trailing blanks for inclusion in a separate LTR span
838                    while ($textSpan !== '') {
839                        if (str_ends_with($textSpan, ' ')) {
840                            $trailingBlanks = ' ' . $trailingBlanks;
841                            $textSpan       = substr($textSpan, 0, -1);
842                            continue;
843                        }
844                        if (str_ends_with($textSpan, '&nbsp;')) {
845                            $trailingBlanks = '&nbsp;' . $trailingBlanks;
846                            $textSpan       = substr($textSpan, 0, -1);
847                            continue;
848                        }
849                        break;
850                    }
851
852                    // Remove trailing punctuation for inclusion in a separate LTR span
853                    if ($textSpan === '') {
854                        $trailingChar = "\n";
855                    } else {
856                        $trailingChar = substr($textSpan, -1);
857                    }
858                    if (str_contains(self::PUNCTUATION, $trailingChar)) {
859                        $trailingPunctuation = $trailingChar;
860                        $textSpan            = substr($textSpan, 0, -1);
861                    }
862                }
863
864                // Remove trailing ID numbers that look like "(xnnn)" for inclusion in a separate LTR span
865                while (true) {
866                    if (!str_ends_with($textSpan, ')')) {
867                        break;
868                    } // There is no trailing ')'
869                    $posLeftParen = strrpos($textSpan, '(');
870                    if ($posLeftParen === false) {
871                        break;
872                    } // There is no leading '('
873                    $temp = self::stripLrmRlm(substr($textSpan, $posLeftParen)); // Get rid of UTF8 control codes
874
875                    // If the parenthesized text doesn't look like an ID number,
876                    // we don't want to touch it.
877                    // This check won’t work if somebody uses ID numbers with an unusual format.
878                    $offset    = 1;
879                    $charArray = self::getChar($temp, $offset); // Get 1st character of parenthesized text
880                    if (str_contains(self::NUMBERS, $charArray['letter'])) {
881                        break;
882                    }
883                    $offset += $charArray['length']; // Point at 2nd character of parenthesized text
884                    if (!str_contains(self::NUMBERS, substr($temp, $offset, 1))) {
885                        break;
886                    }
887                    // 1st character of parenthesized text is alpha, 2nd character is a digit; last has to be a digit too
888                    if (!str_contains(self::NUMBERS, substr($temp, -2, 1))) {
889                        break;
890                    }
891
892                    $trailingID = substr($textSpan, $posLeftParen);
893                    $textSpan   = substr($textSpan, 0, $posLeftParen);
894                    break;
895                }
896
897                // Look for " - " or blank preceding the ID number and remove it for inclusion in a separate LTR span
898                if ($trailingID !== '') {
899                    while ($textSpan !== '') {
900                        if (str_ends_with($textSpan, ' ')) {
901                            $trailingSeparator = ' ' . $trailingSeparator;
902                            $textSpan          = substr($textSpan, 0, -1);
903                            continue;
904                        }
905                        if (str_ends_with($textSpan, '&nbsp;')) {
906                            $trailingSeparator = '&nbsp;' . $trailingSeparator;
907                            $textSpan          = substr($textSpan, 0, -6);
908                            continue;
909                        }
910                        if (str_ends_with($textSpan, '-')) {
911                            $trailingSeparator = '-' . $trailingSeparator;
912                            $textSpan          = substr($textSpan, 0, -1);
913                            continue;
914                        }
915                        break;
916                    }
917                }
918
919                // Look for " - " preceding the text and remove it for inclusion in a separate LTR span
920                $foundSeparator = false;
921                $savedSpan      = $textSpan;
922                while ($textSpan !== '') {
923                    if (str_starts_with($textSpan, ' ')) {
924                        $leadingSeparator = ' ' . $leadingSeparator;
925                        $textSpan         = substr($textSpan, 1);
926                        continue;
927                    }
928                    if (str_starts_with($textSpan, '&nbsp;')) {
929                        $leadingSeparator = '&nbsp;' . $leadingSeparator;
930                        $textSpan         = substr($textSpan, 6);
931                        continue;
932                    }
933                    if (str_starts_with($textSpan, '-')) {
934                        $leadingSeparator = '-' . $leadingSeparator;
935                        $textSpan         = substr($textSpan, 1);
936                        $foundSeparator   = true;
937                        continue;
938                    }
939                    break;
940                }
941                if (!$foundSeparator) {
942                    $textSpan         = $savedSpan;
943                    $leadingSeparator = '';
944                }
945                break;
946            }
947
948            // We're done: finish the span
949            $textSpan = self::starredName($textSpan, 'LTR'); // Wrap starred name in <u> and </u> tags
950            while (true) {
951                // Remove blanks that precede <LTRbr>
952                if (str_contains($textSpan, ' <LTRbr>')) {
953                    $textSpan = str_replace(' <LTRbr>', '<LTRbr>', $textSpan);
954                    continue;
955                }
956                if (str_contains($textSpan, '&nbsp;<LTRbr>')) {
957                    $textSpan = str_replace('&nbsp;<LTRbr>', '<LTRbr>', $textSpan);
958                    continue;
959                }
960                break;
961            }
962            if ($leadingSeparator !== '') {
963                $result .= self::START_LTR . $leadingSeparator . self::END_LTR;
964            }
965            $result .= $textSpan . self::END_LTR;
966            if ($trailingSeparator !== '') {
967                $result .= self::START_LTR . $trailingSeparator . self::END_LTR;
968            }
969            if ($trailingID !== '') {
970                $result .= self::START_LTR . $trailingID . self::END_LTR;
971            }
972            if ($trailingPunctuation !== '') {
973                $result .= self::START_LTR . $trailingPunctuation . self::END_LTR;
974            }
975            if ($trailingBlanks !== '') {
976                $result .= self::START_LTR . $trailingBlanks . self::END_LTR;
977            }
978        }
979
980        /* ****************************** RTL text handling ******************************** */
981
982        if (self::$currentState === 'RTL') {
983            $savedSpan = $textSpan;
984
985            // Move any trailing <br>, optionally followed by blanks, outside this RTL span
986            while ($textSpan !== '') {
987                if (str_ends_with($textSpan, ' ')) {
988                    $trailingBlanks = ' ' . $trailingBlanks;
989                    $textSpan       = substr($textSpan, 0, -1);
990                    continue;
991                }
992                if (str_ends_with('......' . $textSpan, '&nbsp;')) {
993                    $trailingBlanks = '&nbsp;' . $trailingBlanks;
994                    $textSpan       = substr($textSpan, 0, -6);
995                    continue;
996                }
997                break;
998            }
999            while (str_ends_with($textSpan, '<RTLbr>')) {
1000                $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span
1001                $textSpan       = substr($textSpan, 0, -7);
1002            }
1003            if ($trailingBreaks !== '') {
1004                self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span
1005            } else {
1006                $textSpan = $savedSpan;
1007            }
1008
1009            // Move trailing numeric strings to the following LTR text. Include any blanks preceding or following the numeric text too.
1010            if (!$theEnd && I18N::direction() !== 'rtl') {
1011                $trailingString = '';
1012                $savedSpan      = $textSpan;
1013                while ($textSpan !== '') {
1014                    // Look for trailing spaces and tentatively move them
1015                    if (str_ends_with($textSpan, ' ')) {
1016                        $trailingString = ' ' . $trailingString;
1017                        $textSpan       = substr($textSpan, 0, -1);
1018                        continue;
1019                    }
1020                    if (str_ends_with($textSpan, '&nbsp;')) {
1021                        $trailingString = '&nbsp;' . $trailingString;
1022                        $textSpan       = substr($textSpan, 0, -1);
1023                        continue;
1024                    }
1025                    if (substr($textSpan, -3) !== self::UTF8_PDF) {
1026                        // There is no trailing numeric string
1027                        $textSpan = $savedSpan;
1028                        break;
1029                    }
1030
1031                    // We have a numeric string
1032                    $posStartNumber = strrpos($textSpan, self::UTF8_LRE);
1033                    if ($posStartNumber === false) {
1034                        $posStartNumber = 0;
1035                    }
1036                    $trailingString = substr($textSpan, $posStartNumber) . $trailingString;
1037                    $textSpan       = substr($textSpan, 0, $posStartNumber);
1038
1039                    // Look for more spaces and move them too
1040                    while ($textSpan !== '') {
1041                        if (str_ends_with($textSpan, ' ')) {
1042                            $trailingString = ' ' . $trailingString;
1043                            $textSpan       = substr($textSpan, 0, -1);
1044                            continue;
1045                        }
1046                        if (str_ends_with($textSpan, '&nbsp;')) {
1047                            $trailingString = '&nbsp;' . $trailingString;
1048                            $textSpan       = substr($textSpan, 0, -1);
1049                            continue;
1050                        }
1051                        break;
1052                    }
1053
1054                    self::$waitingText = $trailingString . self::$waitingText;
1055                    break;
1056                }
1057            }
1058
1059            // Trailing " - " needs to be prefixed to the following span
1060            if (!$theEnd && str_ends_with('...' . $textSpan, ' - ')) {
1061                $textSpan          = substr($textSpan, 0, -3);
1062                self::$waitingText = ' - ' . self::$waitingText;
1063            }
1064
1065            while (I18N::direction() === 'rtl') {
1066                // Look for " - " preceding <RTLbr> and relocate it to the front of the string
1067                $posDashString = strpos($textSpan, ' - <RTLbr>');
1068                if ($posDashString === false) {
1069                    break;
1070                }
1071                $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>');
1072                if ($posStringStart === false) {
1073                    $posStringStart = 0;
1074                } else {
1075                    $posStringStart += 9;
1076                } // Point to the first char following the last <RTLbr>
1077
1078                $textSpan = substr($textSpan, 0, $posStringStart) . ' - ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 3);
1079            }
1080
1081            // Strip leading spaces from the RTL text
1082            $countLeadingSpaces = 0;
1083            while ($textSpan !== '') {
1084                if (str_starts_with($textSpan, ' ')) {
1085                    $countLeadingSpaces++;
1086                    $textSpan = substr($textSpan, 1);
1087                    continue;
1088                }
1089                if (str_starts_with($textSpan, '&nbsp;')) {
1090                    $countLeadingSpaces++;
1091                    $textSpan = substr($textSpan, 6);
1092                    continue;
1093                }
1094                break;
1095            }
1096
1097            // Strip trailing spaces from the RTL text
1098            $countTrailingSpaces = 0;
1099            while ($textSpan !== '') {
1100                if (str_ends_with($textSpan, ' ')) {
1101                    $countTrailingSpaces++;
1102                    $textSpan = substr($textSpan, 0, -1);
1103                    continue;
1104                }
1105                if (str_ends_with($textSpan, '&nbsp;')) {
1106                    $countTrailingSpaces++;
1107                    $textSpan = substr($textSpan, 0, -6);
1108                    continue;
1109                }
1110                break;
1111            }
1112
1113            // Look for trailing " -", reverse it, and relocate it to the front of the string
1114            if (str_ends_with($textSpan, ' -')) {
1115                $posDashString  = strlen($textSpan) - 2;
1116                $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>');
1117                if ($posStringStart === false) {
1118                    $posStringStart = 0;
1119                } else {
1120                    $posStringStart += 9;
1121                } // Point to the first char following the last <RTLbr>
1122
1123                $textSpan = substr($textSpan, 0, $posStringStart) . '- ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 2);
1124            }
1125
1126            if ($countLeadingSpaces !== 0) {
1127                $newLength = strlen($textSpan) + $countLeadingSpaces;
1128                $textSpan  = str_pad($textSpan, $newLength, ' ', I18N::direction() === 'rtl' ? STR_PAD_LEFT : STR_PAD_RIGHT);
1129            }
1130            if ($countTrailingSpaces !== 0) {
1131                if (I18N::direction() === 'ltr') {
1132                    if ($trailingBreaks === '') {
1133                        // Move trailing RTL spaces to front of following LTR span
1134                        $newLength         = strlen(self::$waitingText) + $countTrailingSpaces;
1135                        self::$waitingText = str_pad(self::$waitingText, $newLength, ' ', STR_PAD_LEFT);
1136                    }
1137                } else {
1138                    $newLength = strlen($textSpan) + $countTrailingSpaces;
1139                    $textSpan  = str_pad($textSpan, $newLength);
1140                }
1141            }
1142
1143            // We're done: finish the span
1144            $textSpan = self::starredName($textSpan, 'RTL'); // Wrap starred name in <u> and </u> tags
1145            $result   .= $textSpan . self::END_RTL;
1146        }
1147
1148        if (self::$currentState !== 'LTR' && self::$currentState !== 'RTL') {
1149            $result .= $textSpan;
1150        }
1151
1152        $result .= $trailingBreaks; // Get rid of any waiting <br>
1153    }
1154}
1155