xref: /webtrees/app/Report/RightToLeftSupport.php (revision ac71572d8462e396ed5a307f05b29381e49f9e6e)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Report;
21
22use Fisharebest\Webtrees\I18N;
23
24use function ord;
25use function preg_replace;
26use function str_contains;
27use function str_pad;
28use function str_replace;
29use function strlen;
30use function strpos;
31use function strrpos;
32use function strtolower;
33use function strtoupper;
34use function substr;
35
36use const STR_PAD_LEFT;
37use const STR_PAD_RIGHT;
38
39/**
40 * RTL Functions for use in the PDF reports
41 */
42class RightToLeftSupport
43{
44    private const UTF8_LRM = "\xE2\x80\x8E"; // U+200E (Left to Right mark:  zero-width character with LTR directionality)
45    private const UTF8_RLM = "\xE2\x80\x8F"; // U+200F (Right to Left mark:  zero-width character with RTL directionality)
46    private const UTF8_LRO = "\xE2\x80\xAD"; // U+202D (Left to Right override: force everything following to LTR mode)
47    private const UTF8_RLO = "\xE2\x80\xAE"; // U+202E (Right to Left override: force everything following to RTL mode)
48    private const UTF8_LRE = "\xE2\x80\xAA"; // U+202A (Left to Right embedding: treat everything following as LTR text)
49    private const UTF8_RLE = "\xE2\x80\xAB"; // U+202B (Right to Left embedding: treat everything following as RTL text)
50    private const UTF8_PDF = "\xE2\x80\xAC"; // U+202C (Pop directional formatting: restore state prior to last LRO, RLO, LRE, RLE)
51
52    private const OPEN_PARENTHESES = '([{';
53
54    private const CLOSE_PARENTHESES = ')]}';
55
56    private const NUMBERS = '0123456789';
57
58    private const NUMBER_PREFIX = '+-'; // Treat these like numbers when at beginning or end of numeric strings
59
60    private const NUMBER_PUNCTUATION = '- ,.:/'; // Treat these like numbers when inside numeric strings
61
62    private const PUNCTUATION = ',.:;?!';
63
64    // Markup
65    private const START_LTR    = '<LTR>';
66    private const END_LTR      = '</LTR>';
67    private const START_RTL    = '<RTL>';
68    private const END_RTL      = '</RTL>';
69    private const LENGTH_START = 5;
70    private const LENGTH_END   = 6;
71
72    /* Were we previously processing LTR or RTL. */
73    private static string $previousState;
74
75    /* Are we currently processing LTR or RTL. */
76    private static string $currentState;
77
78    /* Text waiting to be processed. */
79    private static string $waitingText;
80
81    /* Offset into the text. */
82    private static int $posSpanStart;
83
84    /**
85     * This function strips &lrm; and &rlm; from the input string. It should be used for all
86     * text that has been passed through the PrintReady() function before that text is stored
87     * in the database. The database should NEVER contain these characters.
88     *
89     * @param string $inputText The string from which the &lrm; and &rlm; characters should be stripped
90     *
91     * @return string The input string, with &lrm; and &rlm; stripped
92     */
93    private static function stripLrmRlm(string $inputText): string
94    {
95        return str_replace([
96            self::UTF8_LRM,
97            self::UTF8_RLM,
98            self::UTF8_LRO,
99            self::UTF8_RLO,
100            self::UTF8_LRE,
101            self::UTF8_RLE,
102            self::UTF8_PDF,
103            '&lrm;',
104            '&rlm;',
105            '&LRM;',
106            '&RLM;',
107        ], '', $inputText);
108    }
109
110    /**
111     * Split the input into runs of left-to-right and right-to-left text, and wrap each run in
112     * <span dir="ltr"> ... </span> or <span dir="rtl"> ... </span>.
     *
     * Newlines are converted to <br> and existing directional control codes are stripped before
     * scanning. The scan starts in the page direction reported by I18N::direction(). HTML
     * elements, HTML entities and "{{...}}" directives are copied through without being examined
     * for directionality; short elements starting with "<br" interrupt the current span.
     * While the current run is RTL, numeric strings are bracketed with LRE ... PDF control codes.
     *
     * The scanner state lives in static properties of this class; they are re-initialised at the
     * start of every call.
113     *
114     * @param string $inputText Raw input
115     *
116     * @return string The string with all texts encapsulated as required
117     */
118    public static function spanLtrRtl(string $inputText): string
119    {
120        if ($inputText === '') {
121            // Nothing to do
122            return '';
123        }
124
125        $workingText = str_replace("\n", '<br>', $inputText);
126        $workingText = str_replace([
127            '<span class="starredname"><br>',
128            '<span<br>class="starredname">',
129        ], '<br><span class="starredname">', $workingText); // Reposition some incorrectly placed line breaks
130        $workingText = self::stripLrmRlm($workingText); // Get rid of any existing UTF8 control codes
131
        // Re-initialise the static scanner state. Scanning starts in the (upper-cased) page direction.
132        self::$previousState = '';
133        self::$currentState  = strtoupper(I18N::direction());
134        $numberState         = false; // Set when we're inside a numeric string
135        $result              = '';
136        self::$waitingText   = '';
137        $openParDirection    = [];
138
        // Emit the opening marker for the initial direction and remember where the span's text starts
139        self::beginCurrentSpan($result);
140
        // Main scan: each iteration consumes one character, element, entity or directive from the front of $workingText
141        while ($workingText !== '') {
142            $charArray     = self::getChar($workingText, 0); // Get the next ASCII or UTF-8 character
143            $currentLetter = $charArray['letter'];
144            $currentLen    = $charArray['length'];
145
146            $openParIndex  = strpos(self::OPEN_PARENTHESES, $currentLetter); // Which opening parenthesis is this?
147            $closeParIndex = strpos(self::CLOSE_PARENTHESES, $currentLetter); // Which closing parenthesis is this?
148
149            switch ($currentLetter) {
150                case '<':
151                    // Assume this '<' starts an HTML element
152                    $endPos = strpos($workingText, '>'); // look for the terminating '>'
153                    if ($endPos === false) {
154                        $endPos = 0;
155                    }
                    // $currentLen is 1 for '<', so this covers everything up to and including '>'
                    // (or only the '<' itself when there is no terminating '>')
156                    $currentLen += $endPos;
157                    $element    = substr($workingText, 0, $currentLen);
158                    $temp       = strtolower(substr($element, 0, 3));
                    // Short elements starting with '<br' (such as <br>, <br/> or <br />) are line breaks
159                    if (strlen($element) < 7 && $temp === '<br') {
160                        if ($numberState) {
161                            $numberState = false;
162                            if (self::$currentState === 'RTL') {
163                                self::$waitingText .= self::UTF8_PDF;
164                            }
165                        }
166                        self::breakCurrentSpan($result);
167                    } elseif (self::$waitingText === '') {
168                        $result .= $element;
169                    } else {
170                        self::$waitingText .= $element;
171                    }
172                    $workingText = substr($workingText, $currentLen);
173                    break;
174                case '&':
175                    // Assume this '&' starts an HTML entity
176                    $endPos = strpos($workingText, ';'); // look for the terminating ';'
177                    if ($endPos === false) {
178                        $endPos = 0;
179                    }
                    // As above: everything up to and including ';', or only the '&' when there is no ';'
180                    $currentLen += $endPos;
181                    $entity     = substr($workingText, 0, $currentLen);
182                    if (strtolower($entity) === '&nbsp;') {
183                        $entity = '&nbsp;'; // Ensure consistent case for this entity
184                    }
185                    if (self::$waitingText === '') {
186                        $result .= $entity;
187                    } else {
188                        self::$waitingText .= $entity;
189                    }
190                    $workingText = substr($workingText, $currentLen);
191                    break;
192                case '{':
193                    if (substr($workingText, 1, 1) === '{') {
194                        // Assume this '{{' starts a TCPDF directive
195                        $endPos = strpos($workingText, '}}'); // look for the terminating '}}'
196                        if ($endPos === false) {
197                            $endPos = 0;
198                        }
                        // Length of the directive including the closing '}}' (only the opening '{{' when unterminated)
199                        $currentLen        = $endPos + 2;
200                        $directive         = substr($workingText, 0, $currentLen);
201                        $workingText       = substr($workingText, $currentLen);
                        // Flush any waiting text, then copy the directive through untouched
202                        $result            .= self::$waitingText . $directive;
203                        self::$waitingText = '';
204                        break;
205                    }
                // A single '{' is an ordinary opening parenthesis: handle it like any other character
206                // no break
207                default:
208                    // Look for strings of numbers with optional leading or trailing + or -
209                    // and with optional embedded numeric punctuation
210                    if ($numberState) {
211                        // If we're inside a numeric string, look for reasons to end it
212                        $offset    = 0; // Be sure to look at the current character first
                        // The appended "\n" guarantees there is always a character to look at
213                        $charArray = self::getChar($workingText . "\n", $offset);
214                        if (!str_contains(self::NUMBERS, $charArray['letter'])) {
215                            // This is not a digit. Is it numeric punctuation?
216                            if (substr($workingText . "\n", $offset, 6) === '&nbsp;') {
217                                $offset += 6; // This could be numeric punctuation
218                            } elseif (str_contains(self::NUMBER_PUNCTUATION, $charArray['letter'])) {
219                                $offset += $charArray['length']; // This could be numeric punctuation
220                            }
221                            // If the next character is a digit, the current character is numeric punctuation
222                            $charArray = self::getChar($workingText . "\n", $offset);
223                            if (!str_contains(self::NUMBERS, $charArray['letter'])) {
224                                // This is not a digit. End the run of digits and punctuation.
225                                $numberState = false;
226                                if (self::$currentState === 'RTL') {
227                                    if (!str_contains(self::NUMBER_PREFIX, $currentLetter)) {
228                                        $currentLetter = self::UTF8_PDF . $currentLetter;
229                                    } else {
230                                        $currentLetter .= self::UTF8_PDF; // Include a trailing + or - in the run
231                                    }
232                                }
233                            }
234                        }
235                    } elseif (str_contains(self::NUMBER_PREFIX, $currentLetter)) {
236                        // If we're outside a numeric string, look for reasons to start it
237                        // This might be a number lead-in
238                        $offset   = $currentLen;
239                        $nextChar = substr($workingText . "\n", $offset, 1);
240                        if (str_contains(self::NUMBERS, $nextChar)) {
241                            $numberState = true; // We found a digit: the lead-in is therefore numeric
242                            if (self::$currentState === 'RTL') {
243                                $currentLetter = self::UTF8_LRE . $currentLetter;
244                            }
245                        }
246                    } elseif (str_contains(self::NUMBERS, $currentLetter)) {
247                        $numberState = true; // The current letter is a digit
248                        if (self::$currentState === 'RTL') {
249                            $currentLetter = self::UTF8_LRE . $currentLetter;
250                        }
251                    }
252
253                    // Determine the directionality of the current UTF-8 character
254                    $newState = self::$currentState;
255
                    // Not a real loop: every path through it ends with a break, so it acts as a block
                    // that can be left early. Leaving it with a plain 'break' continues at the
                    // direction-change check below.
256                    while (true) {
257                        if (I18N::scriptDirection(I18N::textScript($currentLetter)) === 'rtl') {
258                            if (self::$currentState === '') {
259                                $newState = 'RTL';
260                                break;
261                            }
262
263                            if (self::$currentState === 'RTL') {
264                                break;
265                            }
266                            // Switch to RTL only if this isn't a solitary RTL letter
                            // Look ahead, one character at a time, up to the next blank
267                            $tempText = substr($workingText, $currentLen);
268                            while ($tempText !== '') {
269                                $nextCharArray = self::getChar($tempText, 0);
270                                $nextLetter    = $nextCharArray['letter'];
271                                $nextLen       = $nextCharArray['length'];
272                                $tempText      = substr($tempText, $nextLen);
273
274                                if (I18N::scriptDirection(I18N::textScript($nextLetter)) === 'rtl') {
275                                    $newState = 'RTL';
                                    // Leaves the look-ahead loop and the enclosing block
276                                    break 2;
277                                }
278
279                                if (str_contains(self::PUNCTUATION, $nextLetter) || str_contains(self::OPEN_PARENTHESES, $nextLetter)) {
280                                    $newState = 'RTL';
281                                    break 2;
282                                }
283
284                                if ($nextLetter === ' ') {
285                                    break;
286                                }
                                // Treat a following '&nbsp;' entity like a blank
287                                $nextLetter .= substr($tempText . "\n", 0, 5);
288                                if ($nextLetter === '&nbsp;') {
289                                    break;
290                                }
291                            }
292                            // This is a solitary RTL letter : wrap it in UTF8 control codes to force LTR directionality
293                            $currentLetter = self::UTF8_LRO . $currentLetter . self::UTF8_PDF;
294                            $newState      = 'LTR';
295                            break;
296                        }
                        // '&&' binds tighter than '||': a multi-byte character, or ASCII A-Z, or ASCII a-z
297                        if ($currentLen !== 1 || $currentLetter >= 'A' && $currentLetter <= 'Z' || $currentLetter >= 'a' && $currentLetter <= 'z') {
298                            // Since it’s neither Hebrew nor Arabic, this UTF-8 character or ASCII letter must be LTR
299                            $newState = 'LTR';
300                            break;
301                        }
302                        if ($closeParIndex !== false) {
303                            // This closing parenthesis has to inherit the matching opening parenthesis' directionality
304                            if (!empty($openParDirection[$closeParIndex]) && $openParDirection[$closeParIndex] !== '?') {
305                                $newState = $openParDirection[$closeParIndex];
306                            }
307                            $openParDirection[$closeParIndex] = '';
308                            break;
309                        }
                        // Everything else is direction-neutral: hold it back until the direction is known
310                        self::$waitingText .= $currentLetter;
311                        $workingText       = substr($workingText, $currentLen);
312                        if ($openParIndex !== false) {
313                            // Opening parentheses always inherit the following directionality
314                            while (true) {
315                                if ($workingText === '') {
316                                    break;
317                                }
318                                if (str_starts_with($workingText, ' ')) {
319                                    // Spaces following this left parenthesis inherit the following directionality too
320                                    self::$waitingText .= ' ';
321                                    $workingText       = substr($workingText, 1);
322                                    continue;
323                                }
324                                if (str_starts_with($workingText, '&nbsp;')) {
325                                    // Spaces following this left parenthesis inherit the following directionality too
326                                    self::$waitingText .= '&nbsp;';
327                                    $workingText       = substr($workingText, 6);
328                                    continue;
329                                }
330                                break;
331                            }
                            // '?' marks a parenthesis whose direction is not yet known
332                            $openParDirection[$openParIndex] = '?';
333                            break 2; // double break because we're waiting for more information
334                        }
335
336                        // We have a digit or a "normal" special character.
337                        //
338                        // When this character is not at the start of the input string, it inherits the preceding directionality;
339                        // at the start of the input string, it assumes the following directionality.
340                        //
341                        // Exceptions to this rule will be handled later during final clean-up.
342                        //
343                        if (self::$currentState !== '') {
344                            $result            .= self::$waitingText;
345                            self::$waitingText = '';
346                        }
347                        break 2; // double break because we're waiting for more information
348                    }
349                    if ($newState !== self::$currentState) {
350                        // A direction change has occurred
351                        self::finishCurrentSpan($result);
352                        self::$previousState = self::$currentState;
353                        self::$currentState  = $newState;
354                        self::beginCurrentSpan($result);
355                    }
                    // Output the waiting text followed by the current character, and consume the character
356                    self::$waitingText .= $currentLetter;
357                    $workingText       = substr($workingText, $currentLen);
358                    $result            .= self::$waitingText;
359                    self::$waitingText = '';
360
361                    foreach ($openParDirection as $index => $value) {
362                        // Since we now know the proper direction, remember it for all waiting opening parentheses
363                        if ($value === '?') {
364                            $openParDirection[$index] = self::$currentState;
365                        }
366                    }
367
368                    break;
369            }
370        }
371
372        // We're done. Finish last <span> if necessary
        // An unterminated numeric string in RTL text still needs its closing PDF control code
373        if ($numberState) {
374            if (self::$waitingText === '') {
375                if (self::$currentState === 'RTL') {
376                    $result .= self::UTF8_PDF;
377                }
378            } elseif (self::$currentState === 'RTL') {
379                self::$waitingText .= self::UTF8_PDF;
380            }
381        }
382        self::finishCurrentSpan($result, true);
383
384        // Get rid of any waiting text
385        if (self::$waitingText !== '') {
386            if (I18N::direction() === 'rtl' && self::$currentState === 'LTR') {
387                $result .= self::START_RTL;
388                $result .= self::$waitingText;
389                $result .= self::END_RTL;
390            } else {
391                $result .= self::START_LTR;
392                $result .= self::$waitingText;
393                $result .= self::END_LTR;
394            }
395            self::$waitingText = '';
396        }
397
398        // Lastly, do some more cleanups
399
400        // Move leading RTL numeric strings to following LTR text
401        // (this happens when the page direction is RTL and the original text begins with a number and is followed by LTR text)
        // This 'while' executes at most once: every path through its body ends with 'break'.
        // The '+ 3' is the byte length of the UTF-8 encoded LRE control code.
402        while (substr($result, 0, self::LENGTH_START + 3) === self::START_RTL . self::UTF8_LRE) {
403            $spanEnd = strpos($result, self::END_RTL . self::START_LTR);
404            if ($spanEnd === false) {
405                break;
406            }
407            $textSpan = self::stripLrmRlm(substr($result, self::LENGTH_START + 3, $spanEnd - self::LENGTH_START - 3));
408            if (I18N::scriptDirection(I18N::textScript($textSpan)) === 'rtl') {
409                break;
410            }
            // Re-tag the leading span as LTR and drop the '</RTL><LTR>' boundary, merging it with the LTR span that follows
411            $result = self::START_LTR . substr($result, self::LENGTH_START, $spanEnd - self::LENGTH_START) . substr($result, $spanEnd + self::LENGTH_START + self::LENGTH_END);
412            break;
413        }
414
415        // On RTL pages, put trailing "." in RTL numeric strings into its own RTL span
416        if (I18N::direction() === 'rtl') {
417            $result = str_replace(self::UTF8_PDF . '.' . self::END_RTL, self::UTF8_PDF . self::END_RTL . self::START_RTL . '.' . self::END_RTL, $result);
418        }
419
420        // Trim trailing blanks preceding <br> in LTR text
        // ('<LTRbr>', '<RTLbr>' and '<br>' are the break markers inserted by breakCurrentSpan())
421        while (self::$previousState !== 'RTL') {
422            if (str_contains($result, ' <LTRbr>')) {
423                $result = str_replace(' <LTRbr>', '<LTRbr>', $result);
424                continue;
425            }
426            if (str_contains($result, '&nbsp;<LTRbr>')) {
427                $result = str_replace('&nbsp;<LTRbr>', '<LTRbr>', $result);
428                continue;
429            }
430            if (str_contains($result, ' <br>')) {
431                $result = str_replace(' <br>', '<br>', $result);
432                continue;
433            }
434            if (str_contains($result, '&nbsp;<br>')) {
435                $result = str_replace('&nbsp;<br>', '<br>', $result);
436                continue;
437            }
438            break; // Neither space nor &nbsp; : we're done
439        }
440
441        // Trim trailing blanks preceding <br> in RTL text
442        while (true) {
443            if (str_contains($result, ' <RTLbr>')) {
444                $result = str_replace(' <RTLbr>', '<RTLbr>', $result);
445                continue;
446            }
447            if (str_contains($result, '&nbsp;<RTLbr>')) {
448                $result = str_replace('&nbsp;<RTLbr>', '<RTLbr>', $result);
449                continue;
450            }
451            break; // Neither space nor &nbsp; : we're done
452        }
453
454        // Convert '<LTRbr>' and '<RTLbr>': close the span, insert the <br>, and re-open the span
455        $result = str_replace([
456            '<LTRbr>',
457            '<RTLbr>',
458        ], [
459            self::END_LTR . '<br>' . self::START_LTR,
460            self::END_RTL . '<br>' . self::START_RTL,
461        ], $result);
462
463        // Include leading indeterminate directional text in whatever follows
        // (the appended "\n" is only used for the prefix comparisons; it is never added to $result)
464        if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL && !str_starts_with($result . "\n", '<br>')) {
465            $leadingText = '';
466            while (true) {
467                if ($result === '') {
                    // No span was found at all: keep the text as it was
468                    $result = $leadingText;
469                    break;
470                }
471                if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL) {
                    // Move one byte from the front of $result to $leadingText
472                    $leadingText .= substr($result, 0, 1);
473                    $result      = substr($result, 1);
474                    continue;
475                }
                // Re-insert the collected text just after the first opening marker
476                $result = substr($result, 0, self::LENGTH_START) . $leadingText . substr($result, self::LENGTH_START);
477                break;
478            }
479        }
480
481        // Include solitary "-" and "+" in surrounding RTL text
482        $result = str_replace([
483            self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL,
484            self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL,
485        ], [
486            '-',
487            '+',
488        ], $result);
489
490        //$result = strtr($result, [
491        //    self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL => '-',
492        //    self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL => '+',
493        //]);
494
495        // Remove empty spans
496        $result = str_replace([
497            self::START_LTR . self::END_LTR,
498            self::START_RTL . self::END_RTL,
499        ], '', $result);
500
501        // Finally, correct '<LTR>', '</LTR>', '<RTL>', and '</RTL>'
502        // LTR text: <span dir="ltr"> text </span>
503        // RTL text: <span dir="rtl"> text </span>
504
505        $result = str_replace([
506            self::START_LTR,
507            self::END_LTR,
508            self::START_RTL,
509            self::END_RTL,
510        ], [
511            '<span dir="ltr">',
512            '</span>',
513            '<span dir="rtl">',
514            '</span>',
515        ], $result);
516
517        return $result;
518    }
519
520    /**
521     * Wrap words that have an asterisk suffix in <u> and </u> tags.
522     * This should underline starred names to show the preferred name.
523     *
524     * @param string $textSpan
525     * @param string $direction
526     *
527     * @return string
528     */
529    private static function starredName(string $textSpan, string $direction): string
530    {
531        // To avoid a TCPDF bug that mixes up the word order, insert those <u> and </u> tags
532        // only when page and span directions are identical.
533        if ($direction === strtoupper(I18N::direction())) {
534            while (true) {
535                $starPos = strpos($textSpan, '*');
536                if ($starPos === false) {
537                    break;
538                }
539                $trailingText = substr($textSpan, $starPos + 1);
540                $textSpan     = substr($textSpan, 0, $starPos);
541                $wordStart    = strrpos($textSpan, ' '); // Find the start of the word
542                if ($wordStart !== false) {
543                    $leadingText = substr($textSpan, 0, $wordStart + 1);
544                    $wordText    = substr($textSpan, $wordStart + 1);
545                } else {
546                    $leadingText = '';
547                    $wordText    = $textSpan;
548                }
549                $textSpan = $leadingText . '<u>' . $wordText . '</u>' . $trailingText;
550            }
551            $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '<u>\1</u>', $textSpan);
552            // The &nbsp; is a work-around for a TCPDF bug eating blanks.
553            $textSpan = str_replace([
554                ' <u>',
555                '</u> ',
556            ], [
557                '&nbsp;<u>',
558                '</u>&nbsp;',
559            ], $textSpan);
560        } else {
561            // Text and page directions differ:  remove the <span> and </span>
562            $textSpan = preg_replace('~(.*)\*~', '\1', $textSpan);
563            $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '\1', $textSpan);
564        }
565
566        return $textSpan;
567    }
568
569    /**
570     * Get the next character from an input string
571     *
572     * @param string $text
573     * @param int    $offset
574     *
575     * @return array{letter:string,length:int}
576     */
577    private static function getChar(string $text, int $offset): array
578    {
579        if ($text === '') {
580            return [
581                'letter' => '',
582                'length' => 0,
583            ];
584        }
585
586        $char   = substr($text, $offset, 1);
587        $length = 1;
588        if ((ord($char) & 0xE0) === 0xC0) {
589            $length = 2;
590        }
591        if ((ord($char) & 0xF0) === 0xE0) {
592            $length = 3;
593        }
594        if ((ord($char) & 0xF8) === 0xF0) {
595            $length = 4;
596        }
597        $letter = substr($text, $offset, $length);
598
599        return [
600            'letter' => $letter,
601            'length' => $length,
602        ];
603    }
604
605    /**
606     * Insert <br> into current span
607     *
608     * @param string $result
609     *
610     * @return void
611     */
612    private static function breakCurrentSpan(string &$result): void
613    {
614        // Interrupt the current span, insert that <br>, and then continue the current span
615        $result            .= self::$waitingText;
616        self::$waitingText = '';
617
618        $breakString = '<' . self::$currentState . 'br>';
619        $result      .= $breakString;
620    }
621
622    /**
623     * Begin current span
624     *
625     * @param string $result
626     *
627     * @return void
628     */
629    private static function beginCurrentSpan(string &$result): void
630    {
631        if (self::$currentState === 'LTR') {
632            $result .= self::START_LTR;
633        }
634        if (self::$currentState === 'RTL') {
635            $result .= self::START_RTL;
636        }
637
638        self::$posSpanStart = strlen($result);
639    }
640
641    /**
642     * Finish current span
643     *
644     * @param string $result
645     * @param bool   $theEnd
646     *
647     * @return void
648     */
649    private static function finishCurrentSpan(string &$result, bool $theEnd = false): void
650    {
651        $textSpan = substr($result, self::$posSpanStart);
652        $result   = substr($result, 0, self::$posSpanStart);
653
654        // Get rid of empty spans, so that our check for presence of RTL will work
655        $result = str_replace([
656            self::START_LTR . self::END_LTR,
657            self::START_RTL . self::END_RTL,
658        ], '', $result);
659
660        // Look for numeric strings that are times (hh:mm:ss). These have to be separated from surrounding numbers.
661        $tempResult = '';
662        while ($textSpan !== '') {
663            $posColon = strpos($textSpan, ':');
664            if ($posColon === false) {
665                break;
666            } // No more possible time strings
667            $posLRE = strpos($textSpan, self::UTF8_LRE);
668            if ($posLRE === false) {
669                break;
670            } // No more numeric strings
671            $posPDF = strpos($textSpan, self::UTF8_PDF, $posLRE);
672            if ($posPDF === false) {
673                break;
674            } // No more numeric strings
675
676            $tempResult    .= substr($textSpan, 0, $posLRE + 3); // Copy everything preceding the numeric string
677            $numericString = substr($textSpan, $posLRE + 3, $posPDF - $posLRE); // Separate the entire numeric string
678            $textSpan      = substr($textSpan, $posPDF + 3);
679            $posColon      = strpos($numericString, ':');
680            if ($posColon === false) {
681                // Nothing that looks like a time here
682                $tempResult .= $numericString;
683                continue;
684            }
685            $posBlank = strpos($numericString . ' ', ' ');
686            $posNbsp  = strpos($numericString . '&nbsp;', '&nbsp;');
687            if ($posBlank < $posNbsp) {
688                $posSeparator    = $posBlank;
689                $lengthSeparator = 1;
690            } else {
691                $posSeparator    = $posNbsp;
692                $lengthSeparator = 6;
693            }
694            if ($posColon > $posSeparator) {
695                // We have a time string preceded by a blank: Exclude that blank from the numeric string
696                $tempResult    .= substr($numericString, 0, $posSeparator);
697                $tempResult    .= self::UTF8_PDF;
698                $tempResult    .= substr($numericString, $posSeparator, $lengthSeparator);
699                $tempResult    .= self::UTF8_LRE;
700                $numericString = substr($numericString, $posSeparator + $lengthSeparator);
701            }
702
703            $posBlank = strpos($numericString, ' ');
704            $posNbsp  = strpos($numericString, '&nbsp;');
705            if ($posBlank === false && $posNbsp === false) {
706                // The time string isn't followed by a blank
707                $textSpan = $numericString . $textSpan;
708                continue;
709            }
710
711            // We have a time string followed by a blank: Exclude that blank from the numeric string
712            if ($posBlank === false) {
713                $posSeparator    = $posNbsp;
714                $lengthSeparator = 6;
715            } elseif ($posNbsp === false) {
716                $posSeparator    = $posBlank;
717                $lengthSeparator = 1;
718            } elseif ($posBlank < $posNbsp) {
719                $posSeparator    = $posBlank;
720                $lengthSeparator = 1;
721            } else {
722                $posSeparator    = $posNbsp;
723                $lengthSeparator = 6;
724            }
725            $tempResult    .= substr($numericString, 0, $posSeparator);
726            $tempResult    .= self::UTF8_PDF;
727            $tempResult    .= substr($numericString, $posSeparator, $lengthSeparator);
728            $posSeparator  += $lengthSeparator;
729            $numericString = substr($numericString, $posSeparator);
730            $textSpan      = self::UTF8_LRE . $numericString . $textSpan;
731        }
732        $textSpan       = $tempResult . $textSpan;
733        $trailingBlanks = '';
734        $trailingBreaks = '';
735
736        /* ****************************** LTR text handling ******************************** */
737
738        if (self::$currentState === 'LTR') {
739            // Move trailing numeric strings to the following RTL text. Include any blanks preceding or following the numeric text too.
740            if (I18N::direction() === 'rtl' && self::$previousState === 'RTL' && !$theEnd) {
741                $trailingString = '';
742                $savedSpan      = $textSpan;
743                while ($textSpan !== '') {
744                    // Look for trailing spaces and tentatively move them
745                    if (str_ends_with($textSpan, ' ')) {
746                        $trailingString = ' ' . $trailingString;
747                        $textSpan       = substr($textSpan, 0, -1);
748                        continue;
749                    }
750                    if (str_ends_with($textSpan, '&nbsp;')) {
751                        $trailingString = '&nbsp;' . $trailingString;
752                        $textSpan       = substr($textSpan, 0, -1);
753                        continue;
754                    }
755                    if (substr($textSpan, -3) !== self::UTF8_PDF) {
756                        // There is no trailing numeric string
757                        $textSpan = $savedSpan;
758                        break;
759                    }
760
761                    // We have a numeric string
762                    $posStartNumber = strrpos($textSpan, self::UTF8_LRE);
763                    if ($posStartNumber === false) {
764                        $posStartNumber = 0;
765                    }
766                    $trailingString = substr($textSpan, $posStartNumber) . $trailingString;
767                    $textSpan       = substr($textSpan, 0, $posStartNumber);
768
769                    // Look for more spaces and move them too
770                    while ($textSpan !== '') {
771                        if (str_ends_with($textSpan, ' ')) {
772                            $trailingString = ' ' . $trailingString;
773                            $textSpan       = substr($textSpan, 0, -1);
774                            continue;
775                        }
776                        if (str_ends_with($textSpan, '&nbsp;')) {
777                            $trailingString = '&nbsp;' . $trailingString;
778                            $textSpan       = substr($textSpan, 0, -1);
779                            continue;
780                        }
781                        break;
782                    }
783
784                    self::$waitingText = $trailingString . self::$waitingText;
785                    break;
786                }
787            }
788
789            $savedSpan = $textSpan;
790            // Move any trailing <br>, optionally preceded or followed by blanks, outside this LTR span
791            while ($textSpan !== '') {
792                if (str_ends_with($textSpan, ' ')) {
793                    $trailingBlanks = ' ' . $trailingBlanks;
794                    $textSpan       = substr($textSpan, 0, -1);
795                    continue;
796                }
797                if (str_ends_with('......' . $textSpan, '&nbsp;')) {
798                    $trailingBlanks = '&nbsp;' . $trailingBlanks;
799                    $textSpan       = substr($textSpan, 0, -6);
800                    continue;
801                }
802                break;
803            }
804            while (str_ends_with($textSpan, '<LTRbr>')) {
805                $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span
806                $textSpan       = substr($textSpan, 0, -7);
807            }
808            if ($trailingBreaks !== '') {
809                while ($textSpan !== '') {
810                    if (str_ends_with($textSpan, ' ')) {
811                        $trailingBreaks = ' ' . $trailingBreaks;
812                        $textSpan       = substr($textSpan, 0, -1);
813                        continue;
814                    }
815                    if (str_ends_with($textSpan, '&nbsp;')) {
816                        $trailingBreaks = '&nbsp;' . $trailingBreaks;
817                        $textSpan       = substr($textSpan, 0, -6);
818                        continue;
819                    }
820                    break;
821                }
822                self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span
823            } else {
824                $textSpan = $savedSpan;
825            }
826
827            $trailingBlanks      = '';
828            $trailingPunctuation = '';
829            $trailingID          = '';
830            $trailingSeparator   = '';
831            $leadingSeparator    = '';
832
833            while (I18N::direction() === 'rtl') {
834                if (str_contains($result, self::START_RTL)) {
835                    // Remove trailing blanks for inclusion in a separate LTR span
836                    while ($textSpan !== '') {
837                        if (str_ends_with($textSpan, ' ')) {
838                            $trailingBlanks = ' ' . $trailingBlanks;
839                            $textSpan       = substr($textSpan, 0, -1);
840                            continue;
841                        }
842                        if (str_ends_with($textSpan, '&nbsp;')) {
843                            $trailingBlanks = '&nbsp;' . $trailingBlanks;
844                            $textSpan       = substr($textSpan, 0, -1);
845                            continue;
846                        }
847                        break;
848                    }
849
850                    // Remove trailing punctuation for inclusion in a separate LTR span
851                    if ($textSpan === '') {
852                        $trailingChar = "\n";
853                    } else {
854                        $trailingChar = substr($textSpan, -1);
855                    }
856                    if (str_contains(self::PUNCTUATION, $trailingChar)) {
857                        $trailingPunctuation = $trailingChar;
858                        $textSpan            = substr($textSpan, 0, -1);
859                    }
860                }
861
862                // Remove trailing ID numbers that look like "(xnnn)" for inclusion in a separate LTR span
863                while (true) {
864                    if (!str_ends_with($textSpan, ')')) {
865                        break;
866                    } // There is no trailing ')'
867                    $posLeftParen = strrpos($textSpan, '(');
868                    if ($posLeftParen === false) {
869                        break;
870                    } // There is no leading '('
871                    $temp = self::stripLrmRlm(substr($textSpan, $posLeftParen)); // Get rid of UTF8 control codes
872
873                    // If the parenthesized text doesn't look like an ID number,
874                    // we don't want to touch it.
875                    // This check won’t work if somebody uses ID numbers with an unusual format.
876                    $offset    = 1;
877                    $charArray = self::getChar($temp, $offset); // Get 1st character of parenthesized text
878                    if (str_contains(self::NUMBERS, $charArray['letter'])) {
879                        break;
880                    }
881                    $offset += $charArray['length']; // Point at 2nd character of parenthesized text
882                    if (!str_contains(self::NUMBERS, substr($temp, $offset, 1))) {
883                        break;
884                    }
885                    // 1st character of parenthesized text is alpha, 2nd character is a digit; last has to be a digit too
886                    if (!str_contains(self::NUMBERS, substr($temp, -2, 1))) {
887                        break;
888                    }
889
890                    $trailingID = substr($textSpan, $posLeftParen);
891                    $textSpan   = substr($textSpan, 0, $posLeftParen);
892                    break;
893                }
894
895                // Look for " - " or blank preceding the ID number and remove it for inclusion in a separate LTR span
896                if ($trailingID !== '') {
897                    while ($textSpan !== '') {
898                        if (str_ends_with($textSpan, ' ')) {
899                            $trailingSeparator = ' ' . $trailingSeparator;
900                            $textSpan          = substr($textSpan, 0, -1);
901                            continue;
902                        }
903                        if (str_ends_with($textSpan, '&nbsp;')) {
904                            $trailingSeparator = '&nbsp;' . $trailingSeparator;
905                            $textSpan          = substr($textSpan, 0, -6);
906                            continue;
907                        }
908                        if (str_ends_with($textSpan, '-')) {
909                            $trailingSeparator = '-' . $trailingSeparator;
910                            $textSpan          = substr($textSpan, 0, -1);
911                            continue;
912                        }
913                        break;
914                    }
915                }
916
917                // Look for " - " preceding the text and remove it for inclusion in a separate LTR span
918                $foundSeparator = false;
919                $savedSpan      = $textSpan;
920                while ($textSpan !== '') {
921                    if (str_starts_with($textSpan, ' ')) {
922                        $leadingSeparator = ' ' . $leadingSeparator;
923                        $textSpan         = substr($textSpan, 1);
924                        continue;
925                    }
926                    if (str_starts_with($textSpan, '&nbsp;')) {
927                        $leadingSeparator = '&nbsp;' . $leadingSeparator;
928                        $textSpan         = substr($textSpan, 6);
929                        continue;
930                    }
931                    if (str_starts_with($textSpan, '-')) {
932                        $leadingSeparator = '-' . $leadingSeparator;
933                        $textSpan         = substr($textSpan, 1);
934                        $foundSeparator   = true;
935                        continue;
936                    }
937                    break;
938                }
939                if (!$foundSeparator) {
940                    $textSpan         = $savedSpan;
941                    $leadingSeparator = '';
942                }
943                break;
944            }
945
946            // We're done: finish the span
947            $textSpan = self::starredName($textSpan, 'LTR'); // Wrap starred name in <u> and </u> tags
948            while (true) {
949                // Remove blanks that precede <LTRbr>
950                if (str_contains($textSpan, ' <LTRbr>')) {
951                    $textSpan = str_replace(' <LTRbr>', '<LTRbr>', $textSpan);
952                    continue;
953                }
954                if (str_contains($textSpan, '&nbsp;<LTRbr>')) {
955                    $textSpan = str_replace('&nbsp;<LTRbr>', '<LTRbr>', $textSpan);
956                    continue;
957                }
958                break;
959            }
960            if ($leadingSeparator !== '') {
961                $result .= self::START_LTR . $leadingSeparator . self::END_LTR;
962            }
963            $result .= $textSpan . self::END_LTR;
964            if ($trailingSeparator !== '') {
965                $result .= self::START_LTR . $trailingSeparator . self::END_LTR;
966            }
967            if ($trailingID !== '') {
968                $result .= self::START_LTR . $trailingID . self::END_LTR;
969            }
970            if ($trailingPunctuation !== '') {
971                $result .= self::START_LTR . $trailingPunctuation . self::END_LTR;
972            }
973            if ($trailingBlanks !== '') {
974                $result .= self::START_LTR . $trailingBlanks . self::END_LTR;
975            }
976        }
977
978        /* ****************************** RTL text handling ******************************** */
979
980        if (self::$currentState === 'RTL') {
981            $savedSpan = $textSpan;
982
983            // Move any trailing <br>, optionally followed by blanks, outside this RTL span
984            while ($textSpan !== '') {
985                if (str_ends_with($textSpan, ' ')) {
986                    $trailingBlanks = ' ' . $trailingBlanks;
987                    $textSpan       = substr($textSpan, 0, -1);
988                    continue;
989                }
990                if (str_ends_with('......' . $textSpan, '&nbsp;')) {
991                    $trailingBlanks = '&nbsp;' . $trailingBlanks;
992                    $textSpan       = substr($textSpan, 0, -6);
993                    continue;
994                }
995                break;
996            }
997            while (str_ends_with($textSpan, '<RTLbr>')) {
998                $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span
999                $textSpan       = substr($textSpan, 0, -7);
1000            }
1001            if ($trailingBreaks !== '') {
1002                self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span
1003            } else {
1004                $textSpan = $savedSpan;
1005            }
1006
1007            // Move trailing numeric strings to the following LTR text. Include any blanks preceding or following the numeric text too.
1008            if (!$theEnd && I18N::direction() !== 'rtl') {
1009                $trailingString = '';
1010                $savedSpan      = $textSpan;
1011                while ($textSpan !== '') {
1012                    // Look for trailing spaces and tentatively move them
1013                    if (str_ends_with($textSpan, ' ')) {
1014                        $trailingString = ' ' . $trailingString;
1015                        $textSpan       = substr($textSpan, 0, -1);
1016                        continue;
1017                    }
1018                    if (str_ends_with($textSpan, '&nbsp;')) {
1019                        $trailingString = '&nbsp;' . $trailingString;
1020                        $textSpan       = substr($textSpan, 0, -1);
1021                        continue;
1022                    }
1023                    if (substr($textSpan, -3) !== self::UTF8_PDF) {
1024                        // There is no trailing numeric string
1025                        $textSpan = $savedSpan;
1026                        break;
1027                    }
1028
1029                    // We have a numeric string
1030                    $posStartNumber = strrpos($textSpan, self::UTF8_LRE);
1031                    if ($posStartNumber === false) {
1032                        $posStartNumber = 0;
1033                    }
1034                    $trailingString = substr($textSpan, $posStartNumber) . $trailingString;
1035                    $textSpan       = substr($textSpan, 0, $posStartNumber);
1036
1037                    // Look for more spaces and move them too
1038                    while ($textSpan !== '') {
1039                        if (str_ends_with($textSpan, ' ')) {
1040                            $trailingString = ' ' . $trailingString;
1041                            $textSpan       = substr($textSpan, 0, -1);
1042                            continue;
1043                        }
1044                        if (str_ends_with($textSpan, '&nbsp;')) {
1045                            $trailingString = '&nbsp;' . $trailingString;
1046                            $textSpan       = substr($textSpan, 0, -1);
1047                            continue;
1048                        }
1049                        break;
1050                    }
1051
1052                    self::$waitingText = $trailingString . self::$waitingText;
1053                    break;
1054                }
1055            }
1056
1057            // Trailing " - " needs to be prefixed to the following span
1058            if (!$theEnd && str_ends_with('...' . $textSpan, ' - ')) {
1059                $textSpan          = substr($textSpan, 0, -3);
1060                self::$waitingText = ' - ' . self::$waitingText;
1061            }
1062
1063            while (I18N::direction() === 'rtl') {
1064                // Look for " - " preceding <RTLbr> and relocate it to the front of the string
1065                $posDashString = strpos($textSpan, ' - <RTLbr>');
1066                if ($posDashString === false) {
1067                    break;
1068                }
1069                $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>');
1070                if ($posStringStart === false) {
1071                    $posStringStart = 0;
1072                } else {
1073                    $posStringStart += 9;
1074                } // Point to the first char following the last <RTLbr>
1075
1076                $textSpan = substr($textSpan, 0, $posStringStart) . ' - ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 3);
1077            }
1078
1079            // Strip leading spaces from the RTL text
1080            $countLeadingSpaces = 0;
1081            while ($textSpan !== '') {
1082                if (str_starts_with($textSpan, ' ')) {
1083                    $countLeadingSpaces++;
1084                    $textSpan = substr($textSpan, 1);
1085                    continue;
1086                }
1087                if (str_starts_with($textSpan, '&nbsp;')) {
1088                    $countLeadingSpaces++;
1089                    $textSpan = substr($textSpan, 6);
1090                    continue;
1091                }
1092                break;
1093            }
1094
1095            // Strip trailing spaces from the RTL text
1096            $countTrailingSpaces = 0;
1097            while ($textSpan !== '') {
1098                if (str_ends_with($textSpan, ' ')) {
1099                    $countTrailingSpaces++;
1100                    $textSpan = substr($textSpan, 0, -1);
1101                    continue;
1102                }
1103                if (str_ends_with($textSpan, '&nbsp;')) {
1104                    $countTrailingSpaces++;
1105                    $textSpan = substr($textSpan, 0, -6);
1106                    continue;
1107                }
1108                break;
1109            }
1110
1111            // Look for trailing " -", reverse it, and relocate it to the front of the string
1112            if (str_ends_with($textSpan, ' -')) {
1113                $posDashString  = strlen($textSpan) - 2;
1114                $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>');
1115                if ($posStringStart === false) {
1116                    $posStringStart = 0;
1117                } else {
1118                    $posStringStart += 9;
1119                } // Point to the first char following the last <RTLbr>
1120
1121                $textSpan = substr($textSpan, 0, $posStringStart) . '- ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 2);
1122            }
1123
1124            if ($countLeadingSpaces !== 0) {
1125                $newLength = strlen($textSpan) + $countLeadingSpaces;
1126                $textSpan  = str_pad($textSpan, $newLength, ' ', I18N::direction() === 'rtl' ? STR_PAD_LEFT : STR_PAD_RIGHT);
1127            }
1128            if ($countTrailingSpaces !== 0) {
1129                if (I18N::direction() === 'ltr') {
1130                    if ($trailingBreaks === '') {
1131                        // Move trailing RTL spaces to front of following LTR span
1132                        $newLength         = strlen(self::$waitingText) + $countTrailingSpaces;
1133                        self::$waitingText = str_pad(self::$waitingText, $newLength, ' ', STR_PAD_LEFT);
1134                    }
1135                } else {
1136                    $newLength = strlen($textSpan) + $countTrailingSpaces;
1137                    $textSpan  = str_pad($textSpan, $newLength);
1138                }
1139            }
1140
1141            // We're done: finish the span
1142            $textSpan = self::starredName($textSpan, 'RTL'); // Wrap starred name in <u> and </u> tags
1143            $result   .= $textSpan . self::END_RTL;
1144        }
1145
1146        if (self::$currentState !== 'LTR' && self::$currentState !== 'RTL') {
1147            $result .= $textSpan;
1148        }
1149
1150        $result .= $trailingBreaks; // Get rid of any waiting <br>
1151    }
1152}
1153