xref: /webtrees/app/Report/RightToLeftSupport.php (revision 34ed513f1c8687261b812389f04f857ef07b0a5a)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Report;
21
22use Fisharebest\Webtrees\I18N;
23
24use function str_contains;
25
26/**
27 * RTL Functions for use in the PDF reports
28 */
29class RightToLeftSupport
30{
    // Unicode bidirectional control characters, written as their 3-byte UTF-8 encodings
    private const UTF8_LRM = "\xE2\x80\x8E"; // U+200E (Left to Right mark:  zero-width character with LTR directionality)
    private const UTF8_RLM = "\xE2\x80\x8F"; // U+200F (Right to Left mark:  zero-width character with RTL directionality)
    private const UTF8_LRO = "\xE2\x80\xAD"; // U+202D (Left to Right override: force everything following to LTR mode)
    private const UTF8_RLO = "\xE2\x80\xAE"; // U+202E (Right to Left override: force everything following to RTL mode)
    private const UTF8_LRE = "\xE2\x80\xAA"; // U+202A (Left to Right embedding: treat everything following as LTR text)
    private const UTF8_RLE = "\xE2\x80\xAB"; // U+202B (Right to Left embedding: treat everything following as RTL text)
    private const UTF8_PDF = "\xE2\x80\xAC"; // U+202C (Pop directional formatting: restore state prior to last LRO, RLO, LRE, RLE)

    // Opening parentheses. The position of a character in this string is used as an index,
    // and the matching closing parenthesis has the same position in CLOSE_PARENTHESES.
    private const OPEN_PARENTHESES = '([{';

    // Closing parentheses, in the same order as OPEN_PARENTHESES
    private const CLOSE_PARENTHESES = ')]}';

    // ASCII digits
    private const NUMBERS = '0123456789';

    private const NUMBER_PREFIX = '+-'; // Treat these like numbers when at beginning or end of numeric strings

    private const NUMBER_PUNCTUATION = '- ,.:/'; // Treat these like numbers when inside numeric strings

    // Punctuation that, like an opening parenthesis, lets an RTL letter following LTR text start an RTL run
    private const PUNCTUATION = ',.:;?!';

    // Markup
    // Temporary markers for the start and end of a directional run; spanLtrRtl() converts
    // them to <span dir="ltr">, <span dir="rtl"> and </span> as its last step.
    private const START_LTR    = '<LTR>';
    private const END_LTR      = '</LTR>';
    private const START_RTL    = '<RTL>';
    private const END_RTL      = '</RTL>';
    private const LENGTH_START = 5; // Length, in bytes, of START_LTR and START_RTL
    private const LENGTH_END   = 6; // Length, in bytes, of END_LTR and END_RTL

    /** @var string Were we previously processing LTR or RTL. Holds the value of $currentState before the last direction change ('' until the first change). */
    private static $previousState;

    /** @var string Are we currently processing LTR or RTL. Either 'LTR' or 'RTL' in normal operation. */
    private static $currentState;

    /** @var string Text waiting to be processed: characters whose direction is not yet decided and which have not yet been appended to the result. */
    private static $waitingText;

    /** @var int Offset into the result string at which the content of the current span begins (set by beginCurrentSpan()). */
    private static $posSpanStart;
70
71    /**
72     * This function strips &lrm; and &rlm; from the input string. It should be used for all
73     * text that has been passed through the PrintReady() function before that text is stored
74     * in the database. The database should NEVER contain these characters.
75     *
76     * @param string $inputText The string from which the &lrm; and &rlm; characters should be stripped
77     *
78     * @return string The input string, with &lrm; and &rlm; stripped
79     */
80    private static function stripLrmRlm(string $inputText): string
81    {
82        return str_replace([
83            self::UTF8_LRM,
84            self::UTF8_RLM,
85            self::UTF8_LRO,
86            self::UTF8_RLO,
87            self::UTF8_LRE,
88            self::UTF8_RLE,
89            self::UTF8_PDF,
90            '&lrm;',
91            '&rlm;',
92            '&LRM;',
93            '&RLM;',
94        ], '', $inputText);
95    }
96
    /**
     * This function encapsulates all texts in the input with <span dir='xxx'> and </span>
     * according to the directionality specified.
     *
     * Outline of the processing:
     *  - newlines become <br>, and any existing directional control codes are stripped;
     *  - the text is consumed from the front, starting in the page direction (I18N::direction());
     *    HTML elements, HTML entities and TCPDF "{{...}}" directives are copied through as a unit;
     *  - while the current direction is RTL, runs of digits (with optional sign and embedded
     *    numeric punctuation) are wrapped in LRE ... PDF control codes;
     *  - runs are first delimited by the temporary <LTR>, </LTR>, <RTL> and </RTL> markers, which
     *    are cleaned up and finally converted to <span dir="ltr">, <span dir="rtl"> and </span>.
     *
     * The method uses the class's static state ($previousState, $currentState, $waitingText,
     * $posSpanStart) as working storage.
     *
     * @param string $inputText Raw input
     *
     * @return string The string with all texts encapsulated as required ('' for empty input)
     */
    public static function spanLtrRtl(string $inputText): string
    {
        if ($inputText === '') {
            // Nothing to do
            return '';
        }

        $workingText = str_replace("\n", '<br>', $inputText);
        $workingText = str_replace([
            '<span class="starredname"><br>',
            '<span<br>class="starredname">',
        ], '<br><span class="starredname">', $workingText); // Reposition some incorrectly placed line breaks
        $workingText = self::stripLrmRlm($workingText); // Get rid of any existing UTF8 control codes

        self::$previousState = '';
        self::$currentState  = strtoupper(I18N::direction());
        $numberState         = false; // Set when we're inside a numeric string
        $result              = '';
        self::$waitingText   = '';
        // Direction recorded for each kind of opening parenthesis, indexed by its position in
        // OPEN_PARENTHESES: '?' = not yet known, 'LTR'/'RTL' = known, '' = already consumed
        $openParDirection    = [];

        self::beginCurrentSpan($result);

        // Consume the working text from the front until nothing is left
        while ($workingText !== '') {
            $charArray     = self::getChar($workingText, 0); // Get the next ASCII or UTF-8 character
            $currentLetter = $charArray['letter'];
            $currentLen    = $charArray['length'];

            $openParIndex  = strpos(self::OPEN_PARENTHESES, $currentLetter); // Which opening parenthesis is this?
            $closeParIndex = strpos(self::CLOSE_PARENTHESES, $currentLetter); // Which closing parenthesis is this?

            switch ($currentLetter) {
                case '<':
                    // Assume this '<' starts an HTML element
                    $endPos = strpos($workingText, '>'); // look for the terminating '>'
                    if ($endPos === false) {
                        // No terminating '>': only the '<' itself is taken as the element
                        $endPos = 0;
                    }
                    $currentLen += $endPos;
                    $element    = substr($workingText, 0, $currentLen);
                    $temp       = strtolower(substr($element, 0, 3));
                    // An element shorter than 7 bytes that starts with '<br' is a line break
                    if (strlen($element) < 7 && $temp === '<br') {
                        if ($numberState) {
                            // A line break ends any numeric string that is in progress
                            $numberState = false;
                            if (self::$currentState === 'RTL') {
                                self::$waitingText .= self::UTF8_PDF;
                            }
                        }
                        self::breakCurrentSpan($result);
                    } elseif (self::$waitingText === '') {
                        $result .= $element;
                    } else {
                        // Keep the element behind the text that is still waiting for a direction
                        self::$waitingText .= $element;
                    }
                    $workingText = substr($workingText, $currentLen);
                    break;
                case '&':
                    // Assume this '&' starts an HTML entity
                    $endPos = strpos($workingText, ';'); // look for the terminating ';'
                    if ($endPos === false) {
                        // No terminating ';': only the '&' itself is taken as the entity
                        $endPos = 0;
                    }
                    $currentLen += $endPos;
                    $entity     = substr($workingText, 0, $currentLen);
                    if (strtolower($entity) === '&nbsp;') {
                        $entity = '&nbsp;'; // Ensure consistent case for this entity
                    }
                    if (self::$waitingText === '') {
                        $result .= $entity;
                    } else {
                        // Keep the entity behind the text that is still waiting for a direction
                        self::$waitingText .= $entity;
                    }
                    $workingText = substr($workingText, $currentLen);
                    break;
                case '{':
                    if (substr($workingText, 1, 1) === '{') {
                        // Assume this '{{' starts a TCPDF directive
                        $endPos = strpos($workingText, '}}'); // look for the terminating '}}'
                        if ($endPos === false) {
                            // No terminating '}}': only the '{{' itself is taken as the directive
                            $endPos = 0;
                        }
                        $currentLen        = $endPos + 2;
                        $directive         = substr($workingText, 0, $currentLen);
                        $workingText       = substr($workingText, $currentLen);
                        // The directive is copied to the result together with any waiting text
                        $result            .= self::$waitingText . $directive;
                        self::$waitingText = '';
                        break;
                    }
                // no break
                // (a single '{' is handled below like any other character)
                default:
                    // Look for strings of numbers with optional leading or trailing + or -
                    // and with optional embedded numeric punctuation
                    if ($numberState) {
                        // If we're inside a numeric string, look for reasons to end it
                        // (the "\n" appended below guarantees there is always a character to look at)
                        $offset    = 0; // Be sure to look at the current character first
                        $charArray = self::getChar($workingText . "\n", $offset);
                        if (!str_contains(self::NUMBERS, $charArray['letter'])) {
                            // This is not a digit. Is it numeric punctuation?
                            if (substr($workingText . "\n", $offset, 6) === '&nbsp;') {
                                $offset += 6; // This could be numeric punctuation
                            } elseif (str_contains(self::NUMBER_PUNCTUATION, $charArray['letter'])) {
                                $offset += $charArray['length']; // This could be numeric punctuation
                            }
                            // If the next character is a digit, the current character is numeric punctuation
                            $charArray = self::getChar($workingText . "\n", $offset);
                            if (!str_contains(self::NUMBERS, $charArray['letter'])) {
                                // This is not a digit. End the run of digits and punctuation.
                                $numberState = false;
                                if (self::$currentState === 'RTL') {
                                    // Close the LRE embedding that was opened at the start of the number
                                    if (!str_contains(self::NUMBER_PREFIX, $currentLetter)) {
                                        $currentLetter = self::UTF8_PDF . $currentLetter;
                                    } else {
                                        $currentLetter .= self::UTF8_PDF; // Include a trailing + or - in the run
                                    }
                                }
                            }
                        }
                    } else {
                        // If we're outside a numeric string, look for reasons to start it
                        if (str_contains(self::NUMBER_PREFIX, $currentLetter)) {
                            // This might be a number lead-in
                            $offset   = $currentLen;
                            $nextChar = substr($workingText . "\n", $offset, 1);
                            if (str_contains(self::NUMBERS, $nextChar)) {
                                $numberState = true; // We found a digit: the lead-in is therefore numeric
                                if (self::$currentState === 'RTL') {
                                    // In RTL text, the number is embedded as LTR
                                    $currentLetter = self::UTF8_LRE . $currentLetter;
                                }
                            }
                        } elseif (str_contains(self::NUMBERS, $currentLetter)) {
                            $numberState = true; // The current letter is a digit
                            if (self::$currentState === 'RTL') {
                                // In RTL text, the number is embedded as LTR
                                $currentLetter = self::UTF8_LRE . $currentLetter;
                            }
                        }
                    }

                    // Determine the directionality of the current UTF-8 character
                    $newState = self::$currentState;

                    // This loop never repeats: every path leaves it with "break" (continue below
                    // with a known $newState) or "break 2" (go straight to the next character).
                    while (true) {
                        if (I18N::scriptDirection(I18N::textScript($currentLetter)) === 'rtl') {
                            if (self::$currentState === '') {
                                $newState = 'RTL';
                                break;
                            }

                            if (self::$currentState === 'RTL') {
                                break;
                            }
                            // Switch to RTL only if this isn't a solitary RTL letter
                            // Scan ahead: another RTL letter, punctuation or an opening parenthesis
                            // means RTL; a blank or &nbsp; ends the scan.
                            $tempText = substr($workingText, $currentLen);
                            while ($tempText !== '') {
                                $nextCharArray = self::getChar($tempText, 0);
                                $nextLetter    = $nextCharArray['letter'];
                                $nextLen       = $nextCharArray['length'];
                                $tempText      = substr($tempText, $nextLen);

                                if (I18N::scriptDirection(I18N::textScript($nextLetter)) === 'rtl') {
                                    $newState = 'RTL';
                                    break 2;
                                }

                                if (str_contains(self::PUNCTUATION, $nextLetter) || str_contains(self::OPEN_PARENTHESES, $nextLetter)) {
                                    $newState = 'RTL';
                                    break 2;
                                }

                                if ($nextLetter === ' ') {
                                    break;
                                }
                                // Append the following 5 bytes to test for '&nbsp;'
                                $nextLetter .= substr($tempText . "\n", 0, 5);
                                if ($nextLetter === '&nbsp;') {
                                    break;
                                }
                            }
                            // This is a solitary RTL letter : wrap it in UTF8 control codes to force LTR directionality
                            $currentLetter = self::UTF8_LRO . $currentLetter . self::UTF8_PDF;
                            $newState      = 'LTR';
                            break;
                        }
                        if ($currentLen !== 1 || $currentLetter >= 'A' && $currentLetter <= 'Z' || $currentLetter >= 'a' && $currentLetter <= 'z') {
                            // Since it’s neither Hebrew nor Arabic, this UTF-8 character or ASCII letter must be LTR
                            $newState = 'LTR';
                            break;
                        }
                        if ($closeParIndex !== false) {
                            // This closing parenthesis has to inherit the matching opening parenthesis' directionality
                            if (!empty($openParDirection[$closeParIndex]) && $openParDirection[$closeParIndex] !== '?') {
                                $newState = $openParDirection[$closeParIndex];
                            }
                            $openParDirection[$closeParIndex] = '';
                            break;
                        }
                        if ($openParIndex !== false) {
                            // Opening parentheses always inherit the following directionality
                            self::$waitingText .= $currentLetter;
                            $workingText       = substr($workingText, $currentLen);
                            while (true) {
                                if ($workingText === '') {
                                    break;
                                }
                                if (substr($workingText, 0, 1) === ' ') {
                                    // Spaces following this left parenthesis inherit the following directionality too
                                    self::$waitingText .= ' ';
                                    $workingText       = substr($workingText, 1);
                                    continue;
                                }
                                if (substr($workingText, 0, 6) === '&nbsp;') {
                                    // Spaces following this left parenthesis inherit the following directionality too
                                    self::$waitingText .= '&nbsp;';
                                    $workingText       = substr($workingText, 6);
                                    continue;
                                }
                                break;
                            }
                            // Direction not known yet: it is filled in once the next directional character is seen
                            $openParDirection[$openParIndex] = '?';
                            break 2; // double break because we're waiting for more information
                        }

                        // We have a digit or a "normal" special character.
                        //
                        // When this character is not at the start of the input string, it inherits the preceding directionality;
                        // at the start of the input string, it assumes the following directionality.
                        //
                        // Exceptions to this rule will be handled later during final clean-up.
                        //
                        self::$waitingText .= $currentLetter;
                        $workingText       = substr($workingText, $currentLen);
                        if (self::$currentState !== '') {
                            $result            .= self::$waitingText;
                            self::$waitingText = '';
                        }
                        break 2; // double break because we're waiting for more information
                    }
                    if ($newState !== self::$currentState) {
                        // A direction change has occurred
                        self::finishCurrentSpan($result);
                        self::$previousState = self::$currentState;
                        self::$currentState  = $newState;
                        self::beginCurrentSpan($result);
                    }
                    // Append the current character, preceded by any text that was waiting for a direction
                    self::$waitingText .= $currentLetter;
                    $workingText       = substr($workingText, $currentLen);
                    $result            .= self::$waitingText;
                    self::$waitingText = '';

                    foreach ($openParDirection as $index => $value) {
                        // Since we now know the proper direction, remember it for all waiting opening parentheses
                        if ($value === '?') {
                            $openParDirection[$index] = self::$currentState;
                        }
                    }

                    break;
            }
        }

        // We're done. Finish last <span> if necessary
        if ($numberState) {
            // The text ended inside a numeric string: in RTL, close its LRE embedding
            if (self::$waitingText === '') {
                if (self::$currentState === 'RTL') {
                    $result .= self::UTF8_PDF;
                }
            } else {
                if (self::$currentState === 'RTL') {
                    self::$waitingText .= self::UTF8_PDF;
                }
            }
        }
        self::finishCurrentSpan($result, true);

        // Get rid of any waiting text
        if (self::$waitingText !== '') {
            // The waiting text gets the page's RTL direction only when the last span was LTR
            // on an RTL page; in every other case it is marked as LTR
            if (I18N::direction() === 'rtl' && self::$currentState === 'LTR') {
                $result .= self::START_RTL;
                $result .= self::$waitingText;
                $result .= self::END_RTL;
            } else {
                $result .= self::START_LTR;
                $result .= self::$waitingText;
                $result .= self::END_LTR;
            }
            self::$waitingText = '';
        }

        // Lastly, do some more cleanups

        // Move leading RTL numeric strings to following LTR text
        // (this happens when the page direction is RTL and the original text begins with a number and is followed by LTR text)
        // The "+ 3" is the byte length of UTF8_LRE. Every path through this loop ends with "break",
        // so it executes at most once.
        while (substr($result, 0, self::LENGTH_START + 3) === self::START_RTL . self::UTF8_LRE) {
            $spanEnd = strpos($result, self::END_RTL . self::START_LTR);
            if ($spanEnd === false) {
                break;
            }
            $textSpan = self::stripLrmRlm(substr($result, self::LENGTH_START + 3, $spanEnd - self::LENGTH_START - 3));
            if (I18N::scriptDirection(I18N::textScript($textSpan)) === 'rtl') {
                break;
            }
            // Replace the leading <RTL> with <LTR> and drop the "</RTL><LTR>" that separated the two spans
            $result = self::START_LTR . substr($result, self::LENGTH_START, $spanEnd - self::LENGTH_START) . substr($result, $spanEnd + self::LENGTH_START + self::LENGTH_END);
            break;
        }

        // On RTL pages, put trailing "." in RTL numeric strings into its own RTL span
        if (I18N::direction() === 'rtl') {
            $result = str_replace(self::UTF8_PDF . '.' . self::END_RTL, self::UTF8_PDF . self::END_RTL . self::START_RTL . '.' . self::END_RTL, $result);
        }

        // Trim trailing blanks preceding <br> in LTR text
        // ('<LTRbr>' and '<RTLbr>' are the placeholders written by breakCurrentSpan();
        // this step is skipped when the state before the last direction change was RTL)
        while (self::$previousState !== 'RTL') {
            if (str_contains($result, ' <LTRbr>')) {
                $result = str_replace(' <LTRbr>', '<LTRbr>', $result);
                continue;
            }
            if (str_contains($result, '&nbsp;<LTRbr>')) {
                $result = str_replace('&nbsp;<LTRbr>', '<LTRbr>', $result);
                continue;
            }
            if (str_contains($result, ' <br>')) {
                $result = str_replace(' <br>', '<br>', $result);
                continue;
            }
            if (str_contains($result, '&nbsp;<br>')) {
                $result = str_replace('&nbsp;<br>', '<br>', $result);
                continue;
            }
            break; // Neither space nor &nbsp; : we're done
        }

        // Trim trailing blanks preceding <br> in RTL text
        while (true) {
            if (str_contains($result, ' <RTLbr>')) {
                $result = str_replace(' <RTLbr>', '<RTLbr>', $result);
                continue;
            }
            if (str_contains($result, '&nbsp;<RTLbr>')) {
                $result = str_replace('&nbsp;<RTLbr>', '<RTLbr>', $result);
                continue;
            }
            break; // Neither space nor &nbsp; : we're done
        }

        // Convert '<LTRbr>' and '<RTLbr>'
        // (close the current span, emit the <br>, and reopen a span of the same direction)
        $result = str_replace([
            '<LTRbr>',
            '<RTLbr>',
        ], [
            self::END_LTR . '<br>' . self::START_LTR,
            self::END_RTL . '<br>' . self::START_RTL,
        ], $result);

        // Include leading indeterminate directional text in whatever follows
        // (only when the result starts with neither <LTR>, <RTL> nor <br>)
        if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL && substr($result . "\n", 0, 4) !== '<br>') {
            $leadingText = '';
            while (true) {
                if ($result === '') {
                    // No span marker was found at all: keep the text unchanged
                    $result = $leadingText;
                    break;
                }
                if (substr($result . "\n", 0, self::LENGTH_START) !== self::START_LTR && substr($result . "\n", 0, self::LENGTH_START) !== self::START_RTL) {
                    // Not at a span marker yet: move one byte from the result to the leading text
                    $leadingText .= substr($result, 0, 1);
                    $result      = substr($result, 1);
                    continue;
                }
                // Re-insert the leading text just after the first span marker
                $result = substr($result, 0, self::LENGTH_START) . $leadingText . substr($result, self::LENGTH_START);
                break;
            }
        }

        // Include solitary "-" and "+" in surrounding RTL text
        $result = str_replace([
            self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL,
            self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL,
        ], [
            '-',
            '+',
        ], $result);

        //$result = strtr($result, [
        //    self::END_RTL . self::START_LTR . '-' . self::END_LTR . self::START_RTL => '-',
        //    self::END_RTL . self::START_LTR . '+' . self::END_LTR . self::START_RTL => '+',
        //]);

        // Remove empty spans
        $result = str_replace([
            self::START_LTR . self::END_LTR,
            self::START_RTL . self::END_RTL,
        ], '', $result);

        // Finally, correct '<LTR>', '</LTR>', '<RTL>', and '</RTL>'
        // LTR text: <span dir="ltr"> text </span>
        // RTL text: <span dir="rtl"> text </span>

        $result = str_replace([
            self::START_LTR,
            self::END_LTR,
            self::START_RTL,
            self::END_RTL,
        ], [
            '<span dir="ltr">',
            '</span>',
            '<span dir="rtl">',
            '</span>',
        ], $result);

        return $result;
    }
512
513    /**
514     * Wrap words that have an asterisk suffix in <u> and </u> tags.
515     * This should underline starred names to show the preferred name.
516     *
517     * @param string $textSpan
518     * @param string $direction
519     *
520     * @return string
521     */
522    private static function starredName(string $textSpan, string $direction): string
523    {
524        // To avoid a TCPDF bug that mixes up the word order, insert those <u> and </u> tags
525        // only when page and span directions are identical.
526        if ($direction === strtoupper(I18N::direction())) {
527            while (true) {
528                $starPos = strpos($textSpan, '*');
529                if ($starPos === false) {
530                    break;
531                }
532                $trailingText = substr($textSpan, $starPos + 1);
533                $textSpan     = substr($textSpan, 0, $starPos);
534                $wordStart    = strrpos($textSpan, ' '); // Find the start of the word
535                if ($wordStart !== false) {
536                    $leadingText = substr($textSpan, 0, $wordStart + 1);
537                    $wordText    = substr($textSpan, $wordStart + 1);
538                } else {
539                    $leadingText = '';
540                    $wordText    = $textSpan;
541                }
542                $textSpan = $leadingText . '<u>' . $wordText . '</u>' . $trailingText;
543            }
544            $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '<u>\1</u>', $textSpan);
545            // The &nbsp; is a work-around for a TCPDF bug eating blanks.
546            $textSpan = str_replace([
547                ' <u>',
548                '</u> ',
549            ], [
550                '&nbsp;<u>',
551                '</u>&nbsp;',
552            ], $textSpan);
553        } else {
554            // Text and page directions differ:  remove the <span> and </span>
555            $textSpan = preg_replace('~(.*)\*~', '\1', $textSpan);
556            $textSpan = preg_replace('~<span class="starredname">(.*)</span>~', '\1', $textSpan);
557        }
558
559        return $textSpan;
560    }
561
562    /**
563     * Get the next character from an input string
564     *
565     * @param string $text
566     * @param int    $offset
567     *
568     * @return array{'letter':string,'length':int}
569     */
570    private static function getChar(string $text, int $offset): array
571    {
572        if ($text === '') {
573            return [
574                'letter' => '',
575                'length' => 0,
576            ];
577        }
578
579        $char   = substr($text, $offset, 1);
580        $length = 1;
581        if ((ord($char) & 0xE0) === 0xC0) {
582            $length = 2;
583        }
584        if ((ord($char) & 0xF0) === 0xE0) {
585            $length = 3;
586        }
587        if ((ord($char) & 0xF8) === 0xF0) {
588            $length = 4;
589        }
590        $letter = substr($text, $offset, $length);
591
592        return [
593            'letter' => $letter,
594            'length' => $length,
595        ];
596    }
597
598    /**
599     * Insert <br> into current span
600     *
601     * @param string $result
602     *
603     * @return void
604     */
605    private static function breakCurrentSpan(string &$result): void
606    {
607        // Interrupt the current span, insert that <br>, and then continue the current span
608        $result            .= self::$waitingText;
609        self::$waitingText = '';
610
611        $breakString = '<' . self::$currentState . 'br>';
612        $result      .= $breakString;
613    }
614
615    /**
616     * Begin current span
617     *
618     * @param string $result
619     *
620     * @return void
621     */
622    private static function beginCurrentSpan(string &$result): void
623    {
624        if (self::$currentState === 'LTR') {
625            $result .= self::START_LTR;
626        }
627        if (self::$currentState === 'RTL') {
628            $result .= self::START_RTL;
629        }
630
631        self::$posSpanStart = strlen($result);
632    }
633
634    /**
635     * Finish current span
636     *
637     * @param string $result
638     * @param bool   $theEnd
639     *
640     * @return void
641     */
642    private static function finishCurrentSpan(string &$result, bool $theEnd = false): void
643    {
644        $textSpan = substr($result, self::$posSpanStart);
645        $result   = substr($result, 0, self::$posSpanStart);
646
647        // Get rid of empty spans, so that our check for presence of RTL will work
648        $result = str_replace([
649            self::START_LTR . self::END_LTR,
650            self::START_RTL . self::END_RTL,
651        ], '', $result);
652
653        // Look for numeric strings that are times (hh:mm:ss). These have to be separated from surrounding numbers.
654        $tempResult = '';
655        while ($textSpan !== '') {
656            $posColon = strpos($textSpan, ':');
657            if ($posColon === false) {
658                break;
659            } // No more possible time strings
660            $posLRE = strpos($textSpan, self::UTF8_LRE);
661            if ($posLRE === false) {
662                break;
663            } // No more numeric strings
664            $posPDF = strpos($textSpan, self::UTF8_PDF, $posLRE);
665            if ($posPDF === false) {
666                break;
667            } // No more numeric strings
668
669            $tempResult    .= substr($textSpan, 0, $posLRE + 3); // Copy everything preceding the numeric string
670            $numericString = substr($textSpan, $posLRE + 3, $posPDF - $posLRE); // Separate the entire numeric string
671            $textSpan      = substr($textSpan, $posPDF + 3);
672            $posColon      = strpos($numericString, ':');
673            if ($posColon === false) {
674                // Nothing that looks like a time here
675                $tempResult .= $numericString;
676                continue;
677            }
678            $posBlank = strpos($numericString . ' ', ' ');
679            $posNbsp  = strpos($numericString . '&nbsp;', '&nbsp;');
680            if ($posBlank < $posNbsp) {
681                $posSeparator    = $posBlank;
682                $lengthSeparator = 1;
683            } else {
684                $posSeparator    = $posNbsp;
685                $lengthSeparator = 6;
686            }
687            if ($posColon > $posSeparator) {
688                // We have a time string preceded by a blank: Exclude that blank from the numeric string
689                $tempResult    .= substr($numericString, 0, $posSeparator);
690                $tempResult    .= self::UTF8_PDF;
691                $tempResult    .= substr($numericString, $posSeparator, $lengthSeparator);
692                $tempResult    .= self::UTF8_LRE;
693                $numericString = substr($numericString, $posSeparator + $lengthSeparator);
694            }
695
696            $posBlank = strpos($numericString, ' ');
697            $posNbsp  = strpos($numericString, '&nbsp;');
698            if ($posBlank === false && $posNbsp === false) {
699                // The time string isn't followed by a blank
700                $textSpan = $numericString . $textSpan;
701                continue;
702            }
703
704            // We have a time string followed by a blank: Exclude that blank from the numeric string
705            if ($posBlank === false) {
706                $posSeparator    = $posNbsp;
707                $lengthSeparator = 6;
708            } elseif ($posNbsp === false) {
709                $posSeparator    = $posBlank;
710                $lengthSeparator = 1;
711            } elseif ($posBlank < $posNbsp) {
712                $posSeparator    = $posBlank;
713                $lengthSeparator = 1;
714            } else {
715                $posSeparator    = $posNbsp;
716                $lengthSeparator = 6;
717            }
718            $tempResult    .= substr($numericString, 0, $posSeparator);
719            $tempResult    .= self::UTF8_PDF;
720            $tempResult    .= substr($numericString, $posSeparator, $lengthSeparator);
721            $posSeparator  += $lengthSeparator;
722            $numericString = substr($numericString, $posSeparator);
723            $textSpan      = self::UTF8_LRE . $numericString . $textSpan;
724        }
725        $textSpan       = $tempResult . $textSpan;
726        $trailingBlanks = '';
727        $trailingBreaks = '';
728
729        /* ****************************** LTR text handling ******************************** */
730
731        if (self::$currentState === 'LTR') {
732            // Move trailing numeric strings to the following RTL text. Include any blanks preceding or following the numeric text too.
733            if (I18N::direction() === 'rtl' && self::$previousState === 'RTL' && !$theEnd) {
734                $trailingString = '';
735                $savedSpan      = $textSpan;
736                while ($textSpan !== '') {
737                    // Look for trailing spaces and tentatively move them
738                    if (substr($textSpan, -1) === ' ') {
739                        $trailingString = ' ' . $trailingString;
740                        $textSpan       = substr($textSpan, 0, -1);
741                        continue;
742                    }
743                    if (substr($textSpan, -6) === '&nbsp;') {
744                        $trailingString = '&nbsp;' . $trailingString;
745                        $textSpan       = substr($textSpan, 0, -1);
746                        continue;
747                    }
748                    if (substr($textSpan, -3) !== self::UTF8_PDF) {
749                        // There is no trailing numeric string
750                        $textSpan = $savedSpan;
751                        break;
752                    }
753
754                    // We have a numeric string
755                    $posStartNumber = strrpos($textSpan, self::UTF8_LRE);
756                    if ($posStartNumber === false) {
757                        $posStartNumber = 0;
758                    }
759                    $trailingString = substr($textSpan, $posStartNumber) . $trailingString;
760                    $textSpan       = substr($textSpan, 0, $posStartNumber);
761
762                    // Look for more spaces and move them too
763                    while ($textSpan !== '') {
764                        if (substr($textSpan, -1) === ' ') {
765                            $trailingString = ' ' . $trailingString;
766                            $textSpan       = substr($textSpan, 0, -1);
767                            continue;
768                        }
769                        if (substr($textSpan, -6) === '&nbsp;') {
770                            $trailingString = '&nbsp;' . $trailingString;
771                            $textSpan       = substr($textSpan, 0, -1);
772                            continue;
773                        }
774                        break;
775                    }
776
777                    self::$waitingText = $trailingString . self::$waitingText;
778                    break;
779                }
780            }
781
782            $savedSpan = $textSpan;
783            // Move any trailing <br>, optionally preceded or followed by blanks, outside this LTR span
784            while ($textSpan !== '') {
785                if (substr($textSpan, -1) === ' ') {
786                    $trailingBlanks = ' ' . $trailingBlanks;
787                    $textSpan       = substr($textSpan, 0, -1);
788                    continue;
789                }
790                if (substr('......' . $textSpan, -6) === '&nbsp;') {
791                    $trailingBlanks = '&nbsp;' . $trailingBlanks;
792                    $textSpan       = substr($textSpan, 0, -6);
793                    continue;
794                }
795                break;
796            }
797            while (substr($textSpan, -7) === '<LTRbr>') {
798                $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span
799                $textSpan       = substr($textSpan, 0, -7);
800            }
801            if ($trailingBreaks !== '') {
802                while ($textSpan !== '') {
803                    if (substr($textSpan, -1) === ' ') {
804                        $trailingBreaks = ' ' . $trailingBreaks;
805                        $textSpan       = substr($textSpan, 0, -1);
806                        continue;
807                    }
808                    if (substr($textSpan, -6) === '&nbsp;') {
809                        $trailingBreaks = '&nbsp;' . $trailingBreaks;
810                        $textSpan       = substr($textSpan, 0, -6);
811                        continue;
812                    }
813                    break;
814                }
815                self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span
816            } else {
817                $textSpan = $savedSpan;
818            }
819
820            $trailingBlanks      = '';
821            $trailingPunctuation = '';
822            $trailingID          = '';
823            $trailingSeparator   = '';
824            $leadingSeparator    = '';
825
826            while (I18N::direction() === 'rtl') {
827                if (str_contains($result, self::START_RTL)) {
828                    // Remove trailing blanks for inclusion in a separate LTR span
829                    while ($textSpan !== '') {
830                        if (substr($textSpan, -1) === ' ') {
831                            $trailingBlanks = ' ' . $trailingBlanks;
832                            $textSpan       = substr($textSpan, 0, -1);
833                            continue;
834                        }
835                        if (substr($textSpan, -6) === '&nbsp;') {
836                            $trailingBlanks = '&nbsp;' . $trailingBlanks;
837                            $textSpan       = substr($textSpan, 0, -1);
838                            continue;
839                        }
840                        break;
841                    }
842
843                    // Remove trailing punctuation for inclusion in a separate LTR span
844                    if ($textSpan === '') {
845                        $trailingChar = "\n";
846                    } else {
847                        $trailingChar = substr($textSpan, -1);
848                    }
849                    if (str_contains(self::PUNCTUATION, $trailingChar)) {
850                        $trailingPunctuation = $trailingChar;
851                        $textSpan            = substr($textSpan, 0, -1);
852                    }
853                }
854
855                // Remove trailing ID numbers that look like "(xnnn)" for inclusion in a separate LTR span
856                while (true) {
857                    if (substr($textSpan, -1) !== ')') {
858                        break;
859                    } // There is no trailing ')'
860                    $posLeftParen = strrpos($textSpan, '(');
861                    if ($posLeftParen === false) {
862                        break;
863                    } // There is no leading '('
864                    $temp = self::stripLrmRlm(substr($textSpan, $posLeftParen)); // Get rid of UTF8 control codes
865
866                    // If the parenthesized text doesn't look like an ID number,
867                    // we don't want to touch it.
868                    // This check won’t work if somebody uses ID numbers with an unusual format.
869                    $offset    = 1;
870                    $charArray = self::getChar($temp, $offset); // Get 1st character of parenthesized text
871                    if (str_contains(self::NUMBERS, $charArray['letter'])) {
872                        break;
873                    }
874                    $offset += $charArray['length']; // Point at 2nd character of parenthesized text
875                    if (!str_contains(self::NUMBERS, substr($temp, $offset, 1))) {
876                        break;
877                    }
878                    // 1st character of parenthesized text is alpha, 2nd character is a digit; last has to be a digit too
879                    if (!str_contains(self::NUMBERS, substr($temp, -2, 1))) {
880                        break;
881                    }
882
883                    $trailingID = substr($textSpan, $posLeftParen);
884                    $textSpan   = substr($textSpan, 0, $posLeftParen);
885                    break;
886                }
887
888                // Look for " - " or blank preceding the ID number and remove it for inclusion in a separate LTR span
889                if ($trailingID !== '') {
890                    while ($textSpan !== '') {
891                        if (substr($textSpan, -1) === ' ') {
892                            $trailingSeparator = ' ' . $trailingSeparator;
893                            $textSpan          = substr($textSpan, 0, -1);
894                            continue;
895                        }
896                        if (substr($textSpan, -6) === '&nbsp;') {
897                            $trailingSeparator = '&nbsp;' . $trailingSeparator;
898                            $textSpan          = substr($textSpan, 0, -6);
899                            continue;
900                        }
901                        if (substr($textSpan, -1) === '-') {
902                            $trailingSeparator = '-' . $trailingSeparator;
903                            $textSpan          = substr($textSpan, 0, -1);
904                            continue;
905                        }
906                        break;
907                    }
908                }
909
910                // Look for " - " preceding the text and remove it for inclusion in a separate LTR span
911                $foundSeparator = false;
912                $savedSpan      = $textSpan;
913                while ($textSpan !== '') {
914                    if (substr($textSpan, 0, 1) === ' ') {
915                        $leadingSeparator = ' ' . $leadingSeparator;
916                        $textSpan         = substr($textSpan, 1);
917                        continue;
918                    }
919                    if (substr($textSpan, 0, 6) === '&nbsp;') {
920                        $leadingSeparator = '&nbsp;' . $leadingSeparator;
921                        $textSpan         = substr($textSpan, 6);
922                        continue;
923                    }
924                    if (substr($textSpan, 0, 1) === '-') {
925                        $leadingSeparator = '-' . $leadingSeparator;
926                        $textSpan         = substr($textSpan, 1);
927                        $foundSeparator   = true;
928                        continue;
929                    }
930                    break;
931                }
932                if (!$foundSeparator) {
933                    $textSpan         = $savedSpan;
934                    $leadingSeparator = '';
935                }
936                break;
937            }
938
939            // We're done: finish the span
940            $textSpan = self::starredName($textSpan, 'LTR'); // Wrap starred name in <u> and </u> tags
941            while (true) {
942                // Remove blanks that precede <LTRbr>
943                if (str_contains($textSpan, ' <LTRbr>')) {
944                    $textSpan = str_replace(' <LTRbr>', '<LTRbr>', $textSpan);
945                    continue;
946                }
947                if (str_contains($textSpan, '&nbsp;<LTRbr>')) {
948                    $textSpan = str_replace('&nbsp;<LTRbr>', '<LTRbr>', $textSpan);
949                    continue;
950                }
951                break;
952            }
953            if ($leadingSeparator !== '') {
954                $result .= self::START_LTR . $leadingSeparator . self::END_LTR;
955            }
956            $result .= $textSpan . self::END_LTR;
957            if ($trailingSeparator !== '') {
958                $result .= self::START_LTR . $trailingSeparator . self::END_LTR;
959            }
960            if ($trailingID !== '') {
961                $result .= self::START_LTR . $trailingID . self::END_LTR;
962            }
963            if ($trailingPunctuation !== '') {
964                $result .= self::START_LTR . $trailingPunctuation . self::END_LTR;
965            }
966            if ($trailingBlanks !== '') {
967                $result .= self::START_LTR . $trailingBlanks . self::END_LTR;
968            }
969        }
970
971        /* ****************************** RTL text handling ******************************** */
972
973        if (self::$currentState === 'RTL') {
974            $savedSpan = $textSpan;
975
976            // Move any trailing <br>, optionally followed by blanks, outside this RTL span
977            while ($textSpan !== '') {
978                if (substr($textSpan, -1) === ' ') {
979                    $trailingBlanks = ' ' . $trailingBlanks;
980                    $textSpan       = substr($textSpan, 0, -1);
981                    continue;
982                }
983                if (substr('......' . $textSpan, -6) === '&nbsp;') {
984                    $trailingBlanks = '&nbsp;' . $trailingBlanks;
985                    $textSpan       = substr($textSpan, 0, -6);
986                    continue;
987                }
988                break;
989            }
990            while (substr($textSpan, -7) === '<RTLbr>') {
991                $trailingBreaks = '<br>' . $trailingBreaks; // Plain <br> because it’s outside a span
992                $textSpan       = substr($textSpan, 0, -7);
993            }
994            if ($trailingBreaks !== '') {
995                self::$waitingText = $trailingBlanks . self::$waitingText; // Put those trailing blanks inside the following span
996            } else {
997                $textSpan = $savedSpan;
998            }
999
1000            // Move trailing numeric strings to the following LTR text. Include any blanks preceding or following the numeric text too.
1001            if (!$theEnd && I18N::direction() !== 'rtl') {
1002                $trailingString = '';
1003                $savedSpan      = $textSpan;
1004                while ($textSpan !== '') {
1005                    // Look for trailing spaces and tentatively move them
1006                    if (substr($textSpan, -1) === ' ') {
1007                        $trailingString = ' ' . $trailingString;
1008                        $textSpan       = substr($textSpan, 0, -1);
1009                        continue;
1010                    }
1011                    if (substr($textSpan, -6) === '&nbsp;') {
1012                        $trailingString = '&nbsp;' . $trailingString;
1013                        $textSpan       = substr($textSpan, 0, -1);
1014                        continue;
1015                    }
1016                    if (substr($textSpan, -3) !== self::UTF8_PDF) {
1017                        // There is no trailing numeric string
1018                        $textSpan = $savedSpan;
1019                        break;
1020                    }
1021
1022                    // We have a numeric string
1023                    $posStartNumber = strrpos($textSpan, self::UTF8_LRE);
1024                    if ($posStartNumber === false) {
1025                        $posStartNumber = 0;
1026                    }
1027                    $trailingString = substr($textSpan, $posStartNumber) . $trailingString;
1028                    $textSpan       = substr($textSpan, 0, $posStartNumber);
1029
1030                    // Look for more spaces and move them too
1031                    while ($textSpan !== '') {
1032                        if (substr($textSpan, -1) === ' ') {
1033                            $trailingString = ' ' . $trailingString;
1034                            $textSpan       = substr($textSpan, 0, -1);
1035                            continue;
1036                        }
1037                        if (substr($textSpan, -6) === '&nbsp;') {
1038                            $trailingString = '&nbsp;' . $trailingString;
1039                            $textSpan       = substr($textSpan, 0, -1);
1040                            continue;
1041                        }
1042                        break;
1043                    }
1044
1045                    self::$waitingText = $trailingString . self::$waitingText;
1046                    break;
1047                }
1048            }
1049
1050            // Trailing " - " needs to be prefixed to the following span
1051            if (!$theEnd && substr('...' . $textSpan, -3) === ' - ') {
1052                $textSpan          = substr($textSpan, 0, -3);
1053                self::$waitingText = ' - ' . self::$waitingText;
1054            }
1055
1056            while (I18N::direction() === 'rtl') {
1057                // Look for " - " preceding <RTLbr> and relocate it to the front of the string
1058                $posDashString = strpos($textSpan, ' - <RTLbr>');
1059                if ($posDashString === false) {
1060                    break;
1061                }
1062                $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>');
1063                if ($posStringStart === false) {
1064                    $posStringStart = 0;
1065                } else {
1066                    $posStringStart += 9;
1067                } // Point to the first char following the last <RTLbr>
1068
1069                $textSpan = substr($textSpan, 0, $posStringStart) . ' - ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 3);
1070            }
1071
1072            // Strip leading spaces from the RTL text
1073            $countLeadingSpaces = 0;
1074            while ($textSpan !== '') {
1075                if (substr($textSpan, 0, 1) === ' ') {
1076                    $countLeadingSpaces++;
1077                    $textSpan = substr($textSpan, 1);
1078                    continue;
1079                }
1080                if (substr($textSpan, 0, 6) === '&nbsp;') {
1081                    $countLeadingSpaces++;
1082                    $textSpan = substr($textSpan, 6);
1083                    continue;
1084                }
1085                break;
1086            }
1087
1088            // Strip trailing spaces from the RTL text
1089            $countTrailingSpaces = 0;
1090            while ($textSpan !== '') {
1091                if (substr($textSpan, -1) === ' ') {
1092                    $countTrailingSpaces++;
1093                    $textSpan = substr($textSpan, 0, -1);
1094                    continue;
1095                }
1096                if (substr($textSpan, -6) === '&nbsp;') {
1097                    $countTrailingSpaces++;
1098                    $textSpan = substr($textSpan, 0, -6);
1099                    continue;
1100                }
1101                break;
1102            }
1103
1104            // Look for trailing " -", reverse it, and relocate it to the front of the string
1105            if (substr($textSpan, -2) === ' -') {
1106                $posDashString  = strlen($textSpan) - 2;
1107                $posStringStart = strrpos(substr($textSpan, 0, $posDashString), '<RTLbr>');
1108                if ($posStringStart === false) {
1109                    $posStringStart = 0;
1110                } else {
1111                    $posStringStart += 9;
1112                } // Point to the first char following the last <RTLbr>
1113
1114                $textSpan = substr($textSpan, 0, $posStringStart) . '- ' . substr($textSpan, $posStringStart, $posDashString - $posStringStart) . substr($textSpan, $posDashString + 2);
1115            }
1116
1117            if ($countLeadingSpaces !== 0) {
1118                $newLength = strlen($textSpan) + $countLeadingSpaces;
1119                $textSpan  = str_pad($textSpan, $newLength, ' ', I18N::direction() === 'rtl' ? STR_PAD_LEFT : STR_PAD_RIGHT);
1120            }
1121            if ($countTrailingSpaces !== 0) {
1122                if (I18N::direction() === 'ltr') {
1123                    if ($trailingBreaks === '') {
1124                        // Move trailing RTL spaces to front of following LTR span
1125                        $newLength         = strlen(self::$waitingText) + $countTrailingSpaces;
1126                        self::$waitingText = str_pad(self::$waitingText, $newLength, ' ', STR_PAD_LEFT);
1127                    }
1128                } else {
1129                    $newLength = strlen($textSpan) + $countTrailingSpaces;
1130                    $textSpan  = str_pad($textSpan, $newLength);
1131                }
1132            }
1133
1134            // We're done: finish the span
1135            $textSpan = self::starredName($textSpan, 'RTL'); // Wrap starred name in <u> and </u> tags
1136            $result   .= $textSpan . self::END_RTL;
1137        }
1138
1139        if (self::$currentState !== 'LTR' && self::$currentState !== 'RTL') {
1140            $result .= $textSpan;
1141        }
1142
1143        $result .= $trailingBreaks; // Get rid of any waiting <br>
1144    }
1145}
1146