xref: /webtrees/app/Services/GedcomService.php (revision d812eb6b3dd6c33a96d07cd964cb4c3b88cec447)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2019 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17declare(strict_types=1);
18
19namespace Fisharebest\Webtrees\Services;
20
/**
 * Utilities for manipulating GEDCOM data.
 *
 * Provides helpers to normalise tag names, and to convert the values of
 * LATI, LONG, PLAC and SEX tags between GEDCOM text and PHP values.
 */
class GedcomService
{
    // Gedcom allows 255 characters (not bytes), including the EOL character.
    private const EOL         = "\r\n";
    // Regex fragment (no delimiters). The two-character line endings are listed first,
    // so that "\r\n" is matched as one line ending and not as "\r" followed by "\n".
    private const EOL_REGEX   = '\r\n|\n\r|\r|\n';
    private const LINE_LENGTH = 255 - 2;


    // User defined tags begin with an underscore
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    // The misspelled names (ANULMENT, CORPORTATE, FACIMILIE) are retained alongside
    // the corrected spellings, so that anything recognised before is still recognised.
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AFN'               => 'AFN',
        'AGE'               => 'AGE',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        'ANNULMENT'         => 'ANUL',
        'ANULMENT'          => 'ANUL',
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CITY'              => 'CITY',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        'CORPORATE'         => 'CORP',
        'CORPORTATE'        => 'CORP',
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DATA'              => 'DATA',
        'DATE'              => 'DATE',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMAIL'             => 'EMAI',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FACT'              => 'FACT',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACIMILIE'         => 'FAX',
        'FACSIMILE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FILE'              => 'FILE',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MAP'               => 'MAP',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NAME'              => 'NAME',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NOTE'              => 'NOTE',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PAGE'              => 'PAGE',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROLE'              => 'ROLE',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SEX'               => 'SEX',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TEXT'              => 'TEXT',
        'TIME'              => 'TIME',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'TYPE'              => 'TYPE',
        'VERSION'           => 'VERS',
        'WIFE'              => 'WIFE',
        'WILL'              => 'WILL',
        'WEB'               => 'WWW',
        '_DEATH_OF_SPOUSE'  => 'DETS',
        '_DEGREE'           => '_DEG',
        '_FILE'             => 'FILE',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
    ];

    // LATI and LONG tags.
    // The format is the hemisphere letter followed by the unsigned number of degrees,
    // to five decimal places. "%F" is used because it is not locale-aware, so the
    // decimal separator is always a dot.
    private const DEGREE_FORMAT  = '%s%.5F';
    private const DEGREE_DIGITS  = 5;
    private const LATITUDE_NORTH = 'N';
    private const LATITUDE_SOUTH = 'S';
    private const LONGITUDE_EAST = 'E';
    private const LONGITUDE_WEST = 'W';

    // PLAC tags. The regex is a fragment (no delimiters); they are added where it is used.
    private const PLACE_SEPARATOR       = ', ';
    private const PLACE_SEPARATOR_REGEX = ' *, *';

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is converted to upper case. If it is a known tag name (such as
     * "BIRTH") or a known synonym, then the corresponding tag (such as "BIRT")
     * is returned. Anything else is returned in upper case, otherwise unchanged.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Does a tag begin with the prefix reserved for user-defined tags?
     *
     * @param string $tag
     *
     * @return bool - always false for an empty tag
     */
    public function isUserDefinedTag(string $tag): bool
    {
        // Test for an empty string explicitly, rather than asking a string
        // function to compare at an offset that does not exist.
        return $tag !== '' && $tag[0] === self::USER_DEFINED_TAG_PREFIX;
    }

    /**
     * Convert the text of a LATI tag to a signed number of degrees.
     *
     * @param string $text - e.g. "N51.5", "S12.25" or "-12.25"
     *
     * @return float - north is positive, south is negative, unrecognised text gives 0.0
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert the text of a LONG tag to a signed number of degrees.
     *
     * @param string $text - e.g. "E2.35", "W0.125" or "-0.125"
     *
     * @return float - east is positive, west is negative, unrecognised text gives 0.0
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Convert a hemisphere letter and a number to a signed number of degrees.
     *
     * Surrounding whitespace is ignored and the hemisphere letter can be in
     * either case. A plain number, without a hemisphere, is also accepted.
     *
     * @param string $text
     * @param string $positive - the hemisphere letter for positive values
     * @param string $negative - the hemisphere letter for negative values
     *
     * @return float - 0.0 when the text cannot be interpreted
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text       = trim($text);
        $hemisphere = substr($text, 0, 1);
        $degrees    = substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($degrees)) {
            $hemisphere = strtoupper($hemisphere);
            $degrees    = (float) $degrees;

            if ($hemisphere === $positive) {
                return $degrees;
            }

            if ($hemisphere === $negative) {
                return -$degrees;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Convert a signed number of degrees to the text of a LATI tag.
     *
     * @param float $latitude
     *
     * @return string - e.g. "N51.50000" or "S12.25000"
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert a signed number of degrees to the text of a LONG tag.
     *
     * @param float $longitude
     *
     * @return string - e.g. "E2.35000" or "W0.12500"
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format a signed number of degrees as a hemisphere letter followed by an
     * unsigned number. The result can be parsed again by readDegrees().
     *
     * @param float  $degrees
     * @param string $positive - the hemisphere letter for positive values (and zero)
     * @param string $negative - the hemisphere letter for negative values
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        // Round before choosing the hemisphere. Otherwise a tiny negative value
        // would be written as zero degrees in the negative hemisphere.
        $degrees = round($degrees, self::DEGREE_DIGITS);

        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        return sprintf(self::DEGREE_FORMAT, $hemisphere, abs($degrees));
    }

    /**
     * Split a place name into its component parts.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged. Hence empty parts are removed.
     *
     * @param string $text
     *
     * @return string[] - e.g. "Town, , Country" gives ["Town", "Country"]
     */
    public function readPlace(string $text): array
    {
        $text = trim($text);

        // The flags are the fourth argument of preg_split(). The third is a limit
        // on the number of parts, and -1 means no limit.
        $parts = preg_split('/' . self::PLACE_SEPARATOR_REGEX . '/', $text, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() gives false on failure, which is not a valid return value here.
        return $parts === false ? [] : $parts;
    }

    /**
     * Join the component parts of a place name.
     *
     * @param string[] $place
     *
     * @return string - e.g. ["Town", "Country"] gives "Town, Country"
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Convert the text of a SEX tag to one of "M", "F" or "U".
     *
     * Some applications use non-standard values for unknown. Anything other
     * than male or female is treated as unknown. Surrounding whitespace and
     * letter-case are ignored.
     *
     * @param string $text
     *
     * @return string
     */
    public function readSex(string $text): string
    {
        $text = strtoupper(trim($text));

        if ($text !== self::SEX_MALE && $text !== self::SEX_FEMALE) {
            $text = self::SEX_UNKNOWN;
        }

        return $text;
    }
}
361