xref: /webtrees/app/Services/GedcomService.php (revision 8d6560c40d2d2d26dd23f877bd58f736e0388d8f)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees\Services;
19
20/**
21 * Utilities for manipulating GEDCOM data.
22 */
class GedcomService
{
    // Gedcom allows 255 characters (not bytes), including the EOL character.
    // EOL is two characters long, hence the "- 2" in LINE_LENGTH.
    private const EOL         = "\r\n";
    // Two-character line endings are listed first, otherwise the single
    // "\r" or "\n" alternative would match half of a CRLF/LFCR pair.
    private const EOL_REGEX   = '\r\n|\n\r|\r|\n';
    private const LINE_LENGTH = 255 - 2;


    // User defined tags begin with an underscore
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AFN'               => 'AFN',
        'AGE'               => 'AGE',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        'ANNULMENT'         => 'ANUL',
        // Misspelling of ANNULMENT, kept so existing behaviour is unchanged.
        'ANULMENT'          => 'ANUL',
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CITY'              => 'CITY',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        'CORPORATE'         => 'CORP',
        // Misspelling of CORPORATE, kept so existing behaviour is unchanged.
        'CORPORTATE'        => 'CORP',
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DATA'              => 'DATA',
        'DATE'              => 'DATE',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMAIL'             => 'EMAI',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FACT'              => 'FACT',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACIMILIE'         => 'FAX',
        // Conventional spelling, accepted alongside the form above.
        'FACSIMILE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FILE'              => 'FILE',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MAP'               => 'MAP',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NAME'              => 'NAME',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NOTE'              => 'NOTE',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PAGE'              => 'PAGE',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROLE'              => 'ROLE',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SEX'               => 'SEX',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TEXT'              => 'TEXT',
        'TIME'              => 'TIME',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'TYPE'              => 'TYPE',
        'VERSION'           => 'VERS',
        'WIFE'              => 'WIFE',
        'WILL'              => 'WILL',
        'WEB'               => 'WWW',
        '_DEATH_OF_SPOUSE'  => 'DETS',
        '_DEGREE'           => '_DEG',
        '_FILE'             => 'FILE',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
    ];

    // LATI and LONG tags: hemisphere letter followed by unsigned degrees.
    // %F (not %f) keeps the decimal point independent of the current locale.
    private const DEGREE_FORMAT  = '%s%.5F';
    private const LATITUDE_NORTH = 'N';
    private const LATITUDE_SOUTH = 'S';
    private const LONGITUDE_EAST = 'E';
    private const LONGITUDE_WEST = 'W';

    // PLAC tags
    private const PLACE_SEPARATOR       = ', ';
    private const PLACE_SEPARATOR_REGEX = '/ *, */';

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then replaced by its short form when it is a
     * known tag name or synonym. Unknown tags are returned upper-cased.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $upper = strtoupper($tag);

        return self::TAG_NAMES[$upper] ?? self::TAG_SYNONYMS[$upper] ?? $upper;
    }

    /**
     * Is this a user-defined tag, i.e. does it begin with an underscore?
     *
     * @param string $tag
     *
     * @return bool - always false for an empty tag
     */
    public function isUserDefinedTag(string $tag): bool
    {
        // strncmp() is safe on an empty string, unlike substr_compare() on older PHP versions.
        return strncmp($tag, self::USER_DEFINED_TAG_PREFIX, 1) === 0;
    }

    /**
     * Parse a LATI value, such as "N51.5", into signed degrees (south is negative).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Parse a LONG value, such as "W0.125", into signed degrees (west is negative).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Parse a coordinate. Accepts a hemisphere letter (any case) followed by a
     * number, or a plain signed number. Anything else gives 0.0.
     *
     * @param string $text
     * @param string $positive - hemisphere letter for positive values
     * @param string $negative - hemisphere letter for negative values
     *
     * @return float
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text = trim($text);

        if ($text === '') {
            return 0.0;
        }

        $hemisphere = strtoupper($text[0]);
        $magnitude  = (string) substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($magnitude)) {
            if ($hemisphere === $positive) {
                return (float) $magnitude;
            }

            if ($hemisphere === $negative) {
                return -(float) $magnitude;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Format signed degrees as a LATI value, e.g. -33.25 becomes "S33.25000".
     *
     * @param float $latitude
     *
     * @return string
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Format signed degrees as a LONG value, e.g. -0.125 becomes "W0.12500".
     *
     * @param float $longitude
     *
     * @return string
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format a coordinate as the hemisphere letter followed by the unsigned
     * value with five decimals, so that readDegrees() can parse it back.
     *
     * @param float  $degrees
     * @param string $positive - hemisphere letter for values >= 0
     * @param string $negative - hemisphere letter for values < 0
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        // The sign is carried by the hemisphere letter, so only the magnitude is written.
        return sprintf(self::DEGREE_FORMAT, $hemisphere, abs($degrees));
    }

    /**
     * Split a place name into its parts, discarding empty parts.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged.
     *
     * @param string $text
     *
     * @return string[]
     */
    public function readPlace(string $text): array
    {
        // A limit of -1 means "no limit"; the flags belong in the fourth argument.
        $parts = preg_split(self::PLACE_SEPARATOR_REGEX, trim($text), -1, PREG_SPLIT_NO_EMPTY);

        return $parts === false ? [] : $parts;
    }

    /**
     * Join the parts of a place name with the standard ", " separator.
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Normalise a SEX value to "M", "F" or "U".
     *
     * Some applications use non-standard values for unknown.
     *
     * @param string $text - case and surrounding whitespace are ignored
     *
     * @return string
     */
    public function readSex(string $text): string
    {
        $sex = strtoupper(trim($text));

        if ($sex === self::SEX_MALE || $sex === self::SEX_FEMALE) {
            return $sex;
        }

        return self::SEX_UNKNOWN;
    }
}
360