xref: /webtrees/app/Services/GedcomService.php (revision fc26b4f6f90ed5ccd1299f32215addfce7801e68)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2019 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
/**
 * Utilities for manipulating GEDCOM data.
 *
 * Provides tag normalisation, plus helpers to read and write the values
 * of LATI, LONG, PLAC and SEX tags.
 */
class GedcomService
{
    // Gedcom allows 255 characters (not bytes), including the EOL character.
    private const EOL         = "\r\n";
    // Two-character line endings are listed first; otherwise "\r" would match
    // on its own and a "\r\n" pair would be treated as two line endings.
    private const EOL_REGEX   = '\r\n|\n\r|\r|\n';
    private const LINE_LENGTH = 255 - 2;


    // User defined tags begin with an underscore
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    // The misspelt keys (ANULMENT, CORPORTATE, FACIMILIE) are kept alongside the
    // correct spellings, so that input which was recognised before still is.
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        'ANNULMENT'         => 'ANUL',
        'ANULMENT'          => 'ANUL',
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        'CORPORATE'         => 'CORP',
        'CORPORTATE'        => 'CORP',
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMAIL'             => 'EMAI',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACSIMILE'         => 'FAX',
        'FACIMILIE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'VERSION'           => 'VERS',
        'WEB'               => 'WWW',
        // User-defined tag names map to user-defined (underscore-prefixed) tags.
        '_DEATH_OF_SPOUSE'  => '_DETS',
        '_DEGREE'           => '_DEG',
        // NOTE(review): other applications appear to use _MDCL for this - confirm.
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
        // Convert PhpGedView tag to webtrees
        '_PGVU'     => '_WT_USER',
        '_PGV_OBJS' => '_WT_OBJE_SORT',
    ];

    // LATI and LONG tags
    private const DEGREE_PRECISION = 5;
    private const LATITUDE_NORTH   = 'N';
    private const LATITUDE_SOUTH   = 'S';
    private const LONGITUDE_EAST   = 'E';
    private const LONGITUDE_WEST   = 'W';

    // PLAC tags
    private const PLACE_SEPARATOR       = ', ';
    private const PLACE_SEPARATOR_REGEX = '/ *, */';

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then long tag names and known synonyms
     * are replaced by the corresponding tag.  Anything else is returned
     * upper-cased but otherwise unchanged.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Does a tag begin with the prefix reserved for user-defined tags?
     *
     * @param string $tag
     *
     * @return bool
     */
    public function isUserDefinedTag(string $tag): bool
    {
        // strncmp() accepts an empty string without raising a warning.
        return strncmp($tag, self::USER_DEFINED_TAG_PREFIX, 1) === 0;
    }

    /**
     * Convert the value of a LATI tag to signed degrees (north is positive).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert the value of a LONG tag to signed degrees (east is positive).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Parse a hemisphere letter followed by a number, or just a number.
     *
     * @param string $text     Text to parse.  Surrounding whitespace is ignored.
     * @param string $positive Hemisphere letter for positive values.
     * @param string $negative Hemisphere letter for negative values.
     *
     * @return float The signed value, or 0.0 when the text cannot be parsed.
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text       = trim($text);
        $hemisphere = substr($text, 0, 1);
        $degrees    = substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($degrees)) {
            // The hemisphere letter is matched case-insensitively.
            $hemisphere = strtoupper($hemisphere);
            $degrees    = (float) $degrees;

            if ($hemisphere === $positive) {
                return $degrees;
            }

            if ($hemisphere === $negative) {
                return -$degrees;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Convert signed degrees to the value of a LATI tag, e.g. "N51.5".
     *
     * @param float $latitude
     *
     * @return string
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert signed degrees to the value of a LONG tag, e.g. "W0.125".
     *
     * @param float $longitude
     *
     * @return string
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format signed degrees as a hemisphere letter followed by an unsigned
     * number.  This is the format that readDegrees() accepts, so written
     * values can be read back.
     *
     * @param float  $degrees
     * @param string $positive Hemisphere letter for values that are zero or more.
     * @param string $negative Hemisphere letter for values below zero.
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        // Round before testing the sign, so that a tiny negative value
        // is written as zero in the positive hemisphere.
        $degrees    = round($degrees, self::DEGREE_PRECISION);
        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        // %F (rather than %f) always uses "." as the decimal separator.
        $number = sprintf('%.' . self::DEGREE_PRECISION . 'F', abs($degrees));

        // Remove trailing zeros, and then any trailing decimal separator.
        $number = rtrim(rtrim($number, '0'), '.');

        return $hemisphere . $number;
    }

    /**
     * Split a place name into its component parts.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged.  Empty parts are therefore removed.
     *
     * @param string $text
     *
     * @return string[]
     */
    public function readPlace(string $text): array
    {
        // A limit of -1 means "no limit".  The flags are the fourth argument.
        $parts = preg_split(self::PLACE_SEPARATOR_REGEX, trim($text), -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on failure, which is not a valid return value here.
        return $parts === false ? [] : $parts;
    }

    /**
     * Join the component parts of a place name.
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Some applications use non-standard values for unknown.
     *
     * The value is matched case-insensitively.  Anything other than
     * male or female is treated as unknown.
     *
     * @param string $text
     *
     * @return string
     */
    public function readSex(string $text): string
    {
        $text = strtoupper($text);

        if ($text === self::SEX_MALE || $text === self::SEX_FEMALE) {
            return $text;
        }

        return self::SEX_UNKNOWN;
    }
}
346