xref: /webtrees/app/Services/GedcomService.php (revision e172383b8d1dc462218f743b1e04ca6a5babd14e)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2019 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22/**
23 * Utilities for manipulating GEDCOM data.
24 */
class GedcomService
{
    // User-defined tags begin with an underscore.
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    // Keys are upper-case tag names; values are the tags they are converted to.
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        'ANNULMENT'         => 'ANUL',
        // Misspelled key from earlier revisions, kept for backward compatibility.
        'ANULMENT'          => 'ANUL',
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        'CORPORATE'         => 'CORP',
        // Misspelled key from earlier revisions, kept for backward compatibility.
        'CORPORTATE'        => 'CORP',
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACSIMILE'         => 'FAX',
        // Misspelled key from earlier revisions, kept for backward compatibility.
        'FACIMILIE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'VERSION'           => 'VERS',
        'WEB'               => 'WWW',
        // NOTE(review): the target has no user-defined prefix, unlike the
        // entries below - confirm whether '_DETS' was intended.
        '_DEATH_OF_SPOUSE'  => 'DETS',
        '_DEGREE'           => '_DEG',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
        // Convert PhpGedView tag to webtrees
        '_PGVU'     => '_WT_USER',
        '_PGV_OBJS' => '_WT_OBJE_SORT',
    ];

    // LATI and LONG tags
    private const DEGREE_PRECISION = 5;
    private const LATITUDE_NORTH   = 'N';
    private const LATITUDE_SOUTH   = 'S';
    private const LONGITUDE_EAST   = 'E';
    private const LONGITUDE_WEST   = 'W';

    // PLAC tags
    private const PLACE_SEPARATOR       = ', ';
    private const PLACE_SEPARATOR_REGEX = '/ *, */';

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then looked up first in the table of long tag
     * names and then in the table of synonyms. Tags found in neither table
     * are returned upper-cased but otherwise unchanged.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Does a tag start with the user-defined prefix (an underscore)?
     *
     * @param string $tag
     *
     * @return bool
     */
    public function isUserDefinedTag(string $tag): bool
    {
        // strncmp() accepts an empty tag without raising a warning.
        $prefix = self::USER_DEFINED_TAG_PREFIX;

        return strncmp($tag, $prefix, strlen($prefix)) === 0;
    }

    /**
     * Convert a LATI value to signed decimal degrees (north is positive).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert a LONG value to signed decimal degrees (east is positive).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Parse a coordinate written as a hemisphere letter followed by a number,
     * or as a plain (optionally signed) number.
     *
     * @param string $text     The raw value; surrounding whitespace is ignored.
     * @param string $positive The hemisphere letter for positive values.
     * @param string $negative The hemisphere letter for negative values.
     *
     * @return float The signed value, or 0.0 when the text matches neither form.
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text       = trim($text);
        $hemisphere = substr($text, 0, 1);
        $degrees    = substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($degrees)) {
            $hemisphere = strtoupper($hemisphere);
            $degrees    = (float) $degrees;

            if ($hemisphere === $positive) {
                return $degrees;
            }

            if ($hemisphere === $negative) {
                return -$degrees;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Convert signed decimal degrees to a LATI value.
     *
     * @param float $latitude
     *
     * @return string
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert signed decimal degrees to a LONG value.
     *
     * @param float $longitude
     *
     * @return string
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format a coordinate as a hemisphere letter followed by the unsigned
     * number of degrees, which is the form that readDegrees() parses.
     *
     * @param float  $degrees
     * @param string $positive The hemisphere letter for values >= 0.
     * @param string $negative The hemisphere letter for values < 0.
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        // Round first, so that a tiny negative value which rounds to zero
        // is not written with the negative hemisphere.
        $degrees    = round($degrees, self::DEGREE_PRECISION);
        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        // %F is not locale aware, so the decimal separator is always a dot.
        $number = sprintf('%.' . self::DEGREE_PRECISION . 'F', abs($degrees));

        // Remove trailing zeros, and the decimal point if nothing follows it.
        $number = rtrim(rtrim($number, '0'), '.');

        return $hemisphere . $number;
    }

    /**
     * Split a place name into its component parts.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged. Empty parts are therefore removed.
     *
     * @param string $text
     *
     * @return string[]
     */
    public function readPlace(string $text): array
    {
        // The limit must be given explicitly; flags are the fourth argument.
        $parts = preg_split(self::PLACE_SEPARATOR_REGEX, trim($text), -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on failure, which is not a valid return value here.
        return $parts === false ? [] : $parts;
    }

    /**
     * Join the component parts of a place name.
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Some applications use non-standard values for unknown.
     * Anything other than male or female (in any letter case) is treated as unknown.
     *
     * @param string $text
     *
     * @return string
     */
    public function readSex(string $text): string
    {
        // Ignore surrounding whitespace, as the other readers do.
        $text = strtoupper(trim($text));

        if ($text === self::SEX_MALE || $text === self::SEX_FEMALE) {
            return $text;
        }

        return self::SEX_UNKNOWN;
    }
}
339