xref: /webtrees/app/Services/GedcomService.php (revision 663dd9d85326d3016550248671b2f0eba8949cb3)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Gedcom;
23
24use function abs;
25
/**
 * Utilities for manipulating GEDCOM data.
 *
 * Provides helpers to normalise tags, and to read/write the values of
 * coordinate, place and sex fields, tolerating common non-standard input.
 */
class GedcomService
{
    // User defined tags begin with an underscore
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, write the long GEDCOM tag names instead of the tags.
    // Keys are matched after converting the input to upper case.
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        // Both spellings are accepted; the misspelled key is kept for backward compatibility.
        'ANNULMENT'         => 'ANUL',
        'ANULMENT'          => 'ANUL',
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        // Both spellings are accepted; the misspelled key is kept for backward compatibility.
        'CORPORATE'         => 'CORP',
        'CORPORTATE'        => 'CORP',
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        // Both spellings are accepted; the original key is kept for backward compatibility.
        'FACSIMILE'         => 'FAX',
        'FACIMILIE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'VERSION'           => 'VERS',
        'WEB'               => 'WWW',
        '_DEATH_OF_SPOUSE'  => 'DETS',
        '_DEGREE'           => '_DEG',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
        // Convert PhpGedView tag to webtrees
        '_PGVU'     => '_WT_USER',
        '_PGV_OBJS' => '_WT_OBJE_SORT',
    ];

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then looked up first in the list of long tag
     * names and then in the list of synonyms. Tags found in neither list are
     * returned upper-cased but otherwise unchanged.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Does a tag start with the user-defined tag prefix (an underscore)?
     *
     * @param string $tag
     *
     * @return bool False for an empty tag.
     */
    public function isUserDefinedTag(string $tag): bool
    {
        // Compare exactly as many bytes as the prefix has, so this stays
        // correct if the prefix constant ever changes length.
        $prefix = self::USER_DEFINED_TAG_PREFIX;

        return strncmp($tag, $prefix, strlen($prefix)) === 0;
    }

    /**
     * Read a latitude, such as "N12.345", "S12.345" or a plain signed number.
     *
     * @param string $text
     *
     * @return float|null Degrees (negative for the southern hemisphere), or null if not recognised.
     */
    public function readLatitude(string $text): ?float
    {
        return $this->readDegrees($text, Gedcom::LATITUDE_NORTH, Gedcom::LATITUDE_SOUTH);
    }

    /**
     * Read a longitude, such as "E12.345", "W12.345" or a plain signed number.
     *
     * @param string $text
     *
     * @return float|null Degrees (negative for the western hemisphere), or null if not recognised.
     */
    public function readLongitude(string $text): ?float
    {
        return $this->readDegrees($text, Gedcom::LONGITUDE_EAST, Gedcom::LONGITUDE_WEST);
    }

    /**
     * Parse a coordinate that is either a hemisphere letter followed by a
     * number, or just a number.
     *
     * @param string $text     The raw coordinate; surrounding whitespace is ignored.
     * @param string $positive Hemisphere letter that yields a positive value.
     * @param string $negative Hemisphere letter that yields a negative value.
     *
     * @return float|null The value in degrees, or null if $text matches neither form.
     */
    private function readDegrees(string $text, string $positive, string $negative): ?float
    {
        $text       = trim($text);
        $hemisphere = substr($text, 0, 1);
        $degrees    = substr($text, 1);

        // Match a valid GEDCOM format: one hemisphere letter, then a number.
        if (is_numeric($degrees)) {
            // The hemisphere letter is matched case-insensitively.
            $hemisphere = strtoupper($hemisphere);
            $degrees    = (float) $degrees;

            if ($hemisphere === $positive) {
                return $degrees;
            }

            if ($hemisphere === $negative) {
                return -$degrees;
            }
        }

        // Just a number? Its own sign decides the hemisphere.
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return null;
    }

    /**
     * Split a place name into its component parts.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged. Empty parts are not removed here; they
     * are returned as empty strings.
     *
     * @param string $text
     *
     * @return array<string>
     */
    public function readPlace(string $text): array
    {
        $text  = trim($text);
        $parts = preg_split(Gedcom::PLACE_SEPARATOR_REGEX, $text);

        // preg_split() returns false on a PCRE error. Fall back to the unsplit
        // text rather than violating the declared array return type.
        if ($parts === false) {
            return [$text];
        }

        return $parts;
    }

    /**
     * Join the component parts of a place name into a single string.
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(Gedcom::PLACE_SEPARATOR, $place);
    }

    /**
     * Some applications use non-standard values for unknown.
     *
     * Surrounding whitespace and letter case are ignored. Anything other than
     * male or female is reported as unknown.
     *
     * @param string $text
     *
     * @return string One of "M", "F" or "U".
     */
    public function readSex(string $text): string
    {
        // Trim first, for consistency with the other read*() methods.
        $text = strtoupper(trim($text));

        if ($text === self::SEX_MALE || $text === self::SEX_FEMALE) {
            return $text;
        }

        return self::SEX_UNKNOWN;
    }
}
296