<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2018 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

/**
 * Utilities for manipulating GEDCOM data.
 *
 * Offers tag canonicalisation and readers/writers for the values of the
 * LATI, LONG, PLAC and SEX tags.
 */
class GedcomService
{
    // Gedcom allows 255 characters (not bytes), including the EOL character.
    const EOL         = "\r\n";
    // Regex fragment (no delimiters). The two-character sequences are listed
    // first: PCRE alternation takes the first alternative that matches, so a
    // leading "\r" would otherwise split "\r\n" into two line breaks.
    const EOL_REGEX   = '\r\n|\n\r|\r|\n';
    // Maximum line length, less the two characters of self::EOL.
    const LINE_LENGTH = 255 - 2;


    // User defined tags begin with an underscore
    const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    // Keys are upper-case because canonicalTag() upper-cases before the lookup.
    const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AFN'               => 'AFN',
        'AGE'               => 'AGE',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        'ANNULMENT'         => 'ANUL',
        'ANULMENT'          => 'ANUL', // Legacy misspelling, kept for compatibility.
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CITY'              => 'CITY',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        'CORPORATE'         => 'CORP',
        'CORPORTATE'        => 'CORP', // Legacy misspelling, kept for compatibility.
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DATA'              => 'DATA',
        'DATE'              => 'DATE',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMAIL'             => 'EMAI',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FACT'              => 'FACT',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACSIMILE'         => 'FAX',
        // NOTE(review): this spelling appears to match the GEDCOM 5.5.1 tag list - confirm before removing.
        'FACIMILIE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FILE'              => 'FILE',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MAP'               => 'MAP',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NAME'              => 'NAME',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NOTE'              => 'NOTE',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PAGE'              => 'PAGE',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'PUHBLICATION'      => 'PUBL', // Legacy misspelling, kept for compatibility.
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROLE'              => 'ROLE',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SEX'               => 'SEX',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TEXT'              => 'TEXT',
        'TIME'              => 'TIME',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'TYPE'              => 'TYPE',
        'VERSION'           => 'VERS',
        'WIFE'              => 'WIFE',
        'WILL'              => 'WILL',
        'WEB'               => 'WWW',
        // NOTE(review): every other underscore-prefixed name maps to an
        // underscore-prefixed tag or a standard one; "DETS" may be meant
        // to be "_DETS" - confirm against the producing application.
        '_DEATH_OF_SPOUSE'  => 'DETS',
        '_DEGREE'           => '_DEG',
        '_FILE'             => 'FILE',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    const TAG_SYNONYMS = [
    ];

    // LATI and LONG tags
    // sprintf() format taking (degrees, hemisphere) in that order. It emits the
    // hemisphere letter first, then the value with five decimals, which is the
    // shape readDegrees() parses. %F keeps the decimal point locale-independent.
    const DEGREE_FORMAT  = '%2$s%1$.5F';
    const LATITUDE_NORTH = 'N';
    const LATITUDE_SOUTH = 'S';
    const LONGITUDE_EAST = 'E';
    const LONGITUDE_WEST = 'W';

    // PLAC tags
    const PLACE_SEPARATOR       = ', ';
    // Regex fragment (no delimiters): a comma with optional surrounding spaces.
    const PLACE_SEPARATOR_REGEX = ' *, *';

    // SEX tags
    const SEX_FEMALE  = 'F';
    const SEX_MALE    = 'M';
    const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then replaced by its entry in TAG_NAMES or
     * TAG_SYNONYMS when one exists. Unknown tags are returned upper-cased.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Does the tag start with the user-defined prefix (an underscore)?
     *
     * @param string $tag
     *
     * @return bool
     */
    public function isUserDefinedTag(string $tag): bool
    {
        return strpos($tag, self::USER_DEFINED_TAG_PREFIX) === 0;
    }

    /**
     * Parse a LATI value. North is positive, south is negative.
     *
     * @param string $text
     *
     * @return float
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Parse a LONG value. East is positive, west is negative.
     *
     * @param string $text
     *
     * @return float
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Parse a hemisphere letter followed by a number, or a bare number.
     *
     * @param string $text     Raw value; surrounding whitespace is ignored.
     * @param string $positive Hemisphere letter that yields a positive result.
     * @param string $negative Hemisphere letter that yields a negative result.
     *
     * @return float The signed value, or 0.0 when the text matches neither form.
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text       = trim($text);
        $hemisphere = substr($text, 0, 1);
        $degrees    = substr($text, 1);

        // Match a valid GEDCOM format: hemisphere letter (any case), then a number.
        if (is_numeric($degrees)) {
            $hemisphere = strtoupper($hemisphere);
            $degrees    = (float) $degrees;

            if ($hemisphere === $positive) {
                return $degrees;
            }

            if ($hemisphere === $negative) {
                return -$degrees;
            }
        }

        // Just a number? Its own sign is kept.
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Format a latitude as a LATI value, e.g. "N51.50000" or "S12.25000".
     *
     * @param float $latitude
     *
     * @return string
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Format a longitude as a LONG value, e.g. "E2.35000" or "W0.12500".
     *
     * @param float $longitude
     *
     * @return string
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format a signed value as hemisphere letter plus unsigned magnitude.
     *
     * @param float  $degrees
     * @param string $positive Hemisphere letter used for values >= 0.
     * @param string $negative Hemisphere letter used for values < 0.
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        // The hemisphere letter carries the sign, so only the magnitude is written.
        return sprintf(self::DEGREE_FORMAT, abs($degrees), $hemisphere);
    }

    /**
     * Split a PLAC value into its component place names.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged. Empty components are therefore dropped.
     *
     * @param string $text
     *
     * @return string[]
     */
    public function readPlace(string $text): array
    {
        $text = trim($text);

        // The third argument of preg_split() is the limit; -1 means "no limit",
        // so the flags must go in the fourth position.
        $parts = preg_split('/' . self::PLACE_SEPARATOR_REGEX . '/', $text, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on failure; honour the array return type.
        return $parts ?: [];
    }

    /**
     * Join place names into a PLAC value, separated by ", ".
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Normalise a SEX value to "M", "F" or "U".
     *
     * Some applications use non-standard values for unknown, so anything
     * other than male or female (ignoring case and surrounding whitespace)
     * is treated as unknown.
     *
     * @param string $text
     *
     * @return string
     */
    public function readSex(string $text): string
    {
        $text = strtoupper(trim($text));

        if ($text !== self::SEX_MALE && $text !== self::SEX_FEMALE) {
            $text = self::SEX_UNKNOWN;
        }

        return $text;
    }
}