xref: /webtrees/app/Gedcom.php (revision f033cc5a342c113ca18f2362b72af056c61e8606)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees;
21
/**
 * Constants describing the GEDCOM 5.5.1 specification.
 */
class Gedcom
{
    // Line terminator: CR LF (MSDOS style), chosen for maximum compatibility.
    public const EOL = "\r\n";

    // 255, less the two characters of the EOL sequence.
    public const LINE_LENGTH = 253;

    // Tags for events that indicate the start of a life.
    public const BIRTH_EVENTS = [
        'BIRT',
        'CHR',
        'BAPM',
    ];

    // Tags for events that indicate the end of a life.
    public const DEATH_EVENTS = [
        'DEAT',
        'BURI',
        'CREM',
    ];

    // Tags for events that indicate the start of a relationship.
    public const MARRIAGE_EVENTS = [
        'MARR',
        '_NMR',
    ];

    // Tags for events that indicate the end of a relationship.
    public const DIVORCE_EVENTS = [
        'DIV',
        'ANUL',
        '_SEPR',
    ];

    // Regular expression fragment (no delimiters) matching a GEDCOM tag.
    public const REGEX_TAG = '[_A-Z][_A-Z0-9]*';

    // Regular expression fragment (no delimiters) matching a GEDCOM XREF.
    public const REGEX_XREF = '[A-Za-z0-9:_.-]{1,20}';

    // The optional byte-order-mark (U+FEFF) that may begin a UTF-8 encoded file.
    public const UTF8_BOM = "\xEF\xBB\xBF";

    // The separator written between the parts of a place name.
    public const PLACE_SEPARATOR = ', ';

    // Complete regular expression matching a (possibly badly formed) place separator.
    public const PLACE_SEPARATOR_REGEX = '/ *,[, ]*/';

    // Direction letters for the LATI and LONG tags.
    public const LATITUDE_NORTH = 'N';
    public const LATITUDE_SOUTH = 'S';
    public const LONGITUDE_EAST = 'E';
    public const LONGITUDE_WEST = 'W';

    // The record types that allow a CHAN event - not all of them do.
    public const RECORDS_WITH_CHAN = [
        Family::RECORD_TYPE,
        Individual::RECORD_TYPE,
        Media::RECORD_TYPE,
        Note::RECORD_TYPE,
        Repository::RECORD_TYPE,
        Source::RECORD_TYPE,
        Submitter::RECORD_TYPE,
    ];

    // Each preference (the key) controls one or more tag definitions (the values).
    public const HIDDEN_TAGS = [
        // Individual names
        'NAME_NPFX' => [
            'INDI:NAME:NPFX',
            'INDI:NAME:FONE:NPFX',
            'INDI:NAME:ROMN:NPFX',
        ],
        'NAME_SPFX' => [
            'INDI:NAME:SPFX',
            'INDI:NAME:FONE:SPFX',
            'INDI:NAME:ROMN:SPFX',
        ],
        'NAME_NSFX' => [
            'INDI:NAME:NSFX',
            'INDI:NAME:FONE:NSFX',
            'INDI:NAME:ROMN:NSFX',
        ],
        'NAME_NICK' => [
            'INDI:NAME:NICK',
            'INDI:NAME:FONE:NICK',
            'INDI:NAME:ROMN:NICK',
        ],
        'NAME_FONE' => ['INDI:NAME:FONE'],
        'NAME_ROMN' => ['INDI:NAME:ROMN'],
        'NAME_NOTE' => ['INDI:NAME:NOTE'],
        'NAME_SOUR' => ['INDI:NAME:SOUR'],

        // Places
        'PLAC_MAP' => ['PLAC:MAP'],
        'PLAC_FONE' => ['PLAC:FONE'],
        'PLAC_ROMN' => ['PLAC:ROMN'],
        'PLAC_FORM' => [
            'PLAC:FORM',
            'HEAD:PLAC',
        ],
        'PLAC_NOTE' => ['PLAC:NOTE'],

        // Addresses
        'ADDR_FAX' => ['FAX'],
        'ADDR_PHON' => ['PHON'],
        'ADDR_WWW' => ['WWW'],

        // Source citations
        'SOUR_EVEN' => [':SOUR:EVEN'],
        'SOUR_DATE' => [':SOUR:DATA:DATE'],
        'SOUR_NOTE' => [':SOUR:NOTE'],
        'SOUR_QUAY' => [':SOUR:QUAY'],

        // Sources
        'SOUR_DATA' => [
            'SOUR:DATA:EVEN',
            'SOUR:DATA:AGNC',
            'SOUR:DATA:NOTE',
        ],

        // Individuals
        'BIRT_FAMC' => ['INDI:BIRT:FAMC'],
        'RELI' => ['INDI:RELI'],
        'BAPM' => ['INDI:BAPM'],
        'CHR' => [
            'INDI:CHR',
            'INDI:CHRA',
        ],
        'FCOM' => [
            'INDI:FCOM',
            'INDI:CONF',
        ],
        'ORDN' => ['INDI:ORDN'],
        'BARM' => [
            'INDI:BARM',
            'INDI:BASM',
        ],
        'ALIA' => ['INDI:ALIA'],
        'ASSO' => ['INDI:ASSO'],

        // Families
        'ENGA' => ['FAM:ENGA'],
        'MARB' => ['FAM:MARB'],
        'MARC' => ['FAM:MARC'],
        'MARL' => ['FAM:MARL'],
        'MARS' => ['FAM:MARS'],
        'ANUL' => ['FAM:ANUL'],
        'DIVF' => ['FAM:DIVF'],
        'FAM_RESI' => ['FAM:RESI'],
        'FAM_CENS' => ['FAM:CENS'],

        // LDS church
        'LDS' => [
            'INDI:BAPL',
            'INDI:CONL',
            'INDI:ENDL',
            'INDI:SLGC',
            'FAM:SLGS',
            'HEAD:SUBN',
        ],

        // Identifiers
        'AFN' => ['INDI:AFN'],
        'IDNO' => ['INDI:IDNO'],
        'SSN' => ['INDI:SSN'],
        'RFN' => ['RFN'],
        'REFN' => ['REFN'],
        'RIN' => ['RIN'],

        // Submitters
        'SUBM' => [
            'INDI:SUBM',
            'FAM:SUBM',
        ],
        'ANCI' => [
            'INDI:ANCI',
            'INDI:DESI',
        ],
    ];
}
139