<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Gedcom;

use function abs;
use function implode;
use function is_numeric;
use function preg_split;
use function strlen;
use function strncmp;
use function strtoupper;
use function substr;
use function trim;

/**
 * Utilities for manipulating GEDCOM data.
 *
 * The service is stateless: every method works only on its arguments and on
 * the constants declared here and in the Gedcom class.
 */
class GedcomService
{
    // User defined tags begin with an underscore
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    // Keys are upper-case tag names, values are the corresponding tags.
    // Historic misspellings (ANULMENT, CORPORTATE, FACIMILIE) are kept so that
    // existing behaviour is unchanged; the correct spellings are accepted too.
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        'ANULMENT'          => 'ANUL',
        'ANNULMENT'         => 'ANUL',
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        'CORPORTATE'        => 'CORP',
        'CORPORATE'         => 'CORP',
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACIMILIE'         => 'FAX',
        'FACSIMILE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'VERSION'           => 'VERS',
        'WEB'               => 'WWW',
        // NOTE(review): this is the only underscore-prefixed name that maps to a
        // tag without an underscore - confirm whether '_DETS' was intended.
        '_DEATH_OF_SPOUSE'  => 'DETS',
        '_DEGREE'           => '_DEG',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
        // Convert PhpGedView tag to webtrees
        '_PGVU'     => '_WT_USER',
        '_PGV_OBJS' => '_WT_OBJE_SORT',
    ];

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then looked up first in TAG_NAMES and then in
     * TAG_SYNONYMS. A tag found in neither table is returned upper-cased.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Does a tag start with the user-defined prefix (an underscore)?
     *
     * An empty tag is not user-defined.
     *
     * @param string $tag
     *
     * @return bool
     */
    public function isUserDefinedTag(string $tag): bool
    {
        // Compare exactly as many bytes as the prefix has, rather than assuming a length of one.
        $length = strlen(self::USER_DEFINED_TAG_PREFIX);

        return strncmp($tag, self::USER_DEFINED_TAG_PREFIX, $length) === 0;
    }

    /**
     * Read a latitude, using the north/south hemisphere markers from the Gedcom class.
     *
     * @param string $text
     *
     * @return float|null The signed value, or null when the text cannot be interpreted.
     */
    public function readLatitude(string $text): ?float
    {
        return $this->readDegrees($text, Gedcom::LATITUDE_NORTH, Gedcom::LATITUDE_SOUTH);
    }

    /**
     * Read a longitude, using the east/west hemisphere markers from the Gedcom class.
     *
     * @param string $text
     *
     * @return float|null The signed value, or null when the text cannot be interpreted.
     */
    public function readLongitude(string $text): ?float
    {
        return $this->readDegrees($text, Gedcom::LONGITUDE_EAST, Gedcom::LONGITUDE_WEST);
    }

    /**
     * Convert a coordinate to a signed number.
     *
     * Two forms are recognised, in this order:
     *  - one hemisphere character followed by a numeric string;
     *  - a plain numeric string, which is returned with its own sign.
     *
     * @param string $text
     * @param string $positive Hemisphere marker giving a positive result
     * @param string $negative Hemisphere marker giving a negative result
     *
     * @return float|null Null when neither form matches.
     */
    private function readDegrees(string $text, string $positive, string $negative): ?float
    {
        $text       = trim($text);
        $hemisphere = substr($text, 0, 1);
        $degrees    = substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($degrees)) {
            // The hemisphere marker is matched case-insensitively.
            $hemisphere = strtoupper($hemisphere);
            $degrees    = (float) $degrees;

            if ($hemisphere === $positive) {
                return $degrees;
            }

            if ($hemisphere === $negative) {
                return -$degrees;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return null;
    }

    /**
     * Split a place name into its parts, using Gedcom::PLACE_SEPARATOR_REGEX.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged.
     *
     * @param string $text
     *
     * @return string[]
     */
    public function readPlace(string $text): array
    {
        $text  = trim($text);
        $parts = preg_split(Gedcom::PLACE_SEPARATOR_REGEX, $text);

        // preg_split() returns false when the PCRE engine fails. Fall back to the
        // unsplit text instead of violating the declared return type.
        if ($parts === false) {
            return [$text];
        }

        return $parts;
    }

    /**
     * Join the parts of a place name, using Gedcom::PLACE_SEPARATOR.
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(Gedcom::PLACE_SEPARATOR, $place);
    }

    /**
     * Some applications use non-standard values for unknown.
     *
     * The value is compared case-insensitively. Anything other than male or
     * female is reported as unknown.
     *
     * @param string $text
     *
     * @return string One of 'M', 'F' or 'U'.
     */
    public function readSex(string $text): string
    {
        $sex = strtoupper($text);

        if ($sex === self::SEX_MALE || $sex === self::SEX_FEMALE) {
            return $sex;
        }

        return self::SEX_UNKNOWN;
    }
}