<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

/**
 * Utilities for manipulating GEDCOM data.
 */
class GedcomService
{
    // Gedcom allows 255 characters (not bytes), including the EOL character.
    private const EOL         = "\r\n";
    // Regex fragment (no delimiters). Two-character sequences are listed first
    // so that a CR/LF pair is matched as one line ending rather than two.
    private const EOL_REGEX   = '\r\n|\n\r|\r|\n';
    private const LINE_LENGTH = 255 - 2;


    // User defined tags begin with an underscore
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AFN'               => 'AFN',
        'AGE'               => 'AGE',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        // Misspelled key kept so that existing lookups still resolve.
        'ANULMENT'          => 'ANUL',
        'ANNULMENT'         => 'ANUL',
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CITY'              => 'CITY',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        // Misspelled key kept so that existing lookups still resolve.
        'CORPORTATE'        => 'CORP',
        'CORPORATE'         => 'CORP',
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DATA'              => 'DATA',
        'DATE'              => 'DATE',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMAIL'             => 'EMAI',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FACT'              => 'FACT',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACIMILIE'         => 'FAX',
        // Conventional spelling, accepted in addition to the key above.
        'FACSIMILE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FILE'              => 'FILE',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MAP'               => 'MAP',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NAME'              => 'NAME',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NOTE'              => 'NOTE',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PAGE'              => 'PAGE',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROLE'              => 'ROLE',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SEX'               => 'SEX',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TEXT'              => 'TEXT',
        'TIME'              => 'TIME',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'TYPE'              => 'TYPE',
        'VERSION'           => 'VERS',
        'WIFE'              => 'WIFE',
        'WILL'              => 'WILL',
        'WEB'               => 'WWW',
        '_DEATH_OF_SPOUSE'  => 'DETS',
        '_DEGREE'           => '_DEG',
        '_FILE'             => 'FILE',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
    ];

    // LATI and LONG tags
    private const DEGREE_PRECISION = 5;
    private const LATITUDE_NORTH   = 'N';
    private const LATITUDE_SOUTH   = 'S';
    private const LONGITUDE_EAST   = 'E';
    private const LONGITUDE_WEST   = 'W';

    // PLAC tags
    private const PLACE_SEPARATOR       = ', ';
    private const PLACE_SEPARATOR_REGEX = '/ *, */';

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then looked up first in TAG_NAMES and then in
     * TAG_SYNONYMS. A tag found in neither is returned upper-cased.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Does the tag begin with the user-defined prefix (an underscore)?
     *
     * An empty tag is not user-defined.
     *
     * @param string $tag
     *
     * @return bool
     */
    public function isUserDefinedTag(string $tag): bool
    {
        // strncmp() copes with an empty $tag, unlike substr_compare() at offset 0.
        $length = strlen(self::USER_DEFINED_TAG_PREFIX);

        return strncmp($tag, self::USER_DEFINED_TAG_PREFIX, $length) === 0;
    }

    /**
     * Convert the value of a LATI tag to signed degrees (north is positive).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert the value of a LONG tag to signed degrees (east is positive).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Parse a hemisphere-prefixed coordinate such as "N12.5" or "w1.25".
     *
     * A plain numeric string is also accepted and returned as-is.
     * Anything else gives 0.0.
     *
     * @param string $text
     * @param string $positive Hemisphere letter for positive values
     * @param string $negative Hemisphere letter for negative values
     *
     * @return float
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text       = trim($text);
        $hemisphere = substr($text, 0, 1);
        $degrees    = substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($degrees)) {
            $hemisphere = strtoupper($hemisphere);
            $degrees    = (float) $degrees;

            if ($hemisphere === $positive) {
                return $degrees;
            }

            if ($hemisphere === $negative) {
                return -$degrees;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Convert signed degrees to the value of a LATI tag, e.g. -12.5 gives "S12.5".
     *
     * @param float $latitude
     *
     * @return string
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert signed degrees to the value of a LONG tag, e.g. 12.5 gives "E12.5".
     *
     * @param float $longitude
     *
     * @return string
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format a coordinate as a hemisphere letter followed by an unsigned number.
     *
     * This is the form that readDegrees() parses, so values round-trip.
     * The number is rounded to DEGREE_PRECISION decimal places and trailing
     * zeros are removed.
     *
     * @param float  $degrees
     * @param string $positive Hemisphere letter for values >= 0
     * @param string $negative Hemisphere letter for values < 0
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        // Round first, so that a tiny negative value which rounds to zero
        // is not given the negative hemisphere.
        $degrees    = round($degrees, self::DEGREE_PRECISION);
        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        // number_format() avoids both locale-dependent decimal separators
        // and exponent notation for small values.
        $number = number_format(abs($degrees), self::DEGREE_PRECISION, '.', '');
        $number = rtrim(rtrim($number, '0'), '.');

        return $hemisphere . $number;
    }

    /**
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged.
     *
     * Empty parts are therefore dropped from the result.
     *
     * @param string $text
     *
     * @return string[]
     */
    public function readPlace(string $text): array
    {
        $text = trim($text);

        // The flags are the FOURTH argument; the third is the limit (-1 = no limit).
        $parts = preg_split(self::PLACE_SEPARATOR_REGEX, $text, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on a PCRE error; keep to the declared return type.
        return $parts === false ? [] : $parts;
    }

    /**
     * Join the parts of a place name using the standard separator.
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Some applications use non-standard values for unknown.
     *
     * Anything other than M or F (case-insensitive) is treated as unknown.
     *
     * @param string $text
     *
     * @return string
     */
    public function readSex(string $text): string
    {
        $text = strtoupper($text);

        if ($text !== self::SEX_MALE && $text !== self::SEX_FEMALE) {
            $text = self::SEX_UNKNOWN;
        }

        return $text;
    }
}