<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2018 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

/**
 * Utilities for manipulating GEDCOM data.
 *
 * Regular-expression constants in this class are stored WITHOUT delimiters;
 * wrap them (e.g. in "/.../") before passing them to a preg_*() function.
 */
class GedcomService
{
    // Gedcom allows 255 characters (not bytes), including the EOL character.
    const EOL = "\r\n";
    // Two-character sequences are listed first: PCRE alternation is ordered, so a
    // leading "\r" alternative would split "\r\n" into two separate line breaks.
    const EOL_REGEX = '\r\n|\n\r|\r|\n';
    const LINE_LENGTH = 255 - 2;

    // User defined tags begin with an underscore
    const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    // Misspelled keys from earlier revisions are kept alongside the corrected
    // spellings so that existing lookups continue to work.
    const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AFN'               => 'AFN',
        'AGE'               => 'AGE',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        'ANNULMENT'         => 'ANUL',
        'ANULMENT'          => 'ANUL', // legacy misspelling
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CITY'              => 'CITY',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        'CORPORATE'         => 'CORP',
        'CORPORTATE'        => 'CORP', // legacy misspelling
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DATA'              => 'DATA',
        'DATE'              => 'DATE',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANT_INT'    => 'DESI',
        'DESCENDANTS_INT'   => 'DESI', // legacy spelling
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMAIL'             => 'EMAI',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FACT'              => 'FACT',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACSIMILE'         => 'FAX',
        'FACIMILIE'         => 'FAX', // legacy misspelling
        'FIRST_COMMUNION'   => 'FCOM',
        'FILE'              => 'FILE',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MAP'               => 'MAP',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MARR_SETTLEMENT'   => 'MARS',
        'MEDIA'             => 'MEDI',
        'NAME'              => 'NAME',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NOTE'              => 'NOTE',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PAGE'              => 'PAGE',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'PUHBLICATION'      => 'PUBL', // legacy misspelling
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROLE'              => 'ROLE',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SEX'               => 'SEX',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TEXT'              => 'TEXT',
        'TIME'              => 'TIME',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'TYPE'              => 'TYPE',
        'VERSION'           => 'VERS',
        'WIFE'              => 'WIFE',
        'WILL'              => 'WILL',
        'WEB'               => 'WWW',
        // NOTE(review): 'DETS' has no underscore prefix, unlike the other custom tags below - confirm.
        '_DEATH_OF_SPOUSE'  => 'DETS',
        '_DEGREE'           => '_DEG',
        '_FILE'             => 'FILE',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    const TAG_SYNONYMS = [
    ];

    // LATI and LONG tags.
    // sprintf() arguments are (absolute degrees, hemisphere letter); the positional
    // specifiers emit the hemisphere first, and %F is not affected by the locale.
    const DEGREE_FORMAT   = '%2$s%1$.5F';
    const LATITUDE_NORTH  = 'N';
    const LATITUDE_SOUTH  = 'S';
    const LONGITUDE_EAST  = 'E';
    const LONGITUDE_WEST  = 'W';

    // PLAC tags
    const PLACE_SEPARATOR       = ', ';
    const PLACE_SEPARATOR_REGEX = ' *, *';

    // SEX tags
    const SEX_FEMALE  = 'F';
    const SEX_MALE    = 'M';
    const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased and, when it is a known tag name (a key of
     * TAG_NAMES), replaced by the corresponding tag. Unknown tags are
     * returned upper-cased.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? $tag;
    }

    /**
     * Does the tag start with the user-defined prefix (an underscore)?
     *
     * @param string $tag
     *
     * @return bool
     */
    public function isUserDefinedTag(string $tag): bool
    {
        return substr($tag, 0, 1) === self::USER_DEFINED_TAG_PREFIX;
    }

    /**
     * Convert the value of a LATI tag to signed degrees (south is negative).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert the value of a LONG tag to signed degrees (west is negative).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Parse a coordinate written either as a hemisphere letter followed by a
     * number, or as a plain signed number.
     *
     * @param string $text     The raw value; surrounding whitespace is ignored
     * @param string $positive Hemisphere letter for positive values
     * @param string $negative Hemisphere letter for negative values
     *
     * @return float The signed value, or 0.0 when the text cannot be parsed
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text = trim($text);

        // The casts cover older PHP versions, where substr() returns false
        // when the start position is at or beyond the end of the string.
        $hemisphere = strtoupper((string) substr($text, 0, 1));
        $degrees    = (string) substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($degrees)) {
            if ($hemisphere === $positive) {
                return (float) $degrees;
            }

            if ($hemisphere === $negative) {
                return -(float) $degrees;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Format signed degrees as the value of a LATI tag.
     *
     * @param float $latitude
     *
     * @return string
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Format signed degrees as the value of a LONG tag.
     *
     * @param float $longitude
     *
     * @return string
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format a coordinate as a hemisphere letter followed by the absolute
     * value with five decimal places, so that readDegrees() can parse it.
     *
     * @param float  $degrees
     * @param string $positive Hemisphere letter for zero and positive values
     * @param string $negative Hemisphere letter for negative values
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        // Round before choosing the hemisphere, so that a tiny negative value
        // that prints as zero is not labelled with the negative hemisphere.
        $degrees    = round($degrees, 5);
        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        return sprintf(self::DEGREE_FORMAT, abs($degrees), $hemisphere);
    }

    /**
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged.
     *
     * Empty parts are therefore dropped from the result.
     *
     * @param string $text
     *
     * @return string[]
     */
    public function readPlace(string $text): array
    {
        $text = trim($text);

        // The third argument of preg_split() is the limit; flags go in the fourth.
        $parts = preg_split('/' . self::PLACE_SEPARATOR_REGEX . '/', $text, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on failure; keep the declared return type.
        return $parts === false ? [] : $parts;
    }

    /**
     * Join the parts of a place name using the standard separator.
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Some applications use non-standard values for unknown.
     *
     * Anything other than male or female (ignoring case and surrounding
     * whitespace) is treated as unknown.
     *
     * @param string $text
     *
     * @return string
     */
    public function readSex(string $text): string
    {
        $text = strtoupper(trim($text));

        if ($text === self::SEX_MALE || $text === self::SEX_FEMALE) {
            return $text;
        }

        return self::SEX_UNKNOWN;
    }
}