<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

/**
 * Utilities for manipulating GEDCOM data.
 *
 * Provides helpers to normalise tag names and to convert the values of
 * LATI/LONG, PLAC and SEX tags between their GEDCOM text form and PHP values.
 */
class GedcomService
{
    // Gedcom allows 255 characters (not bytes), including the EOL character.
    private const EOL = "\r\n";
    // NOTE(review): alternatives are tried left to right, so a lone "\r" matches
    // before "\r\n" can. Not used by any method in this class - confirm before use.
    private const EOL_REGEX = '\r|\r\n|\n|\n\r';
    private const LINE_LENGTH = 255 - 2;

    // User defined tags begin with an underscore
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AFN'               => 'AFN',
        'AGE'               => 'AGE',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        // Correct spelling, plus the historical misspelling kept for backward compatibility.
        'ANNULMENT'         => 'ANUL',
        'ANULMENT'          => 'ANUL',
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CITY'              => 'CITY',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        // Correct spelling, plus the historical misspelling kept for backward compatibility.
        'CORPORATE'         => 'CORP',
        'CORPORTATE'        => 'CORP',
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DATA'              => 'DATA',
        'DATE'              => 'DATE',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMAIL'             => 'EMAI',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FACT'              => 'FACT',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        // Correct spelling, plus the historical misspelling kept for backward compatibility.
        'FACSIMILE'         => 'FAX',
        'FACIMILIE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FILE'              => 'FILE',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MAP'               => 'MAP',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NAME'              => 'NAME',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NOTE'              => 'NOTE',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PAGE'              => 'PAGE',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROLE'              => 'ROLE',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SEX'               => 'SEX',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TEXT'              => 'TEXT',
        'TIME'              => 'TIME',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'TYPE'              => 'TYPE',
        'VERSION'           => 'VERS',
        'WIFE'              => 'WIFE',
        'WILL'              => 'WILL',
        'WEB'               => 'WWW',
        '_DEATH_OF_SPOUSE'  => 'DETS',
        '_DEGREE'           => '_DEG',
        '_FILE'             => 'FILE',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
    ];

    // LATI and LONG tags
    // Maximum number of decimal places written for a coordinate.
    private const DEGREE_DECIMALS = 5;
    private const LATITUDE_NORTH  = 'N';
    private const LATITUDE_SOUTH  = 'S';
    private const LONGITUDE_EAST  = 'E';
    private const LONGITUDE_WEST  = 'W';

    // PLAC tags
    private const PLACE_SEPARATOR       = ', ';
    private const PLACE_SEPARATOR_REGEX = '/ *, */';

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then looked up in TAG_NAMES and TAG_SYNONYMS.
     * Tags found in neither table are returned upper-cased but otherwise unchanged.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Does a tag begin with the user-defined prefix (an underscore)?
     *
     * @param string $tag
     *
     * @return bool
     */
    public function isUserDefinedTag(string $tag): bool
    {
        // strncmp() copes with an empty tag on every PHP version, whereas
        // substr_compare() warns and returns false when the offset is not
        // inside the string on older PHP 7 releases.
        $prefix = self::USER_DEFINED_TAG_PREFIX;

        return strncmp($tag, $prefix, strlen($prefix)) === 0;
    }

    /**
     * Convert the value of a LATI tag to signed degrees (north is positive).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert the value of a LONG tag to signed degrees (east is positive).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Parse a coordinate written as a hemisphere letter followed by a number,
     * or as a plain signed number.
     *
     * @param string $text     The raw tag value
     * @param string $positive Hemisphere letter for positive values
     * @param string $negative Hemisphere letter for negative values
     *
     * @return float Signed degrees, or 0.0 when the text cannot be interpreted
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text       = trim($text);
        $hemisphere = substr($text, 0, 1);
        $degrees    = substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($degrees)) {
            $hemisphere = strtoupper($hemisphere);
            $degrees    = (float) $degrees;

            if ($hemisphere === $positive) {
                return $degrees;
            }

            if ($hemisphere === $negative) {
                return -$degrees;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Convert signed degrees to the value of a LATI tag.
     *
     * @param float $latitude
     *
     * @return string
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert signed degrees to the value of a LONG tag.
     *
     * @param float $longitude
     *
     * @return string
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format a coordinate as a hemisphere letter followed by an unsigned
     * number, which is the form that readDegrees() parses.
     *
     * @param float  $degrees  Signed degrees
     * @param string $positive Hemisphere letter for values >= 0
     * @param string $negative Hemisphere letter for values < 0
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        // Round before testing the sign, so that a tiny negative value which
        // rounds to zero is written with the positive hemisphere.
        $degrees    = round($degrees, self::DEGREE_DECIMALS);
        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        // %F is locale-independent (always a "." decimal point) and never
        // produces exponent notation.
        $number = sprintf('%.' . self::DEGREE_DECIMALS . 'F', abs($degrees));

        // The formatted number always contains a decimal point, so this only
        // strips insignificant zeros from the fraction ("51.50000" => "51.5").
        $number = rtrim(rtrim($number, '0'), '.');

        return $hemisphere . $number;
    }

    /**
     * Split a place name into its component parts.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged. Empty parts are therefore removed.
     *
     * @param string $text
     *
     * @return string[]
     */
    public function readPlace(string $text): array
    {
        $text = trim($text);

        // The third argument of preg_split() is the limit; flags are the fourth.
        $parts = preg_split(self::PLACE_SEPARATOR_REGEX, $text, -1, PREG_SPLIT_NO_EMPTY);

        if ($parts === false) {
            // PCRE failure: keep the data by treating the text as a single part.
            return $text === '' ? [] : [$text];
        }

        return $parts;
    }

    /**
     * Join the component parts of a place name into a single string.
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Normalise the value of a SEX tag to M, F or U.
     *
     * Some applications use non-standard values for unknown.
     *
     * @param string $text
     *
     * @return string
     */
    public function readSex(string $text): string
    {
        // Ignore surrounding white-space and letter-case.
        $sex = strtoupper(trim($text));

        if ($sex === self::SEX_MALE || $sex === self::SEX_FEMALE) {
            return $sex;
        }

        return self::SEX_UNKNOWN;
    }
}