<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

/**
 * Utilities for manipulating GEDCOM data.
 *
 * Provides tag normalisation plus readers/writers for the LATI, LONG, PLAC
 * and SEX values. The class holds no state; every method is a pure function
 * of its arguments.
 */
class GedcomService
{
    // User defined tags begin with an underscore
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    // NOTE(review): a few keys look misspelled (ANULMENT, CORPORTATE, FACIMILIE).
    // They are presumably the spellings emitted by the exporting applications -
    // confirm before "correcting" them, as the lookup is an exact match.
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        'ANULMENT'          => 'ANUL',
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        'CORPORTATE'        => 'CORP',
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACIMILIE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'VERSION'           => 'VERS',
        'WEB'               => 'WWW',
        // A custom tag name must map to a custom (underscore-prefixed) tag.
        '_DEATH_OF_SPOUSE'  => '_DETS',
        '_DEGREE'           => '_DEG',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
        // Convert PhpGedView tag to webtrees
        '_PGVU'     => '_WT_USER',
        '_PGV_OBJS' => '_WT_OBJE_SORT',
    ];

    // LATI and LONG tags
    private const DEGREE_PRECISION = 5;
    private const LATITUDE_NORTH   = 'N';
    private const LATITUDE_SOUTH   = 'S';
    private const LONGITUDE_EAST   = 'E';
    private const LONGITUDE_WEST   = 'W';

    // PLAC tags
    private const PLACE_SEPARATOR       = ', ';
    private const PLACE_SEPARATOR_REGEX = '/ *, */';

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then looked up first as a long tag name
     * (e.g. "BIRTH" becomes "BIRT") and then as a synonym used by another
     * application (e.g. "_PGVU" becomes "_WT_USER"). Tags found in neither
     * table are returned upper-cased but otherwise unchanged.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Does a tag start with the user-defined prefix (an underscore)?
     *
     * @param string $tag
     *
     * @return bool False for an empty string.
     */
    public function isUserDefinedTag(string $tag): bool
    {
        // strncmp() is well defined for an empty $tag, whereas substr_compare()
        // with offset 0 emits a warning for empty strings on older PHP 7 releases.
        $prefix = self::USER_DEFINED_TAG_PREFIX;

        return strncmp($tag, $prefix, strlen($prefix)) === 0;
    }

    /**
     * Parse a LATI value into signed decimal degrees (south is negative).
     *
     * @param string $text
     *
     * @return float 0.0 when the text cannot be parsed.
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Parse a LONG value into signed decimal degrees (west is negative).
     *
     * @param string $text
     *
     * @return float 0.0 when the text cannot be parsed.
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Parse a coordinate written either as a hemisphere letter followed by a
     * number ("N12.5", case-insensitive) or as a plain signed number ("-12.5").
     *
     * @param string $text
     * @param string $positive Hemisphere letter for positive values
     * @param string $negative Hemisphere letter for negative values
     *
     * @return float 0.0 when neither form matches.
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text = trim($text);

        // Nothing to parse. Also avoids substr() returning false on PHP 7.
        if ($text === '') {
            return 0.0;
        }

        $hemisphere = strtoupper(substr($text, 0, 1));
        $degrees    = substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($degrees)) {
            if ($hemisphere === $positive) {
                return (float) $degrees;
            }

            if ($hemisphere === $negative) {
                return -(float) $degrees;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Format a latitude as a LATI value, e.g. -12.345 becomes "S12.345".
     *
     * @param float $latitude Signed decimal degrees (south is negative)
     *
     * @return string
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Format a longitude as a LONG value, e.g. -0.1278 becomes "W0.1278".
     *
     * @param float $longitude Signed decimal degrees (west is negative)
     *
     * @return string
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format signed degrees as a hemisphere letter followed by the absolute
     * value, which is the form that readDegrees() accepts.
     *
     * The value is rounded to DEGREE_PRECISION decimal places and trailing
     * zeros are removed, so 12.5 is written as "N12.5" and 0.0 as "N0".
     *
     * @param float  $degrees
     * @param string $positive Hemisphere letter for values >= 0
     * @param string $negative Hemisphere letter for values < 0
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        // Round before choosing the hemisphere, so that a tiny negative value
        // which rounds to zero is not written with the negative letter.
        $degrees    = round($degrees, self::DEGREE_PRECISION);
        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        // %F (unlike %f on PHP 7) ignores the locale, so the decimal separator
        // is always "." and scientific notation is never produced.
        $number = sprintf('%.' . self::DEGREE_PRECISION . 'F', abs($degrees));

        // Strip trailing zeros, then a dangling decimal point: "12.50000" => "12.5", "12.00000" => "12".
        $number = rtrim(rtrim($number, '0'), '.');

        return $hemisphere . $number;
    }

    /**
     * Split a PLAC value into its component place names.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged. Empty components are therefore dropped.
     *
     * @param string $text
     *
     * @return string[] Place names in the order written; empty for blank input.
     */
    public function readPlace(string $text): array
    {
        $text = trim($text);

        // The flag must be the FOURTH argument. The third is $limit, and passing
        // PREG_SPLIT_NO_EMPTY (1) there would return the whole text unsplit.
        $parts = preg_split(self::PLACE_SEPARATOR_REGEX, $text, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() only returns false on an internal PCRE failure.
        // Fall back to the unsplit text rather than violating the return type.
        if ($parts === false) {
            return $text === '' ? [] : [$text];
        }

        return $parts;
    }

    /**
     * Join place names into a PLAC value, separated by ", ".
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Normalise a SEX value to "M", "F" or "U".
     *
     * Some applications use non-standard values for unknown. Anything other
     * than "M" or "F" (ignoring case and surrounding whitespace) becomes "U".
     *
     * @param string $text
     *
     * @return string
     */
    public function readSex(string $text): string
    {
        $text = strtoupper(trim($text));

        if ($text === self::SEX_MALE || $text === self::SEX_FEMALE) {
            return $text;
        }

        return self::SEX_UNKNOWN;
    }
}