<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

/**
 * Utilities for manipulating GEDCOM data.
 */
class GedcomService
{
    // Gedcom allows 255 characters (not bytes), including the EOL character.
    private const EOL = "\r\n";
    // Two-character line endings are listed first, so that "\r\n" is matched
    // as one line break rather than as "\r" followed by "\n".
    private const EOL_REGEX   = '\r\n|\n\r|\r|\n';
    private const LINE_LENGTH = 255 - 2;

    // User defined tags begin with an underscore
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    // Historic misspellings (ANULMENT, CORPORTATE, FACIMILIE) are kept so that
    // existing data still converts; correctly spelled aliases are also accepted.
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        'ANULMENT'          => 'ANUL',
        'ANNULMENT'         => 'ANUL',
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        'CORPORTATE'        => 'CORP',
        'CORPORATE'         => 'CORP',
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACIMILIE'         => 'FAX',
        'FACSIMILE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'VERSION'           => 'VERS',
        'WEB'               => 'WWW',
        // Custom tag names map to custom (underscore-prefixed) tags.
        '_DEATH_OF_SPOUSE'  => '_DETS',
        '_DEGREE'           => '_DEG',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
        // Convert PhpGedView tag to webtrees
        '_PGVU'     => '_WT_USER',
        '_PGV_OBJS' => '_WT_OBJE_SORT',
    ];

    // LATI and LONG tags: a hemisphere letter followed by unsigned decimal degrees.
    private const DEGREE_PRECISION = 5;
    private const DEGREE_FORMAT    = '%.5F';
    private const LATITUDE_NORTH   = 'N';
    private const LATITUDE_SOUTH   = 'S';
    private const LONGITUDE_EAST   = 'E';
    private const LONGITUDE_WEST   = 'W';

    // PLAC tags
    private const PLACE_SEPARATOR       = ', ';
    private const PLACE_SEPARATOR_REGEX = '/ *, */';

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then long tag names (e.g. "BIRTH") and known
     * synonyms from other applications are replaced by the tag used here.
     * Unrecognised tags are returned upper-cased but otherwise unchanged.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Does a tag start with the user-defined prefix (an underscore)?
     *
     * @param string $tag
     *
     * @return bool
     */
    public function isUserDefinedTag(string $tag): bool
    {
        // strncmp() copes with an empty tag, where substr_compare() may warn.
        $prefix = self::USER_DEFINED_TAG_PREFIX;

        return strncmp($tag, $prefix, strlen($prefix)) === 0;
    }

    /**
     * Convert the value of a LATI tag to signed decimal degrees (south is negative).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert the value of a LONG tag to signed decimal degrees (west is negative).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Parse a coordinate such as "N51.5" or "w0.12", or a plain number.
     *
     * @param string $text     The raw tag value
     * @param string $positive The hemisphere letter for positive values
     * @param string $negative The hemisphere letter for negative values
     *
     * @return float The signed value, or 0.0 when the text cannot be parsed
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text       = trim($text);
        $hemisphere = substr($text, 0, 1);
        $degrees    = substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($degrees)) {
            $hemisphere = strtoupper($hemisphere);
            $degrees    = (float) $degrees;

            if ($hemisphere === $positive) {
                return $degrees;
            }

            if ($hemisphere === $negative) {
                return -$degrees;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Convert signed decimal degrees to the value of a LATI tag, e.g. "S12.345".
     *
     * @param float $latitude
     *
     * @return string
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert signed decimal degrees to the value of a LONG tag, e.g. "W0.12".
     *
     * @param float $longitude
     *
     * @return string
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format a coordinate as a hemisphere letter followed by unsigned degrees.
     *
     * The output can be parsed again by readDegrees().
     *
     * @param float  $degrees  The signed value
     * @param string $positive The hemisphere letter for positive values
     * @param string $negative The hemisphere letter for negative values
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        // Round first, so that a tiny negative value does not become "S0".
        $degrees    = round($degrees, self::DEGREE_PRECISION);
        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        // %F is locale-independent. Strip trailing zeros and a dangling decimal point.
        $number = sprintf(self::DEGREE_FORMAT, abs($degrees));
        $number = rtrim(rtrim($number, '0'), '.');

        return $hemisphere . $number;
    }

    /**
     * Split a place name into its component parts.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged. Empty parts are therefore removed.
     *
     * @param string $text
     *
     * @return string[]
     */
    public function readPlace(string $text): array
    {
        // The flag belongs in the fourth argument; the third is the limit (-1 = none).
        $parts = preg_split(self::PLACE_SEPARATOR_REGEX, trim($text), -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on failure; honour the array return type.
        return $parts === false ? [] : $parts;
    }

    /**
     * Join the component parts of a place name.
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Some applications use non-standard values for unknown.
     *
     * Anything other than "M" or "F" (ignoring case and surrounding
     * whitespace) is treated as unknown.
     *
     * @param string $text
     *
     * @return string
     */
    public function readSex(string $text): string
    {
        $text = strtoupper(trim($text));

        if ($text !== self::SEX_MALE && $text !== self::SEX_FEMALE) {
            return self::SEX_UNKNOWN;
        }

        return $text;
    }
}