<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

/**
 * Utilities for manipulating GEDCOM data.
 *
 * The "read" methods convert raw GEDCOM text into PHP values and are tolerant
 * of non-standard input. The "write" methods convert PHP values back into
 * GEDCOM text.
 */
class GedcomService
{
    // Gedcom allows 255 characters (not bytes), including the EOL character.
    private const EOL         = "\r\n";
    // Regex fragment (no delimiters). Two-character line endings are listed first,
    // otherwise "\r" alone would match the first half of "\r\n".
    private const EOL_REGEX   = '\r\n|\n\r|\r|\n';
    private const LINE_LENGTH = 255 - 2;

    // User defined tags begin with an underscore
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AFN'               => 'AFN',
        'AGE'               => 'AGE',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        'ANNULMENT'         => 'ANUL',
        'ANULMENT'          => 'ANUL', // Misspelling kept for backward compatibility.
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CITY'              => 'CITY',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        'CORPORATE'         => 'CORP',
        'CORPORTATE'        => 'CORP', // Misspelling kept for backward compatibility.
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DATA'              => 'DATA',
        'DATE'              => 'DATE',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMAIL'             => 'EMAI',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FACT'              => 'FACT',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACSIMILE'         => 'FAX',
        'FACIMILIE'         => 'FAX', // Misspelling kept for backward compatibility.
        'FIRST_COMMUNION'   => 'FCOM',
        'FILE'              => 'FILE',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MAP'               => 'MAP',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NAME'              => 'NAME',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NOTE'              => 'NOTE',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PAGE'              => 'PAGE',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROLE'              => 'ROLE',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SEX'               => 'SEX',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TEXT'              => 'TEXT',
        'TIME'              => 'TIME',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'TYPE'              => 'TYPE',
        'VERSION'           => 'VERS',
        'WIFE'              => 'WIFE',
        'WILL'              => 'WILL',
        'WEB'               => 'WWW',
        // NOTE(review): the other custom tags map to underscore-prefixed tags;
        // 'DETS' has no prefix - confirm whether '_DETS' was intended.
        '_DEATH_OF_SPOUSE'  => 'DETS',
        '_DEGREE'           => '_DEG',
        '_FILE'             => 'FILE',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
    ];

    // LATI and LONG tags
    private const DEGREE_PRECISION = 5;
    private const LATITUDE_NORTH   = 'N';
    private const LATITUDE_SOUTH   = 'S';
    private const LONGITUDE_EAST   = 'E';
    private const LONGITUDE_WEST   = 'W';

    // PLAC tags
    private const PLACE_SEPARATOR       = ', ';
    private const PLACE_SEPARATOR_REGEX = '/ *, */';

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then translated via TAG_NAMES (long tag names)
     * or TAG_SYNONYMS. Unrecognised tags are returned upper-cased.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Does the tag begin with the user-defined prefix (an underscore)?
     *
     * @param string $tag
     *
     * @return bool
     */
    public function isUserDefinedTag(string $tag): bool
    {
        // strncmp() is safe for an empty $tag; substr_compare() warns on some PHP 7 versions.
        $prefix = self::USER_DEFINED_TAG_PREFIX;

        return strncmp($tag, $prefix, strlen($prefix)) === 0;
    }

    /**
     * Convert a GEDCOM latitude (e.g. "N12.5" or "S12.5") to signed degrees.
     *
     * @param string $text
     *
     * @return float Positive for north, negative for south, 0.0 if unreadable.
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert a GEDCOM longitude (e.g. "E12.5" or "W12.5") to signed degrees.
     *
     * @param string $text
     *
     * @return float Positive for east, negative for west, 0.0 if unreadable.
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Parse a hemisphere-prefixed coordinate, falling back to a plain number.
     *
     * @param string $text     Raw coordinate text
     * @param string $positive Hemisphere letter giving a positive result
     * @param string $negative Hemisphere letter giving a negative result
     *
     * @return float Signed degrees, or 0.0 when the text cannot be interpreted.
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text       = trim($text);
        $hemisphere = substr($text, 0, 1);
        $degrees    = substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($degrees)) {
            $hemisphere = strtoupper($hemisphere);
            $degrees    = (float) $degrees;

            if ($hemisphere === $positive) {
                return $degrees;
            }

            if ($hemisphere === $negative) {
                return -$degrees;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Convert signed degrees to a GEDCOM latitude, e.g. -12.5 becomes "S12.5".
     *
     * @param float $latitude
     *
     * @return string
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert signed degrees to a GEDCOM longitude, e.g. -12.5 becomes "W12.5".
     *
     * @param float $longitude
     *
     * @return string
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format signed degrees as a hemisphere letter followed by an unsigned
     * value, so that the result can be parsed again by readDegrees().
     *
     * @param float  $degrees
     * @param string $positive Hemisphere letter for values >= 0
     * @param string $negative Hemisphere letter for values < 0
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        // Round first, so that a tiny negative value does not become "S0".
        $degrees    = round($degrees, self::DEGREE_PRECISION);
        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        // %F is locale-independent. Strip trailing zeros, then a dangling decimal point.
        $magnitude = sprintf('%.' . self::DEGREE_PRECISION . 'F', abs($degrees));
        $magnitude = rtrim(rtrim($magnitude, '0'), '.');

        return $hemisphere . $magnitude;
    }

    /**
     * Split a place name into its component parts.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged. Empty parts are therefore removed.
     *
     * @param string $text
     *
     * @return string[]
     */
    public function readPlace(string $text): array
    {
        $text = trim($text);

        // The third argument of preg_split() is the limit; the flags are the fourth.
        $parts = preg_split(self::PLACE_SEPARATOR_REGEX, $text, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on failure.
        return $parts ?: [];
    }

    /**
     * Join the component parts of a place name into GEDCOM text.
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Some applications use non-standard values for unknown.
     * Anything other than male or female is treated as unknown.
     *
     * @param string $text
     *
     * @return string One of "M", "F" or "U".
     */
    public function readSex(string $text): string
    {
        $text = strtoupper(trim($text));

        if ($text !== self::SEX_MALE && $text !== self::SEX_FEMALE) {
            $text = self::SEX_UNKNOWN;
        }

        return $text;
    }
}