<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

/**
 * Utilities for manipulating GEDCOM data.
 */
class GedcomService
{
    // Line endings. The regex fragment lists the two-character sequences first,
    // so that a CR/LF pair is matched as one line ending rather than two.
    private const EOL       = "\r\n";
    private const EOL_REGEX = '\r\n|\n\r|\r|\n';

    // Gedcom allows 255 characters (not bytes), including the EOL character.
    private const LINE_LENGTH = 255 - 2;

    // User defined tags begin with an underscore
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    // The misspelled keys (ANULMENT, CORPORTATE, FACIMILIE) are kept for backward
    // compatibility; the correctly spelled forms are accepted as well.
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AFN'               => 'AFN',
        'AGE'               => 'AGE',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        'ANULMENT'          => 'ANUL',
        'ANNULMENT'         => 'ANUL',
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CITY'              => 'CITY',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        'CORPORTATE'        => 'CORP',
        'CORPORATE'         => 'CORP',
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DATA'              => 'DATA',
        'DATE'              => 'DATE',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMAIL'             => 'EMAI',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FACT'              => 'FACT',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACIMILIE'         => 'FAX',
        'FACSIMILE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FILE'              => 'FILE',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MAP'               => 'MAP',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NAME'              => 'NAME',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NOTE'              => 'NOTE',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PAGE'              => 'PAGE',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROLE'              => 'ROLE',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SEX'               => 'SEX',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TEXT'              => 'TEXT',
        'TIME'              => 'TIME',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'TYPE'              => 'TYPE',
        'VERSION'           => 'VERS',
        'WIFE'              => 'WIFE',
        'WILL'              => 'WILL',
        'WEB'               => 'WWW',
        '_DEATH_OF_SPOUSE'  => 'DETS',
        '_DEGREE'           => '_DEG',
        '_FILE'             => 'FILE',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
    ];

    // LATI and LONG tags: a hemisphere letter followed by unsigned decimal degrees.
    private const DEGREE_PRECISION = 5;
    private const LATITUDE_NORTH   = 'N';
    private const LATITUDE_SOUTH   = 'S';
    private const LONGITUDE_EAST   = 'E';
    private const LONGITUDE_WEST   = 'W';

    // PLAC tags
    private const PLACE_SEPARATOR       = ', ';
    private const PLACE_SEPARATOR_REGEX = '/ *, */';

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then looked up first in the table of long tag
     * names and then in the table of synonyms. Tags found in neither table are
     * returned upper-cased but otherwise unchanged.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Is this a user-defined tag, i.e. one that begins with an underscore?
     *
     * An empty tag is not user-defined.
     *
     * @param string $tag
     *
     * @return bool
     */
    public function isUserDefinedTag(string $tag): bool
    {
        // strncmp() copes with an empty $tag, unlike substr_compare() on older PHP versions.
        $prefix = self::USER_DEFINED_TAG_PREFIX;

        return strncmp($tag, $prefix, strlen($prefix)) === 0;
    }

    /**
     * Convert the value of a LATI tag to signed decimal degrees (north is positive).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert the value of a LONG tag to signed decimal degrees (east is positive).
     *
     * @param string $text
     *
     * @return float
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Parse a coordinate.
     *
     * Accepts either a hemisphere letter (in upper or lower case) followed by a
     * number, or a plain signed number. Anything else yields 0.0.
     *
     * @param string $text
     * @param string $positive Hemisphere letter for positive values
     * @param string $negative Hemisphere letter for negative values
     *
     * @return float
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text       = trim($text);
        $hemisphere = substr($text, 0, 1);
        $degrees    = substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($degrees)) {
            $hemisphere = strtoupper($hemisphere);
            $degrees    = (float) $degrees;

            if ($hemisphere === $positive) {
                return $degrees;
            }

            if ($hemisphere === $negative) {
                return -$degrees;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Convert signed decimal degrees to the value of a LATI tag.
     *
     * @param float $latitude
     *
     * @return string
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert signed decimal degrees to the value of a LONG tag.
     *
     * @param float $longitude
     *
     * @return string
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format a coordinate as a hemisphere letter followed by the unsigned
     * value, e.g. -12.345 becomes "S12.345" for a latitude.
     *
     * The value is rounded to DEGREE_PRECISION decimal places and trailing
     * zeros are removed, so that the result can be parsed by readDegrees().
     *
     * @param float  $degrees
     * @param string $positive Hemisphere letter for values >= 0
     * @param string $negative Hemisphere letter for values < 0
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        // Round first, so that tiny negative values do not become "minus zero".
        $degrees    = round($degrees, self::DEGREE_PRECISION);
        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        // %F is not locale-aware, so the decimal separator is always a dot.
        $magnitude = sprintf('%.' . self::DEGREE_PRECISION . 'F', abs($degrees));
        $magnitude = rtrim(rtrim($magnitude, '0'), '.');

        return $hemisphere . $magnitude;
    }

    /**
     * Split a place name into its component parts.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged. Empty parts are therefore discarded.
     *
     * @param string $text
     *
     * @return string[]
     */
    public function readPlace(string $text): array
    {
        $text = trim($text);

        // The third argument of preg_split() is the limit; the flags come fourth.
        $parts = preg_split(self::PLACE_SEPARATOR_REGEX, $text, -1, PREG_SPLIT_NO_EMPTY);

        return $parts === false ? [] : $parts;
    }

    /**
     * Join the component parts of a place name.
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Normalise the value of a SEX tag to "M", "F" or "U".
     *
     * Some applications use non-standard values for unknown. Surrounding
     * whitespace and letter case are ignored.
     *
     * @param string $text
     *
     * @return string
     */
    public function readSex(string $text): string
    {
        $text = strtoupper(trim($text));

        if ($text !== self::SEX_MALE && $text !== self::SEX_FEMALE) {
            $text = self::SEX_UNKNOWN;
        }

        return $text;
    }
}