xref: /webtrees/app/Services/GedcomService.php (revision fcfa147e10aaa6c7ff580c29bd6e5b88666befc1)
18dded141SGreg Roach<?php
23976b470SGreg Roach
/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
17*fcfa147eSGreg Roach
188dded141SGreg Roachdeclare(strict_types=1);
198dded141SGreg Roach
208dded141SGreg Roachnamespace Fisharebest\Webtrees\Services;
218dded141SGreg Roach
/**
 * Utilities for manipulating GEDCOM data.
 *
 * The read*() methods convert raw GEDCOM values into PHP values, tolerating
 * some non-standard input. The write*() methods produce values that the
 * matching read*() method in this class can parse again.
 */
class GedcomService
{
    // Gedcom allows 255 characters (not bytes), including the EOL character.
    private const EOL = "\r\n";

    // Regex fragment (no delimiters). The two-character line endings must come
    // first, otherwise "\r\n" would be matched as two separate line endings.
    private const EOL_REGEX = '\r\n|\n\r|\r|\n';

    // Maximum line length, after reserving room for the two-character EOL.
    private const LINE_LENGTH = 255 - 2;

    // User defined tags begin with an underscore
    private const USER_DEFINED_TAG_PREFIX = '_';

    // Some applications, such as FTM, use GEDCOM tag names instead of the tags.
    // The keys are matched against (upper-cased) input, so their spelling must
    // not be "corrected".
    private const TAG_NAMES = [
        'ABBREVIATION'      => 'ABBR',
        'ADDRESS'           => 'ADDR',
        'ADDRESS1'          => 'ADR1',
        'ADDRESS2'          => 'ADR2',
        'ADOPTION'          => 'ADOP',
        'AFN'               => 'AFN',
        'AGE'               => 'AGE',
        'AGENCY'            => 'AGNC',
        'ALIAS'             => 'ALIA',
        'ANCESTORS'         => 'ANCE',
        'ANCES_INTEREST'    => 'ANCI',
        'ANULMENT'          => 'ANUL',
        'ASSOCIATES'        => 'ASSO',
        'AUTHOR'            => 'AUTH',
        'BAPTISM-LDS'       => 'BAPL',
        'BAPTISM'           => 'BAPM',
        'BAR_MITZVAH'       => 'BARM',
        'BAS_MITZVAH'       => 'BASM',
        'BIRTH'             => 'BIRT',
        'BLESSING'          => 'BLES',
        'BURIAL'            => 'BURI',
        'CALL_NUMBER'       => 'CALN',
        'CASTE'             => 'CAST',
        'CAUSE'             => 'CAUS',
        'CENSUS'            => 'CENS',
        'CHANGE'            => 'CHAN',
        'CHARACTER'         => 'CHAR',
        'CHILD'             => 'CHIL',
        'CHRISTENING'       => 'CHR',
        'ADULT_CHRISTENING' => 'CHRA',
        'CITY'              => 'CITY',
        'CONCATENATION'     => 'CONC',
        'CONFIRMATION'      => 'CONF',
        'CONFIRMATION-LDS'  => 'CONL',
        'CONTINUED'         => 'CONT',
        'COPYRIGHT'         => 'COPY',
        'CORPORTATE'        => 'CORP',
        'CREMATION'         => 'CREM',
        'COUNTRY'           => 'CTRY',
        'DATA'              => 'DATA',
        'DATE'              => 'DATE',
        'DEATH'             => 'DEAT',
        'DESCENDANTS'       => 'DESC',
        'DESCENDANTS_INT'   => 'DESI',
        'DESTINATION'       => 'DEST',
        'DIVORCE'           => 'DIV',
        'DIVORCE_FILED'     => 'DIVF',
        'PHY_DESCRIPTION'   => 'DSCR',
        'EDUCATION'         => 'EDUC',
        'EMAIL'             => 'EMAI',
        'EMIGRATION'        => 'EMIG',
        'ENDOWMENT'         => 'ENDL',
        'ENGAGEMENT'        => 'ENGA',
        'EVENT'             => 'EVEN',
        'FACT'              => 'FACT',
        'FAMILY'            => 'FAM',
        'FAMILY_CHILD'      => 'FAMC',
        'FAMILY_FILE'       => 'FAMF',
        'FAMILY_SPOUSE'     => 'FAMS',
        'FACIMILIE'         => 'FAX',
        'FIRST_COMMUNION'   => 'FCOM',
        'FILE'              => 'FILE',
        'FORMAT'            => 'FORM',
        'PHONETIC'          => 'FONE',
        'GEDCOM'            => 'GEDC',
        'GIVEN_NAME'        => 'GIVN',
        'GRADUATION'        => 'GRAD',
        'HEADER'            => 'HEAD',
        'HUSBAND'           => 'HUSB',
        'IDENT_NUMBER'      => 'IDNO',
        'IMMIGRATION'       => 'IMMI',
        'INDIVIDUAL'        => 'INDI',
        'LANGUAGE'          => 'LANG',
        'LATITUDE'          => 'LATI',
        'LONGITUDE'         => 'LONG',
        'MAP'               => 'MAP',
        'MARRIAGE_BANN'     => 'MARB',
        'MARR_CONTRACT'     => 'MARC',
        'MARR_LICENSE'      => 'MARL',
        'MARRIAGE'          => 'MARR',
        'MEDIA'             => 'MEDI',
        'NAME'              => 'NAME',
        'NATIONALITY'       => 'NATI',
        'NATURALIZATION'    => 'NATU',
        'CHILDREN_COUNT'    => 'NCHI',
        'NICKNAME'          => 'NICK',
        'MARRIAGE_COUNT'    => 'NMR',
        'NOTE'              => 'NOTE',
        'NAME_PREFIX'       => 'NPFX',
        'NAME_SUFFIX'       => 'NSFX',
        'OBJECT'            => 'OBJE',
        'OCCUPATION'        => 'OCCU',
        'ORDINANCE'         => 'ORDI',
        'ORDINATION'        => 'ORDN',
        'PAGE'              => 'PAGE',
        'PEDIGREE'          => 'PEDI',
        'PHONE'             => 'PHON',
        'PLACE'             => 'PLAC',
        'POSTAL_CODE'       => 'POST',
        'PROBATE'           => 'PROB',
        'PROPERTY'          => 'PROP',
        'PUBLICATION'       => 'PUBL',
        'QUALITY_OF_DATA'   => 'QUAY',
        'REFERENCE'         => 'REFN',
        'RELATIONSHIP'      => 'RELA',
        'RELIGION'          => 'RELI',
        'REPOSITORY'        => 'REPO',
        'RESIDENCE'         => 'RESI',
        'RESTRICTION'       => 'RESN',
        'RETIREMENT'        => 'RETI',
        'REC_FILE_NUMBER'   => 'RFN',
        'REC_ID_NUMBER'     => 'RIN',
        'ROLE'              => 'ROLE',
        'ROMANIZED'         => 'ROMN',
        'SEALING_CHILD'     => 'SLGC',
        'SEALING_SPOUSE'    => 'SLGS',
        'SEX'               => 'SEX',
        'SOURCE'            => 'SOUR',
        'SURN_PREFIX'       => 'SPFX',
        'SOC_SEC_NUMBER'    => 'SSN',
        'STATE'             => 'STAE',
        'STATUS'            => 'STAT',
        'SUBMITTER'         => 'SUBM',
        'SUBMISSION'        => 'SUBN',
        'SURNAME'           => 'SURN',
        'TEMPLE'            => 'TEMP',
        'TEXT'              => 'TEXT',
        'TIME'              => 'TIME',
        'TITLE'             => 'TITL',
        'TRAILER'           => 'TRLR',
        'TYPE'              => 'TYPE',
        'VERSION'           => 'VERS',
        'WIFE'              => 'WIFE',
        'WILL'              => 'WILL',
        'WEB'               => 'WWW',
        '_DEATH_OF_SPOUSE'  => 'DETS',
        '_DEGREE'           => '_DEG',
        '_FILE'             => 'FILE',
        '_MEDICAL'          => '_MCL',
        '_MILITARY_SERVICE' => '_MILT',
    ];

    // Custom tags used by other applications, with direct synonyms
    private const TAG_SYNONYMS = [
    ];

    // LATI and LONG tags: hemisphere letter followed by unsigned decimal degrees.
    // %F (rather than %f) keeps the decimal point independent of the locale.
    private const DEGREE_FORMAT  = '%s%.5F';
    private const DEGREE_DIGITS  = 5;
    private const LATITUDE_NORTH = 'N';
    private const LATITUDE_SOUTH = 'S';
    private const LONGITUDE_EAST = 'E';
    private const LONGITUDE_WEST = 'W';

    // PLAC tags
    private const PLACE_SEPARATOR       = ', ';
    private const PLACE_SEPARATOR_REGEX = '/ *, */';

    // SEX tags
    private const SEX_FEMALE  = 'F';
    private const SEX_MALE    = 'M';
    private const SEX_UNKNOWN = 'U';

    /**
     * Convert a GEDCOM tag to a canonical form.
     *
     * The tag is upper-cased, then translated if it is a known tag name
     * (e.g. "BIRTH" becomes "BIRT") or a known synonym. Anything else is
     * returned upper-cased but otherwise unchanged.
     *
     * @param string $tag
     *
     * @return string
     */
    public function canonicalTag(string $tag): string
    {
        $tag = strtoupper($tag);

        return self::TAG_NAMES[$tag] ?? self::TAG_SYNONYMS[$tag] ?? $tag;
    }

    /**
     * Does a tag start with the user-defined prefix (an underscore)?
     *
     * @param string $tag
     *
     * @return bool - always false for an empty tag
     */
    public function isUserDefinedTag(string $tag): bool
    {
        $prefix = self::USER_DEFINED_TAG_PREFIX;

        // strncmp() has no offset argument, so an empty tag cannot trigger the
        // "start position" warning that substr_compare() raises on older PHP.
        return strncmp($tag, $prefix, strlen($prefix)) === 0;
    }

    /**
     * Convert a LATI value to signed decimal degrees (north is positive).
     *
     * @param string $text
     *
     * @return float - 0.0 if the text cannot be understood
     */
    public function readLatitude(string $text): float
    {
        return $this->readDegrees($text, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert a LONG value to signed decimal degrees (east is positive).
     *
     * @param string $text
     *
     * @return float - 0.0 if the text cannot be understood
     */
    public function readLongitude(string $text): float
    {
        return $this->readDegrees($text, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Parse a coordinate, written either as a hemisphere letter followed by a
     * number ("N12.5", case-insensitive) or as a plain signed number ("-12.5").
     *
     * @param string $text
     * @param string $positive - hemisphere letter for positive values
     * @param string $negative - hemisphere letter for negative values
     *
     * @return float - 0.0 if neither form matches
     */
    private function readDegrees(string $text, string $positive, string $negative): float
    {
        $text = trim($text);

        if ($text === '') {
            return 0.0;
        }

        // On PHP 7, substr() returns false when nothing is left; normalise to ''.
        $hemisphere = strtoupper((string) substr($text, 0, 1));
        $degrees    = (string) substr($text, 1);

        // Match a valid GEDCOM format
        if (is_numeric($degrees)) {
            if ($hemisphere === $positive) {
                return (float) $degrees;
            }

            if ($hemisphere === $negative) {
                return -(float) $degrees;
            }
        }

        // Just a number?
        if (is_numeric($text)) {
            return (float) $text;
        }

        // Can't match anything.
        return 0.0;
    }

    /**
     * Convert signed decimal degrees to a LATI value, e.g. "N51.50000".
     *
     * @param float $latitude
     *
     * @return string
     */
    public function writeLatitude(float $latitude): string
    {
        return $this->writeDegrees($latitude, self::LATITUDE_NORTH, self::LATITUDE_SOUTH);
    }

    /**
     * Convert signed decimal degrees to a LONG value, e.g. "W0.12500".
     *
     * @param float $longitude
     *
     * @return string
     */
    public function writeLongitude(float $longitude): string
    {
        return $this->writeDegrees($longitude, self::LONGITUDE_EAST, self::LONGITUDE_WEST);
    }

    /**
     * Format a coordinate as a hemisphere letter followed by the unsigned
     * value, which is the form that readDegrees() parses.
     *
     * @param float  $degrees
     * @param string $positive - hemisphere letter for values >= 0
     * @param string $negative - hemisphere letter for values < 0
     *
     * @return string
     */
    private function writeDegrees(float $degrees, string $positive, string $negative): string
    {
        // Round before testing the sign, so that a tiny negative value which
        // prints as zero is not labelled with the negative hemisphere.
        $degrees    = round($degrees, self::DEGREE_DIGITS);
        $hemisphere = $degrees < 0.0 ? $negative : $positive;

        return sprintf(self::DEGREE_FORMAT, $hemisphere, abs($degrees));
    }

    /**
     * Split a place name into its comma-separated parts.
     *
     * Although empty placenames are valid "Town, , Country", it is only meaningful
     * when structured places are used (PLAC:FORM town, county, country), and
     * structured places are discouraged. Empty parts are therefore dropped.
     *
     * @param string $text
     *
     * @return string[] - an empty array if there are no non-empty parts
     */
    public function readPlace(string $text): array
    {
        // The flag belongs in the fourth argument; the third is the limit (-1 = none).
        $parts = preg_split(self::PLACE_SEPARATOR_REGEX, trim($text), -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on a PCRE failure; keep the return type honest.
        return $parts === false ? [] : $parts;
    }

    /**
     * Join the parts of a place name using the standard separator.
     *
     * @param string[] $place
     *
     * @return string
     */
    public function writePlace(array $place): string
    {
        return implode(self::PLACE_SEPARATOR, $place);
    }

    /**
     * Some applications use non-standard values for unknown.
     *
     * Surrounding whitespace and letter-case are ignored. Anything other than
     * male or female is reported as unknown.
     *
     * @param string $text
     *
     * @return string - one of "M", "F" or "U"
     */
    public function readSex(string $text): string
    {
        $sex = strtoupper(trim($text));

        if ($sex === self::SEX_MALE || $sex === self::SEX_FEMALE) {
            return $sex;
        }

        return self::SEX_UNKNOWN;
    }
}
362