xref: /webtrees/app/Module/FixDuplicateLinks.php (revision c72b7fa45c395eaae8c6500e313146f9f94a1211)
1ce42304aSGreg Roach<?php
2ce42304aSGreg Roach
/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
17ce42304aSGreg Roach
18ce42304aSGreg Roachdeclare(strict_types=1);
19ce42304aSGreg Roach
20ce42304aSGreg Roachnamespace Fisharebest\Webtrees\Module;
21ce42304aSGreg Roach
22ce42304aSGreg Roachuse Fisharebest\Webtrees\GedcomRecord;
23ce42304aSGreg Roachuse Fisharebest\Webtrees\I18N;
24ce42304aSGreg Roachuse Fisharebest\Webtrees\Services\DataFixService;
25ce42304aSGreg Roachuse Fisharebest\Webtrees\Tree;
26ce42304aSGreg Roachuse Illuminate\Support\Collection;
27ce42304aSGreg Roach
28ce42304aSGreg Roachuse function preg_match;
29ce42304aSGreg Roachuse function preg_replace;
30ce42304aSGreg Roach
/**
 * Class FixDuplicateLinks
 *
 * A "data fix" module that detects and removes repeated links inside a single
 * GEDCOM record (for example, the same child listed twice in a family).
 *
 * A "link" here is any level 1, 2 or 3 line containing "@...@", together with
 * its subordinate (deeper-level) lines.  Two links are duplicates when their
 * complete text, including subordinate lines, is identical and they share the
 * same parent.
 */
class FixDuplicateLinks extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    private DataFixService $data_fix_service;

    /**
     * @param DataFixService $data_fix_service Used to render the before/after diff in previewUpdate().
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Remove duplicate links');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('A common error is to have multiple links to the same record, for example listing the same child more than once in a family record.');
    }

    /**
     * XREFs of the family records that need examining.  Duplicate links cannot
     * be detected with a cheap SQL query, so every candidate is returned and
     * doesRecordNeedUpdate() filters them afterwards.
     *
     * The query comes from familiesToFixQuery(), which is not defined in this
     * class (presumably supplied by ModuleDataFixTrait).
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>
     */
    protected function familiesToFix(Tree $tree, array $params): Collection
    {
        $query = $this->familiesToFixQuery($tree, $params);

        return $query->pluck('f_id');
    }

    /**
     * XREFs of the individual records that need examining.  Duplicate links
     * cannot be detected with a cheap SQL query, so every candidate is
     * returned and doesRecordNeedUpdate() filters them afterwards.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function individualsToFix(Tree $tree, array $params): ?Collection
    {
        $query = $this->individualsToFixQuery($tree, $params);

        return $query->pluck('i_id');
    }

    /**
     * XREFs of the media records that need examining.  Duplicate links cannot
     * be detected with a cheap SQL query, so every candidate is returned and
     * doesRecordNeedUpdate() filters them afterwards.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>
     */
    protected function mediaToFix(Tree $tree, array $params): Collection
    {
        $query = $this->mediaToFixQuery($tree, $params);

        return $query->pluck('m_id');
    }

    /**
     * XREFs of the note records that need examining.  Duplicate links cannot
     * be detected with a cheap SQL query, so every candidate is returned and
     * doesRecordNeedUpdate() filters them afterwards.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>
     */
    protected function notesToFix(Tree $tree, array $params): Collection
    {
        $query = $this->notesToFixQuery($tree, $params);

        return $query->pluck('o_id');
    }

    /**
     * XREFs of the repository records that need examining.  Duplicate links
     * cannot be detected with a cheap SQL query, so every candidate is
     * returned and doesRecordNeedUpdate() filters them afterwards.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>
     */
    protected function repositoriesToFix(Tree $tree, array $params): Collection
    {
        $query = $this->repositoriesToFixQuery($tree, $params);

        return $query->pluck('o_id');
    }

    /**
     * XREFs of the source records that need examining.  Duplicate links cannot
     * be detected with a cheap SQL query, so every candidate is returned and
     * doesRecordNeedUpdate() filters them afterwards.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>
     */
    protected function sourcesToFix(Tree $tree, array $params): Collection
    {
        $query = $this->sourcesToFixQuery($tree, $params);

        return $query->pluck('s_id');
    }

    /**
     * XREFs of the submitter records that need examining.  Duplicate links
     * cannot be detected with a cheap SQL query, so every candidate is
     * returned and doesRecordNeedUpdate() filters them afterwards.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>
     */
    protected function submittersToFix(Tree $tree, array $params): Collection
    {
        $query = $this->submittersToFixQuery($tree, $params);

        return $query->pluck('o_id');
    }

    /**
     * Does a record need updating?
     *
     * True when the record contains, at level 1, 2 or 3, a link (a line with
     * "@...@" plus its subordinate lines) that is repeated later under the
     * same parent.  A PCRE failure is treated as "no duplicate found".
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        $gedcom = $record->gedcom();

        // Each pattern: capture a link block, skip any sibling blocks at the
        // same level, then require an exact repeat of the captured block (\1).
        if (preg_match('/(\n1.*@.+@.*(?:(?:\n[2-9].*)*))(?:\n1.*(?:\n[2-9].*)*)*\1/', $gedcom) === 1) {
            return true;
        }

        if (preg_match('/(\n2.*@.+@.*(?:(?:\n[3-9].*)*))(?:\n2.*(?:\n[3-9].*)*)*\1/', $gedcom) === 1) {
            return true;
        }

        return preg_match('/(\n3.*@.+@.*(?:(?:\n[4-9].*)*))(?:\n3.*(?:\n[4-9].*)*)*\1/', $gedcom) === 1;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string A rendering of the difference between the current and the fixed GEDCOM.
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $before = $record->gedcom();
        $after  = $this->updateGedcom($record);

        return $this->data_fix_service->gedcomDiff($record->tree(), $before, $after);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        $fixed = $this->updateGedcom($record);

        $record->updateRecord($fixed, false);
    }

    /**
     * Build the GEDCOM text of a record with duplicate links removed.
     *
     * For every duplicated link the earlier copy is dropped and the last copy
     * is kept.  One regex pass removes only one copy of a link that appears
     * three or more times, so the passes are repeated until nothing changes.
     *
     * @param GedcomRecord $record
     *
     * @return string
     */
    private function updateGedcom(GedcomRecord $record): string
    {
        // One pattern per GEDCOM level.  Group 1 is the first copy of the
        // link; group 2 is everything up to and including the repeat.
        $patterns = [
            '/(\n1.*@.+@.*(?:\n[2-9].*)*)((?:\n1.*(?:\n[2-9].*)*)*\1)/',
            '/(\n2.*@.+@.*(?:\n[3-9].*)*)((?:\n2.*(?:\n[3-9].*)*)*\1)/',
            '/(\n3.*@.+@.*(?:\n[4-9].*)*)((?:\n3.*(?:\n[4-9].*)*)*\1)/',
        ];

        $gedcom = $record->gedcom();

        // Every successful replacement shortens the text, so this terminates.
        do {
            $previous = $gedcom;

            foreach ($patterns as $pattern) {
                // preg_replace() returns null on a PCRE error (e.g. backtrack
                // limit reached); in that case keep the text unchanged rather
                // than propagating null into a string return type.
                $gedcom = preg_replace($pattern, '$2', $gedcom) ?? $gedcom;
            }
        } while ($gedcom !== $previous);

        return $gedcom;
    }
}
245