xref: /webtrees/app/Module/FixDuplicateLinks.php (revision 7684867e23f29dec4c58f964ab2af8a5a6e49f03)
1ce42304aSGreg Roach<?php
2ce42304aSGreg Roach
3ce42304aSGreg Roach/**
4ce42304aSGreg Roach * webtrees: online genealogy
5*7684867eSGreg Roach * Copyright (C) 2020 webtrees development team
6ce42304aSGreg Roach * This program is free software: you can redistribute it and/or modify
7ce42304aSGreg Roach * it under the terms of the GNU General Public License as published by
8ce42304aSGreg Roach * the Free Software Foundation, either version 3 of the License, or
9ce42304aSGreg Roach * (at your option) any later version.
10ce42304aSGreg Roach * This program is distributed in the hope that it will be useful,
11ce42304aSGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
12ce42304aSGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13ce42304aSGreg Roach * GNU General Public License for more details.
14ce42304aSGreg Roach * You should have received a copy of the GNU General Public License
15ce42304aSGreg Roach * along with this program. If not, see <http://www.gnu.org/licenses/>.
16ce42304aSGreg Roach */
17ce42304aSGreg Roach
18ce42304aSGreg Roachdeclare(strict_types=1);
19ce42304aSGreg Roach
20ce42304aSGreg Roachnamespace Fisharebest\Webtrees\Module;
21ce42304aSGreg Roach
22ce42304aSGreg Roachuse Fisharebest\Webtrees\GedcomRecord;
23ce42304aSGreg Roachuse Fisharebest\Webtrees\I18N;
24ce42304aSGreg Roachuse Fisharebest\Webtrees\Services\DataFixService;
25ce42304aSGreg Roachuse Fisharebest\Webtrees\Tree;
26ce42304aSGreg Roachuse Illuminate\Support\Collection;
27ce42304aSGreg Roach
28ce42304aSGreg Roachuse function preg_match;
29ce42304aSGreg Roachuse function preg_replace;
30ce42304aSGreg Roach
31ce42304aSGreg Roach/**
32ce42304aSGreg Roach * Class FixDuplicateLinks
33ce42304aSGreg Roach */
class FixDuplicateLinks extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    /**
     * One pattern per GEDCOM level (1, 2 and 3).  Each pattern captures:
     *  - group 1: a line at that level containing an @XREF@ link, together
     *             with all of its subordinate (deeper-level) lines;
     *  - group 2: any number of intervening structures at the same level,
     *             followed by an exact repeat of group 1.
     *
     * The negative look-ahead after the back-reference requires the repeat to
     * end at a line break (or the end of the record).  Without it, a line such
     * as "2 PAGE 1" would be treated as a duplicate of a later "2 PAGE 10".
     *
     * Replacing a match with group 2 deletes the earlier copy and keeps the
     * later one.
     */
    private const DUPLICATE_LINK_PATTERNS = [
        '/(\n1.*@.+@.*(?:\n[2-9].*)*)((?:\n1.*(?:\n[2-9].*)*)*\1(?![^\n]))/',
        '/(\n2.*@.+@.*(?:\n[3-9].*)*)((?:\n2.*(?:\n[3-9].*)*)*\1(?![^\n]))/',
        '/(\n3.*@.+@.*(?:\n[4-9].*)*)((?:\n3.*(?:\n[4-9].*)*)*\1(?![^\n]))/',
    ];

    /** @var DataFixService */
    private $data_fix_service;

    /**
     * FixDuplicateLinks constructor.
     *
     * @param DataFixService $data_fix_service
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Remove duplicate links');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('A common error is to have multiple links to the same record, for example listing the same child more than once in a family record.');
    }

    /**
     * A list of all families that need examining.  This may include records
     * that do not need updating, as duplicate links are not detected in SQL.
     *
     * @param Tree          $tree
     * @param array<string> $params
     *
     * @return Collection<string>
     */
    protected function familiesToFix(Tree $tree, array $params): Collection
    {
        // No SQL test for duplicates: take every XREF from the generic query;
        // doesRecordNeedUpdate() decides which records actually change.
        return $this->familiesToFixQuery($tree, $params)
            ->pluck('f_id');
    }

    /**
     * A list of all individuals that need examining.  This may include records
     * that do not need updating, as duplicate links are not detected in SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function individualsToFix(Tree $tree, array $params): ?Collection
    {
        // No SQL test for duplicates: take every XREF from the generic query.
        return $this->individualsToFixQuery($tree, $params)
            ->pluck('i_id');
    }

    /**
     * A list of all media objects that need examining.  This may include records
     * that do not need updating, as duplicate links are not detected in SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>
     */
    protected function mediaToFix(Tree $tree, array $params): Collection
    {
        // No SQL test for duplicates: take every XREF from the generic query.
        return $this->mediaToFixQuery($tree, $params)
            ->pluck('m_id');
    }

    /**
     * A list of all notes that need examining.  This may include records
     * that do not need updating, as duplicate links are not detected in SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>
     */
    protected function notesToFix(Tree $tree, array $params): Collection
    {
        // No SQL test for duplicates: take every XREF from the generic query.
        return $this->notesToFixQuery($tree, $params)
            ->pluck('o_id');
    }

    /**
     * A list of all repositories that need examining.  This may include records
     * that do not need updating, as duplicate links are not detected in SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>
     */
    protected function repositoriesToFix(Tree $tree, array $params): Collection
    {
        // No SQL test for duplicates: take every XREF from the generic query.
        return $this->repositoriesToFixQuery($tree, $params)
            ->pluck('o_id');
    }

    /**
     * A list of all sources that need examining.  This may include records
     * that do not need updating, as duplicate links are not detected in SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>
     */
    protected function sourcesToFix(Tree $tree, array $params): Collection
    {
        // No SQL test for duplicates: take every XREF from the generic query.
        return $this->sourcesToFixQuery($tree, $params)
            ->pluck('s_id');
    }

    /**
     * A list of all submitters that need examining.  This may include records
     * that do not need updating, as duplicate links are not detected in SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>
     */
    protected function submittersToFix(Tree $tree, array $params): Collection
    {
        // No SQL test for duplicates: take every XREF from the generic query.
        return $this->submittersToFixQuery($tree, $params)
            ->pluck('o_id');
    }

    /**
     * Does a record need updating?  It does if the same link structure (a
     * line containing an @XREF@ plus its subordinate lines) occurs more than
     * once under the same parent, at level 1, 2 or 3.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        $gedcom = $record->gedcom();

        foreach (self::DUPLICATE_LINK_PATTERNS as $pattern) {
            // preg_match() gives 0 for "no match" and false on a PCRE error;
            // neither is a reason to flag the record.
            if (preg_match($pattern, $gedcom) === 1) {
                return true;
            }
        }

        return false;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $old = $record->gedcom();
        $new = $this->updateGedcom($record);

        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        $record->updateRecord($this->updateGedcom($record), false);
    }

    /**
     * Build the corrected GEDCOM text for a record, with every earlier copy
     * of a duplicated link structure removed and the last copy retained.
     *
     * @param GedcomRecord $record
     *
     * @return string
     */
    private function updateGedcom(GedcomRecord $record): string
    {
        $gedcom = $record->gedcom();

        // A single pass removes only one copy of a link that occurs three or
        // more times (the greedy middle section swallows the inner copies),
        // so repeat until nothing changes.  Every replacement shortens the
        // text, so the loop always terminates.
        do {
            $previous = $gedcom;

            // preg_replace() returns null on a PCRE error (e.g. backtrack
            // limit reached).  Keep the text unchanged rather than violating
            // the string return type.
            $gedcom = preg_replace(self::DUPLICATE_LINK_PATTERNS, '$2', $previous) ?? $previous;
        } while ($gedcom !== $previous);

        return $gedcom;
    }
}
246