xref: /webtrees/app/Module/FixDuplicateLinks.php (revision 7413816e6dd2d50e569034fb804f3dce7471bb94)
1ce42304aSGreg Roach<?php
2ce42304aSGreg Roach
3ce42304aSGreg Roach/**
4ce42304aSGreg Roach * webtrees: online genealogy
5d11be702SGreg Roach * Copyright (C) 2023 webtrees development team
6ce42304aSGreg Roach * This program is free software: you can redistribute it and/or modify
7ce42304aSGreg Roach * it under the terms of the GNU General Public License as published by
8ce42304aSGreg Roach * the Free Software Foundation, either version 3 of the License, or
9ce42304aSGreg Roach * (at your option) any later version.
10ce42304aSGreg Roach * This program is distributed in the hope that it will be useful,
11ce42304aSGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
12ce42304aSGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13ce42304aSGreg Roach * GNU General Public License for more details.
14ce42304aSGreg Roach * You should have received a copy of the GNU General Public License
1589f7189bSGreg Roach * along with this program. If not, see <https://www.gnu.org/licenses/>.
16ce42304aSGreg Roach */
17ce42304aSGreg Roach
18ce42304aSGreg Roachdeclare(strict_types=1);
19ce42304aSGreg Roach
20ce42304aSGreg Roachnamespace Fisharebest\Webtrees\Module;
21ce42304aSGreg Roach
22ce42304aSGreg Roachuse Fisharebest\Webtrees\GedcomRecord;
23ce42304aSGreg Roachuse Fisharebest\Webtrees\I18N;
24ce42304aSGreg Roachuse Fisharebest\Webtrees\Services\DataFixService;
25ce42304aSGreg Roachuse Fisharebest\Webtrees\Tree;
26ce42304aSGreg Roachuse Illuminate\Support\Collection;
27ce42304aSGreg Roach
28ce42304aSGreg Roachuse function preg_match;
29ce42304aSGreg Roachuse function preg_replace;
30ce42304aSGreg Roach
/**
 * Class FixDuplicateLinks
 *
 * A "data fix" module that removes repeated links to the same record, for
 * example the same child listed more than once in a family record.
 */
class FixDuplicateLinks extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    /**
     * One pattern per GEDCOM level (1, 2 and 3).
     *
     * Capture group 1 is a line at that level which contains an "@...@" pointer,
     * together with its subordinate (deeper-level) lines.  Capture group 2 is any
     * number of following blocks at the same level, ending with a repeat of the
     * text captured in group 1.  Replacing a match with "$2" therefore drops the
     * earlier copy and keeps the later one.
     *
     * @var array<int,string>
     */
    private const DUPLICATE_LINK_PATTERNS = [
        '/(\n1.*@.+@.*(?:\n[2-9].*)*)((?:\n1.*(?:\n[2-9].*)*)*\1)/',
        '/(\n2.*@.+@.*(?:\n[3-9].*)*)((?:\n2.*(?:\n[3-9].*)*)*\1)/',
        '/(\n3.*@.+@.*(?:\n[4-9].*)*)((?:\n3.*(?:\n[4-9].*)*)*\1)/',
    ];

    private DataFixService $data_fix_service;

    /**
     * @param DataFixService $data_fix_service
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Remove duplicate links');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('A common error is to have multiple links to the same record, for example listing the same child more than once in a family record.');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>
     */
    protected function familiesToFix(Tree $tree, array $params): Collection
    {
        // No DB querying possible?  Select all.
        return $this->familiesToFixQuery($tree, $params)
            ->pluck('f_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function individualsToFix(Tree $tree, array $params): Collection|null
    {
        // No DB querying possible?  Select all.
        return $this->individualsToFixQuery($tree, $params)
            ->pluck('i_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>
     */
    protected function mediaToFix(Tree $tree, array $params): Collection
    {
        // No DB querying possible?  Select all.
        return $this->mediaToFixQuery($tree, $params)
            ->pluck('m_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>
     */
    protected function notesToFix(Tree $tree, array $params): Collection
    {
        // No DB querying possible?  Select all.
        return $this->notesToFixQuery($tree, $params)
            ->pluck('o_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>
     */
    protected function repositoriesToFix(Tree $tree, array $params): Collection
    {
        // No DB querying possible?  Select all.
        return $this->repositoriesToFixQuery($tree, $params)
            ->pluck('o_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>
     */
    protected function sourcesToFix(Tree $tree, array $params): Collection
    {
        // No DB querying possible?  Select all.
        return $this->sourcesToFixQuery($tree, $params)
            ->pluck('s_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>
     */
    protected function submittersToFix(Tree $tree, array $params): Collection
    {
        // No DB querying possible?  Select all.
        return $this->submittersToFixQuery($tree, $params)
            ->pluck('o_id');
    }

    /**
     * Does a record need updating?
     *
     * True when any of the level 1, 2 or 3 duplicate-link patterns matches the
     * record's raw GEDCOM.  A failed match (preg_match() returning false) is
     * treated the same as "no duplicate found".
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        $gedcom = $record->gedcom();

        foreach (self::DUPLICATE_LINK_PATTERNS as $pattern) {
            if (preg_match($pattern, $gedcom) === 1) {
                return true;
            }
        }

        return false;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $old = $record->gedcom();
        $new = $this->updateGedcom($record);

        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        $record->updateRecord($this->updateGedcom($record), false);
    }

    /**
     * Build the record's GEDCOM with duplicate links removed.
     *
     * @param GedcomRecord $record
     *
     * @return string
     */
    private function updateGedcom(GedcomRecord $record): string
    {
        $gedcom = $record->gedcom();

        // A single pass is not enough.  The patterns match greedily, so a link that
        // occurs three or more times loses only its first copy per pass, and removing
        // deeper-level duplicates can make two higher-level blocks identical.
        // Each pass that changes anything makes the text shorter, so this terminates.
        do {
            $previous = $gedcom;

            // With an array of patterns, preg_replace() applies them in order (levels 1, 2, 3).
            // It returns null on a PCRE error (e.g. backtrack limit); keep the text we had
            // rather than passing null on, which would raise a TypeError.
            $gedcom = preg_replace(self::DUPLICATE_LINK_PATTERNS, '$2', $previous) ?? $previous;
        } while ($gedcom !== $previous);

        return $gedcom;
    }
}
237ce42304aSGreg Roach}
238