xref: /webtrees/app/Http/RequestHandlers/GedcomLoad.php (revision a6faacba1b18a38dad8f857aff08ab497dffa74b)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Http\RequestHandlers;
21
22use Exception;
23use Fisharebest\Webtrees\Encodings\UTF8;
24use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
25use Fisharebest\Webtrees\Http\ViewResponseTrait;
26use Fisharebest\Webtrees\I18N;
27use Fisharebest\Webtrees\Services\GedcomImportService;
28use Fisharebest\Webtrees\Services\TimeoutService;
29use Fisharebest\Webtrees\Services\TreeService;
30use Fisharebest\Webtrees\Validator;
31use Illuminate\Database\Capsule\Manager as DB;
32use Illuminate\Database\DetectsConcurrencyErrors;
33use Psr\Http\Message\ResponseInterface;
34use Psr\Http\Message\ServerRequestInterface;
35use Psr\Http\Server\RequestHandlerInterface;
36
37use function preg_split;
38use function str_replace;
39use function str_starts_with;
40use function strlen;
41use function substr;
42
43/**
44 * Load a chunk of GEDCOM data.
45 */
class GedcomLoad implements RequestHandlerInterface
{
    use ViewResponseTrait;
    use DetectsConcurrencyErrors;

    // Upper bound on the number of rows removed by one DELETE statement when clearing old data.
    private const DELETE_BATCH_SIZE = 1000;

    private GedcomImportService $gedcom_import_service;

    private TimeoutService $timeout_service;

    /**
     * GedcomLoad constructor.
     *
     * @param GedcomImportService $gedcom_import_service
     * @param TimeoutService      $timeout_service
     */
    public function __construct(
        GedcomImportService $gedcom_import_service,
        TimeoutService $timeout_service
    ) {
        $this->gedcom_import_service = $gedcom_import_service;
        $this->timeout_service       = $timeout_service;
    }

    /**
     * Import pending rows of the gedcom_chunk table for a tree, until the time limit is reached.
     *
     * The response is rendered with the AJAX layout, using one of these views:
     * - admin/import-complete: every chunk is flagged as imported and the tree preference
     *   "imported" is '1'.
     * - admin/import-fail: the header or trailer record is missing, or an exception was
     *   caught that is not a concurrency error.
     * - admin/import-progress: old data is still being deleted, chunks have been imported
     *   during this request, or a concurrency error was caught.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/ajax';

        $tree = Validator::attributes($request)->tree();

        try {
            // What is the current import status?
            $import_offset = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->where('imported', '=', '1')
                ->count();

            $import_total = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->count();

            // Finished?  This test also catches a tree without any chunks, so $import_total
            // cannot be zero when it is used as a divisor further down.
            if ($import_offset === $import_total) {
                if ($tree->getPreference('imported') !== '1') {
                    return $this->viewResponse('admin/import-fail', [
                        'error' => I18N::translate('Invalid GEDCOM file - no trailer record found.'),
                        'tree'  => $tree,
                    ]);
                }

                return $this->viewResponse('admin/import-complete', ['tree' => $tree]);
            }

            // If we are loading the first (header) record, then delete old data.
            if ($import_offset === 0) {
                $queries = [
                    'individuals' => DB::table('individuals')->where('i_file', '=', $tree->id()),
                    'families'    => DB::table('families')->where('f_file', '=', $tree->id()),
                    'sources'     => DB::table('sources')->where('s_file', '=', $tree->id()),
                    'other'       => DB::table('other')->where('o_file', '=', $tree->id()),
                    'places'      => DB::table('places')->where('p_file', '=', $tree->id()),
                    'placelinks'  => DB::table('placelinks')->where('pl_file', '=', $tree->id()),
                    'name'        => DB::table('name')->where('n_file', '=', $tree->id()),
                    'dates'       => DB::table('dates')->where('d_file', '=', $tree->id()),
                    'change'      => DB::table('change')->where('gedcom_id', '=', $tree->id()),
                ];

                if ($tree->getPreference('keep_media') === '1') {
                    // Keep the media tables, and the links that point at media objects.
                    $queries['link'] = DB::table('link')->where('l_file', '=', $tree->id())
                        ->where('l_type', '<>', 'OBJE');
                } else {
                    $queries['link']       = DB::table('link')->where('l_file', '=', $tree->id());
                    $queries['media_file'] = DB::table('media_file')->where('m_file', '=', $tree->id());
                    $queries['media']      = DB::table('media')->where('m_file', '=', $tree->id());
                }

                foreach ($queries as $table => $query) {
                    // take() and delete() together don't return the number of deleted rows,
                    // so ask whether any rows are left before deleting the next batch.
                    while ((clone $query)->exists()) {
                        (clone $query)->take(self::DELETE_BATCH_SIZE)->delete();

                        if ($this->timeout_service->isTimeLimitUp()) {
                            return $this->viewResponse('admin/import-progress', [
                                'errors'   => '',
                                'progress' => 0.0,
                                'status'   => I18N::translate('Deleting…') . ' ' . $table,
                                'tree'     => $tree,
                            ]);
                        }
                    }
                }
            }

            // Progress before this request imports anything.  The catch block below reports
            // this value if an exception interrupts the import loop.
            $progress = $import_offset / $import_total;

            $first_time = $import_offset === 0;

            // Collect up any errors, and show them later.
            $errors = '';

            // Number of chunks completely imported by this request.
            $chunks_done = 0;

            // Run for a short period of time. This keeps the resource requirements low.
            do {
                $data = DB::table('gedcom_chunk')
                    ->where('gedcom_id', '=', $tree->id())
                    ->where('imported', '=', '0')
                    ->orderBy('gedcom_chunk_id')
                    ->select(['gedcom_chunk_id', 'chunk_data'])
                    ->first();

                if ($data === null) {
                    break;
                }

                // Mark the chunk as imported.  This will create a row-lock, to prevent other
                // processes from reading it until we have finished.
                $n = DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                    ->where('imported', '=', '0')
                    ->update(['imported' => 1]);

                // Another process has already imported this data?
                if ($n === 0) {
                    break;
                }

                if ($first_time) {
                    // Remove any byte-order-mark, and store the cleaned data.
                    if (str_starts_with($data->chunk_data, UTF8::BYTE_ORDER_MARK)) {
                        $data->chunk_data = substr($data->chunk_data, strlen(UTF8::BYTE_ORDER_MARK));
                        DB::table('gedcom_chunk')
                            ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                            ->update(['chunk_data' => $data->chunk_data]);
                    }

                    // NOTE(review): the chunk was flagged as imported a few lines above.  Whether
                    // that flag is kept after this early return depends on transaction handling
                    // outside this class - confirm.
                    if (!str_starts_with($data->chunk_data, '0 HEAD')) {
                        return $this->viewResponse('admin/import-fail', [
                            'error' => I18N::translate('Invalid GEDCOM file - no header record found.'),
                            'tree'  => $tree,
                        ]);
                    }

                    $first_time = false;
                }

                $data->chunk_data = str_replace("\r", "\n", $data->chunk_data);

                // Import all the records in this chunk of data.  A record that is rejected
                // is reported in $errors, and does not stop the rest of the import.
                foreach ($this->splitRecords($data->chunk_data) as $rec) {
                    try {
                        $this->gedcom_import_service->importRecord($rec, $tree, false);
                    } catch (GedcomErrorException $exception) {
                        $errors .= $exception->getMessage();
                    }
                }

                // Do not need the data any more.
                DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                    ->update(['chunk_data' => '']);

                $chunks_done++;
            } while (!$this->timeout_service->isTimeLimitUp());

            // Report the position reached at the end of this request, rather than the
            // position at the start of it.
            $progress = ($import_offset + $chunks_done) / $import_total;

            return $this->viewResponse('admin/import-progress', [
                'errors'   => $errors,
                'progress' => $progress,
                'status'   => '',
                'tree'     => $tree,
            ]);
        } catch (Exception $ex) {
            DB::connection()->rollBack();

            // Deadlock? Try again.
            if ($this->causedByConcurrencyError($ex)) {
                return $this->viewResponse('admin/import-progress', [
                    'errors'   => '',
                    // $progress is not set if the exception was thrown before it was calculated.
                    'progress' => $progress ?? 0.0,
                    'status'   => $ex->getMessage(),
                    'tree'     => $tree,
                ]);
            }

            return $this->viewResponse('admin/import-fail', [
                'error' => $ex->getMessage(),
                'tree'  => $tree,
            ]);
        }
    }

    /**
     * Split a chunk of GEDCOM data into separate records.
     *
     * A new record starts wherever one or more line feeds are followed by the character "0".
     *
     * @param string $chunk_data GEDCOM data, using line feeds as line endings
     *
     * @return array<int,string>
     *
     * @throws Exception If the data cannot be split.
     */
    private function splitRecords(string $chunk_data): array
    {
        $records = preg_split('/\n+(?=0)/', $chunk_data);

        // preg_split() signals failure by returning false.  Stop here, so that the caller
        // does not go on to erase a chunk that was never imported.
        if ($records === false) {
            throw new Exception('Unable to split the GEDCOM data into records.');
        }

        return $records;
    }
}
238