xref: /webtrees/app/Http/RequestHandlers/GedcomLoad.php (revision d97083fe315dad9b7d0a150d4fb5f563e57d1869)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Http\RequestHandlers;
21
22use Exception;
23use Fisharebest\Webtrees\Encodings\UTF8;
24use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
25use Fisharebest\Webtrees\Functions\FunctionsImport;
26use Fisharebest\Webtrees\Http\ViewResponseTrait;
27use Fisharebest\Webtrees\I18N;
28use Fisharebest\Webtrees\Services\TimeoutService;
29use Fisharebest\Webtrees\Services\TreeService;
30use Fisharebest\Webtrees\Tree;
31use Illuminate\Database\Capsule\Manager as DB;
32use Illuminate\Database\DetectsConcurrencyErrors;
33use Illuminate\Database\Query\Expression;
34use Psr\Http\Message\ResponseInterface;
35use Psr\Http\Message\ServerRequestInterface;
36use Psr\Http\Server\RequestHandlerInterface;
37
38use function assert;
39use function preg_match;
40use function preg_split;
41use function response;
42use function str_replace;
43use function str_starts_with;
44use function strlen;
45use function strtoupper;
46use function substr;
47use function trim;
48use function view;
49
/**
 * Request handler that imports pending GEDCOM chunks for a tree, one time-slice per request,
 * and renders either a progress, completion or failure view.
 */
class GedcomLoad implements RequestHandlerInterface
{
    use ViewResponseTrait;
    use DetectsConcurrencyErrors;

    private TimeoutService $timeout_service;

    private TreeService $tree_service;

    /**
     * Store the collaborating services.
     *
     * @param TimeoutService $timeout_service Decides when this request has run long enough.
     * @param TreeService    $tree_service    Used to wipe a tree's existing genealogy data.
     */
    public function __construct(TimeoutService $timeout_service, TreeService $tree_service)
    {
        $this->tree_service    = $tree_service;
        $this->timeout_service = $timeout_service;
    }

    /**
     * Import as many un-imported chunks as the time limit allows.
     *
     * @param ServerRequestInterface $request Must carry a Tree in its "tree" attribute.
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/ajax';

        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        try {
            // How many chunks are already imported, and how many exist in total?
            $chunks_done = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->where('imported', '=', '1')
                ->count();

            $chunks_total = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->count();

            // Nothing left to do (this also covers a tree with no chunks at all).
            if ($chunks_done === $chunks_total) {
                $tree->setPreference('imported', '1');

                return response(view('admin/import-complete', ['tree' => $tree]));
            }

            // Progress is measured once, before this request does any work.
            $progress = $chunks_done / $chunks_total;

            // No chunk imported yet means the next chunk must be the header chunk.
            $expect_header = $chunks_done === 0;

            // Error messages gathered from individual records, shown with the progress view.
            $errors = '';

            // Work in short bursts to keep the resource requirements low.
            while (true) {
                $chunk = DB::table('gedcom_chunk')
                    ->where('gedcom_id', '=', $tree->id())
                    ->where('imported', '=', '0')
                    ->orderBy('gedcom_chunk_id')
                    ->select(['gedcom_chunk_id', 'chunk_data'])
                    ->first();

                if ($chunk === null) {
                    break;
                }

                // Claim the chunk by flagging it as imported. The update only matches rows
                // that are still un-imported, so zero affected rows means another request
                // claimed it first.
                $rows_claimed = DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $chunk->gedcom_chunk_id)
                    ->where('imported', '=', '0')
                    ->update(['imported' => 1]);

                if ($rows_claimed === 0) {
                    break;
                }

                if ($expect_header) {
                    // Starting a fresh import: clear out the tree's old data first.
                    $keep_media = (bool) $tree->getPreference('keep_media');
                    $this->tree_service->deleteGenealogyData($tree, $keep_media);

                    // Strip a leading byte-order-mark, and persist the stripped text.
                    $bom = UTF8::BYTE_ORDER_MARK;

                    if (str_starts_with($chunk->chunk_data, $bom)) {
                        $chunk->chunk_data = substr($chunk->chunk_data, strlen($bom));

                        DB::table('gedcom_chunk')
                            ->where('gedcom_chunk_id', '=', $chunk->gedcom_chunk_id)
                            ->update(['chunk_data' => $chunk->chunk_data]);
                    }

                    $has_header = str_starts_with($chunk->chunk_data, '0 HEAD');

                    if (!$has_header) {
                        return $this->viewResponse('admin/import-fail', [
                            'error' => I18N::translate('Invalid GEDCOM file - no header record found.'),
                            'tree'  => $tree,
                        ]);
                    }

                    $expect_header = false;
                }

                // Normalise carriage returns to newlines before splitting into records.
                $gedcom = str_replace("\r", "\n", $chunk->chunk_data);

                $errors .= $this->importRecords($gedcom, $tree);

                // The raw text is no longer needed once its records are imported.
                DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $chunk->gedcom_chunk_id)
                    ->update(['chunk_data' => '']);

                if ($this->timeout_service->isTimeLimitUp()) {
                    break;
                }
            }

            return $this->viewResponse('admin/import-progress', [
                'errors'   => $errors,
                'progress' => $progress,
                'tree'     => $tree,
            ]);
        } catch (Exception $failure) {
            // Undo the work of the current transaction (presumably opened outside this
            // class - it is not started anywhere in this file).
            DB::connection()->rollBack();

            $is_concurrency_error = $this->causedByConcurrencyError($failure);

            if (!$is_concurrency_error) {
                return $this->viewResponse('admin/import-fail', [
                    'error' => $failure->getMessage(),
                    'tree'  => $tree,
                ]);
            }

            // Deadlock or similar: show the progress view again so the import is retried.
            // $progress may be unset if the failure happened before it was calculated.
            return $this->viewResponse('admin/import-progress', [
                'errors'   => '',
                'progress' => $progress ?? 0.0,
                'tree'     => $tree,
            ]);
        }
    }

    /**
     * Import every record found in a block of GEDCOM text.
     *
     * Records are separated at runs of newlines that are followed by the digit "0".
     * A record that raises a GedcomErrorException is skipped and its message collected;
     * any other exception propagates to the caller.
     *
     * @param string $gedcom GEDCOM text, with line endings already converted to "\n".
     * @param Tree   $tree   The tree receiving the records.
     *
     * @return string The concatenated messages of all GedcomErrorExceptions raised.
     */
    private function importRecords(string $gedcom, Tree $tree): string
    {
        $messages = '';

        foreach (preg_split('/\n+(?=0)/', $gedcom) as $record) {
            try {
                FunctionsImport::importRecord($record, $tree, false);
            } catch (GedcomErrorException $gedcom_error) {
                $messages .= $gedcom_error->getMessage();
            }
        }

        return $messages;
    }
}
201}
202