xref: /webtrees/app/Http/RequestHandlers/GedcomLoad.php (revision 2c685d76b4ab2ea8f166ab8a2e4112fa1ba8de5d)
16fd01894SGreg Roach<?php
26fd01894SGreg Roach
36fd01894SGreg Roach/**
46fd01894SGreg Roach * webtrees: online genealogy
589f7189bSGreg Roach * Copyright (C) 2021 webtrees development team
66fd01894SGreg Roach * This program is free software: you can redistribute it and/or modify
76fd01894SGreg Roach * it under the terms of the GNU General Public License as published by
86fd01894SGreg Roach * the Free Software Foundation, either version 3 of the License, or
96fd01894SGreg Roach * (at your option) any later version.
106fd01894SGreg Roach * This program is distributed in the hope that it will be useful,
116fd01894SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
126fd01894SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
136fd01894SGreg Roach * GNU General Public License for more details.
146fd01894SGreg Roach * You should have received a copy of the GNU General Public License
1589f7189bSGreg Roach * along with this program. If not, see <https://www.gnu.org/licenses/>.
166fd01894SGreg Roach */
176fd01894SGreg Roach
186fd01894SGreg Roachdeclare(strict_types=1);
196fd01894SGreg Roach
206fd01894SGreg Roachnamespace Fisharebest\Webtrees\Http\RequestHandlers;
216fd01894SGreg Roach
226fd01894SGreg Roachuse Exception;
231c6adce8SGreg Roachuse Fisharebest\Webtrees\Encodings\UTF8;
246fd01894SGreg Roachuse Fisharebest\Webtrees\Exceptions\GedcomErrorException;
256fd01894SGreg Roachuse Fisharebest\Webtrees\Http\ViewResponseTrait;
266fd01894SGreg Roachuse Fisharebest\Webtrees\I18N;
27*2c685d76SGreg Roachuse Fisharebest\Webtrees\Services\GedcomImportService;
286fd01894SGreg Roachuse Fisharebest\Webtrees\Services\TimeoutService;
295cd281f4SGreg Roachuse Fisharebest\Webtrees\Services\TreeService;
306fd01894SGreg Roachuse Fisharebest\Webtrees\Tree;
316fd01894SGreg Roachuse Illuminate\Database\Capsule\Manager as DB;
32182e92b8SGreg Roachuse Illuminate\Database\DetectsConcurrencyErrors;
336fd01894SGreg Roachuse Psr\Http\Message\ResponseInterface;
346fd01894SGreg Roachuse Psr\Http\Message\ServerRequestInterface;
356fd01894SGreg Roachuse Psr\Http\Server\RequestHandlerInterface;
366fd01894SGreg Roach
376fd01894SGreg Roachuse function assert;
386fd01894SGreg Roachuse function preg_split;
396fd01894SGreg Roachuse function response;
406fd01894SGreg Roachuse function str_replace;
416fd01894SGreg Roachuse function str_starts_with;
426fd01894SGreg Roachuse function strlen;
436fd01894SGreg Roachuse function substr;
446fd01894SGreg Roachuse function view;
456fd01894SGreg Roach
466fd01894SGreg Roach/**
476fd01894SGreg Roach * Load a chunk of GEDCOM data.
486fd01894SGreg Roach */
class GedcomLoad implements RequestHandlerInterface
{
    use ViewResponseTrait;
    use DetectsConcurrencyErrors;

    // Imports individual GEDCOM records into a tree.
    private GedcomImportService $gedcom_import_service;

    // Tells us when this request has used up its time budget.
    private TimeoutService $timeout_service;

    // Used to delete a tree's existing genealogy data before a fresh import.
    private TreeService $tree_service;

    /**
     * Store the collaborating services.
     *
     * @param GedcomImportService $gedcom_import_service
     * @param TimeoutService      $timeout_service
     * @param TreeService         $tree_service
     */
    public function __construct(
        GedcomImportService $gedcom_import_service,
        TimeoutService $timeout_service,
        TreeService $tree_service
    ) {
        $this->tree_service          = $tree_service;
        $this->timeout_service       = $timeout_service;
        $this->gedcom_import_service = $gedcom_import_service;
    }

    /**
     * Import as many pending chunks of the tree's GEDCOM data as the time limit allows.
     *
     * Responds with:
     *  - the "import-complete" view, when every chunk is already marked as imported;
     *  - the "import-progress" view, after processing zero or more chunks, or after a
     *    concurrency error (so that the import can be attempted again);
     *  - the "import-fail" view, when the first chunk does not start with a header
     *    record, or when any other exception is thrown.
     *
     * The progress value is the fraction of chunks that were imported *before* this
     * request started its work.
     *
     * @param ServerRequestInterface $request Must carry a Tree in its "tree" attribute.
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/ajax';

        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        try {
            // How many chunks have been imported, and how many are there in total?
            $chunks_done = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->where('imported', '=', '1')
                ->count();

            $chunks_total = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->count();

            // Nothing left to do - flag the tree as imported and say so.
            if ($chunks_done === $chunks_total) {
                $tree->setPreference('imported', '1');

                return response(view('admin/import-complete', ['tree' => $tree]));
            }

            // Progress as it stood when this request began.
            $progress = $chunks_done / $chunks_total;

            // No chunk imported yet means the next chunk is the one holding the header.
            $expect_header = $chunks_done === 0;

            // Error messages are accumulated here and displayed with the progress view.
            $errors = '';

            // Work in short bursts, to keep the resource requirements low.
            do {
                // The oldest chunk that is still waiting to be imported.
                $chunk = DB::table('gedcom_chunk')
                    ->where('gedcom_id', '=', $tree->id())
                    ->where('imported', '=', '0')
                    ->orderBy('gedcom_chunk_id')
                    ->select(['gedcom_chunk_id', 'chunk_data'])
                    ->first();

                if ($chunk === null) {
                    break;
                }

                // Claim the chunk by flagging it as imported.  This will create a row-lock,
                // to prevent other processes from reading it until we have finished.
                $claimed = DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $chunk->gedcom_chunk_id)
                    ->where('imported', '=', '0')
                    ->update(['imported' => 1]);

                // No row updated - another process got to this chunk first.
                if ($claimed === 0) {
                    break;
                }

                $gedcom = $chunk->chunk_data;

                // The first (header) chunk: throw away the old data and validate the file.
                if ($expect_header) {
                    $keep_media = (bool) $tree->getPreference('keep_media');
                    $this->tree_service->deleteGenealogyData($tree, $keep_media);

                    // Strip a leading byte-order-mark, and store the stripped text.
                    if (str_starts_with($gedcom, UTF8::BYTE_ORDER_MARK)) {
                        $gedcom = substr($gedcom, strlen(UTF8::BYTE_ORDER_MARK));

                        DB::table('gedcom_chunk')
                            ->where('gedcom_chunk_id', '=', $chunk->gedcom_chunk_id)
                            ->update(['chunk_data' => $gedcom]);
                    }

                    // A GEDCOM file must begin with a header record.
                    if (!str_starts_with($gedcom, '0 HEAD')) {
                        return $this->viewResponse('admin/import-fail', [
                            'error' => I18N::translate('Invalid GEDCOM file - no header record found.'),
                            'tree'  => $tree,
                        ]);
                    }

                    $expect_header = false;
                }

                // Treat carriage-returns as line-feeds.
                $gedcom = str_replace("\r", "\n", $gedcom);

                // Each record begins on a line that starts with "0".
                $records = preg_split('/\n+(?=0)/', $gedcom);

                // Import the records one by one; a bad record is reported, not fatal.
                foreach ($records as $record) {
                    try {
                        $this->gedcom_import_service->importRecord($record, $tree, false);
                    } catch (GedcomErrorException $exception) {
                        $errors .= $exception->getMessage();
                    }
                }

                // The chunk has been processed, so its text is no longer needed.
                DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $chunk->gedcom_chunk_id)
                    ->update(['chunk_data' => '']);
            } while (!$this->timeout_service->isTimeLimitUp());

            return $this->viewResponse('admin/import-progress', [
                'errors'   => $errors,
                'progress' => $progress,
                'tree'     => $tree,
            ]);
        } catch (Exception $ex) {
            DB::connection()->rollBack();

            // Anything other than a concurrency error (e.g. a deadlock) is a failure.
            if (!$this->causedByConcurrencyError($ex)) {
                return $this->viewResponse('admin/import-fail', [
                    'error' => $ex->getMessage(),
                    'tree'  => $tree,
                ]);
            }

            // Deadlock?  Show the progress view again, so that the import is retried.
            // $progress is unset if the exception was thrown before it was calculated.
            return $this->viewResponse('admin/import-progress', [
                'errors'   => '',
                'progress' => $progress ?? 0.0,
                'tree'     => $tree,
            ]);
        }
    }
}
205