xref: /webtrees/app/Http/RequestHandlers/GedcomLoad.php (revision 52550490b7095dd69811f3ec21ed5a3ca1a8968d)
16fd01894SGreg Roach<?php
26fd01894SGreg Roach
36fd01894SGreg Roach/**
46fd01894SGreg Roach * webtrees: online genealogy
5d11be702SGreg Roach * Copyright (C) 2023 webtrees development team
66fd01894SGreg Roach * This program is free software: you can redistribute it and/or modify
76fd01894SGreg Roach * it under the terms of the GNU General Public License as published by
86fd01894SGreg Roach * the Free Software Foundation, either version 3 of the License, or
96fd01894SGreg Roach * (at your option) any later version.
106fd01894SGreg Roach * This program is distributed in the hope that it will be useful,
116fd01894SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
126fd01894SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
136fd01894SGreg Roach * GNU General Public License for more details.
146fd01894SGreg Roach * You should have received a copy of the GNU General Public License
1589f7189bSGreg Roach * along with this program. If not, see <https://www.gnu.org/licenses/>.
166fd01894SGreg Roach */
176fd01894SGreg Roach
186fd01894SGreg Roachdeclare(strict_types=1);
196fd01894SGreg Roach
206fd01894SGreg Roachnamespace Fisharebest\Webtrees\Http\RequestHandlers;
216fd01894SGreg Roach
226fd01894SGreg Roachuse Exception;
236f4ec3caSGreg Roachuse Fisharebest\Webtrees\DB;
241c6adce8SGreg Roachuse Fisharebest\Webtrees\Encodings\UTF8;
256fd01894SGreg Roachuse Fisharebest\Webtrees\Exceptions\GedcomErrorException;
266fd01894SGreg Roachuse Fisharebest\Webtrees\Http\ViewResponseTrait;
276fd01894SGreg Roachuse Fisharebest\Webtrees\I18N;
282c685d76SGreg Roachuse Fisharebest\Webtrees\Services\GedcomImportService;
296fd01894SGreg Roachuse Fisharebest\Webtrees\Services\TimeoutService;
30b55cbc6bSGreg Roachuse Fisharebest\Webtrees\Validator;
31182e92b8SGreg Roachuse Illuminate\Database\DetectsConcurrencyErrors;
326fd01894SGreg Roachuse Psr\Http\Message\ResponseInterface;
336fd01894SGreg Roachuse Psr\Http\Message\ServerRequestInterface;
346fd01894SGreg Roachuse Psr\Http\Server\RequestHandlerInterface;
356fd01894SGreg Roach
366fd01894SGreg Roachuse function preg_split;
376fd01894SGreg Roachuse function str_replace;
386fd01894SGreg Roachuse function str_starts_with;
396fd01894SGreg Roachuse function strlen;
406fd01894SGreg Roachuse function substr;
416fd01894SGreg Roach
/**
 * Load a chunk of GEDCOM data.
 *
 * Each request imports pending rows of the gedcom_chunk table for one tree
 * until the timeout service reports that the time limit is up, then renders a
 * progress, completion or failure view using the AJAX layout.
 */
class GedcomLoad implements RequestHandlerInterface
{
    use ViewResponseTrait;
    use DetectsConcurrencyErrors;

    private GedcomImportService $gedcom_import_service;

    private TimeoutService $timeout_service;

    /**
     * @param GedcomImportService $gedcom_import_service Imports individual GEDCOM records
     * @param TimeoutService      $timeout_service       Reports when the time limit is up
     */
    public function __construct(
        GedcomImportService $gedcom_import_service,
        TimeoutService $timeout_service
    ) {
        $this->gedcom_import_service = $gedcom_import_service;
        $this->timeout_service       = $timeout_service;
    }

    /**
     * Import the next pending chunk(s) of GEDCOM data for the tree in the request.
     *
     * Responds with one of these views:
     *  - admin/import-complete: every chunk is imported and the tree preference "imported" is '1'.
     *  - admin/import-fail:     missing header/trailer record, or an unexpected exception.
     *  - admin/import-progress: more work remains, or a concurrency error means the step should be retried.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/ajax';

        $tree = Validator::attributes($request)->tree();

        // The exception handler below reports progress. Give it a defined value in case
        // an exception is thrown before the real progress has been calculated.
        $progress = 0.0;

        try {
            // What is the current import status?
            $import_offset = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->where('imported', '=', '1')
                ->count();

            $import_total = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->count();

            // Finished?
            if ($import_offset === $import_total) {
                // All chunks are processed, but the tree was never flagged as imported.
                if ($tree->getPreference('imported') !== '1') {
                    return $this->viewResponse('admin/import-fail', [
                        'error' => I18N::translate('Invalid GEDCOM file - no trailer record found.'),
                        'tree'  => $tree,
                    ]);
                }

                return $this->viewResponse('admin/import-complete', ['tree' => $tree]);
            }

            // If we are loading the first (header) record, then delete old data.
            if ($import_offset === 0) {
                $queries = [
                    'individuals' => DB::table('individuals')->where('i_file', '=', $tree->id()),
                    'families'    => DB::table('families')->where('f_file', '=', $tree->id()),
                    'sources'     => DB::table('sources')->where('s_file', '=', $tree->id()),
                    'other'       => DB::table('other')->where('o_file', '=', $tree->id()),
                    'places'      => DB::table('places')->where('p_file', '=', $tree->id()),
                    'placelinks'  => DB::table('placelinks')->where('pl_file', '=', $tree->id()),
                    'name'        => DB::table('name')->where('n_file', '=', $tree->id()),
                    'dates'       => DB::table('dates')->where('d_file', '=', $tree->id()),
                    'change'      => DB::table('change')->where('gedcom_id', '=', $tree->id()),
                ];

                if ($tree->getPreference('keep_media') === '1') {
                    // Keeping media: the media tables are untouched and links of type OBJE are retained.
                    $queries['link'] = DB::table('link')->where('l_file', '=', $tree->id())
                        ->where('l_type', '<>', 'OBJE');
                } else {
                    $queries['link']       = DB::table('link')->where('l_file', '=', $tree->id());
                    $queries['media_file'] = DB::table('media_file')->where('m_file', '=', $tree->id());
                    $queries['media']      = DB::table('media')->where('m_file', '=', $tree->id());
                }

                foreach ($queries as $table => $query) {
                    // take() and delete() together don't return the number of deleted rows,
                    // so count the remaining rows instead. Clone the builder for each use.
                    while ((clone $query)->count() > 0) {
                        (clone $query)->take(1000)->delete();

                        // Out of time: report which table we are deleting from. No chunk has been
                        // marked as imported yet, so this branch is entered again next request.
                        if ($this->timeout_service->isTimeLimitUp()) {
                            return $this->viewResponse('admin/import-progress', [
                                'errors'   => '',
                                'progress' => 0.0,
                                'status'   => I18N::translate('Deleting…') . ' ' . $table,
                                'tree'     => $tree,
                            ]);
                        }
                    }
                }
            }

            // Calculate progress so far. $import_total cannot be zero here, as that case
            // satisfies the "finished" test above.
            $progress = $import_offset / $import_total;

            $first_time = $import_offset === 0;

            // Collect up any errors, and show them later.
            $errors = '';

            // Run for a short period of time. This keeps the resource requirements low.
            do {
                $data = DB::table('gedcom_chunk')
                    ->where('gedcom_id', '=', $tree->id())
                    ->where('imported', '=', '0')
                    ->orderBy('gedcom_chunk_id')
                    ->select(['gedcom_chunk_id', 'chunk_data'])
                    ->first();

                // Nothing left to import.
                if ($data === null) {
                    break;
                }

                // Mark the chunk as imported.  This will create a row-lock, to prevent other
                // processes from reading it until we have finished.
                $n = DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                    ->where('imported', '=', '0')
                    ->update(['imported' => 1]);

                // Another process has already imported this data?
                if ($n === 0) {
                    break;
                }

                if ($first_time) {
                    // Remove any byte-order-mark, and store the cleaned data back in the chunk.
                    if (str_starts_with($data->chunk_data, UTF8::BYTE_ORDER_MARK)) {
                        $data->chunk_data = substr($data->chunk_data, strlen(UTF8::BYTE_ORDER_MARK));
                        DB::table('gedcom_chunk')
                            ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                            ->update(['chunk_data' => $data->chunk_data]);
                    }

                    // The very first chunk must begin with the header record.
                    if (!str_starts_with($data->chunk_data, '0 HEAD')) {
                        return $this->viewResponse('admin/import-fail', [
                            'error' => I18N::translate('Invalid GEDCOM file - no header record found.'),
                            'tree'  => $tree,
                        ]);
                    }

                    $first_time = false;
                }

                // Convert carriage returns to line feeds. The split below treats a run of
                // line feeds as a single separator.
                $data->chunk_data = str_replace("\r", "\n", $data->chunk_data);

                // Import all the records in this chunk of data. A new record starts at each
                // line that begins with "0".
                foreach (preg_split('/\n+(?=0)/', $data->chunk_data) as $rec) {
                    try {
                        $this->gedcom_import_service->importRecord($rec, $tree, false);
                    } catch (GedcomErrorException $exception) {
                        // A bad record does not stop the import; remember the message for the view.
                        $errors .= $exception->getMessage();
                    }
                }

                // Do not need the data any more.
                DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                    ->update(['chunk_data' => '']);
            } while (!$this->timeout_service->isTimeLimitUp());

            // Note: this is the progress measured at the start of this request.
            return $this->viewResponse('admin/import-progress', [
                'errors'   => $errors,
                'progress' => $progress,
                'status'   => '',
                'tree'     => $tree,
            ]);
        } catch (Exception $ex) {
            // NOTE(review): presumably a transaction was opened outside this handler
            // (none is started here) - confirm.
            DB::rollBack();

            // Deadlock? Try again.
            if ($this->causedByConcurrencyError($ex)) {
                return $this->viewResponse('admin/import-progress', [
                    'errors'   => '',
                    'progress' => $progress,
                    'status'   => e($ex->getMessage()),
                    'tree'     => $tree,
                ]);
            }

            return $this->viewResponse('admin/import-fail', [
                'error' => e($ex->getMessage()),
                'tree'  => $tree,
            ]);
        }
    }
}
235