<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2022 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Encodings\UTF16BE;
use Fisharebest\Webtrees\Encodings\UTF16LE;
use Fisharebest\Webtrees\Encodings\UTF8;
use Fisharebest\Webtrees\Encodings\Windows1252;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use InvalidArgumentException;
use League\Flysystem\Filesystem;
use League\Flysystem\FilesystemOperator;
use League\Flysystem\ZipArchive\FilesystemZipArchiveProvider;
use League\Flysystem\ZipArchive\ZipArchiveAdapter;
use Psr\Http\Message\ResponseFactoryInterface;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\StreamFactoryInterface;
use RuntimeException;

use function addcslashes;
use function array_key_exists;
use function date;
use function explode;
use function fclose;
use function fopen;
use function fwrite;
use function implode;
use function is_string;
use function mb_strlen;
use function mb_strpos;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_match_all;
use function rewind;
use function str_contains;
use function stream_filter_append;
use function stream_get_meta_data;
use function strlen;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function tmpfile;

use const PATHINFO_EXTENSION;
use const PREG_SET_ORDER;
use const STREAM_FILTER_WRITE;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    // Maps the privacy option names accepted by downloadResponse() to access levels.
    private const ACCESS_LEVELS = [
        'gedadmin' => Auth::PRIV_NONE,
        'user'     => Auth::PRIV_USER,
        'visitor'  => Auth::PRIV_PRIVATE,
        'none'     => Auth::PRIV_HIDE,
    ];

    private ResponseFactoryInterface $response_factory;

    private StreamFactoryInterface $stream_factory;

    /**
     * @param ResponseFactoryInterface $response_factory
     * @param StreamFactoryInterface   $stream_factory
     */
    public function __construct(ResponseFactoryInterface $response_factory, StreamFactoryInterface $stream_factory)
    {
        $this->response_factory = $response_factory;
        $this->stream_factory   = $stream_factory;
    }

    /**
     * Build a download response containing the exported GEDCOM data, either as a
     * plain .ged file or wrapped in a .zip/.gdz archive.
     *
     * @param Tree                        $tree         - Export data from this tree
     * @param bool                        $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                      $encoding     - Convert from UTF-8 to other encoding
     * @param string                      $privacy      - Filter records by role (a key of self::ACCESS_LEVELS)
     * @param string                      $line_endings - CRLF or LF
     * @param string                      $filename     - Name of download file, without an extension
     * @param string                      $format       - One of: gedcom, zip, zipmedia, gedzip
     * @param Collection<int,string>|null $records      - Just export these records
     *
     * @return ResponseInterface
     *
     * @throws InvalidArgumentException If $privacy is not a known option
     */
    public function downloadResponse(
        Tree $tree,
        bool $sort_by_xref,
        string $encoding,
        string $privacy,
        string $line_endings,
        string $filename,
        string $format,
        ?Collection $records = null
    ): ResponseInterface {
        // Fail with a clear message instead of an undefined-index warning followed by a TypeError.
        if (!array_key_exists($privacy, self::ACCESS_LEVELS)) {
            throw new InvalidArgumentException('Unknown privacy option: ' . $privacy);
        }

        $access_level = self::ACCESS_LEVELS[$privacy];

        if ($format === 'gedcom') {
            $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records);
            $stream   = $this->stream_factory->createStreamFromResource($resource);

            // NOTE(review): the charset is reported as UTF-8 even when $encoding is something else - confirm this is intended.
            return $this->response_factory->createResponse()
                ->withBody($stream)
                ->withHeader('content-type', 'text/x-gedcom; charset=' . UTF8::NAME)
                ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . '.ged"');
        }

        // Create a new/empty .ZIP file
        $temp_zip_file  = stream_get_meta_data(tmpfile())['uri'];
        $zip_provider   = new FilesystemZipArchiveProvider($temp_zip_file, 0755);
        $zip_adapter    = new ZipArchiveAdapter($zip_provider);
        $zip_filesystem = new Filesystem($zip_adapter);

        if ($format === 'zipmedia') {
            $media_path = $tree->getPreference('MEDIA_DIRECTORY');
        } elseif ($format === 'gedzip') {
            $media_path = '';
        } else {
            // Don't add media
            $media_path = null;
        }

        $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records, $zip_filesystem, $media_path);

        if ($format === 'gedzip') {
            $zip_filesystem->writeStream('gedcom.ged', $resource);
            $extension = '.gdz';
        } else {
            $zip_filesystem->writeStream($filename . '.ged', $resource);
            $extension = '.zip';
        }

        fclose($resource);

        $stream = $this->stream_factory->createStreamFromFile($temp_zip_file);

        return $this->response_factory->createResponse()
            ->withBody($stream)
            ->withHeader('content-type', 'application/zip')
            ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . $extension . '"');
    }

    /**
     * Write GEDCOM data to a stream.
     *
     * @param Tree                        $tree           - Export data from this tree
     * @param bool                        $sort_by_xref   - Write GEDCOM records in XREF order
     * @param string                      $encoding       - Convert from UTF-8 to other encoding
     * @param int                         $access_level   - Apply privacy filtering
     * @param string                      $line_endings   - CRLF or LF
     * @param Collection<int,string>|null $records        - Just export these records
     * @param FilesystemOperator|null     $zip_filesystem - Write media files to this filesystem
     * @param string|null                 $media_path     - Location within the zip filesystem
     *
     * @return resource
     *
     * @throws RuntimeException If the in-memory stream cannot be created, written or rewound
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = UTF8::NAME,
        int $access_level = Auth::PRIV_HIDE,
        string $line_endings = 'CRLF',
        ?Collection $records = null,
        ?FilesystemOperator $zip_filesystem = null,
        ?string $media_path = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        // Everything written to the stream is converted from UTF-8 to the requested encoding.
        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);

        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // The stored GEDCOM text is written as-is (see the loop below), so stream the raw database rows.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records.
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        $media_filesystem = Registry::filesystem()->media($tree);

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Header, trailer, or a caller-supplied record.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A raw database row: exactly one of these columns is present, depending on the table.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                // Copy the files referenced by media objects into the zip archive.
                if ($media_path !== null && $zip_filesystem !== null && preg_match('/0 @' . Gedcom::REGEX_XREF . '@ OBJE/', $gedcom) === 1) {
                    preg_match_all('/\n1 FILE (.+)/', $gedcom, $matches, PREG_SET_ORDER);

                    foreach ($matches as $match) {
                        $media_file = $match[1];

                        if ($media_filesystem->fileExists($media_file)) {
                            $zip_filesystem->writeStream($media_path . $media_file, $media_filesystem->readStream($media_file));
                        }
                    }
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";

                if ($line_endings === 'CRLF') {
                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
                }

                $bytes_written = fwrite($stream, $gedcom);

                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }

    /**
     * Create a header record for a gedcom file.
     *
     * @param Tree   $tree
     * @param string $encoding
     * @param bool   $include_sub - Also copy the SUBM/SUBN facts from the existing header
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Some encodings are written to the CHAR line under a different name.
        $gedcom_encodings = [
            UTF16BE::NAME     => 'UNICODE',
            UTF16LE::NAME     => 'UNICODE',
            Windows1252::NAME => 'ANSI',
        ];

        $encoding = $gedcom_encodings[$encoding] ?? $encoding;

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * Lines longer than $max_line_length characters are split, and the remainder is
     * carried over onto one or more CONC lines.
     *
     * @param string $gedcom          - One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length - Maximum number of characters per line
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                $parts = explode(' ', $line, 3);
                $level = (int) $parts[0];
                // A line without any space has no tag; avoid an undefined-offset warning.
                $tag   = $parts[1] ?? '';

                // Continuations of a CONT/CONC line stay at that line's level.
                // Continuations of any other line are one level below it.
                if ($tag !== 'CONT' && $tag !== 'CONC') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    // $pos counts characters, so compare it with a character offset (not a byte offset).
                    if ($pos === mb_strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can't split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode("\n", $lines);
    }

    /**
     * Select the GEDCOM text and XREF of every family record in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM text and XREF of every individual record in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM text and XREF of every source record in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM text and XREF of every media record in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM text and XREF of every other record in a tree, excluding
     * the header and trailer records.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by record type, then XREF length, then XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}