<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Encodings\UTF16BE;
use Fisharebest\Webtrees\Encodings\UTF16LE;
use Fisharebest\Webtrees\Encodings\UTF8;
use Fisharebest\Webtrees\Encodings\Windows1252;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use InvalidArgumentException;
use League\Flysystem\Filesystem;
use League\Flysystem\FilesystemOperator;
use League\Flysystem\ZipArchive\FilesystemZipArchiveProvider;
use League\Flysystem\ZipArchive\ZipArchiveAdapter;
use Psr\Http\Message\ResponseFactoryInterface;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\StreamFactoryInterface;
use RuntimeException;

use function addcslashes;
use function date;
use function explode;
use function fclose;
use function fopen;
use function fwrite;
use function implode;
use function is_resource;
use function is_string;
use function mb_strlen;
use function mb_strpos;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_match_all;
use function rewind;
use function stream_filter_append;
use function stream_get_meta_data;
use function strlen;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function tmpfile;

use const PATHINFO_EXTENSION;
use const PREG_SET_ORDER;
use const STREAM_FILTER_WRITE;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    // Maps the privacy option names accepted by downloadResponse() to access levels.
    private const ACCESS_LEVELS = [
        'gedadmin' => Auth::PRIV_NONE,
        'user'     => Auth::PRIV_USER,
        'visitor'  => Auth::PRIV_PRIVATE,
        'none'     => Auth::PRIV_HIDE,
    ];

    private ResponseFactoryInterface $response_factory;

    private StreamFactoryInterface $stream_factory;

    /**
     * @param ResponseFactoryInterface $response_factory Used to build the download response
     * @param StreamFactoryInterface   $stream_factory   Used to build the response body
     */
    public function __construct(ResponseFactoryInterface $response_factory, StreamFactoryInterface $stream_factory)
    {
        $this->response_factory = $response_factory;
        $this->stream_factory   = $stream_factory;
    }

    /**
     * Create a download response containing either a plain GEDCOM file or a
     * zip archive holding the GEDCOM file (and, depending on $format, media files).
     *
     * @param Tree                                            $tree         Export data from this tree
     * @param bool                                            $sort_by_xref Write GEDCOM records in XREF order
     * @param string                                          $encoding     Convert from UTF-8 to other encoding
     * @param string                                          $privacy      Filter records by role
     * @param string                                          $line_endings CRLF or LF
     * @param string                                          $filename     Name of download file, without an extension
     * @param string                                          $format       One of: gedcom, zip, zipmedia, gedzip
     * @param Collection<int,string|object|GedcomRecord>|null $records
     *
     * @throws InvalidArgumentException If $privacy is not one of the known options
     * @throws RuntimeException         If a temporary file or stream cannot be created or written
     */
    public function downloadResponse(
        Tree $tree,
        bool $sort_by_xref,
        string $encoding,
        string $privacy,
        string $line_endings,
        string $filename,
        string $format,
        Collection|null $records = null
    ): ResponseInterface {
        // Fail with a clear message rather than an undefined-index warning followed by a TypeError.
        $access_level = self::ACCESS_LEVELS[$privacy]
            ?? throw new InvalidArgumentException('Unknown privacy option: ' . $privacy);

        if ($format === 'gedcom') {
            $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records);
            $stream   = $this->stream_factory->createStreamFromResource($resource);

            // NOTE(review): the charset is always declared as UTF-8, although the body has been
            // converted to $encoding. Confirm whether this header should reflect $encoding.
            return $this->response_factory->createResponse()
                ->withBody($stream)
                ->withHeader('content-type', 'text/x-gedcom; charset=' . UTF8::NAME)
                ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . '.ged"');
        }

        // Create a new/empty .ZIP file
        $temp_file = tmpfile();

        if ($temp_file === false) {
            throw new RuntimeException('Failed to create temporary file');
        }

        $temp_zip_file = stream_get_meta_data($temp_file)['uri'];

        // We only need the unique name. Closing the handle removes the empty file,
        // so that the zip archive is created from scratch at this path.
        fclose($temp_file);

        $zip_provider   = new FilesystemZipArchiveProvider($temp_zip_file, 0755);
        $zip_adapter    = new ZipArchiveAdapter($zip_provider);
        $zip_filesystem = new Filesystem($zip_adapter);

        if ($format === 'zipmedia') {
            $media_path = $tree->getPreference('MEDIA_DIRECTORY');
        } elseif ($format === 'gedzip') {
            $media_path = '';
        } else {
            // Don't add media
            $media_path = null;
        }

        $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records, $zip_filesystem, $media_path);

        try {
            if ($format === 'gedzip') {
                $zip_filesystem->writeStream('gedcom.ged', $resource);
                $extension = '.gdz';
            } else {
                $zip_filesystem->writeStream($filename . '.ged', $resource);
                $extension = '.zip';
            }
        } finally {
            // Release the in-memory GEDCOM stream, even if writing to the archive failed.
            if (is_resource($resource)) {
                fclose($resource);
            }
        }

        $stream = $this->stream_factory->createStreamFromFile($temp_zip_file);

        return $this->response_factory->createResponse()
            ->withBody($stream)
            ->withHeader('content-type', 'application/zip')
            ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . $extension . '"');
    }

    /**
     * Write GEDCOM data to a stream.
     *
     * The returned stream is an in-memory stream, rewound to the start.
     * When both $zip_filesystem and $media_path are given, the files referenced
     * by exported media records are also copied to $zip_filesystem.
     *
     * @param Tree                                            $tree           Export data from this tree
     * @param bool                                            $sort_by_xref   Write GEDCOM records in XREF order
     * @param string                                          $encoding       Convert from UTF-8 to other encoding
     * @param int                                             $access_level   Apply privacy filtering
     * @param string                                          $line_endings   CRLF or LF
     * @param Collection<int,string|object|GedcomRecord>|null $records        Just export these records
     * @param FilesystemOperator|null                         $zip_filesystem Write media files to this filesystem
     * @param string|null                                     $media_path     Location within the zip filesystem
     *
     * @return resource
     *
     * @throws RuntimeException If the stream cannot be created, written or rewound
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = UTF8::NAME,
        int $access_level = Auth::PRIV_HIDE,
        string $line_endings = 'CRLF',
        Collection|null $records = null,
        FilesystemOperator|null $zip_filesystem = null,
        string|null $media_path = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        // Everything written to the stream is converted from UTF-8 to the requested encoding.
        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);

        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // No privacy filtering is applied to these rows, so the raw GEDCOM text
            // can be read straight from the database, one row at a time.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.
            // Disable the pending changes before creating GEDCOM records.
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static fn (): Collection => new Collection());

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        $media_filesystem = $tree->mediaFilesystem();

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Literal GEDCOM text, such as the header and trailer.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);

                    // Nothing of this record is left at this access level.
                    if ($gedcom === '') {
                        continue;
                    }
                } else {
                    // A database row. Each table uses its own column prefix.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== null && $zip_filesystem !== null && preg_match('/0 @' . Gedcom::REGEX_XREF . '@ OBJE/', $gedcom) === 1) {
                    preg_match_all('/\n1 FILE (.+)/', $gedcom, $matches, PREG_SET_ORDER);

                    foreach ($matches as $match) {
                        $media_file = $match[1];

                        if ($media_filesystem->fileExists($media_file)) {
                            $media_stream = $media_filesystem->readStream($media_file);

                            try {
                                $zip_filesystem->writeStream($media_path . $media_file, $media_stream);
                            } finally {
                                // Don't keep one open handle per media file for the rest of the export.
                                if (is_resource($media_stream)) {
                                    fclose($media_stream);
                                }
                            }
                        }
                    }
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";

                if ($line_endings === 'CRLF') {
                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
                }

                $bytes_written = fwrite($stream, $gedcom);

                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }

    /**
     * Build a GEDCOM header record for an export.
     *
     * @param Tree   $tree        The tree being exported
     * @param string $encoding    Encoding of the export, written to the CHAR tag
     * @param bool   $include_sub Whether to copy SUBM/SUBN from the existing header
     *
     * @return string The header record, without a trailing newline
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Some encodings are written to the CHAR tag under a different name.
        $gedcom_encodings = [
            UTF16BE::NAME     => 'UNICODE',
            UTF16LE::NAME     => 'UNICODE',
            Windows1252::NAME => 'ANSI',
        ];

        $encoding = $gedcom_encodings[$encoding] ?? $encoding;

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        // There should always be a header record.
        if ($header instanceof Header) {
            foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }

            if ($include_sub) {
                foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                    $gedcom .= "\n" . $fact->gedcom();
                }
            }
        }

        return $gedcom;
    }

    /**
     * Split over-long GEDCOM lines into a line followed by CONC continuation lines.
     *
     * Lengths are measured in characters, not bytes.
     *
     * @param string $gedcom          One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length Maximum number of characters in a line
     *
     * @return string The same GEDCOM text, with long lines wrapped
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                $parts = explode(' ', $line, 3);
                $level = (int) $parts[0];
                $tag   = $parts[1] ?? '';

                // A continuation is one level below the line that it continues. CONT and CONC
                // lines are themselves continuations, so their continuation stays at the same level.
                if ($tag !== 'CONT' && $tag !== 'CONC') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;

                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }

                    // $pos is a character index, so it must be compared with a character index.
                    if ($pos === mb_strpos($line, ' ', 3)) {
                        // No non-spaces in the data, so it cannot be split.
                        break;
                    }

                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }

            $lines[] = $line;
        }

        return implode("\n", $lines);
    }

    /**
     * Query the family records of a tree.
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            // Shorter XREFs first, then in string order.
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Query the individual records of a tree.
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            // Shorter XREFs first, then in string order.
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Query the source records of a tree.
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            // Shorter XREFs first, then in string order.
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Query the media records of a tree.
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            // Shorter XREFs first, then in string order.
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Query all other records of a tree, except for the header and trailer.
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            // Group by record type, then shorter XREFs first, then in string order.
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}