<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Encodings\UTF16BE;
use Fisharebest\Webtrees\Encodings\UTF16LE;
use Fisharebest\Webtrees\Encodings\UTF8;
use Fisharebest\Webtrees\Encodings\Windows1252;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use RuntimeException;

use function date;
use function explode;
use function fopen;
use function fwrite;
use function implode;
use function is_string;
use function mb_strlen;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_replace_callback;
use function rewind;
use function str_contains;
use function str_starts_with;
use function stream_filter_append;
use function strlen;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;

use const PATHINFO_EXTENSION;
use const STREAM_FILTER_WRITE;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * The data is written to an in-memory stream ("php://memory") through a
     * write filter that converts from UTF-8 to the requested encoding. The
     * stream is rewound before it is returned, so callers can read it from
     * the start.
     *
     * @param Tree                                            $tree         - Export data from this tree
     * @param bool                                            $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                                          $encoding     - Convert from UTF-8 to other encoding
     * @param int                                             $access_level - Apply privacy filtering
     * @param string                                          $media_path   - Prepend path to media filenames
     * @param string                                          $line_endings - CRLF or LF
     * @param Collection<int,string|object|GedcomRecord>|null $records      - Just export these records
     *
     * @return resource
     *
     * @throws RuntimeException If the temporary stream cannot be created, filtered, written or rewound.
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = UTF8::NAME,
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        string $line_endings = 'CRLF',
        ?Collection $records = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        $filter_params = [
            'src_encoding' => UTF8::NAME,
            'dst_encoding' => $encoding,
        ];

        // Without the filter, the data would be written as UTF-8 while the header
        // announces $encoding - so fail loudly rather than create a mislabelled file.
        if (stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, $filter_params) === false) {
            throw new RuntimeException('Failed to attach the encoding filter to the temporary stream');
        }

        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // The rows from these cursors are written exactly as stored in the database;
            // they are plain rows (not GedcomRecord objects), so privatizeGedcom() is never applied.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.
            // Disable the pending changes before creating GEDCOM records.
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Ready-made GEDCOM text, such as the header or the trailer.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A raw database row. Only one of these columns exists, depending on the table.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";

                if ($line_endings === 'CRLF') {
                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
                }

                $bytes_written = fwrite($stream, $gedcom);

                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }

    /**
     * Create a header record for a gedcom file.
     *
     * @param Tree   $tree        - Supplies the file name and any existing header facts
     * @param string $encoding    - Name of the target encoding, written to the CHAR tag
     * @param bool   $include_sub - Also copy the SUBM and SUBN facts from the existing header
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Encodings that are written to the CHAR tag under a different name.
        // Any other encoding is written under its own name.
        $gedcom_encodings = [
            UTF16BE::NAME     => 'UNICODE',
            UTF16LE::NAME     => 'UNICODE',
            Windows1252::NAME => 'ANSI',
        ];

        $encoding = $gedcom_encodings[$encoding] ?? $encoding;

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only records that start with "0 @...@ OBJE" are changed, and within them
     * only the level 1 FILE lines.
     *
     * @param string $gedcom
     * @param string $media_path
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
                $filename = $match[1];

                // Don't modify external links
                if (!str_contains($filename, '://')) {
                    // Convert separators to match new path.
                    if (str_contains($media_path, '\\')) {
                        $filename = strtr($filename, ['/' => '\\']);
                    }

                    // Don't add the path twice.
                    if (!str_starts_with($filename, $media_path)) {
                        $filename = $media_path . $filename;
                    }
                }

                return "\n1 FILE " . $filename;
            }, $gedcom);
        }

        return $gedcom;
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * Lengths are measured in characters (mb_strlen), not bytes. Each piece that
     * is split off is continued on a CONC line: at the same level when the line
     * being split is a CONT line, otherwise one level deeper.
     *
     * @param string $gedcom          - One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length - Maximum number of characters in a line
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);
                if ($tag !== 'CONT') {
                    $level++;
                }
                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    if ($pos === strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can't split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode("\n", $lines);
    }

    /**
     * Select the GEDCOM data and XREF of every family in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then by XREF
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every individual in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then by XREF
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every source in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then by XREF
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every media object in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then by XREF
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every other record in a tree,
     * excluding the header and trailer records.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by record type, then by XREF length, then by XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}