<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use RuntimeException;

use function date;
use function explode;
use function fopen;
use function fwrite;
use function implode;
use function is_string;
use function mb_convert_encoding;
use function mb_strlen;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_replace_callback;
use function rewind;
use function str_contains;
use function str_starts_with;
use function strlen;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function utf8_decode;

use const PATHINFO_EXTENSION;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * The output is a header record, followed by the selected records, followed
     * by a "0 TRLR" trailer.  Each record is optionally given a media path prefix,
     * wrapped to Gedcom::LINE_LENGTH, terminated with Gedcom::EOL and converted
     * to the requested encoding before being written.
     *
     * @param Tree                                       $tree         - Export data from this tree
     * @param bool                                       $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                                     $encoding     - Convert from UTF-8 to other encoding
     * @param int                                        $access_level - Apply privacy filtering
     * @param string                                     $media_path   - Prepend path to media filenames
     * @param Collection<int,string|GedcomRecord>|null   $records      - Just export these records
     *
     * @return resource An in-memory stream, rewound to the start of the data
     *
     * @throws RuntimeException If the stream cannot be created, written or rewound
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            // The header for a partial export omits the SUBM/SUBN facts.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // At this access level the stored GEDCOM is written as-is, so there is no
            // need to build GedcomRecord objects: stream the raw rows with a cursor.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // Privacy filtering is applied via GedcomRecord::privatizeGedcom(), so each
            // row must be mapped to a GedcomRecord object.

            // Disable the pending changes before creating GEDCOM records.
            // NOTE(review): presumably this is the cache entry that the record
            // factories consult for pending changes - confirm against the factory.
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Header, trailer, or a record supplied as raw GEDCOM text.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A raw database row: exactly one of these columns is selected,
                    // depending on which table the row came from.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                $bytes_written = fwrite($stream, $gedcom);

                // fwrite() returns false on failure, which also fails this comparison.
                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream. Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }

    /**
     * Create a header record for a gedcom file.
     *
     * The header is built from fixed values (source, destination, date/time,
     * GEDCOM version, character set, filename) plus the COPR, LANG, PLAC and NOTE
     * facts of the tree's existing header record.
     *
     * @param Tree   $tree        - The tree whose name and existing header are used
     * @param string $encoding    - Written verbatim as the "1 CHAR" value
     * @param bool   $include_sub - Also copy the SUBM and SUBN facts from the existing header
     *
     * @return string The header record, with lines separated by "\n"
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header.
        // If the tree has no header record, use an empty one so that facts() yields nothing.
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only records that start with "0 @...@ OBJE" are modified; every other
     * record is returned unchanged.
     *
     * @param string $gedcom     - A single GEDCOM record
     * @param string $media_path - The prefix to add to each level 1 FILE value
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
                $filename = $match[1];

                // Don't modify external links
                if (!str_contains($filename, '://')) {
                    // Convert separators to match new path.
                    if (str_contains($media_path, '\\')) {
                        $filename = strtr($filename, ['/' => '\\']);
                    }

                    // Don't add the prefix twice.
                    if (!str_starts_with($filename, $media_path)) {
                        $filename = $media_path . $filename;
                    }
                }

                return "\n1 FILE " . $filename;
            }, $gedcom);
        }

        return $gedcom;
    }

    /**
     * Convert UTF-8 GEDCOM data to the requested character set.
     *
     * "ANSI" is written as ISO-8859-1.  "ASCII" and (for now) "ANSEL" are written
     * as ASCII.  Any other value returns the data unchanged.
     *
     * @param string $encoding - The target encoding name
     * @param string $gedcom   - UTF-8 encoded GEDCOM data
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1
                // (utf8_decode() did this conversion, but is deprecated since PHP 8.2.)
                return mb_convert_encoding($gedcom, 'ISO-8859-1', 'UTF-8');

            case 'ANSEL':
                // coming soon...?
            case 'ASCII':
                // Might be needed by really old software?
                // mb_convert_encoding() takes the target encoding first, then the source.
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * Each input line longer than $max_line_length characters is split into a
     * leading part plus one or more CONC lines.  The output lines are joined
     * with Gedcom::EOL.
     *
     * @param string $gedcom          - One GEDCOM record, with lines separated by "\n"
     * @param int    $max_line_length - Maximum length of a line, in characters
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);
                // CONC lines are one level below the line they continue, except when
                // continuing a CONT line, where they stay at the same level.
                if ($tag !== 'CONT') {
                    $level++;
                }
                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    // strpos() gives a byte offset; this assumes the level/tag prefix is
                    // ASCII, so that it matches the character offset in $pos.
                    if ($pos === strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can't split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Query the family records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF, so shorter XREFs come first
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Query the individual records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF, so shorter XREFs come first
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Query the source records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF, so shorter XREFs come first
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Query the media records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF, so shorter XREFs come first
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Query the remaining records of a tree, excluding the header and trailer
     * (which export() writes itself).
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by record type, then XREF length, then XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}