<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use RuntimeException;

use function date;
use function explode;
use function fwrite;
use function mb_convert_encoding;
use function pathinfo;
use function str_contains;
use function str_starts_with;
use function strlen;
use function strpos;
use function strtolower;
use function strtoupper;
use function utf8_decode;

use const PATHINFO_EXTENSION;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * Each record is optionally privacy-filtered, has its media paths
     * rewritten, is wrapped to the GEDCOM line length, converted to the
     * requested encoding and then written to the stream.
     *
     * @param Tree                    $tree         - Export data from this tree
     * @param resource                $stream       - Write to this stream
     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                  $encoding     - Convert from UTF-8 to other encoding
     * @param int                     $access_level - Apply privacy filtering
     * @param string                  $media_path   - Prepend path to media filenames
     * @param Collection<string>|null $records      - Just export these records
     *
     * @throws RuntimeException If a record could not be written in full.
     */
    public function export(
        Tree $tree,
        $stream,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ): void {
        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // No privacy filtering in this branch: the raw database rows are
            // streamed with a cursor and written without building record objects.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records.
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Header, trailer, or a pre-rendered record.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A raw database row: exactly one of these columns is set,
                    // depending on which table the row came from.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                $bytes_written = fwrite($stream, $gedcom);

                // fwrite() returns false on failure, or a short count on a partial write.
                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
                }
            }
        }
    }

    /**
     * Create a header record for a gedcom file.
     *
     * The header is rebuilt from scratch; only the COPR, LANG, PLAC and NOTE
     * facts (and, optionally, SUBM and SUBN) are copied from the tree's
     * existing header record.
     *
     * @param Tree   $tree        - Tree whose name and header are used
     * @param string $encoding    - Value written to the CHAR tag
     * @param bool   $include_sub - Also copy the SUBM and SUBN facts
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header.
        // Fall back to an empty header when the factory returns null.
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only media object (OBJE) records are modified. Filenames containing
     * "://" and filenames that already start with the path are left alone.
     *
     * @param string $gedcom
     * @param string $media_path
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
                $filename = $match[1];

                // Don’t modify external links
                if (!str_contains($filename, '://')) {
                    // Convert separators to match new path.
                    if (str_contains($media_path, '\\')) {
                        $filename = strtr($filename, ['/' => '\\']);
                    }

                    if (!str_starts_with($filename, $media_path)) {
                        $filename = $media_path . $filename;
                    }
                }

                return "\n1 FILE " . $filename;
            }, $gedcom);
        }

        return $gedcom;
    }

    /**
     * Convert UTF-8 GEDCOM data to the requested character encoding.
     *
     * Unrecognised encodings (including UTF-8) are returned unchanged.
     *
     * @param string $encoding
     * @param string $gedcom
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1
                return utf8_decode($gedcom);

            case 'ANSEL':
                // coming soon...?  Until then, ANSEL is exported as ASCII.
            case 'ASCII':
                // Might be needed by really old software?
                // Argument order is (string, to_encoding, from_encoding).
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * Lines longer than the limit are split into a first part followed by
     * one or more CONC lines. Lengths and split positions are measured in
     * characters, not bytes.
     *
     * @param string $gedcom          - Record with "\n" line separators
     * @param int    $max_line_length - Maximum number of characters per line
     *
     * @return string The record, with lines joined by Gedcom::EOL
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);
                // CONC lines are one level below the line they continue,
                // but at the same level as a CONT line.
                if ($tag !== 'CONT') {
                    $level++;
                }
                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    // $pos is a character index, so compare it with a character offset.
                    if ($pos === mb_strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can’t split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Query the family records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (so F2 precedes F10)
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Query the individual records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (so I2 precedes I10)
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Query the source records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (so S2 precedes S10)
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Query the media records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (so M2 precedes M10)
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Query all other records of a tree, excluding the header and trailer.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by record type, then XREF length, then XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}