<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2020 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Cache;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Factory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;

use function app;
use function assert;
use function date;
use function explode;
use function fwrite;
use function implode;
use function is_string;
use function mb_convert_encoding;
use function mb_strlen;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_replace_callback;
use function str_contains;
use function str_starts_with;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function utf8_decode;

use const PATHINFO_EXTENSION;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * The output is a header record, followed by either the supplied records or
     * every individual, family, source, "other" and media record of the tree (in
     * that order), followed by a "0 TRLR" trailer.  Each record is optionally
     * given a media path, then line-wrapped, then converted to the requested
     * encoding before being written.
     *
     * @param Tree                    $tree         - Export data from this tree
     * @param resource                $stream       - Write to this stream
     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                  $encoding     - Convert from UTF-8 to other encoding
     * @param int                     $access_level - Apply privacy filtering
     * @param string                  $media_path   - Prepend path to media filenames
     * @param Collection<string>|null $records      - Just export these records
     */
    public function export(
        Tree $tree,
        $stream,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ): void {
        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            // The header is created without the SUBM/SUBN facts.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // At this access level the raw GEDCOM stored in each database row is written
            // as-is (privatizeGedcom() is never called), so there is no need to build
            // record objects.  Cursors let us stream the rows instead of loading them all.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records.
            // NOTE(review): this seeds the array cache with an empty collection under the
            // record factory's key for this tree - presumably the key under which the
            // factory caches pending changes; confirm against AbstractGedcomRecordFactory.
            $cache = app('cache.array');
            assert($cache instanceof Cache);
            $cache->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Factory::individual()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Factory::family()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Factory::source()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Factory::gedcomRecord()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Factory::media()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Already raw GEDCOM (header, trailer, or a caller-supplied string).
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A raw database row: exactly one of these columns was selected.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                fwrite($stream, $gedcom);
            }
        }
    }

    /**
     * Create a header record for a gedcom file.
     *
     * The header is rebuilt from scratch (source, destination, current date/time,
     * GEDCOM version, character set, filename).  The COPR, LANG, PLAC and NOTE facts
     * of the tree's existing header are appended, and optionally SUBM and SUBN too.
     *
     * @param Tree   $tree
     * @param string $encoding    - Written verbatim as the "1 CHAR" value
     * @param bool   $include_sub - Also copy the SUBM and SUBN facts from the existing header
     *
     * @return string - Header record, lines separated by "\n", no trailing newline
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM Lineage-Linked";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header.
        // If the tree has no header record, use an empty one so that facts() yields nothing.
        $header = Factory::header()->make('HEAD', $tree) ?? Factory::header()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only media object records (those starting "0 @xref@ OBJE") are modified; any
     * other record is returned unchanged.  A filename that already starts with the
     * media path is not prefixed a second time.
     *
     * @param string $gedcom     - One GEDCOM record, lines separated by "\n"
     * @param string $media_path - Path to prepend to each level 1 FILE value
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom) !== 1) {
            return $gedcom;
        }

        $converted = preg_replace_callback(
            '/\n1 FILE (.+)/',
            static function (array $match) use ($media_path): string {
                $filename = $match[1];

                // Convert separators to match new path.
                if (str_contains($media_path, '\\')) {
                    $filename = strtr($filename, ['/' => '\\']);
                }

                if (!str_starts_with($filename, $media_path)) {
                    $filename = $media_path . $filename;
                }

                // The callback's return value replaces the WHOLE match, not just the
                // captured filename, so the line break and tag must be written back.
                return "\n1 FILE " . $filename;
            },
            $gedcom
        );

        // preg_replace_callback() returns null on a PCRE error; keep the original text then.
        return $converted ?? $gedcom;
    }

    /**
     * Convert GEDCOM text from UTF-8 to the requested export encoding.
     *
     * Characters that cannot be represented in the target encoding are replaced by
     * mbstring's substitution character ("?" unless configured otherwise).
     *
     * @param string $encoding - 'ANSI', 'ANSEL' or 'ASCII'; anything else leaves the text unchanged
     * @param string $gedcom   - UTF-8 encoded text
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1
                // (utf8_decode() did the same conversion, but is deprecated since PHP 8.2).
                return mb_convert_encoding($gedcom, 'ISO-8859-1', 'UTF-8');

            case 'ANSEL':
                // coming soon...?
                // Not implemented: deliberately falls through to the ASCII conversion.
            case 'ASCII':
                // Might be needed by really old software?
                // Argument order is (string, TO, FROM).
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * The input is split on "\n".  Any line longer than $max_line_length characters
     * is broken into a first part plus one or more CONC lines, and all resulting
     * lines are joined with Gedcom::EOL.
     *
     * @param string $gedcom          - One GEDCOM record, lines separated by "\n"
     * @param int    $max_line_length - Maximum number of characters per line
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);
                // Continuation lines go one level below the line being split, except when
                // the long line is itself a CONT line, where they stay at the same level.
                if ($tag !== 'CONT') {
                    $level++;
                }
                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    // If we backed up all the way to the first space found from offset 3
                    // onwards (the delimiter after the tag), there is nothing to split on.
                    if ($pos === strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can’t split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Query the GEDCOM data of every family record in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF, so that F2 sorts before F10
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Query the GEDCOM data of every individual record in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF, so that I2 sorts before I10
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Query the GEDCOM data of every source record in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF, so that S2 sorts before S10
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Query the GEDCOM data of every media record in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF, so that M2 sorts before M10
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Query the GEDCOM data of every record in the "other" table of a tree,
     * excluding header and trailer records (export() writes its own).
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by record type, then XREF length, then XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}