<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Cache;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Factory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;

use function app;
use function assert;
use function date;
use function explode;
use function fwrite;
use function implode;
use function is_string;
use function mb_convert_encoding;
use function mb_strlen;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_replace_callback;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function utf8_decode;

use const PATHINFO_EXTENSION;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * The output is a header record, followed by the selected records, followed
     * by a "0 TRLR" trailer. Each record is optionally given a media path,
     * wrapped to Gedcom::LINE_LENGTH, converted to the target encoding and
     * written to the stream.
     *
     * @param Tree                    $tree         - Export data from this tree
     * @param resource                $stream       - Write to this stream
     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                  $encoding     - Convert from UTF-8 to other encoding
     * @param int                     $access_level - Apply privacy filtering
     * @param string                  $media_path   - Prepend path to media filenames
     * @param Collection<string>|null $records      - Just export these records
     */
    public function export(
        Tree $tree,
        $stream,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ): void {
        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // At this access level the raw database rows are written as-is (privatizeGedcom()
            // is never called for them), so there is no need to create GEDCOM record objects.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records.
            $cache = app('cache.array');
            assert($cache instanceof Cache);
            $cache->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Factory::individual()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Factory::family()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Factory::source()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Factory::gedcomRecord()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Factory::media()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Header, trailer, or a caller-supplied GEDCOM string.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A raw database row: exactly one of these columns is present,
                    // depending on which table the row was selected from.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                fwrite($stream, $gedcom);
            }
        }
    }

    /**
     * Create a header record for a gedcom file.
     *
     * The header is built from scratch (source, date, GEDCOM version, character
     * set, filename) and then the COPR/LANG/PLAC/NOTE facts - and optionally the
     * SUBM/SUBN facts - are copied from the tree's existing header record.
     *
     * @param Tree   $tree
     * @param string $encoding    - Written verbatim as the "1 CHAR" value
     * @param bool   $include_sub - Also copy the SUBM and SUBN facts
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM Lineage-Linked";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header.
        // If the tree has no header record, use an empty one so that the loops below are no-ops.
        $header = Factory::header()->make('HEAD', $tree) ?? Factory::header()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only media object records ("0 @xref@ OBJE") are modified; any other
     * record is returned unchanged. Filenames that already begin with the
     * media path are not prefixed a second time.
     *
     * @param string $gedcom
     * @param string $media_path
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
            $converted = preg_replace_callback(
                '/\n1 FILE (.+)/',
                static function (array $match) use ($media_path): string {
                    $filename = $match[1];

                    // Convert separators to match new path.
                    if (strpos($media_path, '\\') !== false) {
                        $filename = strtr($filename, ['/' => '\\']);
                    }

                    if (strpos($filename, $media_path) !== 0) {
                        $filename = $media_path . $filename;
                    }

                    // The pattern consumes the line break and the tag, so they must be
                    // written back - otherwise the FILE line is merged into the previous line.
                    return "\n1 FILE " . $filename;
                },
                $gedcom
            );

            // preg_replace_callback() returns null on a PCRE error: keep the record as it was.
            return $converted ?? $gedcom;
        }

        return $gedcom;
    }

    /**
     * Convert UTF-8 GEDCOM data to the requested character encoding.
     *
     * Any encoding other than ANSI, ANSEL or ASCII is returned unchanged.
     *
     * @param string $encoding
     * @param string $gedcom
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1
                // NOTE(review): utf8_decode() is deprecated as of PHP 8.2 - plan a replacement.
                return utf8_decode($gedcom);

            case 'ANSEL':
                // coming soon...? Until then, deliberately fall through to ASCII.
            case 'ASCII':
                // Might be needed by really old software?
                // Argument order is (string, to-encoding, from-encoding).
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * The input is split on "\n"; the output lines are joined with Gedcom::EOL.
     *
     * @param string $gedcom
     * @param int    $max_line_length
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);
                // A CONC line is one level below the line it continues, except that
                // a CONC following a CONT stays at the level of the CONT.
                if ($tag !== 'CONT') {
                    $level++;
                }
                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    if ($pos === strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can’t split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Select the GEDCOM and XREF of every family in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (so that F2 sorts before F10)
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM and XREF of every individual in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (so that I2 sorts before I10)
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM and XREF of every source in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (so that S2 sorts before S10)
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM and XREF of every media object in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (so that M2 sorts before M10)
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM and XREF of every record in the "other" table of a tree.
     *
     * Header and trailer records are excluded, because export() writes its own.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by record type, then XREF length, then XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}