<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Cache;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Factory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;

use function app;
use function assert;
use function date;
use function explode;
use function fwrite;
use function implode;
use function is_string;
use function mb_convert_encoding;
use function mb_strlen;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_replace_callback;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function utf8_decode;

use const PATHINFO_EXTENSION;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * The output is always a header record, followed by the data records,
     * followed by a "0 TRLR" trailer.  Each record has the media path applied
     * (if any), is wrapped to the maximum GEDCOM line length, and is converted
     * to the requested encoding before being written.
     *
     * @param Tree                    $tree         - Export data from this tree
     * @param resource                $stream       - Write to this stream
     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                  $encoding     - Convert from UTF-8 to other encoding
     * @param int                     $access_level - Apply privacy filtering
     * @param string                  $media_path   - Prepend path to media filenames
     * @param Collection<string>|null $records      - Just export these records
     */
    public function export(
        Tree $tree,
        $stream,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ): void {
        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            // The header is created without the SUBM/SUBN facts.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // The raw database rows are streamed with a cursor.  They are plain row
            // objects, so the loop below writes their GEDCOM text as stored, without
            // calling privatizeGedcom().
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records, by
            // pre-populating the factory's cache entry with an empty collection.
            $cache = app('cache.array');
            assert($cache instanceof Cache);
            $cache->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Factory::individual()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Factory::family()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Factory::source()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Factory::gedcomRecord()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Factory::media()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Literal GEDCOM text (header, trailer, or caller-supplied record).
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    // Record object - apply the privacy rules for this access level.
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // Database row - the queries alias the GEDCOM column as "gedcom".
                    $gedcom = $datum->gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                fwrite($stream, $gedcom);
            }
        }
    }

    /**
     * Create a header record for a gedcom file.
     *
     * The header is built from scratch (source, destination, date/time, GEDCOM
     * version, character set, filename) and then the COPR, LANG, PLAC and NOTE
     * facts of the tree's existing header are appended.  The SUBM and SUBN
     * facts are appended only when $include_sub is true.
     *
     * @param Tree   $tree
     * @param string $encoding
     * @param bool   $include_sub
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM Lineage-Linked";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header.
        // If the tree has no header record, use an empty one (which has no facts).
        $header = Factory::header()->make('HEAD', $tree) ?? Factory::header()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only media object records ("0 @XREF@ OBJE") are modified; every other
     * record is returned unchanged.  Filenames that already start with the
     * media path are not prefixed a second time.
     *
     * @param string $gedcom
     * @param string $media_path
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom) !== 1) {
            return $gedcom;
        }

        $add_path = static function (array $match) use ($media_path): string {
            $filename = $match[1];

            // Convert separators to match new path.
            if (strpos($media_path, '\\') !== false) {
                $filename = strtr($filename, ['/' => '\\']);
            }

            if (strpos($filename, $media_path) !== 0) {
                $filename = $media_path . $filename;
            }

            // The pattern matches the newline and the tag as well as the filename,
            // so they must be written back - otherwise the "1 FILE" line is lost.
            return "\n1 FILE " . $filename;
        };

        // preg_replace_callback() returns null on a PCRE error; keep the original record then.
        return preg_replace_callback('/\n1 FILE (.+)/', $add_path, $gedcom) ?? $gedcom;
    }

    /**
     * Convert UTF-8 GEDCOM data to the requested character set.
     *
     * ANSI is written as ISO-8859-1.  ASCII - and, until it is implemented,
     * ANSEL - is written as ASCII.  Any other encoding name leaves the data
     * unchanged.
     *
     * @param string $encoding
     * @param string $gedcom
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1
                return utf8_decode($gedcom);

            case 'ANSEL':
                // coming soon...?  Until then, deliberately fall through to ASCII.
            case 'ASCII':
                // Might be needed by really old software?
                // Argument order is (string, TO encoding, FROM encoding).
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * The input is split on "\n"; the output lines are joined with Gedcom::EOL.
     * Lines longer than $max_line_length characters are split into the original
     * line plus one or more CONC lines.
     *
     * @param string $gedcom
     * @param int    $max_line_length
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);

                // CONC lines are one level below the line they continue,
                // unless that line is a CONT, in which case they share its level.
                if ($tag !== 'CONT') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;

                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }

                    if ($pos === strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can't split it :-(
                        break;
                    }

                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }

            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Query the family records of a tree, as rows with "gedcom" and "xref" columns.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom AS gedcom', 'f_id AS xref']);

        if ($sort_by_xref) {
            // Order by length first, then by value (so that e.g. F2 precedes F10).
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Query the individual records of a tree, as rows with "gedcom" and "xref" columns.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom AS gedcom', 'i_id AS xref']);

        if ($sort_by_xref) {
            // Order by length first, then by value (so that e.g. I2 precedes I10).
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Query the source records of a tree, as rows with "gedcom" and "xref" columns.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom AS gedcom', 's_id AS xref']);

        if ($sort_by_xref) {
            // Order by length first, then by value (so that e.g. S2 precedes S10).
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Query the media records of a tree, as rows with "gedcom" and "xref" columns.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom AS gedcom', 'm_id AS xref']);

        if ($sort_by_xref) {
            // Order by length first, then by value (so that e.g. M2 precedes M10).
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Query all other records of a tree, as rows with "gedcom" and "xref" columns.
     *
     * Header and trailer records are excluded, because export() writes its own.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom AS gedcom', 'o_id AS xref']);

        if ($sort_by_xref) {
            // Group by record type, then order by XREF length and value.
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}