<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2020 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;

use function date;
use function explode;
use function fwrite;
use function implode;
use function is_string;
use function mb_convert_encoding;
use function mb_strlen;
use function mb_strpos;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_replace_callback;
use function str_contains;
use function str_starts_with;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function utf8_decode;

use const PATHINFO_EXTENSION;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * Each record is (optionally) privacy-filtered, has its media paths
     * rewritten, is wrapped to the maximum GEDCOM line length, converted
     * to the requested encoding and then written to the stream.
     *
     * @param Tree                    $tree         - Export data from this tree
     * @param resource                $stream       - Write to this stream
     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                  $encoding     - Convert from UTF-8 to other encoding
     * @param int                     $access_level - Apply privacy filtering
     * @param string                  $media_path   - Prepend path to media filenames
     * @param Collection<string>|null $records      - Just export these records
     *
     * @return void
     */
    public function export(
        Tree $tree,
        $stream,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ): void {
        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // No privacy filtering is applied on this path, so the raw database
            // rows are streamed with cursors and their GEDCOM text is written as-is.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // Privacy filtering needs GEDCOM record objects (see privatizeGedcom() below).
            // Disable the pending changes before creating GEDCOM records.
            Registry::cache()->array()->remember(
                AbstractGedcomRecordFactory::class . $tree->id(),
                static function (): Collection {
                    return new Collection();
                }
            );

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Already GEDCOM text (header, trailer, or caller-supplied record).
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A raw database row: exactly one of these columns is selected,
                    // depending on which table the row came from.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                fwrite($stream, $gedcom);
            }
        }
    }

    /**
     * Create a header record for a gedcom file.
     *
     * The header is built from scratch, then the COPR, LANG, PLAC and NOTE
     * facts (and optionally SUBM/SUBN) of the tree's existing header are appended.
     *
     * @param Tree   $tree
     * @param string $encoding    - Written verbatim as the "1 CHAR" value
     * @param bool   $include_sub - Also copy the SUBM and SUBN facts
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM Lineage-Linked";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only media object (OBJE) records are modified; every other record is
     * returned unchanged.
     *
     * @param string $gedcom
     * @param string $media_path
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
            $converted = preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
                $filename = $match[1];

                // Don't modify external links
                if (!str_contains($filename, '://')) {
                    // Convert separators to match new path.
                    if (str_contains($media_path, '\\')) {
                        $filename = strtr($filename, ['/' => '\\']);
                    }

                    // Don't prepend the path twice.
                    if (!str_starts_with($filename, $media_path)) {
                        $filename = $media_path . $filename;
                    }
                }

                return "\n1 FILE " . $filename;
            }, $gedcom);

            // preg_replace_callback() returns null on a PCRE error; fall back to
            // the unmodified record rather than violating the return type.
            return $converted ?? $gedcom;
        }

        return $gedcom;
    }

    /**
     * Convert UTF-8 GEDCOM text to the requested character encoding.
     *
     * Unknown encodings (including UTF-8) are returned unchanged.
     *
     * @param string $encoding
     * @param string $gedcom
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1.
                // utf8_decode() is deprecated since PHP 8.2, so use mbstring instead.
                return mb_convert_encoding($gedcom, 'ISO-8859-1', 'UTF-8');

            case 'ANSEL':
                // coming soon...?  Until then, ANSEL falls through to ASCII.
            case 'ASCII':
                // Might be needed by really old software?
                // Argument order is (string, to_encoding, from_encoding).
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * The input is split on "\n"; the output lines are joined with Gedcom::EOL.
     *
     * @param string $gedcom
     * @param int    $max_line_length - Maximum length of a line, in characters
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);

                // A CONC record is subordinate to the line it continues. However, CONT and
                // CONC lines are already at the subordinate level, so their continuation
                // stays at the same level.
                if ($tag !== 'CONT' && $tag !== 'CONC') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;

                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }

                    // $pos is a character offset, so compare it with a character offset.
                    if ($pos === mb_strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can't split it :-(
                        break;
                    }

                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }

            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Query the GEDCOM text and XREF of every family record in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Query the GEDCOM text and XREF of every individual record in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Query the GEDCOM text and XREF of every source record in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Query the GEDCOM text and XREF of every media record in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Query the GEDCOM text and XREF of every record in the "other" table,
     * excluding header and trailer records (export() writes its own).
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by record type, then XREF length, then XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}