<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2020 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;

use function date;
use function explode;
use function fwrite;
use function implode;
use function is_string;
use function mb_convert_encoding;
use function mb_strlen;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_replace_callback;
use function str_contains;
use function str_starts_with;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function utf8_decode;

use const PATHINFO_EXTENSION;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * The output is a header record, followed by the selected records, followed
     * by a "0 TRLR" trailer. Each record is optionally privacy-filtered, has its
     * media paths rewritten, is wrapped to the maximum GEDCOM line length and is
     * converted to the requested encoding before being written.
     *
     * @param Tree                    $tree         - Export data from this tree
     * @param resource                $stream       - Write to this stream
     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                  $encoding     - Convert from UTF-8 to other encoding
     * @param int                     $access_level - Apply privacy filtering
     * @param string                  $media_path   - Prepend path to media filenames
     * @param Collection<string>|null $records      - Just export these records
     *
     * @return void
     */
    public function export(
        Tree $tree,
        $stream,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ): void {
        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            // The header omits the SUBM/SUBN facts in this case.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // No privacy filtering is requested, so the raw database rows are enough.
            // Use cursors so that the rows are fetched lazily.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records.
            // (Seeds the array-cache entry for this tree with an empty collection.)
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                // Each datum is raw GEDCOM text, a record object, or a database row.
                if (is_string($datum)) {
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A database row has exactly one of these columns, depending on its table.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                // Wrap before converting, as the line length is measured in UTF-8 characters.
                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                fwrite($stream, $gedcom);
            }
        }
    }

    /**
     * Create a header record for a gedcom file.
     *
     * The header is built from scratch (source, destination, date, GEDCOM version,
     * character set and filename), then selected facts are copied from the tree's
     * existing header record.
     *
     * @param Tree   $tree        - The tree whose name and existing header are used
     * @param string $encoding    - Written verbatim as the "1 CHAR" value
     * @param bool   $include_sub - Also copy the SUBM and SUBN facts
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM Lineage-Linked";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header.
        // If the tree has no header record, use an empty one so that the loops below find no facts.
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only media object records ("0 @xref@ OBJE") are modified; every other
     * record is returned unchanged.
     *
     * @param string $gedcom     - A single GEDCOM record, with "\n" line endings
     * @param string $media_path - The path to prepend to each level 1 FILE value
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
                $filename = $match[1];

                // Convert separators to match new path.
                if (str_contains($media_path, '\\')) {
                    $filename = strtr($filename, ['/' => '\\']);
                }

                // Don't add the path twice.
                if (!str_starts_with($filename, $media_path)) {
                    $filename = $media_path . $filename;
                }

                // The pattern matched the whole line, so the replacement must
                // restore the line break and tag as well as the filename.
                return "\n1 FILE " . $filename;
            }, $gedcom);
        }

        return $gedcom;
    }

    /**
     * Convert UTF-8 GEDCOM data to another character encoding.
     *
     * Characters that cannot be represented in the target encoding are
     * replaced by the mbstring substitute character ("?" by default).
     *
     * @param string $encoding - "ANSI", "ANSEL" or "ASCII"; anything else leaves the data unchanged
     * @param string $gedcom   - UTF-8 encoded GEDCOM data
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1.
                // (utf8_decode() did the same conversion, but is deprecated since PHP 8.2.)
                return mb_convert_encoding($gedcom, 'ISO-8859-1', 'UTF-8');

            case 'ANSEL':
                // coming soon...?
                // There is no ANSEL encoder yet, so deliberately fall through to ASCII.
            case 'ASCII':
                // Might be needed by really old software?
                // Arguments are (data, TO encoding, FROM encoding).
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * The input is split on "\n" and the output lines are joined with Gedcom::EOL.
     * A line longer than the limit is cut into pieces, and each subsequent piece
     * is written as a CONC line.
     *
     * NOTE(review): the "nothing to split" test compares a character offset
     * (mb_substr) with a byte offset (strpos). These agree when the level and tag
     * are plain ASCII, which is presumably always the case - confirm.
     *
     * @param string $gedcom          - One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length - Maximum number of characters in a line
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);
                // CONC lines are one level below the line they continue,
                // except when continuing a CONT line, where they share its level.
                if ($tag !== 'CONT') {
                    $level++;
                }
                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    if ($pos === strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can’t split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Select the GEDCOM data and XREF of every family in a tree.
     *
     * @param Tree $tree         - Fetch rows belonging to this tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (F2 sorts before F10)
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every individual in a tree.
     *
     * @param Tree $tree         - Fetch rows belonging to this tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (I2 sorts before I10)
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every source in a tree.
     *
     * @param Tree $tree         - Fetch rows belonging to this tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (S2 sorts before S10)
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every media object in a tree.
     *
     * @param Tree $tree         - Fetch rows belonging to this tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (M2 sorts before M10)
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every other record in a tree.
     *
     * Header and trailer records are excluded, as the export creates its own.
     *
     * @param Tree $tree         - Fetch rows belonging to this tree
     * @param bool $sort_by_xref - Order by record type, then XREF length, then XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}