1 /*
2 * Copyright (c) 2007-2011, Novell Inc.
3 *
4 * This program is licensed under the BSD license, read LICENSE.BSD
5 * for further information
6 */
7
8 /*
9 * repo_write.c
10 *
11 * Write Repo data out to a file in solv format
12 *
13 * See doc/README.format for a description
14 * of the binary file format
15 *
16 */
17
18 #include <sys/types.h>
19 #include <limits.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <assert.h>
25 #include <errno.h>
26
27 #include "pool.h"
28 #include "util.h"
29 #include "repo_write.h"
30 #include "repopage.h"
31
32 /*------------------------------------------------------------------*/
33 /* Id map optimizations */
34
35 typedef struct needid {
36 Id need;
37 Id map;
38 } NeedId;
39
40
41 #define RELOFF(id) (needid[0].map + GETRELID(id))
42
/*
 * increment need Id
 *
 * incneedid:      count one Id; for a reldep also count its name and
 *                 evr parts
 * incneedidarray: count every Id of an ID_NULL terminated idarray,
 *                 return size of array (including trailing zero)
 *
 * needid: array of Id->NeedId
 */
51
52 static void
incneedid(Pool * pool,Id id,NeedId * needid)53 incneedid(Pool *pool, Id id, NeedId *needid)
54 {
55 while (ISRELDEP(id))
56 {
57 Reldep *rd = GETRELDEP(pool, id);
58 needid[RELOFF(id)].need++;
59 if (ISRELDEP(rd->evr))
60 incneedid(pool, rd->evr, needid);
61 else
62 needid[rd->evr].need++;
63 id = rd->name;
64 }
65 needid[id].need++;
66 }
67
68 static int
incneedidarray(Pool * pool,Id * idarray,NeedId * needid)69 incneedidarray(Pool *pool, Id *idarray, NeedId *needid)
70 {
71 Id id;
72 int n = 0;
73
74 if (!idarray)
75 return 0;
76 while ((id = *idarray++) != 0)
77 {
78 n++;
79 while (ISRELDEP(id))
80 {
81 Reldep *rd = GETRELDEP(pool, id);
82 needid[RELOFF(id)].need++;
83 if (ISRELDEP(rd->evr))
84 incneedid(pool, rd->evr, needid);
85 else
86 needid[rd->evr].need++;
87 id = rd->name;
88 }
89 needid[id].need++;
90 }
91 return n + 1;
92 }
93
94
/*
 * sort helpers: order NeedId entries by descending need
 */
98
99 static int
needid_cmp_need(const void * ap,const void * bp,void * dp)100 needid_cmp_need(const void *ap, const void *bp, void *dp)
101 {
102 const NeedId *a = ap;
103 const NeedId *b = bp;
104 int r;
105 r = b->need - a->need;
106 if (r)
107 return r;
108 return a->map - b->map;
109 }
110
111 static int
needid_cmp_need_s(const void * ap,const void * bp,void * dp)112 needid_cmp_need_s(const void *ap, const void *bp, void *dp)
113 {
114 const NeedId *a = ap;
115 const NeedId *b = bp;
116 Stringpool *spool = dp;
117 const char *as;
118 const char *bs;
119
120 int r;
121 r = b->need - a->need;
122 if (r)
123 return r;
124 as = spool->stringspace + spool->strings[a->map];
125 bs = spool->stringspace + spool->strings[b->map];
126 return strcmp(as, bs);
127 }
128
129
130 /*------------------------------------------------------------------*/
131 /* output helper routines, used for writing the header */
132 /* (the data itself is accumulated in memory and written with
133 * write_blob) */
134
135 /*
136 * unsigned 32-bit
137 */
138
139 static void
write_u32(Repodata * data,unsigned int x)140 write_u32(Repodata *data, unsigned int x)
141 {
142 FILE *fp = data->fp;
143 if (data->error)
144 return;
145 if (putc(x >> 24, fp) == EOF ||
146 putc(x >> 16, fp) == EOF ||
147 putc(x >> 8, fp) == EOF ||
148 putc(x, fp) == EOF)
149 {
150 data->error = pool_error(data->repo->pool, -1, "write error u32: %s", strerror(errno));
151 }
152 }
153
154
155 /*
156 * unsigned 8-bit
157 */
158
159 static void
write_u8(Repodata * data,unsigned int x)160 write_u8(Repodata *data, unsigned int x)
161 {
162 if (data->error)
163 return;
164 if (putc(x, data->fp) == EOF)
165 {
166 data->error = pool_error(data->repo->pool, -1, "write error u8: %s", strerror(errno));
167 }
168 }
169
170 /*
171 * data blob
172 */
173
174 static void
write_blob(Repodata * data,void * blob,int len)175 write_blob(Repodata *data, void *blob, int len)
176 {
177 if (data->error)
178 return;
179 if (len && fwrite(blob, len, 1, data->fp) != 1)
180 {
181 data->error = pool_error(data->repo->pool, -1, "write error blob: %s", strerror(errno));
182 }
183 }
184
185 /*
186 * Id
187 */
188
189 static void
write_id(Repodata * data,Id x)190 write_id(Repodata *data, Id x)
191 {
192 FILE *fp = data->fp;
193 if (data->error)
194 return;
195 if (x >= (1 << 14))
196 {
197 if (x >= (1 << 28))
198 putc((x >> 28) | 128, fp);
199 if (x >= (1 << 21))
200 putc((x >> 21) | 128, fp);
201 putc((x >> 14) | 128, fp);
202 }
203 if (x >= (1 << 7))
204 putc((x >> 7) | 128, fp);
205 if (putc(x & 127, fp) == EOF)
206 {
207 data->error = pool_error(data->repo->pool, -1, "write error id: %s", strerror(errno));
208 }
209 }
210
211 static inline void
write_id_eof(Repodata * data,Id x,int eof)212 write_id_eof(Repodata *data, Id x, int eof)
213 {
214 if (x >= 64)
215 x = (x & 63) | ((x & ~63) << 1);
216 write_id(data, x | (eof ? 0 : 64));
217 }
218
219
220
221 static inline void
write_str(Repodata * data,const char * str)222 write_str(Repodata *data, const char *str)
223 {
224 if (data->error)
225 return;
226 if (fputs(str, data->fp) == EOF || putc(0, data->fp) == EOF)
227 {
228 data->error = pool_error(data->repo->pool, -1, "write error str: %s", strerror(errno));
229 }
230 }
231
232 /*
233 * Array of Ids
234 */
235
236 static void
write_idarray(Repodata * data,Pool * pool,NeedId * needid,Id * ids)237 write_idarray(Repodata *data, Pool *pool, NeedId *needid, Id *ids)
238 {
239 Id id;
240 if (!ids)
241 return;
242 if (!*ids)
243 {
244 write_u8(data, 0);
245 return;
246 }
247 for (;;)
248 {
249 id = *ids++;
250 if (needid)
251 id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
252 if (id >= 64)
253 id = (id & 63) | ((id & ~63) << 1);
254 if (!*ids)
255 {
256 write_id(data, id);
257 return;
258 }
259 write_id(data, id | 64);
260 }
261 }
262
263 static int
cmp_ids(const void * pa,const void * pb,void * dp)264 cmp_ids(const void *pa, const void *pb, void *dp)
265 {
266 Id a = *(Id *)pa;
267 Id b = *(Id *)pb;
268 return a - b;
269 }
270
#if 0
/*
 * Disabled: file writing variant of data_addidarray_sort().
 *
 * Maps the Ids through "needid" (if given), sorts the parts before and
 * after the marker and writes the array as differences to the previous
 * element, offset by one; a written 0 stands for the marker.
 */
static void
write_idarray_sort(Repodata *data, Pool *pool, NeedId *needid, Id *ids, Id marker)
{
  Id stackids[64];
  Id *sorted = stackids;
  Id cur, prev, delta;
  int count, pos, split;

  if (!ids)
    return;
  if (!*ids)
    {
      write_u8(data, 0);
      return;
    }

  /* short arrays live on the stack, longer ones on the heap */
  for (count = 0; ids[count]; count++)
    ;
  if (count > 64)
    sorted = solv_malloc2(count, sizeof(Id));
  for (pos = 0; pos < count; pos++)
    {
      cur = ids[pos];
      if (needid)
	cur = needid[ISRELDEP(cur) ? RELOFF(cur) : cur].need;
      sorted[pos] = cur;
    }

  /* That bloody solvable:prereqmarker needs to stay in position :-( */
  if (needid)
    marker = needid[marker].need;
  for (split = 0; split < count; split++)
    if (sorted[split] == marker)
      break;
  if (split > 1)
    solv_sort(sorted, split, sizeof(Id), cmp_ids, 0);
  if (count - split > 2)
    solv_sort(sorted + split + 1, count - split - 1, sizeof(Id), cmp_ids, 0);

  /* The differencing below produces many runs of ones and twos.  Fairly
     elaborate schemes to RLE those were tried, but they give only very
     mediocre improvements in compression, as coding the escapes costs
     quite some space, even if they are coded only as bits in IDs.  The
     best improvement was about 2.7% for the whole .solv file.  It's
     probably better to invest some complexity into sharing idarrays
     than into RLEing. */
  prev = 0;
  for (pos = 0; pos < count; pos++)
    {
      cur = sorted[pos];
      /* Ugly PREREQ handling.  A "difference" of 0 is the prereq marker,
         hence all real differences are offset by 1.  Otherwise we would
         have to handle negative differences, which would cost code space
         for the encoding of the sign.  We lose the exact mapping of
         prereq here, but we know the result, so we can recover from that
         in the reader. */
      if (cur == marker)
	delta = prev = 0;
      else
	{
	  delta = cur - prev + 1;
	  prev = cur;
	}
      /* XXX If difference is zero we have multiple equal elements,
         we might want to skip writing them out. */
      if (delta >= 64)
	delta = (delta & 63) | ((delta & ~63) << 1);
      /* bit 6 is the continuation flag, clear on the last element */
      write_id(data, pos == count - 1 ? delta : delta | 64);
    }
  if (sorted != stackids)
    solv_free(sorted);
}
#endif
361
362
/* growable output buffer, filled by the data_add* helpers */
struct extdata {
  unsigned char *buf;	/* buffer start */
  int len;		/* number of bytes used in buf */
};
367
368 struct cbdata {
369 Repo *repo;
370 Repodata *target;
371
372 Stringpool *ownspool;
373 Dirpool *owndirpool;
374
375 Id *keymap;
376 int nkeymap;
377 Id *keymapstart;
378
379 NeedId *needid;
380
381 Id *schema; /* schema construction space */
382 Id *sp; /* pointer in above */
383 Id *oldschema, *oldsp;
384
385 Id *solvschemata;
386 Id *subschemata;
387 int nsubschemata;
388 int current_sub;
389
390 struct extdata *extdata;
391
392 Id *dirused;
393
394 Id vstart;
395
396 Id maxdata;
397 Id lastlen;
398
399 int doingsolvables; /* working on solvables data */
400 };
401
/* allocation block sizes, used as "size - 1" masks */
#define NEEDED_BLOCK		1023	/* needid array */
#define SCHEMATA_BLOCK		31	/* subschemata array */
#define SCHEMATADATA_BLOCK	255
#define EXTDATA_BLOCK		4095	/* extdata buffers */
406
407 static inline void
data_addid(struct extdata * xd,Id sx)408 data_addid(struct extdata *xd, Id sx)
409 {
410 unsigned int x = (unsigned int)sx;
411 unsigned char *dp;
412
413 xd->buf = solv_extend(xd->buf, xd->len, 5, 1, EXTDATA_BLOCK);
414 dp = xd->buf + xd->len;
415
416 if (x >= (1 << 14))
417 {
418 if (x >= (1 << 28))
419 *dp++ = (x >> 28) | 128;
420 if (x >= (1 << 21))
421 *dp++ = (x >> 21) | 128;
422 *dp++ = (x >> 14) | 128;
423 }
424 if (x >= (1 << 7))
425 *dp++ = (x >> 7) | 128;
426 *dp++ = x & 127;
427 xd->len = dp - xd->buf;
428 }
429
430 static inline void
data_addideof(struct extdata * xd,Id sx,int eof)431 data_addideof(struct extdata *xd, Id sx, int eof)
432 {
433 unsigned int x = (unsigned int)sx;
434 unsigned char *dp;
435
436 xd->buf = solv_extend(xd->buf, xd->len, 5, 1, EXTDATA_BLOCK);
437 dp = xd->buf + xd->len;
438
439 if (x >= (1 << 13))
440 {
441 if (x >= (1 << 27))
442 *dp++ = (x >> 27) | 128;
443 if (x >= (1 << 20))
444 *dp++ = (x >> 20) | 128;
445 *dp++ = (x >> 13) | 128;
446 }
447 if (x >= (1 << 6))
448 *dp++ = (x >> 6) | 128;
449 *dp++ = eof ? (x & 63) : (x & 63) | 64;
450 xd->len = dp - xd->buf;
451 }
452
453 static void
data_addid64(struct extdata * xd,unsigned int x,unsigned int hx)454 data_addid64(struct extdata *xd, unsigned int x, unsigned int hx)
455 {
456 if (hx)
457 {
458 if (hx > 7)
459 {
460 data_addid(xd, (Id)(hx >> 3));
461 xd->buf[xd->len - 1] |= 128;
462 hx &= 7;
463 }
464 data_addid(xd, (Id)(x | 0x80000000));
465 xd->buf[xd->len - 5] = (x >> 28) | (hx << 4) | 128;
466 }
467 else
468 data_addid(xd, (Id)x);
469 }
470
471 static void
data_addidarray_sort(struct extdata * xd,Pool * pool,NeedId * needid,Id * ids,Id marker)472 data_addidarray_sort(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marker)
473 {
474 int len, i;
475 Id lids[64], *sids;
476 Id id, old;
477
478 if (!ids)
479 return;
480 if (!*ids)
481 {
482 data_addid(xd, 0);
483 return;
484 }
485 for (len = 0; len < 64 && ids[len]; len++)
486 {
487 Id id = ids[len];
488 if (needid)
489 id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
490 lids[len] = id;
491 }
492 if (ids[len])
493 {
494 for (i = len + 1; ids[i]; i++)
495 ;
496 sids = solv_malloc2(i, sizeof(Id));
497 memcpy(sids, lids, 64 * sizeof(Id));
498 for (; ids[len]; len++)
499 {
500 Id id = ids[len];
501 if (needid)
502 id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
503 sids[len] = id;
504 }
505 }
506 else
507 sids = lids;
508
509 /* That bloody solvable:prereqmarker needs to stay in position :-( */
510 if (needid)
511 marker = needid[marker].need;
512 for (i = 0; i < len; i++)
513 if (sids[i] == marker)
514 break;
515 if (i > 1)
516 solv_sort(sids, i, sizeof(Id), cmp_ids, 0);
517 if ((len - i) > 2)
518 solv_sort(sids + i + 1, len - i - 1, sizeof(Id), cmp_ids, 0);
519
520 old = 0;
521
522 /* The differencing above produces many runs of ones and twos. I tried
523 fairly elaborate schemes to RLE those, but they give only very mediocre
524 improvements in compression, as coding the escapes costs quite some
525 space. Even if they are coded only as bits in IDs. The best improvement
526 was about 2.7% for the whole .solv file. It's probably better to
527 invest some complexity into sharing idarrays, than RLEing. */
528 for (i = 0; i < len - 1; i++)
529 {
530 id = sids[i];
531 /* Ugly PREREQ handling. A "difference" of 0 is the prereq marker,
532 hence all real differences are offsetted by 1. Otherwise we would
533 have to handle negative differences, which would cost code space for
534 the encoding of the sign. We loose the exact mapping of prereq here,
535 but we know the result, so we can recover from that in the reader. */
536 if (id == marker)
537 id = old = 0;
538 else
539 {
540 id = id - old + 1;
541 old = sids[i];
542 }
543 /* XXX If difference is zero we have multiple equal elements,
544 we might want to skip writing them out. */
545 data_addideof(xd, id, 0);
546 }
547 id = sids[i];
548 if (id == marker)
549 id = 0;
550 else
551 id = id - old + 1;
552 data_addideof(xd, id, 1);
553 if (sids != lids)
554 solv_free(sids);
555 }
556
557 static inline void
data_addblob(struct extdata * xd,unsigned char * blob,int len)558 data_addblob(struct extdata *xd, unsigned char *blob, int len)
559 {
560 xd->buf = solv_extend(xd->buf, xd->len, len, 1, EXTDATA_BLOCK);
561 memcpy(xd->buf + xd->len, blob, len);
562 xd->len += len;
563 }
564
/* append "num" as four bytes, most significant byte first */
static inline void
data_addu32(struct extdata *xd, unsigned int num)
{
  unsigned char be[4];
  int i;

  for (i = 3; i >= 0; i--)
    {
      be[i] = num & 0xff;
      num >>= 8;
    }
  data_addblob(xd, be, 4);
}
575
576 static Id
putinownpool(struct cbdata * cbdata,Stringpool * ss,Id id)577 putinownpool(struct cbdata *cbdata, Stringpool *ss, Id id)
578 {
579 const char *str = stringpool_id2str(ss, id);
580 id = stringpool_str2id(cbdata->ownspool, str, 1);
581 if (id >= cbdata->needid[0].map)
582 {
583 int oldoff = cbdata->needid[0].map;
584 int newoff = (id + 1 + NEEDED_BLOCK) & ~NEEDED_BLOCK;
585 int nrels = cbdata->repo->pool->nrels;
586 cbdata->needid = solv_realloc2(cbdata->needid, newoff + nrels, sizeof(NeedId));
587 if (nrels)
588 memmove(cbdata->needid + newoff, cbdata->needid + oldoff, nrels * sizeof(NeedId));
589 memset(cbdata->needid + oldoff, 0, (newoff - oldoff) * sizeof(NeedId));
590 cbdata->needid[0].map = newoff;
591 }
592 return id;
593 }
594
595 static Id
putinowndirpool(struct cbdata * cbdata,Repodata * data,Dirpool * dp,Id dir)596 putinowndirpool(struct cbdata *cbdata, Repodata *data, Dirpool *dp, Id dir)
597 {
598 Id compid, parent;
599
600 parent = dirpool_parent(dp, dir);
601 if (parent)
602 parent = putinowndirpool(cbdata, data, dp, parent);
603 compid = dp->dirs[dir];
604 if (cbdata->ownspool && compid > 1)
605 compid = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, compid);
606 return dirpool_add_dir(cbdata->owndirpool, parent, compid, 1);
607 }
608
609 /*
610 * collect usage information about the dirs
611 * 1: dir used, no child of dir used
612 * 2: dir used as parent of another used dir
613 */
614 static inline void
setdirused(struct cbdata * cbdata,Dirpool * dp,Id dir)615 setdirused(struct cbdata *cbdata, Dirpool *dp, Id dir)
616 {
617 if (cbdata->dirused[dir])
618 return;
619 cbdata->dirused[dir] = 1;
620 while ((dir = dirpool_parent(dp, dir)) != 0)
621 {
622 if (cbdata->dirused[dir] == 2)
623 return;
624 if (cbdata->dirused[dir])
625 {
626 cbdata->dirused[dir] = 2;
627 return;
628 }
629 cbdata->dirused[dir] = 2;
630 }
631 cbdata->dirused[0] = 2;
632 }
633
634 /*
635 * pass 1 callback:
636 * collect key/id/dirid usage information, create needed schemas
637 */
638 static int
repo_write_collect_needed(struct cbdata * cbdata,Repo * repo,Repodata * data,Repokey * key,KeyValue * kv)639 repo_write_collect_needed(struct cbdata *cbdata, Repo *repo, Repodata *data, Repokey *key, KeyValue *kv)
640 {
641 Id id;
642 int rm;
643
644 if (key->name == REPOSITORY_SOLVABLES)
645 return SEARCH_NEXT_KEY; /* we do not want this one */
646
647 /* hack: ignore some keys, see BUGS */
648 if (data->repodataid != data->repo->nrepodata - 1)
649 if (key->name == REPOSITORY_ADDEDFILEPROVIDES || key->name == REPOSITORY_EXTERNAL || key->name == REPOSITORY_LOCATION || key->name == REPOSITORY_KEYS || key->name == REPOSITORY_TOOLVERSION)
650 return SEARCH_NEXT_KEY;
651
652 rm = cbdata->keymap[cbdata->keymapstart[data->repodataid] + (key - data->keys)];
653 if (!rm)
654 return SEARCH_NEXT_KEY; /* we do not want this one */
655
656 /* record key in schema */
657 if ((key->type != REPOKEY_TYPE_FIXARRAY || kv->eof == 0)
658 && (cbdata->sp == cbdata->schema || cbdata->sp[-1] != rm))
659 *cbdata->sp++ = rm;
660
661 switch(key->type)
662 {
663 case REPOKEY_TYPE_ID:
664 case REPOKEY_TYPE_IDARRAY:
665 id = kv->id;
666 if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
667 id = putinownpool(cbdata, data->localpool ? &data->spool : &repo->pool->ss, id);
668 incneedid(repo->pool, id, cbdata->needid);
669 break;
670 case REPOKEY_TYPE_DIR:
671 case REPOKEY_TYPE_DIRNUMNUMARRAY:
672 case REPOKEY_TYPE_DIRSTRARRAY:
673 id = kv->id;
674 if (cbdata->owndirpool)
675 putinowndirpool(cbdata, data, &data->dirpool, id);
676 else
677 setdirused(cbdata, &data->dirpool, id);
678 break;
679 case REPOKEY_TYPE_FIXARRAY:
680 if (kv->eof == 0)
681 {
682 if (cbdata->oldschema)
683 {
684 cbdata->target->error = pool_error(cbdata->repo->pool, -1, "nested fixarray structs not yet implemented");
685 return SEARCH_NEXT_KEY;
686 }
687 cbdata->oldschema = cbdata->schema;
688 cbdata->oldsp = cbdata->sp;
689 cbdata->schema = solv_calloc(cbdata->target->nkeys, sizeof(Id));
690 cbdata->sp = cbdata->schema;
691 }
692 else if (kv->eof == 1)
693 {
694 cbdata->current_sub++;
695 *cbdata->sp = 0;
696 cbdata->subschemata = solv_extend(cbdata->subschemata, cbdata->nsubschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
697 cbdata->subschemata[cbdata->nsubschemata++] = repodata_schema2id(cbdata->target, cbdata->schema, 1);
698 #if 0
699 fprintf(stderr, "Have schema %d\n", cbdata->subschemata[cbdata->nsubschemata-1]);
700 #endif
701 cbdata->sp = cbdata->schema;
702 }
703 else
704 {
705 solv_free(cbdata->schema);
706 cbdata->schema = cbdata->oldschema;
707 cbdata->sp = cbdata->oldsp;
708 cbdata->oldsp = cbdata->oldschema = 0;
709 }
710 break;
711 case REPOKEY_TYPE_FLEXARRAY:
712 if (kv->entry == 0)
713 {
714 if (kv->eof != 2)
715 *cbdata->sp++ = 0; /* mark start */
716 }
717 else
718 {
719 /* just finished a schema, rewind */
720 Id *sp = cbdata->sp - 1;
721 *sp = 0;
722 while (sp[-1])
723 sp--;
724 cbdata->subschemata = solv_extend(cbdata->subschemata, cbdata->nsubschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
725 cbdata->subschemata[cbdata->nsubschemata++] = repodata_schema2id(cbdata->target, sp, 1);
726 cbdata->sp = kv->eof == 2 ? sp - 1: sp;
727 }
728 break;
729 default:
730 break;
731 }
732 return 0;
733 }
734
735 static int
repo_write_cb_needed(void * vcbdata,Solvable * s,Repodata * data,Repokey * key,KeyValue * kv)736 repo_write_cb_needed(void *vcbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv)
737 {
738 struct cbdata *cbdata = vcbdata;
739 Repo *repo = data->repo;
740
741 #if 0
742 if (s)
743 fprintf(stderr, "solvable %d (%s): key (%d)%s %d\n", s ? s - repo->pool->solvables : 0, s ? pool_id2str(repo->pool, s->name) : "", key->name, pool_id2str(repo->pool, key->name), key->type);
744 #endif
745 return repo_write_collect_needed(cbdata, repo, data, key, kv);
746 }
747
748
749 /*
750 * pass 2 callback:
751 * encode all of the data into the correct buffers
752 */
753
754 static int
repo_write_adddata(struct cbdata * cbdata,Repodata * data,Repokey * key,KeyValue * kv)755 repo_write_adddata(struct cbdata *cbdata, Repodata *data, Repokey *key, KeyValue *kv)
756 {
757 int rm;
758 Id id;
759 unsigned int u32;
760 unsigned char v[4];
761 struct extdata *xd;
762 NeedId *needid;
763
764 if (key->name == REPOSITORY_SOLVABLES)
765 return SEARCH_NEXT_KEY;
766
767 /* hack: ignore some keys, see BUGS */
768 if (data->repodataid != data->repo->nrepodata - 1)
769 if (key->name == REPOSITORY_ADDEDFILEPROVIDES || key->name == REPOSITORY_EXTERNAL || key->name == REPOSITORY_LOCATION || key->name == REPOSITORY_KEYS || key->name == REPOSITORY_TOOLVERSION)
770 return SEARCH_NEXT_KEY;
771
772 rm = cbdata->keymap[cbdata->keymapstart[data->repodataid] + (key - data->keys)];
773 if (!rm)
774 return SEARCH_NEXT_KEY; /* we do not want this one */
775
776 if (cbdata->target->keys[rm].storage == KEY_STORAGE_VERTICAL_OFFSET)
777 {
778 xd = cbdata->extdata + rm; /* vertical buffer */
779 if (cbdata->vstart == -1)
780 cbdata->vstart = xd->len;
781 }
782 else
783 xd = cbdata->extdata + 0; /* incore buffer */
784 switch(key->type)
785 {
786 case REPOKEY_TYPE_VOID:
787 case REPOKEY_TYPE_CONSTANT:
788 case REPOKEY_TYPE_CONSTANTID:
789 break;
790 case REPOKEY_TYPE_ID:
791 id = kv->id;
792 if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
793 id = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, id);
794 needid = cbdata->needid;
795 id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
796 data_addid(xd, id);
797 break;
798 case REPOKEY_TYPE_IDARRAY:
799 id = kv->id;
800 if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
801 id = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, id);
802 needid = cbdata->needid;
803 id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
804 data_addideof(xd, id, kv->eof);
805 break;
806 case REPOKEY_TYPE_STR:
807 data_addblob(xd, (unsigned char *)kv->str, strlen(kv->str) + 1);
808 break;
809 case REPOKEY_TYPE_MD5:
810 data_addblob(xd, (unsigned char *)kv->str, SIZEOF_MD5);
811 break;
812 case REPOKEY_TYPE_SHA1:
813 data_addblob(xd, (unsigned char *)kv->str, SIZEOF_SHA1);
814 break;
815 case REPOKEY_TYPE_SHA256:
816 data_addblob(xd, (unsigned char *)kv->str, SIZEOF_SHA256);
817 break;
818 case REPOKEY_TYPE_U32:
819 u32 = kv->num;
820 v[0] = u32 >> 24;
821 v[1] = u32 >> 16;
822 v[2] = u32 >> 8;
823 v[3] = u32;
824 data_addblob(xd, v, 4);
825 break;
826 case REPOKEY_TYPE_NUM:
827 data_addid64(xd, kv->num, kv->num2);
828 break;
829 case REPOKEY_TYPE_DIR:
830 id = kv->id;
831 if (cbdata->owndirpool)
832 id = putinowndirpool(cbdata, data, &data->dirpool, id);
833 id = cbdata->dirused[id];
834 data_addid(xd, id);
835 break;
836 case REPOKEY_TYPE_BINARY:
837 data_addid(xd, kv->num);
838 if (kv->num)
839 data_addblob(xd, (unsigned char *)kv->str, kv->num);
840 break;
841 case REPOKEY_TYPE_DIRNUMNUMARRAY:
842 id = kv->id;
843 if (cbdata->owndirpool)
844 id = putinowndirpool(cbdata, data, &data->dirpool, id);
845 id = cbdata->dirused[id];
846 data_addid(xd, id);
847 data_addid(xd, kv->num);
848 data_addideof(xd, kv->num2, kv->eof);
849 break;
850 case REPOKEY_TYPE_DIRSTRARRAY:
851 id = kv->id;
852 if (cbdata->owndirpool)
853 id = putinowndirpool(cbdata, data, &data->dirpool, id);
854 id = cbdata->dirused[id];
855 data_addideof(xd, id, kv->eof);
856 data_addblob(xd, (unsigned char *)kv->str, strlen(kv->str) + 1);
857 break;
858 case REPOKEY_TYPE_FIXARRAY:
859 if (kv->eof == 0)
860 {
861 if (kv->num)
862 {
863 data_addid(xd, kv->num);
864 data_addid(xd, cbdata->subschemata[cbdata->current_sub]);
865 #if 0
866 fprintf(stderr, "writing %d %d\n", kv->num, cbdata->subschemata[cbdata->current_sub]);
867 #endif
868 }
869 }
870 else if (kv->eof == 1)
871 {
872 cbdata->current_sub++;
873 }
874 break;
875 case REPOKEY_TYPE_FLEXARRAY:
876 if (!kv->entry)
877 data_addid(xd, kv->num);
878 if (kv->eof != 2)
879 data_addid(xd, cbdata->subschemata[cbdata->current_sub++]);
880 if (xd == cbdata->extdata + 0 && !kv->parent && !cbdata->doingsolvables)
881 {
882 if (xd->len - cbdata->lastlen > cbdata->maxdata)
883 cbdata->maxdata = xd->len - cbdata->lastlen;
884 cbdata->lastlen = xd->len;
885 }
886 break;
887 default:
888 cbdata->target->error = pool_error(cbdata->repo->pool, -1, "unknown type for %d: %d\n", key->name, key->type);
889 break;
890 }
891 if (cbdata->target->keys[rm].storage == KEY_STORAGE_VERTICAL_OFFSET && kv->eof)
892 {
893 /* we can re-use old data in the blob here! */
894 data_addid(cbdata->extdata + 0, cbdata->vstart); /* add offset into incore data */
895 data_addid(cbdata->extdata + 0, xd->len - cbdata->vstart); /* add length into incore data */
896 cbdata->vstart = -1;
897 }
898 return 0;
899 }
900
901 static int
repo_write_cb_adddata(void * vcbdata,Solvable * s,Repodata * data,Repokey * key,KeyValue * kv)902 repo_write_cb_adddata(void *vcbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv)
903 {
904 struct cbdata *cbdata = vcbdata;
905 return repo_write_adddata(cbdata, data, key, kv);
906 }
907
908 /* traverse through directory with first child "dir" */
909 static int
traverse_dirs(Dirpool * dp,Id * dirmap,Id n,Id dir,Id * used)910 traverse_dirs(Dirpool *dp, Id *dirmap, Id n, Id dir, Id *used)
911 {
912 Id sib, child;
913 Id parent, lastn;
914
915 parent = n;
916 /* special case for '/', which has to come first */
917 if (parent == 1)
918 dirmap[n++] = 1;
919 for (sib = dir; sib; sib = dirpool_sibling(dp, sib))
920 {
921 if (used && !used[sib])
922 continue;
923 if (sib == 1 && parent == 1)
924 continue; /* already did that one above */
925 dirmap[n++] = sib;
926 }
927
928 /* now go through all the siblings we just added and
929 * do recursive calls on them */
930 lastn = n;
931 for (; parent < lastn; parent++)
932 {
933 sib = dirmap[parent];
934 if (used && used[sib] != 2) /* 2: used as parent */
935 continue;
936 child = dirpool_child(dp, sib);
937 if (child)
938 {
939 dirmap[n++] = -parent; /* start new block */
940 n = traverse_dirs(dp, dirmap, n, child, used);
941 }
942 }
943 return n;
944 }
945
946 static void
write_compressed_page(Repodata * data,unsigned char * page,int len)947 write_compressed_page(Repodata *data, unsigned char *page, int len)
948 {
949 int clen;
950 unsigned char cpage[REPOPAGE_BLOBSIZE];
951
952 clen = repopagestore_compress_page(page, len, cpage, len - 1);
953 if (!clen)
954 {
955 write_u32(data, len * 2);
956 write_blob(data, page, len);
957 }
958 else
959 {
960 write_u32(data, clen * 2 + 1);
961 write_blob(data, cpage, clen);
962 }
963 }
964
965 static Id verticals[] = {
966 SOLVABLE_AUTHORS,
967 SOLVABLE_DESCRIPTION,
968 SOLVABLE_MESSAGEDEL,
969 SOLVABLE_MESSAGEINS,
970 SOLVABLE_EULA,
971 SOLVABLE_DISKUSAGE,
972 SOLVABLE_FILELIST,
973 SOLVABLE_CHECKSUM,
974 DELTA_CHECKSUM,
975 DELTA_SEQ_NUM,
976 SOLVABLE_PKGID,
977 SOLVABLE_HDRID,
978 SOLVABLE_LEADSIGID,
979 SOLVABLE_CHANGELOG_AUTHOR,
980 SOLVABLE_CHANGELOG_TEXT,
981 0
982 };
983
/*
 * Key name prefixes that repo_write_stdkeyfilter() stores as
 * KEY_STORAGE_VERTICAL_OFFSET.  NULL terminated.
 * The entries point to string literals and the table is only read,
 * hence const on both levels.
 */
static const char *const languagetags[] = {
  "solvable:summary:",
  "solvable:description:",
  "solvable:messageins:",
  "solvable:messagedel:",
  "solvable:eula:",
  0
};
992
993 int
repo_write_stdkeyfilter(Repo * repo,Repokey * key,void * kfdata)994 repo_write_stdkeyfilter(Repo *repo, Repokey *key, void *kfdata)
995 {
996 const char *keyname;
997 int i;
998
999 for (i = 0; verticals[i]; i++)
1000 if (key->name == verticals[i])
1001 return KEY_STORAGE_VERTICAL_OFFSET;
1002 keyname = pool_id2str(repo->pool, key->name);
1003 for (i = 0; languagetags[i] != 0; i++)
1004 if (!strncmp(keyname, languagetags[i], strlen(languagetags[i])))
1005 return KEY_STORAGE_VERTICAL_OFFSET;
1006 return KEY_STORAGE_INCORE;
1007 }
1008
1009 /*
1010 * Repo
1011 */
1012
1013 /*
1014 * the code works the following way:
1015 *
1016 * 1) find which keys should be written
1017 * 2) collect usage information for keys/ids/dirids, create schema
1018 * data
1019 * 3) use usage information to create mapping tables, so that often
1020 * used ids get a lower number
1021 * 4) encode data into buffers using the mapping tables
1022 * 5) write everything to disk
1023 */
1024 int
repo_write_filtered(Repo * repo,FILE * fp,int (* keyfilter)(Repo * repo,Repokey * key,void * kfdata),void * kfdata,Queue * keyq)1025 repo_write_filtered(Repo *repo, FILE *fp, int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata), void *kfdata, Queue *keyq)
1026 {
1027 Pool *pool = repo->pool;
1028 int i, j, n;
1029 Solvable *s;
1030 NeedId *needid;
1031 int nstrings, nrels;
1032 unsigned int sizeid;
1033 unsigned int solv_flags;
1034 Reldep *ran;
1035 Id *idarraydata;
1036
1037 Id id, *sp;
1038
1039 Id *dirmap;
1040 int ndirmap;
1041 Id *keyused;
1042 unsigned char *repodataused;
1043 int anyrepodataused = 0;
1044 int anysolvableused = 0;
1045
1046 struct cbdata cbdata;
1047 int clonepool;
1048 Repokey *key;
1049 int poolusage, dirpoolusage, idused, dirused;
1050 int reloff;
1051
1052 Repodata *data, *dirpooldata;
1053
1054 Repodata target;
1055
1056 Stringpool *spool;
1057 Dirpool *dirpool;
1058
1059 Id mainschema;
1060
1061 struct extdata *xd;
1062
1063 Id type_constantid = REPOKEY_TYPE_CONSTANTID;
1064
1065
1066 memset(&cbdata, 0, sizeof(cbdata));
1067 cbdata.repo = repo;
1068 cbdata.target = ⌖
1069
1070 repodata_initdata(&target, repo, 1);
1071
1072 /* go through all repodata and find the keys we need */
1073 /* also unify keys */
1074 /* keymapstart - maps repo number to keymap offset */
1075 /* keymap - maps repo key to my key, 0 -> not used */
1076
1077 /* start with all KEY_STORAGE_SOLVABLE ids */
1078
1079 n = ID_NUM_INTERNAL;
1080 FOR_REPODATAS(repo, i, data)
1081 n += data->nkeys;
1082 cbdata.keymap = solv_calloc(n, sizeof(Id));
1083 cbdata.keymapstart = solv_calloc(repo->nrepodata, sizeof(Id));
1084 repodataused = solv_calloc(repo->nrepodata, 1);
1085
1086 clonepool = 0;
1087 poolusage = 0;
1088
1089 /* add keys for STORAGE_SOLVABLE */
1090 for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
1091 {
1092 Repokey keyd;
1093 keyd.name = i;
1094 if (i < SOLVABLE_PROVIDES)
1095 keyd.type = REPOKEY_TYPE_ID;
1096 else if (i < RPM_RPMDBID)
1097 keyd.type = REPOKEY_TYPE_REL_IDARRAY;
1098 else
1099 keyd.type = REPOKEY_TYPE_NUM;
1100 keyd.size = 0;
1101 keyd.storage = KEY_STORAGE_SOLVABLE;
1102 if (keyfilter)
1103 {
1104 keyd.storage = keyfilter(repo, &keyd, kfdata);
1105 if (keyd.storage == KEY_STORAGE_DROPPED)
1106 continue;
1107 keyd.storage = KEY_STORAGE_SOLVABLE;
1108 }
1109 poolusage = 1;
1110 clonepool = 1;
1111 cbdata.keymap[keyd.name] = repodata_key2id(&target, &keyd, 1);
1112 }
1113
1114 if (repo->nsolvables)
1115 {
1116 Repokey keyd;
1117 keyd.name = REPOSITORY_SOLVABLES;
1118 keyd.type = REPOKEY_TYPE_FLEXARRAY;
1119 keyd.size = 0;
1120 keyd.storage = KEY_STORAGE_INCORE;
1121 cbdata.keymap[keyd.name] = repodata_key2id(&target, &keyd, 1);
1122 }
1123
1124 dirpoolusage = 0;
1125
1126 spool = 0;
1127 dirpool = 0;
1128 dirpooldata = 0;
1129 n = ID_NUM_INTERNAL;
1130 FOR_REPODATAS(repo, i, data)
1131 {
1132 cbdata.keymapstart[i] = n;
1133 cbdata.keymap[n++] = 0; /* key 0 */
1134 idused = 0;
1135 dirused = 0;
1136 if (keyfilter)
1137 {
1138 Repokey keyd;
1139 /* check if we want this repodata */
1140 memset(&keyd, 0, sizeof(keyd));
1141 keyd.name = 1;
1142 keyd.type = 1;
1143 keyd.size = i;
1144 if (keyfilter(repo, &keyd, kfdata) == -1)
1145 continue;
1146 }
1147 for (j = 1; j < data->nkeys; j++, n++)
1148 {
1149 key = data->keys + j;
1150 if (key->name == REPOSITORY_SOLVABLES && key->type == REPOKEY_TYPE_FLEXARRAY)
1151 {
1152 cbdata.keymap[n] = cbdata.keymap[key->name];
1153 continue;
1154 }
1155 if (key->type == REPOKEY_TYPE_DELETED)
1156 {
1157 cbdata.keymap[n] = 0;
1158 continue;
1159 }
1160 if (key->type == REPOKEY_TYPE_CONSTANTID && data->localpool)
1161 {
1162 Repokey keyd = *key;
1163 keyd.size = repodata_globalize_id(data, key->size, 1);
1164 id = repodata_key2id(&target, &keyd, 0);
1165 }
1166 else
1167 id = repodata_key2id(&target, key, 0);
1168 if (!id)
1169 {
1170 Repokey keyd = *key;
1171 keyd.storage = KEY_STORAGE_INCORE;
1172 if (keyd.type == REPOKEY_TYPE_CONSTANTID)
1173 keyd.size = repodata_globalize_id(data, key->size, 1);
1174 else if (keyd.type != REPOKEY_TYPE_CONSTANT)
1175 keyd.size = 0;
1176 if (keyfilter)
1177 {
1178 keyd.storage = keyfilter(repo, &keyd, kfdata);
1179 if (keyd.storage == KEY_STORAGE_DROPPED)
1180 {
1181 cbdata.keymap[n] = 0;
1182 continue;
1183 }
1184 }
1185 id = repodata_key2id(&target, &keyd, 1);
1186 }
1187 cbdata.keymap[n] = id;
1188 /* load repodata if not already loaded */
1189 if (data->state == REPODATA_STUB)
1190 {
1191 if (data->loadcallback)
1192 data->loadcallback(data);
1193 else
1194 data->state = REPODATA_ERROR;
1195 if (data->state != REPODATA_ERROR)
1196 {
1197 /* redo this repodata! */
1198 j = 0;
1199 n = cbdata.keymapstart[i];
1200 continue;
1201 }
1202 }
1203 if (data->state == REPODATA_ERROR)
1204 {
1205 /* too bad! */
1206 cbdata.keymap[n] = 0;
1207 continue;
1208 }
1209
1210 repodataused[i] = 1;
1211 anyrepodataused = 1;
1212 if (key->type == REPOKEY_TYPE_CONSTANTID || key->type == REPOKEY_TYPE_ID ||
1213 key->type == REPOKEY_TYPE_IDARRAY || key->type == REPOKEY_TYPE_REL_IDARRAY)
1214 idused = 1;
1215 else if (key->type == REPOKEY_TYPE_DIR || key->type == REPOKEY_TYPE_DIRNUMNUMARRAY || key->type == REPOKEY_TYPE_DIRSTRARRAY)
1216 {
1217 idused = 1; /* dirs also use ids */
1218 dirused = 1;
1219 }
1220 }
1221 if (idused)
1222 {
1223 if (data->localpool)
1224 {
1225 if (poolusage)
1226 poolusage = 3; /* need own pool */
1227 else
1228 {
1229 poolusage = 2;
1230 spool = &data->spool;
1231 }
1232 }
1233 else
1234 {
1235 if (poolusage == 0)
1236 poolusage = 1;
1237 else if (poolusage != 1)
1238 poolusage = 3; /* need own pool */
1239 }
1240 }
1241 if (dirused)
1242 {
1243 if (dirpoolusage)
1244 dirpoolusage = 3; /* need own dirpool */
1245 else
1246 {
1247 dirpoolusage = 2;
1248 dirpool = &data->dirpool;
1249 dirpooldata = data;
1250 }
1251 }
1252 }
1253 cbdata.nkeymap = n;
1254
1255 /* 0: no pool needed at all */
1256 /* 1: use global pool */
1257 /* 2: use repodata local pool */
1258 /* 3: need own pool */
1259 if (poolusage == 3)
1260 {
1261 spool = &target.spool;
1262 /* hack: reuse global pool data so we don't have to map pool ids */
1263 if (clonepool)
1264 {
1265 stringpool_free(spool);
1266 stringpool_clone(spool, &pool->ss);
1267 }
1268 cbdata.ownspool = spool;
1269 }
1270 else if (poolusage == 0 || poolusage == 1)
1271 {
1272 poolusage = 1;
1273 spool = &pool->ss;
1274 }
1275
1276 if (dirpoolusage == 3)
1277 {
1278 dirpool = &target.dirpool;
1279 dirpooldata = 0;
1280 cbdata.owndirpool = dirpool;
1281 }
1282 else if (dirpool)
1283 cbdata.dirused = solv_calloc(dirpool->ndirs, sizeof(Id));
1284
1285
1286 /********************************************************************/
1287 #if 0
1288 fprintf(stderr, "poolusage: %d\n", poolusage);
1289 fprintf(stderr, "dirpoolusage: %d\n", dirpoolusage);
1290 fprintf(stderr, "nkeys: %d\n", target.nkeys);
1291 for (i = 1; i < target.nkeys; i++)
1292 fprintf(stderr, " %2d: %s[%d] %d %d %d\n", i, pool_id2str(pool, target.keys[i].name), target.keys[i].name, target.keys[i].type, target.keys[i].size, target.keys[i].storage);
1293 #endif
1294
1295 /* copy keys if requested */
1296 if (keyq)
1297 {
1298 queue_empty(keyq);
1299 for (i = 1; i < target.nkeys; i++)
1300 queue_push2(keyq, target.keys[i].name, target.keys[i].type);
1301 }
1302
1303 if (poolusage > 1)
1304 {
1305 /* put all the keys we need in our string pool */
1306 /* put mapped ids right into target.keys */
1307 for (i = 1, key = target.keys + i; i < target.nkeys; i++, key++)
1308 {
1309 key->name = stringpool_str2id(spool, pool_id2str(pool, key->name), 1);
1310 if (key->type == REPOKEY_TYPE_CONSTANTID)
1311 {
1312 key->type = stringpool_str2id(spool, pool_id2str(pool, key->type), 1);
1313 type_constantid = key->type;
1314 key->size = stringpool_str2id(spool, pool_id2str(pool, key->size), 1);
1315 }
1316 else
1317 key->type = stringpool_str2id(spool, pool_id2str(pool, key->type), 1);
1318 }
1319 if (poolusage == 2)
1320 stringpool_freehash(spool); /* free some mem */
1321 }
1322
1323
1324 /********************************************************************/
1325
1326 /* set needed count of all strings and rels,
1327 * find which keys are used in the solvables
1328 * put all strings in own spool
1329 */
1330
1331 reloff = spool->nstrings;
1332 if (poolusage == 3)
1333 reloff = (reloff + NEEDED_BLOCK) & ~NEEDED_BLOCK;
1334
1335 needid = calloc(reloff + pool->nrels, sizeof(*needid));
1336 needid[0].map = reloff;
1337
1338 cbdata.needid = needid;
1339 cbdata.schema = solv_calloc(target.nkeys, sizeof(Id));
1340 cbdata.sp = cbdata.schema;
1341 cbdata.solvschemata = solv_calloc(repo->nsolvables, sizeof(Id));
1342
1343 /* create main schema */
1344 cbdata.sp = cbdata.schema;
1345 /* collect all other data from all repodatas */
1346 /* XXX: merge arrays of equal keys? */
1347 FOR_REPODATAS(repo, j, data)
1348 {
1349 if (!repodataused[j])
1350 continue;
1351 repodata_search(data, SOLVID_META, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_needed, &cbdata);
1352 }
1353 sp = cbdata.sp;
1354 /* add solvables if needed (may revert later) */
1355 if (repo->nsolvables)
1356 {
1357 *sp++ = cbdata.keymap[REPOSITORY_SOLVABLES];
1358 target.keys[cbdata.keymap[REPOSITORY_SOLVABLES]].size++;
1359 }
1360 *sp = 0;
1361 mainschema = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1362
1363 idarraydata = repo->idarraydata;
1364
1365 anysolvableused = 0;
1366 cbdata.doingsolvables = 1;
1367 for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
1368 {
1369 if (s->repo != repo)
1370 continue;
1371
1372 /* set schema info, keep in sync with further down */
1373 sp = cbdata.schema;
1374 if (cbdata.keymap[SOLVABLE_NAME])
1375 {
1376 *sp++ = cbdata.keymap[SOLVABLE_NAME];
1377 needid[s->name].need++;
1378 }
1379 if (cbdata.keymap[SOLVABLE_ARCH])
1380 {
1381 *sp++ = cbdata.keymap[SOLVABLE_ARCH];
1382 needid[s->arch].need++;
1383 }
1384 if (cbdata.keymap[SOLVABLE_EVR])
1385 {
1386 *sp++ = cbdata.keymap[SOLVABLE_EVR];
1387 needid[s->evr].need++;
1388 }
1389 if (s->vendor && cbdata.keymap[SOLVABLE_VENDOR])
1390 {
1391 *sp++ = cbdata.keymap[SOLVABLE_VENDOR];
1392 needid[s->vendor].need++;
1393 }
1394 if (s->provides && cbdata.keymap[SOLVABLE_PROVIDES])
1395 {
1396 *sp++ = cbdata.keymap[SOLVABLE_PROVIDES];
1397 target.keys[cbdata.keymap[SOLVABLE_PROVIDES]].size += incneedidarray(pool, idarraydata + s->provides, needid);
1398 }
1399 if (s->obsoletes && cbdata.keymap[SOLVABLE_OBSOLETES])
1400 {
1401 *sp++ = cbdata.keymap[SOLVABLE_OBSOLETES];
1402 target.keys[cbdata.keymap[SOLVABLE_OBSOLETES]].size += incneedidarray(pool, idarraydata + s->obsoletes, needid);
1403 }
1404 if (s->conflicts && cbdata.keymap[SOLVABLE_CONFLICTS])
1405 {
1406 *sp++ = cbdata.keymap[SOLVABLE_CONFLICTS];
1407 target.keys[cbdata.keymap[SOLVABLE_CONFLICTS]].size += incneedidarray(pool, idarraydata + s->conflicts, needid);
1408 }
1409 if (s->requires && cbdata.keymap[SOLVABLE_REQUIRES])
1410 {
1411 *sp++ = cbdata.keymap[SOLVABLE_REQUIRES];
1412 target.keys[cbdata.keymap[SOLVABLE_REQUIRES]].size += incneedidarray(pool, idarraydata + s->requires, needid);
1413 }
1414 if (s->recommends && cbdata.keymap[SOLVABLE_RECOMMENDS])
1415 {
1416 *sp++ = cbdata.keymap[SOLVABLE_RECOMMENDS];
1417 target.keys[cbdata.keymap[SOLVABLE_RECOMMENDS]].size += incneedidarray(pool, idarraydata + s->recommends, needid);
1418 }
1419 if (s->suggests && cbdata.keymap[SOLVABLE_SUGGESTS])
1420 {
1421 *sp++ = cbdata.keymap[SOLVABLE_SUGGESTS];
1422 target.keys[cbdata.keymap[SOLVABLE_SUGGESTS]].size += incneedidarray(pool, idarraydata + s->suggests, needid);
1423 }
1424 if (s->supplements && cbdata.keymap[SOLVABLE_SUPPLEMENTS])
1425 {
1426 *sp++ = cbdata.keymap[SOLVABLE_SUPPLEMENTS];
1427 target.keys[cbdata.keymap[SOLVABLE_SUPPLEMENTS]].size += incneedidarray(pool, idarraydata + s->supplements, needid);
1428 }
1429 if (s->enhances && cbdata.keymap[SOLVABLE_ENHANCES])
1430 {
1431 *sp++ = cbdata.keymap[SOLVABLE_ENHANCES];
1432 target.keys[cbdata.keymap[SOLVABLE_ENHANCES]].size += incneedidarray(pool, idarraydata + s->enhances, needid);
1433 }
1434 if (repo->rpmdbid && cbdata.keymap[RPM_RPMDBID])
1435 {
1436 *sp++ = cbdata.keymap[RPM_RPMDBID];
1437 target.keys[cbdata.keymap[RPM_RPMDBID]].size++;
1438 }
1439 cbdata.sp = sp;
1440
1441 if (anyrepodataused)
1442 {
1443 FOR_REPODATAS(repo, j, data)
1444 {
1445 if (!repodataused[j])
1446 continue;
1447 if (i < data->start || i >= data->end)
1448 continue;
1449 repodata_search(data, i, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_needed, &cbdata);
1450 needid = cbdata.needid;
1451 }
1452 }
1453 *cbdata.sp = 0;
1454 cbdata.solvschemata[n] = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1455 if (cbdata.solvschemata[n])
1456 anysolvableused = 1;
1457 n++;
1458 }
1459 cbdata.doingsolvables = 0;
1460 assert(n == repo->nsolvables);
1461
1462 if (repo->nsolvables && !anysolvableused)
1463 {
1464 /* strip off solvable from the main schema */
1465 target.keys[cbdata.keymap[REPOSITORY_SOLVABLES]].size = 0;
1466 sp = cbdata.schema;
1467 for (i = 0; target.schemadata[target.schemata[mainschema] + i]; i++)
1468 {
1469 *sp = target.schemadata[target.schemata[mainschema] + i];
1470 if (*sp != cbdata.keymap[REPOSITORY_SOLVABLES])
1471 sp++;
1472 }
1473 assert(target.schemadatalen == target.schemata[mainschema] + i + 1);
1474 *sp = 0;
1475 target.schemadatalen = target.schemata[mainschema];
1476 target.nschemata--;
1477 repodata_free_schemahash(&target);
1478 mainschema = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1479 }
1480
1481 /********************************************************************/
1482
1483 /* remove unused keys */
1484 keyused = solv_calloc(target.nkeys, sizeof(Id));
1485 for (i = 1; i < target.schemadatalen; i++)
1486 keyused[target.schemadata[i]] = 1;
1487 keyused[0] = 0;
1488 for (n = i = 1; i < target.nkeys; i++)
1489 {
1490 if (!keyused[i])
1491 continue;
1492 keyused[i] = n;
1493 if (i != n)
1494 {
1495 target.keys[n] = target.keys[i];
1496 if (keyq)
1497 {
1498 keyq->elements[2 * n - 2] = keyq->elements[2 * i - 2];
1499 keyq->elements[2 * n - 1] = keyq->elements[2 * i - 1];
1500 }
1501 }
1502 n++;
1503 }
1504 target.nkeys = n;
1505 if (keyq)
1506 queue_truncate(keyq, 2 * n - 2);
1507
1508 /* update schema data to the new key ids */
1509 for (i = 1; i < target.schemadatalen; i++)
1510 target.schemadata[i] = keyused[target.schemadata[i]];
1511 /* update keymap to the new key ids */
1512 for (i = 0; i < cbdata.nkeymap; i++)
1513 cbdata.keymap[i] = keyused[cbdata.keymap[i]];
1514 keyused = solv_free(keyused);
1515
1516 /* increment needid of the used keys, they are already mapped to
1517 * the correct string pool */
1518 for (i = 1; i < target.nkeys; i++)
1519 {
1520 if (target.keys[i].type == type_constantid)
1521 needid[target.keys[i].size].need++;
1522 needid[target.keys[i].name].need++;
1523 needid[target.keys[i].type].need++;
1524 }
1525
1526 /********************************************************************/
1527
1528 if (dirpool && cbdata.dirused && !cbdata.dirused[0])
1529 {
1530 /* no dirs used at all */
1531 cbdata.dirused = solv_free(cbdata.dirused);
1532 dirpool = 0;
1533 }
1534
1535 /* increment need id for used dir components */
1536 if (dirpool)
1537 {
1538 /* if we have own dirpool, all entries in it are used.
1539 also, all comp ids are already mapped by putinowndirpool(),
1540 so we can simply increment needid.
1541 (owndirpool != 0, dirused == 0, dirpooldata == 0) */
1542 /* else we re-use a dirpool of repodata "dirpooldata".
1543 dirused tells us which of the ids are used.
1544 we need to map comp ids if we generate a new pool.
1545 (owndirpool == 0, dirused != 0, dirpooldata != 0) */
1546 for (i = 1; i < dirpool->ndirs; i++)
1547 {
1548 #if 0
1549 fprintf(stderr, "dir %d used %d\n", i, cbdata.dirused ? cbdata.dirused[i] : 1);
1550 #endif
1551 if (cbdata.dirused && !cbdata.dirused[i])
1552 continue;
1553 id = dirpool->dirs[i];
1554 if (id <= 0)
1555 continue;
1556 if (dirpooldata && cbdata.ownspool && id > 1)
1557 {
1558 id = putinownpool(&cbdata, dirpooldata->localpool ? &dirpooldata->spool : &pool->ss, id);
1559 needid = cbdata.needid;
1560 }
1561 needid[id].need++;
1562 }
1563 }
1564
1565
1566 /********************************************************************/
1567
1568 /*
1569 * create mapping table, new keys are sorted by needid[].need
1570 *
1571 * needid[key].need : old key -> new key
1572 * needid[key].map : new key -> old key
1573 */
1574
1575 /* zero out id 0 and rel 0 just in case */
1576 reloff = needid[0].map;
1577 needid[0].need = 0;
1578 needid[reloff].need = 0;
1579
1580 for (i = 1; i < reloff + pool->nrels; i++)
1581 needid[i].map = i;
1582
1583 #if 0
1584 solv_sort(needid + 1, spool->nstrings - 1, sizeof(*needid), needid_cmp_need_s, spool);
1585 #else
1586 /* make first entry '' */
1587 needid[1].need = 1;
1588 solv_sort(needid + 2, spool->nstrings - 2, sizeof(*needid), needid_cmp_need_s, spool);
1589 #endif
1590 solv_sort(needid + reloff, pool->nrels, sizeof(*needid), needid_cmp_need, 0);
1591 /* now needid is in new order, needid[newid].map -> oldid */
1592
1593 /* calculate string space size, also zero out needid[].need */
1594 sizeid = 0;
1595 for (i = 1; i < reloff; i++)
1596 {
1597 if (!needid[i].need)
1598 break; /* as we have sorted, every entry after this also has need == 0 */
1599 needid[i].need = 0;
1600 sizeid += strlen(spool->stringspace + spool->strings[needid[i].map]) + 1;
1601 }
1602 nstrings = i; /* our new string id end */
1603
1604 /* make needid[oldid].need point to newid */
1605 for (i = 1; i < nstrings; i++)
1606 needid[needid[i].map].need = i;
1607
1608 /* same as above for relations */
1609 for (i = 0; i < pool->nrels; i++)
1610 {
1611 if (!needid[reloff + i].need)
1612 break;
1613 needid[reloff + i].need = 0;
1614 }
1615 nrels = i; /* our new rel id end */
1616
1617 for (i = 0; i < nrels; i++)
1618 needid[needid[reloff + i].map].need = nstrings + i;
1619
1620 /* now we have: needid[oldid].need -> newid
1621 needid[newid].map -> oldid
1622 both for strings and relations */
1623
1624
1625 /********************************************************************/
1626
1627 ndirmap = 0;
1628 dirmap = 0;
1629 if (dirpool)
1630 {
1631 /* create our new target directory structure by traversing through all
1632 * used dirs. This will concatenate blocks with the same parent
1633 * directory into single blocks.
1634 * Instead of components, traverse_dirs stores the old dirids,
1635 * we will change this in the second step below */
1636 /* (dirpooldata and dirused are 0 if we have our own dirpool) */
1637 if (cbdata.dirused && !cbdata.dirused[1])
1638 cbdata.dirused[1] = 1; /* always want / entry */
1639 dirmap = solv_calloc(dirpool->ndirs, sizeof(Id));
1640 dirmap[0] = 0;
1641 ndirmap = traverse_dirs(dirpool, dirmap, 1, dirpool_child(dirpool, 0), cbdata.dirused);
1642
1643 /* (re)create dirused, so that it maps from "old dirid" to "new dirid" */
1644 /* change dirmap so that it maps from "new dirid" to "new compid" */
1645 if (!cbdata.dirused)
1646 cbdata.dirused = solv_malloc2(dirpool->ndirs, sizeof(Id));
1647 memset(cbdata.dirused, 0, dirpool->ndirs * sizeof(Id));
1648 for (i = 1; i < ndirmap; i++)
1649 {
1650 if (dirmap[i] <= 0)
1651 continue;
1652 cbdata.dirused[dirmap[i]] = i;
1653 id = dirpool->dirs[dirmap[i]];
1654 if (dirpooldata && cbdata.ownspool && id > 1)
1655 id = putinownpool(&cbdata, dirpooldata->localpool ? &dirpooldata->spool : &pool->ss, id);
1656 dirmap[i] = needid[id].need;
1657 }
1658 /* now the new target directory structure is complete (dirmap), and we have
1659 * dirused[olddirid] -> newdirid */
1660 }
1661
1662 /********************************************************************/
1663
1664 /* collect all data
1665 * we use extdata[0] for incore data and extdata[keyid] for vertical data
1666 */
1667
1668 cbdata.extdata = solv_calloc(target.nkeys, sizeof(struct extdata));
1669
1670 xd = cbdata.extdata;
1671 cbdata.current_sub = 0;
1672 /* add main schema */
1673 cbdata.lastlen = 0;
1674 data_addid(xd, mainschema);
1675
1676 #if 1
1677 FOR_REPODATAS(repo, j, data)
1678 {
1679 if (!repodataused[j])
1680 continue;
1681 repodata_search(data, SOLVID_META, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_adddata, &cbdata);
1682 }
1683 #endif
1684
1685 if (xd->len - cbdata.lastlen > cbdata.maxdata)
1686 cbdata.maxdata = xd->len - cbdata.lastlen;
1687 cbdata.lastlen = xd->len;
1688
1689 if (anysolvableused)
1690 {
1691 data_addid(xd, repo->nsolvables); /* FLEXARRAY nentries */
1692 cbdata.doingsolvables = 1;
1693 for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
1694 {
1695 if (s->repo != repo)
1696 continue;
1697 data_addid(xd, cbdata.solvschemata[n]);
1698 if (cbdata.keymap[SOLVABLE_NAME])
1699 data_addid(xd, needid[s->name].need);
1700 if (cbdata.keymap[SOLVABLE_ARCH])
1701 data_addid(xd, needid[s->arch].need);
1702 if (cbdata.keymap[SOLVABLE_EVR])
1703 data_addid(xd, needid[s->evr].need);
1704 if (s->vendor && cbdata.keymap[SOLVABLE_VENDOR])
1705 data_addid(xd, needid[s->vendor].need);
1706 if (s->provides && cbdata.keymap[SOLVABLE_PROVIDES])
1707 data_addidarray_sort(xd, pool, needid, idarraydata + s->provides, SOLVABLE_FILEMARKER);
1708 if (s->obsoletes && cbdata.keymap[SOLVABLE_OBSOLETES])
1709 data_addidarray_sort(xd, pool, needid, idarraydata + s->obsoletes, 0);
1710 if (s->conflicts && cbdata.keymap[SOLVABLE_CONFLICTS])
1711 data_addidarray_sort(xd, pool, needid, idarraydata + s->conflicts, 0);
1712 if (s->requires && cbdata.keymap[SOLVABLE_REQUIRES])
1713 data_addidarray_sort(xd, pool, needid, idarraydata + s->requires, SOLVABLE_PREREQMARKER);
1714 if (s->recommends && cbdata.keymap[SOLVABLE_RECOMMENDS])
1715 data_addidarray_sort(xd, pool, needid, idarraydata + s->recommends, 0);
1716 if (s->suggests && cbdata.keymap[SOLVABLE_SUGGESTS])
1717 data_addidarray_sort(xd, pool, needid, idarraydata + s->suggests, 0);
1718 if (s->supplements && cbdata.keymap[SOLVABLE_SUPPLEMENTS])
1719 data_addidarray_sort(xd, pool, needid, idarraydata + s->supplements, 0);
1720 if (s->enhances && cbdata.keymap[SOLVABLE_ENHANCES])
1721 data_addidarray_sort(xd, pool, needid, idarraydata + s->enhances, 0);
1722 if (repo->rpmdbid && cbdata.keymap[RPM_RPMDBID])
1723 data_addid(xd, repo->rpmdbid[i - repo->start]);
1724 if (anyrepodataused)
1725 {
1726 cbdata.vstart = -1;
1727 FOR_REPODATAS(repo, j, data)
1728 {
1729 if (!repodataused[j])
1730 continue;
1731 if (i < data->start || i >= data->end)
1732 continue;
1733 repodata_search(data, i, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_adddata, &cbdata);
1734 }
1735 }
1736 if (xd->len - cbdata.lastlen > cbdata.maxdata)
1737 cbdata.maxdata = xd->len - cbdata.lastlen;
1738 cbdata.lastlen = xd->len;
1739 n++;
1740 }
1741 cbdata.doingsolvables = 0;
1742 }
1743
1744 assert(cbdata.current_sub == cbdata.nsubschemata);
1745 if (cbdata.subschemata)
1746 {
1747 cbdata.subschemata = solv_free(cbdata.subschemata);
1748 cbdata.nsubschemata = 0;
1749 }
1750
1751 /********************************************************************/
1752
1753 target.fp = fp;
1754
1755 /* write header */
1756
1757 /* write file header */
1758 write_u32(&target, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V');
1759 write_u32(&target, SOLV_VERSION_8);
1760
1761
1762 /* write counts */
1763 write_u32(&target, nstrings);
1764 write_u32(&target, nrels);
1765 write_u32(&target, ndirmap);
1766 write_u32(&target, anysolvableused ? repo->nsolvables : 0);
1767 write_u32(&target, target.nkeys);
1768 write_u32(&target, target.nschemata);
1769 solv_flags = 0;
1770 solv_flags |= SOLV_FLAG_PREFIX_POOL;
1771 solv_flags |= SOLV_FLAG_SIZE_BYTES;
1772 write_u32(&target, solv_flags);
1773
1774 if (nstrings)
1775 {
1776 /*
1777 * calculate prefix encoding of the strings
1778 */
1779 unsigned char *prefixcomp = solv_malloc(nstrings);
1780 unsigned int compsum = 0;
1781 char *old_str = "";
1782
1783 prefixcomp[0] = 0;
1784 for (i = 1; i < nstrings; i++)
1785 {
1786 char *str = spool->stringspace + spool->strings[needid[i].map];
1787 int same;
1788 for (same = 0; same < 255; same++)
1789 if (!old_str[same] || old_str[same] != str[same])
1790 break;
1791 prefixcomp[i] = same;
1792 compsum += same;
1793 old_str = str;
1794 }
1795
1796 /*
1797 * write strings
1798 */
1799 write_u32(&target, sizeid);
1800 /* we save compsum bytes but need 1 extra byte for every string */
1801 write_u32(&target, sizeid + nstrings - 1 - compsum);
1802 for (i = 1; i < nstrings; i++)
1803 {
1804 char *str = spool->stringspace + spool->strings[needid[i].map];
1805 write_u8(&target, prefixcomp[i]);
1806 write_str(&target, str + prefixcomp[i]);
1807 }
1808 solv_free(prefixcomp);
1809 }
1810 else
1811 {
1812 write_u32(&target, 0);
1813 write_u32(&target, 0);
1814 }
1815
1816 /*
1817 * write RelDeps
1818 */
1819 for (i = 0; i < nrels; i++)
1820 {
1821 ran = pool->rels + (needid[reloff + i].map - reloff);
1822 write_id(&target, needid[ISRELDEP(ran->name) ? RELOFF(ran->name) : ran->name].need);
1823 write_id(&target, needid[ISRELDEP(ran->evr) ? RELOFF(ran->evr) : ran->evr].need);
1824 write_u8(&target, ran->flags);
1825 }
1826
1827 /*
1828 * write dirs (skip both root and / entry)
1829 */
1830 for (i = 2; i < ndirmap; i++)
1831 {
1832 if (dirmap[i] > 0)
1833 write_id(&target, dirmap[i]);
1834 else
1835 write_id(&target, nstrings - dirmap[i]);
1836 }
1837 solv_free(dirmap);
1838
1839 /*
1840 * write keys
1841 */
1842 for (i = 1; i < target.nkeys; i++)
1843 {
1844 write_id(&target, needid[target.keys[i].name].need);
1845 write_id(&target, needid[target.keys[i].type].need);
1846 if (target.keys[i].storage != KEY_STORAGE_VERTICAL_OFFSET)
1847 {
1848 if (target.keys[i].type == type_constantid)
1849 write_id(&target, needid[target.keys[i].size].need);
1850 else
1851 write_id(&target, target.keys[i].size);
1852 }
1853 else
1854 write_id(&target, cbdata.extdata[i].len);
1855 write_id(&target, target.keys[i].storage);
1856 }
1857
1858 /*
1859 * write schemata
1860 */
1861 write_id(&target, target.schemadatalen); /* XXX -1? */
1862 for (i = 1; i < target.nschemata; i++)
1863 write_idarray(&target, pool, 0, repodata_id2schema(&target, i));
1864
1865 /********************************************************************/
1866
1867 write_id(&target, cbdata.maxdata);
1868 write_id(&target, cbdata.extdata[0].len);
1869 if (cbdata.extdata[0].len)
1870 write_blob(&target, cbdata.extdata[0].buf, cbdata.extdata[0].len);
1871 solv_free(cbdata.extdata[0].buf);
1872
1873 /* do we have vertical data? */
1874 for (i = 1; i < target.nkeys; i++)
1875 if (cbdata.extdata[i].len)
1876 break;
1877 if (i < target.nkeys)
1878 {
1879 /* yes, write it in pages */
1880 unsigned char *dp, vpage[REPOPAGE_BLOBSIZE];
1881 int l, ll, lpage = 0;
1882
1883 write_u32(&target, REPOPAGE_BLOBSIZE);
1884 for (i = 1; i < target.nkeys; i++)
1885 {
1886 if (!cbdata.extdata[i].len)
1887 continue;
1888 l = cbdata.extdata[i].len;
1889 dp = cbdata.extdata[i].buf;
1890 while (l)
1891 {
1892 ll = REPOPAGE_BLOBSIZE - lpage;
1893 if (l < ll)
1894 ll = l;
1895 memcpy(vpage + lpage, dp, ll);
1896 dp += ll;
1897 lpage += ll;
1898 l -= ll;
1899 if (lpage == REPOPAGE_BLOBSIZE)
1900 {
1901 write_compressed_page(&target, vpage, lpage);
1902 lpage = 0;
1903 }
1904 }
1905 }
1906 if (lpage)
1907 write_compressed_page(&target, vpage, lpage);
1908 }
1909
1910 for (i = 1; i < target.nkeys; i++)
1911 solv_free(cbdata.extdata[i].buf);
1912 solv_free(cbdata.extdata);
1913
1914 target.fp = 0;
1915 repodata_freedata(&target);
1916
1917 solv_free(needid);
1918 solv_free(cbdata.solvschemata);
1919 solv_free(cbdata.schema);
1920
1921 solv_free(cbdata.keymap);
1922 solv_free(cbdata.keymapstart);
1923 solv_free(cbdata.dirused);
1924 solv_free(repodataused);
1925 return target.error;
1926 }
1927
1928 struct repodata_write_data {
1929 int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata);
1930 void *kfdata;
1931 int repodataid;
1932 };
1933
1934 static int
repodata_write_keyfilter(Repo * repo,Repokey * key,void * kfdata)1935 repodata_write_keyfilter(Repo *repo, Repokey *key, void *kfdata)
1936 {
1937 struct repodata_write_data *wd = kfdata;
1938
1939 /* XXX: special repodata selection hack */
1940 if (key->name == 1 && key->size != wd->repodataid)
1941 return -1;
1942 if (key->storage == KEY_STORAGE_SOLVABLE)
1943 return KEY_STORAGE_DROPPED; /* not part of this repodata */
1944 if (wd->keyfilter)
1945 return (*wd->keyfilter)(repo, key, wd->kfdata);
1946 return key->storage;
1947 }
1948
1949 int
repodata_write_filtered(Repodata * data,FILE * fp,int (* keyfilter)(Repo * repo,Repokey * key,void * kfdata),void * kfdata,Queue * keyq)1950 repodata_write_filtered(Repodata *data, FILE *fp, int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata), void *kfdata, Queue *keyq)
1951 {
1952 struct repodata_write_data wd;
1953
1954 wd.keyfilter = keyfilter;
1955 wd.kfdata = kfdata;
1956 wd.repodataid = data->repodataid;
1957 return repo_write_filtered(data->repo, fp, repodata_write_keyfilter, &wd, keyq);
1958 }
1959
1960 int
repodata_write(Repodata * data,FILE * fp)1961 repodata_write(Repodata *data, FILE *fp)
1962 {
1963 return repodata_write_filtered(data, fp, repo_write_stdkeyfilter, 0, 0);
1964 }
1965
1966 int
repo_write(Repo * repo,FILE * fp)1967 repo_write(Repo *repo, FILE *fp)
1968 {
1969 return repo_write_filtered(repo, fp, repo_write_stdkeyfilter, 0, 0);
1970 }
1971