xref: /haiku/src/libs/libsolv/solv/repo_write.c (revision 909af08f4328301fbdef1ffb41f566c3b5bec0c7)
1 /*
2  * Copyright (c) 2007-2011, Novell Inc.
3  *
4  * This program is licensed under the BSD license, read LICENSE.BSD
5  * for further information
6  */
7 
8 /*
9  * repo_write.c
10  *
11  * Write Repo data out to a file in solv format
12  *
13  * See doc/README.format for a description
14  * of the binary file format
15  *
16  */
17 
18 #include <sys/types.h>
19 #include <limits.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <assert.h>
25 #include <errno.h>
26 
27 #include "pool.h"
28 #include "util.h"
29 #include "repo_write.h"
30 #include "repopage.h"
31 
32 /*------------------------------------------------------------------*/
33 /* Id map optimizations */
34 
35 typedef struct needid {
36   Id need;
37   Id map;
38 } NeedId;
39 
40 
41 #define RELOFF(id) (needid[0].map + GETRELID(id))
42 
43 /*
44  * increment need Id
45  * idarray: array of Ids, ID_NULL terminated
46  * needid: array of Id->NeedId
47  *
48  * return size of array (including trailing zero)
49  *
50  */
51 
52 static void
53 incneedid(Pool *pool, Id id, NeedId *needid)
54 {
55   while (ISRELDEP(id))
56     {
57       Reldep *rd = GETRELDEP(pool, id);
58       needid[RELOFF(id)].need++;
59       if (ISRELDEP(rd->evr))
60 	incneedid(pool, rd->evr, needid);
61       else
62 	needid[rd->evr].need++;
63       id = rd->name;
64     }
65   needid[id].need++;
66 }
67 
68 static int
69 incneedidarray(Pool *pool, Id *idarray, NeedId *needid)
70 {
71   Id id;
72   int n = 0;
73 
74   if (!idarray)
75     return 0;
76   while ((id = *idarray++) != 0)
77     {
78       n++;
79       while (ISRELDEP(id))
80 	{
81 	  Reldep *rd = GETRELDEP(pool, id);
82 	  needid[RELOFF(id)].need++;
83 	  if (ISRELDEP(rd->evr))
84 	    incneedid(pool, rd->evr, needid);
85 	  else
86 	    needid[rd->evr].need++;
87 	  id = rd->name;
88 	}
89       needid[id].need++;
90     }
91   return n + 1;
92 }
93 
94 
95 /*
96  *
97  */
98 
99 static int
100 needid_cmp_need(const void *ap, const void *bp, void *dp)
101 {
102   const NeedId *a = ap;
103   const NeedId *b = bp;
104   int r;
105   r = b->need - a->need;
106   if (r)
107     return r;
108   return a->map - b->map;
109 }
110 
111 static int
112 needid_cmp_need_s(const void *ap, const void *bp, void *dp)
113 {
114   const NeedId *a = ap;
115   const NeedId *b = bp;
116   Stringpool *spool = dp;
117   const char *as;
118   const char *bs;
119 
120   int r;
121   r = b->need - a->need;
122   if (r)
123     return r;
124   as = spool->stringspace + spool->strings[a->map];
125   bs = spool->stringspace + spool->strings[b->map];
126   return strcmp(as, bs);
127 }
128 
129 
130 /*------------------------------------------------------------------*/
131 /* output helper routines, used for writing the header */
132 /* (the data itself is accumulated in memory and written with
133  * write_blob) */
134 
135 /*
136  * unsigned 32-bit
137  */
138 
139 static void
140 write_u32(Repodata *data, unsigned int x)
141 {
142   FILE *fp = data->fp;
143   if (data->error)
144     return;
145   if (putc(x >> 24, fp) == EOF ||
146       putc(x >> 16, fp) == EOF ||
147       putc(x >> 8, fp) == EOF ||
148       putc(x, fp) == EOF)
149     {
150       data->error = pool_error(data->repo->pool, -1, "write error u32: %s", strerror(errno));
151     }
152 }
153 
154 
155 /*
156  * unsigned 8-bit
157  */
158 
159 static void
160 write_u8(Repodata *data, unsigned int x)
161 {
162   if (data->error)
163     return;
164   if (putc(x, data->fp) == EOF)
165     {
166       data->error = pool_error(data->repo->pool, -1, "write error u8: %s", strerror(errno));
167     }
168 }
169 
170 /*
171  * data blob
172  */
173 
174 static void
175 write_blob(Repodata *data, void *blob, int len)
176 {
177   if (data->error)
178     return;
179   if (len && fwrite(blob, len, 1, data->fp) != 1)
180     {
181       data->error = pool_error(data->repo->pool, -1, "write error blob: %s", strerror(errno));
182     }
183 }
184 
185 /*
186  * Id
187  */
188 
189 static void
190 write_id(Repodata *data, Id x)
191 {
192   FILE *fp = data->fp;
193   if (data->error)
194     return;
195   if (x >= (1 << 14))
196     {
197       if (x >= (1 << 28))
198 	putc((x >> 28) | 128, fp);
199       if (x >= (1 << 21))
200 	putc((x >> 21) | 128, fp);
201       putc((x >> 14) | 128, fp);
202     }
203   if (x >= (1 << 7))
204     putc((x >> 7) | 128, fp);
205   if (putc(x & 127, fp) == EOF)
206     {
207       data->error = pool_error(data->repo->pool, -1, "write error id: %s", strerror(errno));
208     }
209 }
210 
211 static inline void
212 write_id_eof(Repodata *data, Id x, int eof)
213 {
214   if (x >= 64)
215     x = (x & 63) | ((x & ~63) << 1);
216   write_id(data, x | (eof ? 0 : 64));
217 }
218 
219 
220 
221 static inline void
222 write_str(Repodata *data, const char *str)
223 {
224   if (data->error)
225     return;
226   if (fputs(str, data->fp) == EOF || putc(0, data->fp) == EOF)
227     {
228       data->error = pool_error(data->repo->pool, -1, "write error str: %s", strerror(errno));
229     }
230 }
231 
232 /*
233  * Array of Ids
234  */
235 
236 static void
237 write_idarray(Repodata *data, Pool *pool, NeedId *needid, Id *ids)
238 {
239   Id id;
240   if (!ids)
241     return;
242   if (!*ids)
243     {
244       write_u8(data, 0);
245       return;
246     }
247   for (;;)
248     {
249       id = *ids++;
250       if (needid)
251         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
252       if (id >= 64)
253 	id = (id & 63) | ((id & ~63) << 1);
254       if (!*ids)
255 	{
256 	  write_id(data, id);
257 	  return;
258 	}
259       write_id(data, id | 64);
260     }
261 }
262 
263 static int
264 cmp_ids(const void *pa, const void *pb, void *dp)
265 {
266   Id a = *(Id *)pa;
267   Id b = *(Id *)pb;
268   return a - b;
269 }
270 
#if 0
/*
 * Disabled: variant of data_addidarray_sort() that writes straight to
 * the output stream with write_id()/write_u8() instead of appending to
 * an extdata buffer.
 *
 * The ids in front of "marker" and the ids behind it are sorted
 * separately and then written as differences to their predecessor.
 */
static void
write_idarray_sort(Repodata *data, Pool *pool, NeedId *needid, Id *ids, Id marker)
{
  int len, i;
  Id lids[64], *sids;
  Id id, old;

  if (!ids)
    return;
  if (!*ids)
    {
      write_u8(data, 0);
      return;
    }

  /* small arrays are handled on the stack, bigger ones get a heap copy */
  for (len = 0; ids[len]; len++)
    ;
  sids = len > 64 ? solv_malloc2(len, sizeof(Id)) : lids;
  for (i = 0; i < len; i++)
    {
      id = ids[i];
      if (needid)
        id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
      sids[i] = id;
    }

  /* That bloody solvable:prereqmarker needs to stay in position :-(  */
  if (needid)
    marker = needid[marker].need;
  for (i = 0; i < len; i++)
    if (sids[i] == marker)
      break;
  if (i > 1)
    solv_sort(sids, i, sizeof(Id), cmp_ids, 0);
  if ((len - i) > 2)
    solv_sort(sids + i + 1, len - i - 1, sizeof(Id), cmp_ids, 0);

  /* The differencing below produces many runs of ones and twos.  I tried
     fairly elaborate schemes to RLE those, but they give only very mediocre
     improvements in compression, as coding the escapes costs quite some
     space.  Even if they are coded only as bits in IDs.  The best improvement
     was about 2.7% for the whole .solv file.  It's probably better to
     invest some complexity into sharing idarrays, than RLEing.  */
  old = 0;
  for (i = 0; i < len; i++)
    {
      id = sids[i];
      /* Ugly PREREQ handling.  A "difference" of 0 is the prereq marker,
         hence all real differences are offset by 1.  Otherwise we would
         have to handle negative differences, which would cost code space for
         the encoding of the sign.  We lose the exact mapping of prereq here,
         but we know the result, so we can recover from that in the reader.  */
      if (id == marker)
        id = old = 0;
      else
        {
          id = id - old + 1;
          old = sids[i];
        }
      /* XXX If difference is zero we have multiple equal elements,
         we might want to skip writing them out.  */
      if (id >= 64)
        id = (id & 63) | ((id & ~63) << 1);
      /* bit 6 flags "more entries follow" */
      write_id(data, i < len - 1 ? (id | 64) : id);
    }
  if (sids != lids)
    solv_free(sids);
}
#endif
361 
362 
/*
 * Growable byte buffer the encoded data is appended to
 * (see the data_add*() helpers).
 */
struct extdata {
  unsigned char *buf;   /* start of the buffer, grown with solv_extend() */
  int len;              /* number of bytes currently used */
};
367 
368 struct cbdata {
369   Repo *repo;
370   Repodata *target;
371 
372   Stringpool *ownspool;
373   Dirpool *owndirpool;
374 
375   Id *keymap;
376   int nkeymap;
377   Id *keymapstart;
378 
379   NeedId *needid;
380 
381   Id *schema;		/* schema construction space */
382   Id *sp;		/* pointer in above */
383   Id *oldschema, *oldsp;
384 
385   Id *solvschemata;
386   Id *subschemata;
387   int nsubschemata;
388   int current_sub;
389 
390   struct extdata *extdata;
391 
392   Id *dirused;
393 
394   Id vstart;
395 
396   Id maxdata;
397   Id lastlen;
398 
399   int doingsolvables;	/* working on solvables data */
400 };
401 
/*
 * Growth granularities of the dynamic arrays.  All values are of the
 * form 2^n - 1 so that they can be used as bit masks.
 */
#define NEEDED_BLOCK            1023
#define SCHEMATA_BLOCK          31
#define SCHEMATADATA_BLOCK      255
#define EXTDATA_BLOCK           4095
406 
407 static inline void
408 data_addid(struct extdata *xd, Id sx)
409 {
410   unsigned int x = (unsigned int)sx;
411   unsigned char *dp;
412 
413   xd->buf = solv_extend(xd->buf, xd->len, 5, 1, EXTDATA_BLOCK);
414   dp = xd->buf + xd->len;
415 
416   if (x >= (1 << 14))
417     {
418       if (x >= (1 << 28))
419 	*dp++ = (x >> 28) | 128;
420       if (x >= (1 << 21))
421 	*dp++ = (x >> 21) | 128;
422       *dp++ = (x >> 14) | 128;
423     }
424   if (x >= (1 << 7))
425     *dp++ = (x >> 7) | 128;
426   *dp++ = x & 127;
427   xd->len = dp - xd->buf;
428 }
429 
430 static inline void
431 data_addideof(struct extdata *xd, Id sx, int eof)
432 {
433   unsigned int x = (unsigned int)sx;
434   unsigned char *dp;
435 
436   xd->buf = solv_extend(xd->buf, xd->len, 5, 1, EXTDATA_BLOCK);
437   dp = xd->buf + xd->len;
438 
439   if (x >= (1 << 13))
440     {
441       if (x >= (1 << 27))
442         *dp++ = (x >> 27) | 128;
443       if (x >= (1 << 20))
444         *dp++ = (x >> 20) | 128;
445       *dp++ = (x >> 13) | 128;
446     }
447   if (x >= (1 << 6))
448     *dp++ = (x >> 6) | 128;
449   *dp++ = eof ? (x & 63) : (x & 63) | 64;
450   xd->len = dp - xd->buf;
451 }
452 
453 static void
454 data_addid64(struct extdata *xd, unsigned int x, unsigned int hx)
455 {
456   if (hx)
457     {
458       if (hx > 7)
459         {
460           data_addid(xd, (Id)(hx >> 3));
461           xd->buf[xd->len - 1] |= 128;
462 	  hx &= 7;
463         }
464       data_addid(xd, (Id)(x | 0x80000000));
465       xd->buf[xd->len - 5] = (x >> 28) | (hx << 4) | 128;
466     }
467   else
468     data_addid(xd, (Id)x);
469 }
470 
471 static void
472 data_addidarray_sort(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marker)
473 {
474   int len, i;
475   Id lids[64], *sids;
476   Id id, old;
477 
478   if (!ids)
479     return;
480   if (!*ids)
481     {
482       data_addid(xd, 0);
483       return;
484     }
485   for (len = 0; len < 64 && ids[len]; len++)
486     {
487       Id id = ids[len];
488       if (needid)
489         id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
490       lids[len] = id;
491     }
492   if (ids[len])
493     {
494       for (i = len + 1; ids[i]; i++)
495 	;
496       sids = solv_malloc2(i, sizeof(Id));
497       memcpy(sids, lids, 64 * sizeof(Id));
498       for (; ids[len]; len++)
499 	{
500 	  Id id = ids[len];
501 	  if (needid)
502             id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
503 	  sids[len] = id;
504 	}
505     }
506   else
507     sids = lids;
508 
509   /* That bloody solvable:prereqmarker needs to stay in position :-(  */
510   if (needid)
511     marker = needid[marker].need;
512   for (i = 0; i < len; i++)
513     if (sids[i] == marker)
514       break;
515   if (i > 1)
516     solv_sort(sids, i, sizeof(Id), cmp_ids, 0);
517   if ((len - i) > 2)
518     solv_sort(sids + i + 1, len - i - 1, sizeof(Id), cmp_ids, 0);
519 
520   old = 0;
521 
522   /* The differencing above produces many runs of ones and twos.  I tried
523      fairly elaborate schemes to RLE those, but they give only very mediocre
524      improvements in compression, as coding the escapes costs quite some
525      space.  Even if they are coded only as bits in IDs.  The best improvement
526      was about 2.7% for the whole .solv file.  It's probably better to
527      invest some complexity into sharing idarrays, than RLEing.  */
528   for (i = 0; i < len - 1; i++)
529     {
530       id = sids[i];
531     /* Ugly PREREQ handling.  A "difference" of 0 is the prereq marker,
532        hence all real differences are offsetted by 1.  Otherwise we would
533        have to handle negative differences, which would cost code space for
534        the encoding of the sign.  We loose the exact mapping of prereq here,
535        but we know the result, so we can recover from that in the reader.  */
536       if (id == marker)
537 	id = old = 0;
538       else
539 	{
540           id = id - old + 1;
541 	  old = sids[i];
542 	}
543       /* XXX If difference is zero we have multiple equal elements,
544 	 we might want to skip writing them out.  */
545       data_addideof(xd, id, 0);
546     }
547   id = sids[i];
548   if (id == marker)
549     id = 0;
550   else
551     id = id - old + 1;
552   data_addideof(xd, id, 1);
553   if (sids != lids)
554     solv_free(sids);
555 }
556 
557 static inline void
558 data_addblob(struct extdata *xd, unsigned char *blob, int len)
559 {
560   xd->buf = solv_extend(xd->buf, xd->len, len, 1, EXTDATA_BLOCK);
561   memcpy(xd->buf + xd->len, blob, len);
562   xd->len += len;
563 }
564 
/*
 * Append a 32-bit number to the buffer, most significant byte first.
 */
static inline void
data_addu32(struct extdata *xd, unsigned int num)
{
  unsigned char be[4];
  int i;

  for (i = 0; i < 4; i++)
    be[i] = num >> (24 - 8 * i);
  data_addblob(xd, be, 4);
}
575 
576 static Id
577 putinownpool(struct cbdata *cbdata, Stringpool *ss, Id id)
578 {
579   const char *str = stringpool_id2str(ss, id);
580   id = stringpool_str2id(cbdata->ownspool, str, 1);
581   if (id >= cbdata->needid[0].map)
582     {
583       int oldoff = cbdata->needid[0].map;
584       int newoff = (id + 1 + NEEDED_BLOCK) & ~NEEDED_BLOCK;
585       int nrels = cbdata->repo->pool->nrels;
586       cbdata->needid = solv_realloc2(cbdata->needid, newoff + nrels, sizeof(NeedId));
587       if (nrels)
588 	memmove(cbdata->needid + newoff, cbdata->needid + oldoff, nrels * sizeof(NeedId));
589       memset(cbdata->needid + oldoff, 0, (newoff - oldoff) * sizeof(NeedId));
590       cbdata->needid[0].map = newoff;
591     }
592   return id;
593 }
594 
595 static Id
596 putinowndirpool(struct cbdata *cbdata, Repodata *data, Dirpool *dp, Id dir)
597 {
598   Id compid, parent;
599 
600   parent = dirpool_parent(dp, dir);
601   if (parent)
602     parent = putinowndirpool(cbdata, data, dp, parent);
603   compid = dp->dirs[dir];
604   if (cbdata->ownspool && compid > 1)
605     compid = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, compid);
606   return dirpool_add_dir(cbdata->owndirpool, parent, compid, 1);
607 }
608 
609 /*
610  * collect usage information about the dirs
611  * 1: dir used, no child of dir used
612  * 2: dir used as parent of another used dir
613  */
614 static inline void
615 setdirused(struct cbdata *cbdata, Dirpool *dp, Id dir)
616 {
617   if (cbdata->dirused[dir])
618     return;
619   cbdata->dirused[dir] = 1;
620   while ((dir = dirpool_parent(dp, dir)) != 0)
621     {
622       if (cbdata->dirused[dir] == 2)
623 	return;
624       if (cbdata->dirused[dir])
625         {
626 	  cbdata->dirused[dir] = 2;
627 	  return;
628         }
629       cbdata->dirused[dir] = 2;
630     }
631   cbdata->dirused[0] = 2;
632 }
633 
634 /*
635  * pass 1 callback:
636  * collect key/id/dirid usage information, create needed schemas
637  */
638 static int
639 repo_write_collect_needed(struct cbdata *cbdata, Repo *repo, Repodata *data, Repokey *key, KeyValue *kv)
640 {
641   Id id;
642   int rm;
643 
644   if (key->name == REPOSITORY_SOLVABLES)
645     return SEARCH_NEXT_KEY;	/* we do not want this one */
646 
647   /* hack: ignore some keys, see BUGS */
648   if (data->repodataid != data->repo->nrepodata - 1)
649     if (key->name == REPOSITORY_ADDEDFILEPROVIDES || key->name == REPOSITORY_EXTERNAL || key->name == REPOSITORY_LOCATION || key->name == REPOSITORY_KEYS || key->name == REPOSITORY_TOOLVERSION)
650       return SEARCH_NEXT_KEY;
651 
652   rm = cbdata->keymap[cbdata->keymapstart[data->repodataid] + (key - data->keys)];
653   if (!rm)
654     return SEARCH_NEXT_KEY;	/* we do not want this one */
655 
656   /* record key in schema */
657   if ((key->type != REPOKEY_TYPE_FIXARRAY || kv->eof == 0)
658       && (cbdata->sp == cbdata->schema || cbdata->sp[-1] != rm))
659     *cbdata->sp++ = rm;
660 
661   switch(key->type)
662     {
663       case REPOKEY_TYPE_ID:
664       case REPOKEY_TYPE_IDARRAY:
665 	id = kv->id;
666 	if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
667 	  id = putinownpool(cbdata, data->localpool ? &data->spool : &repo->pool->ss, id);
668 	incneedid(repo->pool, id, cbdata->needid);
669 	break;
670       case REPOKEY_TYPE_DIR:
671       case REPOKEY_TYPE_DIRNUMNUMARRAY:
672       case REPOKEY_TYPE_DIRSTRARRAY:
673 	id = kv->id;
674 	if (cbdata->owndirpool)
675 	  putinowndirpool(cbdata, data, &data->dirpool, id);
676 	else
677 	  setdirused(cbdata, &data->dirpool, id);
678 	break;
679       case REPOKEY_TYPE_FIXARRAY:
680 	if (kv->eof == 0)
681 	  {
682 	    if (cbdata->oldschema)
683 	      {
684 		cbdata->target->error = pool_error(cbdata->repo->pool, -1, "nested fixarray structs not yet implemented");
685 		return SEARCH_NEXT_KEY;
686 	      }
687 	    cbdata->oldschema = cbdata->schema;
688 	    cbdata->oldsp = cbdata->sp;
689 	    cbdata->schema = solv_calloc(cbdata->target->nkeys, sizeof(Id));
690 	    cbdata->sp = cbdata->schema;
691 	  }
692 	else if (kv->eof == 1)
693 	  {
694 	    cbdata->current_sub++;
695 	    *cbdata->sp = 0;
696 	    cbdata->subschemata = solv_extend(cbdata->subschemata, cbdata->nsubschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
697 	    cbdata->subschemata[cbdata->nsubschemata++] = repodata_schema2id(cbdata->target, cbdata->schema, 1);
698 #if 0
699 	    fprintf(stderr, "Have schema %d\n", cbdata->subschemata[cbdata->nsubschemata-1]);
700 #endif
701 	    cbdata->sp = cbdata->schema;
702 	  }
703 	else
704 	  {
705 	    solv_free(cbdata->schema);
706 	    cbdata->schema = cbdata->oldschema;
707 	    cbdata->sp = cbdata->oldsp;
708 	    cbdata->oldsp = cbdata->oldschema = 0;
709 	  }
710 	break;
711       case REPOKEY_TYPE_FLEXARRAY:
712 	if (kv->entry == 0)
713 	  {
714 	    if (kv->eof != 2)
715 	      *cbdata->sp++ = 0;	/* mark start */
716 	  }
717 	else
718 	  {
719 	    /* just finished a schema, rewind */
720 	    Id *sp = cbdata->sp - 1;
721 	    *sp = 0;
722 	    while (sp[-1])
723 	      sp--;
724 	    cbdata->subschemata = solv_extend(cbdata->subschemata, cbdata->nsubschemata, 1, sizeof(Id), SCHEMATA_BLOCK);
725 	    cbdata->subschemata[cbdata->nsubschemata++] = repodata_schema2id(cbdata->target, sp, 1);
726 	    cbdata->sp = kv->eof == 2 ? sp - 1: sp;
727 	  }
728 	break;
729       default:
730 	break;
731     }
732   return 0;
733 }
734 
735 static int
736 repo_write_cb_needed(void *vcbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv)
737 {
738   struct cbdata *cbdata = vcbdata;
739   Repo *repo = data->repo;
740 
741 #if 0
742   if (s)
743     fprintf(stderr, "solvable %d (%s): key (%d)%s %d\n", s ? s - repo->pool->solvables : 0, s ? pool_id2str(repo->pool, s->name) : "", key->name, pool_id2str(repo->pool, key->name), key->type);
744 #endif
745   return repo_write_collect_needed(cbdata, repo, data, key, kv);
746 }
747 
748 
749 /*
750  * pass 2 callback:
751  * encode all of the data into the correct buffers
752  */
753 
754 static int
755 repo_write_adddata(struct cbdata *cbdata, Repodata *data, Repokey *key, KeyValue *kv)
756 {
757   int rm;
758   Id id;
759   unsigned int u32;
760   unsigned char v[4];
761   struct extdata *xd;
762   NeedId *needid;
763 
764   if (key->name == REPOSITORY_SOLVABLES)
765     return SEARCH_NEXT_KEY;
766 
767   /* hack: ignore some keys, see BUGS */
768   if (data->repodataid != data->repo->nrepodata - 1)
769     if (key->name == REPOSITORY_ADDEDFILEPROVIDES || key->name == REPOSITORY_EXTERNAL || key->name == REPOSITORY_LOCATION || key->name == REPOSITORY_KEYS || key->name == REPOSITORY_TOOLVERSION)
770       return SEARCH_NEXT_KEY;
771 
772   rm = cbdata->keymap[cbdata->keymapstart[data->repodataid] + (key - data->keys)];
773   if (!rm)
774     return SEARCH_NEXT_KEY;	/* we do not want this one */
775 
776   if (cbdata->target->keys[rm].storage == KEY_STORAGE_VERTICAL_OFFSET)
777     {
778       xd = cbdata->extdata + rm;	/* vertical buffer */
779       if (cbdata->vstart == -1)
780         cbdata->vstart = xd->len;
781     }
782   else
783     xd = cbdata->extdata + 0;		/* incore buffer */
784   switch(key->type)
785     {
786       case REPOKEY_TYPE_VOID:
787       case REPOKEY_TYPE_CONSTANT:
788       case REPOKEY_TYPE_CONSTANTID:
789 	break;
790       case REPOKEY_TYPE_ID:
791 	id = kv->id;
792 	if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
793 	  id = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, id);
794 	needid = cbdata->needid;
795 	id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
796 	data_addid(xd, id);
797 	break;
798       case REPOKEY_TYPE_IDARRAY:
799 	id = kv->id;
800 	if (!ISRELDEP(id) && cbdata->ownspool && id > 1)
801 	  id = putinownpool(cbdata, data->localpool ? &data->spool : &data->repo->pool->ss, id);
802 	needid = cbdata->needid;
803 	id = needid[ISRELDEP(id) ? RELOFF(id) : id].need;
804 	data_addideof(xd, id, kv->eof);
805 	break;
806       case REPOKEY_TYPE_STR:
807 	data_addblob(xd, (unsigned char *)kv->str, strlen(kv->str) + 1);
808 	break;
809       case REPOKEY_TYPE_MD5:
810 	data_addblob(xd, (unsigned char *)kv->str, SIZEOF_MD5);
811 	break;
812       case REPOKEY_TYPE_SHA1:
813 	data_addblob(xd, (unsigned char *)kv->str, SIZEOF_SHA1);
814 	break;
815       case REPOKEY_TYPE_SHA256:
816 	data_addblob(xd, (unsigned char *)kv->str, SIZEOF_SHA256);
817 	break;
818       case REPOKEY_TYPE_U32:
819 	u32 = kv->num;
820 	v[0] = u32 >> 24;
821 	v[1] = u32 >> 16;
822 	v[2] = u32 >> 8;
823 	v[3] = u32;
824 	data_addblob(xd, v, 4);
825 	break;
826       case REPOKEY_TYPE_NUM:
827 	data_addid64(xd, kv->num, kv->num2);
828 	break;
829       case REPOKEY_TYPE_DIR:
830 	id = kv->id;
831 	if (cbdata->owndirpool)
832 	  id = putinowndirpool(cbdata, data, &data->dirpool, id);
833 	id = cbdata->dirused[id];
834 	data_addid(xd, id);
835 	break;
836       case REPOKEY_TYPE_BINARY:
837 	data_addid(xd, kv->num);
838 	if (kv->num)
839 	  data_addblob(xd, (unsigned char *)kv->str, kv->num);
840 	break;
841       case REPOKEY_TYPE_DIRNUMNUMARRAY:
842 	id = kv->id;
843 	if (cbdata->owndirpool)
844 	  id = putinowndirpool(cbdata, data, &data->dirpool, id);
845 	id = cbdata->dirused[id];
846 	data_addid(xd, id);
847 	data_addid(xd, kv->num);
848 	data_addideof(xd, kv->num2, kv->eof);
849 	break;
850       case REPOKEY_TYPE_DIRSTRARRAY:
851 	id = kv->id;
852 	if (cbdata->owndirpool)
853 	  id = putinowndirpool(cbdata, data, &data->dirpool, id);
854 	id = cbdata->dirused[id];
855 	data_addideof(xd, id, kv->eof);
856 	data_addblob(xd, (unsigned char *)kv->str, strlen(kv->str) + 1);
857 	break;
858       case REPOKEY_TYPE_FIXARRAY:
859 	if (kv->eof == 0)
860 	  {
861 	    if (kv->num)
862 	      {
863 		data_addid(xd, kv->num);
864 		data_addid(xd, cbdata->subschemata[cbdata->current_sub]);
865 #if 0
866 		fprintf(stderr, "writing %d %d\n", kv->num, cbdata->subschemata[cbdata->current_sub]);
867 #endif
868 	      }
869 	  }
870 	else if (kv->eof == 1)
871 	  {
872 	    cbdata->current_sub++;
873 	  }
874 	break;
875       case REPOKEY_TYPE_FLEXARRAY:
876 	if (!kv->entry)
877 	  data_addid(xd, kv->num);
878 	if (kv->eof != 2)
879 	  data_addid(xd, cbdata->subschemata[cbdata->current_sub++]);
880 	if (xd == cbdata->extdata + 0 && !kv->parent && !cbdata->doingsolvables)
881 	  {
882 	    if (xd->len - cbdata->lastlen > cbdata->maxdata)
883 	      cbdata->maxdata = xd->len - cbdata->lastlen;
884 	    cbdata->lastlen = xd->len;
885 	  }
886 	break;
887       default:
888 	cbdata->target->error = pool_error(cbdata->repo->pool, -1, "unknown type for %d: %d\n", key->name, key->type);
889 	break;
890     }
891   if (cbdata->target->keys[rm].storage == KEY_STORAGE_VERTICAL_OFFSET && kv->eof)
892     {
893       /* we can re-use old data in the blob here! */
894       data_addid(cbdata->extdata + 0, cbdata->vstart);			/* add offset into incore data */
895       data_addid(cbdata->extdata + 0, xd->len - cbdata->vstart);	/* add length into incore data */
896       cbdata->vstart = -1;
897     }
898   return 0;
899 }
900 
901 static int
902 repo_write_cb_adddata(void *vcbdata, Solvable *s, Repodata *data, Repokey *key, KeyValue *kv)
903 {
904   struct cbdata *cbdata = vcbdata;
905   return repo_write_adddata(cbdata, data, key, kv);
906 }
907 
908 /* traverse through directory with first child "dir" */
909 static int
910 traverse_dirs(Dirpool *dp, Id *dirmap, Id n, Id dir, Id *used)
911 {
912   Id sib, child;
913   Id parent, lastn;
914 
915   parent = n;
916   /* special case for '/', which has to come first */
917   if (parent == 1)
918     dirmap[n++] = 1;
919   for (sib = dir; sib; sib = dirpool_sibling(dp, sib))
920     {
921       if (used && !used[sib])
922 	continue;
923       if (sib == 1 && parent == 1)
924 	continue;	/* already did that one above */
925       dirmap[n++] = sib;
926     }
927 
928   /* now go through all the siblings we just added and
929    * do recursive calls on them */
930   lastn = n;
931   for (; parent < lastn; parent++)
932     {
933       sib = dirmap[parent];
934       if (used && used[sib] != 2)	/* 2: used as parent */
935 	continue;
936       child = dirpool_child(dp, sib);
937       if (child)
938 	{
939 	  dirmap[n++] = -parent;	/* start new block */
940 	  n = traverse_dirs(dp, dirmap, n, child, used);
941 	}
942     }
943   return n;
944 }
945 
946 static void
947 write_compressed_page(Repodata *data, unsigned char *page, int len)
948 {
949   int clen;
950   unsigned char cpage[REPOPAGE_BLOBSIZE];
951 
952   clen = repopagestore_compress_page(page, len, cpage, len - 1);
953   if (!clen)
954     {
955       write_u32(data, len * 2);
956       write_blob(data, page, len);
957     }
958   else
959     {
960       write_u32(data, clen * 2 + 1);
961       write_blob(data, cpage, clen);
962     }
963 }
964 
965 static Id verticals[] = {
966   SOLVABLE_AUTHORS,
967   SOLVABLE_DESCRIPTION,
968   SOLVABLE_MESSAGEDEL,
969   SOLVABLE_MESSAGEINS,
970   SOLVABLE_EULA,
971   SOLVABLE_DISKUSAGE,
972   SOLVABLE_FILELIST,
973   SOLVABLE_CHECKSUM,
974   DELTA_CHECKSUM,
975   DELTA_SEQ_NUM,
976   SOLVABLE_PKGID,
977   SOLVABLE_HDRID,
978   SOLVABLE_LEADSIGID,
979   SOLVABLE_CHANGELOG_AUTHOR,
980   SOLVABLE_CHANGELOG_TEXT,
981   0
982 };
983 
984 static char *languagetags[] = {
985   "solvable:summary:",
986   "solvable:description:",
987   "solvable:messageins:",
988   "solvable:messagedel:",
989   "solvable:eula:",
990   0
991 };
992 
993 int
994 repo_write_stdkeyfilter(Repo *repo, Repokey *key, void *kfdata)
995 {
996   const char *keyname;
997   int i;
998 
999   for (i = 0; verticals[i]; i++)
1000     if (key->name == verticals[i])
1001       return KEY_STORAGE_VERTICAL_OFFSET;
1002   keyname = pool_id2str(repo->pool, key->name);
1003   for (i = 0; languagetags[i] != 0; i++)
1004     if (!strncmp(keyname, languagetags[i], strlen(languagetags[i])))
1005       return KEY_STORAGE_VERTICAL_OFFSET;
1006   return KEY_STORAGE_INCORE;
1007 }
1008 
1009 /*
1010  * Repo
1011  */
1012 
1013 /*
1014  * the code works the following way:
1015  *
1016  * 1) find which keys should be written
1017  * 2) collect usage information for keys/ids/dirids, create schema
1018  *    data
1019  * 3) use usage information to create mapping tables, so that often
1020  *    used ids get a lower number
1021  * 4) encode data into buffers using the mapping tables
1022  * 5) write everything to disk
1023  */
1024 int
1025 repo_write_filtered(Repo *repo, FILE *fp, int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata), void *kfdata, Queue *keyq)
1026 {
1027   Pool *pool = repo->pool;
1028   int i, j, n;
1029   Solvable *s;
1030   NeedId *needid;
1031   int nstrings, nrels;
1032   unsigned int sizeid;
1033   unsigned int solv_flags;
1034   Reldep *ran;
1035   Id *idarraydata;
1036 
1037   Id id, *sp;
1038 
1039   Id *dirmap;
1040   int ndirmap;
1041   Id *keyused;
1042   unsigned char *repodataused;
1043   int anyrepodataused = 0;
1044   int anysolvableused = 0;
1045 
1046   struct cbdata cbdata;
1047   int clonepool;
1048   Repokey *key;
1049   int poolusage, dirpoolusage, idused, dirused;
1050   int reloff;
1051 
1052   Repodata *data, *dirpooldata;
1053 
1054   Repodata target;
1055 
1056   Stringpool *spool;
1057   Dirpool *dirpool;
1058 
1059   Id mainschema;
1060 
1061   struct extdata *xd;
1062 
1063   Id type_constantid = REPOKEY_TYPE_CONSTANTID;
1064 
1065 
1066   memset(&cbdata, 0, sizeof(cbdata));
1067   cbdata.repo = repo;
1068   cbdata.target = &target;
1069 
1070   repodata_initdata(&target, repo, 1);
1071 
1072   /* go through all repodata and find the keys we need */
1073   /* also unify keys */
1074   /*          keymapstart - maps repo number to keymap offset */
1075   /*          keymap      - maps repo key to my key, 0 -> not used */
1076 
1077   /* start with all KEY_STORAGE_SOLVABLE ids */
1078 
1079   n = ID_NUM_INTERNAL;
1080   FOR_REPODATAS(repo, i, data)
1081     n += data->nkeys;
1082   cbdata.keymap = solv_calloc(n, sizeof(Id));
1083   cbdata.keymapstart = solv_calloc(repo->nrepodata, sizeof(Id));
1084   repodataused = solv_calloc(repo->nrepodata, 1);
1085 
1086   clonepool = 0;
1087   poolusage = 0;
1088 
1089   /* add keys for STORAGE_SOLVABLE */
1090   for (i = SOLVABLE_NAME; i <= RPM_RPMDBID; i++)
1091     {
1092       Repokey keyd;
1093       keyd.name = i;
1094       if (i < SOLVABLE_PROVIDES)
1095         keyd.type = REPOKEY_TYPE_ID;
1096       else if (i < RPM_RPMDBID)
1097         keyd.type = REPOKEY_TYPE_REL_IDARRAY;
1098       else
1099         keyd.type = REPOKEY_TYPE_NUM;
1100       keyd.size = 0;
1101       keyd.storage = KEY_STORAGE_SOLVABLE;
1102       if (keyfilter)
1103 	{
1104 	  keyd.storage = keyfilter(repo, &keyd, kfdata);
1105 	  if (keyd.storage == KEY_STORAGE_DROPPED)
1106 	    continue;
1107 	  keyd.storage = KEY_STORAGE_SOLVABLE;
1108 	}
1109       poolusage = 1;
1110       clonepool = 1;
1111       cbdata.keymap[keyd.name] = repodata_key2id(&target, &keyd, 1);
1112     }
1113 
1114   if (repo->nsolvables)
1115     {
1116       Repokey keyd;
1117       keyd.name = REPOSITORY_SOLVABLES;
1118       keyd.type = REPOKEY_TYPE_FLEXARRAY;
1119       keyd.size = 0;
1120       keyd.storage = KEY_STORAGE_INCORE;
1121       cbdata.keymap[keyd.name] = repodata_key2id(&target, &keyd, 1);
1122     }
1123 
1124   dirpoolusage = 0;
1125 
1126   spool = 0;
1127   dirpool = 0;
1128   dirpooldata = 0;
1129   n = ID_NUM_INTERNAL;
1130   FOR_REPODATAS(repo, i, data)
1131     {
1132       cbdata.keymapstart[i] = n;
1133       cbdata.keymap[n++] = 0;	/* key 0 */
1134       idused = 0;
1135       dirused = 0;
1136       if (keyfilter)
1137 	{
1138 	  Repokey keyd;
1139 	  /* check if we want this repodata */
1140 	  memset(&keyd, 0, sizeof(keyd));
1141 	  keyd.name = 1;
1142 	  keyd.type = 1;
1143 	  keyd.size = i;
1144 	  if (keyfilter(repo, &keyd, kfdata) == -1)
1145 	    continue;
1146 	}
1147       for (j = 1; j < data->nkeys; j++, n++)
1148 	{
1149 	  key = data->keys + j;
1150 	  if (key->name == REPOSITORY_SOLVABLES && key->type == REPOKEY_TYPE_FLEXARRAY)
1151 	    {
1152 	      cbdata.keymap[n] = cbdata.keymap[key->name];
1153 	      continue;
1154 	    }
1155 	  if (key->type == REPOKEY_TYPE_DELETED)
1156 	    {
1157 	      cbdata.keymap[n] = 0;
1158 	      continue;
1159 	    }
1160 	  if (key->type == REPOKEY_TYPE_CONSTANTID && data->localpool)
1161 	    {
1162 	      Repokey keyd = *key;
1163 	      keyd.size = repodata_globalize_id(data, key->size, 1);
1164 	      id = repodata_key2id(&target, &keyd, 0);
1165 	    }
1166 	  else
1167 	    id = repodata_key2id(&target, key, 0);
1168 	  if (!id)
1169 	    {
1170 	      Repokey keyd = *key;
1171 	      keyd.storage = KEY_STORAGE_INCORE;
1172 	      if (keyd.type == REPOKEY_TYPE_CONSTANTID)
1173 		keyd.size = repodata_globalize_id(data, key->size, 1);
1174 	      else if (keyd.type != REPOKEY_TYPE_CONSTANT)
1175 		keyd.size = 0;
1176 	      if (keyfilter)
1177 		{
1178 		  keyd.storage = keyfilter(repo, &keyd, kfdata);
1179 		  if (keyd.storage == KEY_STORAGE_DROPPED)
1180 		    {
1181 		      cbdata.keymap[n] = 0;
1182 		      continue;
1183 		    }
1184 		}
1185 	      id = repodata_key2id(&target, &keyd, 1);
1186 	    }
1187 	  cbdata.keymap[n] = id;
1188 	  /* load repodata if not already loaded */
1189 	  if (data->state == REPODATA_STUB)
1190 	    {
1191 	      if (data->loadcallback)
1192 		data->loadcallback(data);
1193 	      else
1194 		data->state = REPODATA_ERROR;
1195 	      if (data->state != REPODATA_ERROR)
1196 		{
1197 		  /* redo this repodata! */
1198 		  j = 0;
1199 		  n = cbdata.keymapstart[i];
1200 		  continue;
1201 		}
1202 	    }
1203 	  if (data->state == REPODATA_ERROR)
1204 	    {
1205 	      /* too bad! */
1206 	      cbdata.keymap[n] = 0;
1207 	      continue;
1208 	    }
1209 
1210 	  repodataused[i] = 1;
1211 	  anyrepodataused = 1;
1212 	  if (key->type == REPOKEY_TYPE_CONSTANTID || key->type == REPOKEY_TYPE_ID ||
1213               key->type == REPOKEY_TYPE_IDARRAY || key->type == REPOKEY_TYPE_REL_IDARRAY)
1214 	    idused = 1;
1215 	  else if (key->type == REPOKEY_TYPE_DIR || key->type == REPOKEY_TYPE_DIRNUMNUMARRAY || key->type == REPOKEY_TYPE_DIRSTRARRAY)
1216 	    {
1217 	      idused = 1;	/* dirs also use ids */
1218 	      dirused = 1;
1219 	    }
1220 	}
1221       if (idused)
1222 	{
1223 	  if (data->localpool)
1224 	    {
1225 	      if (poolusage)
1226 		poolusage = 3;	/* need own pool */
1227 	      else
1228 		{
1229 		  poolusage = 2;
1230 		  spool = &data->spool;
1231 		}
1232 	    }
1233 	  else
1234 	    {
1235 	      if (poolusage == 0)
1236 		poolusage = 1;
1237 	      else if (poolusage != 1)
1238 		poolusage = 3;	/* need own pool */
1239 	    }
1240 	}
1241       if (dirused)
1242 	{
1243 	  if (dirpoolusage)
1244 	    dirpoolusage = 3;	/* need own dirpool */
1245 	  else
1246 	    {
1247 	      dirpoolusage = 2;
1248 	      dirpool = &data->dirpool;
1249 	      dirpooldata = data;
1250 	    }
1251 	}
1252     }
1253   cbdata.nkeymap = n;
1254 
1255   /* 0: no pool needed at all */
1256   /* 1: use global pool */
1257   /* 2: use repodata local pool */
1258   /* 3: need own pool */
1259   if (poolusage == 3)
1260     {
1261       spool = &target.spool;
1262       /* hack: reuse global pool data so we don't have to map pool ids */
1263       if (clonepool)
1264 	{
1265 	  stringpool_free(spool);
1266 	  stringpool_clone(spool, &pool->ss);
1267 	}
1268       cbdata.ownspool = spool;
1269     }
1270   else if (poolusage == 0 || poolusage == 1)
1271     {
1272       poolusage = 1;
1273       spool = &pool->ss;
1274     }
1275 
1276   if (dirpoolusage == 3)
1277     {
1278       dirpool = &target.dirpool;
1279       dirpooldata = 0;
1280       cbdata.owndirpool = dirpool;
1281     }
1282   else if (dirpool)
1283     cbdata.dirused = solv_calloc(dirpool->ndirs, sizeof(Id));
1284 
1285 
1286 /********************************************************************/
1287 #if 0
1288 fprintf(stderr, "poolusage: %d\n", poolusage);
1289 fprintf(stderr, "dirpoolusage: %d\n", dirpoolusage);
1290 fprintf(stderr, "nkeys: %d\n", target.nkeys);
1291 for (i = 1; i < target.nkeys; i++)
1292   fprintf(stderr, "  %2d: %s[%d] %d %d %d\n", i, pool_id2str(pool, target.keys[i].name), target.keys[i].name, target.keys[i].type, target.keys[i].size, target.keys[i].storage);
1293 #endif
1294 
1295   /* copy keys if requested */
1296   if (keyq)
1297     {
1298       queue_empty(keyq);
1299       for (i = 1; i < target.nkeys; i++)
1300 	queue_push2(keyq, target.keys[i].name, target.keys[i].type);
1301     }
1302 
1303   if (poolusage > 1)
1304     {
1305       /* put all the keys we need in our string pool */
1306       /* put mapped ids right into target.keys */
1307       for (i = 1, key = target.keys + i; i < target.nkeys; i++, key++)
1308 	{
1309 	  key->name = stringpool_str2id(spool, pool_id2str(pool, key->name), 1);
1310 	  if (key->type == REPOKEY_TYPE_CONSTANTID)
1311 	    {
1312 	      key->type = stringpool_str2id(spool, pool_id2str(pool, key->type), 1);
1313 	      type_constantid = key->type;
1314 	      key->size = stringpool_str2id(spool, pool_id2str(pool, key->size), 1);
1315 	    }
1316 	  else
1317 	    key->type = stringpool_str2id(spool, pool_id2str(pool, key->type), 1);
1318 	}
1319       if (poolusage == 2)
1320 	stringpool_freehash(spool);	/* free some mem */
1321     }
1322 
1323 
1324 /********************************************************************/
1325 
1326   /* set needed count of all strings and rels,
1327    * find which keys are used in the solvables
1328    * put all strings in own spool
1329    */
1330 
1331   reloff = spool->nstrings;
1332   if (poolusage == 3)
1333     reloff = (reloff + NEEDED_BLOCK) & ~NEEDED_BLOCK;
1334 
1335   needid = calloc(reloff + pool->nrels, sizeof(*needid));
1336   needid[0].map = reloff;
1337 
1338   cbdata.needid = needid;
1339   cbdata.schema = solv_calloc(target.nkeys, sizeof(Id));
1340   cbdata.sp = cbdata.schema;
1341   cbdata.solvschemata = solv_calloc(repo->nsolvables, sizeof(Id));
1342 
1343   /* create main schema */
1344   cbdata.sp = cbdata.schema;
1345   /* collect all other data from all repodatas */
1346   /* XXX: merge arrays of equal keys? */
1347   FOR_REPODATAS(repo, j, data)
1348     {
1349       if (!repodataused[j])
1350 	continue;
1351       repodata_search(data, SOLVID_META, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_needed, &cbdata);
1352     }
1353   sp = cbdata.sp;
1354   /* add solvables if needed (may revert later) */
1355   if (repo->nsolvables)
1356     {
1357       *sp++ = cbdata.keymap[REPOSITORY_SOLVABLES];
1358       target.keys[cbdata.keymap[REPOSITORY_SOLVABLES]].size++;
1359     }
1360   *sp = 0;
1361   mainschema = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1362 
1363   idarraydata = repo->idarraydata;
1364 
1365   anysolvableused = 0;
1366   cbdata.doingsolvables = 1;
1367   for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
1368     {
1369       if (s->repo != repo)
1370 	continue;
1371 
1372       /* set schema info, keep in sync with further down */
1373       sp = cbdata.schema;
1374       if (cbdata.keymap[SOLVABLE_NAME])
1375 	{
1376           *sp++ = cbdata.keymap[SOLVABLE_NAME];
1377 	  needid[s->name].need++;
1378 	}
1379       if (cbdata.keymap[SOLVABLE_ARCH])
1380 	{
1381           *sp++ = cbdata.keymap[SOLVABLE_ARCH];
1382 	  needid[s->arch].need++;
1383 	}
1384       if (cbdata.keymap[SOLVABLE_EVR])
1385 	{
1386           *sp++ = cbdata.keymap[SOLVABLE_EVR];
1387 	  needid[s->evr].need++;
1388 	}
1389       if (s->vendor && cbdata.keymap[SOLVABLE_VENDOR])
1390 	{
1391           *sp++ = cbdata.keymap[SOLVABLE_VENDOR];
1392 	  needid[s->vendor].need++;
1393 	}
1394       if (s->provides && cbdata.keymap[SOLVABLE_PROVIDES])
1395         {
1396           *sp++ = cbdata.keymap[SOLVABLE_PROVIDES];
1397 	  target.keys[cbdata.keymap[SOLVABLE_PROVIDES]].size += incneedidarray(pool, idarraydata + s->provides, needid);
1398 	}
1399       if (s->obsoletes && cbdata.keymap[SOLVABLE_OBSOLETES])
1400 	{
1401           *sp++ = cbdata.keymap[SOLVABLE_OBSOLETES];
1402 	  target.keys[cbdata.keymap[SOLVABLE_OBSOLETES]].size += incneedidarray(pool, idarraydata + s->obsoletes, needid);
1403 	}
1404       if (s->conflicts && cbdata.keymap[SOLVABLE_CONFLICTS])
1405 	{
1406           *sp++ = cbdata.keymap[SOLVABLE_CONFLICTS];
1407 	  target.keys[cbdata.keymap[SOLVABLE_CONFLICTS]].size += incneedidarray(pool, idarraydata + s->conflicts, needid);
1408 	}
1409       if (s->requires && cbdata.keymap[SOLVABLE_REQUIRES])
1410 	{
1411           *sp++ = cbdata.keymap[SOLVABLE_REQUIRES];
1412 	  target.keys[cbdata.keymap[SOLVABLE_REQUIRES]].size += incneedidarray(pool, idarraydata + s->requires, needid);
1413 	}
1414       if (s->recommends && cbdata.keymap[SOLVABLE_RECOMMENDS])
1415 	{
1416           *sp++ = cbdata.keymap[SOLVABLE_RECOMMENDS];
1417 	  target.keys[cbdata.keymap[SOLVABLE_RECOMMENDS]].size += incneedidarray(pool, idarraydata + s->recommends, needid);
1418 	}
1419       if (s->suggests && cbdata.keymap[SOLVABLE_SUGGESTS])
1420 	{
1421           *sp++ = cbdata.keymap[SOLVABLE_SUGGESTS];
1422 	  target.keys[cbdata.keymap[SOLVABLE_SUGGESTS]].size += incneedidarray(pool, idarraydata + s->suggests, needid);
1423 	}
1424       if (s->supplements && cbdata.keymap[SOLVABLE_SUPPLEMENTS])
1425 	{
1426           *sp++ = cbdata.keymap[SOLVABLE_SUPPLEMENTS];
1427 	  target.keys[cbdata.keymap[SOLVABLE_SUPPLEMENTS]].size += incneedidarray(pool, idarraydata + s->supplements, needid);
1428 	}
1429       if (s->enhances && cbdata.keymap[SOLVABLE_ENHANCES])
1430 	{
1431           *sp++ = cbdata.keymap[SOLVABLE_ENHANCES];
1432 	  target.keys[cbdata.keymap[SOLVABLE_ENHANCES]].size += incneedidarray(pool, idarraydata + s->enhances, needid);
1433 	}
1434       if (repo->rpmdbid && cbdata.keymap[RPM_RPMDBID])
1435 	{
1436           *sp++ = cbdata.keymap[RPM_RPMDBID];
1437 	  target.keys[cbdata.keymap[RPM_RPMDBID]].size++;
1438 	}
1439       cbdata.sp = sp;
1440 
1441       if (anyrepodataused)
1442 	{
1443 	  FOR_REPODATAS(repo, j, data)
1444 	    {
1445 	      if (!repodataused[j])
1446 		continue;
1447 	      if (i < data->start || i >= data->end)
1448 		continue;
1449 	      repodata_search(data, i, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_needed, &cbdata);
1450 	      needid = cbdata.needid;
1451 	    }
1452 	}
1453       *cbdata.sp = 0;
1454       cbdata.solvschemata[n] = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1455       if (cbdata.solvschemata[n])
1456 	anysolvableused = 1;
1457       n++;
1458     }
1459   cbdata.doingsolvables = 0;
1460   assert(n == repo->nsolvables);
1461 
1462   if (repo->nsolvables && !anysolvableused)
1463     {
1464       /* strip off solvable from the main schema */
1465       target.keys[cbdata.keymap[REPOSITORY_SOLVABLES]].size = 0;
1466       sp = cbdata.schema;
1467       for (i = 0; target.schemadata[target.schemata[mainschema] + i]; i++)
1468 	{
1469 	  *sp = target.schemadata[target.schemata[mainschema] + i];
1470 	  if (*sp != cbdata.keymap[REPOSITORY_SOLVABLES])
1471 	    sp++;
1472 	}
1473       assert(target.schemadatalen == target.schemata[mainschema] + i + 1);
1474       *sp = 0;
1475       target.schemadatalen = target.schemata[mainschema];
1476       target.nschemata--;
1477       repodata_free_schemahash(&target);
1478       mainschema = repodata_schema2id(cbdata.target, cbdata.schema, 1);
1479     }
1480 
1481 /********************************************************************/
1482 
1483   /* remove unused keys */
1484   keyused = solv_calloc(target.nkeys, sizeof(Id));
1485   for (i = 1; i < target.schemadatalen; i++)
1486     keyused[target.schemadata[i]] = 1;
1487   keyused[0] = 0;
1488   for (n = i = 1; i < target.nkeys; i++)
1489     {
1490       if (!keyused[i])
1491 	continue;
1492       keyused[i] = n;
1493       if (i != n)
1494 	{
1495 	  target.keys[n] = target.keys[i];
1496 	  if (keyq)
1497 	    {
1498 	      keyq->elements[2 * n - 2] = keyq->elements[2 * i - 2];
1499 	      keyq->elements[2 * n - 1] = keyq->elements[2 * i - 1];
1500 	    }
1501 	}
1502       n++;
1503     }
1504   target.nkeys = n;
1505   if (keyq)
1506     queue_truncate(keyq, 2 * n - 2);
1507 
1508   /* update schema data to the new key ids */
1509   for (i = 1; i < target.schemadatalen; i++)
1510     target.schemadata[i] = keyused[target.schemadata[i]];
1511   /* update keymap to the new key ids */
1512   for (i = 0; i < cbdata.nkeymap; i++)
1513     cbdata.keymap[i] = keyused[cbdata.keymap[i]];
1514   keyused = solv_free(keyused);
1515 
1516   /* increment needid of the used keys, they are already mapped to
1517    * the correct string pool  */
1518   for (i = 1; i < target.nkeys; i++)
1519     {
1520       if (target.keys[i].type == type_constantid)
1521 	needid[target.keys[i].size].need++;
1522       needid[target.keys[i].name].need++;
1523       needid[target.keys[i].type].need++;
1524     }
1525 
1526 /********************************************************************/
1527 
1528   if (dirpool && cbdata.dirused && !cbdata.dirused[0])
1529     {
1530       /* no dirs used at all */
1531       cbdata.dirused = solv_free(cbdata.dirused);
1532       dirpool = 0;
1533     }
1534 
1535   /* increment need id for used dir components */
1536   if (dirpool)
1537     {
1538       /* if we have own dirpool, all entries in it are used.
1539 	 also, all comp ids are already mapped by putinowndirpool(),
1540 	 so we can simply increment needid.
1541 	 (owndirpool != 0, dirused == 0, dirpooldata == 0) */
1542       /* else we re-use a dirpool of repodata "dirpooldata".
1543 	 dirused tells us which of the ids are used.
1544 	 we need to map comp ids if we generate a new pool.
1545 	 (owndirpool == 0, dirused != 0, dirpooldata != 0) */
1546       for (i = 1; i < dirpool->ndirs; i++)
1547 	{
1548 #if 0
1549 fprintf(stderr, "dir %d used %d\n", i, cbdata.dirused ? cbdata.dirused[i] : 1);
1550 #endif
1551 	  if (cbdata.dirused && !cbdata.dirused[i])
1552 	    continue;
1553 	  id = dirpool->dirs[i];
1554 	  if (id <= 0)
1555 	    continue;
1556 	  if (dirpooldata && cbdata.ownspool && id > 1)
1557 	    {
1558 	      id = putinownpool(&cbdata, dirpooldata->localpool ? &dirpooldata->spool : &pool->ss, id);
1559 	      needid = cbdata.needid;
1560 	    }
1561 	  needid[id].need++;
1562 	}
1563     }
1564 
1565 
1566 /********************************************************************/
1567 
1568   /*
1569    * create mapping table, new keys are sorted by needid[].need
1570    *
1571    * needid[key].need : old key -> new key
1572    * needid[key].map  : new key -> old key
1573    */
1574 
1575   /* zero out id 0 and rel 0 just in case */
1576   reloff = needid[0].map;
1577   needid[0].need = 0;
1578   needid[reloff].need = 0;
1579 
1580   for (i = 1; i < reloff + pool->nrels; i++)
1581     needid[i].map = i;
1582 
1583 #if 0
1584   solv_sort(needid + 1, spool->nstrings - 1, sizeof(*needid), needid_cmp_need_s, spool);
1585 #else
1586   /* make first entry '' */
1587   needid[1].need = 1;
1588   solv_sort(needid + 2, spool->nstrings - 2, sizeof(*needid), needid_cmp_need_s, spool);
1589 #endif
1590   solv_sort(needid + reloff, pool->nrels, sizeof(*needid), needid_cmp_need, 0);
1591   /* now needid is in new order, needid[newid].map -> oldid */
1592 
1593   /* calculate string space size, also zero out needid[].need */
1594   sizeid = 0;
1595   for (i = 1; i < reloff; i++)
1596     {
1597       if (!needid[i].need)
1598         break;	/* as we have sorted, every entry after this also has need == 0 */
1599       needid[i].need = 0;
1600       sizeid += strlen(spool->stringspace + spool->strings[needid[i].map]) + 1;
1601     }
1602   nstrings = i;	/* our new string id end */
1603 
1604   /* make needid[oldid].need point to newid */
1605   for (i = 1; i < nstrings; i++)
1606     needid[needid[i].map].need = i;
1607 
1608   /* same as above for relations */
1609   for (i = 0; i < pool->nrels; i++)
1610     {
1611       if (!needid[reloff + i].need)
1612         break;
1613       needid[reloff + i].need = 0;
1614     }
1615   nrels = i;	/* our new rel id end */
1616 
1617   for (i = 0; i < nrels; i++)
1618     needid[needid[reloff + i].map].need = nstrings + i;
1619 
1620   /* now we have: needid[oldid].need -> newid
1621                   needid[newid].map  -> oldid
1622      both for strings and relations  */
1623 
1624 
1625 /********************************************************************/
1626 
1627   ndirmap = 0;
1628   dirmap = 0;
1629   if (dirpool)
1630     {
1631       /* create our new target directory structure by traversing through all
1632        * used dirs. This will concatenate blocks with the same parent
1633        * directory into single blocks.
1634        * Instead of components, traverse_dirs stores the old dirids,
1635        * we will change this in the second step below */
1636       /* (dirpooldata and dirused are 0 if we have our own dirpool) */
1637       if (cbdata.dirused && !cbdata.dirused[1])
1638 	cbdata.dirused[1] = 1;	/* always want / entry */
1639       dirmap = solv_calloc(dirpool->ndirs, sizeof(Id));
1640       dirmap[0] = 0;
1641       ndirmap = traverse_dirs(dirpool, dirmap, 1, dirpool_child(dirpool, 0), cbdata.dirused);
1642 
1643       /* (re)create dirused, so that it maps from "old dirid" to "new dirid" */
1644       /* change dirmap so that it maps from "new dirid" to "new compid" */
1645       if (!cbdata.dirused)
1646 	cbdata.dirused = solv_malloc2(dirpool->ndirs, sizeof(Id));
1647       memset(cbdata.dirused, 0, dirpool->ndirs * sizeof(Id));
1648       for (i = 1; i < ndirmap; i++)
1649 	{
1650 	  if (dirmap[i] <= 0)
1651 	    continue;
1652 	  cbdata.dirused[dirmap[i]] = i;
1653 	  id = dirpool->dirs[dirmap[i]];
1654 	  if (dirpooldata && cbdata.ownspool && id > 1)
1655 	    id = putinownpool(&cbdata, dirpooldata->localpool ? &dirpooldata->spool : &pool->ss, id);
1656 	  dirmap[i] = needid[id].need;
1657 	}
1658       /* now the new target directory structure is complete (dirmap), and we have
1659        * dirused[olddirid] -> newdirid */
1660     }
1661 
1662 /********************************************************************/
1663 
1664   /* collect all data
1665    * we use extdata[0] for incore data and extdata[keyid] for vertical data
1666    */
1667 
1668   cbdata.extdata = solv_calloc(target.nkeys, sizeof(struct extdata));
1669 
1670   xd = cbdata.extdata;
1671   cbdata.current_sub = 0;
1672   /* add main schema */
1673   cbdata.lastlen = 0;
1674   data_addid(xd, mainschema);
1675 
1676 #if 1
1677   FOR_REPODATAS(repo, j, data)
1678     {
1679       if (!repodataused[j])
1680 	continue;
1681       repodata_search(data, SOLVID_META, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_adddata, &cbdata);
1682     }
1683 #endif
1684 
1685   if (xd->len - cbdata.lastlen > cbdata.maxdata)
1686     cbdata.maxdata = xd->len - cbdata.lastlen;
1687   cbdata.lastlen = xd->len;
1688 
1689   if (anysolvableused)
1690     {
1691       data_addid(xd, repo->nsolvables);	/* FLEXARRAY nentries */
1692       cbdata.doingsolvables = 1;
1693       for (i = repo->start, s = pool->solvables + i, n = 0; i < repo->end; i++, s++)
1694 	{
1695 	  if (s->repo != repo)
1696 	    continue;
1697 	  data_addid(xd, cbdata.solvschemata[n]);
1698 	  if (cbdata.keymap[SOLVABLE_NAME])
1699 	    data_addid(xd, needid[s->name].need);
1700 	  if (cbdata.keymap[SOLVABLE_ARCH])
1701 	    data_addid(xd, needid[s->arch].need);
1702 	  if (cbdata.keymap[SOLVABLE_EVR])
1703 	    data_addid(xd, needid[s->evr].need);
1704 	  if (s->vendor && cbdata.keymap[SOLVABLE_VENDOR])
1705 	    data_addid(xd, needid[s->vendor].need);
1706 	  if (s->provides && cbdata.keymap[SOLVABLE_PROVIDES])
1707 	    data_addidarray_sort(xd, pool, needid, idarraydata + s->provides, SOLVABLE_FILEMARKER);
1708 	  if (s->obsoletes && cbdata.keymap[SOLVABLE_OBSOLETES])
1709 	    data_addidarray_sort(xd, pool, needid, idarraydata + s->obsoletes, 0);
1710 	  if (s->conflicts && cbdata.keymap[SOLVABLE_CONFLICTS])
1711 	    data_addidarray_sort(xd, pool, needid, idarraydata + s->conflicts, 0);
1712 	  if (s->requires && cbdata.keymap[SOLVABLE_REQUIRES])
1713 	    data_addidarray_sort(xd, pool, needid, idarraydata + s->requires, SOLVABLE_PREREQMARKER);
1714 	  if (s->recommends && cbdata.keymap[SOLVABLE_RECOMMENDS])
1715 	    data_addidarray_sort(xd, pool, needid, idarraydata + s->recommends, 0);
1716 	  if (s->suggests && cbdata.keymap[SOLVABLE_SUGGESTS])
1717 	    data_addidarray_sort(xd, pool, needid, idarraydata + s->suggests, 0);
1718 	  if (s->supplements && cbdata.keymap[SOLVABLE_SUPPLEMENTS])
1719 	    data_addidarray_sort(xd, pool, needid, idarraydata + s->supplements, 0);
1720 	  if (s->enhances && cbdata.keymap[SOLVABLE_ENHANCES])
1721 	    data_addidarray_sort(xd, pool, needid, idarraydata + s->enhances, 0);
1722 	  if (repo->rpmdbid && cbdata.keymap[RPM_RPMDBID])
1723 	    data_addid(xd, repo->rpmdbid[i - repo->start]);
1724 	  if (anyrepodataused)
1725 	    {
1726 	      cbdata.vstart = -1;
1727 	      FOR_REPODATAS(repo, j, data)
1728 		{
1729 		  if (!repodataused[j])
1730 		    continue;
1731 		  if (i < data->start || i >= data->end)
1732 		    continue;
1733 		  repodata_search(data, i, 0, SEARCH_SUB|SEARCH_ARRAYSENTINEL, repo_write_cb_adddata, &cbdata);
1734 		}
1735 	    }
1736 	  if (xd->len - cbdata.lastlen > cbdata.maxdata)
1737 	    cbdata.maxdata = xd->len - cbdata.lastlen;
1738 	  cbdata.lastlen = xd->len;
1739 	  n++;
1740 	}
1741       cbdata.doingsolvables = 0;
1742     }
1743 
1744   assert(cbdata.current_sub == cbdata.nsubschemata);
1745   if (cbdata.subschemata)
1746     {
1747       cbdata.subschemata = solv_free(cbdata.subschemata);
1748       cbdata.nsubschemata = 0;
1749     }
1750 
1751 /********************************************************************/
1752 
1753   target.fp = fp;
1754 
1755   /* write header */
1756 
1757   /* write file header */
1758   write_u32(&target, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V');
1759   write_u32(&target, SOLV_VERSION_8);
1760 
1761 
1762   /* write counts */
1763   write_u32(&target, nstrings);
1764   write_u32(&target, nrels);
1765   write_u32(&target, ndirmap);
1766   write_u32(&target, anysolvableused ? repo->nsolvables : 0);
1767   write_u32(&target, target.nkeys);
1768   write_u32(&target, target.nschemata);
1769   solv_flags = 0;
1770   solv_flags |= SOLV_FLAG_PREFIX_POOL;
1771   solv_flags |= SOLV_FLAG_SIZE_BYTES;
1772   write_u32(&target, solv_flags);
1773 
1774   if (nstrings)
1775     {
1776       /*
1777        * calculate prefix encoding of the strings
1778        */
1779       unsigned char *prefixcomp = solv_malloc(nstrings);
1780       unsigned int compsum = 0;
1781       char *old_str = "";
1782 
1783       prefixcomp[0] = 0;
1784       for (i = 1; i < nstrings; i++)
1785 	{
1786 	  char *str = spool->stringspace + spool->strings[needid[i].map];
1787 	  int same;
1788 	  for (same = 0; same < 255; same++)
1789 	    if (!old_str[same] || old_str[same] != str[same])
1790 	      break;
1791 	  prefixcomp[i] = same;
1792 	  compsum += same;
1793 	  old_str = str;
1794 	}
1795 
1796       /*
1797        * write strings
1798        */
1799       write_u32(&target, sizeid);
1800       /* we save compsum bytes but need 1 extra byte for every string */
1801       write_u32(&target, sizeid + nstrings - 1 - compsum);
1802       for (i = 1; i < nstrings; i++)
1803 	{
1804 	  char *str = spool->stringspace + spool->strings[needid[i].map];
1805 	  write_u8(&target, prefixcomp[i]);
1806 	  write_str(&target, str + prefixcomp[i]);
1807 	}
1808       solv_free(prefixcomp);
1809     }
1810   else
1811     {
1812       write_u32(&target, 0);
1813       write_u32(&target, 0);
1814     }
1815 
1816   /*
1817    * write RelDeps
1818    */
1819   for (i = 0; i < nrels; i++)
1820     {
1821       ran = pool->rels + (needid[reloff + i].map - reloff);
1822       write_id(&target, needid[ISRELDEP(ran->name) ? RELOFF(ran->name) : ran->name].need);
1823       write_id(&target, needid[ISRELDEP(ran->evr) ? RELOFF(ran->evr) : ran->evr].need);
1824       write_u8(&target, ran->flags);
1825     }
1826 
1827   /*
1828    * write dirs (skip both root and / entry)
1829    */
1830   for (i = 2; i < ndirmap; i++)
1831     {
1832       if (dirmap[i] > 0)
1833         write_id(&target, dirmap[i]);
1834       else
1835         write_id(&target, nstrings - dirmap[i]);
1836     }
1837   solv_free(dirmap);
1838 
1839   /*
1840    * write keys
1841    */
1842   for (i = 1; i < target.nkeys; i++)
1843     {
1844       write_id(&target, needid[target.keys[i].name].need);
1845       write_id(&target, needid[target.keys[i].type].need);
1846       if (target.keys[i].storage != KEY_STORAGE_VERTICAL_OFFSET)
1847 	{
1848 	  if (target.keys[i].type == type_constantid)
1849             write_id(&target, needid[target.keys[i].size].need);
1850 	  else
1851             write_id(&target, target.keys[i].size);
1852 	}
1853       else
1854         write_id(&target, cbdata.extdata[i].len);
1855       write_id(&target, target.keys[i].storage);
1856     }
1857 
1858   /*
1859    * write schemata
1860    */
1861   write_id(&target, target.schemadatalen);	/* XXX -1? */
1862   for (i = 1; i < target.nschemata; i++)
1863     write_idarray(&target, pool, 0, repodata_id2schema(&target, i));
1864 
1865 /********************************************************************/
1866 
1867   write_id(&target, cbdata.maxdata);
1868   write_id(&target, cbdata.extdata[0].len);
1869   if (cbdata.extdata[0].len)
1870     write_blob(&target, cbdata.extdata[0].buf, cbdata.extdata[0].len);
1871   solv_free(cbdata.extdata[0].buf);
1872 
1873   /* do we have vertical data? */
1874   for (i = 1; i < target.nkeys; i++)
1875     if (cbdata.extdata[i].len)
1876       break;
1877   if (i < target.nkeys)
1878     {
1879       /* yes, write it in pages */
1880       unsigned char *dp, vpage[REPOPAGE_BLOBSIZE];
1881       int l, ll, lpage = 0;
1882 
1883       write_u32(&target, REPOPAGE_BLOBSIZE);
1884       for (i = 1; i < target.nkeys; i++)
1885 	{
1886 	  if (!cbdata.extdata[i].len)
1887 	    continue;
1888 	  l = cbdata.extdata[i].len;
1889 	  dp = cbdata.extdata[i].buf;
1890 	  while (l)
1891 	    {
1892 	      ll = REPOPAGE_BLOBSIZE - lpage;
1893 	      if (l < ll)
1894 		ll = l;
1895 	      memcpy(vpage + lpage, dp, ll);
1896 	      dp += ll;
1897 	      lpage += ll;
1898 	      l -= ll;
1899 	      if (lpage == REPOPAGE_BLOBSIZE)
1900 		{
1901 		  write_compressed_page(&target, vpage, lpage);
1902 		  lpage = 0;
1903 		}
1904 	    }
1905 	}
1906       if (lpage)
1907 	write_compressed_page(&target, vpage, lpage);
1908     }
1909 
1910   for (i = 1; i < target.nkeys; i++)
1911     solv_free(cbdata.extdata[i].buf);
1912   solv_free(cbdata.extdata);
1913 
1914   target.fp = 0;
1915   repodata_freedata(&target);
1916 
1917   solv_free(needid);
1918   solv_free(cbdata.solvschemata);
1919   solv_free(cbdata.schema);
1920 
1921   solv_free(cbdata.keymap);
1922   solv_free(cbdata.keymapstart);
1923   solv_free(cbdata.dirused);
1924   solv_free(repodataused);
1925   return target.error;
1926 }
1927 
1928 struct repodata_write_data {
1929   int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata);
1930   void *kfdata;
1931   int repodataid;
1932 };
1933 
1934 static int
1935 repodata_write_keyfilter(Repo *repo, Repokey *key, void *kfdata)
1936 {
1937   struct repodata_write_data *wd = kfdata;
1938 
1939   /* XXX: special repodata selection hack */
1940   if (key->name == 1 && key->size != wd->repodataid)
1941     return -1;
1942   if (key->storage == KEY_STORAGE_SOLVABLE)
1943     return KEY_STORAGE_DROPPED;	/* not part of this repodata */
1944   if (wd->keyfilter)
1945     return (*wd->keyfilter)(repo, key, wd->kfdata);
1946   return key->storage;
1947 }
1948 
1949 int
1950 repodata_write_filtered(Repodata *data, FILE *fp, int (*keyfilter)(Repo *repo, Repokey *key, void *kfdata), void *kfdata, Queue *keyq)
1951 {
1952   struct repodata_write_data wd;
1953 
1954   wd.keyfilter = keyfilter;
1955   wd.kfdata = kfdata;
1956   wd.repodataid = data->repodataid;
1957   return repo_write_filtered(data->repo, fp, repodata_write_keyfilter, &wd, keyq);
1958 }
1959 
1960 int
1961 repodata_write(Repodata *data, FILE *fp)
1962 {
1963   return repodata_write_filtered(data, fp, repo_write_stdkeyfilter, 0, 0);
1964 }
1965 
1966 int
1967 repo_write(Repo *repo, FILE *fp)
1968 {
1969   return repo_write_filtered(repo, fp, repo_write_stdkeyfilter, 0, 0);
1970 }
1971