/*
   This file contains the global device cache for the BeOS.  All
   file system I/O comes through here.  The cache can handle blocks
   of different sizes for multiple different underlying physical
   devices.

   The cache is organized as a hash table (for lookups by device
   and block number) and two doubly-linked lists.  The normal
   list is for "normal" blocks which are either clean or dirty.
   The locked list is for blocks that are locked in the cache by
   BFS.  The lists are LRU ordered.

   Most of the work happens in the function cache_block_io() which
   is quite lengthy.  The other functions of interest are get_ents()
   which picks victims to be kicked out of the cache; flush_ents()
   which does the work of flushing them; and set_blocks_info() which
   handles cloning blocks and setting callbacks so that the BFS
   journal will work properly.  If you want to modify this code it
   will take some study but it's not too bad.  Do not think about
   separating the list of clean and dirty blocks into two lists as
   I did that already and it's slower.

   Originally this cache code was written while listening to the album
   "Ride the Lightning" by Metallica.  The current version was written
   while listening to "Welcome to SkyValley" by Kyuss as well as an
   ambient collection on the German label FAX.  Music helps a lot when
   writing code.

   THIS CODE COPYRIGHT DOMINIC GIAMPAOLO.  NO WARRANTY IS EXPRESSED
   OR IMPLIED.  YOU MAY USE THIS CODE AND FREELY DISTRIBUTE IT FOR
   NON-COMMERCIAL USE AS LONG AS THIS NOTICE REMAINS ATTACHED.

   FOR COMMERCIAL USE, CONTACT DOMINIC GIAMPAOLO (dbg@be.com).

   Dominic Giampaolo
   dbg@be.com
*/

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>


#include <OS.h>
#include <KernelExport.h>

#include "fs_cache.h"
#include "fs_cache_priv.h"
#include "lock.h"



#ifndef USER
#define printf dprintf
#endif
#ifdef USER
#define kprintf printf
#endif


typedef off_t fs_off_t;

/* forward prototypes */
static int   flush_ents(cache_ent **ents, int n_ents);

//static int   do_dump(int argc, char **argv);
//static int   do_find_block(int argc, char **argv);
//static int   do_find_data(int argc, char **argv);
//static void  cache_flusher(void *arg, int phase);


int chatty_io = 0;

#define CHUNK (512 * 1024)   /* a hack to work around scsi driver bugs */

static void
beos_panic(const char *format, ...)
{
    va_list     ap;

    va_start(ap, format);
    vfprintf(stderr, format, ap);
    va_end(ap);

    while (TRUE)
        ;
}

size_t
beos_read_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize)
{
    size_t ret = 0;
    size_t sum;

    if (chatty_io)
        printf("R: %8" B_PRIdOFF " : %3d\n", bnum, num_blocks);

    if (num_blocks * bsize < CHUNK)
        ret = read_pos(fd, bnum * bsize, data, num_blocks * bsize);
    else {
        for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) {
            ret = read_pos(fd, (bnum * bsize) + sum, data, CHUNK);
            if (ret != CHUNK)
                break;

            data = (void *)((char *)data + CHUNK);
        }

        if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) {
            ret = read_pos(fd, (bnum * bsize) + sum, data,
                           (num_blocks * bsize) - sum);

            if (ret == (num_blocks * bsize) - sum)
                ret = num_blocks * bsize;
        } else if (ret == CHUNK) {
            ret = num_blocks * bsize;
        }
    }

    if (ret == num_blocks * bsize)
        return 0;
    else
        return EBADF;
}
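/*
   Usage sketch (illustrative only, kept out of the build): a 1MB read of
   256 hypothetical 4K blocks gets split by the CHUNK loop above into two
   512K read_pos() calls.  The fd and block number here are made up for
   the example.
*/
#if 0
static void
example_chunked_read(int fd)
{
    char *buf = (char *)malloc(256 * 4096);

    if (buf == NULL)
        return;

    /* returns 0 on success, EBADF if the underlying read came up short */
    if (beos_read_phys_blocks(fd, 1024, buf, 256, 4096) == 0)
        printf("read 256 blocks ok\n");

    free(buf);
}
#endif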

size_t
beos_write_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize)
{
    size_t ret = 0;
    size_t sum;

    if (chatty_io)
        printf("W: %8" B_PRIdOFF " : %3d\n", bnum, num_blocks);

    if (num_blocks * bsize < CHUNK)
        ret = write_pos(fd, bnum * bsize, data, num_blocks * bsize);
    else {
        for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) {
            ret = write_pos(fd, (bnum * bsize) + sum, data, CHUNK);
            if (ret != CHUNK)
                break;

            data = (void *)((char *)data + CHUNK);
        }

        if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) {
            ret = write_pos(fd, (bnum * bsize) + sum, data,
                            (num_blocks * bsize) - sum);

            if (ret == (num_blocks * bsize) - sum)
                ret = num_blocks * bsize;
        } else if (ret == CHUNK) {
            ret = num_blocks * bsize;
        }
    }


    if (ret == num_blocks * bsize)
        return 0;
    else
        return EBADF;
}

//	#pragma mark -

static int
init_hash_table(hash_table *ht)
{
    ht->max   = HT_DEFAULT_MAX;
    ht->mask  = ht->max - 1;
    ht->num_elements = 0;

    ht->table = (hash_ent **)calloc(ht->max, sizeof(hash_ent *));
    if (ht->table == NULL)
        return ENOMEM;

    return 0;
}


static void
shutdown_hash_table(hash_table *ht)
{
    int       i, hash_len;
    hash_ent *he, *next;

    for(i=0; i < ht->max; i++) {
        he = ht->table[i];

        for(hash_len=0; he; hash_len++, he=next) {
            next = he->next;
            free(he);
        }
    }

    if (ht->table)
        free(ht->table);
    ht->table = NULL;
}

#if 0
static void
print_hash_stats(hash_table *ht)
{
    int       i, hash_len, max = -1, sum = 0;
    hash_ent *he, *next;

    for(i=0; i < ht->max; i++) {
        he = ht->table[i];

        for(hash_len=0; he; hash_len++, he=next) {
            next = he->next;
        }
        if (hash_len)
            printf("bucket %3d : %3d\n", i, hash_len);

        sum += hash_len;
        if (hash_len > max)
            max = hash_len;
    }

    printf("max # of chains: %d,  average chain length %d\n", max,sum/ht->max);
}
#endif

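/* combine dev and block number into one key: the device id lands in the
   top 6 bits and the block number in the rest; callers mask the result
   down to the current table size */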
#define HASH(d, b)   ((((fs_off_t)d) << (sizeof(fs_off_t)*8 - 6)) | (b))

static hash_ent *
new_hash_ent(int dev, fs_off_t bnum, void *data)
{
    hash_ent *he;

    he = (hash_ent *)malloc(sizeof(*he));
    if (he == NULL)
        return NULL;

    he->hash_val = HASH(dev, bnum);
    he->dev      = dev;
    he->bnum     = bnum;
    he->data     = data;
    he->next     = NULL;

    return he;
}


static int
grow_hash_table(hash_table *ht)
{
    int        i, omax, newsize, newmask;
    fs_off_t      hash;
    hash_ent **new_table, *he, *next;

    if (ht->max & ht->mask) {
        printf("*** hashtable size %d or mask %d looks weird!\n", ht->max,
               ht->mask);
    }

    omax    = ht->max;
    newsize = omax * 2;        /* have to grow in powers of two */
    newmask = newsize - 1;

    new_table = (hash_ent **)calloc(newsize, sizeof(hash_ent *));
    if (new_table == NULL)
        return ENOMEM;

    for(i=0; i < omax; i++) {
        for(he=ht->table[i]; he; he=next) {
            hash = he->hash_val & newmask;
            next = he->next;

            he->next        = new_table[hash];
            new_table[hash] = he;
        }
    }

    free(ht->table);
    ht->table = new_table;
    ht->max   = newsize;
    ht->mask  = newmask;

    return 0;
}




static int
hash_insert(hash_table *ht, int dev, fs_off_t bnum, void *data)
{
    fs_off_t    hash;
    hash_ent *he, *curr;

    hash = HASH(dev, bnum) & ht->mask;

    curr = ht->table[hash];
    for(; curr != NULL; curr=curr->next)
        if (curr->dev == dev && curr->bnum == bnum)
            break;

    if (curr && curr->dev == dev && curr->bnum == bnum) {
        printf("entry %d:%" B_PRIdOFF " already in the hash table!\n",
            dev, bnum);
        return EEXIST;
    }

    he = new_hash_ent(dev, bnum, data);
    if (he == NULL)
        return ENOMEM;

    he->next        = ht->table[hash];
    ht->table[hash] = he;

    ht->num_elements++;
    if (ht->num_elements >= ((ht->max * 3) / 4)) {
        if (grow_hash_table(ht) != 0)
            return ENOMEM;
    }

    return 0;
}

static void *
hash_lookup(hash_table *ht, int dev, fs_off_t bnum)
{
    hash_ent *he;

    he = ht->table[HASH(dev, bnum) & ht->mask];

    for(; he != NULL; he=he->next) {
        if (he->dev == dev && he->bnum == bnum)
            break;
    }

    if (he)
        return he->data;
    else
        return NULL;
}


static void *
hash_delete(hash_table *ht, int dev, fs_off_t bnum)
{
    void     *data;
    fs_off_t     hash;
    hash_ent *he, *prev = NULL;

    hash = HASH(dev, bnum) & ht->mask;
    he = ht->table[hash];

    for(; he != NULL; prev=he,he=he->next) {
        if (he->dev == dev && he->bnum == bnum)
            break;
    }

    if (he == NULL) {
        printf("*** hash_delete: tried to delete non-existent block %d:%"
            B_PRIdOFF "\n", dev, bnum);
        return NULL;
    }

    data = he->data;

    if (ht->table[hash] == he)
        ht->table[hash] = he->next;
    else if (prev)
        prev->next = he->next;
    else
        beos_panic("hash table is inconsistent\n");

    free(he);
    ht->num_elements--;

    return data;
}
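
/*
   Minimal sketch of the hash table API above (illustrative only, kept
   out of the build).  The device number, block number, and buffer size
   are made up for the example.
*/
#if 0
static void
example_hash_usage(void)
{
    hash_table  ht;
    void       *data = malloc(1024);

    if (data == NULL)
        return;
    if (init_hash_table(&ht) != 0) {
        free(data);
        return;
    }

    /* entries are keyed on the (dev, bnum) pair */
    hash_insert(&ht, 3, 1234, data);

    if (hash_lookup(&ht, 3, 1234) != data)
        printf("lookup should have found the block!\n");

    /* hash_delete() unlinks the entry and hands back the data ptr */
    free(hash_delete(&ht, 3, 1234));

    shutdown_hash_table(&ht);
}
#endif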

//	#pragma mark -

/*
  These are the global variables for the cache.
*/
static block_cache  bc;

#define       MAX_IOVECS  64           /* # of iovecs for use by cache code */
static beos_lock   iovec_lock;
static struct iovec *iovec_pool[MAX_IOVECS];  /* each ptr is to an array of iovecs */
static int    iovec_used[MAX_IOVECS];  /* non-zero == iovec is in use */

#define NUM_FLUSH_BLOCKS 64    /* size of the iovec array pointed to by each ptr */


#define DEFAULT_READ_AHEAD_SIZE  (32 * 1024)
static int read_ahead_size = DEFAULT_READ_AHEAD_SIZE;

/* this array stores the size of each device so we can error check requests */
#define MAX_DEVICES  256
fs_off_t max_device_blocks[MAX_DEVICES];


/* holds the time of the last cache access so cache flushing doesn't interfere */
static bigtime_t last_cache_access = 0;


int
beos_init_block_cache(int max_blocks, int flags)
{
    memset(&bc, 0, sizeof(bc));
    memset(iovec_pool, 0, sizeof(iovec_pool));
    memset(iovec_used, 0, sizeof(iovec_used));
    memset(&max_device_blocks, 0, sizeof(max_device_blocks));

    if (init_hash_table(&bc.ht) != 0)
        return ENOMEM;

    bc.lock.s = iovec_lock.s = -1;

    bc.max_blocks = max_blocks;
    bc.flags      = flags;
    if (beos_new_lock(&bc.lock, "bollockcache") != 0)
        goto err;

    if (beos_new_lock(&iovec_lock, "iovec_lock") != 0)
        goto err;

    /* allocate two of these up front so vm won't accidentally re-enter itself */
    iovec_pool[0] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
    iovec_pool[1] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);

#ifndef USER
#ifdef DEBUG
    add_debugger_command("bcache", do_dump, "dump the block cache list");
    add_debugger_command("fblock", do_find_block, "find a block in the cache");
    add_debugger_command("fdata",  do_find_data, "find a data block ptr in the cache");
#endif
    register_kernel_daemon(cache_flusher, NULL, 3);
#endif

    return 0;

 err:
    if (bc.lock.s >= 0)
        beos_free_lock(&bc.lock);

    if (iovec_lock.s >= 0)
        beos_free_lock(&iovec_lock);

    shutdown_hash_table(&bc.ht);
    memset((void *)&bc, 0, sizeof(bc));
    return ENOMEM;
}


static struct iovec *
get_iovec_array(void)
{
    int i;
    struct iovec *iov;

    LOCK(iovec_lock);

    for(i = 0; i < MAX_IOVECS; i++) {
        if (iovec_used[i] == 0)
            break;
    }

    if (i >= MAX_IOVECS)       /* uh-oh */
        beos_panic("cache: ran out of iovecs (pool 0x%x, used 0x%x)!\n",
              &iovec_pool[0], &iovec_used[0]);

    if (iovec_pool[i] == NULL) {
        iovec_pool[i] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
        if (iovec_pool[i] == NULL)
            beos_panic("can't allocate an iovec!\n");
    }

    iov = iovec_pool[i];
    iovec_used[i] = 1;

    UNLOCK(iovec_lock);

    return iov;
}


static void
release_iovec_array(struct iovec *iov)
{
    int i;

    LOCK(iovec_lock);

    for(i=0; i < MAX_IOVECS; i++) {
        if (iov == iovec_pool[i])
            break;
    }

    if (i < MAX_IOVECS)
        iovec_used[i] = 0;
    else                     /* uh-oh */
        printf("cache: released an iovec I don't own (iov %p)\n", iov);


    UNLOCK(iovec_lock);
}




static void
real_dump_cache_list(cache_ent_list *cel)
{
    cache_ent *ce;

    kprintf("starting from LRU end:\n");

    for (ce = cel->lru; ce; ce = ce->next) {
        kprintf("ce %p dev %2d bnum %6" B_PRIdOFF " lock %d flag %d arg %p "
               "clone %p\n", ce, ce->dev, ce->block_num, ce->lock,
               ce->flags, ce->arg, ce->clone);
    }
    kprintf("MRU end\n");
}

static void
dump_cache_list(void)
{
    kprintf("NORMAL BLOCKS\n");
    real_dump_cache_list(&bc.normal);

    kprintf("LOCKED BLOCKS\n");
    real_dump_cache_list(&bc.locked);

    kprintf("cur blocks %d, max blocks %d ht @ %p\n", bc.cur_blocks,
           bc.max_blocks, &bc.ht);
}

#if 0
static void
check_bcache(char *str)
{
    int count = 0;
    cache_ent *ce, *prev = NULL;

    LOCK(bc.lock);

    for(ce=bc.normal.lru; ce; prev=ce, ce=ce->next) {
        count++;
    }

    for(ce=bc.locked.lru; ce; prev=ce, ce=ce->next) {
        count++;
    }

    if (count != bc.cur_blocks) {
        if (count < bc.cur_blocks - 16)
            beos_panic("%s: count == %d, cur_blocks %d, prev 0x%x\n",
                    str, count, bc.cur_blocks, prev);
        else
            printf("%s: count == %d, cur_blocks %d, prev 0x%x\n",
                    str, count, bc.cur_blocks, prev);
    }

    UNLOCK(bc.lock);
}


static void
dump_lists(void)
{
    cache_ent *nce;

    printf("LOCKED 0x%x  (tail 0x%x, head 0x%x)\n", &bc.locked,
           bc.locked.lru, bc.locked.mru);
    for(nce=bc.locked.lru; nce; nce=nce->next)
        printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n",
               nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone,
               nce->func);

    printf("NORMAL 0x%x  (tail 0x%x, head 0x%x)\n", &bc.normal,
           bc.normal.lru, bc.normal.mru);
    for(nce=bc.normal.lru; nce; nce=nce->next)
        printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n",
               nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone,
               nce->func);
}


static void
check_lists(void)
{
    cache_ent *ce, *prev, *oce;
    cache_ent_list *cel;

    cel = &bc.normal;
    for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) {
        for(oce=bc.locked.lru; oce; oce=oce->next) {
            if (oce == ce) {
                dump_lists();
                beos_panic("1:ce @ 0x%x is in two lists(cel 0x%x &LOCKED)\n",ce,cel);
            }
        }
    }
    if (prev && prev != cel->mru) {
        dump_lists();
        beos_panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n",
              prev, cel);
    }

    cel = &bc.locked;
    for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) {
        for(oce=bc.normal.lru; oce; oce=oce->next) {
            if (oce == ce) {
                dump_lists();
                beos_panic("3:ce @ 0x%x is in two lists(cel 0x%x & DIRTY)\n",ce,cel);
            }
        }
    }
    if (prev && prev != cel->mru) {
        dump_lists();
        beos_panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n",
              prev, cel);
    }
}
#endif


#ifdef DEBUG
#if 0
static int
do_dump(int argc, char **argv)
{
    dump_cache_list();
    return 1;
}


static int
do_find_block(int argc, char **argv)
{
    int        i;
    fs_off_t  bnum;
    cache_ent *ce;

    if (argc < 2) {
        kprintf("%s: needs a block # argument\n", argv[0]);
        return 1;
    }

    for(i=1; i < argc; i++) {
        bnum = strtoul(argv[i], NULL, 0);

        for(ce=bc.normal.lru; ce; ce=ce->next) {
            if (ce->block_num == bnum) {
                kprintf("found clean bnum %ld @ 0x%lx (data @ 0x%lx)\n",
                        bnum, ce, ce->data);
            }
        }

        for(ce=bc.locked.lru; ce; ce=ce->next) {
            if (ce->block_num == bnum) {
                kprintf("found locked bnum %ld @ 0x%lx (data @ 0x%lx)\n",
                        bnum, ce, ce->data);
            }
        }
    }

    return 0;
}

static int
do_find_data(int argc, char **argv)
{
    int        i;
    void      *data;
    cache_ent *ce;

    if (argc < 2) {
        kprintf("%s: needs a block # argument\n", argv[0]);
        return 1;
    }

    for(i=1; i < argc; i++) {
        data = (void *)strtoul(argv[i], NULL, 0);

        for(ce=bc.normal.lru; ce; ce=ce->next) {
            if (ce->data == data) {
                kprintf("found normal data ptr for bnum %ld @ ce 0x%lx\n",
                        ce->block_num, ce);
            }
        }

        for(ce=bc.locked.lru; ce; ce=ce->next) {
            if (ce->data == data) {
                kprintf("found locked data ptr for bnum %ld @ ce 0x%lx\n",
                        ce->block_num, ce);
            }
        }
    }

    return 0;
}
#endif
#endif /* DEBUG */



/*
  this function detaches the cache_ent from the list.
*/
static void
delete_from_list(cache_ent_list *cel, cache_ent *ce)
{
    if (ce->next)
        ce->next->prev = ce->prev;
    if (ce->prev)
        ce->prev->next = ce->next;

    if (cel->lru == ce)
        cel->lru = ce->next;
    if (cel->mru == ce)
        cel->mru = ce->prev;

    ce->next = NULL;
    ce->prev = NULL;
}



/*
  this function adds the cache_ent ce to the head of the
  list (i.e. the MRU end).  the cache_ent should *not*
  be in any lists.
*/
static void
add_to_head(cache_ent_list *cel, cache_ent *ce)
{
if (ce->next != NULL || ce->prev != NULL) {
    beos_panic("*** ath: ce has non-null next/prev ptr (ce 0x%x nxt 0x%x, prv 0x%x)\n",
           ce, ce->next, ce->prev);
}

    ce->next = NULL;
    ce->prev = cel->mru;

    if (cel->mru)
        cel->mru->next = ce;
    cel->mru = ce;

    if (cel->lru == NULL)
        cel->lru = ce;
}


/*
  this function adds the cache_ent ce to the tail of the
  list (i.e. the LRU end).  the cache_ent should *not*
  be in any lists.
*/
static void
add_to_tail(cache_ent_list *cel, cache_ent *ce)
{
if (ce->next != NULL || ce->prev != NULL) {
    beos_panic("*** att: ce has non-null next/prev ptr (ce 0x%x nxt 0x%x, prv 0x%x)\n",
           ce, ce->next, ce->prev);
}

    ce->next = cel->lru;
    ce->prev = NULL;

    if (cel->lru)
        cel->lru->prev = ce;
    cel->lru = ce;

    if (cel->mru == NULL)
        cel->mru = ce;
}
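
/*
   Illustrative sketch (kept out of the build): this is the idiom the
   rest of the file uses to "touch" a block, re-inserting it at the MRU
   end of whichever list it belongs to.  Assumes bc.lock is held.
*/
#if 0
static void
example_touch_block(cache_ent *ce)
{
    cache_ent_list *cel = ce->lock ? &bc.locked : &bc.normal;

    delete_from_list(cel, ce);   /* unlink it... */
    add_to_head(cel, ce);        /* ...and put it back at the MRU end */
}
#endif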


static int
cache_ent_cmp(const void *a, const void *b)
{
    fs_off_t  diff;
    cache_ent *p1 = *(cache_ent **)a, *p2 = *(cache_ent **)b;

    if (p1 == NULL || p2 == NULL)
        beos_panic("cache_ent pointers are null?!? (a 0x%lx, b 0x%lx\n)\n", a, b);

    if (p1->dev == p2->dev) {
        diff = p1->block_num - p2->block_num;
        return (int)diff;
    } else {
        return p1->dev - p2->dev;
    }
}

#if 0
// ToDo: add this to the fsh (as a background thread)?
static void
cache_flusher(void *arg, int phase)
{
    int    i, num_ents, err;
    bigtime_t now = system_time();
    static cache_ent *ce = NULL;
    static cache_ent *ents[NUM_FLUSH_BLOCKS];

    /*
       if someone else was in the cache recently then just bail out so
       we don't lock them out unnecessarily
    */
    if ((now - last_cache_access) < 1000000)
        return;

    LOCK(bc.lock);

    ce = bc.normal.lru;

    for(num_ents=0; ce && num_ents < NUM_FLUSH_BLOCKS; ce=ce->next) {
        if (ce->flags & CE_BUSY)
            continue;

        if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
            continue;

        ents[num_ents] = ce;
        ents[num_ents]->flags |= CE_BUSY;
        num_ents++;
    }

    /* if we've got some room left over, look for cloned locked blocks */
    if (num_ents < NUM_FLUSH_BLOCKS) {
        ce = bc.locked.lru;

        for(; num_ents < NUM_FLUSH_BLOCKS;) {
            for(;
                ce && ((ce->flags & CE_BUSY) || ce->clone == NULL);
                ce=ce->next)
                /* skip ents that meet the above criteria */;

            if (ce == NULL)
                break;

            ents[num_ents] = ce;
            ents[num_ents]->flags |= CE_BUSY;
            ce = ce->next;
            num_ents++;
        }
    }

    UNLOCK(bc.lock);

    if (num_ents == 0)
        return;

    qsort(ents, num_ents, sizeof(cache_ent **), cache_ent_cmp);

    if ((err = flush_ents(ents, num_ents)) != 0) {
        printf("flush ents failed (ents @ 0x%lx, num_ents %d!\n",
               (ulong)ents, num_ents);
    }

    for(i=0; i < num_ents; i++) {       /* clear the busy bit on each ent */
        ents[i]->flags &= ~CE_BUSY;
    }
}
#endif


static int
flush_cache_ent(cache_ent *ce)
{
    int   ret = 0;
    void *data;

    /* if true, then there's nothing to flush */
    if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
        return 0;

    /* same thing here */
    if (ce->clone == NULL && ce->lock != 0)
        return 0;

 restart:
	if (ce->clone)
		data = ce->clone;
	else
		data = ce->data;

	if (chatty_io > 2) printf("flush: %7" B_PRIdOFF "\n", ce->block_num);
	ret = beos_write_phys_blocks(ce->dev, ce->block_num, data, 1, ce->bsize);

    if (ce->func) {
        ce->func(ce->logged_bnum, 1, ce->arg);
        ce->func = NULL;
    }

    if (ce->clone) {
        free(ce->clone);
        ce->clone = NULL;

        if (ce->lock == 0 && (ce->flags & CE_DIRTY))
            goto restart;     /* also write the real data ptr */
    } else {
        ce->flags &= ~CE_DIRTY;
    }

    return ret;
}


static int
flush_ents(cache_ent **ents, int n_ents)
{
    int    i, j, k, ret = 0, bsize, iocnt, do_again = 0;
    fs_off_t  start_bnum;
    struct iovec *iov;

    iov = get_iovec_array();
    if (iov == NULL)
        return ENOMEM;

restart:
    for(i=0; i < n_ents; i++) {
        /* if true, then there's nothing to flush */
        if ((ents[i]->flags & CE_DIRTY) == 0 && ents[i]->clone == NULL)
            continue;

        /* if true we can't touch the dirty data yet because it's locked */
        if (ents[i]->clone == NULL && ents[i]->lock != 0)
            continue;


        bsize      = ents[i]->bsize;
        start_bnum = ents[i]->block_num;

        for(j=i+1; j < n_ents && (j - i) < NUM_FLUSH_BLOCKS; j++) {
            if (ents[j]->dev != ents[i]->dev ||
                ents[j]->block_num != start_bnum + (j - i))
                break;

            if (ents[j]->clone == NULL && ents[j]->lock != 0)
                break;
        }
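        /*
           Worked example (hypothetical numbers): if ents[] holds dev 3
           blocks 100, 101, 102, 103 and then 250, the scan above stops
           at 250, so blocks 100-103 go out below in a single
           writev_pos() of four iovecs and 250 is handled on the next
           pass through the outer loop.
        */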

        if (j == i+1) {           /* only one block, just flush it directly */
            if ((ret = flush_cache_ent(ents[i])) != 0)
                break;
            continue;
        }


        for(k=i,iocnt=0; k < j; k++,iocnt++) {
            if (ents[k]->clone)
                iov[iocnt].iov_base = ents[k]->clone;
            else
                iov[iocnt].iov_base = ents[k]->data;

            iov[iocnt].iov_len = bsize;
        }

		if (chatty_io)
	        printf("writev @ %" B_PRIdOFF " for %d blocks\n",
                start_bnum, iocnt);

        ret = writev_pos(ents[i]->dev, start_bnum * (fs_off_t)bsize,
                         &iov[0], iocnt);
        if (ret != iocnt*bsize) {
            int idx;

            printf("flush_ents: writev failed: "
                "iocnt %d start bnum %" B_PRIdOFF " bsize %d, ret %d\n",
                iocnt, start_bnum, bsize, ret);

            for(idx=0; idx < iocnt; idx++)
                printf("iov[%2d] = %p :: %ld\n", idx, iov[idx].iov_base,
                       iov[idx].iov_len);

            printf("error %s writing blocks %" B_PRIdOFF ":%d (%d != %d)\n",
                   strerror(errno), start_bnum, iocnt, ret, iocnt*bsize);
            ret = EINVAL;
            break;
        }
        ret = 0;


        for(k=i; k < j; k++) {
            if (ents[k]->func) {
                ents[k]->func(ents[k]->logged_bnum, 1, ents[k]->arg);
                ents[k]->func = NULL;
            }

            if (ents[k]->clone) {
                free(ents[k]->clone);
                ents[k]->clone = NULL;
            } else {
                ents[k]->flags &= ~CE_DIRTY;
            }
        }


        i = j - 1;  /* i gets incremented by the outer for loop */
    }

    /*
       here we have to go back through and flush any blocks that are
       still dirty.  with an arched brow you astutely ask, "but how
       could this happen given the above loop?"  Ahhh young grasshopper
       I say, the path through the cache is long and twisty and fraught
       with peril.  The reason it can happen is that a block can be both
       cloned and dirty.  The above loop would only flush the cloned half
       of the data, not the main dirty block.  So we have to go back
       through and see if there are any blocks that are still dirty.  If
       there are we go back to the top of the function and do the whole
       thing over.  Kind of grody but it is necessary to insure the
       correctness of the log for the Be file system.
    */
    if (do_again == 0) {
        for(i=0; i < n_ents; i++) {
            if ((ents[i]->flags & CE_DIRTY) == 0 || ents[i]->lock)
                continue;

            do_again = 1;
            break;
        }

        if (do_again)
            goto restart;
    }

    release_iovec_array(iov);


    return ret;
}

static void
delete_cache_list(cache_ent_list *cel)
{
    void      *junk;
    cache_ent *ce, *next;

    for (ce = cel->lru; ce; ce = next) {
        next = ce->next;
        if (ce->lock != 0) {
            if (ce->func)
                printf("*** shutdown_block_cache: "
                    "block %" B_PRIdOFF ", lock == %d (arg %p)!\n",
                    ce->block_num, ce->lock, ce->arg);
            else
                printf("*** shutdown_block_cache: "
                    "block %" B_PRIdOFF ", lock == %d!\n",
                    ce->block_num, ce->lock);
        }

        if (ce->flags & CE_BUSY) {
            printf("* shutdown block cache: "
                "bnum %" B_PRIdOFF " is busy? ce %p\n", ce->block_num, ce);
        }

        if ((ce->flags & CE_DIRTY) || ce->clone) {
            flush_cache_ent(ce);
        }

        if (ce->clone)
            free(ce->clone);
        ce->clone = NULL;

        if (ce->data)
            free(ce->data);
        ce->data = NULL;

        if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) {
            printf("*** free_device_cache: "
                "bad hash table entry %" B_PRIdOFF " %p != %p\n",
                ce->block_num, junk, ce);
        }

        explicit_bzero(ce, sizeof(*ce));
        free(ce);

        bc.cur_blocks--;
    }
}


void
beos_shutdown_block_cache(void)
{
    /* print_hash_stats(&bc.ht); */

    if (bc.lock.s > 0)
        LOCK(bc.lock);

#ifndef USER
    unregister_kernel_daemon(cache_flusher, NULL);
#endif

    delete_cache_list(&bc.normal);
    delete_cache_list(&bc.locked);

    bc.normal.lru = bc.normal.mru = NULL;
    bc.locked.lru = bc.locked.mru = NULL;

    shutdown_hash_table(&bc.ht);

    if (bc.lock.s > 0)
        beos_free_lock(&bc.lock);
    bc.lock.s = -1;

    if (iovec_lock.s >= 0)
        beos_free_lock(&iovec_lock);
}
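
/*
   Lifecycle sketch (illustrative only, kept out of the build): the
   sizes, the flags value, and the device fd are made up.  Per-device
   setup has to happen via beos_init_cache_for_device() before any
   blocks on that device can be cached.
*/
#if 0
static void
example_cache_lifecycle(int dev_fd)
{
    if (beos_init_block_cache(1024, 0) != 0)   /* cache up to 1024 blocks */
        return;

    /* tell the cache how big the device is so requests get range-checked */
    beos_init_cache_for_device(dev_fd, 4096);

    /* ... cached reads/writes happen here ... */

    beos_flush_device(dev_fd, 0);
    beos_remove_cached_device_blocks(dev_fd, ALLOW_WRITES);
    beos_shutdown_block_cache();
}
#endif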



int
beos_init_cache_for_device(int fd, fs_off_t max_blocks)
{
    int ret = 0;

    if (fd >= MAX_DEVICES)
        return -1;

    LOCK(bc.lock);

    if (max_device_blocks[fd] != 0) {
        printf("device %d is already initialized!\n", fd);
        ret = -1;
    } else {
        max_device_blocks[fd] = max_blocks;
    }

    UNLOCK(bc.lock);

    return ret;
}



/*
   this routine assumes that bc.lock has been acquired
*/
static cache_ent *
block_lookup(int dev, fs_off_t bnum)
{
    int        count = 0;
    cache_ent *ce;

    while (1) {
        ce = hash_lookup(&bc.ht, dev, bnum);
        if (ce == NULL)
            return NULL;

        if ((ce->flags & CE_BUSY) == 0) /* it's ok, break out and return it */
            break;

        /* else, it's busy and we need to retry our lookup */
        UNLOCK(bc.lock);

        snooze(5000);
        if (count++ == 5000) {  /* then a lot of time has elapsed */
            printf("block %" B_PRIdOFF " isn't coming un-busy (ce @ %p)\n",
                   ce->block_num, ce);
        }

        LOCK(bc.lock);
    }

    if (ce->flags & CE_BUSY)
        beos_panic("block lookup: returning a busy block @ 0x%lx?!?\n",(ulong)ce);

    return ce;
}



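/*
   Journal hook: called after a set of logged blocks has been written to
   the BFS log.  Each block gets cloned and the callback (func/arg)
   recorded so the journaled contents can later be flushed to their real
   on-disk location, while the in-memory copy is free to change again.
   Every block passed in is expected to be locked and dirty on entry.
*/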
int
beos_set_blocks_info(int dev, fs_off_t *blocks, int nblocks,
               void (*func)(fs_off_t bnum, size_t nblocks, void *arg), void *arg)
{
    int        i, j, cur;
    cache_ent *ce;
    cache_ent *ents[NUM_FLUSH_BLOCKS];

    LOCK(bc.lock);


    for(i=0, cur=0; i < nblocks; i++) {

        /* printf("sbi:   %ld (arg 0x%x)\n", blocks[i], arg); */
        ce = block_lookup(dev, blocks[i]);
        if (ce == NULL) {
            beos_panic("*** set_block_info can't find bnum %ld!\n", blocks[i]);
            UNLOCK(bc.lock);
            return ENOENT;   /* hopefully this doesn't happen... */
        }


        if (blocks[i] != ce->block_num || dev != ce->dev) {
            UNLOCK(bc.lock);
            beos_panic("** error1: looked up dev %d block %ld but found dev %d "
                    "bnum %ld\n", dev, blocks[i], ce->dev, ce->block_num);
            return EBADF;
        }

        if (ce->lock == 0) {
            beos_panic("* set_block_info on bnum %ld (%d) but it's not locked!\n",
                    blocks[i], nblocks);
        }


        if ((ce->flags & CE_DIRTY) == 0) {
            beos_panic("*** set_block_info on non-dirty block bnum %ld (%d)!\n",
                    blocks[i], nblocks);
        }

        ce->flags |= CE_BUSY;     /* mark all blocks as busy till we're done */

        /* if there is cloned data, it needs to be flushed now */
        if (ce->clone && ce->func) {
            ents[cur++] = ce;

            if (cur >= NUM_FLUSH_BLOCKS) {
                UNLOCK(bc.lock);

                qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);

                flush_ents(ents, cur);

                LOCK(bc.lock);
                for(j=0; j < cur; j++)
                    ents[j]->flags &= ~CE_BUSY;
                cur = 0;
            }
        }
    }


    if (cur != 0) {
        UNLOCK(bc.lock);

        qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);

        flush_ents(ents, cur);

        LOCK(bc.lock);
        for(j=0; j < cur; j++)
            ents[j]->flags &= ~CE_BUSY;
        cur = 0;
    }


    /* now go through and set the info that we were asked to */
    for (i = 0; i < nblocks; i++) {
        /* we can call hash_lookup() here because we know it's around */
        ce = hash_lookup(&bc.ht, dev, blocks[i]);
        if (ce == NULL) {
            beos_panic("*** set_block_info can't find bnum %Ld!\n", blocks[i]);
            UNLOCK(bc.lock);
            return ENOENT;   /* hopefully this doesn't happen... */
        }

        ce->flags &= ~(CE_DIRTY | CE_BUSY);

        if (ce->func != NULL) {
            beos_panic("*** set_block_info non-null callback on bnum %Ld\n",
                    ce->block_num);
        }

        if (ce->clone != NULL) {
            beos_panic("*** ce->clone == %p, not NULL in set_block_info\n",
                    ce->clone);
        }

        ce->clone = (void *)malloc(ce->bsize);
        if (ce->clone == NULL)
            beos_panic("*** can't clone bnum %Ld (bsize %d)\n",
                    ce->block_num, ce->bsize);


        memcpy(ce->clone, ce->data, ce->bsize);

        ce->func   = func;
        ce->arg    = arg;

        ce->logged_bnum = blocks[i];

        ce->lock--;
        if (ce->lock < 0) {
            printf("sbi: "
                "whoa nellie! ce @ %p (%" B_PRIdOFF ") has lock == %d\n",
                   ce, ce->block_num, ce->lock);
        }

        if (ce->lock == 0) {
            delete_from_list(&bc.locked, ce);
            add_to_head(&bc.normal, ce);
        }
    }

    UNLOCK(bc.lock);

    return 0;
}


/* this function is only for use by flush_device() */
static void
do_flush(cache_ent **ents, int max)
{
    int i;

    for(i=0; i < max; i++) {
        ents[i]->flags |= CE_BUSY;
    }

    UNLOCK(bc.lock);

    qsort(ents, max, sizeof(cache_ent **), cache_ent_cmp);
    flush_ents(ents, max);

    LOCK(bc.lock);
    for(i=0; i < max; i++) {
        ents[i]->flags &= ~CE_BUSY;
    }
}

int
beos_flush_device(int dev, int warn_locked)
{
    int cur;
    cache_ent *ce;
    cache_ent *ents[NUM_FLUSH_BLOCKS];

    LOCK(bc.lock);

    cur = 0;
    ce = bc.normal.lru;
    while (ce) {
        if (ce->dev != dev || (ce->flags & CE_BUSY)) {
            ce = ce->next;
            continue;
        }

        if ((ce->flags & CE_DIRTY) || ce->clone) {
            ents[cur++] = ce;
            if (cur >= NUM_FLUSH_BLOCKS) {
                do_flush(ents, cur);

                ce = bc.normal.lru;
                cur = 0;
                continue;
            }
        }

        ce = ce->next;
    }

    if (cur != 0)
        do_flush(ents, cur);

    cur = 0;
    ce = bc.locked.lru;
    while (ce) {
        if (ce->dev != dev || (ce->flags & CE_BUSY)) {
            ce = ce->next;
            continue;
        }

        if (ce->clone) {
            ents[cur++] = ce;
            if (cur >= NUM_FLUSH_BLOCKS) {
                do_flush(ents, cur);

                ce = bc.locked.lru;
                cur = 0;
                continue;
            }
        }

        ce = ce->next;
    }

    if (cur != 0)
        do_flush(ents, cur);

    UNLOCK(bc.lock);

    return 0;
}


static void
real_remove_cached_blocks(int dev, int allow_writes, cache_ent_list *cel)
{
    void      *junk;
    cache_ent *ce, *next = NULL;

    for(ce=cel->lru; ce; ce=next) {
        next = ce->next;

        if (ce->dev != dev) {
            continue;
        }

        if (ce->lock != 0 || (ce->flags & CE_BUSY)) {
            printf("*** remove_cached_dev: "
                "block %" B_PRIdOFF " has lock = %d, flags 0x%x! ce @ 0x%lx\n",
                ce->block_num, ce->lock, ce->flags, (ulong)ce);
        }

        if (allow_writes == ALLOW_WRITES &&
            ((ce->flags & CE_DIRTY) || ce->clone)) {
            ce->flags |= CE_BUSY;
            flush_cache_ent(ce);
            ce->flags &= ~CE_BUSY;
        }

        /* unlink this guy */
        if (cel->lru == ce)
            cel->lru = ce->next;

        if (cel->mru == ce)
            cel->mru = ce->prev;

        if (ce->prev)
            ce->prev->next = ce->next;
        if (ce->next)
            ce->next->prev = ce->prev;

        if (ce->clone)
            free(ce->clone);
        ce->clone = NULL;

        if (ce->data)
            free(ce->data);
        ce->data = NULL;

        if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) {
            beos_panic("*** remove_cached_device: bad hash table entry %ld "
                   "0x%lx != 0x%lx\n", ce->block_num, (ulong)junk, (ulong)ce);
        }

        free(ce);

        bc.cur_blocks--;
    }

}

int
beos_remove_cached_device_blocks(int dev, int allow_writes)
{
    LOCK(bc.lock);

    real_remove_cached_blocks(dev, allow_writes, &bc.normal);
    real_remove_cached_blocks(dev, allow_writes, &bc.locked);

    max_device_blocks[dev] = 0;

    UNLOCK(bc.lock);

    return 0;
}



int
beos_flush_blocks(int dev, fs_off_t bnum, int nblocks)
{
    int        cur, i;
    cache_ent *ce;
    cache_ent *ents[NUM_FLUSH_BLOCKS];

    if (nblocks == 0)   /* might as well check for this */
        return 0;

    LOCK(bc.lock);

    cur = 0;
    for(; nblocks > 0; nblocks--, bnum++) {
        ce = block_lookup(dev, bnum);
        if (ce == NULL)
            continue;

        if (bnum != ce->block_num || dev != ce->dev) {
            UNLOCK(bc.lock);
            beos_panic("error2: looked up dev %d block %ld but found %d %ld\n",
                  dev, bnum, ce->dev, ce->block_num);
            return EBADF;
        }

        if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
            continue;

        ce->flags |= CE_BUSY;
        ents[cur++] = ce;

        if (cur >= NUM_FLUSH_BLOCKS) {
            UNLOCK(bc.lock);

            qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
            flush_ents(ents, cur);

            LOCK(bc.lock);
            for(i=0; i < cur; i++) {
                ents[i]->flags &= ~CE_BUSY;
            }
            cur = 0;
        }
    }

    UNLOCK(bc.lock);

    if (cur == 0)     /* nothing more to do */
        return 0;

    /* flush out the last few buggers */
    qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
    flush_ents(ents, cur);

    for(i=0; i < cur; i++) {
        ents[i]->flags &= ~CE_BUSY;
    }

    return 0;
}


int
beos_mark_blocks_dirty(int dev, fs_off_t bnum, int nblocks)
{
    int        ret = 0;
    cache_ent *ce;

    LOCK(bc.lock);

    while(nblocks > 0) {
        ce = block_lookup(dev, bnum);
        if (ce) {
            ce->flags |= CE_DIRTY;
            bnum      += 1;
            nblocks   -= 1;
        } else {     /* hmmm, that's odd, didn't find it */
            printf("** mark_blocks_dirty couldn't find block %" B_PRIdOFF " "
                "(len %d)\n", bnum, nblocks);
            ret = ENOENT;
            break;
        }
    }

    UNLOCK(bc.lock);

    return ret;
}



int
beos_release_block(int dev, fs_off_t bnum)
{
    cache_ent *ce;

    /* printf("rlsb: %ld\n", bnum); */
    LOCK(bc.lock);

    ce = block_lookup(dev, bnum);
    if (ce) {
        if (bnum != ce->block_num || dev != ce->dev) {
            beos_panic("*** error3: looked up dev %d block %ld but found %d %ld\n",
                    dev, bnum, ce->dev, ce->block_num);
            UNLOCK(bc.lock);
            return EBADF;
        }

        ce->lock--;

        if (ce->lock < 0) {
            printf("rlsb: whoa nellie! ce %" B_PRIdOFF " has lock == %d\n",
                   ce->block_num, ce->lock);
        }

        if (ce->lock == 0) {
            delete_from_list(&bc.locked, ce);
            add_to_head(&bc.normal, ce);
        }

    } else {     /* hmmm, that's odd, didn't find it */
        beos_panic("** release_block asked to find %ld but it's not here\n",
               bnum);
    }

    UNLOCK(bc.lock);

    return 0;
}


static cache_ent *
new_cache_ent(int bsize)
{
    cache_ent *ce;

    ce = (cache_ent *)calloc(1, sizeof(cache_ent));
    if (ce == NULL) {
        beos_panic("*** error: cache can't allocate memory!\n");
        return NULL;
    }

    ce->data = malloc(bsize);
    if (ce->data == NULL) {
        free(ce);
        beos_panic("** error cache can't allocate data memory\n");
        UNLOCK(bc.lock);
        return NULL;
    }

    ce->dev       = -1;
    ce->block_num = -1;

    return ce;
}


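/*
   Picks num_needed victim cache_ents (and up to max if the victims are
   dirty anyway), allocating fresh entries while the cache is below
   max_blocks and otherwise recycling from the LRU end of the normal
   list.  Every ent returned is marked CE_BUSY; the caller must clear
   that flag when done.  Assumes bc.lock is held (it is dropped and
   re-acquired while waiting for busy blocks to free up).
*/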
static void
get_ents(cache_ent **ents, int num_needed, int max, int *num_gotten, int bsize)
{
    int        cur, retry_counter = 0, max_retry = num_needed * 256;
    cache_ent *ce;

    if (num_needed > max)
        beos_panic("get_ents: num_needed %d but max %d (doh!)\n", num_needed, max);

    /* if the cache isn't full yet, just allocate the blocks */
    for(cur=0; bc.cur_blocks < bc.max_blocks && cur < num_needed; cur++) {
        ents[cur] = new_cache_ent(bsize);
        if (ents[cur] == NULL)
            break;
        bc.cur_blocks++;
    }

    /* pluck off blocks from the LRU end of the normal list, keep trying too */
    while(cur < num_needed && retry_counter < max_retry) {
        for(ce=bc.normal.lru; ce && cur < num_needed; ce=ce->next) {
            if (ce->lock)
                beos_panic("get_ents: normal list has locked blocks (ce 0x%x)\n",ce);

            if (ce->flags & CE_BUSY)   /* don't touch busy blocks */
                continue;

            ce->flags   |= CE_BUSY;
            ents[cur++]  = ce;
        }

        if (cur < num_needed) {
            UNLOCK(bc.lock);
            snooze(10000);
            LOCK(bc.lock);
            retry_counter++;
        }
    }

    if (cur < num_needed && retry_counter >= max_retry) {  /* oh shit! */
        dump_cache_list();
        UNLOCK(bc.lock);
        beos_panic("get_ents: waited too long; can't get enough ce's (c %d n %d)\n",
              cur, num_needed);
    }

    /*
      If the last block is a dirty one, try to get more of 'em so
      that we can flush a bunch of blocks at once.
    */
    if (cur && cur < max &&
        ((ents[cur-1]->flags & CE_DIRTY) || ents[cur-1]->clone)) {

        for(ce=ents[cur-1]->next; ce && cur < max; ce=ce->next) {
            if (ce->flags & CE_BUSY)   /* don't touch busy blocks */
                continue;

            if (ce->lock)
                beos_panic("get_ents:2 dirty list has locked blocks (ce 0x%x)\n",ce);

            ce->flags   |= CE_BUSY;
            ents[cur++]  = ce;
        }
    }

    *num_gotten = cur;
}


static int
read_into_ents(int dev, fs_off_t bnum, cache_ent **ents, int num, int bsize)
{
    int    i, ret;
    struct iovec *iov;

    iov = get_iovec_array();

    for (i = 0; i < num; i++) {
        iov[i].iov_base = ents[i]->data;
        iov[i].iov_len  = bsize;
    }

	if (chatty_io > 2)
		printf("readv @ %" B_PRIdOFF " for %d blocks "
			"(at %" B_PRIdOFF ", block_size = %d)\n",
			bnum, num, bnum*bsize, bsize);
    ret = readv_pos(dev, bnum*bsize, iov, num);

    release_iovec_array(iov);

    if (ret != num*bsize) {
        printf("read_into_ents: asked to read %d bytes but got %d\n",
               num*bsize, ret);
        printf("*** iov @ %p (num %d)\n", iov, num);
        return EINVAL;
    } else
        return 0;
}



#define CACHE_READ          0x0001
#define CACHE_WRITE         0x0002
#define CACHE_NOOP          0x0004     /* for getting empty blocks */
#define CACHE_LOCKED        0x0008
#define CACHE_READ_AHEAD_OK 0x0010     /* it's ok to do read-ahead */


static char *
op_to_str(int op)
{
    static char buff[128];

    if (op & CACHE_READ)
        strcpy(buff, "READ");
    else if (op & CACHE_WRITE)
        strcpy(buff, "WRITE");
    else if (op & CACHE_NOOP)
        strcpy(buff, "NOP");

    if (op & CACHE_LOCKED)
        strcat(buff, " LOCKED");

    if (op & CACHE_READ_AHEAD_OK)
        strcat(buff, " (AHEAD)");

    return buff;
}
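
/*
   Illustrative sketch (kept out of the build) of how the flags above
   combine.  This mirrors the way get_block()-style callers appear to
   use cache_block_io() below: a NULL data buffer with a dataptr, one
   block at a time, pairing the CACHE_LOCKED request with a later
   beos_release_block().
*/
#if 0
static void
example_locked_read(int dev, fs_off_t bnum, int bsize)
{
    void *ptr = NULL;

    if (cache_block_io(dev, bnum, NULL, 1, bsize,
            CACHE_READ | CACHE_LOCKED | CACHE_READ_AHEAD_OK, &ptr) == 0) {
        /* ... examine the block through ptr ... */
        beos_release_block(dev, bnum);
    }
}
#endif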
1758 
1759 static int
1760 cache_block_io(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize,
1761                int op, void **dataptr)
1762 {
1763     size_t          err = 0;
1764     cache_ent      *ce;
1765     cache_ent_list *cel;
1766 
1767     if (chatty_io > 1)
1768         printf("cbio: bnum = %" B_PRIdOFF ", num_blocks = %" B_PRIdOFF ", "
1769             "bsize = %d, op = %s\n", bnum, num_blocks, bsize, op_to_str(op));
1770 
1771     /* some sanity checks first */
1772     if (bsize == 0)
1773         beos_panic("cache_io: block size == 0 for bnum %Ld?!?\n", bnum);
1774 
1775     if (num_blocks == 0)
1776         beos_panic("cache_io: bnum %Ld has num_blocks == 0!\n", bnum);
1777 
1778     if (data == NULL && dataptr == NULL) {
1779         printf("major butthead move: "
1780             "null data and dataptr! bnum %" B_PRIdOFF ":%" B_PRIdOFF "\n",
1781             bnum, num_blocks);
1782         return ENOMEM;
1783     }
1784 
1785     if (data == NULL) {
1786         if (num_blocks != 1)    /* get_block() should never do that */
1787             beos_panic("cache_io: num_blocks %Ld but should be 1\n",
1788                   num_blocks);
1789 
1790         if (op & CACHE_WRITE)
1791             beos_panic("cache_io: get_block() asked to write?!?\n");
1792     }
1793 
1794     if (bnum + num_blocks > max_device_blocks[dev]) {
1795         printf("dev %d: "
1796             "access to blocks %" B_PRIdOFF ":%" B_PRIdOFF " "
1797             "but max_dev_blocks is %" B_PRIdOFF "\n",
1798             dev, bnum, num_blocks, max_device_blocks[dev]);
1799 
1800 		// let the app crash here
1801 		*(int *)0x3100 = 0xc0debabe;
1802         return EINVAL;
1803     }
1804 
1805     last_cache_access = system_time();
1806 
1807     /* if the i/o is greater than 64k, do it directly */
1808     if (num_blocks * bsize >= 64 * 1024) {
1809         char  *ptr;
1810         fs_off_t  tmp;
1811 
1812         if (data == NULL || (op & CACHE_LOCKED)) {
1813             beos_panic("*** asked to do a large locked io that's too hard!\n");
1814         }
1815 
1816 
1817         if (op & CACHE_READ) {
1818             if (beos_read_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
1819                 printf("cache read:read_phys_blocks failed "
1820                     "(%s on blocks %" B_PRIdOFF ":%" B_PRIdOFF ")!\n",
1821                      strerror(errno), bnum, num_blocks);
1822                 return EINVAL;
1823             }
1824 
1825             LOCK(bc.lock);
1826 
1827             /* if any of the blocks are in the cache, grab them instead */
1828             ptr = data;
1829             for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
1830                 ce = block_lookup(dev, tmp);
1831                 /*
1832                     if we find a block in the cache we have to copy its
1833                     data just in case it is more recent than what we just
1834                     read from disk (which could happen if someone wrote
1835                     these blocks after we did the read but before we locked
1836                     the cache and entered this loop).
1837                 */
1838                 if (ce) {
1839                     if (tmp != ce->block_num || dev != ce->dev) {
1840                         UNLOCK(bc.lock);
1841                         beos_panic("*** error4: looked up dev %d block %Ld but "
1842                                 "found %d %Ld\n", dev, tmp, ce->dev,
1843                                 ce->block_num);
1844                     }
1845 
1846                     memcpy(ptr, ce->data, bsize);
1847                 }
1848             }
1849 
1850             UNLOCK(bc.lock);
1851         } else if (op & CACHE_WRITE) {
1852             LOCK(bc.lock);
1853 
1854             /* if any of the blocks are in the cache, update them too */
1855             ptr = data;
1856             for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
1857                 ce = block_lookup(dev, tmp);
1858                 if (ce) {
1859                     if (tmp != ce->block_num || dev != ce->dev) {
1860                         UNLOCK(bc.lock);
1861                         beos_panic("*** error5: looked up dev %d block %Ld but "
1862                                 "found %d %Ld\n", dev, tmp, ce->dev,
1863                                 ce->block_num);
1864                         return EBADF;
1865                     }
1866 
1867                     /* XXXdbg -- this isn't strictly necessary */
1868                     if (ce->clone) {
1869                         printf("over-writing cloned data "
1870                             "(ce %p bnum %" B_PRIdOFF ")...\n", ce, tmp);
1871                         flush_cache_ent(ce);
1872                     }
1873 
1874                     /* copy the data into the cache */
1875                     memcpy(ce->data, ptr, bsize);
1876                 }
1877             }
1878 
1879             UNLOCK(bc.lock);
1880 
1881             if (beos_write_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
1882                 printf("cache write: write_phys_blocks failed (%s on blocks "
1883                     "%" B_PRIdOFF ":%" B_PRIdOFF ")!\n",
1884                     strerror(errno), bnum, num_blocks);
1885                 return EINVAL;
1886             }
1887         } else {
1888             printf("bad cache op %d "
1889                 "(bnum %" B_PRIdOFF " nblocks %" B_PRIdOFF ")\n",
1890                 op, bnum, num_blocks);
1891             return EINVAL;
1892         }
1893 
1894         return 0;
1895     }
1896 
1897 
1898     LOCK(bc.lock);
1899     while(num_blocks) {
1900 
1901         ce = block_lookup(dev, bnum);
1902         if (ce) {
1903             if (bnum != ce->block_num || dev != ce->dev) {
1904                 UNLOCK(bc.lock);
1905                 beos_panic("*** error6: looked up dev %d block %ld but found "
1906                         "%d %ld\n", dev, bnum, ce->dev, ce->block_num);
1907                 return EBADF;
1908             }
1909 
1910             if (bsize != ce->bsize) {
1911                 beos_panic("*** requested bsize %d but ce->bsize %d ce @ 0x%x\n",
1912                         bsize, ce->bsize, ce);
1913             }

            /* delete this ent from the list it is in because it may change */
            if (ce->lock)
                cel = &bc.locked;
            else
                cel = &bc.normal;

            delete_from_list(cel, ce);

            if (op & CACHE_READ) {
                if (data && data != ce->data) {
                    memcpy(data, ce->data, bsize);
                } else if (dataptr) {
                    *dataptr = ce->data;
                } else {
                    printf("cbio: data %p dptr %p ce @ %p ce->data %p\n",
                           data, dataptr, ce, ce->data);
                }
            } else if (op & CACHE_WRITE) {
                if (data && data != ce->data)
                    memcpy(ce->data, data, bsize);

                ce->flags |= CE_DIRTY;
            } else if (op & CACHE_NOOP) {
                memset(ce->data, 0, bsize);
                if (data)
                    memset(data, 0, bsize);

                if (dataptr)
                    *dataptr = ce->data;

                ce->flags |= CE_DIRTY;
            } else {
                beos_panic("cache_block_io: bogus op %d\n", op);
            }

            if (op & CACHE_LOCKED)
                ce->lock++;

            if (ce->lock)
                cel = &bc.locked;
            else
                cel = &bc.normal;

            /* now put this ent at the head of the appropriate list */
            add_to_head(cel, ce);

            if (data != NULL)
                data = (void *)((char *)data + bsize);

            bnum       += 1;
            num_blocks -= 1;

            continue;
        } else {                                  /* it's not in the cache */
            int        cur, cur_nblocks, num_dirty, real_nblocks, num_needed;
            cache_ent *ents[NUM_FLUSH_BLOCKS];

            /*
               here we find out how many additional blocks in this request
               are not in the cache.  the idea is that then we can do one
               big i/o on that many blocks at once.
            */
            for(cur_nblocks=1;
                cur_nblocks < num_blocks && cur_nblocks < NUM_FLUSH_BLOCKS;
                cur_nblocks++) {

                /* we can call hash_lookup() directly instead of
                   block_lookup() because we don't care about the
                   state of the busy bit of the block at this point
                */
                if (hash_lookup(&bc.ht, dev, bnum + cur_nblocks))
                    break;
            }

            /*
              here we try to figure out how many extra blocks we should read
              for read-ahead.  we want to read as many as possible that are
              not already in the cache and that don't cause us to try and
              read beyond the end of the disk.
            */
            if ((op & CACHE_READ) && (op & CACHE_READ_AHEAD_OK) &&
                (cur_nblocks * bsize) < read_ahead_size) {

                for(num_needed=cur_nblocks;
                    num_needed < (read_ahead_size / bsize);
                    num_needed++) {

                    if ((bnum + num_needed) >= max_device_blocks[dev])
                        break;

                    if (hash_lookup(&bc.ht, dev, bnum + num_needed))
                        break;
                }
            } else {
                num_needed = cur_nblocks;
            }
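
            /*
               for example (illustrative numbers only): with bsize 1024 and
               a read_ahead_size of 32k, a 4-block miss may grow to a single
               32-block read, stopping early at the first block that is
               already cached or at the end of the device.
            */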

            /* this will get us pointers to a bunch of cache_ents we can use */
            get_ents(ents, num_needed, NUM_FLUSH_BLOCKS, &real_nblocks, bsize);

            if (real_nblocks < num_needed) {
                beos_panic("don't have enough cache ents (need %d got %d "
                      "%" B_PRIdOFF "::%" B_PRIdOFF ")\n",
                      num_needed, real_nblocks, bnum, num_blocks);
            }

            /*
              There are now three variables used as limits within the ents
              array.  This is how they are related:

                 cur_nblocks <= num_needed <= real_nblocks

              Ents from 0 to cur_nblocks-1 are going to be used to fulfill
              this IO request.  Ents from cur_nblocks to num_needed-1 are
              for read-ahead.  Ents from num_needed to real_nblocks-1 are
              extra blocks that get_ents() asked us to flush.  Often (and
              always on writes) cur_nblocks == num_needed.

              Below, we sort the list of ents so that when we flush them
              they go out in order.
            */
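
            /*
               a concrete (hypothetical) case: a 2-block read that grows to
               6 blocks with read-ahead, where get_ents() hands back 8 ents
               because two extra victims need flushing, gives
               cur_nblocks == 2, num_needed == 6 and real_nblocks == 8.
            */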

            qsort(ents, real_nblocks, sizeof(cache_ent *), cache_ent_cmp);

            /*
              delete each ent from its list because it will change.  also
              count up how many dirty blocks there are and insert into the
              hash table any new blocks so that no one else will try to
              read them in when we release the cache semaphore to do our I/O.
            */
            for(cur=0,num_dirty=0; cur < real_nblocks; cur++) {
                ce = ents[cur];
                ce->flags |= CE_BUSY;

                /*
                   insert the new block into the hash table with its new block
                   number. note that the block is still in the hash table for
                   its old block number -- and it has to be until we are done
                   flushing it from the cache (to prevent someone else from
                   sneaking in ahead of us and trying to read the same
                   block that we're flushing).
                */
                if (cur < num_needed) {
                    if (hash_insert(&bc.ht, dev, bnum + cur, ce) != 0)
                        beos_panic("could not insert cache ent for %d %" B_PRIdOFF
                              " (%p)\n", dev, bnum + cur, ents[cur]);
                }

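                /* a dev of -1 marks a freshly allocated ent that has never
                   held a block, so there is nothing old to count or unlink */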
                if (ce->dev == -1)
                    continue;

                if ((ce->flags & CE_DIRTY) || ce->clone)
                    num_dirty++;

                if (ce->lock) {
                    beos_panic("cbio: can't use locked blocks here ce @ %p\n",
                          ce);
                } else {
                    cel = &bc.normal;
                    delete_from_list(cel, ce);
                }
            }
            ce = NULL;


            /*
               we release the block cache semaphore here so that we can
               go do all the i/o we need to do (flushing dirty blocks
               that we're kicking out as well as reading any new data).

               because all the blocks we're touching are marked busy
               no one else should mess with them while we're doing this.
            */
            if (num_dirty || (op & CACHE_READ)) {
                UNLOCK(bc.lock);

                /* this flushes any blocks we're kicking out that are dirty */
                if (num_dirty && (err = flush_ents(ents, real_nblocks)) != 0) {
                    printf("flush ents failed (ents @ %p, nblocks %d)!\n",
                           (void *)ents, real_nblocks);
                    goto handle_err;
                }

            }

            /*
               now that everything is flushed to disk, go through and
               make sure that the data blocks we're going to use are
               the right block size for this current request (it's
               possible we're kicking out some smaller blocks and need
               to reallocate the data block pointer). We do this in two
               steps, first freeing everything and then going through
               and doing the mallocs to try and be nice to the memory
               system (i.e. allow it to coalesce stuff, etc).
            */
            err = 0;
            for(cur=0; cur < num_needed; cur++) {
                if (ents[cur]->bsize != bsize) {
                    free(ents[cur]->data);
                    ents[cur]->data = NULL;

                    if (ents[cur]->clone) {
                        free(ents[cur]->clone);
                        ents[cur]->clone = NULL;
                    }
                }
            }

            for(cur=0; cur < num_needed; cur++) {
                if (ents[cur]->data == NULL) {
                    ents[cur]->data  = malloc(bsize);
                    ents[cur]->bsize = bsize;
                }

                if (ents[cur]->data == NULL) {
                    printf("cache: no memory for block (bsize %d)!\n",
                           bsize);
                    err = ENOMEM;
                    break;
                }
            }

            /*
               if this condition is true it's a pretty serious error.
               we'll try and back out gracefully but we're in pretty
               deep at this point and it ain't going to be easy.
            */
  handle_err:
            if (err) {
                for(cur=0; cur < num_needed; cur++) {
                    cache_ent *tmp_ce;

                    tmp_ce = (cache_ent *)hash_delete(&bc.ht, dev, bnum + cur);
                    if (tmp_ce != ents[cur]) {
                        beos_panic("hash_del0: %d %" B_PRIdOFF " got %p, "
                                "not %p\n", dev, bnum + cur, tmp_ce,
                                ents[cur]);
                    }

                    tmp_ce = (cache_ent *)hash_delete(&bc.ht, ents[cur]->dev,
                                                        ents[cur]->block_num);
                    if (tmp_ce != ents[cur]) {
                        beos_panic("hash_del1: %d %" B_PRIdOFF " got %p, "
                                "not %p\n", ents[cur]->dev,
                                ents[cur]->block_num, tmp_ce, ents[cur]);
                    }

                    ents[cur]->flags &= ~CE_BUSY;
                    if (ents[cur]->data)
                        free(ents[cur]->data);
                    free(ents[cur]);
                    ents[cur] = NULL;

                    bc.cur_blocks--;
                }

                if (cur < real_nblocks) {
                    LOCK(bc.lock);
                    for(; cur < real_nblocks; cur++) {
                        ents[cur]->flags &= ~CE_BUSY;

                        /* we have to put them back here */
                        add_to_tail(&bc.normal, ents[cur]);
                    }
                    UNLOCK(bc.lock);
                }

                return err;
            }


            /*
               If we go into this if statement, the block cache lock
               has *already been released* up above when we flushed the
               dirty entries.  As always, since the blocks we're mucking
               with are marked busy, they shouldn't get messed with.
            */
            err = 0;
            if (num_dirty || (op & CACHE_READ)) {
                /* this section performs the i/o that we need to do */
                if (op & CACHE_READ) {
                    err = read_into_ents(dev, bnum, ents, num_needed, bsize);
                } else {
                    err = 0;
                }

                if (err != 0) {
                    printf("err %s on dev %d block %" B_PRIdOFF ":%d (%d) "
                           "data %p, ents[0] %p\n",
                           strerror(errno), dev, bnum, cur_nblocks,
                           bsize, data, ents[0]);
                }

                /*
                   acquire the semaphore here so that we can go on mucking
                   with the cache data structures.  We need to delete old
                   block numbers from the hash table and set the new block
                   numbers for the blocks we just read in.  We also put the
                   read-ahead blocks at the head of the MRU list.
                */

                LOCK(bc.lock);
            }

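            /*
               unhook each ent from its old (device, block) hash mapping.
               read-ahead ents (cur >= cur_nblocks) take on their new
               identity right away and go back on the normal list; the
               ents for this request proper are finished up below.
            */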
            for(cur=0; cur < num_needed; cur++) {
                cache_ent *tmp_ce;

                ce = ents[cur];
                if (ce->dev != -1) {
                    tmp_ce = hash_delete(&bc.ht, ce->dev, ce->block_num);
                    if (tmp_ce == NULL || tmp_ce != ce) {
                        beos_panic("*** hash_delete failure (ce %p tce %p)\n",
                              ce, tmp_ce);
                    }
                }

                if (err == 0 && cur >= cur_nblocks) {
                    ce->dev       = dev;
                    ce->block_num = bnum + cur;
                    ce->flags    &= ~CE_BUSY;
                    add_to_head(&bc.normal, ce);
                }
            }
            ce = NULL;

            /*
              clear the busy bit on the blocks we force-flushed and
              put them on the normal list since they're now clean.
            */
            for(; cur < real_nblocks; cur++) {
                ents[cur]->flags &= ~CE_BUSY;

                if (ents[cur]->lock)
                    beos_panic("should not have locked blocks here (ce %p)\n",
                          ents[cur]);

                add_to_tail(&bc.normal, ents[cur]);
            }

            if (err) {   /* then we have some cleanup to do */
                for(cur=0; cur < num_needed; cur++) {
                    cache_ent *tmp_ce;

                    /* we delete all blocks from the cache so we don't
                       leave partially written blocks in the cache */

                    tmp_ce = (cache_ent *)hash_delete(&bc.ht, dev, bnum + cur);
                    if (tmp_ce != ents[cur]) {
                        beos_panic("hash_del: %d %" B_PRIdOFF " got %p, "
                                "not %p\n", dev, bnum + cur, tmp_ce,
                                ents[cur]);
                    }

                    ce = ents[cur];
                    ce->flags &= ~CE_BUSY;

                    free(ce->data);
                    ce->data = NULL;

                    free(ce);
                    ents[cur] = NULL;

                    bc.cur_blocks--;
                }
                ce = NULL;

                UNLOCK(bc.lock);
                return err;
            }


            /*
               last step: go through and make sure all the cache_ent
               structures have the right data in them, delete old guys, etc.
            */
            for(cur=0; cur < cur_nblocks; cur++) {
                ce = ents[cur];

                if (ce->dev != -1) {   /* then clean this guy up */
                    if (ce->next || ce->prev)
                        beos_panic("ce @ %p should not be in a list yet!\n", ce);

                    if (ce->clone)
                        free(ce->clone);

                    if (ce->data == NULL)
                        beos_panic("ce @ %p has a null data ptr\n", ce);
                }

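                /* give the ent its new identity and a clean slate of
                   flags, lock count and journal callback */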
                ce->dev        = dev;
                ce->block_num  = bnum + cur;
                ce->bsize      = bsize;
                ce->flags      = CE_NORMAL;
                ce->lock       = 0;
                ce->clone      = NULL;
                ce->func       = NULL;
                ce->arg        = NULL;
                ce->next = ce->prev = NULL;

                if (op & CACHE_READ) {
                    if (data)
                        memcpy(data, ce->data, bsize);
                } else if (op & CACHE_WRITE) {
                    ce->flags  |= CE_DIRTY;
                    memcpy(ce->data, data, bsize);
                } else if (op & CACHE_NOOP) {
                    memset(ce->data, 0, bsize);
                    if (data)
                        memset(data, 0, bsize);

                    ce->flags |= CE_DIRTY;
                }

                if (op & CACHE_LOCKED) {
                    ce->lock++;
                    cel = &bc.locked;
                } else {
                    cel = &bc.normal;
                }

                /* now stick this puppy at the head of the mru list */
                add_to_head(cel, ce);

                if (dataptr) {
                    *dataptr = ce->data;
                }

                if (data != NULL)
                    data = (void *)((char *)data + bsize);
                else if (cur_nblocks != 1)
                    beos_panic("cache can't handle setting data_ptr twice!\n");
            }  /* end of for(cur=0; cur < cur_nblocks; cur++) */

            bnum       += cur_nblocks;
            num_blocks -= cur_nblocks;

        }   /* end of else it's not in the cache */

    }   /* end of while(num_blocks) */

    UNLOCK(bc.lock);

    return 0;
}


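/*
   convenience wrappers: these map the public cache calls onto
   cache_block_io() with the appropriate op flags.
*/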
void *
beos_get_block(int dev, fs_off_t bnum, int bsize)
{
    void *data;

    if (cache_block_io(dev, bnum, NULL, 1, bsize,
                       CACHE_READ | CACHE_LOCKED | CACHE_READ_AHEAD_OK,
                       &data) != 0)
        return NULL;

    return data;
}

void *
beos_get_empty_block(int dev, fs_off_t bnum, int bsize)
{
    void *data;

    if (cache_block_io(dev, bnum, NULL, 1, bsize, CACHE_NOOP | CACHE_LOCKED,
                       &data) != 0)
        return NULL;

    return data;
}

int
beos_cached_read(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize)
{
    return cache_block_io(dev, bnum, data, num_blocks, bsize,
                          CACHE_READ | CACHE_READ_AHEAD_OK, NULL);
}


int
beos_cached_write(int dev, fs_off_t bnum, const void *data, fs_off_t num_blocks,
                    int bsize)
{
    return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
                          CACHE_WRITE, NULL);
}

int
beos_cached_write_locked(int dev, fs_off_t bnum, const void *data,
                    fs_off_t num_blocks, int bsize)
{
    return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
                          CACHE_WRITE | CACHE_LOCKED, NULL);
}
2405 
2406 
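/*
   Usage sketch (illustrative only -- "fd" and "bnum" are hypothetical
   values a file system would already have on hand):

       char buf[1024];

       if (beos_cached_read(fd, bnum, buf, 1, 1024) != 0)
           return EINVAL;

       / * ... modify buf ... * /

       if (beos_cached_write(fd, bnum, buf, 1, 1024) != 0)
           return EINVAL;

   beos_get_block() instead hands back a pointer to the cache's own copy
   of the block and leaves it locked in the cache until it is released
   (see beos_release_block()).
*/
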
void
beos_force_cache_flush(int dev, int prefer_log_blocks)
{
    int        i, count = 0;
    cache_ent *ce;
    cache_ent *ents[NUM_FLUSH_BLOCKS];

    LOCK(bc.lock);

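    /*
       collect up to NUM_FLUSH_BLOCKS dirty (or cloned) unbusy blocks for
       this device; when prefer_log_blocks is set, only blocks with a
       journal callback (ce->func) are taken from the normal list.
    */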
    for(ce=bc.normal.lru; ce; ce=ce->next) {
        if ((ce->dev == dev) &&
            (ce->flags & CE_BUSY) == 0 &&
            ((ce->flags & CE_DIRTY) || ce->clone) &&
            ((prefer_log_blocks && ce->func) || (prefer_log_blocks == 0))) {

            ce->flags |= CE_BUSY;
            ents[count++] = ce;

            if (count >= NUM_FLUSH_BLOCKS) {
                break;
            }
        }
    }

    /* if we've got some room left, try and grab any cloned blocks */
    if (count < NUM_FLUSH_BLOCKS) {
        for(ce=bc.locked.lru; ce; ce=ce->next) {
            if ((ce->dev == dev) &&
                (ce->flags & CE_BUSY) == 0 &&
                (ce->clone)) {

                ce->flags |= CE_BUSY;
                ents[count++] = ce;

                if (count >= NUM_FLUSH_BLOCKS) {
                    break;
                }
            }
        }
    }

    UNLOCK(bc.lock);

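    /*
       the flush happens outside the cache lock; the CE_BUSY flags set
       above keep anyone else from touching these ents in the meantime.
    */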
    if (count != 0) {
        qsort(ents, count, sizeof(cache_ent *), cache_ent_cmp);
        flush_ents(ents, count);

        for(i=0; i < count; i++)
            ents[i]->flags &= ~CE_BUSY;
    }

2459