xref: /haiku/src/add-ons/kernel/file_systems/userlandfs/server/beos/fs_cache.c (revision 1f52c921e27aa442370e1bd4adc021acf2b78b64)
1 /*
2    This file contains the global device cache for the BeOS.  All
3    file system I/O comes through here.  The cache can handle blocks
4    of different sizes for multiple different underlying physical
5    devices.
6 
7    The cache is organized as a hash table (for lookups by device
8    and block number) and two doubly-linked lists.  The normal
9    list is for "normal" blocks which are either clean or dirty.
10    The locked list is for blocks that are locked in the cache by
11    BFS.  The lists are LRU ordered.
12 
13    Most of the work happens in the function cache_block_io() which
14    is quite lengthy.  The other functions of interest are get_ents()
15    which picks victims to be kicked out of the cache; flush_ents()
16    which does the work of flushing them; and set_blocks_info() which
17    handles cloning blocks and setting callbacks so that the BFS
18    journal will work properly.  If you want to modify this code, it
19    will take some study, but it's not too bad.  Do not think about
20    separating the clean and dirty blocks into two lists; I tried
21    that already and it's slower.
22 
23    Originally this cache code was written while listening to the album
24    "Ride the Lightning" by Metallica.  The current version was written
25    while listening to "Welcome to SkyValley" by Kyuss as well as an
26    ambient collection on the German label FAX.  Music helps a lot when
27    writing code.
28 
29    THIS CODE COPYRIGHT DOMINIC GIAMPAOLO.  NO WARRANTY IS EXPRESSED
30    OR IMPLIED.  YOU MAY USE THIS CODE AND FREELY DISTRIBUTE IT FOR
31    NON-COMMERCIAL USE AS LONG AS THIS NOTICE REMAINS ATTACHED.
32 
33    FOR COMMERCIAL USE, CONTACT DOMINIC GIAMPAOLO (dbg@be.com).
34 
35    Dominic Giampaolo
36    dbg@be.com
37 */
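/*
   For reference while reading the code: the real declarations live in
   fs_cache_priv.h, but roughly (a sketch, not the verbatim types):

      cache_ent       - one cached block: dev, block_num, bsize, data,
                        clone, a lock count, flags (CE_BUSY, CE_DIRTY),
                        func/arg/logged_bnum for the journal callback,
                        and next/prev list links
      cache_ent_list  - a doubly-linked list with lru and mru pointers
      block_cache     - the lock, the hash table, cur/max block counts,
                        and the "normal" and "locked" lists
*/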
38 
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <memory.h>
42 #include <string.h>
43 #include <errno.h>
44 #include <fcntl.h>
45 #include <sys/types.h>
46 #include <sys/uio.h>
47 #include <unistd.h>
48 
49 
50 #include <OS.h>
51 #include <KernelExport.h>
52 
53 #include "fs_cache.h"
54 #include "fs_cache_priv.h"
55 #include "lock.h"
56 
57 
58 
59 #ifndef USER
60 #define printf dprintf
61 #endif
62 #ifdef USER
63 #define kprintf printf
64 #endif
65 
66 
67 typedef off_t fs_off_t;
68 
69 /* forward prototypes */
70 static int   flush_ents(cache_ent **ents, int n_ents);
71 
72 //static int   do_dump(int argc, char **argv);
73 //static int   do_find_block(int argc, char **argv);
74 //static int   do_find_data(int argc, char **argv);
75 //static void  cache_flusher(void *arg, int phase);
76 
77 
78 int chatty_io = 0;
79 
80 #define CHUNK (512 * 1024)   /* a hack to work around scsi driver bugs */
81 
82 static void
83 beos_panic(const char *format, ...)
84 {
85     va_list     ap;
86 
87     va_start(ap, format);
88     vfprintf(stderr, format, ap);
89     va_end(ap);
90 
91     while (TRUE)
92         ;
93 }
94 
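/*
   Read num_blocks blocks of bsize bytes starting at block bnum,
   splitting large transfers into CHUNK-sized pieces (see the comment
   on CHUNK above).  Note: despite the size_t return type, this
   returns 0 on success and EBADF on failure.  beos_write_phys_blocks()
   below is the mirror image for writes.
*/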
95 size_t
96 beos_read_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize)
97 {
98     size_t ret = 0;
99     size_t sum;
100 
101     if (chatty_io)
102         printf("R: %8Ld : %3d\n", bnum, num_blocks);
103 
104     if (num_blocks * bsize < CHUNK)
105         ret = read_pos(fd, bnum * bsize, data, num_blocks * bsize);
106     else {
107         for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) {
108             ret = read_pos(fd, (bnum * bsize) + sum, data, CHUNK);
109             if (ret != CHUNK)
110                 break;
111 
112             data = (void *)((char *)data + CHUNK);
113         }
114 
115         if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) {
116             ret = read_pos(fd, (bnum * bsize) + sum, data,
117                            (num_blocks * bsize) - sum);
118 
119             if (ret == (num_blocks * bsize) - sum)
120                 ret = num_blocks * bsize;
121         } else if (ret == CHUNK) {
122             ret = num_blocks * bsize;
123         }
124     }
125 
126     if (ret == num_blocks * bsize)
127         return 0;
128     else
129         return EBADF;
130 }
131 
132 size_t
133 beos_write_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize)
134 {
135     size_t ret = 0;
136     size_t sum;
137 
138     if (chatty_io)
139         printf("W: %8Ld : %3d\n", bnum, num_blocks);
140 
141     if (num_blocks * bsize < CHUNK)
142         ret = write_pos(fd, bnum * bsize, data, num_blocks * bsize);
143     else {
144         for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) {
145             ret = write_pos(fd, (bnum * bsize) + sum, data, CHUNK);
146             if (ret != CHUNK)
147                 break;
148 
149             data = (void *)((char *)data + CHUNK);
150         }
151 
152         if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) {
153             ret = write_pos(fd, (bnum * bsize) + sum, data,
154                             (num_blocks * bsize) - sum);
155 
156             if (ret == (num_blocks * bsize) - sum)
157                 ret = num_blocks * bsize;
158         } else if (ret == CHUNK) {
159             ret = num_blocks * bsize;
160         }
161     }
162 
163 
164     if (ret == num_blocks * bsize)
165         return 0;
166     else
167         return EBADF;
168 }
169 
170 //	#pragma mark -
171 
172 static int
173 init_hash_table(hash_table *ht)
174 {
175     ht->max   = HT_DEFAULT_MAX;
176     ht->mask  = ht->max - 1;
177     ht->num_elements = 0;
178 
179     ht->table = (hash_ent **)calloc(ht->max, sizeof(hash_ent *));
180     if (ht->table == NULL)
181         return ENOMEM;
182 
183     return 0;
184 }
185 
186 
187 static void
188 shutdown_hash_table(hash_table *ht)
189 {
190     int       i, hash_len;
191     hash_ent *he, *next;
192 
193     for(i=0; i < ht->max; i++) {
194         he = ht->table[i];
195 
196         for(hash_len=0; he; hash_len++, he=next) {
197             next = he->next;
198             free(he);
199         }
200     }
201 
202     if (ht->table)
203         free(ht->table);
204     ht->table = NULL;
205 }
206 
207 #if 0
208 static void
209 print_hash_stats(hash_table *ht)
210 {
211     int       i, hash_len, max = -1, sum = 0;
212     hash_ent *he, *next;
213 
214     for(i=0; i < ht->max; i++) {
215         he = ht->table[i];
216 
217         for(hash_len=0; he; hash_len++, he=next) {
218             next = he->next;
219         }
220         if (hash_len)
221             printf("bucket %3d : %3d\n", i, hash_len);
222 
223         sum += hash_len;
224         if (hash_len > max)
225             max = hash_len;
226     }
227 
228     printf("max # of chains: %d,  average chain length %d\n", max,sum/ht->max);
229 }
230 #endif
231 
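/* pack the device id into the top 6 bits of a 64-bit value and OR in
   the block number; callers mask the result with ht->mask (the table
   size minus one) to pick a bucket */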
232 #define HASH(d, b)   ((((fs_off_t)d) << (sizeof(fs_off_t)*8 - 6)) | (b))
233 
234 static hash_ent *
235 new_hash_ent(int dev, fs_off_t bnum, void *data)
236 {
237     hash_ent *he;
238 
239     he = (hash_ent *)malloc(sizeof(*he));
240     if (he == NULL)
241         return NULL;
242 
243     he->hash_val = HASH(dev, bnum);
244     he->dev      = dev;
245     he->bnum     = bnum;
246     he->data     = data;
247     he->next     = NULL;
248 
249     return he;
250 }
251 
252 
253 static int
254 grow_hash_table(hash_table *ht)
255 {
256     int        i, omax, newsize, newmask;
257     fs_off_t      hash;
258     hash_ent **new_table, *he, *next;
259 
260     if (ht->max & ht->mask) {
261         printf("*** hashtable size %d or mask %d looks weird!\n", ht->max,
262                ht->mask);
263     }
264 
265     omax    = ht->max;
266     newsize = omax * 2;        /* have to grow in powers of two */
267     newmask = newsize - 1;
268 
269     new_table = (hash_ent **)calloc(newsize, sizeof(hash_ent *));
270     if (new_table == NULL)
271         return ENOMEM;
272 
273     for(i=0; i < omax; i++) {
274         for(he=ht->table[i]; he; he=next) {
275             hash = he->hash_val & newmask;
276             next = he->next;
277 
278             he->next        = new_table[hash];
279             new_table[hash] = he;
280         }
281     }
282 
283     free(ht->table);
284     ht->table = new_table;
285     ht->max   = newsize;
286     ht->mask  = newmask;
287 
288     return 0;
289 }
290 
291 
292 
293 
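/* add a (dev, bnum) -> data mapping; the table is doubled once it
   reaches 3/4 occupancy to keep the chains short */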
294 static int
295 hash_insert(hash_table *ht, int dev, fs_off_t bnum, void *data)
296 {
297     fs_off_t    hash;
298     hash_ent *he, *curr;
299 
300     hash = HASH(dev, bnum) & ht->mask;
301 
302     curr = ht->table[hash];
303     for(; curr != NULL; curr=curr->next)
304         if (curr->dev == dev && curr->bnum == bnum)
305             break;
306 
307     if (curr) {   /* the loop above only breaks early on a match */
308         printf("entry %d:%Ld already in the hash table!\n", dev, bnum);
309         return EEXIST;
310     }
311 
312     he = new_hash_ent(dev, bnum, data);
313     if (he == NULL)
314         return ENOMEM;
315 
316     he->next        = ht->table[hash];
317     ht->table[hash] = he;
318 
319     ht->num_elements++;
320     if (ht->num_elements >= ((ht->max * 3) / 4)) {
321         if (grow_hash_table(ht) != 0)
322             return ENOMEM;
323     }
324 
325     return 0;
326 }
327 
328 static void *
329 hash_lookup(hash_table *ht, int dev, fs_off_t bnum)
330 {
331     hash_ent *he;
332 
333     he = ht->table[HASH(dev, bnum) & ht->mask];
334 
335     for(; he != NULL; he=he->next) {
336         if (he->dev == dev && he->bnum == bnum)
337             break;
338     }
339 
340     if (he)
341         return he->data;
342     else
343         return NULL;
344 }
345 
346 
347 static void *
348 hash_delete(hash_table *ht, int dev, fs_off_t bnum)
349 {
350     void     *data;
351     fs_off_t     hash;
352     hash_ent *he, *prev = NULL;
353 
354     hash = HASH(dev, bnum) & ht->mask;
355     he = ht->table[hash];
356 
357     for(; he != NULL; prev=he,he=he->next) {
358         if (he->dev == dev && he->bnum == bnum)
359             break;
360     }
361 
362     if (he == NULL) {
363         printf("*** hash_delete: tried to delete non-existent block %d:%Ld\n",
364                dev, bnum);
365         return NULL;
366     }
367 
368     data = he->data;
369 
370     if (ht->table[hash] == he)
371         ht->table[hash] = he->next;
372     else if (prev)
373         prev->next = he->next;
374     else
375         beos_panic("hash table is inconsistent\n");
376 
377     free(he);
378     ht->num_elements--;
379 
380     return data;
381 }
382 
383 //	#pragma mark -
384 
385 /*
386   These are the global variables for the cache.
387 */
388 static block_cache  bc;
389 
390 #define       MAX_IOVECS  64           /* # of iovec arrays in the pool below */
391 static beos_lock   iovec_lock;
392 static struct iovec *iovec_pool[MAX_IOVECS];  /* each ptr is to an array of iovecs */
393 static int    iovec_used[MAX_IOVECS];  /* non-zero == iovec is in use */
394 
395 #define NUM_FLUSH_BLOCKS 64    /* size of the iovec array pointed to by each ptr */
396 
397 
398 #define DEFAULT_READ_AHEAD_SIZE  (32 * 1024)
399 static int read_ahead_size = DEFAULT_READ_AHEAD_SIZE;
400 
401 /* this array stores the size of each device so we can error check requests */
402 #define MAX_DEVICES  256
403 fs_off_t max_device_blocks[MAX_DEVICES];
404 
405 
406 /* holds the time of the last cache access so cache flushing doesn't interfere */
407 static bigtime_t last_cache_access = 0;
408 
409 
410 int
411 beos_init_block_cache(int max_blocks, int flags)
412 {
413     memset(&bc, 0, sizeof(bc));
414     memset(iovec_pool, 0, sizeof(iovec_pool));
415     memset(iovec_used, 0, sizeof(iovec_used));
416     memset(&max_device_blocks, 0, sizeof(max_device_blocks));
417 
418     if (init_hash_table(&bc.ht) != 0)
419         return ENOMEM;
420 
421     bc.lock.s = iovec_lock.s = -1;
422 
423     bc.max_blocks = max_blocks;
424     bc.flags      = flags;
425     if (beos_new_lock(&bc.lock, "bollockcache") != 0)
426         goto err;
427 
428     if (beos_new_lock(&iovec_lock, "iovec_lock") != 0)
429         goto err;
430 
431     /* allocate two of these up front so the VM won't accidentally re-enter itself */
432     iovec_pool[0] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
433     iovec_pool[1] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
434 
435 #ifndef USER
436 #ifdef DEBUG
437     add_debugger_command("bcache", do_dump, "dump the block cache list");
438     add_debugger_command("fblock", do_find_block, "find a block in the cache");
439     add_debugger_command("fdata",  do_find_data, "find a data block ptr in the cache");
440 #endif
441     register_kernel_daemon(cache_flusher, NULL, 3);
442 #endif
443 
444     return 0;
445 
446  err:
447     if (bc.lock.s >= 0)
448         beos_free_lock(&bc.lock);
449 
450     if (iovec_lock.s >= 0)
451         beos_free_lock(&iovec_lock);
452 
453     shutdown_hash_table(&bc.ht);
454     memset((void *)&bc, 0, sizeof(bc));
455     return ENOMEM;
456 }
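/*
   A minimal startup sketch (illustrative only; the block count and
   device size are made-up values):

       beos_init_block_cache(2048, 0);
       beos_init_cache_for_device(fd, num_device_blocks);
       ... per-block i/o through the cache ...
       beos_shutdown_block_cache();
*/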
457 
458 
459 static struct iovec *
460 get_iovec_array(void)
461 {
462     int i;
463     struct iovec *iov;
464 
465     LOCK(iovec_lock);
466 
467     for(i = 0; i < MAX_IOVECS; i++) {
468         if (iovec_used[i] == 0)
469             break;
470     }
471 
472     if (i >= MAX_IOVECS)       /* uh-oh */
473         beos_panic("cache: ran out of iovecs (pool 0x%x, used 0x%x)!\n",
474               &iovec_pool[0], &iovec_used[0]);
475 
476     if (iovec_pool[i] == NULL) {
477         iovec_pool[i] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
478         if (iovec_pool[i] == NULL)
479             beos_panic("can't allocate an iovec!\n");
480     }
481 
482     iov = iovec_pool[i];
483     iovec_used[i] = 1;
484 
485     UNLOCK(iovec_lock);
486 
487     return iov;
488 }
489 
490 
491 static void
492 release_iovec_array(struct iovec *iov)
493 {
494     int i;
495 
496     LOCK(iovec_lock);
497 
498     for(i=0; i < MAX_IOVECS; i++) {
499         if (iov == iovec_pool[i])
500             break;
501     }
502 
503     if (i < MAX_IOVECS)
504         iovec_used[i] = 0;
505     else                     /* uh-oh */
506         printf("cache: released an iovec I don't own (iov %p)\n", iov);
507 
508 
509     UNLOCK(iovec_lock);
510 }
511 
512 
513 
514 
515 static void
516 real_dump_cache_list(cache_ent_list *cel)
517 {
518     cache_ent *ce;
519 
520     kprintf("starting from LRU end:\n");
521 
522     for (ce = cel->lru; ce; ce = ce->next) {
523         kprintf("ce %p dev %2d bnum %6Ld lock %d flag %d arg %p "
524                "clone %p\n", ce, ce->dev, ce->block_num, ce->lock,
525                ce->flags, ce->arg, ce->clone);
526     }
527     kprintf("MRU end\n");
528 }
529 
530 static void
531 dump_cache_list(void)
532 {
533     kprintf("NORMAL BLOCKS\n");
534     real_dump_cache_list(&bc.normal);
535 
536     kprintf("LOCKED BLOCKS\n");
537     real_dump_cache_list(&bc.locked);
538 
539     kprintf("cur blocks %d, max blocks %d ht @ %p\n", bc.cur_blocks,
540            bc.max_blocks, &bc.ht);
541 }
542 
543 #if 0
544 static void
545 check_bcache(char *str)
546 {
547     int count = 0;
548     cache_ent *ce, *prev = NULL;
549 
550     LOCK(bc.lock);
551 
552     for(ce=bc.normal.lru; ce; prev=ce, ce=ce->next) {
553         count++;
554     }
555 
556     for(ce=bc.locked.lru; ce; prev=ce, ce=ce->next) {
557         count++;
558     }
559 
560     if (count != bc.cur_blocks) {
561         if (count < bc.cur_blocks - 16)
562             beos_panic("%s: count == %d, cur_blocks %d, prev 0x%x\n",
563                     str, count, bc.cur_blocks, prev);
564         else
565             printf("%s: count == %d, cur_blocks %d, prev 0x%x\n",
566                     str, count, bc.cur_blocks, prev);
567     }
568 
569     UNLOCK(bc.lock);
570 }
571 
572 
573 static void
574 dump_lists(void)
575 {
576     cache_ent *nce;
577 
578     printf("LOCKED 0x%x  (tail 0x%x, head 0x%x)\n", &bc.locked,
579            bc.locked.lru, bc.locked.mru);
580     for(nce=bc.locked.lru; nce; nce=nce->next)
581         printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n",
582                nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone,
583                nce->func);
584 
585     printf("NORMAL 0x%x  (tail 0x%x, head 0x%x)\n", &bc.normal,
586            bc.normal.lru, bc.normal.mru);
587     for(nce=bc.normal.lru; nce; nce=nce->next)
588         printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n",
589                nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone,
590                nce->func);
591 }
592 
593 
594 static void
595 check_lists(void)
596 {
597     cache_ent *ce, *prev, *oce;
598     cache_ent_list *cel;
599 
600     cel = &bc.normal;
601     for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) {
602         for(oce=bc.locked.lru; oce; oce=oce->next) {
603             if (oce == ce) {
604                 dump_lists();
605                 beos_panic("1:ce @ 0x%x is in two lists(cel 0x%x &LOCKED)\n",ce,cel);
606             }
607         }
608     }
609     if (prev && prev != cel->mru) {
610         dump_lists();
611         beos_panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n",
612               prev, cel);
613     }
614 
615     cel = &bc.locked;
616     for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) {
617         for(oce=bc.normal.lru; oce; oce=oce->next) {
618             if (oce == ce) {
619                 dump_lists();
620                 beos_panic("3:ce @ 0x%x is in two lists(cel 0x%x & DIRTY)\n",ce,cel);
621             }
622         }
623     }
624     if (prev && prev != cel->mru) {
625         dump_lists();
626         beos_panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n",
627               prev, cel);
628     }
629 }
630 #endif
631 
632 
633 #ifdef DEBUG
634 #if 0
635 static int
636 do_dump(int argc, char **argv)
637 {
638     dump_cache_list();
639     return 1;
640 }
641 
642 
643 static int
644 do_find_block(int argc, char **argv)
645 {
646     int        i;
647     fs_off_t  bnum;
648     cache_ent *ce;
649 
650     if (argc < 2) {
651         kprintf("%s: needs a block # argument\n", argv[0]);
652         return 1;
653     }
654 
655     for(i=1; i < argc; i++) {
656         bnum = strtoul(argv[i], NULL, 0);
657 
658         for(ce=bc.normal.lru; ce; ce=ce->next) {
659             if (ce->block_num == bnum) {
660                 kprintf("found clean bnum %ld @ 0x%lx (data @ 0x%lx)\n",
661                         bnum, ce, ce->data);
662             }
663         }
664 
665         for(ce=bc.locked.lru; ce; ce=ce->next) {
666             if (ce->block_num == bnum) {
667                 kprintf("found locked bnum %ld @ 0x%lx (data @ 0x%lx)\n",
668                         bnum, ce, ce->data);
669             }
670         }
671     }
672 
673     return 0;
674 }
675 
676 static int
677 do_find_data(int argc, char **argv)
678 {
679     int        i;
680     void      *data;
681     cache_ent *ce;
682 
683     if (argc < 2) {
684         kprintf("%s: needs a block # argument\n", argv[0]);
685         return 1;
686     }
687 
688     for(i=1; i < argc; i++) {
689         data = (void *)strtoul(argv[i], NULL, 0);
690 
691         for(ce=bc.normal.lru; ce; ce=ce->next) {
692             if (ce->data == data) {
693                 kprintf("found normal data ptr for bnum %ld @ ce 0x%lx\n",
694                         ce->block_num, ce);
695             }
696         }
697 
698         for(ce=bc.locked.lru; ce; ce=ce->next) {
699             if (ce->data == data) {
700                 kprintf("found locked data ptr for bnum %ld @ ce 0x%lx\n",
701                         ce->block_num, ce);
702             }
703         }
704     }
705 
706     return 0;
707 }
708 #endif
709 #endif /* DEBUG */
710 
711 
712 
713 /*
714   this function detaches the cache_ent from the list.
715 */
716 static void
717 delete_from_list(cache_ent_list *cel, cache_ent *ce)
718 {
719     if (ce->next)
720         ce->next->prev = ce->prev;
721     if (ce->prev)
722         ce->prev->next = ce->next;
723 
724     if (cel->lru == ce)
725         cel->lru = ce->next;
726     if (cel->mru == ce)
727         cel->mru = ce->prev;
728 
729     ce->next = NULL;
730     ce->prev = NULL;
731 }
732 
733 
734 
735 /*
736   this function adds the cache_ent ce to the head of the
737   list (i.e. the MRU end).  the cache_ent should *not*
738   be in any lists.
739 */
740 static void
741 add_to_head(cache_ent_list *cel, cache_ent *ce)
742 {
743     if (ce->next != NULL || ce->prev != NULL) {
744         beos_panic("*** ath: ce has non-null next/prev ptr (ce 0x%x nxt 0x%x, prv 0x%x)\n",
745                ce, ce->next, ce->prev);
746     }
747 
748     ce->next = NULL;
749     ce->prev = cel->mru;
750 
751     if (cel->mru)
752         cel->mru->next = ce;
753     cel->mru = ce;
754 
755     if (cel->lru == NULL)
756         cel->lru = ce;
757 }
758 
759 
760 /*
761   this function adds the cache_ent ce to the tail of the
762   list (i.e. the LRU end).  the cache_ent should *not*
763   be in any lists.
764 */
765 static void
766 add_to_tail(cache_ent_list *cel, cache_ent *ce)
767 {
768     if (ce->next != NULL || ce->prev != NULL) {
769         beos_panic("*** att: ce has non-null next/prev ptr (ce 0x%x nxt 0x%x, prv 0x%x)\n",
770                ce, ce->next, ce->prev);
771     }
772 
773     ce->next = cel->lru;
774     ce->prev = NULL;
775 
776     if (cel->lru)
777         cel->lru->prev = ce;
778     cel->lru = ce;
779 
780     if (cel->mru == NULL)
781         cel->mru = ce;
782 }
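/*
   Together these implement the LRU discipline: blocks touched by
   cache_block_io() and friends are re-inserted at the MRU end with
   add_to_head(), and get_ents() scans for eviction victims starting
   from the lru pointer.
*/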
783 
784 
785 static int
786 cache_ent_cmp(const void *a, const void *b)
787 {
788     fs_off_t  diff;
789     cache_ent *p1 = *(cache_ent **)a, *p2 = *(cache_ent **)b;
790 
791     if (p1 == NULL || p2 == NULL)
792         beos_panic("cache_ent pointers are null?!? (a 0x%lx, b 0x%lx)\n", a, b);
793 
794     if (p1->dev == p2->dev) {
795         diff = p1->block_num - p2->block_num;
796         return (int)diff;
797     } else {
798         return p1->dev - p2->dev;
799     }
800 }
801 
802 #if 0
803 // ToDo: add this to the fsh (as a background thread)?
804 static void
805 cache_flusher(void *arg, int phase)
806 {
807     int    i, num_ents, err;
808     bigtime_t now = system_time();
809     static cache_ent *ce = NULL;
810     static cache_ent *ents[NUM_FLUSH_BLOCKS];
811 
812     /*
813        if someone else was in the cache recently then just bail out so
814        we don't lock them out unnecessarily
815     */
816     if ((now - last_cache_access) < 1000000)
817         return;
818 
819     LOCK(bc.lock);
820 
821     ce = bc.normal.lru;
822 
823     for(num_ents=0; ce && num_ents < NUM_FLUSH_BLOCKS; ce=ce->next) {
824         if (ce->flags & CE_BUSY)
825             continue;
826 
827         if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
828             continue;
829 
830         ents[num_ents] = ce;
831         ents[num_ents]->flags |= CE_BUSY;
832         num_ents++;
833     }
834 
835     /* if we've got some room left over, look for cloned locked blocks */
836     if (num_ents < NUM_FLUSH_BLOCKS) {
837         ce = bc.locked.lru;
838 
839         for(; num_ents < NUM_FLUSH_BLOCKS;) {
840             for(;
841                 ce && ((ce->flags & CE_BUSY) || ce->clone == NULL);
842                 ce=ce->next)
843                 /* skip ents that meet the above criteria */;
844 
845             if (ce == NULL)
846                 break;
847 
848             ents[num_ents] = ce;
849             ents[num_ents]->flags |= CE_BUSY;
850             ce = ce->next;
851             num_ents++;
852         }
853     }
854 
855     UNLOCK(bc.lock);
856 
857     if (num_ents == 0)
858         return;
859 
860     qsort(ents, num_ents, sizeof(cache_ent **), cache_ent_cmp);
861 
862     if ((err = flush_ents(ents, num_ents)) != 0) {
863         printf("flush ents failed (ents @ 0x%lx, num_ents %d)!\n",
864                (ulong)ents, num_ents);
865     }
866 
867     for(i=0; i < num_ents; i++) {       /* clear the busy bit on each of ent */
868         ents[i]->flags &= ~CE_BUSY;
869     }
870 }
871 #endif
872 
873 
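/*
   Write one block back to disk.  If the block has a clone (a snapshot
   taken by set_blocks_info() for the journal), the clone is written
   and the log callback fired first; then, if the block itself is
   dirty and unlocked, the current data is written as well.  That is
   what the "goto restart" is for.
*/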
874 static int
875 flush_cache_ent(cache_ent *ce)
876 {
877     int   ret = 0;
878     void *data;
879 
880     /* if true, then there's nothing to flush */
881     if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
882         return 0;
883 
884     /* likewise: the block is locked and has no clone, so we can't touch the dirty data yet */
885     if (ce->clone == NULL && ce->lock != 0)
886         return 0;
887 
888  restart:
889     if (ce->clone)
890         data = ce->clone;
891     else
892         data = ce->data;
893 
894     if (chatty_io > 2) printf("flush: %7Ld\n", ce->block_num);
895     ret = beos_write_phys_blocks(ce->dev, ce->block_num, data, 1, ce->bsize);
896 
897     if (ce->func) {
898         ce->func(ce->logged_bnum, 1, ce->arg);
899         ce->func = NULL;
900     }
901 
902     if (ce->clone) {
903         free(ce->clone);
904         ce->clone = NULL;
905 
906         if (ce->lock == 0 && (ce->flags & CE_DIRTY))
907             goto restart;     /* also write the real data ptr */
908     } else {
909         ce->flags &= ~CE_DIRTY;
910     }
911 
912     return ret;
913 }
914 
915 
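/*
   Flush a batch of blocks.  The ents array must be sorted with
   cache_ent_cmp() (every caller qsort()s it first) so that runs of
   blocks that are adjacent on the same device can be coalesced into a
   single writev_pos(); isolated blocks fall back to flush_cache_ent().
*/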
916 static int
917 flush_ents(cache_ent **ents, int n_ents)
918 {
919     int    i, j, k, ret = 0, bsize, iocnt, do_again = 0;
920     fs_off_t  start_bnum;
921     struct iovec *iov;
922 
923     iov = get_iovec_array();
924     if (iov == NULL)
925         return ENOMEM;
926 
927 restart:
928     for(i=0; i < n_ents; i++) {
929         /* if true, then there's nothing to flush */
930         if ((ents[i]->flags & CE_DIRTY) == 0 && ents[i]->clone == NULL)
931             continue;
932 
933         /* if true we can't touch the dirty data yet because it's locked */
934         if (ents[i]->clone == NULL && ents[i]->lock != 0)
935             continue;
936 
937 
938         bsize      = ents[i]->bsize;
939         start_bnum = ents[i]->block_num;
940 
941         for(j=i+1; j < n_ents && (j - i) < NUM_FLUSH_BLOCKS; j++) {
942             if (ents[j]->dev != ents[i]->dev ||
943                 ents[j]->block_num != start_bnum + (j - i))
944                 break;
945 
946             if (ents[j]->clone == NULL && ents[j]->lock != 0)
947                 break;
948         }
949 
950         if (j == i+1) {           /* only one block, just flush it directly */
951             if ((ret = flush_cache_ent(ents[i])) != 0)
952                 break;
953             continue;
954         }
955 
956 
957         for(k=i,iocnt=0; k < j; k++,iocnt++) {
958             if (ents[k]->clone)
959                 iov[iocnt].iov_base = ents[k]->clone;
960             else
961                 iov[iocnt].iov_base = ents[k]->data;
962 
963             iov[iocnt].iov_len = bsize;
964         }
965 
966         if (chatty_io)
967             printf("writev @ %Ld for %d blocks\n", start_bnum, iocnt);
968 
969         ret = writev_pos(ents[i]->dev, start_bnum * (fs_off_t)bsize,
970                          &iov[0], iocnt);
971         if (ret != iocnt*bsize) {
972             int idx;
973 
974             printf("flush_ents: writev failed: iocnt %d start bnum %Ld "
975                    "bsize %d, ret %d\n", iocnt, start_bnum, bsize, ret);
976 
977             for(idx=0; idx < iocnt; idx++)
978                 printf("iov[%2d] = %p :: %ld\n", idx, iov[idx].iov_base,
979                        iov[idx].iov_len);
980 
981             printf("error %s writing blocks %Ld:%d (%d != %d)\n",
982                    strerror(errno), start_bnum, iocnt, ret, iocnt*bsize);
983             ret = EINVAL;
984             break;
985         }
986         ret = 0;
987 
988 
989         for(k=i; k < j; k++) {
990             if (ents[k]->func) {
991                 ents[k]->func(ents[k]->logged_bnum, 1, ents[k]->arg);
992                 ents[k]->func = NULL;
993             }
994 
995             if (ents[k]->clone) {
996                 free(ents[k]->clone);
997                 ents[k]->clone = NULL;
998             } else {
999                 ents[k]->flags &= ~CE_DIRTY;
1000             }
1001         }
1002 
1003 
1004         i = j - 1;  /* i gets incremented by the outer for loop */
1005     }
1006 
1007     /*
1008        here we have to go back through and flush any blocks that are
1009        still dirty.  with an arched brow you astutely ask, "but how
1010        could this happen given the above loop?"  Ahhh young grasshopper
1011        I say, the path through the cache is long and twisty and fraught
1012        with peril.  The reason it can happen is that a block can be both
1013        cloned and dirty.  The above loop would only flush the cloned half
1014        of the data, not the main dirty block.  So we have to go back
1015        through and see if there are any blocks that are still dirty.  If
1016        there are we go back to the top of the function and do the whole
1017        thing over.  Kind of grody, but it is necessary to ensure the
1018        correctness of the log for the Be file system.
1019     */
1020     if (do_again == 0) {
1021         for(i=0; i < n_ents; i++) {
1022             if ((ents[i]->flags & CE_DIRTY) == 0 || ents[i]->lock)
1023                 continue;
1024 
1025             do_again = 1;
1026             break;
1027         }
1028 
1029         if (do_again)
1030             goto restart;
1031     }
1032 
1033     release_iovec_array(iov);
1034 
1035 
1036     return ret;
1037 }
1038 
1039 static void
1040 delete_cache_list(cache_ent_list *cel)
1041 {
1042     void      *junk;
1043     cache_ent *ce, *next;
1044 
1045     for (ce = cel->lru; ce; ce = next) {
1046         next = ce->next;
1047         if (ce->lock != 0) {
1048             if (ce->func)
1049                 printf("*** shutdown_block_cache: block %Ld, lock == %d "
1050                        "(arg %p)!\n", ce->block_num, ce->lock, ce->arg);
1051             else
1052                 printf("*** shutdown_block_cache: block %Ld, lock == %d!\n",
1053                        ce->block_num, ce->lock);
1054         }
1055 
1056         if (ce->flags & CE_BUSY) {
1057             printf("* shutdown block cache: bnum %Ld is busy? ce %p\n",
1058                    ce->block_num, ce);
1059         }
1060 
1061         if ((ce->flags & CE_DIRTY) || ce->clone) {
1062             flush_cache_ent(ce);
1063         }
1064 
1065         if (ce->clone)
1066             free(ce->clone);
1067         ce->clone = NULL;
1068 
1069         if (ce->data)
1070             free(ce->data);
1071         ce->data = NULL;
1072 
1073         if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) {
1074             printf("*** free_device_cache: bad hash table entry %Ld "
1075                    "%p != %p\n", ce->block_num, junk, ce);
1076         }
1077 
1078         explicit_bzero(ce, sizeof(*ce));
1079         free(ce);
1080 
1081         bc.cur_blocks--;
1082     }
1083 }
1084 
1085 
1086 void
1087 beos_shutdown_block_cache(void)
1088 {
1089     /* print_hash_stats(&bc.ht); */
1090 
1091     if (bc.lock.s > 0)
1092         LOCK(bc.lock);
1093 
1094 #ifndef USER
1095     unregister_kernel_daemon(cache_flusher, NULL);
1096 #endif
1097 
1098     delete_cache_list(&bc.normal);
1099     delete_cache_list(&bc.locked);
1100 
1101     bc.normal.lru = bc.normal.mru = NULL;
1102     bc.locked.lru = bc.locked.mru = NULL;
1103 
1104     shutdown_hash_table(&bc.ht);
1105 
1106     if (bc.lock.s > 0)
1107         beos_free_lock(&bc.lock);
1108     bc.lock.s = -1;
1109 
1110     if (iovec_lock.s >= 0)
1111         beos_free_lock(&iovec_lock);
1112 }
1113 
1114 
1115 
1116 int
1117 beos_init_cache_for_device(int fd, fs_off_t max_blocks)
1118 {
1119     int ret = 0;
1120 
1121     if (fd >= MAX_DEVICES)
1122         return -1;
1123 
1124     LOCK(bc.lock);
1125 
1126     if (max_device_blocks[fd] != 0) {
1127         printf("device %d is already initialized!\n", fd);
1128         ret = -1;
1129     } else {
1130         max_device_blocks[fd] = max_blocks;
1131     }
1132 
1133     UNLOCK(bc.lock);
1134 
1135     return ret;
1136 }
1137 
1138 
1139 
1140 /*
1141    this routine assumes that bc.lock has been acquired
1142 */
1143 static cache_ent *
1144 block_lookup(int dev, fs_off_t bnum)
1145 {
1146     int        count = 0;
1147     cache_ent *ce;
1148 
1149     while (1) {
1150         ce = hash_lookup(&bc.ht, dev, bnum);
1151         if (ce == NULL)
1152             return NULL;
1153 
1154         if ((ce->flags & CE_BUSY) == 0) /* it's ok, break out and return it */
1155             break;
1156 
1157         /* else, it's busy and we need to retry our lookup */
1158         UNLOCK(bc.lock);
1159 
1160         snooze(5000);
1161         if (count++ == 5000) {  /* then ~25 seconds have elapsed */
1162             printf("block %Ld isn't coming un-busy (ce @ %p)\n",
1163                    ce->block_num, ce);
1164         }
1165 
1166         LOCK(bc.lock);
1167     }
1168 
1169     if (ce->flags & CE_BUSY)
1170         beos_panic("block lookup: returning a busy block @ 0x%lx?!?\n",(ulong)ce);
1171 
1172     return ce;
1173 }
1174 
1175 
1176 
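/*
   This is the journal hook.  BFS passes a list of locked, dirty
   blocks that it has copied into its log, plus a callback.  For each
   block we snapshot the current contents into ce->clone, record the
   callback and logged block number, and drop one lock reference.
   When the clone is eventually written out, flush_cache_ent() /
   flush_ents() invoke func(logged_bnum, 1, arg), presumably so the
   journal can reclaim that log space.
*/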
1177 int
1178 beos_set_blocks_info(int dev, fs_off_t *blocks, int nblocks,
1179                void (*func)(fs_off_t bnum, size_t nblocks, void *arg), void *arg)
1180 {
1181     int        i, j, cur;
1182     cache_ent *ce;
1183     cache_ent *ents[NUM_FLUSH_BLOCKS];
1184 
1185     LOCK(bc.lock);
1186 
1187 
1188     for(i=0, cur=0; i < nblocks; i++) {
1189 
1190         /* printf("sbi:   %ld (arg 0x%x)\n", blocks[i], arg); */
1191         ce = block_lookup(dev, blocks[i]);
1192         if (ce == NULL) {
1193             beos_panic("*** set_block_info can't find bnum %ld!\n", blocks[i]);
1194             UNLOCK(bc.lock);
1195             return ENOENT;   /* hopefully this doesn't happen... */
1196         }
1197 
1198 
1199         if (blocks[i] != ce->block_num || dev != ce->dev) {
1200             UNLOCK(bc.lock);
1201             beos_panic("** error1: looked up dev %d block %ld but found dev %d "
1202                     "bnum %ld\n", dev, blocks[i], ce->dev, ce->block_num);
1203             return EBADF;
1204         }
1205 
1206         if (ce->lock == 0) {
1207             beos_panic("* set_block_info on bnum %ld (%d) but it's not locked!\n",
1208                     blocks[i], nblocks);
1209         }
1210 
1211 
1212         if ((ce->flags & CE_DIRTY) == 0) {
1213             beos_panic("*** set_block_info on non-dirty block bnum %ld (%d)!\n",
1214                     blocks[i], nblocks);
1215         }
1216 
1217         ce->flags |= CE_BUSY;     /* mark all blocks as busy till we're done */
1218 
1219         /* if there is cloned data, it needs to be flushed now */
1220         if (ce->clone && ce->func) {
1221             ents[cur++] = ce;
1222 
1223             if (cur >= NUM_FLUSH_BLOCKS) {
1224                 UNLOCK(bc.lock);
1225 
1226                 qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1227 
1228                 flush_ents(ents, cur);
1229 
1230                 LOCK(bc.lock);
1231                 for(j=0; j < cur; j++)
1232                     ents[j]->flags &= ~CE_BUSY;
1233                 cur = 0;
1234             }
1235         }
1236     }
1237 
1238 
1239     if (cur != 0) {
1240         UNLOCK(bc.lock);
1241 
1242         qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1243 
1244         flush_ents(ents, cur);
1245 
1246         LOCK(bc.lock);
1247         for(j=0; j < cur; j++)
1248             ents[j]->flags &= ~CE_BUSY;
1249         cur = 0;
1250     }
1251 
1252 
1253     /* now go through and set the info that we were asked to */
1254     for (i = 0; i < nblocks; i++) {
1255         /* we can call hash_lookup() here because we know it's around */
1256         ce = hash_lookup(&bc.ht, dev, blocks[i]);
1257         if (ce == NULL) {
1258             beos_panic("*** set_block_info can't find bnum %Ld!\n", blocks[i]);
1259             UNLOCK(bc.lock);
1260             return ENOENT;   /* hopefully this doesn't happen... */
1261         }
1262 
1263         ce->flags &= ~(CE_DIRTY | CE_BUSY);
1264 
1265         if (ce->func != NULL) {
1266             beos_panic("*** set_block_info non-null callback on bnum %Ld\n",
1267                     ce->block_num);
1268         }
1269 
1270         if (ce->clone != NULL) {
1271             beos_panic("*** ce->clone == %p, not NULL in set_block_info\n",
1272                     ce->clone);
1273         }
1274 
1275         ce->clone = (void *)malloc(ce->bsize);
1276         if (ce->clone == NULL)
1277             beos_panic("*** can't clone bnum %Ld (bsize %d)\n",
1278                     ce->block_num, ce->bsize);
1279 
1280 
1281         memcpy(ce->clone, ce->data, ce->bsize);
1282 
1283         ce->func   = func;
1284         ce->arg    = arg;
1285 
1286         ce->logged_bnum = blocks[i];
1287 
1288         ce->lock--;
1289         if (ce->lock < 0) {
1290             printf("sbi: whoa nellie! ce @ %p (%Ld) has lock == %d\n",
1291                    ce, ce->block_num, ce->lock);
1292         }
1293 
1294         if (ce->lock == 0) {
1295             delete_from_list(&bc.locked, ce);
1296             add_to_head(&bc.normal, ce);
1297         }
1298     }
1299 
1300     UNLOCK(bc.lock);
1301 
1302     return 0;
1303 }
1304 
1305 
1306 /* this function is only for use by flush_device() */
1307 static void
1308 do_flush(cache_ent **ents, int max)
1309 {
1310     int i;
1311 
1312     for(i=0; i < max; i++) {
1313         ents[i]->flags |= CE_BUSY;
1314     }
1315 
1316     UNLOCK(bc.lock);
1317 
1318     qsort(ents, max, sizeof(cache_ent **), cache_ent_cmp);
1319     flush_ents(ents, max);
1320 
1321     LOCK(bc.lock);
1322     for(i=0; i < max; i++) {
1323         ents[i]->flags &= ~CE_BUSY;
1324     }
1325 }
1326 
1327 int
1328 beos_flush_device(int dev, int warn_locked)
1329 {
1330     int cur;
1331     cache_ent *ce;
1332     cache_ent *ents[NUM_FLUSH_BLOCKS];
1333 
1334     LOCK(bc.lock);
1335 
1336     cur = 0;
1337     ce = bc.normal.lru;
1338     while (ce) {
1339         if (ce->dev != dev || (ce->flags & CE_BUSY)) {
1340             ce = ce->next;
1341             continue;
1342         }
1343 
1344         if ((ce->flags & CE_DIRTY) || ce->clone) {
1345             ents[cur++] = ce;
1346             if (cur >= NUM_FLUSH_BLOCKS) {
1347                 do_flush(ents, cur);
1348 
1349                 ce = bc.normal.lru;
1350                 cur = 0;
1351                 continue;
1352             }
1353         }
1354 
1355         ce = ce->next;
1356     }
1357 
1358     if (cur != 0)
1359         do_flush(ents, cur);
1360 
1361     cur = 0;
1362     ce = bc.locked.lru;
1363     while (ce) {
1364         if (ce->dev != dev || (ce->flags & CE_BUSY)) {
1365             ce = ce->next;
1366             continue;
1367         }
1368 
1369         if (ce->clone) {
1370             ents[cur++] = ce;
1371             if (cur >= NUM_FLUSH_BLOCKS) {
1372                 do_flush(ents, cur);
1373 
1374                 ce = bc.locked.lru;
1375                 cur = 0;
1376                 continue;
1377             }
1378         }
1379 
1380         ce = ce->next;
1381     }
1382 
1383     if (cur != 0)
1384         do_flush(ents, cur);
1385 
1386     UNLOCK(bc.lock);
1387 
1388     return 0;
1389 }
1390 
1391 
1392 static void
1393 real_remove_cached_blocks(int dev, int allow_writes, cache_ent_list *cel)
1394 {
1395     void      *junk;
1396     cache_ent *ce, *next = NULL;
1397 
1398     for(ce=cel->lru; ce; ce=next) {
1399         next = ce->next;
1400 
1401         if (ce->dev != dev) {
1402             continue;
1403         }
1404 
1405         if (ce->lock != 0 || (ce->flags & CE_BUSY)) {
1406             printf("*** remove_cached_dev: block %Ld has lock = %d, flags "
1407                    "0x%x! ce @ 0x%lx\n", ce->block_num, ce->lock, ce->flags,
1408                    (ulong)ce);
1409         }
1410 
1411         if (allow_writes == ALLOW_WRITES &&
1412             ((ce->flags & CE_DIRTY) || ce->clone)) {
1413             ce->flags |= CE_BUSY;
1414             flush_cache_ent(ce);
1415             ce->flags &= ~CE_BUSY;
1416         }
1417 
1418         /* unlink this guy */
1419         if (cel->lru == ce)
1420             cel->lru = ce->next;
1421 
1422         if (cel->mru == ce)
1423             cel->mru = ce->prev;
1424 
1425         if (ce->prev)
1426             ce->prev->next = ce->next;
1427         if (ce->next)
1428             ce->next->prev = ce->prev;
1429 
1430         if (ce->clone)
1431             free(ce->clone);
1432         ce->clone = NULL;
1433 
1434         if (ce->data)
1435             free(ce->data);
1436         ce->data = NULL;
1437 
1438         if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) {
1439             beos_panic("*** remove_cached_device: bad hash table entry %ld "
1440                    "0x%lx != 0x%lx\n", ce->block_num, (ulong)junk, (ulong)ce);
1441         }
1442 
1443         free(ce);
1444 
1445         bc.cur_blocks--;
1446     }
1447 
1448 }
1449 
1450 int
1451 beos_remove_cached_device_blocks(int dev, int allow_writes)
1452 {
1453     LOCK(bc.lock);
1454 
1455     real_remove_cached_blocks(dev, allow_writes, &bc.normal);
1456     real_remove_cached_blocks(dev, allow_writes, &bc.locked);
1457 
1458     max_device_blocks[dev] = 0;
1459 
1460     UNLOCK(bc.lock);
1461 
1462     return 0;
1463 }
1464 
1465 
1466 
1467 int
1468 beos_flush_blocks(int dev, fs_off_t bnum, int nblocks)
1469 {
1470     int        cur, i;
1471     cache_ent *ce;
1472     cache_ent *ents[NUM_FLUSH_BLOCKS];
1473 
1474     if (nblocks == 0)   /* might as well check for this */
1475         return 0;
1476 
1477     LOCK(bc.lock);
1478 
1479     cur = 0;
1480     for(; nblocks > 0; nblocks--, bnum++) {
1481         ce = block_lookup(dev, bnum);
1482         if (ce == NULL)
1483             continue;
1484 
1485         if (bnum != ce->block_num || dev != ce->dev) {
1486             UNLOCK(bc.lock);
1487             beos_panic("error2: looked up dev %d block %ld but found %d %ld\n",
1488                   dev, bnum, ce->dev, ce->block_num);
1489             return EBADF;
1490         }
1491 
1492         if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
1493             continue;
1494 
1495         ce->flags |= CE_BUSY;
1496         ents[cur++] = ce;
1497 
1498         if (cur >= NUM_FLUSH_BLOCKS) {
1499             UNLOCK(bc.lock);
1500 
1501             qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1502             flush_ents(ents, cur);
1503 
1504             LOCK(bc.lock);
1505             for(i=0; i < cur; i++) {
1506                 ents[i]->flags &= ~CE_BUSY;
1507             }
1508             cur = 0;
1509         }
1510     }
1511 
1512     UNLOCK(bc.lock);
1513 
1514     if (cur == 0)     /* nothing more to do */
1515         return 0;
1516 
1517     /* flush out the last few buggers */
1518     qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1519     flush_ents(ents, cur);
1520 
1521     for(i=0; i < cur; i++) {
1522         ents[i]->flags &= ~CE_BUSY;
1523     }
1524 
1525     return 0;
1526 }
1527 
1528 
1529 int
1530 beos_mark_blocks_dirty(int dev, fs_off_t bnum, int nblocks)
1531 {
1532     int        ret = 0;
1533     cache_ent *ce;
1534 
1535     LOCK(bc.lock);
1536 
1537     while(nblocks > 0) {
1538         ce = block_lookup(dev, bnum);
1539         if (ce) {
1540             ce->flags |= CE_DIRTY;
1541             bnum      += 1;
1542             nblocks   -= 1;
1543         } else {     /* hmmm, that's odd, didn't find it */
1544             printf("** mark_blocks_dirty couldn't find block %Ld (len %d)\n",
1545                    bnum, nblocks);
1546             ret = ENOENT;
1547             break;
1548         }
1549     }
1550 
1551     UNLOCK(bc.lock);
1552 
1553     return ret;
1554 }
1555 
1556 
1557 
1558 int
1559 beos_release_block(int dev, fs_off_t bnum)
1560 {
1561     cache_ent *ce;
1562 
1563     /* printf("rlsb: %ld\n", bnum); */
1564     LOCK(bc.lock);
1565 
1566     ce = block_lookup(dev, bnum);
1567     if (ce) {
1568         if (bnum != ce->block_num || dev != ce->dev) {
1569             beos_panic("*** error3: looked up dev %d block %ld but found %d %ld\n",
1570                     dev, bnum, ce->dev, ce->block_num);
1571             UNLOCK(bc.lock);
1572             return EBADF;
1573         }
1574 
1575         ce->lock--;
1576 
1577         if (ce->lock < 0) {
1578             printf("rlsb: whoa nellie! ce %Ld has lock == %d\n",
1579                    ce->block_num, ce->lock);
1580         }
1581 
1582         if (ce->lock == 0) {
1583             delete_from_list(&bc.locked, ce);
1584             add_to_head(&bc.normal, ce);
1585         }
1586 
1587     } else {     /* hmmm, that's odd, didn't find it */
1588         beos_panic("** release_block asked to find %ld but it's not here\n",
1589                bnum);
1590     }
1591 
1592     UNLOCK(bc.lock);
1593 
1594     return 0;
1595 }
1596 
1597 
1598 static cache_ent *
1599 new_cache_ent(int bsize)
1600 {
1601     cache_ent *ce;
1602 
1603     ce = (cache_ent *)calloc(1, sizeof(cache_ent));
1604     if (ce == NULL) {
1605         beos_panic("*** error: cache can't allocate memory!\n");
1606         return NULL;
1607     }
1608 
1609     ce->data = malloc(bsize);
1610     if (ce->data == NULL) {
1611         free(ce);
1612         beos_panic("** error: cache can't allocate data memory\n");
1613         /* note: bc.lock stays held; our caller owns and releases it */
1614         return NULL;
1615     }
1616 
1617     ce->dev       = -1;
1618     ce->block_num = -1;
1619 
1620     return ce;
1621 }
1622 
1623 
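/*
   Pick victim blocks for re-use: allocate fresh cache_ents while the
   cache is below max_blocks, then recycle blocks from the LRU end of
   the normal list.  Everything returned is marked CE_BUSY; the caller
   is expected to flush any dirty/cloned victims and clear CE_BUSY.
*/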
1624 static void
1625 get_ents(cache_ent **ents, int num_needed, int max, int *num_gotten, int bsize)
1626 {
1627     int        cur, retry_counter = 0, max_retry = num_needed * 256;
1628     cache_ent *ce;
1629 
1630     if (num_needed > max)
1631         beos_panic("get_ents: num_needed %d but max %d (doh!)\n", num_needed, max);
1632 
1633     /* if the cache isn't full yet, just allocate the blocks */
1634     for(cur=0; bc.cur_blocks < bc.max_blocks && cur < num_needed; cur++) {
1635         ents[cur] = new_cache_ent(bsize);
1636         if (ents[cur] == NULL)
1637             break;
1638         bc.cur_blocks++;
1639     }
1640 
1641     /* pluck off blocks from the LRU end of the normal list, keep trying too */
1642     while(cur < num_needed && retry_counter < max_retry) {
1643         for(ce=bc.normal.lru; ce && cur < num_needed; ce=ce->next) {
1644             if (ce->lock)
1645                 beos_panic("get_ents: normal list has locked blocks (ce 0x%x)\n",ce);
1646 
1647             if (ce->flags & CE_BUSY)   /* don't touch busy blocks */
1648                 continue;
1649 
1650             ce->flags   |= CE_BUSY;
1651             ents[cur++]  = ce;
1652         }
1653 
1654         if (cur < num_needed) {
1655             UNLOCK(bc.lock);
1656             snooze(10000);
1657             LOCK(bc.lock);
1658             retry_counter++;
1659         }
1660     }
1661 
1662     if (cur < num_needed && retry_counter >= max_retry) {  /* oh shit! */
1663         dump_cache_list();
1664         UNLOCK(bc.lock);
1665         beos_panic("get_ents: waited too long; can't get enough ce's (c %d n %d)\n",
1666               cur, num_needed);
1667     }
1668 
1669     /*
1670       If the last block is a dirty one, try to get more of 'em so
1671       that we can flush a bunch of blocks at once.
1672     */
1673     if (cur && cur < max &&
1674         ((ents[cur-1]->flags & CE_DIRTY) || ents[cur-1]->clone)) {
1675 
1676         for(ce=ents[cur-1]->next; ce && cur < max; ce=ce->next) {
1677             if (ce->flags & CE_BUSY)   /* don't touch busy blocks */
1678                 continue;
1679 
1680             if (ce->lock)
1681                 beos_panic("get_ents:2 dirty list has locked blocks (ce 0x%x)\n",ce);
1682 
1683             ce->flags   |= CE_BUSY;
1684             ents[cur++]  = ce;
1685         }
1686     }
1687 
1688     *num_gotten = cur;
1689 }
1690 
1691 
1692 static int
1693 read_into_ents(int dev, fs_off_t bnum, cache_ent **ents, int num, int bsize)
1694 {
1695     int    i, ret;
1696     struct iovec *iov;
1697 
1698     iov = get_iovec_array();
1699 
1700     for (i = 0; i < num; i++) {
1701         iov[i].iov_base = ents[i]->data;
1702         iov[i].iov_len  = bsize;
1703     }
1704 
1705     if (chatty_io > 2)
1706         printf("readv @ %Ld for %d blocks (at %Ld, block_size = %d)\n", bnum, num, bnum*bsize, bsize);
1707     ret = readv_pos(dev, bnum*bsize, iov, num);
1708 
1709     release_iovec_array(iov);
1710 
1711     if (ret != num*bsize) {
1712         printf("read_into_ents: asked to read %d bytes but got %d\n",
1713                num*bsize, ret);
1714         printf("*** iov @ %p (num %d)\n", iov, num);
1715         return EINVAL;
1716     } else
1717         return 0;
1718 }
1719 
1720 
1721 
1722 #define CACHE_READ          0x0001
1723 #define CACHE_WRITE         0x0002
1724 #define CACHE_NOOP          0x0004     /* for getting empty blocks */
1725 #define CACHE_LOCKED        0x0008
1726 #define CACHE_READ_AHEAD_OK 0x0010     /* it's ok to do read-ahead */
1727 
1728 
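/* debugging helper; note that it returns a static buffer, so it is
   not re-entrant (fine for the chatty_io printfs below) */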
1729 static char *
1730 op_to_str(int op)
1731 {
1732     static char buff[128];
1733 
1734     if (op & CACHE_READ)
1735         strcpy(buff, "READ");
1736     else if (op & CACHE_WRITE)
1737         strcpy(buff, "WRITE");
1738     else if (op & CACHE_NOOP)
1739         strcpy(buff, "NOP");
1740 
1741     if (op & CACHE_LOCKED)
1742         strcat(buff, " LOCKED");
1743 
1744     if (op & CACHE_READ_AHEAD_OK)
1745         strcat(buff, " (AHEAD)");
1746 
1747     return buff;
1748 }
1749 
1750 static int
1751 cache_block_io(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize,
1752                int op, void **dataptr)
1753 {
1754     size_t          err = 0;
1755     cache_ent      *ce;
1756     cache_ent_list *cel;
1757 
1758     if (chatty_io > 1)
1759         printf("cbio: bnum = %Ld, num_blocks = %Ld, bsize = %d, op = %s\n", bnum, num_blocks,
1760                bsize, op_to_str(op));
1761 
1762     /* some sanity checks first */
1763     if (bsize == 0)
1764         beos_panic("cache_io: block size == 0 for bnum %Ld?!?\n", bnum);
1765 
1766     if (num_blocks == 0)
1767         beos_panic("cache_io: bnum %Ld has num_blocks == 0!\n", bnum);
1768 
1769     if (data == NULL && dataptr == NULL) {
1770         printf("major butthead move: null data and dataptr! bnum %Ld:%Ld\n",
1771                 bnum, num_blocks);
1772         return ENOMEM;
1773     }
1774 
1775     if (data == NULL) {
1776         if (num_blocks != 1)    /* get_block() should never do that */
1777             beos_panic("cache_io: num_blocks %Ld but should be 1\n",
1778                   num_blocks);
1779 
1780         if (op & CACHE_WRITE)
1781             beos_panic("cache_io: get_block() asked to write?!?\n");
1782     }
1783 
1784     if (bnum + num_blocks > max_device_blocks[dev]) {
1785         printf("dev %d: access to blocks %Ld:%Ld but max_dev_blocks is %Ld\n",
1786                dev, bnum, num_blocks, max_device_blocks[dev]);
1787 
1788         /* let the app crash here */
1789         *(int *)0x3100 = 0xc0debabe;
1790         return EINVAL;
1791     }
1792 
1793     last_cache_access = system_time();
1794 
1795     /* if the i/o is greater than 64k, do it directly */
1796     if (num_blocks * bsize >= 64 * 1024) {
1797         char  *ptr;
1798         fs_off_t  tmp;
1799 
1800         if (data == NULL || (op & CACHE_LOCKED)) {
1801             beos_panic("*** asked to do a large locked io that's too hard!\n");
1802         }
1803 
1804 
1805         if (op & CACHE_READ) {
1806             if (beos_read_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
1807                 printf("cache read:read_phys_blocks failed (%s on blocks %Ld:%Ld)!\n",
1808                         strerror(errno), bnum, num_blocks);
1809                 return EINVAL;
1810             }
1811 
1812             LOCK(bc.lock);
1813 
1814             /* if any of the blocks are in the cache, grab them instead */
1815             ptr = data;
1816             for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
1817                 ce = block_lookup(dev, tmp);
1818                 /*
1819                     if we find a block in the cache we have to copy its
1820                     data just in case it is more recent than what we just
1821                     read from disk (which could happen if someone wrote
1822                     these blocks after we did the read but before we locked
1823                     the cache and entered this loop).
1824                 */
1825                 if (ce) {
1826                     if (tmp != ce->block_num || dev != ce->dev) {
1827                         UNLOCK(bc.lock);
1828                         beos_panic("*** error4: looked up dev %d block %Ld but "
1829                                 "found %d %Ld\n", dev, tmp, ce->dev,
1830                                 ce->block_num);
1831                     }
1832 
1833                     memcpy(ptr, ce->data, bsize);
1834                 }
1835             }
1836 
1837             UNLOCK(bc.lock);
1838         } else if (op & CACHE_WRITE) {
1839             LOCK(bc.lock);
1840 
1841             /* if any of the blocks are in the cache, update them too */
1842             ptr = data;
1843             for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
1844                 ce = block_lookup(dev, tmp);
1845                 if (ce) {
1846                     if (tmp != ce->block_num || dev != ce->dev) {
1847                         UNLOCK(bc.lock);
1848                         beos_panic("*** error5: looked up dev %d block %Ld but "
1849                                 "found %d %Ld\n", dev, tmp, ce->dev,
1850                                 ce->block_num);
1851                         return EBADF;
1852                     }
1853 
1854                     /* XXXdbg -- this isn't strictly necessary */
1855                     if (ce->clone) {
1856                         printf("over-writing cloned data (ce %p bnum %Ld)...\n", ce, tmp);
1857                         flush_cache_ent(ce);
1858                     }
1859 
1860                     /* copy the data into the cache */
1861                     memcpy(ce->data, ptr, bsize);
1862                 }
1863             }
1864 
1865             UNLOCK(bc.lock);
1866 
1867             if (beos_write_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
1868                 printf("cache write: write_phys_blocks failed (%s on blocks "
1869                        "%Ld:%Ld)!\n", strerror(errno), bnum, num_blocks);
1870                 return EINVAL;
1871             }
1872         } else {
1873             printf("bad cache op %d (bnum %Ld nblocks %Ld)\n", op, bnum, num_blocks);
1874             return EINVAL;
1875         }
1876 
1877         return 0;
1878     }
1879 
1880 
1881     LOCK(bc.lock);
1882     while(num_blocks) {
1883 
1884         ce = block_lookup(dev, bnum);
1885         if (ce) {
1886             if (bnum != ce->block_num || dev != ce->dev) {
1887                 UNLOCK(bc.lock);
1888                 beos_panic("*** error6: looked up dev %d block %ld but found "
1889                         "%d %ld\n", dev, bnum, ce->dev, ce->block_num);
1890                 return EBADF;
1891             }
1892 
1893             if (bsize != ce->bsize) {
1894                 beos_panic("*** requested bsize %d but ce->bsize %d ce @ 0x%x\n",
1895                         bsize, ce->bsize, ce);
1896             }

            /* delete this ent from the list it is in because it may change */
            if (ce->lock)
                cel = &bc.locked;
            else
                cel = &bc.normal;

            delete_from_list(cel, ce);

            if (op & CACHE_READ) {
                if (data && data != ce->data) {
                    memcpy(data, ce->data, bsize);
                } else if (dataptr) {
                    *dataptr = ce->data;
                } else {
                    printf("cbio:data %p dptr %p ce @ %p ce->data %p\n",
                           data, dataptr, ce, ce->data);
                }
            } else if (op & CACHE_WRITE) {
                if (data && data != ce->data)
                    memcpy(ce->data, data, bsize);

                ce->flags |= CE_DIRTY;
            } else if (op & CACHE_NOOP) {
                memset(ce->data, 0, bsize);
                if (data)
                    memset(data, 0, bsize);

                if (dataptr)
                    *dataptr = ce->data;

                ce->flags |= CE_DIRTY;
            } else {
                beos_panic("cached_block_io: bogus op %d\n", op);
            }

            if (op & CACHE_LOCKED)
                ce->lock++;

            if (ce->lock)
                cel = &bc.locked;
            else
                cel = &bc.normal;

            /* now put this ent at the head of the appropriate list */
            add_to_head(cel, ce);

            if (data != NULL)
                data = (void *)((char *)data + bsize);

            bnum       += 1;
            num_blocks -= 1;

            continue;
        } else {                                  /* it's not in the cache */
            int        cur, cur_nblocks, num_dirty, real_nblocks, num_needed;
            cache_ent *ents[NUM_FLUSH_BLOCKS];

            /*
               here we find out how many additional blocks in this request
               are not in the cache.  the idea is that then we can do one
               big i/o on that many blocks at once.
            */
            for(cur_nblocks=1;
                cur_nblocks < num_blocks && cur_nblocks < NUM_FLUSH_BLOCKS;
                cur_nblocks++) {

                /* we can call hash_lookup() directly instead of
                   block_lookup() because we don't care about the
                   state of the busy bit of the block at this point
                */
                if (hash_lookup(&bc.ht, dev, bnum + cur_nblocks))
                    break;
            }
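
            /* For instance (illustrative numbers only): if bnum+1 and
               bnum+2 are absent from the cache but bnum+3 is already
               cached, the loop above stops with cur_nblocks == 3, and
               the three missing blocks are handled with one physical
               I/O below. */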

            /*
              here we try to figure out how many extra blocks we should read
              for read-ahead.  we want to read as many as possible that are
              not already in the cache and that don't cause us to try and
              read beyond the end of the disk.
            */
            if ((op & CACHE_READ) && (op & CACHE_READ_AHEAD_OK) &&
                (cur_nblocks * bsize) < read_ahead_size) {

                for(num_needed=cur_nblocks;
                    num_needed < (read_ahead_size / bsize);
                    num_needed++) {

                    if ((bnum + num_needed) >= max_device_blocks[dev])
                        break;

                    if (hash_lookup(&bc.ht, dev, bnum + num_needed))
                        break;
                }
            } else {
                num_needed = cur_nblocks;
            }
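
            /* Worked example (illustrative numbers only): with bsize 1024
               and read_ahead_size 32768, a request where cur_nblocks == 4
               can grow num_needed up to 32 blocks, stopping early at the
               first block that is already cached or at the end of the
               device (max_device_blocks[dev]). */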

            /* this will get us pointers to a bunch of cache_ents we can use */
            get_ents(ents, num_needed, NUM_FLUSH_BLOCKS, &real_nblocks, bsize);

            if (real_nblocks < num_needed) {
                beos_panic("don't have enough cache ents (need %d got %d %Ld::%Ld)\n",
                      num_needed, real_nblocks, bnum, num_blocks);
            }

            /*
              There are now three variables used as limits within the ents
              array.  This is how they are related:

                 cur_nblocks <= num_needed <= real_nblocks

              Ents from 0 to cur_nblocks-1 are going to be used to fulfill
              this IO request.  Ents from cur_nblocks to num_needed-1 are
              for read-ahead.  Ents from num_needed to real_nblocks-1 are
              extra blocks that get_ents() asked us to flush.  Often (and
              always on writes) cur_nblocks == num_needed.

              Below, we sort the list of ents so that when we flush them
              they go out in order.
            */
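
            /* A minimal sanity-check sketch of the invariant above (not
               in the original; assumes <assert.h> were included): */
#if 0
            assert(cur_nblocks <= num_needed);
            assert(num_needed <= real_nblocks);
            assert(real_nblocks <= NUM_FLUSH_BLOCKS);  /* cap passed to get_ents() */
#endif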

            qsort(ents, real_nblocks, sizeof(cache_ent *), cache_ent_cmp);

            /*
              delete each ent from its list because it will change.  also
              count up how many dirty blocks there are and insert into the
              hash table any new blocks so that no one else will try to
              read them in when we release the cache semaphore to do our I/O.
            */
            for(cur=0,num_dirty=0; cur < real_nblocks; cur++) {
                ce = ents[cur];
                ce->flags |= CE_BUSY;

                /*
                   insert the new block into the hash table with its new block
                   number. note that the block is still in the hash table for
                   its old block number -- and it has to be until we are done
                   flushing it from the cache (to prevent someone else from
                   sneaking in ahead of us and trying to read the same
                   block that we're flushing).
                */
                if (cur < num_needed) {
                    if (hash_insert(&bc.ht, dev, bnum + cur, ce) != 0)
                        beos_panic("could not insert cache ent for %d %Ld (0x%lx)\n",
                              dev, bnum + cur, (ulong)ents[cur]);
                }

                if (ce->dev == -1)
                    continue;

                if ((ce->flags & CE_DIRTY) || ce->clone)
                    num_dirty++;

                if (ce->lock) {
                    beos_panic("cbio: can't use locked blocks here ce @ %p\n", ce);
                } else {
                    cel = &bc.normal;
                    delete_from_list(cel, ce);
                }
            }
            ce = NULL;
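
            /*
               note: at this point a recycled ent can be reachable under
               both its old (dev, block_num) key and the new key inserted
               above; the CE_BUSY flag is what keeps everyone else away
               from it until the stale key is deleted further below.
            */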


            /*
               we release the block cache semaphore here so that we can
               go do all the i/o we need to do (flushing dirty blocks
               that we're kicking out as well as reading any new data).

               because all the blocks we're touching are marked busy
               no one else should mess with them while we're doing this.
            */
            if (num_dirty || (op & CACHE_READ)) {
                UNLOCK(bc.lock);

                /* this flushes any blocks we're kicking out that are dirty */
                if (num_dirty && (err = flush_ents(ents, real_nblocks)) != 0) {
                    printf("flush ents failed (ents @ 0x%lx, nblocks %d)!\n",
                           (ulong)ents, real_nblocks);
                    goto handle_err;
                }

            }

            /*
               now that everything is flushed to disk, go through and
               make sure that the data blocks we're going to use are
               the right block size for this current request (it's
               possible we're kicking out some smaller blocks and need
               to reallocate the data block pointer). We do this in two
               steps, first freeing everything and then going through
               and doing the mallocs to try and be nice to the memory
               system (i.e. allow it to coalesce stuff, etc).
            */
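
            /* E.g. when four 1K buffers are being replaced by four 2K
               ones, doing all of the free()s first gives the allocator a
               chance to coalesce the freed space before the new requests
               arrive, instead of interleaving free/malloc pairs. */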
            err = 0;
            for(cur=0; cur < num_needed; cur++) {
                if (ents[cur]->bsize != bsize) {
                    free(ents[cur]->data);
                    ents[cur]->data = NULL;

                    if (ents[cur]->clone) {
                        free(ents[cur]->clone);
                        ents[cur]->clone = NULL;
                    }
                }
            }

            for(cur=0; cur < num_needed; cur++) {
                if (ents[cur]->data == NULL) {
                    ents[cur]->data  = (void *)malloc(bsize);
                    ents[cur]->bsize = bsize;
                }

                if (ents[cur]->data == NULL) {
                    printf("cache: no memory for block (bsize %d)!\n",
                           bsize);
                    err = ENOMEM;
                    break;
                }
            }

            /*
               if this condition is true it's a pretty serious error.
               we'll try and back out gracefully but we're in pretty
               deep at this point and it ain't going to be easy.
            */
  handle_err:
            if (err) {
                for(cur=0; cur < num_needed; cur++) {
                    cache_ent *tmp_ce;

                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,dev,bnum+cur);
                    if (tmp_ce != ents[cur]) {
                        beos_panic("hash_del0: %d %Ld got 0x%lx, not 0x%lx\n",
                                dev, bnum+cur, (ulong)tmp_ce,
                                (ulong)ents[cur]);
                    }

                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,ents[cur]->dev,
                                                        ents[cur]->block_num);
                    if (tmp_ce != ents[cur]) {
                        beos_panic("hash_del1: %d %Ld got 0x%lx, not 0x%lx\n",
                                ents[cur]->dev, ents[cur]->block_num, (ulong)tmp_ce,
                                (ulong)ents[cur]);
                    }

                    ents[cur]->flags &= ~CE_BUSY;
                    if (ents[cur]->data)
                        free(ents[cur]->data);
                    free(ents[cur]);
                    ents[cur] = NULL;

                    bc.cur_blocks--;
                }

                if (cur < real_nblocks) {
                    LOCK(bc.lock);
                    for(; cur < real_nblocks; cur++) {
                        ents[cur]->flags &= ~CE_BUSY;

                        /* we have to put them back here */
                        add_to_tail(&bc.normal, ents[cur]);
                    }
                    UNLOCK(bc.lock);
                }

                return err;   /* ENOMEM, or the error from flush_ents() above */
            }


            /*
               If we go into this if statement, the block cache lock
               has *already been released* up above when we flushed the
               dirty entries.  As always, since the blocks we're mucking
               with are marked busy, they shouldn't get messed with.
            */
            err = 0;
            if (num_dirty || (op & CACHE_READ)) {
                /* this section performs the i/o that we need to do */
                if (op & CACHE_READ) {
                    err = read_into_ents(dev, bnum, ents, num_needed, bsize);
                } else {
                    err = 0;
                }

                if (err != 0) {
                    printf("err %s on dev %d block %Ld:%d (%d) "
                           "data %p, ents[0] %p\n",
                           strerror(errno), dev, bnum, cur_nblocks,
                           bsize, data, ents[0]);
                }

                /*
                   acquire the semaphore here so that we can go on mucking
                   with the cache data structures.  We need to delete old
                   block numbers from the hash table and set the new block
                   numbers for the blocks we just read in.  We also put the
                   read-ahead blocks at the head of the mru list.
                */

                LOCK(bc.lock);
            }

            for(cur=0; cur < num_needed; cur++) {
                cache_ent *tmp_ce;

                ce = ents[cur];
                if (ce->dev != -1) {
                    tmp_ce = hash_delete(&bc.ht, ce->dev, ce->block_num);
                    if (tmp_ce == NULL || tmp_ce != ce) {
                        beos_panic("*** hash_delete failure (ce %p tce %p)\n",
                              ce, tmp_ce);
                    }
                }

                if (err == 0 && cur >= cur_nblocks) {
                    ce->dev       = dev;
                    ce->block_num = bnum + cur;
                    ce->flags    &= ~CE_BUSY;
                    add_to_head(&bc.normal, ce);
                }
            }
            ce = NULL;

            /*
              clear the busy bit on the blocks we force-flushed and
              put them on the normal list since they're now clean.
            */
            for(; cur < real_nblocks; cur++) {
                ents[cur]->flags &= ~CE_BUSY;

                if (ents[cur]->lock)
                    beos_panic("should not have locked blocks here (ce %p)\n",
                          ents[cur]);

                add_to_tail(&bc.normal, ents[cur]);
            }

            if (err) {   /* then we have some cleanup to do */
                for(cur=0; cur < num_needed; cur++) {
                    cache_ent *tmp_ce;

                    /* we delete all blocks from the cache so we don't
                       leave partially written blocks in the cache */

                    tmp_ce = (cache_ent *)hash_delete(&bc.ht,dev,bnum+cur);
                    if (tmp_ce != ents[cur]) {
                        beos_panic("hash_del: %d %Ld got 0x%lx, not 0x%lx\n",
                                dev, bnum+cur, (ulong)tmp_ce,
                                (ulong)ents[cur]);
                    }

                    ce = ents[cur];
                    ce->flags &= ~CE_BUSY;

                    free(ce->data);
                    ce->data = NULL;

                    free(ce);
                    ents[cur] = NULL;

                    bc.cur_blocks--;
                }
                ce = NULL;

                UNLOCK(bc.lock);
                return err;
            }


            /*
               last step: go through and make sure all the cache_ent
               structures have the right data in them, delete old guys, etc.
            */
            for(cur=0; cur < cur_nblocks; cur++) {
                ce = ents[cur];

                if (ce->dev != -1) {   /* then clean this guy up */
                    if (ce->next || ce->prev)
                        beos_panic("ce @ %p should not be in a list yet!\n", ce);

                    if (ce->clone)
                        free(ce->clone);

                    if (ce->data == NULL)
                        beos_panic("ce @ 0x%lx has a null data ptr\n", (ulong)ce);
                }

                ce->dev        = dev;
                ce->block_num  = bnum + cur;
                ce->bsize      = bsize;
                ce->flags      = CE_NORMAL;
                ce->lock       = 0;
                ce->clone      = NULL;
                ce->func = ce->arg  = NULL;
                ce->next = ce->prev = NULL;

                if (op & CACHE_READ) {
                    if (data)
                        memcpy(data, ce->data, bsize);
                } else if (op & CACHE_WRITE) {
                    ce->flags  |= CE_DIRTY;
                    memcpy(ce->data, data, bsize);
                } else if (op & CACHE_NOOP) {
                    memset(ce->data, 0, bsize);
                    if (data)
                        memset(data, 0, bsize);

                    ce->flags |= CE_DIRTY;
                }

                if (op & CACHE_LOCKED) {
                    ce->lock++;
                    cel = &bc.locked;
                } else {
                    cel = &bc.normal;
                }

                /* now stick this puppy at the head of the mru list */
                add_to_head(cel, ce);


                if (dataptr) {
                    *dataptr = ce->data;
                }

                if (data != NULL)
                    data = (void *)((char *)data + bsize);
                else if (cur_nblocks != 1)
                    beos_panic("cache can't handle setting data_ptr twice!\n");
            }  /* end of for(cur=0; cur < cur_nblocks; cur++) */

            bnum       += cur_nblocks;
            num_blocks -= cur_nblocks;

        }   /* end of else it's not in the cache */

    }   /* end of while(num_blocks) */

    UNLOCK(bc.lock);

    return 0;
}


void *
beos_get_block(int dev, fs_off_t bnum, int bsize)
{
    void *data;

    if (cache_block_io(dev, bnum, NULL, 1, bsize,
                       CACHE_READ|CACHE_LOCKED|CACHE_READ_AHEAD_OK,
                       &data) != 0)
        return NULL;

    return data;
}

void *
beos_get_empty_block(int dev, fs_off_t bnum, int bsize)
{
    void *data;

    if (cache_block_io(dev, bnum, NULL, 1, bsize, CACHE_NOOP|CACHE_LOCKED,
                       &data) != 0)
        return NULL;

    return data;
}

int
beos_cached_read(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize)
{
    return cache_block_io(dev, bnum, data, num_blocks, bsize,
                          CACHE_READ | CACHE_READ_AHEAD_OK, NULL);
}


int
beos_cached_write(int dev, fs_off_t bnum, const void *data, fs_off_t num_blocks, int bsize)
{
    return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
                          CACHE_WRITE, NULL);
}

int
beos_cached_write_locked(int dev, fs_off_t bnum, const void *data,
                    fs_off_t num_blocks, int bsize)
{
    return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
                          CACHE_WRITE | CACHE_LOCKED, NULL);
}
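

/*
   A minimal usage sketch (not part of the original file): a typical
   read-modify-write cycle through the wrappers above.  It assumes that
   the usual beos_release_block() counterpart is available to drop the
   CACHE_LOCKED reference taken by beos_get_block().
*/
#if 0
static int
example_update_block(int dev, fs_off_t bnum, int bsize)
{
    char *block = (char *)beos_get_block(dev, bnum, bsize);
    if (block == NULL)
        return ENOMEM;

    block[0] ^= 0x01;   /* modify the locked, cached copy in place */

    /* push the modified copy back through the cache; this marks it dirty */
    if (beos_cached_write(dev, bnum, block, 1, bsize) != 0) {
        beos_release_block(dev, bnum);
        return EINVAL;
    }

    return beos_release_block(dev, bnum);
}
#endif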


void
beos_force_cache_flush(int dev, int prefer_log_blocks)
{
    int        i, count = 0;
    cache_ent *ce;
    cache_ent *ents[NUM_FLUSH_BLOCKS];


    LOCK(bc.lock);

    for(ce=bc.normal.lru; ce; ce=ce->next) {
        if ((ce->dev == dev) &&
            (ce->flags & CE_BUSY) == 0 &&
            ((ce->flags & CE_DIRTY) || ce->clone) &&
            ((prefer_log_blocks && ce->func) || (prefer_log_blocks == 0))) {

            ce->flags |= CE_BUSY;
            ents[count++] = ce;

            if (count >= NUM_FLUSH_BLOCKS) {
                break;
            }
        }
    }

    /* if we've got some room left, try and grab any cloned blocks */
    if (count < NUM_FLUSH_BLOCKS) {
        for(ce=bc.locked.lru; ce; ce=ce->next) {
            if ((ce->dev == dev) &&
                (ce->flags & CE_BUSY) == 0 &&
                (ce->clone)) {

                ce->flags |= CE_BUSY;
                ents[count++] = ce;

                if (count >= NUM_FLUSH_BLOCKS) {
                    break;
                }
            }
        }
    }

    UNLOCK(bc.lock);

    if (count != 0) {
        qsort(ents, count, sizeof(cache_ent *), cache_ent_cmp);
        flush_ents(ents, count);

        for(i=0; i < count; i++)
            ents[i]->flags &= ~CE_BUSY;
    }
}
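
/*
   Illustrative call sequence (not in the original): a journal flush
   might first push out blocks that have log callbacks attached and then
   sweep up whatever else is dirty or cloned for the device:

       beos_force_cache_flush(dev, 1);   first, blocks with a func callback
       beos_force_cache_flush(dev, 0);   then any remaining dirty/cloned blocks
*/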