1 /*
2 This file contains the global device cache for the BeOS. All
3 file system I/O comes through here. The cache can handle blocks
4 of different sizes for multiple different underlying physical
5 devices.
6
7 The cache is organized as a hash table (for lookups by device
8 and block number) and two doubly-linked lists. The normal
9 list is for "normal" blocks which are either clean or dirty.
10 The locked list is for blocks that are locked in the cache by
11 BFS. The lists are LRU ordered.
12
13 Most of the work happens in the function cache_block_io() which
14 is quite lengthy. The other functions of interest are get_ents()
15 which picks victims to be kicked out of the cache; flush_ents()
16 which does the work of flushing them; and set_blocks_info() which
17 handles cloning blocks and setting callbacks so that the BFS
18 journal will work properly. If you want to modify this code it
19 will take some study but it's not too bad. Do not think about
20 separating the list of clean and dirty blocks into two lists as
21 I did that already and it's slower.
22
23 Originally this cache code was written while listening to the album
24 "Ride the Lightning" by Metallica. The current version was written
25 while listening to "Welcome to SkyValley" by Kyuss as well as an
26 ambient collection on the German label FAX. Music helps a lot when
27 writing code.
28
29 THIS CODE COPYRIGHT DOMINIC GIAMPAOLO. NO WARRANTY IS EXPRESSED
30 OR IMPLIED. YOU MAY USE THIS CODE AND FREELY DISTRIBUTE IT FOR
31 NON-COMMERCIAL USE AS LONG AS THIS NOTICE REMAINS ATTACHED.
32
33 FOR COMMERCIAL USE, CONTACT DOMINIC GIAMPAOLO (dbg@be.com).
34
35 Dominic Giampaolo
36 dbg@be.com
37 */
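/*
   A minimal usage sketch of the public interface in this file. It is
   disabled and purely illustrative -- the fd, block size, and device
   size below are made-up values and error checking is elided.
*/
#if 0
static void
example_cache_usage(int fd)
{
    void *block;

    beos_init_block_cache(1024, 0);          /* cache up to 1024 blocks */
    beos_init_cache_for_device(fd, 65536);   /* device holds 65536 blocks */

    /* read block 42 and lock it into the cache */
    block = beos_get_block(fd, 42, 1024);
    if (block != NULL) {
        /* modify it in place, mark it dirty, then drop the lock */
        memset(block, 0xff, 1024);
        beos_mark_blocks_dirty(fd, 42, 1);
        beos_release_block(fd, 42);
    }

    beos_flush_device(fd, 0);                /* push dirty blocks to disk */
    beos_shutdown_block_cache();
}
#endif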
38
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <memory.h>
42 #include <string.h>
43 #include <errno.h>
44 #include <fcntl.h>
45 #include <sys/types.h>
46 #include <sys/uio.h>
47 #include <unistd.h>
48
49
50 #include <OS.h>
51 #include <KernelExport.h>
52
53 #include "fs_cache.h"
54 #include "fs_cache_priv.h"
55 #include "lock.h"
56
57
58
59 #ifndef USER
60 #define printf dprintf
61 #endif
62 #ifdef USER
63 #define kprintf printf
64 #endif
65
66
67 typedef off_t fs_off_t;
68
69 /* forward prototypes */
70 static int flush_ents(cache_ent **ents, int n_ents);
71
72 //static int do_dump(int argc, char **argv);
73 //static int do_find_block(int argc, char **argv);
74 //static int do_find_data(int argc, char **argv);
75 //static void cache_flusher(void *arg, int phase);
76
77
78 int chatty_io = 0;
79
80 #define CHUNK (512 * 1024) /* a hack to work around scsi driver bugs */
81
82 static void
83 beos_panic(const char *format, ...)
84 {
85 va_list ap;
86
87 va_start(ap, format);
88 vfprintf(stderr, format, ap);
89 va_end(ap);
90
91 while (TRUE)
92 ;
93 }
94
95 size_t
96 beos_read_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize)
97 {
98 size_t ret = 0;
99 size_t sum;
100
101 if (chatty_io)
102 printf("R: %8" B_PRIdOFF " : %3d\n", bnum, num_blocks);
103
104 if (num_blocks * bsize < CHUNK)
105 ret = read_pos(fd, bnum * bsize, data, num_blocks * bsize);
106 else {
107 for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) {
108 ret = read_pos(fd, (bnum * bsize) + sum, data, CHUNK);
109 if (ret != CHUNK)
110 break;
111
112 data = (void *)((char *)data + CHUNK);
113 }
114
115 if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) {
116 ret = read_pos(fd, (bnum * bsize) + sum, data,
117 (num_blocks * bsize) - sum);
118
119 if (ret == (num_blocks * bsize) - sum)
120 ret = num_blocks * bsize;
121 } else if (ret == CHUNK) {
122 ret = num_blocks * bsize;
123 }
124 }
125
126 if (ret == num_blocks * bsize)
127 return 0;
128 else
129 return EBADF;
130 }
131
132 size_t
133 beos_write_phys_blocks(int fd, fs_off_t bnum, void *data, uint num_blocks, int bsize)
134 {
135 size_t ret = 0;
136 size_t sum;
137
138 if (chatty_io)
139 printf("W: %8" B_PRIdOFF " : %3d\n", bnum, num_blocks);
140
141 if (num_blocks * bsize < CHUNK)
142 ret = write_pos(fd, bnum * bsize, data, num_blocks * bsize);
143 else {
144 for(sum=0; (sum + CHUNK) <= (num_blocks * bsize); sum += CHUNK) {
145 ret = write_pos(fd, (bnum * bsize) + sum, data, CHUNK);
146 if (ret != CHUNK)
147 break;
148
149 data = (void *)((char *)data + CHUNK);
150 }
151
152 if (ret == CHUNK && ((num_blocks * bsize) - sum) > 0) {
153 ret = write_pos(fd, (bnum * bsize) + sum, data,
154 (num_blocks * bsize) - sum);
155
156 if (ret == (num_blocks * bsize) - sum)
157 ret = num_blocks * bsize;
158 } else if (ret == CHUNK) {
159 ret = num_blocks * bsize;
160 }
161 }
162
163
164 if (ret == num_blocks * bsize)
165 return 0;
166 else
167 return EBADF;
168 }
169
170 // #pragma mark -
171
172 static int
173 init_hash_table(hash_table *ht)
174 {
175 ht->max = HT_DEFAULT_MAX;
176 ht->mask = ht->max - 1;
177 ht->num_elements = 0;
178
179 ht->table = (hash_ent **)calloc(ht->max, sizeof(hash_ent *));
180 if (ht->table == NULL)
181 return ENOMEM;
182
183 return 0;
184 }
185
186
187 static void
188 shutdown_hash_table(hash_table *ht)
189 {
190 int i, hash_len;
191 hash_ent *he, *next;
192
193 for(i=0; i < ht->max; i++) {
194 he = ht->table[i];
195
196 for(hash_len=0; he; hash_len++, he=next) {
197 next = he->next;
198 free(he);
199 }
200 }
201
202 if (ht->table)
203 free(ht->table);
204 ht->table = NULL;
205 }
206
207 #if 0
208 static void
209 print_hash_stats(hash_table *ht)
210 {
211 int i, hash_len, max = -1, sum = 0;
212 hash_ent *he, *next;
213
214 for(i=0; i < ht->max; i++) {
215 he = ht->table[i];
216
217 for(hash_len=0; he; hash_len++, he=next) {
218 next = he->next;
219 }
220 if (hash_len)
221 printf("bucket %3d : %3d\n", i, hash_len);
222
223 sum += hash_len;
224 if (hash_len > max)
225 max = hash_len;
226 }
227
228 printf("max # of chains: %d, average chain length %d\n", max,sum/ht->max);
229 }
230 #endif
231
232 #define HASH(d, b) ((((fs_off_t)d) << (sizeof(fs_off_t)*8 - 6)) | (b))
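/*
   note: HASH() packs the device id into the top six bits of a 64-bit
   value and the block number into the rest; with a 64-bit fs_off_t,
   dev 1 / block 100 hashes to (1 << 58) | 100. the bucket index is
   then taken from the low bits with "& ht->mask", so blocks that are
   adjacent on disk land in adjacent buckets. (device ids above 63
   would bleed into the block-number bits, which is tolerable here.)
*/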
233
234 static hash_ent *
235 new_hash_ent(int dev, fs_off_t bnum, void *data)
236 {
237 hash_ent *he;
238
239 he = (hash_ent *)malloc(sizeof(*he));
240 if (he == NULL)
241 return NULL;
242
243 he->hash_val = HASH(dev, bnum);
244 he->dev = dev;
245 he->bnum = bnum;
246 he->data = data;
247 he->next = NULL;
248
249 return he;
250 }
251
252
253 static int
254 grow_hash_table(hash_table *ht)
255 {
256 int i, omax, newsize, newmask;
257 fs_off_t hash;
258 hash_ent **new_table, *he, *next;
259
260 if (ht->max & ht->mask) {
261 printf("*** hashtable size %d or mask %d looks weird!\n", ht->max,
262 ht->mask);
263 }
264
265 omax = ht->max;
266 newsize = omax * 2; /* have to grow in powers of two */
267 newmask = newsize - 1;
268
269 new_table = (hash_ent **)calloc(newsize, sizeof(hash_ent *));
270 if (new_table == NULL)
271 return ENOMEM;
272
273 for(i=0; i < omax; i++) {
274 for(he=ht->table[i]; he; he=next) {
275 hash = he->hash_val & newmask;
276 next = he->next;
277
278 he->next = new_table[hash];
279 new_table[hash] = he;
280 }
281 }
282
283 free(ht->table);
284 ht->table = new_table;
285 ht->max = newsize;
286 ht->mask = newmask;
287
288 return 0;
289 }
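/*
   because the table size is always a power of two, rehashing with the
   new mask simply splits each old bucket i between buckets i and
   i + omax: for example with omax == 128 (old mask 0x7f, new mask
   0xff), an entry whose hash ends in 0x35 either stays in bucket 0x35
   or moves to bucket 0xb5 depending on the new high bit. nothing else
   has to be recomputed.
*/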
290
291
292
293
294 static int
295 hash_insert(hash_table *ht, int dev, fs_off_t bnum, void *data)
296 {
297 fs_off_t hash;
298 hash_ent *he, *curr;
299
300 hash = HASH(dev, bnum) & ht->mask;
301
302 curr = ht->table[hash];
303 for(; curr != NULL; curr=curr->next)
304 if (curr->dev == dev && curr->bnum == bnum)
305 break;
306
307 if (curr && curr->dev == dev && curr->bnum == bnum) {
308 printf("entry %d:%" B_PRIdOFF " already in the hash table!\n",
309 dev, bnum);
310 return EEXIST;
311 }
312
313 he = new_hash_ent(dev, bnum, data);
314 if (he == NULL)
315 return ENOMEM;
316
317 he->next = ht->table[hash];
318 ht->table[hash] = he;
319
320 ht->num_elements++;
321 if (ht->num_elements >= ((ht->max * 3) / 4)) {
322 if (grow_hash_table(ht) != 0)
323 return ENOMEM;
324 }
325
326 return 0;
327 }
328
329 static void *
330 hash_lookup(hash_table *ht, int dev, fs_off_t bnum)
331 {
332 hash_ent *he;
333
334 he = ht->table[HASH(dev, bnum) & ht->mask];
335
336 for(; he != NULL; he=he->next) {
337 if (he->dev == dev && he->bnum == bnum)
338 break;
339 }
340
341 if (he)
342 return he->data;
343 else
344 return NULL;
345 }
346
347
348 static void *
349 hash_delete(hash_table *ht, int dev, fs_off_t bnum)
350 {
351 void *data;
352 fs_off_t hash;
353 hash_ent *he, *prev = NULL;
354
355 hash = HASH(dev, bnum) & ht->mask;
356 he = ht->table[hash];
357
358 for(; he != NULL; prev=he,he=he->next) {
359 if (he->dev == dev && he->bnum == bnum)
360 break;
361 }
362
363 if (he == NULL) {
364 printf("*** hash_delete: tried to delete non-existent block %d:%"
365 B_PRIdOFF "\n", dev, bnum);
366 return NULL;
367 }
368
369 data = he->data;
370
371 if (ht->table[hash] == he)
372 ht->table[hash] = he->next;
373 else if (prev)
374 prev->next = he->next;
375 else
376 beos_panic("hash table is inconsistent\n");
377
378 free(he);
379 ht->num_elements--;
380
381 return data;
382 }
383
384 // #pragma mark -
385
386 /*
387 These are the global variables for the cache.
388 */
389 static block_cache bc;
390
391 #define MAX_IOVECS 64 /* # of iovecs for use by cache code */
392 static beos_lock iovec_lock;
393 static struct iovec *iovec_pool[MAX_IOVECS]; /* each ptr is to an array of iovecs */
394 static int iovec_used[MAX_IOVECS]; /* non-zero == iovec is in use */
395
396 #define NUM_FLUSH_BLOCKS 64 /* size of the iovec array pointed to by each ptr */
397
398
399 #define DEFAULT_READ_AHEAD_SIZE (32 * 1024)
400 static int read_ahead_size = DEFAULT_READ_AHEAD_SIZE;
401
402 /* this array stores the size of each device so we can error check requests */
403 #define MAX_DEVICES 256
404 fs_off_t max_device_blocks[MAX_DEVICES];
405
406
407 /* holds the time of the last cache access so cache flushing doesn't interfere */
408 static bigtime_t last_cache_access = 0;
409
410
411 int
412 beos_init_block_cache(int max_blocks, int flags)
413 {
414 memset(&bc, 0, sizeof(bc));
415 memset(iovec_pool, 0, sizeof(iovec_pool));
416 memset(iovec_used, 0, sizeof(iovec_used));
417 memset(&max_device_blocks, 0, sizeof(max_device_blocks));
418
419 if (init_hash_table(&bc.ht) != 0)
420 return ENOMEM;
421
422 bc.lock.s = iovec_lock.s = -1;
423
424 bc.max_blocks = max_blocks;
425 bc.flags = flags;
426 if (beos_new_lock(&bc.lock, "bollockcache") != 0)
427 goto err;
428
429 if (beos_new_lock(&iovec_lock, "iovec_lock") != 0)
430 goto err;
431
432 /* allocate two of these up front so vm won't accidentally re-enter itself */
433 iovec_pool[0] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
434 iovec_pool[1] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
435
436 #ifndef USER
437 #ifdef DEBUG
438 add_debugger_command("bcache", do_dump, "dump the block cache list");
439 add_debugger_command("fblock", do_find_block, "find a block in the cache");
440 add_debugger_command("fdata", do_find_data, "find a data block ptr in the cache");
441 #endif
442 register_kernel_daemon(cache_flusher, NULL, 3);
443 #endif
444
445 return 0;
446
447 err:
448 if (bc.lock.s >= 0)
449 beos_free_lock(&bc.lock);
450
451 if (iovec_lock.s >= 0)
452 beos_free_lock(&iovec_lock);
453
454 shutdown_hash_table(&bc.ht);
455 memset((void *)&bc, 0, sizeof(bc));
456 return ENOMEM;
457 }
458
459
460 static struct iovec *
461 get_iovec_array(void)
462 {
463 int i;
464 struct iovec *iov;
465
466 LOCK(iovec_lock);
467
468 for(i = 0; i < MAX_IOVECS; i++) {
469 if (iovec_used[i] == 0)
470 break;
471 }
472
473 if (i >= MAX_IOVECS) /* uh-oh */
474 beos_panic("cache: ran out of iovecs (pool %p, used %p)!\n",
475 &iovec_pool[0], &iovec_used[0]);
476
477 if (iovec_pool[i] == NULL) {
478 iovec_pool[i] = (struct iovec *)malloc(sizeof(struct iovec)*NUM_FLUSH_BLOCKS);
479 if (iovec_pool[i] == NULL)
480 beos_panic("can't allocate an iovec!\n");
481 }
482
483 iov = iovec_pool[i];
484 iovec_used[i] = 1;
485
486 UNLOCK(iovec_lock);
487
488 return iov;
489 }
490
491
492 static void
493 release_iovec_array(struct iovec *iov)
494 {
495 int i;
496
497 LOCK(iovec_lock);
498
499 for(i=0; i < MAX_IOVECS; i++) {
500 if (iov == iovec_pool[i])
501 break;
502 }
503
504 if (i < MAX_IOVECS)
505 iovec_used[i] = 0;
506 else /* uh-oh */
507 printf("cache: released an iovec I don't own (iov %p)\n", iov);
508
509
510 UNLOCK(iovec_lock);
511 }
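/*
   typical pool usage (see flush_ents() and read_into_ents() below):

       struct iovec *iov = get_iovec_array();
       ... fill in up to NUM_FLUSH_BLOCKS entries ...
       writev_pos(fd, pos, iov, count);
       release_iovec_array(iov);

   two arrays are allocated up front in beos_init_block_cache() so
   that flushing can proceed without calling malloc() at a bad time;
   exhausting all MAX_IOVECS arrays is treated as fatal.
*/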
512
513
514
515
516 static void
517 real_dump_cache_list(cache_ent_list *cel)
518 {
519 cache_ent *ce;
520
521 kprintf("starting from LRU end:\n");
522
523 for (ce = cel->lru; ce; ce = ce->next) {
524 kprintf("ce %p dev %2d bnum %6" B_PRIdOFF " lock %d flag %d arg %p "
525 "clone %p\n", ce, ce->dev, ce->block_num, ce->lock,
526 ce->flags, ce->arg, ce->clone);
527 }
528 kprintf("MRU end\n");
529 }
530
531 static void
532 dump_cache_list(void)
533 {
534 kprintf("NORMAL BLOCKS\n");
535 real_dump_cache_list(&bc.normal);
536
537 kprintf("LOCKED BLOCKS\n");
538 real_dump_cache_list(&bc.locked);
539
540 kprintf("cur blocks %d, max blocks %d ht @ %p\n", bc.cur_blocks,
541 bc.max_blocks, &bc.ht);
542 }
543
544 #if 0
545 static void
546 check_bcache(char *str)
547 {
548 int count = 0;
549 cache_ent *ce, *prev = NULL;
550
551 LOCK(bc.lock);
552
553 for(ce=bc.normal.lru; ce; prev=ce, ce=ce->next) {
554 count++;
555 }
556
557 for(ce=bc.locked.lru; ce; prev=ce, ce=ce->next) {
558 count++;
559 }
560
561 if (count != bc.cur_blocks) {
562 if (count < bc.cur_blocks - 16)
563 beos_panic("%s: count == %d, cur_blocks %d, prev 0x%x\n",
564 str, count, bc.cur_blocks, prev);
565 else
566 printf("%s: count == %d, cur_blocks %d, prev 0x%x\n",
567 str, count, bc.cur_blocks, prev);
568 }
569
570 UNLOCK(bc.lock);
571 }
572
573
574 static void
575 dump_lists(void)
576 {
577 cache_ent *nce;
578
579 printf("LOCKED 0x%x (tail 0x%x, head 0x%x)\n", &bc.locked,
580 bc.locked.lru, bc.locked.mru);
581 for(nce=bc.locked.lru; nce; nce=nce->next)
582 printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n",
583 nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone,
584 nce->func);
585
586 printf("NORMAL 0x%x (tail 0x%x, head 0x%x)\n", &bc.normal,
587 bc.normal.lru, bc.normal.mru);
588 for(nce=bc.normal.lru; nce; nce=nce->next)
589 printf("nce @ 0x%x dev %d bnum %ld flags %d lock %d clone 0x%x func 0x%x\n",
590 nce, nce->dev, nce->block_num, nce->flags, nce->lock, nce->clone,
591 nce->func);
592 }
593
594
595 static void
596 check_lists(void)
597 {
598 cache_ent *ce, *prev, *oce;
599 cache_ent_list *cel;
600
601 cel = &bc.normal;
602 for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) {
603 for(oce=bc.locked.lru; oce; oce=oce->next) {
604 if (oce == ce) {
605 dump_lists();
606 beos_panic("1:ce @ 0x%x is in two lists(cel 0x%x &LOCKED)\n",ce,cel);
607 }
608 }
609 }
610 if (prev && prev != cel->mru) {
611 dump_lists();
612 beos_panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n",
613 prev, cel);
614 }
615
616 cel = &bc.locked;
617 for(ce=cel->lru,prev=NULL; ce; prev=ce, ce=ce->next) {
618 for(oce=bc.normal.lru; oce; oce=oce->next) {
619 if (oce == ce) {
620 dump_lists();
621 beos_panic("3:ce @ 0x%x is in two lists(cel 0x%x & DIRTY)\n",ce,cel);
622 }
623 }
624 }
625 if (prev && prev != cel->mru) {
626 dump_lists();
627 beos_panic("*** last element in list != cel mru (ce 0x%x, cel 0x%x)\n",
628 prev, cel);
629 }
630 }
631 #endif
632
633
634 #ifdef DEBUG
635 #if 0
636 static int
637 do_dump(int argc, char **argv)
638 {
639 dump_cache_list();
640 return 1;
641 }
642
643
644 static int
645 do_find_block(int argc, char **argv)
646 {
647 int i;
648 fs_off_t bnum;
649 cache_ent *ce;
650
651 if (argc < 2) {
652 kprintf("%s: needs a block # argument\n", argv[0]);
653 return 1;
654 }
655
656 for(i=1; i < argc; i++) {
657 bnum = strtoul(argv[i], NULL, 0);
658
659 for(ce=bc.normal.lru; ce; ce=ce->next) {
660 if (ce->block_num == bnum) {
661 kprintf("found clean bnum %ld @ 0x%lx (data @ 0x%lx)\n",
662 bnum, ce, ce->data);
663 }
664 }
665
666 for(ce=bc.locked.lru; ce; ce=ce->next) {
667 if (ce->block_num == bnum) {
668 kprintf("found locked bnum %ld @ 0x%lx (data @ 0x%lx)\n",
669 bnum, ce, ce->data);
670 }
671 }
672 }
673
674 return 0;
675 }
676
677 static int
678 do_find_data(int argc, char **argv)
679 {
680 int i;
681 void *data;
682 cache_ent *ce;
683
684 if (argc < 2) {
685 kprintf("%s: needs a block # argument\n", argv[0]);
686 return 1;
687 }
688
689 for(i=1; i < argc; i++) {
690 data = (void *)strtoul(argv[i], NULL, 0);
691
692 for(ce=bc.normal.lru; ce; ce=ce->next) {
693 if (ce->data == data) {
694 kprintf("found normal data ptr for bnum %ld @ ce 0x%lx\n",
695 ce->block_num, ce);
696 }
697 }
698
699 for(ce=bc.locked.lru; ce; ce=ce->next) {
700 if (ce->data == data) {
701 kprintf("found locked data ptr for bnum %ld @ ce 0x%lx\n",
702 ce->block_num, ce);
703 }
704 }
705 }
706
707 return 0;
708 }
709 #endif
710 #endif /* DEBUG */
711
712
713
714 /*
715 this function detaches the cache_ent from the list.
716 */
717 static void
718 delete_from_list(cache_ent_list *cel, cache_ent *ce)
719 {
720 if (ce->next)
721 ce->next->prev = ce->prev;
722 if (ce->prev)
723 ce->prev->next = ce->next;
724
725 if (cel->lru == ce)
726 cel->lru = ce->next;
727 if (cel->mru == ce)
728 cel->mru = ce->prev;
729
730 ce->next = NULL;
731 ce->prev = NULL;
732 }
733
734
735
736 /*
737 this function adds the cache_ent ce to the head of the
738 list (i.e. the MRU end). the cache_ent should *not*
739 be in any lists.
740 */
741 static void
742 add_to_head(cache_ent_list *cel, cache_ent *ce)
743 {
744 if (ce->next != NULL || ce->prev != NULL) {
745 beos_panic("*** ath: ce has non-null next/prev ptr (ce %p nxt %p, prv %p)\n",
746 ce, ce->next, ce->prev);
747 }
748
749 ce->next = NULL;
750 ce->prev = cel->mru;
751
752 if (cel->mru)
753 cel->mru->next = ce;
754 cel->mru = ce;
755
756 if (cel->lru == NULL)
757 cel->lru = ce;
758 }
759
760
761 /*
762 this function adds the cache_ent ce to the tail of the
763 list (i.e. the LRU end). the cache_ent should *not*
764 be in any lists.
765 */
766 static void
767 add_to_tail(cache_ent_list *cel, cache_ent *ce)
768 {
769 if (ce->next != NULL || ce->prev != NULL) {
770 beos_panic("*** att: ce has non-null next/prev ptr (ce %p nxt %p, prv %p)\n",
771 ce, ce->next, ce->prev);
772 }
773
774 ce->next = cel->lru;
775 ce->prev = NULL;
776
777 if (cel->lru)
778 cel->lru->prev = ce;
779 cel->lru = ce;
780
781 if (cel->mru == NULL)
782 cel->mru = ce;
783 }
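/*
   a tiny disabled sanity check of the list primitives above, to make
   the ordering concrete: add_to_head() appends at the MRU end and
   add_to_tail() at the LRU end, so after the two adds the list reads
   b -> a from the LRU end. the bare cache_ents here are hypothetical.
*/
#if 0
static void
test_list_primitives(void)
{
    static cache_ent a, b;    /* static == zeroed next/prev pointers */
    cache_ent_list cel;

    memset(&cel, 0, sizeof(cel));

    add_to_head(&cel, &a);    /* list is now: a */
    add_to_tail(&cel, &b);    /* list is now: b -> a (LRU to MRU) */

    if (cel.lru != &b || cel.mru != &a)
        beos_panic("list primitives are broken!\n");

    delete_from_list(&cel, &b);
    if (cel.lru != &a || cel.mru != &a)
        beos_panic("delete_from_list is broken!\n");
}
#endif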
784
785
786 static int
787 cache_ent_cmp(const void *a, const void *b)
788 {
789 fs_off_t diff;
790 cache_ent *p1 = *(cache_ent **)a, *p2 = *(cache_ent **)b;
791
792 if (p1 == NULL || p2 == NULL)
793 beos_panic("cache_ent pointers are null?!? (a %p, b %p\n)\n", a, b);
794
795 if (p1->dev == p2->dev) {
796 diff = p1->block_num - p2->block_num;
797 return diff < 0 ? -1 : (diff > 0 ? 1 : 0); /* don't truncate a 64-bit diff to int */
798 } else {
799 return p1->dev - p2->dev;
800 }
801 }
802
803 #if 0
804 // ToDo: add this to the fsh (as a background thread)?
805 static void
806 cache_flusher(void *arg, int phase)
807 {
808 int i, num_ents, err;
809 bigtime_t now = system_time();
810 static cache_ent *ce = NULL;
811 static cache_ent *ents[NUM_FLUSH_BLOCKS];
812
813 /*
814 if someone else was in the cache recently then just bail out so
815 we don't lock them out unnecessarily
816 */
817 if ((now - last_cache_access) < 1000000)
818 return;
819
820 LOCK(bc.lock);
821
822 ce = bc.normal.lru;
823
824 for(num_ents=0; ce && num_ents < NUM_FLUSH_BLOCKS; ce=ce->next) {
825 if (ce->flags & CE_BUSY)
826 continue;
827
828 if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
829 continue;
830
831 ents[num_ents] = ce;
832 ents[num_ents]->flags |= CE_BUSY;
833 num_ents++;
834 }
835
836 /* if we've got some room left over, look for cloned locked blocks */
837 if (num_ents < NUM_FLUSH_BLOCKS) {
838 ce = bc.locked.lru;
839
840 for(; num_ents < NUM_FLUSH_BLOCKS;) {
841 for(;
842 ce && ((ce->flags & CE_BUSY) || ce->clone == NULL);
843 ce=ce->next)
844 /* skip ents that meet the above criteria */;
845
846 if (ce == NULL)
847 break;
848
849 ents[num_ents] = ce;
850 ents[num_ents]->flags |= CE_BUSY;
851 ce = ce->next;
852 num_ents++;
853 }
854 }
855
856 UNLOCK(bc.lock);
857
858 if (num_ents == 0)
859 return;
860
861 qsort(ents, num_ents, sizeof(cache_ent **), cache_ent_cmp);
862
863 if ((err = flush_ents(ents, num_ents)) != 0) {
864 printf("flush ents failed (ents @ 0x%lx, num_ents %d!\n",
865 (ulong)ents, num_ents);
866 }
867
868 for(i=0; i < num_ents; i++) { /* clear the busy bit on each of ent */
869 ents[i]->flags &= ~CE_BUSY;
870 }
871 }
872 #endif
873
874
875 static int
876 flush_cache_ent(cache_ent *ce)
877 {
878 int ret = 0;
879 void *data;
880
881 /* if true, then there's nothing to flush */
882 if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
883 return 0;
884
885 /* same thing here */
886 if (ce->clone == NULL && ce->lock != 0)
887 return 0;
888
889 restart:
890 if (ce->clone)
891 data = ce->clone;
892 else
893 data = ce->data;
894
895 if (chatty_io > 2) printf("flush: %7" B_PRIdOFF "\n", ce->block_num);
896 ret = beos_write_phys_blocks(ce->dev, ce->block_num, data, 1, ce->bsize);
897
898 if (ce->func) {
899 ce->func(ce->logged_bnum, 1, ce->arg);
900 ce->func = NULL;
901 }
902
903 if (ce->clone) {
904 free(ce->clone);
905 ce->clone = NULL;
906
907 if (ce->lock == 0 && (ce->flags & CE_DIRTY))
908 goto restart; /* also write the real data ptr */
909 } else {
910 ce->flags &= ~CE_DIRTY;
911 }
912
913 return ret;
914 }
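/*
   worked example for the restart logic above: suppose a block was
   logged (so it has a clone) and has since been modified again (so it
   is also dirty and unlocked). the first pass writes the clone, i.e.
   the journaled contents, and fires the log callback; the goto then
   writes ce->data, the newer contents, over the same spot. writing
   them in the other order would let the older data win.
*/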
915
916
917 static int
918 flush_ents(cache_ent **ents, int n_ents)
919 {
920 int i, j, k, ret = 0, bsize, iocnt, do_again = 0;
921 fs_off_t start_bnum;
922 struct iovec *iov;
923
924 iov = get_iovec_array();
925 if (iov == NULL)
926 return ENOMEM;
927
928 restart:
929 for(i=0; i < n_ents; i++) {
930 /* if true, then there's nothing to flush */
931 if ((ents[i]->flags & CE_DIRTY) == 0 && ents[i]->clone == NULL)
932 continue;
933
934 /* if true we can't touch the dirty data yet because it's locked */
935 if (ents[i]->clone == NULL && ents[i]->lock != 0)
936 continue;
937
938
939 bsize = ents[i]->bsize;
940 start_bnum = ents[i]->block_num;
941
942 for(j=i+1; j < n_ents && (j - i) < NUM_FLUSH_BLOCKS; j++) {
943 if (ents[j]->dev != ents[i]->dev ||
944 ents[j]->block_num != start_bnum + (j - i))
945 break;
946
947 if (ents[j]->clone == NULL && ents[j]->lock != 0)
948 break;
949 }
950
951 if (j == i+1) { /* only one block, just flush it directly */
952 if ((ret = flush_cache_ent(ents[i])) != 0)
953 break;
954 continue;
955 }
956
957
958 for(k=i,iocnt=0; k < j; k++,iocnt++) {
959 if (ents[k]->clone)
960 iov[iocnt].iov_base = ents[k]->clone;
961 else
962 iov[iocnt].iov_base = ents[k]->data;
963
964 iov[iocnt].iov_len = bsize;
965 }
966
967 if (chatty_io)
968 printf("writev @ %" B_PRIdOFF " for %d blocks\n",
969 start_bnum, iocnt);
970
971 ret = writev_pos(ents[i]->dev, start_bnum * (fs_off_t)bsize,
972 &iov[0], iocnt);
973 if (ret != iocnt*bsize) {
974 int idx;
975
976 printf("flush_ents: writev failed: "
977 "iocnt %d start bnum %" B_PRIdOFF " bsize %d, ret %d\n",
978 iocnt, start_bnum, bsize, ret);
979
980 for(idx=0; idx < iocnt; idx++)
981 printf("iov[%2d] = %p :: %ld\n", idx, iov[idx].iov_base,
982 iov[idx].iov_len);
983
984 printf("error %s writing blocks %" B_PRIdOFF ":%d (%d != %d)\n",
985 strerror(errno), start_bnum, iocnt, ret, iocnt*bsize);
986 ret = EINVAL;
987 break;
988 }
989 ret = 0;
990
991
992 for(k=i; k < j; k++) {
993 if (ents[k]->func) {
994 ents[k]->func(ents[k]->logged_bnum, 1, ents[k]->arg);
995 ents[k]->func = NULL;
996 }
997
998 if (ents[k]->clone) {
999 free(ents[k]->clone);
1000 ents[k]->clone = NULL;
1001 } else {
1002 ents[k]->flags &= ~CE_DIRTY;
1003 }
1004 }
1005
1006
1007 i = j - 1; /* i gets incremented by the outer for loop */
1008 }
1009
1010 /*
1011 here we have to go back through and flush any blocks that are
1012 still dirty. with an arched brow you astutely ask, "but how
1013 could this happen given the above loop?" Ahhh young grasshopper
1014 I say, the path through the cache is long and twisty and fraught
1015 with peril. The reason it can happen is that a block can be both
1016 cloned and dirty. The above loop would only flush the cloned half
1017 of the data, not the main dirty block. So we have to go back
1018 through and see if there are any blocks that are still dirty. If
1019 there are we go back to the top of the function and do the whole
1020 thing over. Kind of grody but it is necessary to ensure the
1021 correctness of the log for the Be file system.
1022 */
1023 if (do_again == 0) {
1024 for(i=0; i < n_ents; i++) {
1025 if ((ents[i]->flags & CE_DIRTY) == 0 || ents[i]->lock)
1026 continue;
1027
1028 do_again = 1;
1029 break;
1030 }
1031
1032 if (do_again)
1033 goto restart;
1034 }
1035
1036 release_iovec_array(iov);
1037
1038
1039 return ret;
1040 }
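/*
   worked example: if ents[] holds dev-sorted blocks 10, 11, 12 and 40,
   the loop above issues one writev_pos() covering 10-12 (three iovecs)
   and falls back to flush_cache_ent() for the lone block 40. runs are
   also cut short at NUM_FLUSH_BLOCKS entries and at any block whose
   dirty data is still locked and not yet cloned.
*/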
1041
1042 static void
1043 delete_cache_list(cache_ent_list *cel)
1044 {
1045 void *junk;
1046 cache_ent *ce, *next;
1047
1048 for (ce = cel->lru; ce; ce = next) {
1049 next = ce->next;
1050 if (ce->lock != 0) {
1051 if (ce->func)
1052 printf("*** shutdown_block_cache: "
1053 "block %" B_PRIdOFF ", lock == %d (arg %p)!\n",
1054 ce->block_num, ce->lock, ce->arg);
1055 else
1056 printf("*** shutdown_block_cache: "
1057 "block %" B_PRIdOFF ", lock == %d!\n",
1058 ce->block_num, ce->lock);
1059 }
1060
1061 if (ce->flags & CE_BUSY) {
1062 printf("* shutdown block cache: "
1063 "bnum %" B_PRIdOFF " is busy? ce %p\n", ce->block_num, ce);
1064 }
1065
1066 if ((ce->flags & CE_DIRTY) || ce->clone) {
1067 flush_cache_ent(ce);
1068 }
1069
1070 if (ce->clone)
1071 free(ce->clone);
1072 ce->clone = NULL;
1073
1074 if (ce->data)
1075 free(ce->data);
1076 ce->data = NULL;
1077
1078 if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) {
1079 printf("*** free_device_cache: "
1080 "bad hash table entry %" B_PRIdOFF " %p != %p\n",
1081 ce->block_num, junk, ce);
1082 }
1083
1084 explicit_bzero(ce, sizeof(*ce));
1085 free(ce);
1086
1087 bc.cur_blocks--;
1088 }
1089 }
1090
1091
1092 void
1093 beos_shutdown_block_cache(void)
1094 {
1095 /* print_hash_stats(&bc.ht); */
1096
1097 if (bc.lock.s > 0)
1098 LOCK(bc.lock);
1099
1100 #ifndef USER
1101 unregister_kernel_daemon(cache_flusher, NULL);
1102 #endif
1103
1104 delete_cache_list(&bc.normal);
1105 delete_cache_list(&bc.locked);
1106
1107 bc.normal.lru = bc.normal.mru = NULL;
1108 bc.locked.lru = bc.locked.mru = NULL;
1109
1110 shutdown_hash_table(&bc.ht);
1111
1112 if (bc.lock.s > 0)
1113 beos_free_lock(&bc.lock);
1114 bc.lock.s = -1;
1115
1116 if (iovec_lock.s >= 0)
1117 beos_free_lock(&iovec_lock);
1118 }
1119
1120
1121
1122 int
1123 beos_init_cache_for_device(int fd, fs_off_t max_blocks)
1124 {
1125 int ret = 0;
1126
1127 if (fd >= MAX_DEVICES)
1128 return -1;
1129
1130 LOCK(bc.lock);
1131
1132 if (max_device_blocks[fd] != 0) {
1133 printf("device %d is already initialized!\n", fd);
1134 ret = -1;
1135 } else {
1136 max_device_blocks[fd] = max_blocks;
1137 }
1138
1139 UNLOCK(bc.lock);
1140
1141 return ret;
1142 }
1143
1144
1145
1146 /*
1147 this routine assumes that bc.lock has been acquired
1148 */
1149 static cache_ent *
1150 block_lookup(int dev, fs_off_t bnum)
1151 {
1152 int count = 0;
1153 cache_ent *ce;
1154
1155 while (1) {
1156 ce = hash_lookup(&bc.ht, dev, bnum);
1157 if (ce == NULL)
1158 return NULL;
1159
1160 if ((ce->flags & CE_BUSY) == 0) /* it's ok, break out and return it */
1161 break;
1162
1163 /* else, it's busy and we need to retry our lookup */
1164 UNLOCK(bc.lock);
1165
1166 snooze(5000);
1167 if (count++ == 5000) { /* then a lot of time has elapsed */
1168 printf("block %" B_PRIdOFF " isn't coming un-busy (ce @ %p)\n",
1169 ce->block_num, ce);
1170 }
1171
1172 LOCK(bc.lock);
1173 }
1174
1175 if (ce->flags & CE_BUSY)
1176 beos_panic("block lookup: returning a busy block @ %p?!?\n", ce);
1177
1178 return ce;
1179 }
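/*
   note on the retry loop above: a CE_BUSY block is owned by some other
   thread doing i/o on it, so we drop the cache lock, sleep for 5ms,
   and redo the lookup from scratch (the block may have been freed or
   recycled in the meantime). the complaint fires after roughly 25
   seconds of waiting (5000 retries * 5ms).
*/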
1180
1181
1182
1183 int
1184 beos_set_blocks_info(int dev, fs_off_t *blocks, int nblocks,
1185 void (*func)(fs_off_t bnum, size_t nblocks, void *arg), void *arg)
1186 {
1187 int i, j, cur;
1188 cache_ent *ce;
1189 cache_ent *ents[NUM_FLUSH_BLOCKS];
1190
1191 LOCK(bc.lock);
1192
1193
1194 for(i=0, cur=0; i < nblocks; i++) {
1195
1196 /* printf("sbi: %ld (arg 0x%x)\n", blocks[i], arg); */
1197 ce = block_lookup(dev, blocks[i]);
1198 if (ce == NULL) {
1199 beos_panic("*** set_block_info can't find bnum %ld!\n", blocks[i]);
1200 UNLOCK(bc.lock);
1201 return ENOENT; /* hopefully this doesn't happen... */
1202 }
1203
1204
1205 if (blocks[i] != ce->block_num || dev != ce->dev) {
1206 UNLOCK(bc.lock);
1207 beos_panic("** error1: looked up dev %d block %ld but found dev %d "
1208 "bnum %ld\n", dev, blocks[i], ce->dev, ce->block_num);
1209 return EBADF;
1210 }
1211
1212 if (ce->lock == 0) {
1213 beos_panic("* set_block_info on bnum %ld (%d) but it's not locked!\n",
1214 blocks[i], nblocks);
1215 }
1216
1217
1218 if ((ce->flags & CE_DIRTY) == 0) {
1219 beos_panic("*** set_block_info on non-dirty block bnum %ld (%d)!\n",
1220 blocks[i], nblocks);
1221 }
1222
1223 ce->flags |= CE_BUSY; /* mark all blocks as busy till we're done */
1224
1225 /* if there is cloned data, it needs to be flushed now */
1226 if (ce->clone && ce->func) {
1227 ents[cur++] = ce;
1228
1229 if (cur >= NUM_FLUSH_BLOCKS) {
1230 UNLOCK(bc.lock);
1231
1232 qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1233
1234 flush_ents(ents, cur);
1235
1236 LOCK(bc.lock);
1237 for(j=0; j < cur; j++)
1238 ents[j]->flags &= ~CE_BUSY;
1239 cur = 0;
1240 }
1241 }
1242 }
1243
1244
1245 if (cur != 0) {
1246 UNLOCK(bc.lock);
1247
1248 qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1249
1250 flush_ents(ents, cur);
1251
1252 LOCK(bc.lock);
1253 for(j=0; j < cur; j++)
1254 ents[j]->flags &= ~CE_BUSY;
1255 cur = 0;
1256 }
1257
1258
1259 /* now go through and set the info that we were asked to */
1260 for (i = 0; i < nblocks; i++) {
1261 /* we can call hash_lookup() here because we know it's around */
1262 ce = hash_lookup(&bc.ht, dev, blocks[i]);
1263 if (ce == NULL) {
1264 beos_panic("*** set_block_info can't find bnum %lld!\n", blocks[i]);
1265 UNLOCK(bc.lock);
1266 return ENOENT; /* hopefully this doesn't happen... */
1267 }
1268
1269 ce->flags &= ~(CE_DIRTY | CE_BUSY);
1270
1271 if (ce->func != NULL) {
1272 beos_panic("*** set_block_info non-null callback on bnum %lld\n",
1273 ce->block_num);
1274 }
1275
1276 if (ce->clone != NULL) {
1277 beos_panic("*** ce->clone == %p, not NULL in set_block_info\n",
1278 ce->clone);
1279 }
1280
1281 ce->clone = (void *)malloc(ce->bsize);
1282 if (ce->clone == NULL)
1283 beos_panic("*** can't clone bnum %lld (bsize %d)\n",
1284 ce->block_num, ce->bsize);
1285
1286
1287 memcpy(ce->clone, ce->data, ce->bsize);
1288
1289 ce->func = func;
1290 ce->arg = arg;
1291
1292 ce->logged_bnum = blocks[i];
1293
1294 ce->lock--;
1295 if (ce->lock < 0) {
1296 printf("sbi: "
1297 "whoa nellie! ce @ %p (%" B_PRIdOFF ") has lock == %d\n",
1298 ce, ce->block_num, ce->lock);
1299 }
1300
1301 if (ce->lock == 0) {
1302 delete_from_list(&bc.locked, ce);
1303 add_to_head(&bc.normal, ce);
1304 }
1305 }
1306
1307 UNLOCK(bc.lock);
1308
1309 return 0;
1310 }
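/*
   a sketch (disabled, and hypothetical except for the cache calls) of
   how a journaling file system such as BFS might drive the function
   above: blocks are modified through cached_write_locked(), their
   contents are appended to the on-disk log (not shown), and then
   set_blocks_info() clones them and installs a callback so the log
   space can be reclaimed once the clones reach their real locations.
*/
#if 0
static void
example_log_completion(fs_off_t bnum, size_t nblocks, void *arg)
{
    /* hypothetical: retire the log entry that covers bnum */
    printf("block %ld has hit the disk, arg %p\n", (long)bnum, arg);
}

static void
example_journal_commit(int dev, fs_off_t *blocks, int nblocks,
                       void *one_block_of_data, int bsize)
{
    int i;

    /* write + lock each block in the cache; it is now dirty too */
    for (i = 0; i < nblocks; i++)
        beos_cached_write_locked(dev, blocks[i], one_block_of_data,
                                 1, bsize);

    /* clone the blocks, attach the callback, and drop the locks */
    beos_set_blocks_info(dev, blocks, nblocks, example_log_completion,
                         NULL);
}
#endif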
1311
1312
1313 /* this function is only for use by flush_device() */
1314 static void
1315 do_flush(cache_ent **ents, int max)
1316 {
1317 int i;
1318
1319 for(i=0; i < max; i++) {
1320 ents[i]->flags |= CE_BUSY;
1321 }
1322
1323 UNLOCK(bc.lock);
1324
1325 qsort(ents, max, sizeof(cache_ent **), cache_ent_cmp);
1326 flush_ents(ents, max);
1327
1328 LOCK(bc.lock);
1329 for(i=0; i < max; i++) {
1330 ents[i]->flags &= ~CE_BUSY;
1331 }
1332 }
1333
1334 int
1335 beos_flush_device(int dev, int warn_locked)
1336 {
1337 int cur;
1338 cache_ent *ce;
1339 cache_ent *ents[NUM_FLUSH_BLOCKS];
1340
1341 LOCK(bc.lock);
1342
1343 cur = 0;
1344 ce = bc.normal.lru;
1345 while (ce) {
1346 if (ce->dev != dev || (ce->flags & CE_BUSY)) {
1347 ce = ce->next;
1348 continue;
1349 }
1350
1351 if ((ce->flags & CE_DIRTY) || ce->clone) {
1352 ents[cur++] = ce;
1353 if (cur >= NUM_FLUSH_BLOCKS) {
1354 do_flush(ents, cur);
1355
1356 ce = bc.normal.lru;
1357 cur = 0;
1358 continue;
1359 }
1360 }
1361
1362 ce = ce->next;
1363 }
1364
1365 if (cur != 0)
1366 do_flush(ents, cur);
1367
1368 cur = 0;
1369 ce = bc.locked.lru;
1370 while (ce) {
1371 if (ce->dev != dev || (ce->flags & CE_BUSY)) {
1372 ce = ce->next;
1373 continue;
1374 }
1375
1376 if (ce->clone) {
1377 ents[cur++] = ce;
1378 if (cur >= NUM_FLUSH_BLOCKS) {
1379 do_flush(ents, cur);
1380
1381 ce = bc.locked.lru;
1382 cur = 0;
1383 continue;
1384 }
1385 }
1386
1387 ce = ce->next;
1388 }
1389
1390 if (cur != 0)
1391 do_flush(ents, cur);
1392
1393 UNLOCK(bc.lock);
1394
1395 return 0;
1396 }
1397
1398
1399 static void
1400 real_remove_cached_blocks(int dev, int allow_writes, cache_ent_list *cel)
1401 {
1402 void *junk;
1403 cache_ent *ce, *next = NULL;
1404
1405 for(ce=cel->lru; ce; ce=next) {
1406 next = ce->next;
1407
1408 if (ce->dev != dev) {
1409 continue;
1410 }
1411
1412 if (ce->lock != 0 || (ce->flags & CE_BUSY)) {
1413 printf("*** remove_cached_dev: "
1414 "block %" B_PRIdOFF " has lock = %d, flags 0x%x! ce @ %p\n",
1415 ce->block_num, ce->lock, ce->flags, ce);
1416 }
1417
1418 if (allow_writes == ALLOW_WRITES &&
1419 ((ce->flags & CE_DIRTY) || ce->clone)) {
1420 ce->flags |= CE_BUSY;
1421 flush_cache_ent(ce);
1422 ce->flags &= ~CE_BUSY;
1423 }
1424
1425 /* unlink this guy */
1426 if (cel->lru == ce)
1427 cel->lru = ce->next;
1428
1429 if (cel->mru == ce)
1430 cel->mru = ce->prev;
1431
1432 if (ce->prev)
1433 ce->prev->next = ce->next;
1434 if (ce->next)
1435 ce->next->prev = ce->prev;
1436
1437 if (ce->clone)
1438 free(ce->clone);
1439 ce->clone = NULL;
1440
1441 if (ce->data)
1442 free(ce->data);
1443 ce->data = NULL;
1444
1445 if ((junk = hash_delete(&bc.ht, ce->dev, ce->block_num)) != ce) {
1446 beos_panic("*** remove_cached_device: bad hash table entry %ld "
1447 "%p != %p\n", ce->block_num, junk, ce);
1448 }
1449
1450 free(ce);
1451
1452 bc.cur_blocks--;
1453 }
1454
1455 }
1456
1457 int
1458 beos_remove_cached_device_blocks(int dev, int allow_writes)
1459 {
1460 LOCK(bc.lock);
1461
1462 real_remove_cached_blocks(dev, allow_writes, &bc.normal);
1463 real_remove_cached_blocks(dev, allow_writes, &bc.locked);
1464
1465 max_device_blocks[dev] = 0;
1466
1467 UNLOCK(bc.lock);
1468
1469 return 0;
1470 }
1471
1472
1473
1474 int
1475 beos_flush_blocks(int dev, fs_off_t bnum, int nblocks)
1476 {
1477 int cur, i;
1478 cache_ent *ce;
1479 cache_ent *ents[NUM_FLUSH_BLOCKS];
1480
1481 if (nblocks == 0) /* might as well check for this */
1482 return 0;
1483
1484 LOCK(bc.lock);
1485
1486 cur = 0;
1487 for(; nblocks > 0; nblocks--, bnum++) {
1488 ce = block_lookup(dev, bnum);
1489 if (ce == NULL)
1490 continue;
1491
1492 if (bnum != ce->block_num || dev != ce->dev) {
1493 UNLOCK(bc.lock);
1494 beos_panic("error2: looked up dev %d block %ld but found %d %ld\n",
1495 dev, bnum, ce->dev, ce->block_num);
1496 return EBADF;
1497 }
1498
1499 if ((ce->flags & CE_DIRTY) == 0 && ce->clone == NULL)
1500 continue;
1501
1502 ce->flags |= CE_BUSY;
1503 ents[cur++] = ce;
1504
1505 if (cur >= NUM_FLUSH_BLOCKS) {
1506 UNLOCK(bc.lock);
1507
1508 qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1509 flush_ents(ents, cur);
1510
1511 LOCK(bc.lock);
1512 for(i=0; i < cur; i++) {
1513 ents[i]->flags &= ~CE_BUSY;
1514 }
1515 cur = 0;
1516 }
1517 }
1518
1519 UNLOCK(bc.lock);
1520
1521 if (cur == 0) /* nothing more to do */
1522 return 0;
1523
1524 /* flush out the last few buggers */
1525 qsort(ents, cur, sizeof(cache_ent **), cache_ent_cmp);
1526 flush_ents(ents, cur);
1527
1528 for(i=0; i < cur; i++) {
1529 ents[i]->flags &= ~CE_BUSY;
1530 }
1531
1532 return 0;
1533 }
1534
1535
1536 int
1537 beos_mark_blocks_dirty(int dev, fs_off_t bnum, int nblocks)
1538 {
1539 int ret = 0;
1540 cache_ent *ce;
1541
1542 LOCK(bc.lock);
1543
1544 while(nblocks > 0) {
1545 ce = block_lookup(dev, bnum);
1546 if (ce) {
1547 ce->flags |= CE_DIRTY;
1548 bnum += 1;
1549 nblocks -= 1;
1550 } else { /* hmmm, that's odd, didn't find it */
1551 printf("** mark_blocks_diry couldn't find block %" B_PRIdOFF " "
1552 "(len %d)\n", bnum, nblocks);
1553 ret = ENOENT;
1554 break;
1555 }
1556 }
1557
1558 UNLOCK(bc.lock);
1559
1560 return ret;
1561 }
1562
1563
1564
1565 int
1566 beos_release_block(int dev, fs_off_t bnum)
1567 {
1568 cache_ent *ce;
1569
1570 /* printf("rlsb: %ld\n", bnum); */
1571 LOCK(bc.lock);
1572
1573 ce = block_lookup(dev, bnum);
1574 if (ce) {
1575 if (bnum != ce->block_num || dev != ce->dev) {
1576 beos_panic("*** error3: looked up dev %d block %ld but found %d %ld\n",
1577 dev, bnum, ce->dev, ce->block_num);
1578 UNLOCK(bc.lock);
1579 return EBADF;
1580 }
1581
1582 ce->lock--;
1583
1584 if (ce->lock < 0) {
1585 printf("rlsb: whoa nellie! ce %" B_PRIdOFF " has lock == %d\n",
1586 ce->block_num, ce->lock);
1587 }
1588
1589 if (ce->lock == 0) {
1590 delete_from_list(&bc.locked, ce);
1591 add_to_head(&bc.normal, ce);
1592 }
1593
1594 } else { /* hmmm, that's odd, didn't find it */
1595 beos_panic("** release_block asked to find %ld but it's not here\n",
1596 bnum);
1597 }
1598
1599 UNLOCK(bc.lock);
1600
1601 return 0;
1602 }
1603
1604
1605 static cache_ent *
1606 new_cache_ent(int bsize)
1607 {
1608 cache_ent *ce;
1609
1610 ce = (cache_ent *)calloc(1, sizeof(cache_ent));
1611 if (ce == NULL) {
1612 beos_panic("*** error: cache can't allocate memory!\n");
1613 return NULL;
1614 }
1615
1616 ce->data = malloc(bsize);
1617 if (ce->data == NULL) {
1618 free(ce);
1619 beos_panic("** error cache can't allocate data memory\n");
1620 UNLOCK(bc.lock);
1621 return NULL;
1622 }
1623
1624 ce->dev = -1;
1625 ce->block_num = -1;
1626
1627 return ce;
1628 }
1629
1630
1631 static void
1632 get_ents(cache_ent **ents, int num_needed, int max, int *num_gotten, int bsize)
1633 {
1634 int cur, retry_counter = 0, max_retry = num_needed * 256;
1635 cache_ent *ce;
1636
1637 if (num_needed > max)
1638 beos_panic("get_ents: num_needed %d but max %d (doh!)\n", num_needed, max);
1639
1640 /* if the cache isn't full yet, just allocate the blocks */
1641 for(cur=0; bc.cur_blocks < bc.max_blocks && cur < num_needed; cur++) {
1642 ents[cur] = new_cache_ent(bsize);
1643 if (ents[cur] == NULL)
1644 break;
1645 bc.cur_blocks++;
1646 }
1647
1648 /* pluck off blocks from the LRU end of the normal list, keep trying too */
1649 while(cur < num_needed && retry_counter < max_retry) {
1650 for(ce=bc.normal.lru; ce && cur < num_needed; ce=ce->next) {
1651 if (ce->lock)
1652 beos_panic("get_ents: normal list has locked blocks (ce %p)\n",ce);
1653
1654 if (ce->flags & CE_BUSY) /* don't touch busy blocks */
1655 continue;
1656
1657 ce->flags |= CE_BUSY;
1658 ents[cur++] = ce;
1659 }
1660
1661 if (cur < num_needed) {
1662 UNLOCK(bc.lock);
1663 snooze(10000);
1664 LOCK(bc.lock);
1665 retry_counter++;
1666 }
1667 }
1668
1669 if (cur < num_needed && retry_counter >= max_retry) { /* oh shit! */
1670 dump_cache_list();
1671 UNLOCK(bc.lock);
1672 beos_panic("get_ents: waited too long; can't get enough ce's (c %d n %d)\n",
1673 cur, num_needed);
1674 }
1675
1676 /*
1677 If the last block is a dirty one, try to get more of 'em so
1678 that we can flush a bunch of blocks at once.
1679 */
1680 if (cur && cur < max &&
1681 ((ents[cur-1]->flags & CE_DIRTY) || ents[cur-1]->clone)) {
1682
1683 for(ce=ents[cur-1]->next; ce && cur < max; ce=ce->next) {
1684 if (ce->flags & CE_BUSY) /* don't touch busy blocks */
1685 continue;
1686
1687 if (ce->lock)
1688 beos_panic("get_ents:2 dirty list has locked blocks (ce %p)\n",ce);
1689
1690 ce->flags |= CE_BUSY;
1691 ents[cur++] = ce;
1692 }
1693 }
1694
1695 *num_gotten = cur;
1696 }
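/*
   summary of the victim policy above: allocate brand new entries until
   the cache hits max_blocks, then reclaim from the LRU end of the
   normal list (locked blocks live on the other list and are never
   candidates), skipping busy entries and snoozing between passes.
   the tail extension just above is opportunistic batching: if the
   last victim has to be flushed anyway, grabbing a few more LRU
   entries lets flush_ents() push out a whole run in one writev.
*/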
1697
1698
1699 static int
1700 read_into_ents(int dev, fs_off_t bnum, cache_ent **ents, int num, int bsize)
1701 {
1702 int i, ret;
1703 struct iovec *iov;
1704
1705 iov = get_iovec_array();
1706
1707 for (i = 0; i < num; i++) {
1708 iov[i].iov_base = ents[i]->data;
1709 iov[i].iov_len = bsize;
1710 }
1711
1712 if (chatty_io > 2)
1713 printf("readv @ %" B_PRIdOFF " for %d blocks "
1714 "(at %" B_PRIdOFF ", block_size = %d)\n",
1715 bnum, num, bnum*bsize, bsize);
1716 ret = readv_pos(dev, bnum*bsize, iov, num);
1717
1718 release_iovec_array(iov);
1719
1720 if (ret != num*bsize) {
1721 printf("read_into_ents: asked to read %d bytes but got %d\n",
1722 num*bsize, ret);
1723 printf("*** iov @ %p (num %d)\n", iov, num);
1724 return EINVAL;
1725 } else
1726 return 0;
1727 }
1728
1729
1730
1731 #define CACHE_READ 0x0001
1732 #define CACHE_WRITE 0x0002
1733 #define CACHE_NOOP 0x0004 /* for getting empty blocks */
1734 #define CACHE_LOCKED 0x0008
1735 #define CACHE_READ_AHEAD_OK 0x0010 /* it's ok to do read-ahead */
1736
1737
1738 static char *
1739 op_to_str(int op)
1740 {
1741 static char buff[128];
1742
1743 if (op & CACHE_READ)
1744 strcpy(buff, "READ");
1745 else if (op & CACHE_WRITE)
1746 strcpy(buff, "WRITE");
1747 else if (op & CACHE_NOOP)
1748 strcpy(buff, "NOP");
1749
1750 if (op & CACHE_LOCKED)
1751 strcat(buff, " LOCKED");
1752
1753 if (op & CACHE_READ_AHEAD_OK)
1754 strcat(buff, " (AHEAD)");
1755
1756 return buff;
1757 }
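/*
   note: op_to_str() formats into a static buffer, so it is neither
   reentrant nor thread safe. that's fine for the chatty_io debug
   printf()s it serves, but the result shouldn't be cached.
*/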
1758
1759 static int
1760 cache_block_io(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize,
1761 int op, void **dataptr)
1762 {
1763 size_t err = 0;
1764 cache_ent *ce;
1765 cache_ent_list *cel;
1766
1767 if (chatty_io > 1)
1768 printf("cbio: bnum = %" B_PRIdOFF ", num_blocks = %" B_PRIdOFF ", "
1769 "bsize = %d, op = %s\n", bnum, num_blocks, bsize, op_to_str(op));
1770
1771 /* some sanity checks first */
1772 if (bsize == 0)
1773 beos_panic("cache_io: block size == 0 for bnum %lld?!?\n", bnum);
1774
1775 if (num_blocks == 0)
1776 beos_panic("cache_io: bnum %lld has num_blocks == 0!\n", bnum);
1777
1778 if (data == NULL && dataptr == NULL) {
1779 printf("major butthead move: "
1780 "null data and dataptr! bnum %" B_PRIdOFF ":%" B_PRIdOFF "\n",
1781 bnum, num_blocks);
1782 return ENOMEM;
1783 }
1784
1785 if (data == NULL) {
1786 if (num_blocks != 1) /* get_block() should never do that */
1787 beos_panic("cache_io: num_blocks %lld but should be 1\n",
1788 num_blocks);
1789
1790 if (op & CACHE_WRITE)
1791 beos_panic("cache_io: get_block() asked to write?!?\n");
1792 }
1793
1794 if (bnum + num_blocks > max_device_blocks[dev]) {
1795 printf("dev %d: "
1796 "access to blocks %" B_PRIdOFF ":%" B_PRIdOFF " "
1797 "but max_dev_blocks is %" B_PRIdOFF "\n",
1798 dev, bnum, num_blocks, max_device_blocks[dev]);
1799
1800 // let the app crash here
1801 *(int *)0x3100 = 0xc0debabe;
1802 return EINVAL;
1803 }
1804
1805 last_cache_access = system_time();
1806
1807 /* if the i/o is greater than 64k, do it directly */
1808 if (num_blocks * bsize >= 64 * 1024) {
1809 char *ptr;
1810 fs_off_t tmp;
1811
1812 if (data == NULL || (op & CACHE_LOCKED)) {
1813 beos_panic("*** asked to do a large locked io that's too hard!\n");
1814 }
1815
1816
1817 if (op & CACHE_READ) {
1818 if (beos_read_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
1819 printf("cache read:read_phys_blocks failed "
1820 "(%s on blocks %" B_PRIdOFF ":%" B_PRIdOFF ")!\n",
1821 strerror(errno), bnum, num_blocks);
1822 return EINVAL;
1823 }
1824
1825 LOCK(bc.lock);
1826
1827 /* if any of the blocks are in the cache, grab them instead */
1828 ptr = data;
1829 for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
1830 ce = block_lookup(dev, tmp);
1831 /*
1832 if we find a block in the cache we have to copy its
1833 data just in case it is more recent than what we just
1834 read from disk (which could happen if someone wrote
1835 these blocks after we did the read but before we locked
1836 the cache and entered this loop).
1837 */
1838 if (ce) {
1839 if (tmp != ce->block_num || dev != ce->dev) {
1840 UNLOCK(bc.lock);
1841 beos_panic("*** error4: looked up dev %d block %lld but "
1842 "found %d %lld\n", dev, tmp, ce->dev,
1843 ce->block_num);
1844 }
1845
1846 memcpy(ptr, ce->data, bsize);
1847 }
1848 }
1849
1850 UNLOCK(bc.lock);
1851 } else if (op & CACHE_WRITE) {
1852 LOCK(bc.lock);
1853
1854 /* if any of the blocks are in the cache, update them too */
1855 ptr = data;
1856 for(tmp=bnum; tmp < bnum+num_blocks; tmp++, ptr+=bsize) {
1857 ce = block_lookup(dev, tmp);
1858 if (ce) {
1859 if (tmp != ce->block_num || dev != ce->dev) {
1860 UNLOCK(bc.lock);
1861 beos_panic("*** error5: looked up dev %d block %lld but "
1862 "found %d %lld\n", dev, tmp, ce->dev,
1863 ce->block_num);
1864 return EBADF;
1865 }
1866
1867 /* XXXdbg -- this isn't strictly necessary */
1868 if (ce->clone) {
1869 printf("over-writing cloned data "
1870 "(ce %p bnum %" B_PRIdOFF ")...\n", ce, tmp);
1871 flush_cache_ent(ce);
1872 }
1873
1874 /* copy the data into the cache */
1875 memcpy(ce->data, ptr, bsize);
1876 }
1877 }
1878
1879 UNLOCK(bc.lock);
1880
1881 if (beos_write_phys_blocks(dev, bnum, data, num_blocks, bsize) != 0) {
1882 printf("cache write: write_phys_blocks failed (%s on blocks "
1883 "%" B_PRIdOFF ":%" B_PRIdOFF ")!\n",
1884 strerror(errno), bnum, num_blocks);
1885 return EINVAL;
1886 }
1887 } else {
1888 printf("bad cache op %d "
1889 "(bnum %" B_PRIdOFF " nblocks %" B_PRIdOFF ")\n",
1890 op, bnum, num_blocks);
1891 return EINVAL;
1892 }
1893
1894 return 0;
1895 }
1896
1897
1898 LOCK(bc.lock);
1899 while(num_blocks) {
1900
1901 ce = block_lookup(dev, bnum);
1902 if (ce) {
1903 if (bnum != ce->block_num || dev != ce->dev) {
1904 UNLOCK(bc.lock);
1905 beos_panic("*** error6: looked up dev %d block %ld but found "
1906 "%d %ld\n", dev, bnum, ce->dev, ce->block_num);
1907 return EBADF;
1908 }
1909
1910 if (bsize != ce->bsize) {
1911 beos_panic("*** requested bsize %d but ce->bsize %d ce @ %p\n",
1912 bsize, ce->bsize, ce);
1913 }
1914
1915 /* delete this ent from the list it is in because it may change */
1916 if (ce->lock)
1917 cel = &bc.locked;
1918 else
1919 cel = &bc.normal;
1920
1921 delete_from_list(cel, ce);
1922
1923 if (op & CACHE_READ) {
1924 if (data && data != ce->data) {
1925 memcpy(data, ce->data, bsize);
1926 } else if (dataptr) {
1927 *dataptr = ce->data;
1928 } else {
1929 printf("cbio:data %p dptr %p ce @ %p ce->data %p\n",
1930 data, dataptr, ce, ce->data);
1931 }
1932 } else if (op & CACHE_WRITE) {
1933 if (data && data != ce->data)
1934 memcpy(ce->data, data, bsize);
1935
1936 ce->flags |= CE_DIRTY;
1937 } else if (op & CACHE_NOOP) {
1938 memset(ce->data, 0, bsize);
1939 if (data)
1940 memset(data, 0, bsize);
1941
1942 if (dataptr)
1943 *dataptr = ce->data;
1944
1945 ce->flags |= CE_DIRTY;
1946 } else {
1947 beos_panic("cached_block_io: bogus op %d\n", op);
1948 }
1949
1950 if (op & CACHE_LOCKED)
1951 ce->lock++;
1952
1953 if (ce->lock)
1954 cel = &bc.locked;
1955 else
1956 cel = &bc.normal;
1957
1958 /* now put this ent at the head of the appropriate list */
1959 add_to_head(cel, ce);
1960
1961 if (data != NULL)
1962 data = (void *)((char *)data + bsize);
1963
1964 bnum += 1;
1965 num_blocks -= 1;
1966
1967 continue;
1968 } else { /* it's not in the cache */
1969 int cur, cur_nblocks, num_dirty, real_nblocks, num_needed;
1970 cache_ent *ents[NUM_FLUSH_BLOCKS];
1971
1972 /*
1973 here we find out how many additional blocks in this request
1974 are not in the cache. the idea is that then we can do one
1975 big i/o on that many blocks at once.
1976 */
1977 for(cur_nblocks=1;
1978 cur_nblocks < num_blocks && cur_nblocks < NUM_FLUSH_BLOCKS;
1979 cur_nblocks++) {
1980
1981 /* we can call hash_lookup() directly instead of
1982 block_lookup() because we don't care about the
1983 state of the busy bit of the block at this point
1984 */
1985 if (hash_lookup(&bc.ht, dev, bnum + cur_nblocks))
1986 break;
1987 }
1988
1989 /*
1990 here we try to figure out how many extra blocks we should read
1991 for read-ahead. we want to read as many as possible that are
1992 not already in the cache and that don't cause us to try and
1993 read beyond the end of the disk.
1994 */
1995 if ((op & CACHE_READ) && (op & CACHE_READ_AHEAD_OK) &&
1996 (cur_nblocks * bsize) < read_ahead_size) {
1997
1998 for(num_needed=cur_nblocks;
1999 num_needed < (read_ahead_size / bsize);
2000 num_needed++) {
2001
2002 if ((bnum + num_needed) >= max_device_blocks[dev])
2003 break;
2004
2005 if (hash_lookup(&bc.ht, dev, bnum + num_needed))
2006 break;
2007 }
2008 } else {
2009 num_needed = cur_nblocks;
2010 }
2011
2012 /* this will get us pointers to a bunch of cache_ents we can use */
2013 get_ents(ents, num_needed, NUM_FLUSH_BLOCKS, &real_nblocks, bsize);
2014
2015 if (real_nblocks < num_needed) {
2016 beos_panic("don't have enough cache ents "
2017 "(need %d got %d %ld::%" B_PRIdOFF ")\n",
2018 num_needed, real_nblocks, bnum, num_blocks);
2019 }
2020
2021 /*
2022 There are now three variables used as limits within the ents
2023 array. This is how they are related:
2024
2025 cur_nblocks <= num_needed <= real_nblocks
2026
2027 Ents from 0 to cur_nblocks-1 are going to be used to fulfill
2028 this IO request. Ents from cur_nblocks to num_needed-1 are
2029 for read-ahead. Ents from num_needed to real_nblocks are
2030 extra blocks that get_ents() asked us to flush. Often (and
2031 always on writes) cur_nblocks == num_needed.
2032
2033 Below, we sort the list of ents so that when we flush them
2034 they go out in order.
2035 */
2036
2037 qsort(ents, real_nblocks, sizeof(cache_ent **), cache_ent_cmp);
2038
2039 /*
2040 delete each ent from its list because it will change. also
2041 count up how many dirty blocks there are and insert into the
2042 hash table any new blocks so that no one else will try to
2043 read them in when we release the cache semaphore to do our I/O.
2044 */
2045 for(cur=0,num_dirty=0; cur < real_nblocks; cur++) {
2046 ce = ents[cur];
2047 ce->flags |= CE_BUSY;
2048
2049 /*
2050 insert the new block into the hash table with its new block
2051 number. note that the block is still in the hash table for
2052 its old block number -- and it has to be until we are done
2053 flushing it from the cache (to prevent someone else from
2054 sneaking in in front of us and trying to read the same
2055 block that we're flushing).
2056 */
2057 if (cur < num_needed) {
2058 if (hash_insert(&bc.ht, dev, bnum + cur, ce) != 0)
2059 beos_panic("could not insert cache ent for %d %ld (%p)\n",
2060 dev, bnum + cur, ents[cur]);
2061 }
2062
2063 if (ce->dev == -1)
2064 continue;
2065
2066 if ((ce->flags & CE_DIRTY) || ce->clone)
2067 num_dirty++;
2068
2069 if (ce->lock) {
2070 beos_panic("cbio: can't use locked blocks here ce @ %p\n",ce);
2071 } else {
2072 cel = &bc.normal;
2073 delete_from_list(cel, ce);
2074 }
2075 }
2076 ce = NULL;
2077
2078
2079 /*
2080 we release the block cache semaphore here so that we can
2081 go do all the i/o we need to do (flushing dirty blocks
2082 that we're kicking out as well as reading any new data).
2083
2084 because all the blocks we're touching are marked busy
2085 no one else should mess with them while we're doing this.
2086 */
2087 if (num_dirty || (op & CACHE_READ)) {
2088 UNLOCK(bc.lock);
2089
2090 /* this flushes any blocks we're kicking out that are dirty */
2091 if (num_dirty && (err = flush_ents(ents, real_nblocks)) != 0) {
2092 printf("flush ents failed (ents @ %p, nblocks %d!\n",
2093 ents, cur_nblocks);
2094 goto handle_err;
2095 }
2096
2097 }
2098
2099 /*
2100 now that everything is flushed to disk, go through and
2101 make sure that the data blocks we're going to use are
2102 the right block size for this current request (it's
2103 possible we're kicking out some smaller blocks and need
2104 to reallocate the data block pointer). We do this in two
2105 steps, first free'ing everything and then going through
2106 and doing the malloc's to try and be nice to the memory
2107 system (i.e. allow it to coalesce stuff, etc).
2108 */
2109 err = 0;
2110 for(cur=0; cur < num_needed; cur++) {
2111 if (ents[cur]->bsize != bsize) {
2112 free(ents[cur]->data);
2113 ents[cur]->data = NULL;
2114
2115 if (ents[cur]->clone) {
2116 free(ents[cur]->clone);
2117 ents[cur]->clone = NULL;
2118 }
2119 }
2120 }
2121
2122 for(cur=0; cur < num_needed; cur++) {
2123 if (ents[cur]->data == NULL) {
2124 ents[cur]->data = (void *)malloc(bsize);
2125 ents[cur]->bsize = bsize;
2126 }
2127
2128 if (ents[cur]->data == NULL) {
2129 printf("cache: no memory for block (bsize %d)!\n",
2130 bsize);
2131 err = ENOMEM;
2132 break;
2133 }
2134 }
2135
2136 /*
2137 if this condition is true it's a pretty serious error.
2138 we'll try and back out gracefully but we're in pretty
2139 deep at this point and it ain't going to be easy.
2140 */
2141 handle_err:
2142 if (err) {
2143 for(cur=0; cur < num_needed; cur++) {
2144 cache_ent *tmp_ce;
2145
2146 tmp_ce = (cache_ent *)hash_delete(&bc.ht,dev,bnum+cur);
2147 if (tmp_ce != ents[cur]) {
2148 beos_panic("hash_del0: %d %ld got %p, not %p\n",
2149 dev, bnum+cur, tmp_ce,
2150 ents[cur]);
2151 }
2152
2153 tmp_ce = (cache_ent *)hash_delete(&bc.ht,ents[cur]->dev,
2154 ents[cur]->block_num);
2155 if (tmp_ce != ents[cur]) {
2156 beos_panic("hash_del1: %d %ld got %p, not %p\n",
2157 ents[cur]->dev, ents[cur]->block_num, tmp_ce,
2158 ents[cur]);
2159 }
2160
2161 ents[cur]->flags &= ~CE_BUSY;
2162 if (ents[cur]->data)
2163 free(ents[cur]->data);
2164 free(ents[cur]);
2165 ents[cur] = NULL;
2166
2167 bc.cur_blocks--;
2168 }
2169
2170 if (cur < real_nblocks) {
2171 LOCK(bc.lock);
2172 for(; cur < real_nblocks; cur++) {
2173 ents[cur]->flags &= ~CE_BUSY;
2174
2175 /* we have to put them back here */
2176 add_to_tail(&bc.normal, ents[cur]);
2177 }
2178 UNLOCK(bc.lock);
2179 }
2180
2181 return ENOMEM;
2182 }
2183
2184
2185 /*
2186 If we go into this if statement, the block cache lock
2187 has *already been released* up above when we flushed the
2188 dirty entries. As always, since the blocks we're mucking
2189 with are marked busy, they shouldn't get messed with.
2190 */
2191 err = 0;
2192 if (num_dirty || (op & CACHE_READ)) {
2193 /* this section performs the i/o that we need to do */
2194 if (op & CACHE_READ) {
2195 err = read_into_ents(dev, bnum, ents, num_needed, bsize);
2196 } else {
2197 err = 0;
2198 }
2199
2200 if (err != 0) {
2201 printf("err %s on dev %d block %" B_PRIdOFF ":%d (%d) "
2202 "data %p, ents[0] %p\n",
2203 strerror(errno), dev, bnum, cur_nblocks,
2204 bsize, data, ents[0]);
2205 }
2206
2207 /*
2208 acquire the semaphore here so that we can go on mucking
2209 with the cache data structures. We need to delete old
2210 block numbers from the hash table and set the new block
2211 number's for the blocks we just read in. We also put the
2212 read-ahead blocks at the head of mru list.
2213 */
2214
2215 LOCK(bc.lock);
2216 }
2217
2218 for(cur=0; cur < num_needed; cur++) {
2219 cache_ent *tmp_ce;
2220
2221 ce = ents[cur];
2222 if (ce->dev != -1) {
2223 tmp_ce = hash_delete(&bc.ht, ce->dev, ce->block_num);
2224 if (tmp_ce == NULL || tmp_ce != ce) {
2225 beos_panic("*** hash_delete failure (ce %p tce %p)\n",
2226 ce, tmp_ce);
2227 }
2228 }
2229
2230 if (err == 0 && cur >= cur_nblocks) {
2231 ce->dev = dev;
2232 ce->block_num = bnum + cur;
2233 ce->flags &= ~CE_BUSY;
2234 add_to_head(&bc.normal, ce);
2235 }
2236 }
2237 ce = NULL;
2238
2239 /*
2240 clear the busy bit on the blocks we force-flushed and
2241 put them on the normal list since they're now clean.
2242 */
2243 for(; cur < real_nblocks; cur++) {
2244 ents[cur]->flags &= ~CE_BUSY;
2245
2246 if (ents[cur]->lock)
2247 beos_panic("should not have locked blocks here (ce %p)\n",
2248 ents[cur]);
2249
2250 add_to_tail(&bc.normal, ents[cur]);
2251 }
2252
2253 if (err) { /* then we have some cleanup to do */
2254 for(cur=0; cur < num_needed; cur++) {
2255 cache_ent *tmp_ce;
2256
2257 /* we delete all blocks from the cache so we don't
2258 leave partially written blocks in the cache */
2259
2260 tmp_ce = (cache_ent *)hash_delete(&bc.ht,dev,bnum+cur);
2261 if (tmp_ce != ents[cur]) {
2262 beos_panic("hash_del: %d %ld got %p, not %p\n",
2263 dev, bnum+cur, tmp_ce,
2264 ents[cur]);
2265 }
2266
2267 ce = ents[cur];
2268 ce->flags &= ~CE_BUSY;
2269
2270 free(ce->data);
2271 ce->data = NULL;
2272
2273 free(ce);
2274 ents[cur] = NULL;
2275
2276 bc.cur_blocks--;
2277 }
2278 ce = NULL;
2279
2280 UNLOCK(bc.lock);
2281 return err;
2282 }
2283
2284
2285 /*
2286 last step: go through and make sure all the cache_ent
2287 structures have the right data in them, delete old guys, etc.
2288 */
2289 for(cur=0; cur < cur_nblocks; cur++) {
2290 ce = ents[cur];
2291
2292 if (ce->dev != -1) { /* then clean this guy up */
2293 if (ce->next || ce->prev)
2294 beos_panic("ce @ %p should not be in a list yet!\n", ce);
2295
2296 if (ce->clone)
2297 free(ce->clone);
2298
2299 if (ce->data == NULL)
2300 beos_panic("ce @ %p has a null data ptr\n", ce);
2301 }
2302
2303 ce->dev = dev;
2304 ce->block_num = bnum + cur;
2305 ce->bsize = bsize;
2306 ce->flags = CE_NORMAL;
2307 ce->lock = 0;
2308 ce->clone = NULL;
2309 ce->func = ce->arg = NULL;
2310 ce->next = ce->prev = NULL;
2311
2312 if (op & CACHE_READ) {
2313 if (data)
2314 memcpy(data, ce->data, bsize);
2315 } else if (op & CACHE_WRITE) {
2316 ce->flags |= CE_DIRTY;
2317 memcpy(ce->data, data, bsize);
2318 } else if (op & CACHE_NOOP) {
2319 memset(ce->data, 0, bsize);
2320 if (data)
2321 memset(data, 0, bsize);
2322
2323 ce->flags |= CE_DIRTY;
2324 }
2325
2326 if (op & CACHE_LOCKED) {
2327 ce->lock++;
2328 cel = &bc.locked;
2329 } else {
2330 cel = &bc.normal;
2331 }
2332
2333 /* now stick this puppy at the head of the mru list */
2334 add_to_head(cel, ce);
2335
2336
2337 if (dataptr) {
2338 *dataptr = ce->data;
2339 }
2340
2341 if (data != NULL)
2342 data = (void *)((char *)data + bsize);
2343 else if (cur_nblocks != 1)
2344 beos_panic("cache can't handle setting data_ptr twice!\n");
2345 } /* end of for(cur=0; cur < cur_nblocks; cur++) */
2346
2347 bnum += cur_nblocks;
2348 num_blocks -= cur_nblocks;
2349
2350 } /* end of else it's not in the cache */
2351
2352 } /* end of while(num_blocks) */
2353
2354 UNLOCK(bc.lock);
2355
2356 return 0;
2357 }
2358
2359
2360 void *
2361 beos_get_block(int dev, fs_off_t bnum, int bsize)
2362 {
2363 void *data;
2364
2365 if (cache_block_io(dev, bnum, NULL, 1, bsize, CACHE_READ|CACHE_LOCKED|CACHE_READ_AHEAD_OK,
2366 &data) != 0)
2367 return NULL;
2368
2369 return data;
2370 }
2371
2372 void *
2373 beos_get_empty_block(int dev, fs_off_t bnum, int bsize)
2374 {
2375 void *data;
2376
2377 if (cache_block_io(dev, bnum, NULL, 1, bsize, CACHE_NOOP|CACHE_LOCKED,
2378 &data) != 0)
2379 return NULL;
2380
2381 return data;
2382 }
2383
2384 int
2385 beos_cached_read(int dev, fs_off_t bnum, void *data, fs_off_t num_blocks, int bsize)
2386 {
2387 return cache_block_io(dev, bnum, data, num_blocks, bsize,
2388 CACHE_READ | CACHE_READ_AHEAD_OK, NULL);
2389 }
2390
2391
2392 int
2393 beos_cached_write(int dev, fs_off_t bnum, const void *data, fs_off_t num_blocks, int bsize)
2394 {
2395 return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
2396 CACHE_WRITE, NULL);
2397 }
2398
2399 int
2400 beos_cached_write_locked(int dev, fs_off_t bnum, const void *data,
2401 fs_off_t num_blocks, int bsize)
2402 {
2403 return cache_block_io(dev, bnum, (void *)data, num_blocks, bsize,
2404 CACHE_WRITE | CACHE_LOCKED, NULL);
2405 }
2406
2407
2408 void
2409 beos_force_cache_flush(int dev, int prefer_log_blocks)
2410 {
2411 int i, count = 0;
2412 cache_ent *ce;
2413 cache_ent *ents[NUM_FLUSH_BLOCKS];
2414
2415
2416 LOCK(bc.lock);
2417
2418 for(ce=bc.normal.lru; ce; ce=ce->next) {
2419 if ((ce->dev == dev) &&
2420 (ce->flags & CE_BUSY) == 0 &&
2421 ((ce->flags & CE_DIRTY) || ce->clone) &&
2422 ((prefer_log_blocks && ce->func) || (prefer_log_blocks == 0))) {
2423
2424 ce->flags |= CE_BUSY;
2425 ents[count++] = ce;
2426
2427 if (count >= NUM_FLUSH_BLOCKS) {
2428 break;
2429 }
2430 }
2431 }
2432
2433 /* if we've got some room left, try and grab any cloned blocks */
2434 if (count < NUM_FLUSH_BLOCKS) {
2435 for(ce=bc.locked.lru; ce; ce=ce->next) {
2436 if ((ce->dev == dev) &&
2437 (ce->flags & CE_BUSY) == 0 &&
2438 (ce->clone)) {
2439
2440 ce->flags |= CE_BUSY;
2441 ents[count++] = ce;
2442
2443 if (count >= NUM_FLUSH_BLOCKS) {
2444 break;
2445 }
2446 }
2447 }
2448 }
2449
2450 UNLOCK(bc.lock);
2451
2452 if (count != 0) {
2453 qsort(ents, count, sizeof(cache_ent **), cache_ent_cmp);
2454 flush_ents(ents, count);
2455
2456 for(i=0; i < count; i++)
2457 ents[i]->flags &= ~CE_BUSY;
2458 }
2459 }
2460