xref: /haiku/src/add-ons/kernel/file_cache/rule_based_prefetcher.cpp (revision b671e9bbdbd10268a042b4f4cc4317ccd03d105e)
1 /*
2  * Copyright 2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 /** This module applies rules that were learned by another component by
7  *	evaluating the cache log files.
8  *
9  *	Note: this module is using private kernel API and is definitely not
10  *		meant to be an example on how to write modules.
11  */
12 
13 
14 #include <KernelExport.h>
15 #include <Node.h>
16 
17 #include <util/kernel_cpp.h>
18 #include <util/khash.h>
19 #include <util/AutoLock.h>
20 #include <thread.h>
21 #include <team.h>
22 #include <file_cache.h>
23 #include <generic_syscall.h>
24 #include <syscalls.h>
25 
26 #include <unistd.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <stdio.h>
30 #include <errno.h>
31 #include <ctype.h>
32 
33 
34 #define TRACE_CACHE_MODULE
35 #ifdef TRACE_CACHE_MODULE
36 #	define TRACE(x) dprintf x
37 #else
38 #	define TRACE(x) ;
39 #endif
40 
41 
42 extern dev_t gBootDevice;
43 
44 
45 enum match_type {
46 	NO_MATCH,
47 	CONTINUE_MATCHING,
48 	MATCH,
49 };
50 
51 enum rule_type {
52 	LAUNCH_TYPE		= 0x1,
53 	OPEN_FILE_TYPE	= 0x2,
54 	ARGUMENTS_TYPE	= 0x4,
55 	ALL_TYPES		= 0xff,
56 };
57 
58 struct head {
59 	head();
60 
61 	struct list_link	link;
62 	mount_id			device;
63 	vnode_id			parent;
64 	vnode_id			node;
65 	char				name[B_FILE_NAME_LENGTH];
66 	int32				confidence;
67 	bigtime_t			timestamp;
68 	int32				used_count;
69 };
70 
71 struct body {
72 	struct list_link	link;
73 };
74 
75 class Rule {
76 	public:
77 		Rule(rule_type type);
78 		~Rule();
79 
80 		status_t InitCheck();
81 		rule_type Type() const { return fType; }
82 
83 		void AddHead(struct head *head);
84 		void AddBody(struct body *body);
85 
86 		struct head *FindHead(mount_id device, vnode_id node);
87 		match_type Match(int32 state, mount_id device, vnode_id parent, const char *name);
88 		void Apply();
89 
90 		void KnownFileOpened() { fKnownFileOpened++; }
91 		void UnknownFileOpened() { fUnknownFileOpened++; }
92 
93 		void Dump();
94 
95 		Rule *&Next() { return fNext; }
96 
97 		static uint32 NextOffset() { return offsetof(Rule, fNext); }
98 		static status_t LoadRules();
99 
100 	private:
101 		Rule		*fNext;
102 		rule_type	fType;
103 		struct list	fHeads;
104 		struct list	fBodies;
105 		int32		fHeadCount;
106 		int32		fBodyCount;
107 		int32		fConfidence;
108 		int32		fAppliedCount;
109 		int32		fKnownFileOpened;
110 		int32		fUnknownFileOpened;
111 };
112 
113 struct rule_state {
114 	list_link	link;
115 	Rule		*rule;
116 	int32		state;
117 };
118 
119 struct rules {
120 	rules		*next;
121 	char		name[B_FILE_NAME_LENGTH];
122 	Rule		*first;
123 };
124 
125 struct team_rules {
126 	~team_rules();
127 
128 	team_rules	*next;
129 	team_id		team;
130 	struct list	states;
131 	struct list	applied;
132 	bigtime_t	timestamp;
133 };
134 
135 class RuleMatcher {
136 	public:
137 		RuleMatcher(team_id team, const char *name = NULL);
138 		~RuleMatcher();
139 
140 		void GotFile(mount_id device, vnode_id node);
141 		void GotArguments(int32 argCount, char * const *args);
142 
143 	private:
144 		void _CollectRules(const char *name);
145 		void _MatchFile(mount_id device, vnode_id parent, const char *name);
146 		void _MatchArguments(int32 argCount, char * const *args);
147 
148 		team_rules	*fRules;
149 };
150 
151 static hash_table *sRulesHash;
152 static hash_table *sTeamHash;
153 static recursive_lock sLock;
154 int32 sMinConfidence = 5000;
155 
156 
157 static int
158 rules_compare(void *_rules, const void *_key)
159 {
160 	struct rules *rules = (struct rules *)_rules;
161 	const char *key = (const char *)_key;
162 
163 	return strcmp(rules->name, key);
164 }
165 
166 
167 static uint32
168 rules_hash(void *_rules, const void *_key, uint32 range)
169 {
170 	struct rules *rules = (struct rules *)_rules;
171 	const char *key = (const char *)_key;
172 
173 	if (rules != NULL)
174 		return hash_hash_string(rules->name) % range;
175 
176 	return hash_hash_string(key) % range;
177 }
178 
179 
180 //	#pragma mark -
181 
182 
183 static int
184 team_compare(void *_rules, const void *_key)
185 {
186 	team_rules *rules = (team_rules *)_rules;
187 	const team_id *team = (const team_id *)_key;
188 
189 	if (rules->team == *team)
190 		return 0;
191 
192 	return -1;
193 }
194 
195 
196 static uint32
197 team_hash(void *_rules, const void *_key, uint32 range)
198 {
199 	team_rules *rules = (team_rules *)_rules;
200 	const team_id *team = (const team_id *)_key;
201 
202 	if (rules != NULL)
203 		return rules->team % range;
204 
205 	return *team % range;
206 }
207 
208 
209 static void
210 team_gone(team_id team, void *_rules)
211 {
212 	team_rules *rules = (team_rules *)_rules;
213 
214 	recursive_lock_lock(&sLock);
215 	hash_remove(sTeamHash, rules);
216 	recursive_lock_unlock(&sLock);
217 
218 	delete rules;
219 }
220 
221 
222 team_rules::~team_rules()
223 {
224 	// free rule states
225 
226 	rule_state *state;
227 	while ((state = (rule_state *)list_remove_head_item(&states)) != NULL) {
228 		delete state;
229 	}
230 	while ((state = (rule_state *)list_remove_head_item(&applied)) != NULL) {
231 		delete state;
232 	}
233 
234 	stop_watching_team(team, team_gone, this);
235 }
236 
237 
238 //	#pragma mark -
239 
240 
241 head::head()
242 {
243 	device = -1;
244 	parent = -1;
245 	node = -1;
246 	name[0] = '\0';
247 	confidence = -1;
248 	timestamp = 0;
249 	used_count = 0;
250 }
251 
252 
253 static inline rules *
254 find_rules(const char *name)
255 {
256 	return (rules *)hash_lookup(sRulesHash, name);
257 }
258 
259 
260 /**	Finds the rule matching to the criteria (name and type).
261  *	If there is no matching rule yet, it will create one.
262  *	It will only return NULL in out of memory conditions.
263  */
264 
265 static Rule *
266 get_rule(const char *name, rule_type type)
267 {
268 	struct rules *rules = find_rules(name);
269 	if (rules == NULL) {
270 		// add new rules
271 		rules = new ::rules;
272 		if (rules == NULL)
273 			return NULL;
274 
275 		strlcpy(rules->name, name, B_FILE_NAME_LENGTH);
276 		rules->first = NULL;
277 		hash_insert(sRulesHash, rules);
278 	}
279 
280 	// search for matching rule type
281 
282 	Rule *rule = rules->first;
283 	while (rule && rule->Type() != type) {
284 		rule = rule->Next();
285 	}
286 
287 	if (rule == NULL) {
288 		TRACE(("create new rule for \"%s\", type %d\n", name, type));
289 		// there is no rule yet, create one
290 		rule = new Rule(type);
291 		if (rule == NULL)
292 			return NULL;
293 
294 		rule->Next() = rules->first;
295 		rules->first = rule;
296 	}
297 
298 	return rule;
299 }
300 
301 
302 static void
303 eat_spaces(char *&line)
304 {
305 	// eat starting white space
306 	while (isspace(line[0]))
307 		line++;
308 }
309 
310 
311 static bool
312 parse_ref(const char *string, mount_id &device, vnode_id &node, char **_end = NULL)
313 {
314 	// parse node ref
315 	char *end;
316 	device = strtol(string, &end, 0);
317 	if (end == NULL || device == 0 || end[0] != ':')
318 		return false;
319 
320 	node = strtoull(end + 1, &end, 0);
321 	if (end == NULL || end[0] != ':')
322 		return false;
323 
324 	if (_end)
325 		*_end = end + 1;
326 	return true;
327 }
328 
329 
330 static void
331 ignore_line(char *&line)
332 {
333 	while (line[0] && line[0] != '\n')
334 		line++;
335 }
336 
337 
338 static const char *
339 get_name(char *&line)
340 {
341 	if (line[0] != '"')
342 		return NULL;
343 
344 	const char *name = ++line;
345 
346 	while (line[0] && line[0] != '"') {
347 		if (line[0] == '\\')
348 			line++;
349 		line++;
350 	}
351 
352 	line[0] = '\0';
353 	line++;
354 
355 	return name;
356 }
357 
358 
359 static status_t
360 load_rules()
361 {
362 	int fd = open("/etc/cache_rules", O_RDONLY);
363 	if (fd < B_OK)
364 		return fd;
365 
366 	struct stat stat;
367 	if (fstat(fd, &stat) != 0) {
368 		close(fd);
369 		return errno;
370 	}
371 
372 	if (stat.st_size > 32767) {
373 		// for safety reasons
374 		// ToDo: make a bit larger later
375 		close(fd);
376 		return B_BAD_DATA;
377 	}
378 
379 	char *buffer = (char *)malloc(stat.st_size + 1);
380 	if (buffer == NULL) {
381 		close(fd);
382 		return B_NO_MEMORY;
383 	}
384 
385 	if (read(fd, buffer, stat.st_size) < stat.st_size) {
386 		free(buffer);
387 		close(fd);
388 		return B_ERROR;
389 	}
390 
391 	buffer[stat.st_size] = '\0';
392 
393 	char *line = buffer;
394 	while (line[0]) {
395 		switch (line[0]) {
396 			case 'c':
397 			{
398 				mount_id device;
399 				vnode_id node;
400 
401 				// direct "command opens file" rule
402 				line += 2;
403 				if (parse_ref(line, device, node, &line)) {
404 					const char *fileName = get_name(line);
405 					if (fileName != NULL) {
406 						eat_spaces(line);
407 						const char *name = get_name(line);
408 						if (name != NULL) {
409 							eat_spaces(line);
410 							int32 confidence = strtoul(line, &line, 10);
411 							TRACE(("c %ld:%Ld:%s %s %ld\n", device, node, fileName, name, confidence));
412 
413 							struct head *head = new ::head;
414 							head->device = device;
415 							head->parent = node;
416 							strlcpy(head->name, fileName, B_FILE_NAME_LENGTH);
417 							head->confidence = confidence;
418 
419 							Rule *rule = get_rule(name, LAUNCH_TYPE);
420 							if (rule != NULL)
421 								rule->AddHead(head);
422 							break;
423 						}
424 					}
425 				}
426 				ignore_line(line);
427 				break;
428 			}
429 			default:
430 				// unknown rule - ignore line
431 				ignore_line(line);
432 				break;
433 		}
434 		line++;
435 	}
436 
437 	free(buffer);
438 	close(fd);
439 	return B_OK;
440 }
441 
442 
443 //	#pragma mark -
444 
445 
446 Rule::Rule(rule_type type)
447 	:
448 	fType(type),
449 	fHeadCount(0),
450 	fBodyCount(0),
451 	fAppliedCount(0),
452 	fKnownFileOpened(0),
453 	fUnknownFileOpened(0)
454 {
455 	list_init(&fHeads);
456 	list_init(&fBodies);
457 }
458 
459 
460 Rule::~Rule()
461 {
462 	struct head *head;
463 	while ((head = (struct head *)list_remove_head_item(&fHeads)) != NULL) {
464 		delete head;
465 	}
466 
467 	struct body *body;
468 	while ((body = (struct body *)list_remove_head_item(&fBodies)) != NULL) {
469 		delete body;
470 	}
471 }
472 
473 
474 void
475 Rule::AddHead(struct head *head)
476 {
477 	list_add_item(&fHeads, head);
478 	fHeadCount++;
479 }
480 
481 
482 void
483 Rule::AddBody(struct body *body)
484 {
485 	list_add_item(&fBodies, body);
486 	fBodyCount++;
487 }
488 
489 
490 void
491 Rule::Apply()
492 {
493 	TRACE(("Apply rule %p", this));
494 
495 	struct head *head = NULL;
496 	while ((head = (struct head *)list_get_next_item(&fHeads, head)) != NULL) {
497 		if (head->confidence < sMinConfidence)
498 			continue;
499 
500 		// prefetch node
501 		void *vnode;
502 		if (vfs_entry_ref_to_vnode(head->device, head->parent, head->name, &vnode) == B_OK) {
503 			vfs_vnode_to_node_ref(vnode, &head->device, &head->node);
504 
505 			TRACE(("prefetch: %ld:%Ld:%s\n", head->device, head->parent, head->name));
506 			cache_prefetch(head->device, head->node, 0, ~0UL);
507 
508 			// ToDo: put head into a hash so that some statistics can be backpropagated quickly
509 		} else {
510 			// node doesn't exist anymore
511 			head->confidence = -1;
512 		}
513 	}
514 
515 	fAppliedCount++;
516 }
517 
518 
519 struct head *
520 Rule::FindHead(mount_id device, vnode_id node)
521 {
522 	// ToDo: use a hash for this!
523 
524 	struct head *head = NULL;
525 	while ((head = (struct head *)list_get_next_item(&fHeads, head)) != NULL) {
526 		if (head->node == node && head->device == device)
527 			return head;
528 	}
529 
530 	return NULL;
531 }
532 
533 
534 void
535 Rule::Dump()
536 {
537 	dprintf("  applied: %ld, known: %ld, unknown: %ld\n",
538 		fAppliedCount, fKnownFileOpened, fUnknownFileOpened);
539 
540 	struct head *head = NULL;
541 	while ((head = (struct head *)list_get_next_item(&fHeads, head)) != NULL) {
542 		dprintf("  %ld:%Ld:\"%s\", ", head->device, head->parent, head->name);
543 		if (head->confidence < sMinConfidence)
544 			dprintf("-\n");
545 		else
546 			dprintf("%ld (%ld), %Ld us\n", head->used_count,
547 				head->used_count - fAppliedCount, head->timestamp);
548 	}
549 }
550 
551 
552 //	#pragma mark -
553 
554 
555 RuleMatcher::RuleMatcher(team_id team, const char *name)
556 	:
557 	fRules(NULL)
558 {
559 	recursive_lock_lock(&sLock);
560 
561 	if (name == NULL)
562 		return;
563 
564 	fRules = (team_rules *)hash_lookup(sTeamHash, &team);
565 	if (fRules != NULL)
566 		return;
567 
568 	fRules = new team_rules;
569 	if (fRules == NULL)
570 		return;
571 
572 	fRules->team = team;
573 	list_init(&fRules->states);
574 	list_init(&fRules->applied);
575 
576 dprintf("new rules for \"%s\"\n", name);
577 	_CollectRules(name);
578 
579 	hash_insert(sTeamHash, fRules);
580 	start_watching_team(team, team_gone, fRules);
581 
582 	fRules->timestamp = system_time();
583 }
584 
585 
586 RuleMatcher::~RuleMatcher()
587 {
588 	recursive_lock_unlock(&sLock);
589 }
590 
591 
592 void
593 RuleMatcher::_CollectRules(const char *name)
594 {
595 	struct rules *rules = (struct rules *)hash_lookup(sRulesHash, name);
596 	if (rules == NULL) {
597 		// there are no rules for this command
598 		return;
599 	}
600 
601 	// allocate states for all rules found
602 
603 	for (Rule *rule = rules->first; rule != NULL; rule = rule->Next()) {
604 		rule_state *state = new rule_state;
605 		if (state == NULL)
606 			continue;
607 
608 		TRACE(("found rule %p for \"%s\"\n", rule, rules->name));
609 		state->rule = rule;
610 		state->state = 0;
611 
612 		if (rule->Type() == LAUNCH_TYPE) {
613 			// we can already prefetch the simplest of all rules here
614 			// (it's fulfilled as soon as the command is launched)
615 			rule->Apply();
616 			list_add_item(&fRules->applied, state);
617 		} else
618 			list_add_item(&fRules->states, state);
619 	}
620 }
621 
622 
623 void
624 RuleMatcher::GotFile(mount_id device, vnode_id node)
625 {
626 	if (fRules == NULL)
627 		return;
628 
629 	// try to match the file with all open rules
630 
631 	rule_state *state = NULL;
632 	while ((state = (rule_state *)list_get_next_item(&fRules->states, state)) != NULL) {
633 		if ((state->rule->Type() & OPEN_FILE_TYPE) == 0)
634 			continue;
635 
636 		// ToDo
637 	}
638 
639 	bigtime_t diff = system_time() - fRules->timestamp;
640 
641 	// propagate the usage of this file back to the applied rules
642 
643 	while ((state = (rule_state *)list_get_next_item(&fRules->applied, state)) != NULL) {
644 		struct head *head = state->rule->FindHead(device, node);
645 		if (head != NULL) {
646 			// grow confidence
647 			state->rule->KnownFileOpened();
648 			head->used_count++;
649 			if (head->used_count > 1)
650 				head->timestamp = (head->timestamp * (head->used_count - 1) + diff) / head->used_count;
651 		} else
652 			state->rule->UnknownFileOpened();
653 	}
654 }
655 
656 
657 void
658 RuleMatcher::GotArguments(int32 argCount, char * const *args)
659 {
660 	if (fRules == NULL)
661 		return;
662 
663 	// try to match the arguments with all open rules
664 
665 	rule_state *state = NULL;
666 	while ((state = (rule_state *)list_get_next_item(&fRules->states, state)) != NULL) {
667 		if ((state->rule->Type() & ARGUMENTS_TYPE) == 0)
668 			continue;
669 
670 		// ToDo
671 	}
672 }
673 
674 
675 //	#pragma mark -
676 
677 
678 static void
679 node_opened(void *vnode, int32 fdType, mount_id device, vnode_id parent,
680 	vnode_id node, const char *name, off_t size)
681 {
682 	if (device < gBootDevice) {
683 		// we ignore any access to rootfs, pipefs, and devfs
684 		// ToDo: if we can ever move the boot device on the fly, this will break
685 		return;
686 	}
687 
688 	// ToDo: this is only needed if there is no rule for this team yet - ideally,
689 	//	it would be handled by the log module, so that the vnode ID is sufficient
690 	//	to recognize a rule
691 	char buffer[B_FILE_NAME_LENGTH];
692 	if (name == NULL
693 		&& vfs_get_vnode_name(vnode, buffer, sizeof(buffer)) == B_OK)
694 		name = buffer;
695 
696 	//dprintf("opened: %ld:%Ld:%Ld:%s (%s)\n", device, parent, node, name, thread_get_current_thread()->name);
697 	RuleMatcher matcher(team_get_current_team_id(), name);
698 	matcher.GotFile(device, node);
699 }
700 
701 
702 static void
703 node_launched(size_t argCount, char * const *args)
704 {
705 	//dprintf("launched: %s (%s)\n", args[0], thread_get_current_thread()->name);
706 	RuleMatcher matcher(team_get_current_team_id());
707 	matcher.GotArguments(argCount, args);
708 }
709 
710 
711 static void
712 uninit()
713 {
714 	recursive_lock_lock(&sLock);
715 
716 	// free all sessions from the hashes
717 
718 	uint32 cookie = 0;
719 	team_rules *teamRules;
720 	while ((teamRules = (team_rules *)hash_remove_first(sTeamHash, &cookie)) != NULL) {
721 		delete teamRules;
722 	}
723 	struct rules *rules;
724 	cookie = 0;
725 	while ((rules = (struct rules *)hash_remove_first(sRulesHash, &cookie)) != NULL) {
726 		Rule *rule = rules->first;
727 		while (rule) {
728 			Rule *next = rule->Next();
729 
730 			dprintf("Rule %p \"%s\"\n", rule, rules->name);
731 			rule->Dump();
732 
733 			delete rule;
734 			rule = next;
735 		}
736 		delete rules;
737 	}
738 
739 	hash_uninit(sTeamHash);
740 	hash_uninit(sRulesHash);
741 	recursive_lock_destroy(&sLock);
742 }
743 
744 
745 static status_t
746 init()
747 {
748 	sTeamHash = hash_init(64, 0, &team_compare, &team_hash);
749 	if (sTeamHash == NULL)
750 		return B_NO_MEMORY;
751 
752 	status_t status;
753 
754 	sRulesHash = hash_init(64, 0, &rules_compare, &rules_hash);
755 	if (sRulesHash == NULL) {
756 		hash_uninit(sTeamHash);
757 		return B_NO_MEMORY;
758 	}
759 
760 	recursive_lock_init(&sLock, "rule based prefetcher");
761 
762 	load_rules();
763 	return B_OK;
764 }
765 
766 
767 static status_t
768 std_ops(int32 op, ...)
769 {
770 	switch (op) {
771 		case B_MODULE_INIT:
772 			return init();
773 
774 		case B_MODULE_UNINIT:
775 			uninit();
776 			return B_OK;
777 
778 		default:
779 			return B_ERROR;
780 	}
781 }
782 
783 
784 static struct cache_module_info sRuleBasedPrefetcherModule = {
785 	{
786 		CACHE_MODULES_NAME "/rule_based_prefetcher/v1",
787 		0,
788 		std_ops,
789 	},
790 	node_opened,
791 	NULL,
792 	node_launched,
793 };
794 
795 
796 module_info *modules[] = {
797 	(module_info *)&sRuleBasedPrefetcherModule,
798 	NULL
799 };
800