xref: /haiku/src/add-ons/kernel/file_cache/rule_based_prefetcher.cpp (revision cb29eafe2586fdb2d7685afa69fdab5d88a8b576)
1 /*
2  * Copyright 2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 /** This module applies rules that were learned by another component by
7  *	evaluating the cache log files.
8  *
9  *	Note: this module is using private kernel API and is definitely not
10  *		meant to be an example on how to write modules.
11  */
12 
13 
14 #include <KernelExport.h>
15 #include <Node.h>
16 
17 #include <util/kernel_cpp.h>
18 #include <util/AutoLock.h>
19 #include <thread.h>
20 #include <team.h>
21 #include <file_cache.h>
22 #include <generic_syscall.h>
23 #include <syscalls.h>
24 
25 #include <unistd.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <stdio.h>
29 #include <errno.h>
30 #include <ctype.h>
31 
32 
33 #define TRACE_CACHE_MODULE
34 #ifdef TRACE_CACHE_MODULE
35 #	define TRACE(x) dprintf x
36 #else
37 #	define TRACE(x) ;
38 #endif
39 
40 
41 extern dev_t gBootDevice;
42 
43 
44 enum match_type {
45 	NO_MATCH,
46 	CONTINUE_MATCHING,
47 	MATCH,
48 };
49 
50 enum rule_type {
51 	LAUNCH_TYPE		= 0x1,
52 	OPEN_FILE_TYPE	= 0x2,
53 	ARGUMENTS_TYPE	= 0x4,
54 	ALL_TYPES		= 0xff,
55 };
56 
57 struct head {
58 	head();
59 
60 	struct list_link	link;
61 	mount_id			device;
62 	vnode_id			parent;
63 	vnode_id			node;
64 	char				name[B_FILE_NAME_LENGTH];
65 	int32				confidence;
66 	bigtime_t			timestamp;
67 	int32				used_count;
68 };
69 
70 struct body {
71 	struct list_link	link;
72 };
73 
74 class Rule {
75 	public:
76 		Rule(rule_type type);
77 		~Rule();
78 
79 		status_t InitCheck();
80 		rule_type Type() const { return fType; }
81 
82 		void AddHead(struct head *head);
83 		void AddBody(struct body *body);
84 
85 		struct head *FindHead(mount_id device, vnode_id node);
86 		match_type Match(int32 state, mount_id device, vnode_id parent,
87 			const char *name);
88 		void Apply();
89 
90 		void KnownFileOpened() { fKnownFileOpened++; }
91 		void UnknownFileOpened() { fUnknownFileOpened++; }
92 
93 		void Dump();
94 
95 		Rule *&Next() { return fNext; }
96 
97 		static uint32 NextOffset() { return offsetof(Rule, fNext); }
98 		static status_t LoadRules();
99 
100 	private:
101 		Rule		*fNext;
102 		rule_type	fType;
103 		struct list	fHeads;
104 		struct list	fBodies;
105 		int32		fHeadCount;
106 		int32		fBodyCount;
107 		int32		fConfidence;
108 		int32		fAppliedCount;
109 		int32		fKnownFileOpened;
110 		int32		fUnknownFileOpened;
111 };
112 
113 struct rule_state {
114 	list_link	link;
115 	Rule		*rule;
116 	int32		state;
117 };
118 
119 struct rules {
120 	rules		*next;
121 	char		name[B_FILE_NAME_LENGTH];
122 	Rule		*first;
123 };
124 
125 struct team_rules {
126 	~team_rules();
127 
128 	team_rules	*next;
129 	team_id		team;
130 	struct list	states;
131 	struct list	applied;
132 	bigtime_t	timestamp;
133 };
134 
135 class RuleMatcher {
136 	public:
137 		RuleMatcher(team_id team, const char *name = NULL);
138 		~RuleMatcher();
139 
140 		void GotFile(mount_id device, vnode_id node);
141 		void GotArguments(int32 argCount, char * const *args);
142 
143 	private:
144 		void _CollectRules(const char *name);
145 		void _MatchFile(mount_id device, vnode_id parent, const char *name);
146 		void _MatchArguments(int32 argCount, char * const *args);
147 
148 		team_rules	*fRules;
149 };
150 
151 
152 struct RuleHash {
153 		typedef char*		KeyType;
154 		typedef	rules		ValueType;
155 
156 		size_t HashKey(KeyType key) const
157 		{
158 			return hash_hash_string(key);
159 		}
160 
161 		size_t Hash(ValueType* value) const
162 		{
163 			return HashKey(value->name);
164 		}
165 
166 		bool Compare(KeyType key, ValueType* rules) const
167 		{
168 			return strcmp(rules->name, key) == 0;
169 		}
170 
171 		ValueType*& GetLink(ValueType* value) const
172 		{
173 			return value->fNext;
174 		}
175 };
176 
177 
178 struct TeamHash {
179 		typedef team_id		KeyType;
180 		typedef	team_rules	ValueType;
181 
182 		size_t HashKey(KeyType key) const
183 		{
184 			return key;
185 		}
186 
187 		size_t Hash(ValueType* value) const
188 		{
189 			return value->team;
190 		}
191 
192 		bool Compare(KeyType key, ValueType* value) const
193 		{
194 			return value->team == key;
195 		}
196 
197 		ValueType*& GetLink(ValueType* value) const
198 		{
199 			return value->fNext;
200 		}
201 };
202 
203 
204 typedef BOpenHashTable<RuleHash> RuleTable;
205 typedef BOpenHashTable<TeamHash> TeamTable;
206 
207 static RuleTable *sRulesHash;
208 static TeamTable *sTeamHash;
209 static recursive_lock sLock;
210 int32 sMinConfidence = 5000;
211 
212 
213 static void
214 team_gone(team_id team, void *_rules)
215 {
216 	team_rules *rules = (team_rules *)_rules;
217 
218 	recursive_lock_lock(&sLock);
219 	sTeamHash->Remove(rules);
220 	recursive_lock_unlock(&sLock);
221 
222 	delete rules;
223 }
224 
225 
226 team_rules::~team_rules()
227 {
228 	// free rule states
229 
230 	rule_state *state;
231 	while ((state = (rule_state *)list_remove_head_item(&states)) != NULL) {
232 		delete state;
233 	}
234 	while ((state = (rule_state *)list_remove_head_item(&applied)) != NULL) {
235 		delete state;
236 	}
237 
238 	stop_watching_team(team, team_gone, this);
239 }
240 
241 
242 //	#pragma mark -
243 
244 
245 head::head()
246 {
247 	device = -1;
248 	parent = -1;
249 	node = -1;
250 	name[0] = '\0';
251 	confidence = -1;
252 	timestamp = 0;
253 	used_count = 0;
254 }
255 
256 
257 static inline rules *
258 find_rules(const char *name)
259 {
260 	return sRulesHash->Lookup(name);
261 }
262 
263 
264 /**	Finds the rule matching to the criteria (name and type).
265  *	If there is no matching rule yet, it will create one.
266  *	It will only return NULL in out of memory conditions.
267  */
268 
269 static Rule *
270 get_rule(const char *name, rule_type type)
271 {
272 	struct rules *rules = find_rules(name);
273 	if (rules == NULL) {
274 		// add new rules
275 		rules = new ::rules;
276 		if (rules == NULL)
277 			return NULL;
278 
279 		strlcpy(rules->name, name, B_FILE_NAME_LENGTH);
280 		rules->first = NULL;
281 		sRulesHash->Insert(rules);
282 	}
283 
284 	// search for matching rule type
285 
286 	Rule *rule = rules->first;
287 	while (rule && rule->Type() != type) {
288 		rule = rule->Next();
289 	}
290 
291 	if (rule == NULL) {
292 		TRACE(("create new rule for \"%s\", type %d\n", name, type));
293 		// there is no rule yet, create one
294 		rule = new Rule(type);
295 		if (rule == NULL)
296 			return NULL;
297 
298 		rule->Next() = rules->first;
299 		rules->first = rule;
300 	}
301 
302 	return rule;
303 }
304 
305 
306 static void
307 eat_spaces(char *&line)
308 {
309 	// eat starting white space
310 	while (isspace(line[0]))
311 		line++;
312 }
313 
314 
315 static bool
316 parse_ref(const char *string, mount_id &device, vnode_id &node, char **_end = NULL)
317 {
318 	// parse node ref
319 	char *end;
320 	device = strtol(string, &end, 0);
321 	if (end == NULL || device == 0 || end[0] != ':')
322 		return false;
323 
324 	node = strtoull(end + 1, &end, 0);
325 	if (end == NULL || end[0] != ':')
326 		return false;
327 
328 	if (_end)
329 		*_end = end + 1;
330 	return true;
331 }
332 
333 
334 static void
335 ignore_line(char *&line)
336 {
337 	while (line[0] && line[0] != '\n')
338 		line++;
339 }
340 
341 
342 static const char *
343 get_name(char *&line)
344 {
345 	if (line[0] != '"')
346 		return NULL;
347 
348 	const char *name = ++line;
349 
350 	while (line[0] && line[0] != '"') {
351 		if (line[0] == '\\')
352 			line++;
353 		line++;
354 	}
355 
356 	line[0] = '\0';
357 	line++;
358 
359 	return name;
360 }
361 
362 
363 static status_t
364 load_rules()
365 {
366 	int fd = open("/etc/cache_rules", O_RDONLY);
367 	if (fd < B_OK)
368 		return fd;
369 
370 	struct stat stat;
371 	if (fstat(fd, &stat) != 0) {
372 		close(fd);
373 		return errno;
374 	}
375 
376 	if (stat.st_size > 32767) {
377 		// for safety reasons
378 		// ToDo: make a bit larger later
379 		close(fd);
380 		return B_BAD_DATA;
381 	}
382 
383 	char *buffer = (char *)malloc(stat.st_size + 1);
384 	if (buffer == NULL) {
385 		close(fd);
386 		return B_NO_MEMORY;
387 	}
388 
389 	if (read(fd, buffer, stat.st_size) < stat.st_size) {
390 		free(buffer);
391 		close(fd);
392 		return B_ERROR;
393 	}
394 
395 	buffer[stat.st_size] = '\0';
396 
397 	char *line = buffer;
398 	while (line[0]) {
399 		switch (line[0]) {
400 			case 'c':
401 			{
402 				mount_id device;
403 				vnode_id node;
404 
405 				// direct "command opens file" rule
406 				line += 2;
407 				if (parse_ref(line, device, node, &line)) {
408 					const char *fileName = get_name(line);
409 					if (fileName != NULL) {
410 						eat_spaces(line);
411 						const char *name = get_name(line);
412 						if (name != NULL) {
413 							eat_spaces(line);
414 							int32 confidence = strtoul(line, &line, 10);
415 							TRACE(("c %ld:%Ld:%s %s %ld\n", device, node, fileName, name, confidence));
416 
417 							struct head *head = new ::head;
418 							head->device = device;
419 							head->parent = node;
420 							strlcpy(head->name, fileName, B_FILE_NAME_LENGTH);
421 							head->confidence = confidence;
422 
423 							Rule *rule = get_rule(name, LAUNCH_TYPE);
424 							if (rule != NULL)
425 								rule->AddHead(head);
426 							break;
427 						}
428 					}
429 				}
430 				ignore_line(line);
431 				break;
432 			}
433 			default:
434 				// unknown rule - ignore line
435 				ignore_line(line);
436 				break;
437 		}
438 		line++;
439 	}
440 
441 	free(buffer);
442 	close(fd);
443 	return B_OK;
444 }
445 
446 
447 //	#pragma mark -
448 
449 
450 Rule::Rule(rule_type type)
451 	:
452 	fType(type),
453 	fHeadCount(0),
454 	fBodyCount(0),
455 	fAppliedCount(0),
456 	fKnownFileOpened(0),
457 	fUnknownFileOpened(0)
458 {
459 	list_init(&fHeads);
460 	list_init(&fBodies);
461 }
462 
463 
464 Rule::~Rule()
465 {
466 	struct head *head;
467 	while ((head = (struct head *)list_remove_head_item(&fHeads)) != NULL) {
468 		delete head;
469 	}
470 
471 	struct body *body;
472 	while ((body = (struct body *)list_remove_head_item(&fBodies)) != NULL) {
473 		delete body;
474 	}
475 }
476 
477 
478 void
479 Rule::AddHead(struct head *head)
480 {
481 	list_add_item(&fHeads, head);
482 	fHeadCount++;
483 }
484 
485 
486 void
487 Rule::AddBody(struct body *body)
488 {
489 	list_add_item(&fBodies, body);
490 	fBodyCount++;
491 }
492 
493 
494 void
495 Rule::Apply()
496 {
497 	TRACE(("Apply rule %p", this));
498 
499 	struct head *head = NULL;
500 	while ((head = (struct head *)list_get_next_item(&fHeads, head)) != NULL) {
501 		if (head->confidence < sMinConfidence)
502 			continue;
503 
504 		// prefetch node
505 		void *vnode;
506 		if (vfs_entry_ref_to_vnode(head->device, head->parent, head->name, &vnode) == B_OK) {
507 			vfs_vnode_to_node_ref(vnode, &head->device, &head->node);
508 
509 			TRACE(("prefetch: %ld:%Ld:%s\n", head->device, head->parent, head->name));
510 			cache_prefetch(head->device, head->node, 0, ~0UL);
511 
512 			// ToDo: put head into a hash so that some statistics can be backpropagated quickly
513 		} else {
514 			// node doesn't exist anymore
515 			head->confidence = -1;
516 		}
517 	}
518 
519 	fAppliedCount++;
520 }
521 
522 
523 struct head *
524 Rule::FindHead(mount_id device, vnode_id node)
525 {
526 	// ToDo: use a hash for this!
527 
528 	struct head *head = NULL;
529 	while ((head = (struct head *)list_get_next_item(&fHeads, head)) != NULL) {
530 		if (head->node == node && head->device == device)
531 			return head;
532 	}
533 
534 	return NULL;
535 }
536 
537 
538 void
539 Rule::Dump()
540 {
541 	dprintf("  applied: %ld, known: %ld, unknown: %ld\n",
542 		fAppliedCount, fKnownFileOpened, fUnknownFileOpened);
543 
544 	struct head *head = NULL;
545 	while ((head = (struct head *)list_get_next_item(&fHeads, head)) != NULL) {
546 		dprintf("  %ld:%Ld:\"%s\", ", head->device, head->parent, head->name);
547 		if (head->confidence < sMinConfidence)
548 			dprintf("-\n");
549 		else
550 			dprintf("%ld (%ld), %Ld us\n", head->used_count,
551 				head->used_count - fAppliedCount, head->timestamp);
552 	}
553 }
554 
555 
556 //	#pragma mark -
557 
558 
559 RuleMatcher::RuleMatcher(team_id team, const char *name)
560 	:
561 	fRules(NULL)
562 {
563 	recursive_lock_lock(&sLock);
564 
565 	if (name == NULL)
566 		return;
567 
568 	fRules = sTeamHash->Lookup(team);
569 	if (fRules != NULL)
570 		return;
571 
572 	fRules = new team_rules;
573 	if (fRules == NULL)
574 		return;
575 
576 	fRules->team = team;
577 	list_init(&fRules->states);
578 	list_init(&fRules->applied);
579 
580 dprintf("new rules for \"%s\"\n", name);
581 	_CollectRules(name);
582 
583 	sTeamHash->Insert(fRules);
584 	start_watching_team(team, team_gone, fRules);
585 
586 	fRules->timestamp = system_time();
587 }
588 
589 
590 RuleMatcher::~RuleMatcher()
591 {
592 	recursive_lock_unlock(&sLock);
593 }
594 
595 
596 void
597 RuleMatcher::_CollectRules(const char *name)
598 {
599 	struct rules *rules = sRulesHash->Lookup(name);
600 	if (rules == NULL) {
601 		// there are no rules for this command
602 		return;
603 	}
604 
605 	// allocate states for all rules found
606 
607 	for (Rule *rule = rules->first; rule != NULL; rule = rule->Next()) {
608 		rule_state *state = new rule_state;
609 		if (state == NULL)
610 			continue;
611 
612 		TRACE(("found rule %p for \"%s\"\n", rule, rules->name));
613 		state->rule = rule;
614 		state->state = 0;
615 
616 		if (rule->Type() == LAUNCH_TYPE) {
617 			// we can already prefetch the simplest of all rules here
618 			// (it's fulfilled as soon as the command is launched)
619 			rule->Apply();
620 			list_add_item(&fRules->applied, state);
621 		} else
622 			list_add_item(&fRules->states, state);
623 	}
624 }
625 
626 
627 void
628 RuleMatcher::GotFile(mount_id device, vnode_id node)
629 {
630 	if (fRules == NULL)
631 		return;
632 
633 	// try to match the file with all open rules
634 
635 	rule_state *state = NULL;
636 	while ((state = (rule_state *)list_get_next_item(&fRules->states, state)) != NULL) {
637 		if ((state->rule->Type() & OPEN_FILE_TYPE) == 0)
638 			continue;
639 
640 		// ToDo
641 	}
642 
643 	bigtime_t diff = system_time() - fRules->timestamp;
644 
645 	// propagate the usage of this file back to the applied rules
646 
647 	while ((state = (rule_state *)list_get_next_item(&fRules->applied, state)) != NULL) {
648 		struct head *head = state->rule->FindHead(device, node);
649 		if (head != NULL) {
650 			// grow confidence
651 			state->rule->KnownFileOpened();
652 			head->used_count++;
653 			if (head->used_count > 1)
654 				head->timestamp = (head->timestamp * (head->used_count - 1) + diff) / head->used_count;
655 		} else
656 			state->rule->UnknownFileOpened();
657 	}
658 }
659 
660 
661 void
662 RuleMatcher::GotArguments(int32 argCount, char * const *args)
663 {
664 	if (fRules == NULL)
665 		return;
666 
667 	// try to match the arguments with all open rules
668 
669 	rule_state *state = NULL;
670 	while ((state = (rule_state *)list_get_next_item(&fRules->states, state)) != NULL) {
671 		if ((state->rule->Type() & ARGUMENTS_TYPE) == 0)
672 			continue;
673 
674 		// ToDo
675 	}
676 }
677 
678 
679 //	#pragma mark -
680 
681 
682 static void
683 node_opened(struct vnode *vnode, int32 fdType, dev_t device, vnode_id parent,
684 	vnode_id node, const char *name, off_t size)
685 {
686 	if (device < gBootDevice) {
687 		// we ignore any access to rootfs, pipefs, and devfs
688 		// ToDo: if we can ever move the boot device on the fly, this will break
689 		return;
690 	}
691 
692 	// ToDo: this is only needed if there is no rule for this team yet - ideally,
693 	//	it would be handled by the log module, so that the vnode ID is sufficient
694 	//	to recognize a rule
695 	char buffer[B_FILE_NAME_LENGTH];
696 	if (name == NULL
697 		&& vfs_get_vnode_name(vnode, buffer, sizeof(buffer)) == B_OK)
698 		name = buffer;
699 
700 	//dprintf("opened: %ld:%Ld:%Ld:%s (%s)\n", device, parent, node, name, thread_get_current_thread()->name);
701 	RuleMatcher matcher(team_get_current_team_id(), name);
702 	matcher.GotFile(device, node);
703 }
704 
705 
706 static void
707 node_launched(size_t argCount, char * const *args)
708 {
709 	//dprintf("launched: %s (%s)\n", args[0], thread_get_current_thread()->name);
710 	RuleMatcher matcher(team_get_current_team_id());
711 	matcher.GotArguments(argCount, args);
712 }
713 
714 
715 static void
716 uninit()
717 {
718 	recursive_lock_lock(&sLock);
719 
720 	// free all sessions from the hashes
721 
722 	team_rules *teamRules = sTeamHash->Clear(true);
723 	while (teamRules != NULL) {
724 		team_rules *next = teamRules->next;
725 		delete teamRules;
726 		teamRules = next;
727 	}
728 
729 	struct rules *rules = sRulesHash->Clear(true);
730 	while (rules != NULL) {
731 		Rule *rule = rules->first;
732 		while (rule) {
733 			Rule *next = rule->Next();
734 
735 			dprintf("Rule %p \"%s\"\n", rule, rules->name);
736 			rule->Dump();
737 
738 			delete rule;
739 			rule = next;
740 		}
741 
742 		struct rules *next = rules->next;
743 		delete rules;
744 		rules = next;
745 	}
746 
747 	delete sTeamHash;
748 	delete sRulesHash;
749 	recursive_lock_destroy(&sLock);
750 }
751 
752 
753 static status_t
754 init()
755 {
756 	sTeamHash = new(std::nothrow) TeamTable();
757 	if (sTeamHash == NULL || sTeamHash->Init(64) != B_OK)
758 		return B_NO_MEMORY;
759 
760 	status_t status;
761 
762 	sRulesHash = new(std::nothrow) RuleTable();
763 	if (sRulesHash == NULL || sRulesHash->Init(64) != B_OK) {
764 		delete sTeamHash;
765 		return B_NO_MEMORY;
766 	}
767 
768 	recursive_lock_init(&sLock, "rule based prefetcher");
769 
770 	load_rules();
771 	return B_OK;
772 }
773 
774 
775 static status_t
776 std_ops(int32 op, ...)
777 {
778 	switch (op) {
779 		case B_MODULE_INIT:
780 			return init();
781 
782 		case B_MODULE_UNINIT:
783 			uninit();
784 			return B_OK;
785 
786 		default:
787 			return B_ERROR;
788 	}
789 }
790 
791 
792 static struct cache_module_info sRuleBasedPrefetcherModule = {
793 	{
794 		CACHE_MODULES_NAME "/rule_based_prefetcher/v1",
795 		0,
796 		std_ops,
797 	},
798 	node_opened,
799 	NULL,
800 	node_launched,
801 };
802 
803 
804 module_info *modules[] = {
805 	(module_info *)&sRuleBasedPrefetcherModule,
806 	NULL
807 };
808