xref: /haiku/src/tests/add-ons/kernel/file_systems/shared/consistency_check/generate_hashs.cpp (revision f91b5cb2974402711461e10ab37efc85343af6dd)
1 /*
2  * Copyright 2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include <algorithm>
8 #include <string>
9 #include <vector>
10 
11 #include <dirent.h>
12 #include <errno.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <unistd.h>
17 
18 #include <OS.h>
19 #include <Path.h>
20 
21 #include <SHA256.h>
22 
23 #include "AdaptiveBuffering.h"
24 
25 
// Tracing is compiled out by default; swap the two definitions below to have
// every TRACE() call print through printf().
//#define TRACE(x...) printf(x)
#define TRACE(x...) ;


// Program name used as prefix in diagnostics; taken from the externally
// defined __progname symbol.
extern const char* __progname;
static const char* kProgramName = __progname;

// Initial and maximum buffer sizes that SHAProcessor passes on to
// AdaptiveBuffering (1 MiB and 10 MiB).
const size_t kInitialBufferSize = 1024 * 1024;
const size_t kMaxBufferSize = 10 * 1024 * 1024;
35 
36 
37 class SHAProcessor : public AdaptiveBuffering {
38 public:
SHAProcessor()39 	SHAProcessor()
40 		: AdaptiveBuffering(kInitialBufferSize, kMaxBufferSize, 3),
41 		fFile(-1)
42 	{
43 	}
44 
~SHAProcessor()45 	virtual ~SHAProcessor()
46 	{
47 		Unset();
48 	}
49 
Unset()50 	void Unset()
51 	{
52 		if (fFile >= 0)
53 			close(fFile);
54 	}
55 
Process(int file)56 	status_t Process(int file)
57 	{
58 		Unset();
59 		fSHA.Init();
60 		fFile = file;
61 
62 		return Run();
63 	}
64 
Read(uint8 * buffer,size_t * _length)65 	virtual status_t Read(uint8* buffer, size_t* _length)
66 	{
67 		ssize_t bytes = read(fFile, buffer, *_length);
68 		if (bytes < B_OK)
69 			return errno;
70 
71 		*_length = bytes;
72 		return B_OK;
73 	}
74 
Write(uint8 * buffer,size_t length)75 	virtual status_t Write(uint8* buffer, size_t length)
76 	{
77 		fSHA.Update(buffer, length);
78 		return B_OK;
79 	}
80 
Digest()81 	const uint8* Digest() { return fSHA.Digest(); }
DigestLength() const82 	size_t DigestLength() const	{ return fSHA.DigestLength(); }
83 
84 private:
85 	SHA256	fSHA;
86 	int		fFile;
87 };
88 
89 struct file_entry {
90 	uint8			hash[SHA_DIGEST_LENGTH];
91 	ino_t			node;
92 	std::string		path;
93 
operator <file_entry94 	bool operator<(const struct file_entry& other) const
95 	{
96 		return path < other.path;
97 	}
98 
HashStringfile_entry99 	std::string HashString() const
100 	{
101 		char buffer[128];
102 		for (int i = 0; i < SHA_DIGEST_LENGTH; i++) {
103 			sprintf(buffer + i * 2, "%02x", hash[i]);
104 		}
105 
106 		return buffer;
107 	}
108 };
109 
110 typedef std::vector<file_entry> FileList;
111 
112 void process_file(const char* path);
113 
114 
115 SHAProcessor gSHA;
116 FileList gFiles;
117 
118 
119 void
process_directory(const char * path)120 process_directory(const char* path)
121 {
122 	DIR* dir = opendir(path);
123 	if (dir == NULL)
124 		return;
125 
126 	size_t pathLength = strlen(path);
127 
128 	while (struct dirent* entry = readdir(dir)) {
129 		if (!strcmp(entry->d_name, ".")
130 			|| !strcmp(entry->d_name, ".."))
131 			continue;
132 
133 		char fullPath[1024];
134 		strlcpy(fullPath, path, sizeof(fullPath));
135 		if (path[pathLength - 1] != '/')
136 			strlcat(fullPath, "/", sizeof(fullPath));
137 		strlcat(fullPath, entry->d_name, sizeof(fullPath));
138 
139 		process_file(fullPath);
140 	}
141 
142 	closedir(dir);
143 }
144 
145 
146 void
process_file(const char * path)147 process_file(const char* path)
148 {
149 	struct stat stat;
150 	if (::lstat(path, &stat) != 0) {
151 		fprintf(stderr, "Could not stat file \"%s\": %s\n", path,
152 			strerror(errno));
153 		return;
154 	}
155 
156 	if (S_ISDIR(stat.st_mode)) {
157 		process_directory(path);
158 		return;
159 	}
160 	if (S_ISLNK(stat.st_mode))
161 		return;
162 
163 	int file = open(path, O_RDONLY);
164 	if (file < 0) {
165 		fprintf(stderr, "Could not open file \"%s\": %s\n", path,
166 			strerror(errno));
167 		return;
168 	}
169 
170 	status_t status = gSHA.Process(file);
171 	if (status != B_OK) {
172 		fprintf(stderr, "Computing SHA failed \"%s\": %s\n", path,
173 			strerror(status));
174 		return;
175 	}
176 
177 	file_entry entry;
178 	memcpy(entry.hash, gSHA.Digest(), SHA_DIGEST_LENGTH);
179 	entry.node = stat.st_ino;
180 	entry.path = path;
181 
182 	//printf("%s  %s\n", entry.HashString().c_str(), path);
183 
184 	gFiles.push_back(entry);
185 
186 	static bigtime_t sLastUpdate = -1;
187 	if (system_time() - sLastUpdate > 500000) {
188 		printf("%ld files scanned\33[1A\n", gFiles.size());
189 		sLastUpdate = system_time();
190 	}
191 }
192 
193 
194 void
write_hash_file(const char * name,int fileCount,char ** files)195 write_hash_file(const char* name, int fileCount, char** files)
196 {
197 	int file = open(name, O_WRONLY | O_TRUNC | O_CREAT);
198 	if (file < 0) {
199 		fprintf(stderr, "%s: Could not write hash file \"%s\": %s\n",
200 			kProgramName, name, strerror(errno));
201 		return;
202 	}
203 
204 	write(file, "HASH", 4);
205 
206 	write(file, &fileCount, sizeof(int));
207 	for (int i = 0; i < fileCount; i++) {
208 		int length = strlen(files[i]);
209 		write(file, &length, sizeof(int));
210 		write(file, files[i], length + 1);
211 	}
212 
213 	fileCount = gFiles.size();
214 	write(file, &fileCount, sizeof(int));
215 	for (int i = 0; i < fileCount; i++) {
216 		file_entry& entry = gFiles[i];
217 
218 		write(file, entry.hash, SHA_DIGEST_LENGTH);
219 		write(file, &entry.node, sizeof(ino_t));
220 
221 		int length = entry.path.size();
222 		write(file, &length, sizeof(int));
223 		write(file, entry.path.c_str(), length + 1);
224 	}
225 
226 	close(file);
227 }
228 
229 
230 int
main(int argc,char ** argv)231 main(int argc, char** argv)
232 {
233 	if (argc < 2) {
234 		fprintf(stderr, "usage: %s <hash-file> [<files> ...]\n"
235 			"\tWhen invoked without files, the hash-file is updated only.\n",
236 			kProgramName);
237 		return 1;
238 	}
239 
240 	const char* hashFileName = argv[1];
241 
242 	status_t status = gSHA.Init();
243 	if (status != B_OK) {
244 		fprintf(stderr, "%s: Could not initialize SHA processor: %s\n",
245 			kProgramName, strerror(status));
246 		return 1;
247 	}
248 
249 	int fileCount = argc - 2;
250 	char** files = argv + 2;
251 
252 	if (argc == 2) {
253 		// read files from hash file
254 
255 		int file = open(hashFileName, O_RDONLY);
256 		if (file < 0) {
257 			fprintf(stderr, "%s: Could not open hash file \"%s\": %s\n",
258 				kProgramName, hashFileName, strerror(status));
259 			return 1;
260 		}
261 
262 		char buffer[2048];
263 		read(file, buffer, 4);
264 		if (memcmp(buffer, "HASH", 4)) {
265 			fprintf(stderr, "%s: \"%s\" is not a hash file\n",
266 				kProgramName, hashFileName);
267 			close(file);
268 			return 1;
269 		}
270 		read(file, &fileCount, sizeof(int));
271 		TRACE("Found %d path(s):\n", fileCount);
272 
273 		files = (char**)malloc(fileCount * sizeof(char*));
274 		if (files == NULL) {
275 			fprintf(stderr, "%s: Could not allocate %ld bytes\n",
276 				kProgramName, fileCount * sizeof(char*));
277 			close(file);
278 			return 1;
279 		}
280 
281 		for (int i = 0; i < fileCount; i++) {
282 			int length;
283 			read(file, &length, sizeof(int));
284 
285 			files[i] = (char*)malloc(length + 1);
286 			if (files[i] == NULL) {
287 				fprintf(stderr, "%s: Could not allocate %d bytes\n",
288 					kProgramName, length + 1);
289 				close(file);
290 				// TODO: we actually leak memory here, but it's not important in this context
291 				return 1;
292 			}
293 			read(file, files[i], length + 1);
294 			TRACE("\t%s\n", files[i]);
295 		}
296 
297 		close(file);
298 	} else {
299 		// Normalize paths
300 		char** normalizedFiles = (char**)malloc(fileCount * sizeof(char*));
301 		if (normalizedFiles == NULL) {
302 			fprintf(stderr, "%s: Could not allocate %ld bytes\n",
303 				kProgramName, fileCount * sizeof(char*));
304 			return 1;
305 		}
306 
307 		for (int i = 0; i < fileCount; i++) {
308 			BPath path(files[i], NULL, true);
309 			normalizedFiles[i] = strdup(path.Path());
310 			if (normalizedFiles[i] == NULL) {
311 				fprintf(stderr, "%s: Could not allocate %ld bytes\n",
312 					kProgramName, strlen(path.Path()) + 1);
313 				return 1;
314 			}
315 		}
316 
317 		files = normalizedFiles;
318 	}
319 
320 	bigtime_t start = system_time();
321 
322 	for (int i = 0; i < fileCount; i++) {
323 		process_file(files[i]);
324 	}
325 
326 	sort(gFiles.begin(), gFiles.end());
327 
328 	bigtime_t runtime = system_time() - start;
329 
330 	write_hash_file(hashFileName, fileCount, files);
331 
332 	if (gFiles.size() > 0) {
333 		printf("Generated hashes for %ld files in %g seconds, %g msec per "
334 			"file.\n", gFiles.size(), runtime / 1000000.0,
335 			runtime / 1000.0 / gFiles.size());
336 	}
337 
338 	for (int i = 0; i < fileCount; i++) {
339 		free(files[i]);
340 	}
341 	free(files);
342 
343 	return 0;
344 }
345