1 /*
2 * Copyright 2008, Axel Dörfler, axeld@pinc-software.de.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7 #include <algorithm>
8 #include <string>
9 #include <vector>
10
11 #include <dirent.h>
12 #include <errno.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <unistd.h>
17
18 #include <OS.h>
19 #include <Path.h>
20
21 #include <SHA256.h>
22
23 #include "AdaptiveBuffering.h"
24
25
// Debug tracing: enable the printf() variant below (and disable the empty
// one) to get the TRACE() output; by default TRACE() expands to an empty
// statement.
//#define TRACE(x...) printf(x)
#define TRACE(x...) ;


// Program name taken from the externally defined __progname; it is used as
// the prefix of the diagnostics printed to stderr.
extern const char *__progname;
static const char *kProgramName = __progname;

// Initial and maximum buffer sizes (1 MiB and 10 MiB) that SHAProcessor
// passes on to the AdaptiveBuffering constructor.
const size_t kInitialBufferSize = 1 * 1024 * 1024;
const size_t kMaxBufferSize = 10 * 1024 * 1024;
35
36
37 class SHAProcessor : public AdaptiveBuffering {
38 public:
SHAProcessor()39 SHAProcessor()
40 : AdaptiveBuffering(kInitialBufferSize, kMaxBufferSize, 3),
41 fFile(-1)
42 {
43 }
44
~SHAProcessor()45 virtual ~SHAProcessor()
46 {
47 Unset();
48 }
49
Unset()50 void Unset()
51 {
52 if (fFile >= 0)
53 close(fFile);
54 }
55
Process(int file)56 status_t Process(int file)
57 {
58 Unset();
59 fSHA.Init();
60 fFile = file;
61
62 return Run();
63 }
64
Read(uint8 * buffer,size_t * _length)65 virtual status_t Read(uint8* buffer, size_t* _length)
66 {
67 ssize_t bytes = read(fFile, buffer, *_length);
68 if (bytes < B_OK)
69 return errno;
70
71 *_length = bytes;
72 return B_OK;
73 }
74
Write(uint8 * buffer,size_t length)75 virtual status_t Write(uint8* buffer, size_t length)
76 {
77 fSHA.Update(buffer, length);
78 return B_OK;
79 }
80
Digest()81 const uint8* Digest() { return fSHA.Digest(); }
DigestLength() const82 size_t DigestLength() const { return fSHA.DigestLength(); }
83
84 private:
85 SHA256 fSHA;
86 int fFile;
87 };
88
89 struct file_entry {
90 uint8 hash[SHA_DIGEST_LENGTH];
91 ino_t node;
92 std::string path;
93
operator <file_entry94 bool operator<(const struct file_entry& other) const
95 {
96 return path < other.path;
97 }
98
HashStringfile_entry99 std::string HashString() const
100 {
101 char buffer[128];
102 for (int i = 0; i < SHA_DIGEST_LENGTH; i++) {
103 sprintf(buffer + i * 2, "%02x", hash[i]);
104 }
105
106 return buffer;
107 }
108 };
109
// List type holding one file_entry per hashed file.
typedef std::vector<file_entry> FileList;

// Forward declaration: process_directory() calls process_file(), which in
// turn calls process_directory() for directories.
void process_file(const char* path);


// The hasher used by process_file() for every file.
SHAProcessor gSHA;
// All entries collected by process_file(); sorted in main() and written out
// by write_hash_file().
FileList gFiles;
117
118
119 void
process_directory(const char * path)120 process_directory(const char* path)
121 {
122 DIR* dir = opendir(path);
123 if (dir == NULL)
124 return;
125
126 size_t pathLength = strlen(path);
127
128 while (struct dirent* entry = readdir(dir)) {
129 if (!strcmp(entry->d_name, ".")
130 || !strcmp(entry->d_name, ".."))
131 continue;
132
133 char fullPath[1024];
134 strlcpy(fullPath, path, sizeof(fullPath));
135 if (path[pathLength - 1] != '/')
136 strlcat(fullPath, "/", sizeof(fullPath));
137 strlcat(fullPath, entry->d_name, sizeof(fullPath));
138
139 process_file(fullPath);
140 }
141
142 closedir(dir);
143 }
144
145
146 void
process_file(const char * path)147 process_file(const char* path)
148 {
149 struct stat stat;
150 if (::lstat(path, &stat) != 0) {
151 fprintf(stderr, "Could not stat file \"%s\": %s\n", path,
152 strerror(errno));
153 return;
154 }
155
156 if (S_ISDIR(stat.st_mode)) {
157 process_directory(path);
158 return;
159 }
160 if (S_ISLNK(stat.st_mode))
161 return;
162
163 int file = open(path, O_RDONLY);
164 if (file < 0) {
165 fprintf(stderr, "Could not open file \"%s\": %s\n", path,
166 strerror(errno));
167 return;
168 }
169
170 status_t status = gSHA.Process(file);
171 if (status != B_OK) {
172 fprintf(stderr, "Computing SHA failed \"%s\": %s\n", path,
173 strerror(status));
174 return;
175 }
176
177 file_entry entry;
178 memcpy(entry.hash, gSHA.Digest(), SHA_DIGEST_LENGTH);
179 entry.node = stat.st_ino;
180 entry.path = path;
181
182 //printf("%s %s\n", entry.HashString().c_str(), path);
183
184 gFiles.push_back(entry);
185
186 static bigtime_t sLastUpdate = -1;
187 if (system_time() - sLastUpdate > 500000) {
188 printf("%ld files scanned\33[1A\n", gFiles.size());
189 sLastUpdate = system_time();
190 }
191 }
192
193
194 void
write_hash_file(const char * name,int fileCount,char ** files)195 write_hash_file(const char* name, int fileCount, char** files)
196 {
197 int file = open(name, O_WRONLY | O_TRUNC | O_CREAT);
198 if (file < 0) {
199 fprintf(stderr, "%s: Could not write hash file \"%s\": %s\n",
200 kProgramName, name, strerror(errno));
201 return;
202 }
203
204 write(file, "HASH", 4);
205
206 write(file, &fileCount, sizeof(int));
207 for (int i = 0; i < fileCount; i++) {
208 int length = strlen(files[i]);
209 write(file, &length, sizeof(int));
210 write(file, files[i], length + 1);
211 }
212
213 fileCount = gFiles.size();
214 write(file, &fileCount, sizeof(int));
215 for (int i = 0; i < fileCount; i++) {
216 file_entry& entry = gFiles[i];
217
218 write(file, entry.hash, SHA_DIGEST_LENGTH);
219 write(file, &entry.node, sizeof(ino_t));
220
221 int length = entry.path.size();
222 write(file, &length, sizeof(int));
223 write(file, entry.path.c_str(), length + 1);
224 }
225
226 close(file);
227 }
228
229
230 int
main(int argc,char ** argv)231 main(int argc, char** argv)
232 {
233 if (argc < 2) {
234 fprintf(stderr, "usage: %s <hash-file> [<files> ...]\n"
235 "\tWhen invoked without files, the hash-file is updated only.\n",
236 kProgramName);
237 return 1;
238 }
239
240 const char* hashFileName = argv[1];
241
242 status_t status = gSHA.Init();
243 if (status != B_OK) {
244 fprintf(stderr, "%s: Could not initialize SHA processor: %s\n",
245 kProgramName, strerror(status));
246 return 1;
247 }
248
249 int fileCount = argc - 2;
250 char** files = argv + 2;
251
252 if (argc == 2) {
253 // read files from hash file
254
255 int file = open(hashFileName, O_RDONLY);
256 if (file < 0) {
257 fprintf(stderr, "%s: Could not open hash file \"%s\": %s\n",
258 kProgramName, hashFileName, strerror(status));
259 return 1;
260 }
261
262 char buffer[2048];
263 read(file, buffer, 4);
264 if (memcmp(buffer, "HASH", 4)) {
265 fprintf(stderr, "%s: \"%s\" is not a hash file\n",
266 kProgramName, hashFileName);
267 close(file);
268 return 1;
269 }
270 read(file, &fileCount, sizeof(int));
271 TRACE("Found %d path(s):\n", fileCount);
272
273 files = (char**)malloc(fileCount * sizeof(char*));
274 if (files == NULL) {
275 fprintf(stderr, "%s: Could not allocate %ld bytes\n",
276 kProgramName, fileCount * sizeof(char*));
277 close(file);
278 return 1;
279 }
280
281 for (int i = 0; i < fileCount; i++) {
282 int length;
283 read(file, &length, sizeof(int));
284
285 files[i] = (char*)malloc(length + 1);
286 if (files[i] == NULL) {
287 fprintf(stderr, "%s: Could not allocate %d bytes\n",
288 kProgramName, length + 1);
289 close(file);
290 // TODO: we actually leak memory here, but it's not important in this context
291 return 1;
292 }
293 read(file, files[i], length + 1);
294 TRACE("\t%s\n", files[i]);
295 }
296
297 close(file);
298 } else {
299 // Normalize paths
300 char** normalizedFiles = (char**)malloc(fileCount * sizeof(char*));
301 if (normalizedFiles == NULL) {
302 fprintf(stderr, "%s: Could not allocate %ld bytes\n",
303 kProgramName, fileCount * sizeof(char*));
304 return 1;
305 }
306
307 for (int i = 0; i < fileCount; i++) {
308 BPath path(files[i], NULL, true);
309 normalizedFiles[i] = strdup(path.Path());
310 if (normalizedFiles[i] == NULL) {
311 fprintf(stderr, "%s: Could not allocate %ld bytes\n",
312 kProgramName, strlen(path.Path()) + 1);
313 return 1;
314 }
315 }
316
317 files = normalizedFiles;
318 }
319
320 bigtime_t start = system_time();
321
322 for (int i = 0; i < fileCount; i++) {
323 process_file(files[i]);
324 }
325
326 sort(gFiles.begin(), gFiles.end());
327
328 bigtime_t runtime = system_time() - start;
329
330 write_hash_file(hashFileName, fileCount, files);
331
332 if (gFiles.size() > 0) {
333 printf("Generated hashes for %ld files in %g seconds, %g msec per "
334 "file.\n", gFiles.size(), runtime / 1000000.0,
335 runtime / 1000.0 / gFiles.size());
336 }
337
338 for (int i = 0; i < fileCount; i++) {
339 free(files[i]);
340 }
341 free(files);
342
343 return 0;
344 }
345