xref: /haiku/src/bin/mail_utils/mail2mbox.cpp (revision c237c4ce593ee823d9867fd997e51e4c447f5623)
1 /*
2  * Copyright 2005-2009, Haiku Inc.
3  * This file may be used under the terms of the MIT License.
4  *
5  * Originally public domain written by Alexander G. M. Smith.
6  */
7 
8 
9 /*!	BeMailToMBox is a utility program (requested by Frank Zschockelt) that
10 	converts BeOS e-mail files into Unix mailbox files (the kind that Pine
11 	uses).  All the files in the input directory are concatenated with the
12 	appropriate mbox header lines added between them, and trailing blank lines
13 	reduced.  The resulting text is written to standard output.  Command line
14 	driven.
15 */
16 
17 #include <ctype.h>
18 #include <errno.h>
19 #include <string.h>
20 #include <stdio.h>
21 #include <time.h>
22 
23 #include <Application.h>
24 #include <StorageKit.h>
25 #include <SupportKit.h>
26 
27 
extern const char* __progname;
static const char* kProgramName = __progname;
	// Program name printed in the usage text.

static time_t gDateStampTime;
	// Time value used for stamping each message header. Incremented by 1 second
	// for each message, starts out with the current local time. Only used
	// inside this file, hence the internal linkage.
34 
35 
36 /*!	Global utility function to display an error message and return.  The message
37 	part describes the error, and if errorNumber is non-zero, gets the string
38 	", error code $X (standard description)." appended to it.  If the message
39 	is NULL then it gets defaulted to "Something went wrong".
40 */
41 static void
42 DisplayErrorMessage(const char* messageString = NULL, status_t errorNumber = 0,
43 	const char* titleString = NULL)
44 {
45 	char errorBuffer[2048];
46 
47 	if (titleString == NULL)
48 		titleString = "Error Message:";
49 
50 	if (messageString == NULL) {
51 		if (errorNumber == B_OK)
52 			messageString = "No error, no message, why bother?";
53 		else
54 			messageString = "Error";
55 	}
56 
57 	if (errorNumber != 0) {
58 		snprintf(errorBuffer, sizeof(errorBuffer), "%s: %s (%" B_PRIx32 ")"
59 			"has occured.", messageString, strerror(errorNumber), errorNumber);
60 		messageString = errorBuffer;
61 	}
62 
63 	fputs(titleString, stderr);
64 	fputc('\n', stderr);
65 	fputs(messageString, stderr);
66 	fputc('\n', stderr);
67 }
68 
69 
/*!	Returns true if \a c is one of the ASCII digits '0' to '9'.  Done by hand
	rather than with isdigit() so that the result does not depend on the locale.
*/
static bool
IsAsciiDigit(char c)
{
	return c >= '0' && c <= '9';
}


/*!	Returns true if \a c is one of the ASCII capital letters 'A' to 'Z'. */
static bool
IsAsciiCapital(char c)
{
	return c >= 'A' && c <= 'Z';
}


/*!	Returns a pointer to the first character at or after \a string that is not
	a space (possibly the terminating NUL).
*/
static const char*
SkipSpaces(const char* string)
{
	while (*string == ' ')
		string++;
	return string;
}


/*!	Returns true if \a string begins with one of the \a count entries of
	\a names.  strncmp() stops at the terminating NUL, so a \a string that is
	shorter than the name is never read past its end.
*/
static bool
StartsWithAnyOf(const char* string, const char* const* names, size_t count)
{
	for (size_t i = 0; i < count; i++) {
		if (strncmp(string, names[i], strlen(names[i])) == 0)
			return true;
	}
	return false;
}


/*!	Determine if a line of text is the start of another message.  Pine mailbox
	files have messages that start with a line that could say something like
	"From agmsmith@achilles.net Fri Oct 31 21:19:36 EST 1997" or maybe something
	like "From POPmail Mon Oct 20 21:12:36 1997" or in a more modern format,
	"From agmsmith@achilles.net Tue Sep 4 09:04:11 2001 -0400".  I generalise it
	to "From blah Day MMM NN XX:XX:XX TZONE1 YYYY TZONE2".  Blah is an e-mail
	address you can ignore (just treat it as a word separated by spaces).  Day
	is a 3 letter day of the week.  MMM is a 3 letter month name.  NN is the
	day of the month (one or more digits, may be preceded by extra spaces).
	XX:XX:XX is the time, the X's are digits.  TZONE1 is the old style optional
	time zone of 3 capital letters.  YYYY is the four digit year.  TZONE2 is the
	optional modern time zone info, a plus or minus sign and 4 digits.

	\param lineString The line of text, ended with a NUL byte, no line feed or
		carriage returns at the end.  It is not modified.
	\return true if the line is the start of a message.  When the line starts
		with "From " but fails a later check, the reason is printed to stderr.
*/
bool
IsStartOfMailMessage(char* lineString)
{
	static const char* const kDayNames[] = {
		"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"
	};
	static const char* const kMonthNames[] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
	};

	// It starts with "From "
	if (strncmp(lineString, "From ", 5) != 0)
		return false;

	const char* string = SkipSpaces(lineString + 4);

	// Skip over the e-mail address (or stop at the end of string).
	while (*string != ' ' && *string != 0)
		string++;
	string = SkipSpaces(string);

	// TODO: improve this!!!

	// Look for the 3 letter day of the week.
	if (!StartsWithAnyOf(string, kDayNames,
			sizeof(kDayNames) / sizeof(kDayNames[0]))) {
		fprintf(stderr, "False alarm, not a valid day of the week in \"%s\""
			".\n", lineString);
		return false;
	}
	string = SkipSpaces(string + 3);

	// Look for the 3 letter month code.
	if (!StartsWithAnyOf(string, kMonthNames,
			sizeof(kMonthNames) / sizeof(kMonthNames[0]))) {
		fprintf(stderr, "False alarm, not a valid month name in \"%s\".\n",
			lineString);
		return false;
	}
	string = SkipSpaces(string + 3);

	// Skip the day of the month.  Require at least one digit.
	if (!IsAsciiDigit(*string)) {
		fprintf(stderr, "False alarm, not a valid day of the "
			"month number in \"%s\".\n", lineString);
		return false;
	}
	while (IsAsciiDigit(*string))
		string++;
	string = SkipSpaces(string);

	// Check the time.  Look for the sequence
	// digit-digit-colon-digit-digit-colon-digit-digit.  The short-circuit
	// evaluation stops at the first mismatch, which includes the NUL byte, so
	// nothing beyond the end of the string is examined.
	if (!IsAsciiDigit(string[0]) || !IsAsciiDigit(string[1])
		|| string[2] != ':'
		|| !IsAsciiDigit(string[3]) || !IsAsciiDigit(string[4])
		|| string[5] != ':'
		|| !IsAsciiDigit(string[6]) || !IsAsciiDigit(string[7])) {
		fprintf(stderr, "False alarm, not a valid time value in \"%s\".\n",
			lineString);
		return false;
	}
	string = SkipSpaces(string + 8);

	// Look for the optional antique 3 capital letter time zone and skip it.
	if (IsAsciiCapital(string[0]) && IsAsciiCapital(string[1])
		&& IsAsciiCapital(string[2])) {
		string = SkipSpaces(string + 3);
	}

	// Look for the 4 digit year.
	if (!IsAsciiDigit(string[0]) || !IsAsciiDigit(string[1])
		|| !IsAsciiDigit(string[2]) || !IsAsciiDigit(string[3])) {
		fprintf(stderr, "False alarm, not a valid 4 digit year in \"%s\".\n",
			lineString);
		return false;
	}
	string = SkipSpaces(string + 4);

	// Look for the optional modern time zone and skip over it if present.
	if ((string[0] == '+' || string[0] == '-')
		&& IsAsciiDigit(string[1]) && IsAsciiDigit(string[2])
		&& IsAsciiDigit(string[3]) && IsAsciiDigit(string[4])) {
		string = SkipSpaces(string + 5);
	}

	// Look for end of string.
	if (*string != 0) {
		fprintf(stderr, "False alarm, extra stuff after the "
			"year/time zone in \"%s\".\n", lineString);
		return false;
	}

	return true;
}
210 
211 
212 /*!	Read the input file, convert it to mbox format, and write it to standard
213 	output.  Returns zero if successful, a negative error code if an error
214 	occured.
215 */
216 status_t
217 ProcessMessageFile(char* fileName)
218 {
219 	fprintf(stdout, "Now processing: \"%s\"\n", fileName);
220 
221 	FILE* inputFile = fopen(fileName, "rb");
222 	if (inputFile == NULL) {
223 		DisplayErrorMessage("Unable to open file", errno);
224 		return errno;
225 	}
226 
227 	// Extract a text message from the Mail file.
228 
229 	BString messageText;
230 	int lineNumber = 0;
231 
232 	while (!feof(inputFile)) {
233 		// First read in one line of text.
234 		char line[102400];
235 		if (fgets(line, sizeof(line), inputFile) == NULL) {
236 			if (ferror(inputFile)) {
237 				char errorString[2048];
238 				snprintf(errorString, sizeof(errorString),
239 					"Error while reading from \"%s\"", fileName);
240 				DisplayErrorMessage(errorString, errno);
241 				fclose(inputFile);
242 				return errno;
243 			}
244 			break;
245 				// No error, just end of file.
246 		}
247 
248 		// Remove any trailing control characters (line feed usually, or CRLF).
249 		// Might also nuke trailing tabs too. Doesn't usually matter. The main
250 		// thing is to allow input files with both LF and CRLF endings (and
251 		// even CR endings if you come from the Macintosh world).
252 
253 		char* string = line + strlen(line) - 1;
254 		while (string >= line && *string < 32)
255 			string--;
256 		*(++string) = 0;
257 
258 		if (lineNumber == 0 && line[0] == 0) {
259 			// Skip leading blank lines.
260 			continue;
261 		}
262 		lineNumber++;
263 
264 		// Prepend the new mbox message header, if the first line of the message
265 		// doesn't already have one.
266 		if (lineNumber == 1 && !IsStartOfMailMessage(line)) {
267 			time_t timestamp = gDateStampTime++;
268 			messageText.Append("From baron@be.com ");
269 			messageText.Append(ctime(&timestamp));
270 		}
271 
272 		// Append the line to the current message text.
273 		messageText.Append(line);
274 		messageText.Append("\n");
275 	}
276 
277 	// Remove blank lines from the end of the message (a pet peeve of mine), but
278 	// end the message with two new lines to separate it from the next message.
279 	int i = messageText.Length();
280 	while (i > 0 && (messageText[i - 1] == '\n' || messageText[i - 1] == '\r'))
281 		i--;
282 	messageText.Truncate(i);
283 	messageText.Append("\n\n");
284 
285 	// Write the message out.
286 
287 	status_t status = B_OK;
288 
289 	if (puts(messageText.String()) < 0) {
290 		DisplayErrorMessage ("Error while writing the message", errno);
291 		status = errno;
292 	}
293 
294 	fclose(inputFile);
295 	return status;
296 }
297 
298 
299 int
300 main(int argc, char** argv)
301 {
302 	BApplication app("application/x-vnd.Haiku-mail2mbox");
303 
304 	if (argc <= 1 || argc >= 3) {
305 		printf("%s is a utility for converting Mail e-mail\n", argv[0]);
306 		printf("files to Unix Pine style e-mail files.  It could well\n");
307 		printf("work with other Unix style mailbox files.  Each message in\n");
308 		printf("the input directory is converted and sent to the standard\n");
309 		printf("output.  Usage:\n\n");
310 		printf("%s InputDirectory >OutputFile\n\n", kProgramName);
311 		printf("Public domain, by Alexander G. M. Smith.\n");
312 		return -10;
313 	}
314 
315 	// Set the date stamp to the current time.
316 	gDateStampTime = time (NULL);
317 
318 	// Try to open the input directory.
319 	char inputPathName[B_PATH_NAME_LENGTH];
320 	strlcpy(inputPathName, argv[1], sizeof(inputPathName) - 2);
321 
322 	char tempString[2048];
323 
324 	DIR* dir = opendir(inputPathName);
325 	if (dir == NULL) {
326 		sprintf(tempString, "Problems opening directory named \"%s\".",
327 			inputPathName);
328 		DisplayErrorMessage(tempString, errno);
329 		return 1;
330 	}
331 
332 	// Append a trailing slash to the directory name, if it needs one.
333 	if (inputPathName[strlen(inputPathName) - 1] != '/')
334 		strcat(inputPathName, "/");
335 
336 	int messagesDoneCount = 0;
337 	status_t status = B_OK;
338 
339 	while (dirent_t* entry = readdir(dir)) {
340 		// skip '.' and '..'
341 		if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
342 			break;
343 
344 		strlcpy(tempString, inputPathName, sizeof(tempString));
345 		strlcat(tempString, entry->d_name, sizeof(tempString));
346 
347 		status = ProcessMessageFile(tempString);
348 		if (status != B_OK)
349 			break;
350 
351 		messagesDoneCount++;
352 	}
353 
354 	closedir(dir);
355 
356 	if (status != B_OK) {
357 		DisplayErrorMessage("Stopping early because an error occured", status);
358 		return status;
359 	}
360 
361 	fprintf(stderr, "Did %d messages successfully.\n", messagesDoneCount);
362 	return 0;
363 }
364