xref: /haiku/src/bin/mail_utils/spamdbm.cpp (revision 425ac1b60a56f4df7a0e88bd784545c0ec4fa01f)
1 /******************************************************************************
2  * $Id: spamdbm.cpp 30630 2009-05-05 01:31:01Z bga $
3  *
4  * This is a BeOS program for classifying e-mail messages as spam (unwanted
5  * junk mail) or as genuine mail using a Bayesian statistical approach.  There
6  * is also a Mail Daemon Replacement add-on to filter mail using the
7  * classification statistics collected earlier.
8  *
9  * See also http://www.paulgraham.com/spam.html for a good writeup and
10  * http://www.tuxedo.org/~esr/bogofilter/ for another implementation.
11  * And more recently, Gary Robinson's write up of his improved algorithm
12  * at http://radio.weblogs.com/0101454/stories/2002/09/16/spamDetection.html
13  * which gives a better spread in spam ratios and slightly fewer
14  * misclassifications.
15  *
16  * Note that this uses the AGMS vacation coding style, not the OpenTracker one.
17  * That means no tabs, indents are two spaces, m_ is the prefix for member
18  * variables, g_ is the prefix for global names, C style comments, constants
19  * are in all capital letters and most other things are mixed case, it's word
20  * wrapped to fit in 79 characters per line to make proofreading on paper
21  * easier, and functions are listed in reverse dependency order so that forward
22  * declarations (function prototypes with no code) aren't needed.
23  *
24  * The Original Design:
25  * There is a spam database (just a file listing words and number of times they
26  * were used in spam and non-spam messages) that a BeMailDaemon input filter
27  * will use when scanning email.  It will mark the mail with the spam
28  * probability (an attribute, optionally a mail header field) and optionally do
29  * something if the probability exceeds a user defined level (delete message,
30  * change subject, file in a different folder).  Or should that be a different
31  * filter?  Outside the mail system, the probability can be used in queries to
32  * find spam.
33  *
34  * A second user application will be used to update the database.  Besides
35  * showing you the current list of words, you can drag and drop files to mark
36  * them as spam or non-spam (a balanced binary tree is used internally to make
37  * word storage fast).  It will add a second attribute to the files to show how
38  * they have been classified by the user (and won't update the database if you
39  * accidentally try to classify a file again).  Besides drag and drop, there
40  * will be a command line interface and a message passing interface.  BeMail
41  * (or other programs) will then communicate via messages to tell it when the
42  * user marks a message as spam or not (via having separate delete spam /
43  * delete genuine mail buttons and a menu item or two).
44  *
45  * Plus lots of details, like the rename swap method to update the database
46  * file (so programs with the old file open aren't affected).  A nice tab text
47  * format so you can open the database in a spreadsheet.  Startup and shutdown
48  * control of the updater from BeMail.  Automatic creation of the indices
49  * needed by the filter.  MIME types for the database file.  Icons for the app.
50  * System settings to enable tracker to display the new attributes when viewing
51  * e-mail (and maybe news articles if someone ever gets around to an NNTP as
52  * files reader).  Documentation.  Recursive directory traversal for the
53  * command line or directory drag and drop.  Options for the updater to warn or
54  * ignore non-email files.  Etc.
55  *
56  * The Actual Implementation:
57  * The spam database updates and the test for spam have been combined into one
58  * program which runs as a server.  That way there won't be as long a delay
59  * when the e-mail system wants to check for spam, because the database is
60  * already loaded by the server and in memory.  The MDR mail filter add-on
61  * simply sends scripting commands to the server (and starts it up if it isn't
62  * already running).  The filter takes care of marking the messages when it
63  * gets the rating back from the server, and then the rest of the mail system
64  * rule chain can delete the message or otherwise manipulate it.
65  *
66  * Revision History (now manually updated due to SVN's philosophy)
67  * $Log: spamdbm.cpp,v $
68  * ------------------------------------------------------------------------
69  * r15195 | agmsmith | 2005-11-27 21:07:55 -0500 (Sun, 27 Nov 2005) | 4 lines
70  * Just a few minutes after checking in, I mentioned it to Japanese expert Koki
71  * and he suggested also including the Japanese comma.  So before I forget to
72  * do it...
73  *
74  * ------------------------------------------------------------------------
75  * r15194 | agmsmith | 2005-11-27 20:37:13 -0500 (Sun, 27 Nov 2005) | 5 lines
76  * Truncate overly long URLs to the maximum word length.  Convert Japanese
77  * periods to spaces so that more "words" are found.  Fix UTF-8 comparison
78  * problems with tolower() incorrectly converting characters with the high bit
79  * set.
80  *
81  * r15098 | agmsmith | 2005-11-23 23:17:00 -0500 (Wed, 23 Nov 2005) | 5 lines
82  * Added better tokenization so that HTML is parsed and things like tags
83  * between letters of a word no longer hide that word.  After testing, the
84  * result seems to be a tighter spread of ratings when done in full text plus
85  * header mode.
86  *
87  * Revision 1.10  2005/11/24 02:08:39  agmsmith
88  * Fixed up prefix codes, Z for things that are inside other things.
89  *
90  * Revision 1.9  2005/11/21 03:28:03  agmsmith
91  * Added a function for extracting URLs.
92  *
93  * Revision 1.8  2005/11/09 03:36:18  agmsmith
94  * Removed noframes detection (doesn't show up in e-mails).  Now use
95  * just H for headers and Z for HTML tag junk.
96  *
97  * Revision 1.7  2005/10/24 00:00:08  agmsmith
98  * Adding HTML tag removal, which also affected the search function so it
99  * could search for single part things like &nbsp;.
100  *
101  * Revision 1.6  2005/10/17 01:55:08  agmsmith
102  * Remove HTML comments and a few other similar things.
103  *
104  * Revision 1.5  2005/10/16 18:35:36  agmsmith
105  * Under construction - looking into HTML not being in UTF-8.
106  *
107  * Revision 1.4  2005/10/11 01:51:21  agmsmith
108  * Starting on the tokenising passes.  Still need to test Asian truncation.
109  *
110  * Revision 1.3  2005/10/06 11:54:07  agmsmith
111  * Not much.
112  *
113  * Revision 1.2  2005/09/12 01:49:37  agmsmith
114  * Enable case folding for the whole file tokenizer.
115  *
116  * r13961 | agmsmith | 2005-08-13 22:25:28 -0400 (Sat, 13 Aug 2005) | 2 lines
117  * Source code changes so that mboxtobemail now compiles and is in the build
118  * system.
119  *
120  * r13959 | agmsmith | 2005-08-13 22:05:27 -0400 (Sat, 13 Aug 2005) | 2 lines
121  * Rename the directory before doing anything else, otherwise svn dies badly.
122  *
123  * r13952 | agmsmith | 2005-08-13 15:31:42 -0400 (Sat, 13 Aug 2005) | 3 lines
124  * Added the resources and file type associations, changed the application
125  * signature and otherwise made the spam detection system work properly again.
126  *
127  * r13951 | agmsmith | 2005-08-13 11:40:01 -0400 (Sat, 13 Aug 2005) | 2 lines
128  * Had to do the file rename as a separate operation due to SVN limitations.
129  *
130  * r13950 | agmsmith | 2005-08-13 11:38:44 -0400 (Sat, 13 Aug 2005) | 3 lines
131  * Oops, "spamdb" is already used for a Unix package.  And spamdatabase is
132  * already reserved by a domain name squatter.  Use "spamdbm" instead.
133  *
134  * r13949 | agmsmith | 2005-08-13 11:17:52 -0400 (Sat, 13 Aug 2005) | 3 lines
135  * Renamed spamfilter to be the more meaningful spamdb (spam database) and
136  * moved it into its own source directory in preparation for adding resources.
137  *
138  * r13628 | agmsmith | 2005-07-10 20:11:29 -0400 (Sun, 10 Jul 2005) | 3 lines
139  * Updated keyword expansion to use SVN keywords.  Also seeing if svn is
140  * working well enough for me to update files from BeOS R5.
141  *
142  * r11909 | axeld | 2005-03-18 19:09:19 -0500 (Fri, 18 Mar 2005) | 2 lines
143  * Moved bin/ directory out of apps/.
144  *
145  * r11769 | bonefish | 2005-03-17 03:30:54 -0500 (Thu, 17 Mar 2005) | 1 line
146  * Move trunk into respective module.
147  *
148  * r10362 | nwhitehorn | 2004-12-06 20:14:05 -0500 (Mon, 06 Dec 2004) | 2 lines
149  * Fixed the spam filter so it works correctly now.
150  *
151  * r9934 | nwhitehorn | 2004-11-11 21:55:05 -0500 (Thu, 11 Nov 2004) | 2 lines
152  * Added AGMS's excellent spam detection software.  Still some weirdness with
153  * the configuration interface from E-mail prefs.
154  *
155  * Revision 1.2  2004/12/07 01:14:05  nwhitehorn
156  * Fixed the spam filter so it works correctly now.
157  *
158  * Revision 1.87  2004/09/20 15:57:26  nwhitehorn
159  * Mostly updated the tree to Be/Haiku style identifier naming conventions.  I
160  * have a few more things to work out, mostly in mail_util.h, and then I'm
161  * proceeding to jamify the build system.  Then we go into Haiku CVS.
162  *
163  * Revision 1.86  2003/07/26 16:47:46  agmsmith
164  * Bug - wasn't allowing double classification if the user had turned on
165  * the option to ignore the previous classification.
166  *
167  * Revision 1.85  2003/07/08 14:52:57  agmsmith
168  * Fix bug with classification choices dialog box coming up with weird
169  * sizes due to RefsReceived message coming in before ReadyToRun had
170  * finished setting up the default sizes of the controls.
171  *
172  * Revision 1.84  2003/07/04 19:59:29  agmsmith
173  * Now with a GUI option to let you declassify messages (set them back
174  * to uncertain, rather than spam or genuine).  Required a BAlert
175  * replacement since BAlerts can't do four buttons.
176  *
177  * Revision 1.83  2003/07/03 20:40:36  agmsmith
178  * Added Uncertain option for declassifying messages.
179  *
180  * Revision 1.82  2003/06/16 14:57:13  agmsmith
181  * Detect spam which uses mislabeled text attachments, going by the file name
182  * extension.
183  *
184  * Revision 1.81  2003/04/08 20:27:04  agmsmith
185  * AGMSBayesianSpamServer now shuts down immediately and returns true if
186  * it is asked to quit by the registrar.
187  *
188  * Revision 1.80  2003/04/07 19:20:27  agmsmith
189  * Ooops, int64 doesn't exist, use long long instead.
190  *
191  * Revision 1.79  2003/04/07 19:05:22  agmsmith
192  * Now with Allen Brunson's atoll for PPC (you need the %lld, but that
193  * becomes %lld on other systems).
194  *
195  * Revision 1.78  2003/04/04 22:43:53  agmsmith
196  * Fixed up atoll PPC processor hack so it would actually work, was just
197  * returning zero which meant that it wouldn't load in the database file
198  * (read the size as zero).
199  *
200  * Revision 1.77  2003/01/22 03:19:48  agmsmith
201  * Don't convert words to lower case, the case is important for spam.
202  * Particularly sentences which start with exciting words, which you
203  * normally won't use at the start of a sentence (and thus capitalize).
204  *
205  * Revision 1.76  2002/12/18 02:29:22  agmsmith
206  * Add space for the Uncertain display in Tracker.
207  *
208  * Revision 1.75  2002/12/18 01:54:37  agmsmith
209  * Added uncertain sound effect.
210  *
211  * Revision 1.74  2002/12/13 23:53:12  agmsmith
212  * Minimize the window before opening it so that it doesn't flash on the
213  * screen in server mode.  Also load the database when the window is
214  * displayed so that the user can see the words.
215  *
216  * Revision 1.73  2002/12/13 20:55:57  agmsmith
217  * Documentation.
218  *
219  * Revision 1.72  2002/12/13 20:26:11  agmsmith
220  * Fixed bug with adding messages in strings to database (was limited to
221  * messages at most 1K long).  Also changed default server mode to true
222  * since that's what people use most.
223  *
224  * Revision 1.71  2002/12/11 22:37:30  agmsmith
225  * Added commands to train on spam and genuine e-mail messages passed
226  * in string arguments rather than via external files.
227  *
228  * Revision 1.70  2002/12/10 22:12:41  agmsmith
229  * Adding a message to the database now uses a BPositionIO rather than a
230  * file and file name (for future string rather than file additions).  Also
231  * now re-evaluate a file after reclassifying it so that the user can see
232  * the new ratio.  Also remove the [Spam 99.9%] subject prefix when doing
233  * a re-evaluation or classification (the number would be wrong).
234  *
235  * Revision 1.69  2002/12/10 01:46:04  agmsmith
236  * Added the Chi-Squared scoring method.
237  *
238  * Revision 1.68  2002/11/29 22:08:25  agmsmith
239  * Change default purge age to 2000 so that hitting the purge button
240  * doesn't erase stuff from the new sample database.
241  *
242  * Revision 1.67  2002/11/25 20:39:39  agmsmith
243  * Don't need to massage the MIME type since the mail library now does
244  * the lower case conversion and converts TEXT to text/plain too.
245  *
246  * Revision 1.66  2002/11/20 22:57:12  nwhitehorn
247  * PPC Compatibility Fixes
248  *
249  * Revision 1.65  2002/11/10 18:43:55  agmsmith
250  * Added a time delay to some quitting operations so that scripting commands
251  * from a second client (like a second e-mail account) will make the program
252  * abort the quit operation.
253  *
254  * Revision 1.64  2002/11/05 18:05:16  agmsmith
255  * Looked at Nathan's PPC changes (thanks!), modified style a bit.
256  *
257  * Revision 1.63  2002/11/04 03:30:22  nwhitehorn
258  * Now works (or compiles at least) on PowerPC.  I'll get around to testing it
259  * later.
260  *
261  * Revision 1.62  2002/11/04 01:03:33  agmsmith
262  * Fixed warnings so it compiles under the bemaildaemon system.
263  *
264  * Revision 1.61  2002/11/03 23:00:37  agmsmith
265  * Added to the bemaildaemon project on SourceForge.  Hmmmm, seems to switch to
266  * a new version if I commit and specify a message, but doesn't accept the
267  * message and puts up the text editor.  Must be a bug where cvs eats the first
268  * option after "commit".
269  *
270  * Revision 1.60.1.1  2002/10/22 14:29:27  agmsmith
271  * Needed to recompile with the original Libmail.so from Beta/1 since
272  * the current library uses a different constructor, and thus wouldn't
273  * run when used with the old library.
274  *
275  * Revision 1.60  2002/10/21 16:41:27  agmsmith
276  * Return a special error code when no words are found in a message,
277  * so that messages without text/plain parts can be recognized as
278  * spam by the mail filter.
279  *
280  * Revision 1.59  2002/10/20 21:29:47  agmsmith
281  * Watch out for MIME types of "text", treat as text/plain.
282  *
283  * Revision 1.58  2002/10/20 18:29:07  agmsmith
284  * *** empty log message ***
285  *
286  * Revision 1.57  2002/10/20 18:25:02  agmsmith
287  * Fix case sensitivity in MIME type tests, and fix text/any test.
288  *
289  * Revision 1.56  2002/10/19 17:00:10  agmsmith
290  * Added the pop-up menu for the tokenize modes.
291  *
292  * Revision 1.55  2002/10/19 14:54:06  agmsmith
293  * Fudge MIME type of body text components so that they get
294  * treated as text.
295  *
296  * Revision 1.54  2002/10/19 00:56:37  agmsmith
297  * The parsing of e-mail messages seems to be working now, just need
298  * to add some user interface stuff for the tokenizing mode.
299  *
300  * Revision 1.53  2002/10/18 23:37:56  agmsmith
301  * More mail kit usage, can now decode headers, but more to do.
302  *
303  * Revision 1.52  2002/10/16 23:52:33  agmsmith
304  * Getting ready to add more tokenizing modes, exploring Mail Kit to break
305  * apart messages into components (and decode BASE64 and other encodings).
306  *
307  * Revision 1.51  2002/10/11 20:05:31  agmsmith
308  * Added installation of sound effect names, which the filter will use.
309  *
310  * Revision 1.50  2002/10/02 16:50:02  agmsmith
311  * Forgot to add credits to the algorithm inventors.
312  *
313  * Revision 1.49  2002/10/01 00:39:29  agmsmith
314  * Added drag and drop to evaluate files or to add them to the list.
315  *
316  * Revision 1.48  2002/09/30 19:44:17  agmsmith
317  * Switched to Gary Robinson's method, removed max spam/genuine word.
318  *
319  * Revision 1.47  2002/09/23 17:08:55  agmsmith
320  * Add an attribute with the spam ratio to files which have been evaluated.
321  *
322  * Revision 1.46  2002/09/23 02:50:32  agmsmith
323  * Fiddling with display width of e-mail attributes.
324  *
325  * Revision 1.45  2002/09/23 01:13:56  agmsmith
326  * Oops, bug in string evaluation scripting.
327  *
328  * Revision 1.44  2002/09/22 21:00:55  agmsmith
329  * Added EvaluateString so that the BeMail add-on can pass the info without
330  * having to create a temporary file.
331  *
332  * Revision 1.43  2002/09/20 19:56:02  agmsmith
333  * Added about box and button for estimating the spam ratio of a file.
334  *
335  * Revision 1.42  2002/09/20 01:22:26  agmsmith
336  * More testing, decide that an extreme ratio bias point of 0.5 is good.
337  *
338  * Revision 1.41  2002/09/19 21:17:12  agmsmith
339  * Changed a few names and proofread the program.
340  *
341  * Revision 1.40  2002/09/19 14:27:17  agmsmith
342  * Rearranged execution of commands, moving them to a separate looper
343  * rather than the BApplication, so that thousands of files could be
344  * processed without worrying about the message queue filling up.
345  *
346  * Revision 1.39  2002/09/18 18:47:16  agmsmith
347  * Stop flickering when the view is partially obscured, update cached
348  * values in all situations except when app is busy.
349  *
350  * Revision 1.38  2002/09/18 18:08:11  agmsmith
351  * Add a function for evaluating the spam ratio of a message.
352  *
353  * Revision 1.37  2002/09/16 01:30:16  agmsmith
354  * Added Get Oldest command.
355  *
356  * Revision 1.36  2002/09/16 00:47:52  agmsmith
357  * Change the display to counter-weigh the spam ratio by the number of
358  * messages.
359  *
360  * Revision 1.35  2002/09/15 20:49:35  agmsmith
361  * Scrolling improved, buttons, keys and mouse wheel added.
362  *
363  * Revision 1.34  2002/09/15 03:46:10  agmsmith
364  * Up and down buttons under construction.
365  *
366  * Revision 1.33  2002/09/15 02:09:21  agmsmith
367  * Took out scroll bar.
368  *
369  * Revision 1.32  2002/09/15 02:05:30  agmsmith
370  * Trying to add a scroll bar, but it isn't very useful.
371  *
372  * Revision 1.31  2002/09/14 23:06:28  agmsmith
373  * Now has live updates of the list of words.
374  *
375  * Revision 1.30  2002/09/14 19:53:11  agmsmith
376  * Now with a better display of the words.
377  *
378  * Revision 1.29  2002/09/13 21:33:54  agmsmith
379  * Now draws the words in the word display view, but still primitive.
380  *
381  * Revision 1.28  2002/09/13 19:28:02  agmsmith
382  * Added display of most genuine and most spamiest, fixed up cursor.
383  *
384  * Revision 1.27  2002/09/13 03:08:42  agmsmith
385  * Show current word and message counts, and a busy cursor.
386  *
387  * Revision 1.26  2002/09/13 00:00:08  agmsmith
388  * Fixed up some deadlock problems, now using asynchronous message replies.
389  *
390  * Revision 1.25  2002/09/12 17:56:58  agmsmith
391  * Keep track of words which are spamiest and genuinest.
392  *
393  * Revision 1.24  2002/09/12 01:57:10  agmsmith
394  * Added server mode.
395  *
396  * Revision 1.23  2002/09/11 23:30:45  agmsmith
397  * Added Purge button and ignore classification checkbox.
398  *
399  * Revision 1.22  2002/09/11 21:23:13  agmsmith
400  * Added bulk update choice, purge button, moved to a BView container
401  * for all the controls (so background colour could be set, and Pulse
402  * works normally for it too).
403  *
404  * Revision 1.21  2002/09/10 22:52:49  agmsmith
405  * You can now change the database name in the GUI.
406  *
407  * Revision 1.20  2002/09/09 14:20:42  agmsmith
408  * Now can have multiple backups, and implemented refs received.
409  *
410  * Revision 1.19  2002/09/07 19:14:56  agmsmith
411  * Added standard GUI measurement code.
412  *
413  * Revision 1.18  2002/09/06 21:03:03  agmsmith
414  * Rearranging code to avoid forward references when adding a window class.
415  *
416  * Revision 1.17  2002/09/06 02:54:00  agmsmith
417  * Added the ability to purge old words from the database.
418  *
419  * Revision 1.16  2002/09/05 00:46:03  agmsmith
420  * Now adds spam to the database!
421  *
422  * Revision 1.15  2002/09/04 20:32:15  agmsmith
423  * Read ahead a couple of letters to decode quoted-printable better.
424  *
425  * Revision 1.14  2002/09/04 03:10:03  agmsmith
426  * Can now tokenize (break into words) a text file.
427  *
428  * Revision 1.13  2002/09/03 21:50:54  agmsmith
429  * Count database command, set up MIME type for the database file.
430  *
431  * Revision 1.12  2002/09/03 19:55:54  agmsmith
432  * Added loading and saving the database.
433  *
434  * Revision 1.11  2002/09/02 03:35:33  agmsmith
435  * Create indices and set up attribute associations with the e-mail MIME type.
436  *
437  * Revision 1.10  2002/09/01 15:52:49  agmsmith
438  * Can now delete the database.
439  *
440  * Revision 1.9  2002/08/31 21:55:32  agmsmith
441  * Yet more scripting.
442  *
443  * Revision 1.8  2002/08/31 21:41:37  agmsmith
444  * Under construction, with example code to decode a B_REPLY.
445  *
446  * Revision 1.7  2002/08/30 19:29:06  agmsmith
447  * Combined loading and saving settings into one function.
448  *
449  * Revision 1.6  2002/08/30 02:01:10  agmsmith
450  * Working on loading and saving settings.
451  *
452  * Revision 1.5  2002/08/29 23:17:42  agmsmith
453  * More scripting.
454  *
455  * Revision 1.4  2002/08/28 00:40:52  agmsmith
456  * Scripting now seems to work, at least the messages flow properly.
457  *
458  * Revision 1.3  2002/08/25 21:51:44  agmsmith
459  * Getting the about text formatting right.
460  *
461  * Revision 1.2  2002/08/25 21:28:20  agmsmith
462  * Trying out the BeOS scripting system as a way of implementing the program.
463  *
464  * Revision 1.1  2002/08/24 02:27:51  agmsmith
465  * Initial revision
466  */
467 
468 /* Standard C Library. */
469 
470 #include <errno.h>
471 #include <stdio.h>
472 #include <stdlib.h>
473 #include <strings.h>
474 
475 /* Standard C++ library. */
476 
477 #include <iostream>
478 
479 /* STL (Standard Template Library) headers. */
480 
481 #include <map>
482 #include <queue>
483 #include <set>
484 #include <string>
485 #include <vector>
486 
487 using namespace std;
488 
489 /* BeOS (Be Operating System) headers. */
490 
491 #include <Alert.h>
492 #include <Application.h>
493 #include <Beep.h>
494 #include <Button.h>
495 #include <CheckBox.h>
496 #include <Cursor.h>
497 #include <Directory.h>
498 #include <Entry.h>
499 #include <File.h>
500 #include <FilePanel.h>
501 #include <FindDirectory.h>
502 #include <fs_index.h>
503 #include <fs_info.h>
504 #include <MenuBar.h>
505 #include <MenuItem.h>
506 #include <Message.h>
507 #include <MessageQueue.h>
508 #include <MessageRunner.h>
509 #include <Mime.h>
510 #include <NodeInfo.h>
511 #include <Path.h>
512 #include <Picture.h>
513 #include <PictureButton.h>
514 #include <Point.h>
515 #include <Polygon.h>
516 #include <PopUpMenu.h>
517 #include <PropertyInfo.h>
518 #include <RadioButton.h>
519 #include <Resources.h>
520 #include <Screen.h>
521 #include <ScrollBar.h>
522 #include <String.h>
523 #include <StringView.h>
524 #include <TextControl.h>
525 #include <View.h>
526 
527 /* Included from the Mail Daemon Replacement project (MDR) include/public
528 directory, available from http://sourceforge.net/projects/bemaildaemon/ */
529 
530 #include <MailMessage.h>
531 #include <MailAttachment.h>
532 
533 
534 /******************************************************************************
535  * Global variables, and not-so-variable things too.  Grouped by functionality.
536  */
537 
538 static float g_MarginBetweenControls; /* Space of a letter "M" between them. */
539 static float g_LineOfTextHeight;      /* Height of text in the current font. */
540 static float g_StringViewHeight;      /* Height of a string view text box. */
541 static float g_ButtonHeight;          /* How many pixels tall buttons are. */
542 static float g_CheckBoxHeight;        /* Pixel height of check boxes. */
543 static float g_RadioButtonHeight;     /* Pixel height of radio buttons. */
544 static float g_PopUpMenuHeight;       /* Pixel height of pop-up menus. */
545 static float g_TextBoxHeight;         /* Same for editable text controls. */
546 
547 static const char *g_ABSAppSignature =
548   "application/x-vnd.agmsmith.spamdbm"; /* This program's signature. */
549 
550 static const char *g_ABSDatabaseFileMIMEType =
551   "text/x-vnd.agmsmith.spam_probability_database"; /* Database MIME type. */
552 
553 static const char *g_DefaultDatabaseFileName =
554   "SpamDBM Database"; /* Leaf name only, no directory part. */
555 
556 static const char *g_DatabaseRecognitionString =
557   "Spam Database File"; /* Presumably tags database files - confirm. */
558 
559 static const char *g_AttributeNameClassification = "MAIL:classification";
560 static const char *g_AttributeNameSpamRatio = "MAIL:ratio_spam";
561 static const char *g_BeepGenuine = "SpamFilter-Genuine"; /* Sound name. */
562 static const char *g_BeepSpam = "SpamFilter-Spam"; /* Sound name. */
563 static const char *g_BeepUncertain = "SpamFilter-Uncertain"; /* Sound name. */
564 static const char *g_ClassifiedSpam = "Spam"; /* MAIL:classification value. */
565 static const char *g_ClassifiedGenuine = "Genuine"; /* Ditto. */
566 static const char *g_DataName = "data";
567 static const char *g_ResultName = "result";
568 
569 static const char *g_SettingsDirectoryName = "Mail";
570 static const char *g_SettingsFileName = "SpamDBM Settings";
571 static const uint32 g_SettingsWhatCode = 'SDBM';
572 static const char *g_BackupSuffix = ".backup %d"; /* %d = backup number. */
573 static const int g_MaxBackups = 10; /* Numbered from 0 to g_MaxBackups - 1. */
574 static const size_t g_MaxWordLength = 50; /* Words longer than this aren't. */
575 static const int g_MaxInterestingWords = 150; /* Top N words are examined. */
576 static const double g_RobinsonS = 0.45; /* Default weight for no data. */
577 static const double g_RobinsonX = 0.5; /* Halfway point for no data. */
578 
579 static bool g_CommandLineMode;
580   /* TRUE if the program was started from the command line (and thus should
581   exit after processing the command), FALSE if it is running with a graphical
582   user interface. */
583 
584 static bool g_ServerMode;
585   /* When TRUE the program runs in server mode - error messages don't result in
586   pop-up dialog boxes, but you can still see them in stderr.  Also the window
587   is minimized, if it exists. */
588 
589 static int g_QuitCountdown = -1; /* Starts negative: not counting down. */
590   /* Set to the number of pulse timing events (about one every half second) to
591   count down before the program quits.  Negative means stop counting.  Zero
592   means quit at the next pulse event.  This is used to keep the program alive
593   for a short while after someone requests that it quit, in case more scripting
594   commands come in, which will stop the countdown.  Needed to handle the case
595   where there are multiple e-mail accounts all requesting spam identification,
596   and one finishes first and tells the server to quit.  It also checks to see
597   that there is no more work to do before trying to quit. */
598 
599 static volatile bool g_AppReadyToRunCompleted = false;
600   /* The BApplication starts processing messages before ReadyToRun finishes,
601   which can lead to initialisation problems (button heights not determined).
602   So wait for this to turn TRUE in code that might run early, like
603   RefsReceived.  NOTE(review): volatile is not a memory barrier - confirm. */
604 
605 static class CommanderLooper *g_CommanderLooperPntr = NULL;
606 static BMessenger *g_CommanderMessenger = NULL;
607   /* Some globals for use with the looper which processes external commands
608   (arguments received, file references received), needed for avoiding deadlocks
609   which would happen if the BApplication sent a scripting message to itself. */
610 
611 static BCursor *g_BusyCursor = NULL;
612   /* The busy cursor, will be loaded from the resource file during application
613   startup. */
614 
615 typedef enum PropertyNumbersEnum /* One value per scripting property. */
616 {
617   PN_DATABASE_FILE = 0, /* Zero based: values index g_PropertyNames[]. */
618   PN_SPAM,
619   PN_SPAM_STRING,
620   PN_GENUINE,
621   PN_GENUINE_STRING,
622   PN_UNCERTAIN,
623   PN_IGNORE_PREVIOUS_CLASSIFICATION,
624   PN_SERVER_MODE,
625   PN_FLUSH,
626   PN_PURGE_AGE,
627   PN_PURGE_POPULARITY,
628   PN_PURGE,
629   PN_OLDEST,
630   PN_EVALUATE,
631   PN_EVALUATE_STRING,
632   PN_RESET_TO_DEFAULTS,
633   PN_INSTALL_THINGS,
634   PN_TOKENIZE_MODE,
635   PN_SCORING_MODE,
636   PN_MAX /* Count of properties, also the size of g_PropertyNames[]. */
637 } PropertyNumbers; /* Also stored as extra_data in g_ScriptingPropertyList. */
638 
639 static const char * g_PropertyNames [PN_MAX] = /* Scripting property names. */
640 { /* Indexed by PropertyNumbers, so keep the same order as that enum. */
641   "DatabaseFile",
642   "Spam",
643   "SpamString",
644   "Genuine",
645   "GenuineString",
646   "Uncertain",
647   "IgnorePreviousClassification",
648   "ServerMode",
649   "Flush",
650   "PurgeAge",
651   "PurgePopularity",
652   "Purge",
653   "Oldest",
654   "Evaluate",
655   "EvaluateString",
656   "ResetToDefaults",
657   "InstallThings",
658   "TokenizeMode",
659   "ScoringMode"
660 };
661 
662 /* This array lists the scripting commands we can handle, in a format that the
663 scripting system can understand too. */
664 
665 static struct property_info g_ScriptingPropertyList [] =
666 {
667   /* *name; commands[10]; specifiers[10]; *usage; extra_data; ... */
668   {g_PropertyNames[PN_DATABASE_FILE], {B_GET_PROPERTY, 0},
669     {B_DIRECT_SPECIFIER, 0}, "Get the pathname of the current database file.  "
670     "The default name is something like B_USER_SETTINGS_DIRECTORY / "
671     "Mail / SpamDBM Database", PN_DATABASE_FILE,
672     {}, {}, {}},
673   {g_PropertyNames[PN_DATABASE_FILE], {B_SET_PROPERTY, 0},
674     {B_DIRECT_SPECIFIER, 0}, "Change the pathname of the database file to "
675     "use.  It will automatically be converted to an absolute path name, "
676     "so make sure the parent directories exist before setting it.  If it "
677     "doesn't exist, you'll have to use the create command next.",
678     PN_DATABASE_FILE, {}, {}, {}},
679   {g_PropertyNames[PN_DATABASE_FILE], {B_CREATE_PROPERTY, 0},
680     {B_DIRECT_SPECIFIER, 0}, "Creates a new empty database, will replace "
681     "the existing database file too.", PN_DATABASE_FILE, {}, {}, {}},
682   {g_PropertyNames[PN_DATABASE_FILE], {B_DELETE_PROPERTY, 0},
683     {B_DIRECT_SPECIFIER, 0}, "Deletes the database file and all backup copies "
684     "of that file too.  Really only of use for uninstallers.",
685     PN_DATABASE_FILE, {}, {}, {}},
686   {g_PropertyNames[PN_DATABASE_FILE], {B_COUNT_PROPERTIES, 0},
687     {B_DIRECT_SPECIFIER, 0}, "Returns the number of words in the database.",
688     PN_DATABASE_FILE, {}, {}, {}},
689   {g_PropertyNames[PN_SPAM], {B_SET_PROPERTY, 0}, {B_DIRECT_SPECIFIER, 0},
690     "Adds the spam in the given file (specify full pathname to be safe) to "
691     "the database.  The words in the files will be added to the list of words "
692     "in the database that identify spam messages.  The files processed will "
693     "also have the attribute MAIL:classification added with a value of "
694     "\"Spam\" or \"Genuine\" as specified.  They also have their spam ratio "
695     "attribute updated, as if you had also used the Evaluate command on "
696     "them.  If they already have the MAIL:classification "
697     "attribute and it matches the new classification then they won't get "
698     "processed (and if it is different, they will get removed from the "
699     "statistics for the old class and added to the statistics for the new "
700     "one).  You can turn off that behaviour with the "
701     "IgnorePreviousClassification property.  The command line version lets "
702     "you specify more than one pathname.", PN_SPAM, {}, {}, {}},
703   {g_PropertyNames[PN_SPAM], {B_COUNT_PROPERTIES, 0}, {B_DIRECT_SPECIFIER, 0},
704     "Returns the number of spam messages in the database.", PN_SPAM,
705     {}, {}, {}},
706   {g_PropertyNames[PN_SPAM_STRING], {B_SET_PROPERTY, 0},
707     {B_DIRECT_SPECIFIER, 0}, "Adds the spam in the given string (assumed to "
708     "be the text of a whole e-mail message, not just a file name) to the "
709     "database.", PN_SPAM_STRING, {}, {}, {}},
710   {g_PropertyNames[PN_GENUINE], {B_SET_PROPERTY, 0}, {B_DIRECT_SPECIFIER, 0},
711     "Similar to adding spam except that the message file is added to the "
712     "genuine statistics.", PN_GENUINE, {}, {}, {}},
713   {g_PropertyNames[PN_GENUINE], {B_COUNT_PROPERTIES, 0},
714     {B_DIRECT_SPECIFIER, 0}, "Returns the number of genuine messages in the "
715     "database.", PN_GENUINE, {}, {}, {}},
716   {g_PropertyNames[PN_GENUINE_STRING], {B_SET_PROPERTY, 0},
717     {B_DIRECT_SPECIFIER, 0}, "Adds the genuine message in the given string "
718     "(assumed to be the text of a whole e-mail message, not just a file name) "
719     "to the database.", PN_GENUINE_STRING, {}, {}, {}},
720   {g_PropertyNames[PN_UNCERTAIN], {B_SET_PROPERTY, 0}, {B_DIRECT_SPECIFIER, 0},
721     "Similar to adding spam except that the message file is removed from the "
722     "database, undoing the previous classification.  Obviously, it needs to "
723     "have been classified previously (using the file attributes) so it can "
724     "tell if it is removing spam or genuine words.", PN_UNCERTAIN, {}, {}, {}},
725   {g_PropertyNames[PN_IGNORE_PREVIOUS_CLASSIFICATION], {B_SET_PROPERTY, 0},
726     {B_DIRECT_SPECIFIER, 0}, "If set to true then the previous classification "
727     "(which was saved as an attribute of the e-mail message file) will be "
728     "ignored, so that you can add the message to the database again.  If set "
729     "to false (the normal case), the attribute will be examined, and if the "
730     "message has already been classified as what you claim it is, nothing "
731     "will be done.  If it was misclassified, then the message will be removed "
732     "from the statistics for the old class and added to the stats for the "
733     "new classification you have requested.",
734     PN_IGNORE_PREVIOUS_CLASSIFICATION, {}, {}, {}},
735   {g_PropertyNames[PN_IGNORE_PREVIOUS_CLASSIFICATION], {B_GET_PROPERTY, 0},
736     {B_DIRECT_SPECIFIER, 0}, "Find out the current setting of the flag for "
737     "ignoring the previously recorded classification.",
738     PN_IGNORE_PREVIOUS_CLASSIFICATION, {}, {}, {}},
739   {g_PropertyNames[PN_SERVER_MODE], {B_SET_PROPERTY, 0},
740     {B_DIRECT_SPECIFIER, 0}, "If set to true then error messages get printed "
741     "to the standard error stream rather than showing up in an alert box.  "
742     "It also starts up with the window minimized.", PN_SERVER_MODE,
743     {}, {}, {}},
744   {g_PropertyNames[PN_SERVER_MODE], {B_GET_PROPERTY, 0},
745     {B_DIRECT_SPECIFIER, 0}, "Find out the setting of the server mode flag.",
746     PN_SERVER_MODE, {}, {}, {}},
747   {g_PropertyNames[PN_FLUSH], {B_EXECUTE_PROPERTY, 0},
748     {B_DIRECT_SPECIFIER, 0}, "Writes out the database file to disk, if it has "
749     "been updated in memory but hasn't been saved to disk.  It will "
750     "automatically get written when the program exits, so this command is "
751     "mostly useful for server mode.", PN_FLUSH, {}, {}, {}},
752   {g_PropertyNames[PN_PURGE_AGE], {B_SET_PROPERTY, 0},
753     {B_DIRECT_SPECIFIER, 0}, "Sets the old age limit.  Words which haven't "
754       "been updated since this many message additions to the database may be "
755       "deleted when you do a purge.  A good value is 1000, meaning that if a "
756       "word hasn't appeared in the last 1000 spam/genuine messages, it will "
757       "be forgotten.  Zero will purge all words, 1 will purge words not in "
758       "the last message added to the database, 2 will purge words not in the "
759       "last two messages added, and so on.  This is mostly useful for "
760       "removing those one time words which are often hunks of binary garbage, "
761       "not real words.  This acts in combination with the popularity limit; "
762       "both conditions have to be valid before the word gets deleted.",
763       PN_PURGE_AGE, {}, {}, {}},
764   {g_PropertyNames[PN_PURGE_AGE], {B_GET_PROPERTY, 0},
765     {B_DIRECT_SPECIFIER, 0}, "Gets the old age limit.", PN_PURGE_AGE,
766     {}, {}, {}},
767   {g_PropertyNames[PN_PURGE_POPULARITY], {B_SET_PROPERTY, 0},
768     {B_DIRECT_SPECIFIER, 0}, "Sets the popularity limit.  Words which aren't "
769     "this popular may be deleted when you do a purge.  A good value is 5, "
770     "which means that the word is safe from purging if it has been seen in 6 "
771     "or more e-mail messages.  If it's only in 5 or less, then it may get "
772     "purged.  The extreme is zero, where only words that haven't been seen "
773     "in any message are deleted (usually means no words).  This acts in "
774     "combination with the old age limit; both conditions have to be valid "
775     "before the word gets deleted.", PN_PURGE_POPULARITY, {}, {}, {}},
776   {g_PropertyNames[PN_PURGE_POPULARITY], {B_GET_PROPERTY, 0},
777     {B_DIRECT_SPECIFIER, 0}, "Gets the purge popularity limit.",
778     PN_PURGE_POPULARITY, {}, {}, {}},
779   {g_PropertyNames[PN_PURGE], {B_EXECUTE_PROPERTY, 0},
780     {B_DIRECT_SPECIFIER, 0}, "Purges the old obsolete words from the "
781     "database, if they are old enough according to the age limit and also "
782     "unpopular enough according to the popularity limit.", PN_PURGE,
783     {}, {}, {}},
784   {g_PropertyNames[PN_OLDEST], {B_GET_PROPERTY, 0},
785     {B_DIRECT_SPECIFIER, 0}, "Gets the age of the oldest message in the "
786     "database.  It's relative to the beginning of time, so you need to do "
787     "(total messages - age - 1) to see how many messages ago it was added.",
788     PN_OLDEST, {}, {}, {}},
789   {g_PropertyNames[PN_EVALUATE], {B_SET_PROPERTY, 0},
790     {B_DIRECT_SPECIFIER, 0}, "Evaluates a given file (by path name) to see "
791     "if it is spam or not.  Returns the ratio of spam probability vs genuine "
792     "probability, 0.0 meaning completely genuine, 1.0 for completely spam.  "
793     "Normally you should safely be able to consider it as spam if it is over "
794     "0.56 for the Robinson scoring method.  For the ChiSquared method, the "
795     "numbers are near 0 for genuine, near 1 for spam, and anywhere in the "
796     "middle means it can't decide.  The program attaches a MAIL:ratio_spam "
797     "attribute with the ratio as its "
798     "float32 value to the file.  Also returns the top few interesting words "
799     "in \"words\" and the associated per-word probability ratios in "
800     "\"ratios\".", PN_EVALUATE, {}, {}, {}},
801   {g_PropertyNames[PN_EVALUATE_STRING], {B_SET_PROPERTY, 0},
802     {B_DIRECT_SPECIFIER, 0}, "Like Evaluate, but rather than a file name, "
803     "the string argument contains the entire text of the message to be "
804     "evaluated.", PN_EVALUATE_STRING, {}, {}, {}},
805   {g_PropertyNames[PN_RESET_TO_DEFAULTS], {B_EXECUTE_PROPERTY, 0},
806     {B_DIRECT_SPECIFIER, 0}, "Resets all the configuration options to the "
807     "default values, including the database name.", PN_RESET_TO_DEFAULTS,
808     {}, {}, {}},
809   {g_PropertyNames[PN_INSTALL_THINGS], {B_EXECUTE_PROPERTY, 0},
810     {B_DIRECT_SPECIFIER, 0}, "Creates indices for the MAIL:classification and "
811     "MAIL:ratio_spam attributes on all volumes which support BeOS queries, "
812     "identifies them to the system as e-mail related attributes (modifies "
813     "the text/x-email MIME type), and sets up the new MIME type "
814     "(text/x-vnd.agmsmith.spam_probability_database) for the database file.  "
815     "Also registers names for the sound effects used by the separate filter "
816     "program (use the installsound BeOS program or the Sounds preferences "
817     "program to associate sound files with the names).", PN_INSTALL_THINGS,
818     {}, {}, {}},
819   {g_PropertyNames[PN_TOKENIZE_MODE], {B_SET_PROPERTY, 0},
820     {B_DIRECT_SPECIFIER, 0}, "Sets the method used for breaking up the "
821     "message into words.  Use \"Whole\" for the whole file (also use it for "
822     "non-email files).  The file isn't broken into parts; the whole thing is "
823     "converted into words, headers and attachments are just more raw data.  "
824     "Well, not quite raw data since it converts quoted-printable codes "
825     "(equals sign followed by hex digits or end of line) to the equivalent "
826     "single characters.  \"PlainText\" breaks the file into MIME components "
827     "and only looks at the ones which are of MIME type text/plain.  "
828     "\"AnyText\" will look for words in all text/* things, including "
829     "text/html attachments.  \"AllParts\" will decode all message components "
830     "and look for words in them, including binary attachments.  "
831     "\"JustHeader\" will only look for words in the message header.  "
832     "\"AllPartsAndHeader\", \"PlainTextAndHeader\" and \"AnyTextAndHeader\" "
833     "will also include the words from the message headers.", PN_TOKENIZE_MODE,
834     {}, {}, {}},
835   {g_PropertyNames[PN_TOKENIZE_MODE], {B_GET_PROPERTY, 0},
836     {B_DIRECT_SPECIFIER, 0}, "Gets the method used for breaking up the "
837     "message into words.", PN_TOKENIZE_MODE, {}, {}, {}},
838   {g_PropertyNames[PN_SCORING_MODE], {B_SET_PROPERTY, 0},
839     {B_DIRECT_SPECIFIER, 0}, "Sets the method used for combining the "
840     "probabilities of individual words into an overall score.  "
841     "\"Robinson\" mode will use Gary Robinson's nth root of the product "
842     "method.  It gives a nice range of values between 0 and 1 so you can "
843     "see shades of spaminess.  The cutoff point between spam and genuine "
844     "varies depending on your database of words (0.56 was one point in "
845     "some experiments).  \"ChiSquared\" mode will use chi-squared "
846     "statistics to evaluate the difference in probabilities that the lists "
847     "of word ratios are random.  The result is very close to 0 for genuine "
848     "and very close to 1 for spam, and near the middle if it is uncertain.",
849     PN_SCORING_MODE, {}, {}, {}},
850   {g_PropertyNames[PN_SCORING_MODE], {B_GET_PROPERTY, 0},
851     {B_DIRECT_SPECIFIER, 0}, "Gets the method used for combining the "
852     "individual word ratios into an overall score.", PN_SCORING_MODE,
853     {}, {}, {}},
854 
855   { 0 }
856 };
857 
858 
859 /* The various scoring modes as text and enums.  See PN_SCORING_MODE. */
860 /* Mode notes below are taken from the PN_SCORING_MODE help text. */
861 typedef enum ScoringModeEnum
862 {
863   SM_ROBINSON = 0, /* Gary Robinson's nth root of the product method. */
864   SM_CHISQUARED, /* Chi-squared statistics on the word ratios. */
865   SM_MAX /* Number of modes, not a mode; sizes g_ScoringModeNames. */
866 } ScoringModes;
867 /* Names listed in enum order; presumably indexed by a ScoringModes value. */
868 static const char * g_ScoringModeNames [SM_MAX] =
869 {
870   "Robinson",
871   "ChiSquared"
872 };
873 
874 
875 /* The various tokenizing modes as text and enums.  See PN_TOKENIZE_MODE. */
876 /* Mode notes are from the PN_TOKENIZE_MODE help text, matched by name. */
877 typedef enum TokenizeModeEnum
878 {
879   TM_WHOLE = 0, /* Whole file treated as raw data, not split into parts. */
880   TM_PLAIN_TEXT, /* Only the text/plain MIME components. */
881   TM_PLAIN_TEXT_HEADER, /* As TM_PLAIN_TEXT, plus message header words. */
882   TM_ANY_TEXT, /* All text MIME components, including text/html. */
883   TM_ANY_TEXT_HEADER, /* As TM_ANY_TEXT, plus message header words. */
884   TM_ALL_PARTS, /* All components, including binary attachments. */
885   TM_ALL_PARTS_HEADER, /* As TM_ALL_PARTS, plus message header words. */
886   TM_JUST_HEADER, /* Only the words in the message header. */
887   TM_MAX /* Number of modes, not a mode; sizes g_TokenizeModeNames. */
888 } TokenizeModes;
889 /* NOTE(review): PN_TOKENIZE_MODE help says "Whole", "PlainText", etc; these names differ -- confirm. */
890 static const char * g_TokenizeModeNames [TM_MAX] =
891 {
892   "All",
893   "Plain text",
894   "Plain text and header",
895   "Any text",
896   "Any text and header",
897   "All parts",
898   "All parts and header",
899   "Just header"
900 };
901 
902 
903 /* Possible message classifications. */
904 /* CL_MAX is also what ClassificationChoicesWindow reports on cancel. */
905 typedef enum ClassificationTypesEnum
906 {
907   CL_GENUINE = 0,
908   CL_SPAM,
909   CL_UNCERTAIN,
910   CL_MAX /* Number of classifications; sizes g_ClassificationTypeNames. */
911 } ClassificationTypes;
912 /* Names in enum order; the first two reuse strings declared elsewhere. */
913 static const char * g_ClassificationTypeNames [CL_MAX] =
914 {
915   g_ClassifiedGenuine,
916   g_ClassifiedSpam,
917   "Uncertain"
918 };
919 
920 
921 /* Some polygon graphics for the scroll arrows. */
922 /* Seven vertices, y written as 2 * (n); presumably the line-up arrow. */
923 static BPoint g_UpLinePoints [] =
924 {
925   BPoint (8, 2 * (1)),
926   BPoint (14, 2 * (6)),
927   BPoint (10, 2 * (6)),
928   BPoint (10, 2 * (13)),
929   BPoint (6, 2 * (13)),
930   BPoint (6, 2 * (6)),
931   BPoint (2, 2 * (6))
932 };
933 
934 static BPoint g_DownLinePoints [] =
935 { /* Same x values as g_UpLinePoints, with each y of n replaced by 14-n. */
936   BPoint (8, 2 * (14-1)),
937   BPoint (14, 2 * (14-6)),
938   BPoint (10, 2 * (14-6)),
939   BPoint (10, 2 * (14-13)),
940   BPoint (6, 2 * (14-13)),
941   BPoint (6, 2 * (14-6)),
942   BPoint (2, 2 * (14-6))
943 };
944 
945 static BPoint g_UpPagePoints [] =
946 { /* Eleven vertices, y written as 2 * (n); presumably the page-up arrow. */
947   BPoint (8, 2 * (1)),
948   BPoint (13, 2 * (6)),
949   BPoint (10, 2 * (6)),
950   BPoint (14, 2 * (10)),
951   BPoint (10, 2 * (10)),
952   BPoint (10, 2 * (13)),
953   BPoint (6, 2 * (13)),
954   BPoint (6, 2 * (10)),
955   BPoint (2, 2 * (10)),
956   BPoint (6, 2 * (6)),
957   BPoint (3, 2 * (6))
958 };
959 
960 static BPoint g_DownPagePoints [] =
961 { /* Same x values as g_UpPagePoints, with each y of n replaced by 14-n. */
962   BPoint (8, 2 * (14-1)),
963   BPoint (13, 2 * (14-6)),
964   BPoint (10, 2 * (14-6)),
965   BPoint (14, 2 * (14-10)),
966   BPoint (10, 2 * (14-10)),
967   BPoint (10, 2 * (14-13)),
968   BPoint (6, 2 * (14-13)),
969   BPoint (6, 2 * (14-10)),
970   BPoint (2, 2 * (14-10)),
971   BPoint (6, 2 * (14-6)),
972   BPoint (3, 2 * (14-6))
973 };
974 
975 
976 /* An array of flags to identify characters which are considered to be spaces.
977 If character code X has g_SpaceCharacters[X] set to true then it is a
978 space-like character.  Character codes 128 and above are always non-space since
979 they are UTF-8 characters.  Initialised in the ABSApp constructor. */
980 /* Only index with codes 0 to 127; the table has no entries beyond that. */
981 static bool g_SpaceCharacters [128];
982 
983 
984 
985 /******************************************************************************
986  * Each word in the spam database gets one of these structures.  The database
987  * has a string (the word) as the key and this structure as the value
988  * (statistics for that word).
989  */
990 /* StatisticsRecord names the struct; StatisticsPointer is a pointer to it. */
991 typedef struct StatisticsStruct
992 {
993   uint32 age;
994     /* Sequence number for the time when this word was last updated in the
995     database, so that we can remove old words (not seen in recent messages).
996     It's zero for the first file ever added (spam or genuine) to the
997     database, 1 for all words added or updated by the second file, etc.  If a
998     later file updates an existing word, it gets the age of the later file. */
999 
1000   uint32 genuineCount;
1001     /* A count of the number of genuine messages which contain the word. */
1002 
1003   uint32 spamCount;
1004     /* A count of the number of spam e-mail messages which contain the word. */
1005 
1006 } StatisticsRecord, *StatisticsPointer;
1007 
1008 typedef map<string, StatisticsRecord> StatisticsMap;
1009   /* The main data storage type: maps each word to its StatisticsRecord.
1010   Naming it makes derived types such as StatisticsMap::iterator easier to
1011   spell.  Presumably std::map and std::string via a using declaration. */
1012 
1013 
1014 
1015 /******************************************************************************
1016  * An alert box asking how the user wants to mark messages.  There are buttons
1017  * for each classification category, and a checkbox to mark all remaining N
1018  * messages the same way.  And a cancel button.  To use it, first create the
1019  * ClassificationChoicesWindow, specifying the input arguments.  Then call the
1020  * Go method which will show the window, stuff the user's answer into your
1021  * output arguments (class set to CL_MAX if the user cancels), and destroy the
1022  * window.  Implemented because BAlert only allows 3 buttons, max!
1023  */
1024 
1025 class ClassificationChoicesWindow : public BWindow
1026 {
1027 public:
1028   /* Constructor.  No destructor is declared for this class. */
1029   ClassificationChoicesWindow (BRect FrameRect,
1030     const char *FileName, int NumberOfFiles);
1031 
1032   /* BeOS virtual functions. */
1033   virtual void MessageReceived (BMessage *MessagePntr);
1034 
1035   /* Our methods.  Go writes the user's answers through both pointers. */
1036   void Go (bool *BulkModeSelectedPntr,
1037     ClassificationTypes *ChoosenClassificationPntr);
1038 
1039   /* Message codes for the class buttons, cancel button and bulk checkbox. */
1040   static const uint32 MSG_CLASS_BUTTONS = 'ClB0';
1041   static const uint32 MSG_CANCEL_BUTTON = 'Cncl';
1042   static const uint32 MSG_BULK_CHECKBOX = 'BlkK';
1043 
1044 private:
1045   /* Member variables.  Presumably copies of the pointers passed to Go. */
1046   bool *m_BulkModeSelectedPntr;
1047   ClassificationTypes *m_ChoosenClassificationPntr;
1048 };
1049 
1050 class ClassificationChoicesView : public BView
1051 {
1052 public:
1053   /* Constructor; takes the same arguments as ClassificationChoicesWindow. */
1054   ClassificationChoicesView (BRect FrameRect,
1055     const char *FileName, int NumberOfFiles);
1056 
1057   /* BeOS virtual functions. */
1058   virtual void AttachedToWindow ();
1059   virtual void GetPreferredSize (float *width, float *height);
1060 
1061 private:
1062   /* Member variables.  NOTE(review): m_FileName is a pointer, not a copy. */
1063   const char *m_FileName;
1064   int         m_NumberOfFiles;
1065   float       m_PreferredBottomY;
1066 };
1067 
1068 
1069 
1070 /******************************************************************************
1071  * Due to deadlock problems with the BApplication posting scripting messages to
1072  * itself, we need to add a second Looper.  Its job is just to convert
1073  * command line arguments and arguments from the Tracker (refs received) into a
1074  * series of scripting commands sent to the main BApplication.  It also prints
1075  * out the replies received (to stdout for command line replies).  An instance
1076  * of this class will be created and run by the main() function, and shut down
1077  * by it too.
1078  */
1079 
1080 class CommanderLooper : public BLooper
1081 {
1082 public:
1083   CommanderLooper ();
1084   ~CommanderLooper ();
1085   virtual void MessageReceived (BMessage *MessagePntr);
1086   /* CommandReferences defaults: not bulk mode, classification CL_GENUINE. */
1087   void CommandArguments (int argc, char **argv);
1088   void CommandReferences (BMessage *MessagePntr,
1089     bool BulkMode = false,
1090     ClassificationTypes BulkClassification = CL_GENUINE);
1091   bool IsBusy ();
1092 
1093 private:
1094   void ProcessArgs (BMessage *MessagePntr);
1095   void ProcessRefs (BMessage *MessagePntr);
1096   /* Message codes; presumably posted to this looper by the Command methods. */
1097   static const uint32 MSG_COMMAND_ARGUMENTS = 'CArg';
1098   static const uint32 MSG_COMMAND_FILE_REFS = 'CRef';
1099 
1100   bool m_IsBusy;
1101 };
1102 
1103 
1104 
1105 /******************************************************************************
1106  * This view contains the various buttons and other controls for setting
1107  * configuration options and displaying the state of the database (but not the
1108  * actual list of words).  It will appear in the top half of the
1109  * DatabaseWindow.
1110  */
1111 
1112 class ControlsView : public BView
1113 {
1114 public:
1115   /* Constructor and destructor. */
1116   ControlsView (BRect NewBounds);
1117   ~ControlsView ();
1118 
1119   /* BeOS virtual functions. */
1120   virtual void AttachedToWindow ();
1121   virtual void FrameResized (float Width, float Height);
1122   virtual void MessageReceived (BMessage *MessagePntr);
1123   virtual void Pulse ();
1124 
1125 private:
1126   /* Message codes for this view's buttons, text boxes and check boxes. */
1127   static const uint32 MSG_BROWSE_BUTTON = 'Brws';
1128   static const uint32 MSG_DATABASE_NAME = 'DbNm';
1129   static const uint32 MSG_ESTIMATE_BUTTON = 'Estm';
1130   static const uint32 MSG_ESTIMATE_FILE_REFS = 'ERef';
1131   static const uint32 MSG_IGNORE_CLASSIFICATION = 'IPCl';
1132   static const uint32 MSG_PURGE_AGE = 'PuAg';
1133   static const uint32 MSG_PURGE_BUTTON = 'Purg';
1134   static const uint32 MSG_PURGE_POPULARITY = 'PuPo';
1135   static const uint32 MSG_SERVER_MODE = 'SrvM';
1136 
1137   /* Our member functions. */
1138   void BrowseForDatabaseFile ();
1139   void BrowseForFileToEstimate ();
1140   void PollServerForChanges ();
1141 
1142   /* Member variables.  The CachedValue ones are presumably what's on screen. */
1143   BButton        *m_AboutButtonPntr;
1144   BButton        *m_AddExampleButtonPntr;
1145   BButton        *m_BrowseButtonPntr;
1146   BFilePanel     *m_BrowseFilePanelPntr;
1147   BButton        *m_CreateDatabaseButtonPntr;
1148   char            m_DatabaseFileNameCachedValue [PATH_MAX];
1149   BTextControl   *m_DatabaseFileNameTextboxPntr;
1150   bool            m_DatabaseLoadDone;
1151   BButton        *m_EstimateSpamButtonPntr;
1152   BFilePanel     *m_EstimateSpamFilePanelPntr;
1153   uint32          m_GenuineCountCachedValue;
1154   BTextControl   *m_GenuineCountTextboxPntr;
1155   bool            m_IgnorePreviousClassCachedValue;
1156   BCheckBox      *m_IgnorePreviousClassCheckboxPntr;
1157   BButton        *m_InstallThingsButtonPntr;
1158   uint32          m_PurgeAgeCachedValue;
1159   BTextControl   *m_PurgeAgeTextboxPntr;
1160   BButton        *m_PurgeButtonPntr;
1161   uint32          m_PurgePopularityCachedValue;
1162   BTextControl   *m_PurgePopularityTextboxPntr;
1163   BButton        *m_ResetToDefaultsButtonPntr;
1164   ScoringModes    m_ScoringModeCachedValue;
1165   BMenuBar       *m_ScoringModeMenuBarPntr;
1166   BPopUpMenu     *m_ScoringModePopUpMenuPntr;
1167   bool            m_ServerModeCachedValue;
1168   BCheckBox      *m_ServerModeCheckboxPntr;
1169   uint32          m_SpamCountCachedValue;
1170   BTextControl   *m_SpamCountTextboxPntr;
1171   bigtime_t       m_TimeOfLastPoll;
1172   TokenizeModes   m_TokenizeModeCachedValue;
1173   BMenuBar       *m_TokenizeModeMenuBarPntr;
1174   BPopUpMenu     *m_TokenizeModePopUpMenuPntr;
1175   uint32          m_WordCountCachedValue;
1176   BTextControl   *m_WordCountTextboxPntr;
1177 };
1178 
1179 
1180 /* Scroll message codes; presumably WordsView's MovementType values. */
1181 static const uint32 MSG_LINE_DOWN = 'LnDn';
1182 static const uint32 MSG_LINE_UP = 'LnUp';
1183 static const uint32 MSG_PAGE_DOWN = 'PgDn';
1184 static const uint32 MSG_PAGE_UP = 'PgUp';
1185 
1186 /******************************************************************************
1187  * This view contains the list of words.  It displays as many as can fit in the
1188  * view rectangle, starting at a specified word (so it can simulate scrolling).
1189  * Usually it will appear in the bottom half of the DatabaseWindow.
1190  */
1191 
1192 class WordsView : public BView
1193 {
1194 public:
1195   /* Constructor.  No destructor is declared for this class. */
1196   WordsView (BRect NewBounds);
1197 
1198   /* BeOS virtual functions. */
1199   virtual void AttachedToWindow ();
1200   virtual void Draw (BRect UpdateRect);
1201   virtual void KeyDown (const char *BufferPntr, int32 NumBytes);
1202   virtual void MakeFocus (bool Focused);
1203   virtual void MessageReceived (BMessage *MessagePntr);
1204   virtual void MouseDown (BPoint point);
1205   virtual void Pulse ();
1206 
1207 private:
1208   /* Our member functions. */
1209   void MoveTextUpOrDown (uint32 MovementType);
1210   void RefsDroppedHere (BMessage *MessagePntr);
1211 
1212   /* Member variables. */
1213   BPictureButton *m_ArrowLineDownPntr;
1214   BPictureButton *m_ArrowLineUpPntr;
1215   BPictureButton *m_ArrowPageDownPntr;
1216   BPictureButton *m_ArrowPageUpPntr;
1217     /* Various buttons for controlling scrolling, since we can't use a scroll
1218     bar.  To make them less obvious, their background view colour needs to be
1219     changed whenever the main view's colour changes. */
1220 
1221   float m_AscentHeight;
1222     /* The ascent height for the font used to draw words.  Height from the top
1223     of the highest letter to the base line (which is near the middle bottom of
1224     the letters, the line where you would align your writing of the text by
1225     hand, all letters have part above, some also have descenders below this
1226     line). */
1227 
1228   rgb_color m_BackgroundColour;
1229     /* The current background colour.  Changes when the focus changes. */
1230 
1231   uint32 m_CachedTotalGenuineMessages;
1232   uint32 m_CachedTotalSpamMessages;
1233   uint32 m_CachedWordCount;
1234     /* These are cached copies of the similar values in the BApplication.  They
1235     reflect what's currently displayed.  If they differ from the values in
1236     the BApplication then the polling loop will try to redraw the display.
1237     They get set to the values actually used during drawing when drawing is
1238     successful. */
1239 
1240   char m_FirstDisplayedWord [g_MaxWordLength + 1];
1241     /* The scrolling display starts at this word.  Since we can't use index
1242     numbers (word[12345] for example), we use the word itself.  The scroll
1243     buttons set this to the next or previous word in the database.  Typing by
1244     the user when the view has the focus will also change this starting word.
1245     */
1246 
1247   rgb_color m_FocusedColour;
1248     /* The colour to use for focused mode (typing by the user is received by
1249     our view). */
1250 
1251   bigtime_t m_LastTimeAKeyWasPressed;
1252     /* Records the time when a key was last pressed.  Used for determining when
1253     the user has stopped typing a batch of letters. */
1254 
1255   float m_LineHeight;
1256     /* Height of a line of text in the font used for the word display.
1257     Includes the height of the letters plus a bit of extra space for between
1258     the lines (called leading). */
1259 
1260   BFont m_TextFont;
1261     /* The font used to draw the text in the window. */
1262 
1263   float m_TextHeight;
1264     /* Maximum total height of the letters in the text, includes the part above
1265     the baseline and the part below.  Doesn't include the sliver of space
1266     between lines. */
1267 
1268   rgb_color m_UnfocusedColour;
1269     /* The colour to use for unfocused mode, when user typing isn't active. */
1270 };
1271 
1272 
1273 
1274 /******************************************************************************
1275  * The BWindow class for this program.  It displays the database in real time,
1276  * and has various buttons and gadgets in the top half for changing settings
1277  * (live changes, no OK button, and they reflect changes done by other programs
1278  * using the server too).  The bottom half is a scrolling view listing all the
1279  * words in the database.  A simple graphic blotch behind each word shows
1280  * whether the word is strongly or weakly related to spam or genuine messages.
1281  * Most operations go through the scripting message system, but it also peeks
1282  * at the BApplication data for examining simple things and when redrawing the
1283  * list of words.
1284  */
1285 
1286 class DatabaseWindow : public BWindow
1287 {
1288 public:
1289   /* Constructor.  No destructor is declared for this class. */
1290   DatabaseWindow ();
1291 
1292   /* BeOS virtual functions. */
1293   virtual void MessageReceived (BMessage *MessagePntr);
1294   virtual bool QuitRequested ();
1295 
1296 private:
1297   /* Member variables.  The top half and bottom half views described above. */
1298   ControlsView *m_ControlsViewPntr;
1299   WordsView    *m_WordsViewPntr;
1300 };
1301 
1302 
1303 
1304 /******************************************************************************
1305  * ABSApp is the BApplication class for this program.  This handles messages
1306  * from the outside world (requests to load a database, or to add files to the
1307  * collection).  It responds to command line arguments (if you start up the
1308  * program a second time, the system will just send the arguments to the
1309  * existing running program).  It responds to scripting messages.  And it
1310  * responds to messages from the window.  Its thread does the main work of
1311  * updating the database and reading / writing files.
1312  */
1313 
1314 class ABSApp : public BApplication
1315 {
1316 public:
1317   /* Constructor and destructor. */
1318   ABSApp ();
1319   ~ABSApp ();
1320 
1321   /* BeOS virtual functions. */
1322   virtual void AboutRequested ();
1323   virtual void ArgvReceived (int32 argc, char **argv);
1324   virtual status_t GetSupportedSuites (BMessage *MessagePntr);
1325   virtual void MessageReceived (BMessage *MessagePntr);
1326   virtual void Pulse ();
1327   virtual bool QuitRequested ();
1328   virtual void ReadyToRun ();
1329   virtual void RefsReceived (BMessage *MessagePntr);
1330   virtual BHandler *ResolveSpecifier (BMessage *MessagePntr, int32 Index,
1331     BMessage *SpecifierMsgPntr, int32 SpecificationKind, const char *Property);
1332 
1333 private:
1334   /* Our member functions. */
1335   status_t AddFileToDatabase (ClassificationTypes IsSpamOrWhat,
1336     const char *FileName, char *ErrorMessage);
1337   status_t AddPositionIOToDatabase (ClassificationTypes IsSpamOrWhat,
1338     BPositionIO *MessageIOPntr, const char *OptionalFileName,
1339     char *ErrorMessage);
1340   status_t AddStringToDatabase (ClassificationTypes IsSpamOrWhat,
1341     const char *String, char *ErrorMessage);
1342   void AddWordsToSet (const char *InputString, size_t NumberOfBytes,
1343     char PrefixCharacter, set<string> &WordSet);
1344   status_t CreateDatabaseFile (char *ErrorMessage);
1345   void DefaultSettings ();
1346   status_t DeleteDatabaseFile (char *ErrorMessage);
1347   status_t EvaluateFile (const char *PathName, BMessage *ReplyMessagePntr,
1348     char *ErrorMessage);
1349   status_t EvaluatePositionIO (BPositionIO *PositionIOPntr,
1350     const char *OptionalFileName, BMessage *ReplyMessagePntr,
1351     char *ErrorMessage);
1352   status_t EvaluateString (const char *BufferPntr, ssize_t BufferSize,
1353     BMessage *ReplyMessagePntr, char *ErrorMessage);
1354   status_t GetWordsFromPositionIO (BPositionIO *PositionIOPntr,
1355     const char *OptionalFileName, set<string> &WordSet, char *ErrorMessage);
1356   status_t InstallThings (char *ErrorMessage);
1357   status_t LoadDatabaseIfNeeded (char *ErrorMessage);
1358   status_t LoadSaveDatabase (bool DoLoad, char *ErrorMessage);
1359 public:
1360   status_t LoadSaveSettings (bool DoLoad);
1361 private:
1362   status_t MakeBackup (char *ErrorMessage);
1363   void MakeDatabaseEmpty ();
1364   void ProcessScriptingMessage (BMessage *MessagePntr,
1365     struct property_info *PropInfoPntr);
1366   status_t PurgeOldWords (char *ErrorMessage);
1367   status_t RecursivelyTokenizeMailComponent (
1368     BMailComponent *ComponentPntr, const char *OptionalFileName,
1369     set<string> &WordSet, char *ErrorMessage,
1370     int RecursionLevel, int MaxRecursionLevel);
1371   status_t SaveDatabaseIfNeeded (char *ErrorMessage);
1372   status_t TokenizeParts (BPositionIO *PositionIOPntr,
1373     const char *OptionalFileName, set<string> &WordSet, char *ErrorMessage);
1374   status_t TokenizeWhole (BPositionIO *PositionIOPntr,
1375     const char *OptionalFileName, set<string> &WordSet, char *ErrorMessage);
1376 
1377 public:
1378   /* Member variables.  Many are read by the window thread to see if it needs
1379   updating, and to draw the words.  However, the other threads will lock the
1380   BApplication or use scripting commands if they want to make changes. */
1381 
1382   bool m_DatabaseHasChanged;
1383     /* Set to TRUE when the in-memory database (stored in m_WordMap) has
1384     changed and is different from the on-disk database file.  When the
1385     application exits, the database will be written out if it has changed. */
1386 
1387   BString m_DatabaseFileName;
1388     /* The absolute path name to use for the database file on disk. */
1389 
1390   bool m_IgnorePreviousClassification;
1391     /* If TRUE then the previous classification of a message (stored in an
1392     attribute on the message file) will be ignored, and the message will be
1393     added to the requested spam/genuine list.  If this is FALSE then the spam
1394     won't be added to the list if it has already been classified as specified,
1395     but if it was mis-classified, it will be removed from the old list and
1396     added to the new list. */
1397 
1398   uint32 m_OldestAge;
1399     /* The age of the oldest word.  This will be the smallest age number in the
1400     database.  Mostly useful for scaling graphics representing age in the word
1401     display.  If the oldest word is no longer the oldest, this variable won't
1402     get immediately updated since it would take a lot of effort to find the
1403     next older age.  Since it's only used for display, we'll let it be slightly
1404     incorrect.  The next database load or purge will fix it. */
1405 
1406   uint32 m_PurgeAge;
1407     /* When purging old words, they have to be at least this old to be eligible
1408     for deletion.  Age is measured as the number of e-mails added to the
1409     database since the word was last updated in the database.  Zero means all
1410     words are old. */
1411 
1412   uint32 m_PurgePopularity;
1413     /* When purging old words, they have to be less than or equal to this
1414     popularity limit to be eligible for deletion.  Popularity is measured as
1415     the number of messages (spam and genuine) which have the word.  Zero means
1416     no words. */
1417 
1418   ScoringModes m_ScoringMode;
1419     /* Controls how to combine the word probabilities into an overall score.
1420     See the PN_SCORING_MODE comments for details. */
1421 
1422   BPath m_SettingsDirectoryPath;
1423     /* The constructor initialises this to the settings directory path.  It
1424     never changes after that. */
1425 
1426   bool m_SettingsHaveChanged;
1427     /* Set to TRUE when the settings are changed (different than the ones which
1428     were loaded).  When the application exits, the settings will be written out
1429     if they have changed. */
1430 
1431   double m_SmallestUseableDouble;
1432     /* When multiplying fractional numbers together, avoid using numbers
1433     smaller than this because the double exponent range is close to being
1434     exhausted.  The IEEE STANDARD 754 floating-point arithmetic (used on the
1435     Intel i8087 and later math processors) has 64 bit numbers with 53 bits of
1436     mantissa, giving it an underflow starting at 0.5**1022 = 2.2e-308 where it
1437     rounds off to the nearest multiple of 0.5**1074 = 4.9e-324. */
1438 
1439   TokenizeModes m_TokenizeMode;
1440     /* Controls how to convert the raw message text into words.  See the
1441     PN_TOKENIZE_MODE comments for details. */
1442 
1443   uint32 m_TotalGenuineMessages;
1444     /* Number of genuine messages which are in the database. */
1445 
1446   uint32 m_TotalSpamMessages;
1447     /* Number of spam messages which are in the database. */
1448 
1449   uint32 m_WordCount;
1450     /* The number of words currently in the database.  Stored separately as a
1451     member variable to avoid having to call m_WordMap.size() all the time,
1452     which other threads can't do while the database is being updated (but they
1453     can look at the word count variable). */
1454 
1455   StatisticsMap m_WordMap;
1456     /* The in-memory data structure holding the set of words and their
1457     associated statistics.  When the database isn't in use, it is an empty
1458     collection.  You should lock the BApplication if you are using the word
1459     collection (reading or writing) from another thread. */
1460 };
1461 
1462 
1463 
1464 /******************************************************************************
1465  * Global utility function to display an error message and return.  The message
1466  * part describes the error, and if ErrorNumber is non-zero, gets the string
1467  * ", error code $X (standard description)." appended to it.  If the message
1468  * is NULL then it gets defaulted to "Something went wrong".  The title part
1469  * doesn't get displayed (no title bar in the dialog box, but you can see it in
1470  * the debugger as the window thread name), and defaults to "Error Message" if
1471  * you didn't specify one.  If running in command line mode, the error gets
1472  * printed to stderr rather than showing up in a dialog box.
1473  */
1474 
1475 static void
DisplayErrorMessage(const char * MessageString=NULL,int ErrorNumber=0,const char * TitleString=NULL)1476 DisplayErrorMessage (
1477   const char *MessageString = NULL,
1478   int ErrorNumber = 0,
1479   const char *TitleString = NULL)
1480 {
1481   BAlert *AlertPntr;
1482   char ErrorBuffer [PATH_MAX + 1500];
1483 
1484   if (TitleString == NULL)
1485     TitleString = "SpamDBM Error Message";
1486 
1487   if (MessageString == NULL)
1488   {
1489     if (ErrorNumber == 0)
1490       MessageString = "No error, no message, why bother?";
1491     else
1492       MessageString = "Something went wrong";
1493   }
1494 
1495   if (ErrorNumber != 0)
1496   {
1497     sprintf (ErrorBuffer, "%s, error code $%X/%d (%s) has occured.",
1498       MessageString, ErrorNumber, ErrorNumber, strerror (ErrorNumber));
1499     MessageString = ErrorBuffer;
1500   }
1501 
1502   if (g_CommandLineMode || g_ServerMode)
1503     cerr << TitleString << ": " << MessageString << endl;
1504   else
1505   {
1506     AlertPntr = new BAlert (TitleString, MessageString,
1507       "Acknowledge", NULL, NULL, B_WIDTH_AS_USUAL, B_STOP_ALERT);
1508     if (AlertPntr != NULL) {
1509       AlertPntr->SetFlags(AlertPntr->Flags() | B_CLOSE_ON_ESCAPE);
1510       AlertPntr->Go ();
1511     }
1512   }
1513 }
1514 
1515 
1516 
/******************************************************************************
 * Word wrap a long line of text into shorter lines of at most 79 columns and
 * print the result on the given output stream, one endl terminated line at a
 * time.  Leading white space on each output line is skipped.  Lines are broken
 * at the last white space which fits; a run of text with no white space at all
 * is chopped into 79 character chunks.  Text which exactly fills the 79
 * columns and ends at a word boundary (or at the end of the text) is printed
 * as one line rather than being broken at an earlier space.  Nothing is
 * printed for an empty or all white space text.
 */

static void
WrapTextToStream (std::ostream& OutputStream, const char *TextPntr)
{
  const int LineLength = 79;
  char     *StringPntr;
  char      TempString [LineLength+1];

  TempString[LineLength] = 0; /* Only needs to be done once. */

  while (*TextPntr != 0)
  {
    /* The unsigned char casts are needed since isspace() is only defined for
    values in the unsigned char range (or EOF), and plain char can be negative
    for bytes with the high bit set, such as UTF-8 text. */

    while (isspace ((unsigned char) *TextPntr))
      TextPntr++; /* Skip leading spaces. */
    if (*TextPntr == 0)
      break; /* It was all spaces, don't print any more. */

    /* Grab up to a line full of text.  If the text is shorter, strncpy pads
    the rest of the buffer with NUL bytes, otherwise the permanent NUL at
    TempString[LineLength] ends the string. */

    strncpy (TempString, TextPntr, LineLength);

    /* Advance StringPntr to the end of the temp string, partly to see how long
    it is (rather than doing strlen). */

    StringPntr = TempString;
    while (*StringPntr != 0)
      StringPntr++;

    if (StringPntr - TempString < LineLength)
    {
      /* This line fits completely, it's the last of the text. */
      OutputStream << TempString << std::endl;
      TextPntr += StringPntr - TempString;
      continue;
    }

    /* The buffer is full.  If the character just after it is in the middle of
    a word then back up StringPntr to the last space in the temp string.
    Otherwise the text ends or has a word break exactly at the line limit, so
    the whole buffer can be printed as is.  Reading TextPntr[LineLength] is
    safe since we know the text is at least LineLength characters long. */

    if (TextPntr[LineLength] != 0 &&
    !isspace ((unsigned char) TextPntr[LineLength]))
    {
      while (StringPntr > TempString)
      {
        if (isspace ((unsigned char) *StringPntr))
          break; /* Found the trailing space. */
        else /* Go backwards, looking for the trailing space. */
          StringPntr--;
      }
    }

    /* Remove more trailing spaces at the end of the line, in case there were
    several spaces in a row. */

    while (StringPntr > TempString &&
    isspace ((unsigned char) StringPntr[-1]))
      StringPntr--;

    /* Print the line of text and advance the text pointer too. */

    if (StringPntr == TempString)
    {
      /* This line has no spaces, don't wrap it, just split off a chunk. */
      OutputStream << TempString << std::endl;
      TextPntr += strlen (TempString);
      continue;
    }

    *StringPntr = 0; /* Cut off after the last word which fits. */
    OutputStream << TempString << std::endl;
    TextPntr += StringPntr - TempString;
  }
}
1586 
1587 
1588 
1589 /******************************************************************************
1590  * Print the usage info to the stream.  Includes a list of all commands.
1591  */
1592 ostream& PrintUsage (ostream& OutputStream);
1593 
PrintUsage(ostream & OutputStream)1594 ostream& PrintUsage (ostream& OutputStream)
1595 {
1596   struct property_info *PropInfoPntr;
1597 
1598   OutputStream << "\nSpamDBM - A Spam Database Manager\n";
1599   OutputStream << "Copyright © 2002 by Alexander G. M. Smith.  ";
1600   OutputStream << "Released to the public domain.\n\n";
1601   WrapTextToStream (OutputStream, "Compiled on " __DATE__ " at " __TIME__
1602 ".  $Id: spamdbm.cpp 30630 2009-05-05 01:31:01Z bga $  $HeadURL: http://svn.haiku-os.org/haiku/haiku/trunk/src/bin/mail_utils/spamdbm.cpp $");
1603   OutputStream << "\n"
1604 "This is a program for classifying e-mail messages as spam (junk mail which\n"
1605 "you don't want to read) and regular genuine messages.  It can learn what's\n"
1606 "spam and what's genuine.  You just give it a bunch of spam messages and a\n"
1607 "bunch of non-spam ones.  It uses them to make a list of the words from the\n"
1608 "messages with the probability that each word is from a spam message or from\n"
1609 "a genuine message.  Later on, it can use those probabilities to classify\n"
1610 "new messages as spam or not spam.  If the classifier stops working well\n"
1611 "(because the spammers have changed their writing style and vocabulary, or\n"
1612 "your regular correspondants are writing like spammers), you can use this\n"
1613 "program to update the list of words to identify the new messages\n"
1614 "correctly.\n"
1615 "\n"
1616 "The original idea was from Paul Graham's algorithm, which has an excellent\n"
1617 "writeup at: http://www.paulgraham.com/spam.html\n"
1618 "\n"
1619 "Gary Robinson came up with the improved algorithm, which you can read about at:\n"
1620 "http://radio.weblogs.com/0101454/stories/2002/09/16/spamDetection.html\n"
1621 "\n"
1622 "Then he, Tim Peters and the SpamBayes mailing list developed the Chi-Squared\n"
1623 "test, see http://mail.python.org/pipermail/spambayes/2002-October/001036.html\n"
1624 "for one of the earlier messages leading from the central limit theorem to\n"
1625 "the current chi-squared scoring method.\n"
1626 "\n"
1627 "Thanks go to Isaac Yonemoto for providing a better icon, which we can\n"
1628 "unfortunately no longer use, since the Hormel company wants people to\n"
1629 "avoid associating their meat product with junk e-mail.\n"
1630 "\n"
1631 "Tokenising code updated in 2005 to use some of the tricks that SpamBayes\n"
1632 "uses to extract words from messages.  In particular, HTML is now handled.\n"
1633 "\n"
1634 "Usage: Specify the operation as the first argument followed by more\n"
1635 "information as appropriate.  The program's configuration will affect the\n"
1636 "actual operation (things like the name of the database file to use, or\n"
1637 "whether it should allow non-email messages to be added).  In command line\n"
1638 "mode it will do the operation and exit.  In GUI/server mode a command line\n"
1639 "invocation will just send the command to the running server.  You can also\n"
1640 "use BeOS scripting (see the \"Hey\" command which you can get from\n"
1641 "http://www.bebits.com/app/2042 ) to control the Spam server.  And finally,\n"
1642 "there's also a GUI interface which shows up if you start it without any\n"
1643 "command line arguments.\n"
1644 "\n"
1645 "Commands:\n"
1646 "\n"
1647 "Quit\n"
1648 "Stop the program.  Useful if it's running as a server.\n"
1649 "\n";
1650 
1651   /* Go through all our scripting commands and add a description of each one to
1652   the usage text. */
1653 
1654   for (PropInfoPntr = g_ScriptingPropertyList + 0;
1655   PropInfoPntr->name != 0;
1656   PropInfoPntr++)
1657   {
1658     switch (PropInfoPntr->commands[0])
1659     {
1660       case B_GET_PROPERTY:
1661         OutputStream << "Get " << PropInfoPntr->name << endl;
1662         break;
1663 
1664       case B_SET_PROPERTY:
1665         OutputStream << "Set " << PropInfoPntr->name << " NewValue" << endl;
1666         break;
1667 
1668       case B_COUNT_PROPERTIES:
1669         OutputStream << "Count " << PropInfoPntr->name << endl;
1670         break;
1671 
1672       case B_CREATE_PROPERTY:
1673         OutputStream << "Create " << PropInfoPntr->name << endl;
1674         break;
1675 
1676       case B_DELETE_PROPERTY:
1677         OutputStream << "Delete " << PropInfoPntr->name << endl;
1678         break;
1679 
1680       case B_EXECUTE_PROPERTY:
1681         OutputStream << PropInfoPntr->name << endl;
1682         break;
1683 
1684       default:
1685         OutputStream << "Buggy Command: " << PropInfoPntr->name << endl;
1686         break;
1687     }
1688     WrapTextToStream (OutputStream, (char *)PropInfoPntr->usage);
1689     OutputStream << endl;
1690   }
1691 
1692   return OutputStream;
1693 }
1694 
1695 
1696 
1697 /******************************************************************************
1698  * A utility function to send a command to the application, will return after a
1699  * short delay if the application is busy (doesn't wait for it to be executed).
1700  * The reply from the application is also thrown away.  It used to be an
1701  * overloaded function, but the system couldn't distinguish between bool and
1702  * int, so now it has slightly different names depending on the arguments.
1703  */
1704 
1705 static void
SubmitCommand(BMessage & CommandMessage)1706 SubmitCommand (BMessage& CommandMessage)
1707 {
1708   status_t ErrorCode;
1709 
1710   ErrorCode = be_app_messenger.SendMessage (&CommandMessage,
1711     be_app_messenger /* reply messenger, throw away the reply */,
1712     1000000 /* delivery timeout */);
1713 
1714   if (ErrorCode != B_OK)
1715     cerr << "SubmitCommand failed to send a command, code " <<
1716     ErrorCode << " (" << strerror (ErrorCode) << ")." << endl;
1717 }
1718 
1719 
1720 static void
SubmitCommandString(PropertyNumbers Property,uint32 CommandCode,const char * StringArgument=NULL)1721 SubmitCommandString (
1722   PropertyNumbers Property,
1723   uint32 CommandCode,
1724   const char *StringArgument = NULL)
1725 {
1726   BMessage CommandMessage (CommandCode);
1727 
1728   if (Property < 0 || Property >= PN_MAX)
1729   {
1730     DisplayErrorMessage ("SubmitCommandString bug.");
1731     return;
1732   }
1733   CommandMessage.AddSpecifier (g_PropertyNames [Property]);
1734   if (StringArgument != NULL)
1735     CommandMessage.AddString (g_DataName, StringArgument);
1736   SubmitCommand (CommandMessage);
1737 }
1738 
1739 
1740 static void
SubmitCommandInt32(PropertyNumbers Property,uint32 CommandCode,int32 Int32Argument)1741 SubmitCommandInt32 (
1742   PropertyNumbers Property,
1743   uint32 CommandCode,
1744   int32 Int32Argument)
1745 {
1746   BMessage CommandMessage (CommandCode);
1747 
1748   if (Property < 0 || Property >= PN_MAX)
1749   {
1750     DisplayErrorMessage ("SubmitCommandInt32 bug.");
1751     return;
1752   }
1753   CommandMessage.AddSpecifier (g_PropertyNames [Property]);
1754   CommandMessage.AddInt32 (g_DataName, Int32Argument);
1755   SubmitCommand (CommandMessage);
1756 }
1757 
1758 
1759 static void
SubmitCommandBool(PropertyNumbers Property,uint32 CommandCode,bool BoolArgument)1760 SubmitCommandBool (
1761   PropertyNumbers Property,
1762   uint32 CommandCode,
1763   bool BoolArgument)
1764 {
1765   BMessage CommandMessage (CommandCode);
1766 
1767   if (Property < 0 || Property >= PN_MAX)
1768   {
1769     DisplayErrorMessage ("SubmitCommandBool bug.");
1770     return;
1771   }
1772   CommandMessage.AddSpecifier (g_PropertyNames [Property]);
1773   CommandMessage.AddBool (g_DataName, BoolArgument);
1774   SubmitCommand (CommandMessage);
1775 }
1776 
1777 
1778 
1779 /******************************************************************************
1780  * A utility function which will estimate the spaminess of file(s), not
1781  * callable from the application thread since it sends a scripting command to
1782  * the application and waits for results.  For each file there will be an entry
1783  * reference in the message.  For each of those, run it through the spam
1784  * estimator and display a box with the results.  This function is used both by
1785  * the file requestor and by dragging and dropping into the middle of the words
1786  * view.
1787  */
1788 
1789 static void
EstimateRefFilesAndDisplay(BMessage * MessagePntr)1790 EstimateRefFilesAndDisplay (BMessage *MessagePntr)
1791 {
1792   BAlert     *AlertPntr;
1793   BEntry      Entry;
1794   entry_ref   EntryRef;
1795   status_t    ErrorCode;
1796   int         i, j;
1797   BPath       Path;
1798   BMessage    ReplyMessage;
1799   BMessage    ScriptingMessage;
1800   const char *StringPntr;
1801   float       TempFloat;
1802   int32       TempInt32;
1803   char        TempString [PATH_MAX + 1024 +
1804                 g_MaxInterestingWords * (g_MaxWordLength + 16)];
1805 
1806   for (i = 0; MessagePntr->FindRef ("refs", i, &EntryRef) == B_OK; i++)
1807   {
1808     /* See if the entry is a valid file or directory or other thing. */
1809 
1810     ErrorCode = Entry.SetTo (&EntryRef, true /* traverse symbolic links */);
1811     if (ErrorCode != B_OK || !Entry.Exists () || Entry.GetPath (&Path) != B_OK)
1812       continue;
1813 
1814     /* Evaluate the spaminess of the file. */
1815 
1816     ScriptingMessage.MakeEmpty ();
1817     ScriptingMessage.what = B_SET_PROPERTY;
1818     ScriptingMessage.AddSpecifier (g_PropertyNames[PN_EVALUATE]);
1819     ScriptingMessage.AddString (g_DataName, Path.Path ());
1820 
1821     if (be_app_messenger.SendMessage (&ScriptingMessage,&ReplyMessage) != B_OK)
1822       break; /* App has died or something is wrong. */
1823 
1824     if (ReplyMessage.FindInt32 ("error", &TempInt32) != B_OK ||
1825     TempInt32 != B_OK)
1826       break; /* Error messages will be displayed elsewhere. */
1827 
1828     ReplyMessage.FindFloat (g_ResultName, &TempFloat);
1829     sprintf (TempString, "%f spam ratio for \"%s\".\nThe top words are:",
1830       (double) TempFloat, Path.Path ());
1831 
1832     for (j = 0; j < 20 /* Don't print too many! */; j++)
1833     {
1834       if (ReplyMessage.FindString ("words", j, &StringPntr) != B_OK ||
1835       ReplyMessage.FindFloat ("ratios", j, &TempFloat) != B_OK)
1836         break;
1837 
1838       sprintf (TempString + strlen (TempString), "\n%s / %f",
1839         StringPntr, TempFloat);
1840     }
1841     if (j >= 20 && j < g_MaxInterestingWords)
1842       sprintf (TempString + strlen (TempString), "\nAnd up to %d more words.",
1843         g_MaxInterestingWords - j);
1844 
1845     AlertPntr = new BAlert ("Estimate", TempString, "OK");
1846     if (AlertPntr != NULL) {
1847       AlertPntr->SetFlags(AlertPntr->Flags() | B_CLOSE_ON_ESCAPE);
1848       AlertPntr->Go ();
1849     }
1850   }
1851 }
1852 
1853 
1854 
/******************************************************************************
 * A utility function from the http://sourceforge.net/projects/spambayes
 * SpamBayes project.  Return prob(chisq >= x2, with v degrees of freedom),
 * that is, the probability that a chi-squared value (a kind of normalized
 * error measurement) with v degrees of freedom would come out larger than the
 * given number (x2; chi is the Greek letter X thus x2).  A result near zero
 * means your measured error is unusually large (chi-squared rarely gets above
 * it merely due to random effects), while a larger result (usually over 5%,
 * which is within 3 standard deviations) means that random effects alone push
 * chi-squared past your number fairly often.  v must be even for this
 * calculation to work; -1.0 is returned if it is odd.  Otherwise the result is
 * never more than 1.0.
 */

static double ChiSquaredProbability (double x2, int v)
{
  double HalfChiSquared;
  double Probability;
  double SeriesTerm;
  int    TermIndex;
  int    TermLimit;

  if ((v & 1) != 0)
    return -1.0; /* Out of range return value as a hint v is odd. */

  /* Add up the series exp(-m) * (1 + m + m^2/2! + ... ) with v/2 terms, where
  m is half of x2.  Each term is made from the previous one.  If x2 is very
  large, exp(-m) will underflow to 0 and so will the whole sum. */

  TermLimit = v / 2;
  HalfChiSquared = x2 / 2.0;
  SeriesTerm = exp (-HalfChiSquared);
  Probability = SeriesTerm;

  TermIndex = 1;
  while (TermIndex < TermLimit)
  {
    SeriesTerm *= HalfChiSquared / TermIndex;
    Probability += SeriesTerm;
    TermIndex++;
  }

  /* With small x2 and large v, accumulated roundoff error, plus error in the
  platform exp(), can cause the sum to spill a few ULP above 1.0 (the
  SpamBayes authors saw 1.0 + 2.0**-52 for x2 = 100, v = 300).  Returning a
  value even a teensy bit over 1.0 is no good, so clamp it. */

  return (Probability > 1.0) ? 1.0 : Probability;
}
1898 
1899 
1900 
1901 /******************************************************************************
1902  * A utility function to remove the "[Spam 99.9%] " from in front of the
1903  * MAIL:subject attribute of a file.
1904  */
1905 
RemoveSpamPrefixFromSubjectAttribute(BNode * BNodePntr)1906 static status_t RemoveSpamPrefixFromSubjectAttribute (BNode *BNodePntr)
1907 {
1908   status_t    ErrorCode;
1909   const char *MailSubjectName = "MAIL:subject";
1910   char       *StringPntr;
1911   char        SubjectString [2000];
1912 
1913   ErrorCode = BNodePntr->ReadAttr (MailSubjectName,
1914     B_STRING_TYPE, 0 /* offset */, SubjectString,
1915     sizeof (SubjectString) - 1);
1916   if (ErrorCode <= 0)
1917     return 0; /* The attribute isn't there so we don't care. */
1918   if (ErrorCode >= (int) sizeof (SubjectString) - 1)
1919     return 0; /* Can't handle subjects which are too long. */
1920 
1921   SubjectString [ErrorCode] = 0;
1922   ErrorCode = 0; /* So do-nothing exit returns zero. */
1923   if (strncmp (SubjectString, "[Spam ", 6) == 0)
1924   {
1925     for (StringPntr = SubjectString;
1926     *StringPntr != 0 && *StringPntr != ']'; StringPntr++)
1927       ; /* No body in this for loop. */
1928     if (StringPntr[0] == ']' && StringPntr[1] == ' ')
1929     {
1930       ErrorCode = BNodePntr->RemoveAttr (MailSubjectName);
1931       ErrorCode = BNodePntr->WriteAttr (MailSubjectName,
1932         B_STRING_TYPE, 0 /* offset */,
1933         StringPntr + 2, strlen (StringPntr + 2) + 1);
1934       if (ErrorCode > 0)
1935         ErrorCode = 0;
1936     }
1937   }
1938 
1939   return ErrorCode;
1940 }
1941 
1942 
1943 
1944 /******************************************************************************
1945  * The tokenizing functions.  To make tokenization of the text easier to
1946  * understand, it is broken up into several passes.  Each pass goes over the
1947  * text (can include NUL bytes) and extracts all the words it can recognise
1948  * (can be none).  The extracted words are added to the WordSet, with the
1949  * PrefixCharacter prepended (zero if none) so we can distinguish between words
1950  * found in headers and in the text body.  It also modifies the input text
1951  * buffer in-place to change the text that the next pass will see (blanking out
1952  * words that it wants to delete, but not inserting much new text since the
1953  * buffer can't be enlarged).  They all return the number of bytes remaining in
1954  * InputString after it has been modified to be input for the next pass.
1955  * Returns zero if it has exhausted the possibility of getting more words, or
1956  * if something goes wrong.
1957  */
1958 
/* Convert the ASCII capital letters in the buffer to lower case, in place.
All other bytes (including NUL bytes and bytes with the high bit set) are left
as they are.  Returns the number of bytes in the buffer, which is unchanged. */

static size_t TokenizerPassLowerCase (
  char *BufferPntr,
  size_t NumberOfBytes)
{
  size_t ByteIndex;

  for (ByteIndex = 0; ByteIndex < NumberOfBytes; ByteIndex++)
  {
    /* Do our own lower case conversion; tolower () has problems with UTF-8
    characters that have the high bit set. */

    const char Letter = BufferPntr [ByteIndex];

    if (Letter >= 'A' && Letter <= 'Z')
      BufferPntr [ByteIndex] = Letter + ('a' - 'A');
  }
  return NumberOfBytes;
}
1978 
1979 
1980 /* A utility function for some commonly repeated code.  If this was Modula-2,
1981 we could use a nested procedure.  But it's not.  Adds the given word to the set
1982 of words, checking for maximum word length and prepending the prefix to the
1983 word, which gets modified by this function to reflect the word actually added
1984 to the set. */
1985 
1986 static void
AddWordAndPrefixToSet(string & Word,const char * PrefixString,set<string> & WordSet)1987 AddWordAndPrefixToSet (
1988   string &Word,
1989   const char *PrefixString,
1990   set<string> &WordSet)
1991 {
1992   if (Word.empty ())
1993     return;
1994 
1995   if (Word.size () > g_MaxWordLength)
1996     Word.resize (g_MaxWordLength);
1997   Word.insert (0, PrefixString);
1998   WordSet.insert (Word);
1999 }
2000 
2001 
2002 /* Hunt through the text for various URLs and extract the components as
2003 separate words.  Doesn't affect the text in the buffer.  Looks for
2004 protocol://user:password@computer:port/path?query=key#anchor strings.  Also
2005 www.blah strings are detected and broken down.  Doesn't do HREF="" strings
2006 where the string has a relative path (no host computer name).  Assumes the
2007 input buffer is already in lower case. */
2008 
TokenizerPassExtractURLs(char * BufferPntr,size_t NumberOfBytes,char PrefixCharacter,set<string> & WordSet)2009 static size_t TokenizerPassExtractURLs (
2010   char *BufferPntr,
2011   size_t NumberOfBytes,
2012   char PrefixCharacter,
2013   set<string> &WordSet)
2014 {
2015   char   *AtSignStringPntr;
2016   char   *HostStringPntr;
2017   char   *InputStringEndPntr;
2018   char   *InputStringPntr;
2019   char   *OptionsStringPntr;
2020   char   *PathStringPntr;
2021   char    PrefixString [2];
2022   char   *ProtocolStringPntr;
2023   string  Word;
2024 
2025   InputStringPntr = BufferPntr;
2026   InputStringEndPntr = BufferPntr + NumberOfBytes;
2027   PrefixString [0] = PrefixCharacter;
2028   PrefixString [1] = 0;
2029 
2030   while (InputStringPntr < InputStringEndPntr - 4)
2031   {
2032     HostStringPntr = NULL;
2033     if (memcmp (InputStringPntr, "www.", 4) == 0)
2034       HostStringPntr = InputStringPntr;
2035     else if (memcmp (InputStringPntr, "://", 3) == 0)
2036     {
2037       /* Find the protocol name, and add it as a word such as "ftp:" "http:" */
2038       ProtocolStringPntr = InputStringPntr;
2039       while (ProtocolStringPntr > BufferPntr &&
2040       isalpha (ProtocolStringPntr[-1]))
2041         ProtocolStringPntr--;
2042       Word.assign (ProtocolStringPntr,
2043         (InputStringPntr - ProtocolStringPntr) + 1 /* for the colon */);
2044       AddWordAndPrefixToSet (Word, PrefixString, WordSet);
2045       HostStringPntr = InputStringPntr + 3; /* Skip past the "://" */
2046     }
2047     if (HostStringPntr == NULL)
2048     {
2049       InputStringPntr++;
2050       continue;
2051     }
2052 
2053     /* Got a host name string starting at HostStringPntr.  It's everything
2054     until the next slash or space, like "user:password@computer:port". */
2055 
2056     InputStringPntr = HostStringPntr;
2057     AtSignStringPntr = NULL;
2058     while (InputStringPntr < InputStringEndPntr &&
2059     (*InputStringPntr != '/' && !isspace (*InputStringPntr)))
2060     {
2061       if (*InputStringPntr == '@')
2062         AtSignStringPntr = InputStringPntr;
2063       InputStringPntr++;
2064     }
2065     if (AtSignStringPntr != NULL)
2066     {
2067       /* Add a word with the user and password, unseparated. */
2068       Word.assign (HostStringPntr,
2069         AtSignStringPntr - HostStringPntr + 1 /* for the @ sign */);
2070       AddWordAndPrefixToSet (Word, PrefixString, WordSet);
2071       HostStringPntr = AtSignStringPntr + 1;
2072     }
2073 
2074     /* Add a word with the computer and port, unseparated. */
2075 
2076     Word.assign (HostStringPntr, InputStringPntr - HostStringPntr);
2077     AddWordAndPrefixToSet (Word, PrefixString, WordSet);
2078 
2079     /* Now get the path name, not including the extra junk after ?  and #
2080     separators (they're stored as separate options).  Stops at white space or a
2081     double quote mark. */
2082 
2083     PathStringPntr = InputStringPntr;
2084     OptionsStringPntr = NULL;
2085     while (InputStringPntr < InputStringEndPntr &&
2086     (*InputStringPntr != '"' && !isspace (*InputStringPntr)))
2087     {
2088       if (OptionsStringPntr == NULL &&
2089       (*InputStringPntr == '?' || *InputStringPntr == '#'))
2090         OptionsStringPntr = InputStringPntr;
2091       InputStringPntr++;
2092     }
2093 
2094     if (OptionsStringPntr == NULL)
2095     {
2096       /* No options, all path. */
2097       Word.assign (PathStringPntr, InputStringPntr - PathStringPntr);
2098       AddWordAndPrefixToSet (Word, PrefixString, WordSet);
2099     }
2100     else
2101     {
2102       /* Insert the path before the options. */
2103       Word.assign (PathStringPntr, OptionsStringPntr - PathStringPntr);
2104       AddWordAndPrefixToSet (Word, PrefixString, WordSet);
2105 
2106       /* Insert all the options as a word. */
2107       Word.assign (OptionsStringPntr, InputStringPntr - OptionsStringPntr);
2108       AddWordAndPrefixToSet (Word, PrefixString, WordSet);
2109     }
2110   }
2111   return NumberOfBytes;
2112 }
2113 
2114 
2115 /* Replace long Asian words (likely to actually be sentences) with the first
2116 character in the word. */
2117 
TokenizerPassTruncateLongAsianWords(char * BufferPntr,size_t NumberOfBytes)2118 static size_t TokenizerPassTruncateLongAsianWords (
2119   char *BufferPntr,
2120   size_t NumberOfBytes)
2121 {
2122   char *EndOfStringPntr;
2123   char *InputStringPntr;
2124   int   Letter;
2125   char *OutputStringPntr;
2126   char *StartOfInputLongUnicodeWord;
2127   char *StartOfOutputLongUnicodeWord;
2128 
2129   InputStringPntr = BufferPntr;
2130   EndOfStringPntr = InputStringPntr + NumberOfBytes;
2131   OutputStringPntr = InputStringPntr;
2132   StartOfInputLongUnicodeWord = NULL; /* Non-NULL flags it as started. */
2133   StartOfOutputLongUnicodeWord = NULL;
2134 
2135   /* Copy the text from the input to the output (same buffer), but when we find
2136   a sequence of UTF-8 characters that is too long then truncate it down to one
2137   character and reset the output pointer to be after that character, thus
2138   deleting the word.  Replacing the deleted characters after it with spaces
2139   won't work since we need to preserve the lack of space to handle those sneaky
2140   HTML artificial word breakers.  So that Thelongword<blah>ing becomes
2141   "T<blah>ing" rather than "T <blah>ing", so the next step joins them up into
2142   "Ting" rather than "T" and "ing".  The first code in a UTF-8 character is
2143   11xxxxxx and subsequent ones are 10xxxxxx. */
2144 
2145   while (InputStringPntr < EndOfStringPntr)
2146   {
2147     Letter = (unsigned char) *InputStringPntr;
2148     if (Letter < 128) // Got a regular ASCII letter?
2149     {
2150       if (StartOfInputLongUnicodeWord != NULL)
2151       {
2152         if (InputStringPntr - StartOfInputLongUnicodeWord >
2153         (int) g_MaxWordLength * 2)
2154         {
2155           /* Need to truncate the long word (100 bytes or about 50 characters)
2156           back down to the first UTF-8 character, so find out where the first
2157           character ends (skip past the 10xxxxxx bytes), and rewind the output
2158           pointer to be just after that (ignoring the rest of the long word in
2159           effect). */
2160 
2161           OutputStringPntr = StartOfOutputLongUnicodeWord + 1;
2162           while (OutputStringPntr < InputStringPntr)
2163           {
2164             Letter = (unsigned char) *OutputStringPntr;
2165             if (Letter < 128 || Letter >= 192)
2166               break;
2167             ++OutputStringPntr; // Still a UTF-8 middle of the character code.
2168           }
2169         }
2170         StartOfInputLongUnicodeWord = NULL;
2171       }
2172     }
2173     else if (Letter >= 192 && StartOfInputLongUnicodeWord == NULL)
2174     {
2175       /* Got the start of a UTF-8 character.  Remember the spot so we can see
2176       if this is a too long UTF-8 word, which is often a whole sentence in
2177       asian languages, since they sort of use a single character per word. */
2178 
2179       StartOfInputLongUnicodeWord = InputStringPntr;
2180       StartOfOutputLongUnicodeWord = OutputStringPntr;
2181     }
2182     *OutputStringPntr++ = *InputStringPntr++;
2183   }
2184   return OutputStringPntr - BufferPntr;
2185 }
2186 
2187 
2188 /* Find all the words in the string and add them to our local set of words.
2189 The characters considered white space are defined by g_SpaceCharacters.  This
2190 function is also used as a subroutine by other tokenizer functions when they
2191 have a bunch of presumably plain text they want broken into words and added. */
2192 
TokenizerPassGetPlainWords(char * BufferPntr,size_t NumberOfBytes,char PrefixCharacter,set<string> & WordSet)2193 static size_t TokenizerPassGetPlainWords (
2194   char *BufferPntr,
2195   size_t NumberOfBytes,
2196   char PrefixCharacter,
2197   set<string> &WordSet)
2198 {
2199   string  AccumulatedWord;
2200   char   *EndOfStringPntr;
2201   size_t  Length;
2202   int     Letter;
2203 
2204   if (NumberOfBytes <= 0)
2205     return 0; /* Nothing to process. */
2206 
2207   if (PrefixCharacter != 0)
2208     AccumulatedWord = PrefixCharacter;
2209   EndOfStringPntr = BufferPntr + NumberOfBytes;
2210   while (true)
2211   {
2212     if (BufferPntr >= EndOfStringPntr)
2213       Letter = EOF; // Usually a negative number.
2214     else
2215       Letter = (unsigned char) *BufferPntr++;
2216 
2217     /* See if it is a letter we treat as white space.  Some word separators
2218     like dashes and periods aren't considered as space.  Note that codes above
2219     127 are UTF-8 characters, which we consider non-space. */
2220 
2221     if (Letter < 0 /* EOF is -1 */ ||
2222     (Letter < 128 && g_SpaceCharacters[Letter]))
2223     {
2224       /* That space finished off a word.  Remove trailing periods... */
2225 
2226       while ((Length = AccumulatedWord.size()) > 0 &&
2227       AccumulatedWord [Length-1] == '.')
2228         AccumulatedWord.resize (Length - 1);
2229 
2230       /* If there's anything left in the word, add it to the set.  Also ignore
2231       words which are too big (it's probably some binary encoded data).  But
2232       leave room for supercalifragilisticexpialidoceous.  According to one web
2233       site, pneumonoultramicroscopicsilicovolcanoconiosis is the longest word
2234       currently in English.  Note that some uuencoded data was seen with a 60
2235       character line length. */
2236 
2237       if (PrefixCharacter != 0)
2238         Length--; // Don't count prefix when judging size or emptiness.
2239       if (Length > 0 && Length <= g_MaxWordLength)
2240         WordSet.insert (AccumulatedWord);
2241 
2242       /* Empty out the string to get ready for the next word.  Not quite empty,
2243       start it off with the prefix character if any. */
2244 
2245       if (PrefixCharacter != 0)
2246         AccumulatedWord = PrefixCharacter;
2247       else
2248         AccumulatedWord.resize (0);
2249     }
2250     else /* Not a space-like character, add it to the word. */
2251       AccumulatedWord.append (1 /* one copy of the char */, (char) Letter);
2252 
2253     if (Letter < 0)
2254       break; /* End of data.  Exit here so that last word got processed. */
2255   }
2256   return NumberOfBytes;
2257 }
2258 
2259 
2260 /* Delete Things from the text.  The Thing is marked by a start string and an
2261 end string, such as "<!--" and "--> for HTML comment things.  All the text
2262 between the markers will be added to the word list before it gets deleted from
2263 the buffer.  The markers must be prepared in lower case and the buffer is
2264 assumed to have already been converted to lower case.  You can specify an empty
2265 string for the end marker if you're just matching a string constant like
2266 "&nbsp;", which you would put in the starting marker.  This is a utility
2267 function used by other tokenizer functions. */
2268 
TokenizerUtilRemoveStartEndThing(char * BufferPntr,size_t NumberOfBytes,char PrefixCharacter,set<string> & WordSet,const char * ThingStartCode,const char * ThingEndCode,bool ReplaceWithSpace)2269 static size_t TokenizerUtilRemoveStartEndThing (
2270   char *BufferPntr,
2271   size_t NumberOfBytes,
2272   char PrefixCharacter,
2273   set<string> &WordSet,
2274   const char *ThingStartCode,
2275   const char *ThingEndCode,
2276   bool ReplaceWithSpace)
2277 {
2278   char *EndOfStringPntr;
2279   bool  FoundAndDeletedThing;
2280   char *InputStringPntr;
2281   char *OutputStringPntr;
2282   int   ThingEndLength;
2283   char *ThingEndPntr;
2284   int   ThingStartLength;
2285 
2286   InputStringPntr = BufferPntr;
2287   EndOfStringPntr = InputStringPntr + NumberOfBytes;
2288   OutputStringPntr = InputStringPntr;
2289   ThingStartLength = strlen (ThingStartCode);
2290   ThingEndLength = strlen (ThingEndCode);
2291 
2292   if (ThingStartLength <= 0)
2293     return NumberOfBytes; /* Need some things to look for first! */
2294 
2295   while (InputStringPntr < EndOfStringPntr)
2296   {
2297     /* Search for the starting marker. */
2298 
2299     FoundAndDeletedThing = false;
2300     if (EndOfStringPntr - InputStringPntr >=
2301     ThingStartLength + ThingEndLength /* space remains for start + end */ &&
2302     *InputStringPntr == *ThingStartCode &&
2303     memcmp (InputStringPntr, ThingStartCode, ThingStartLength) == 0)
2304     {
2305       /* Found the start marker.  Look for the terminating string.  If it is an
2306       empty string, then we've found it right now! */
2307 
2308       ThingEndPntr = InputStringPntr + ThingStartLength;
2309       while (EndOfStringPntr - ThingEndPntr >= ThingEndLength)
2310       {
2311         if (ThingEndLength == 0 ||
2312         (*ThingEndPntr == *ThingEndCode &&
2313         memcmp (ThingEndPntr, ThingEndCode, ThingEndLength) == 0))
2314         {
2315           /* Got the end of the Thing.  First dump the text inbetween the start
2316           and end markers into the words list. */
2317 
2318           TokenizerPassGetPlainWords (InputStringPntr + ThingStartLength,
2319             ThingEndPntr - (InputStringPntr + ThingStartLength),
2320             PrefixCharacter, WordSet);
2321 
2322           /* Delete by not updating the output pointer while moving the input
2323           pointer to just after the ending tag. */
2324 
2325           InputStringPntr = ThingEndPntr + ThingEndLength;
2326           if (ReplaceWithSpace)
2327             *OutputStringPntr++ = ' ';
2328           FoundAndDeletedThing = true;
2329           break;
2330         }
2331         ThingEndPntr++;
2332       } /* End while ThingEndPntr */
2333     }
2334     if (!FoundAndDeletedThing)
2335       *OutputStringPntr++ = *InputStringPntr++;
2336   } /* End while InputStringPntr */
2337 
2338   return OutputStringPntr - BufferPntr;
2339 }
2340 
2341 
TokenizerPassRemoveHTMLComments(char * BufferPntr,size_t NumberOfBytes,char PrefixCharacter,set<string> & WordSet)2342 static size_t TokenizerPassRemoveHTMLComments (
2343   char *BufferPntr,
2344   size_t NumberOfBytes,
2345   char PrefixCharacter,
2346   set<string> &WordSet)
2347 {
2348   return TokenizerUtilRemoveStartEndThing (BufferPntr, NumberOfBytes,
2349     PrefixCharacter, WordSet, "<!--", "-->", false);
2350 }
2351 
2352 
TokenizerPassRemoveHTMLStyle(char * BufferPntr,size_t NumberOfBytes,char PrefixCharacter,set<string> & WordSet)2353 static size_t TokenizerPassRemoveHTMLStyle (
2354   char *BufferPntr,
2355   size_t NumberOfBytes,
2356   char PrefixCharacter,
2357   set<string> &WordSet)
2358 {
2359   return TokenizerUtilRemoveStartEndThing (BufferPntr, NumberOfBytes,
2360     PrefixCharacter, WordSet,
2361     "<style", "/style>", false /* replace with space if true */);
2362 }
2363 
2364 
2365 /* Convert Japanese periods (a round hollow dot symbol) to spaces so that the
2366 start of the next sentence is recognised at least as the start of a very long
2367 word.  The Japanese comma also does the same job. */
2368 
TokenizerPassJapanesePeriodsToSpaces(char * BufferPntr,size_t NumberOfBytes,char PrefixCharacter,set<string> & WordSet)2369 static size_t TokenizerPassJapanesePeriodsToSpaces (
2370   char *BufferPntr,
2371   size_t NumberOfBytes,
2372   char PrefixCharacter,
2373   set<string> &WordSet)
2374 {
2375   size_t BytesRemaining = NumberOfBytes;
2376 
2377   BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
2378     BytesRemaining, PrefixCharacter, WordSet, "。" /* period */, "", true);
2379   BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
2380     BytesRemaining, PrefixCharacter, WordSet, "、" /* comma */, "", true);
2381   return BytesRemaining;
2382 }
2383 
2384 
2385 /* Delete HTML tags from the text.  The contents of the tag are added as words
2386 before being deleted.  <P>, <BR> and &nbsp; are replaced by spaces at this
2387 stage while other HTML things get replaced by nothing. */
2388 
TokenizerPassRemoveHTMLTags(char * BufferPntr,size_t NumberOfBytes,char PrefixCharacter,set<string> & WordSet)2389 static size_t TokenizerPassRemoveHTMLTags (
2390   char *BufferPntr,
2391   size_t NumberOfBytes,
2392   char PrefixCharacter,
2393   set<string> &WordSet)
2394 {
2395   size_t BytesRemaining = NumberOfBytes;
2396 
2397   BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
2398     BytesRemaining, PrefixCharacter, WordSet, "&nbsp;", "", true);
2399   BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
2400     BytesRemaining, PrefixCharacter, WordSet, "<p", ">", true);
2401   BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
2402     BytesRemaining, PrefixCharacter, WordSet, "<br", ">", true);
2403   BytesRemaining = TokenizerUtilRemoveStartEndThing (BufferPntr,
2404     BytesRemaining, PrefixCharacter, WordSet, "<", ">", false);
2405   return BytesRemaining;
2406 }
2407 
2408 
2409 
2410 /******************************************************************************
2411  * Implementation of the ABSApp class, constructor, destructor and the rest of
2412  * the member functions in mostly alphabetical order.
2413  */
2414 
ABSApp()2415 ABSApp::ABSApp ()
2416 : BApplication (g_ABSAppSignature),
2417   m_DatabaseHasChanged (false),
2418   m_SettingsHaveChanged (false)
2419 {
2420   status_t    ErrorCode;
2421   int         HalvingCount;
2422   int         i;
2423   const void *ResourceData;
2424   size_t      ResourceSize;
2425   BResources *ResourcesPntr;
2426 
2427   MakeDatabaseEmpty ();
2428 
2429   /* Set up the pathname which identifies our settings directory.  Note that
2430   the actual settings are loaded later on (or set to defaults) by the main()
2431   function, before this BApplication starts running.  So we don't bother
2432   initialising the other setting related variables here. */
2433 
2434   ErrorCode =
2435     find_directory (B_USER_SETTINGS_DIRECTORY, &m_SettingsDirectoryPath);
2436   if (ErrorCode == B_OK)
2437     ErrorCode = m_SettingsDirectoryPath.Append (g_SettingsDirectoryName);
2438   if (ErrorCode != B_OK)
2439     m_SettingsDirectoryPath.SetTo (".");
2440 
2441   /* Set up the table which identifies which characters are spaces and which
2442   are not.  Spaces are all control characters and all punctuation except for:
2443   apostrophe (so "it's" and possessive versions of words get stored), dash (for
2444   hyphenated words), dollar sign (for cash amounts), period (for IP addresses,
2445   we later remove trailing periods). */
2446 
2447   memset (g_SpaceCharacters, 1, sizeof (g_SpaceCharacters));
2448   g_SpaceCharacters['\''] = false;
2449   g_SpaceCharacters['-'] = false;
2450   g_SpaceCharacters['$'] = false;
2451   g_SpaceCharacters['.'] = false;
2452   for (i = '0'; i <= '9'; i++)
2453     g_SpaceCharacters[i] = false;
2454   for (i = 'A'; i <= 'Z'; i++)
2455     g_SpaceCharacters[i] = false;
2456   for (i = 'a'; i <= 'z'; i++)
2457     g_SpaceCharacters[i] = false;
2458 
2459   /* Initialise the busy cursor from data in the application's resources. */
2460 
2461   if ((ResourcesPntr = AppResources ()) != NULL && (ResourceData =
2462   ResourcesPntr->LoadResource ('CURS', "Busy Cursor", &ResourceSize)) != NULL
2463   && ResourceSize >= 68 /* Size of a raw 2x16x16x8+4 cursor is 68 bytes */)
2464     g_BusyCursor = new BCursor (ResourceData);
2465 
2466   /* Find out the smallest usable double by seeing how small we can make it. */
2467 
2468   m_SmallestUseableDouble = 1.0;
2469   HalvingCount = 0;
2470   while (HalvingCount < 10000 && m_SmallestUseableDouble > 0.0)
2471   {
2472     HalvingCount++;
2473     m_SmallestUseableDouble /= 2;
2474   }
2475 
2476   /* Recreate the number.  But don't make quite as small, we want to allow some
2477   precision bits and a bit of extra margin for intermediate results in future
2478   calculations. */
2479 
2480   HalvingCount -= 50 + sizeof (double) * 8;
2481 
2482   m_SmallestUseableDouble = 1.0;
2483   while (HalvingCount > 0)
2484   {
2485     HalvingCount--;
2486     m_SmallestUseableDouble /= 2;
2487   }
2488 }
2489 
2490 
~ABSApp()2491 ABSApp::~ABSApp ()
2492 {
2493   status_t ErrorCode;
2494   char     ErrorMessage [PATH_MAX + 1024];
2495 
2496   if (m_SettingsHaveChanged)
2497     LoadSaveSettings (false /* DoLoad */);
2498   if ((ErrorCode = SaveDatabaseIfNeeded (ErrorMessage)) != B_OK)
2499     DisplayErrorMessage (ErrorMessage, ErrorCode, "Exiting Error");
2500   delete g_BusyCursor;
2501   g_BusyCursor = NULL;
2502 }
2503 
2504 
2505 /* Display a box showing information about this program. */
2506 
2507 void
AboutRequested()2508 ABSApp::AboutRequested ()
2509 {
2510   BAlert *AboutAlertPntr;
2511 
2512   AboutAlertPntr = new BAlert ("About",
2513 "SpamDBM - Spam Database Manager\n\n"
2514 
2515 "This is a BeOS program for classifying e-mail messages as spam (unwanted \
2516 junk mail) or as genuine mail using a Bayesian statistical approach.  There \
2517 is also a Mail Daemon Replacement add-on to filter mail using the \
2518 classification statistics collected earlier.\n\n"
2519 
2520 "Written by Alexander G. M. Smith, fall 2002.\n\n"
2521 
2522 "The original idea was from Paul Graham's algorithm, which has an excellent \
2523 writeup at: http://www.paulgraham.com/spam.html\n\n"
2524 
2525 "Gary Robinson came up with the improved algorithm, which you can read about \
2526 at: http://radio.weblogs.com/0101454/stories/2002/09/16/spamDetection.html\n\n"
2527 
2528 "Mr. Robinson, Tim Peters and the SpamBayes mailing list people then \
2529 developed the even better chi-squared scoring method.\n\n"
2530 
2531 "Icon courtesy of Isaac Yonemoto, though it is no longer used since Hormel \
2532 doesn't want their meat product associated with junk e-mail.\n\n"
2533 
2534 "Tokenising code updated in 2005 to use some of the tricks that SpamBayes \
2535 uses to extract words from messages.  In particular, HTML is now handled.\n\n"
2536 
2537 "Released to the public domain, with no warranty.\n"
2538 "$Revision: 30630 $\n"
2539 "Compiled on " __DATE__ " at " __TIME__ ".", "Done");
2540   if (AboutAlertPntr != NULL)
2541   {
2542     AboutAlertPntr->SetFlags(AboutAlertPntr->Flags() | B_CLOSE_ON_ESCAPE);
2543     AboutAlertPntr->Go ();
2544   }
2545 }
2546 
2547 
2548 /* Add the text in the given file to the database as an example of a spam or
2549 genuine message, or removes it from the database if you claim it is
2550 CL_UNCERTAIN.  Also resets the spam ratio attribute to show the effect of the
2551 database change. */
2552 
AddFileToDatabase(ClassificationTypes IsSpamOrWhat,const char * FileName,char * ErrorMessage)2553 status_t ABSApp::AddFileToDatabase (
2554   ClassificationTypes IsSpamOrWhat,
2555   const char *FileName,
2556   char *ErrorMessage)
2557 {
2558   status_t ErrorCode;
2559   BFile    MessageFile;
2560   BMessage TempBMessage;
2561 
2562   ErrorCode = MessageFile.SetTo (FileName, B_READ_ONLY);
2563   if (ErrorCode != B_OK)
2564   {
2565     sprintf (ErrorMessage, "Unable to open file \"%s\" for reading", FileName);
2566     return ErrorCode;
2567   }
2568 
2569   ErrorCode = AddPositionIOToDatabase (IsSpamOrWhat,
2570     &MessageFile, FileName, ErrorMessage);
2571   MessageFile.Unset ();
2572   if (ErrorCode != B_OK)
2573     return ErrorCode;
2574 
2575   /* Re-evaluate the file so that the user sees the new ratio attribute. */
2576   return EvaluateFile (FileName, &TempBMessage, ErrorMessage);
2577 }
2578 
2579 
2580 /* Add the given text to the database.  The unique words found in MessageIOPntr
2581 will be added to the database (incrementing the count for the number of
2582 messages using each word, either the spam or genuine count depending on
2583 IsSpamOrWhat).  It will remove the message (decrement the word counts) if you
2584 specify CL_UNCERTAIN as the new classification.  And if it switches from spam
2585 to genuine or vice versa, it will do both - decrement the counts for the old
2586 class and increment the counts for the new one.  An attribute will be added to
2587 MessageIOPntr (if it is a file) to record that it has been marked as Spam or
2588 Genuine (so that it doesn't get added to the database a second time).  If it is
2589 being removed from the database, the classification attribute gets removed too.
2590 If things go wrong, a non-zero error code will be returned and an explanation
2591 written to ErrorMessage (assumed to be at least PATH_MAX + 1024 bytes long).
2592 OptionalFileName is just used in the error message to identify the file to the
2593 user. */
2594 
AddPositionIOToDatabase(ClassificationTypes IsSpamOrWhat,BPositionIO * MessageIOPntr,const char * OptionalFileName,char * ErrorMessage)2595 status_t ABSApp::AddPositionIOToDatabase (
2596   ClassificationTypes IsSpamOrWhat,
2597   BPositionIO *MessageIOPntr,
2598   const char *OptionalFileName,
2599   char *ErrorMessage)
2600 {
2601   BNode                             *BNodePntr;
2602   char                               ClassificationString [NAME_MAX];
2603   StatisticsMap::iterator            DataIter;
2604   status_t                           ErrorCode = 0;
2605   pair<StatisticsMap::iterator,bool> InsertResult;
2606   uint32                             NewAge;
2607   StatisticsRecord                   NewStatistics;
2608   ClassificationTypes                PreviousClassification;
2609   StatisticsPointer                  StatisticsPntr;
2610   set<string>::iterator              WordEndIter;
2611   set<string>::iterator              WordIter;
2612   set<string>                        WordSet;
2613 
2614   NewAge = m_TotalGenuineMessages + m_TotalSpamMessages;
2615   if (NewAge >= 0xFFFFFFF0UL)
2616   {
2617     sprintf (ErrorMessage,
2618       "The database is full!  There are %" B_PRIu32 " messages in "
2619       "it and we can't add any more without overflowing the maximum integer "
2620       "representation in 32 bits", NewAge);
2621     return B_NO_MEMORY;
2622   }
2623 
2624   /* Check that this file hasn't already been added to the database. */
2625 
2626   PreviousClassification = CL_UNCERTAIN;
2627   BNodePntr = dynamic_cast<BNode *> (MessageIOPntr);
2628   if (BNodePntr != NULL) /* If this thing might have attributes. */
2629   {
2630     ErrorCode = BNodePntr->ReadAttr (g_AttributeNameClassification,
2631       B_STRING_TYPE, 0 /* offset */, ClassificationString,
2632       sizeof (ClassificationString) - 1);
2633     if (ErrorCode <= 0) /* Positive values for the number of bytes read */
2634       strcpy (ClassificationString, "none");
2635     else /* Just in case it needs a NUL at the end. */
2636       ClassificationString [ErrorCode] = 0;
2637 
2638     if (strcasecmp (ClassificationString, g_ClassifiedSpam) == 0)
2639       PreviousClassification = CL_SPAM;
2640     else if (strcasecmp (ClassificationString, g_ClassifiedGenuine) == 0)
2641       PreviousClassification = CL_GENUINE;
2642   }
2643 
2644   if (!m_IgnorePreviousClassification &&
2645   PreviousClassification != CL_UNCERTAIN)
2646   {
2647     if (IsSpamOrWhat == PreviousClassification)
2648     {
2649       sprintf (ErrorMessage, "Ignoring file \"%s\" since it seems to have "
2650         "already been classified as %s.", OptionalFileName,
2651         g_ClassificationTypeNames [IsSpamOrWhat]);
2652     }
2653     else
2654     {
2655       sprintf (ErrorMessage, "Changing existing classification of file \"%s\" "
2656         "from %s to %s.", OptionalFileName,
2657         g_ClassificationTypeNames [PreviousClassification],
2658         g_ClassificationTypeNames [IsSpamOrWhat]);
2659     }
2660     DisplayErrorMessage (ErrorMessage, 0, "Note");
2661   }
2662 
2663   if (!m_IgnorePreviousClassification &&
2664   IsSpamOrWhat == PreviousClassification)
2665     /* Nothing to do if it is already classified correctly and the user doesn't
2666     want double classification. */
2667     return B_OK;
2668 
2669   /* Get the list of unique words in the file. */
2670 
2671   ErrorCode = GetWordsFromPositionIO (MessageIOPntr, OptionalFileName,
2672     WordSet, ErrorMessage);
2673   if (ErrorCode != B_OK)
2674     return ErrorCode;
2675 
2676   /* Update the count of the number of messages processed, with corrections if
2677   reclassifying a message. */
2678 
2679   m_DatabaseHasChanged = true;
2680 
2681   if (!m_IgnorePreviousClassification &&
2682   PreviousClassification == CL_SPAM && m_TotalSpamMessages > 0)
2683     m_TotalSpamMessages--;
2684 
2685   if (IsSpamOrWhat == CL_SPAM)
2686     m_TotalSpamMessages++;
2687 
2688   if (!m_IgnorePreviousClassification &&
2689   PreviousClassification == CL_GENUINE && m_TotalGenuineMessages > 0)
2690       m_TotalGenuineMessages--;
2691 
2692   if (IsSpamOrWhat == CL_GENUINE)
2693     m_TotalGenuineMessages++;
2694 
2695   /* Mark the file's attributes with the new classification.  Don't care if it
2696   fails. */
2697 
2698   if (BNodePntr != NULL) /* If this thing might have attributes. */
2699   {
2700     ErrorCode = BNodePntr->RemoveAttr (g_AttributeNameClassification);
2701     if (IsSpamOrWhat != CL_UNCERTAIN)
2702     {
2703       strcpy (ClassificationString, g_ClassificationTypeNames [IsSpamOrWhat]);
2704       ErrorCode = BNodePntr->WriteAttr (g_AttributeNameClassification,
2705         B_STRING_TYPE, 0 /* offset */,
2706         ClassificationString, strlen (ClassificationString) + 1);
2707     }
2708   }
2709 
2710   /* Add the words to the database by incrementing or decrementing the counts
2711   for each word as appropriate. */
2712 
2713   WordEndIter = WordSet.end ();
2714   for (WordIter = WordSet.begin (); WordIter != WordEndIter; WordIter++)
2715   {
2716     if ((DataIter = m_WordMap.find (*WordIter)) == m_WordMap.end ())
2717     {
2718       /* No record in the database for the word. */
2719 
2720       if (IsSpamOrWhat == CL_UNCERTAIN)
2721         continue; /* Not adding words, don't have to subtract from nothing. */
2722 
2723       /* Create a new one record in the database for the new word. */
2724 
2725       memset (&NewStatistics, 0, sizeof (NewStatistics));
2726       InsertResult = m_WordMap.insert (
2727         StatisticsMap::value_type (*WordIter, NewStatistics));
2728       if (!InsertResult.second)
2729       {
2730         sprintf (ErrorMessage, "Failed to insert new database entry for "
2731           "word \"%s\", while processing file \"%s\"",
2732           WordIter->c_str (), OptionalFileName);
2733         return B_NO_MEMORY;
2734       }
2735       DataIter = InsertResult.first;
2736       m_WordCount++;
2737     }
2738 
2739     /* Got the database record for the word, update the statistics. */
2740 
2741     StatisticsPntr = &DataIter->second;
2742 
2743     StatisticsPntr->age = NewAge;
2744 
2745     /* Can't update m_OldestAge here, since it would take a lot of effort to
2746     find the next older age.  Since it's only used for display, we'll let it be
2747     slightly incorrect.  The next database load or purge will fix it. */
2748 
2749     if (IsSpamOrWhat == CL_SPAM)
2750       StatisticsPntr->spamCount++;
2751 
2752     if (IsSpamOrWhat == CL_GENUINE)
2753       StatisticsPntr->genuineCount++;
2754 
2755     if (!m_IgnorePreviousClassification &&
2756     PreviousClassification == CL_SPAM && StatisticsPntr->spamCount > 0)
2757       StatisticsPntr->spamCount--;
2758 
2759     if (!m_IgnorePreviousClassification &&
2760     PreviousClassification == CL_GENUINE && StatisticsPntr->genuineCount > 0)
2761       StatisticsPntr->genuineCount--;
2762   }
2763 
2764   return B_OK;
2765 }
2766 
2767 
2768 /* Add the text in the string to the database as an example of a spam or
2769 genuine message. */
2770 
AddStringToDatabase(ClassificationTypes IsSpamOrWhat,const char * String,char * ErrorMessage)2771 status_t ABSApp::AddStringToDatabase (
2772   ClassificationTypes IsSpamOrWhat,
2773   const char *String,
2774   char *ErrorMessage)
2775 {
2776   BMemoryIO MemoryIO (String, strlen (String));
2777 
2778   return AddPositionIOToDatabase (IsSpamOrWhat, &MemoryIO,
2779    "Memory Buffer" /* OptionalFileName */, ErrorMessage);
2780 }
2781 
2782 
2783 /* Given a bunch of text, find the words within it (doing special tricks to
2784 extract words from HTML), and add them to the set.  Allow NULs in the text.  If
2785 the PrefixCharacter isn't zero then it is prepended to all words found (so you
2786 can distinguish words as being from a header or from the body text).  See also
2787 TokenizeWhole which does something similar. */
2788 
2789 void
AddWordsToSet(const char * InputString,size_t NumberOfBytes,char PrefixCharacter,set<string> & WordSet)2790 ABSApp::AddWordsToSet (
2791   const char *InputString,
2792   size_t NumberOfBytes,
2793   char PrefixCharacter,
2794   set<string> &WordSet)
2795 {
2796   char   *BufferPntr;
2797   size_t  CurrentSize;
2798   int     PassNumber;
2799 
2800   /* Copy the input buffer.  The code will be modifying it in-place as HTML
2801   fragments and other junk are deleted. */
2802 
2803   BufferPntr = new char [NumberOfBytes];
2804   if (BufferPntr == NULL)
2805     return;
2806   memcpy (BufferPntr, InputString, NumberOfBytes);
2807 
2808   /* Do the tokenization.  Each pass does something to the text in the buffer,
2809   and may add words to the word set. */
2810 
2811   CurrentSize = NumberOfBytes;
2812   for (PassNumber = 1; PassNumber <= 8 && CurrentSize > 0 ; PassNumber++)
2813   {
2814     switch (PassNumber)
2815     {
2816       case 1: /* Lowercase first, rest of them assume lower case inputs. */
2817         CurrentSize = TokenizerPassLowerCase (BufferPntr, CurrentSize);
2818         break;
2819       case 2: CurrentSize = TokenizerPassJapanesePeriodsToSpaces (
2820         BufferPntr, CurrentSize, PrefixCharacter, WordSet); break;
2821       case 3: CurrentSize = TokenizerPassTruncateLongAsianWords (
2822         BufferPntr, CurrentSize); break;
2823       case 4: CurrentSize = TokenizerPassRemoveHTMLComments (
2824         BufferPntr, CurrentSize, 'Z', WordSet); break;
2825       case 5: CurrentSize = TokenizerPassRemoveHTMLStyle (
2826         BufferPntr, CurrentSize, 'Z', WordSet); break;
2827       case 6: CurrentSize = TokenizerPassExtractURLs (
2828         BufferPntr, CurrentSize, 'Z', WordSet); break;
2829       case 7: CurrentSize = TokenizerPassRemoveHTMLTags (
2830         BufferPntr, CurrentSize, 'Z', WordSet); break;
2831       case 8: CurrentSize = TokenizerPassGetPlainWords (
2832         BufferPntr, CurrentSize, PrefixCharacter, WordSet); break;
2833       default: break;
2834     }
2835   }
2836 
2837   delete [] BufferPntr;
2838 }
2839 
2840 
2841 /* The user has provided a command line.  This could actually be from a
2842 separate attempt to invoke the program (this application's resource/attributes
2843 have the launch flags set to "single launch", so the shell doesn't start the
2844 program but instead sends the arguments to the already running instance).  In
2845 either case, the command is sent to an intermediary thread where it is
2846 asynchronously converted into a scripting message(s) that are sent back to this
2847 BApplication.  The intermediary is needed since we can't recursively execute
2848 scripting messages while processing a message (this ArgsReceived one). */
2849 
2850 void
ArgvReceived(int32 argc,char ** argv)2851 ABSApp::ArgvReceived (int32 argc, char **argv)
2852 {
2853   if (g_CommanderLooperPntr != NULL)
2854     g_CommanderLooperPntr->CommandArguments (argc, argv);
2855 }
2856 
2857 
2858 /* Create a new empty database.  Note that we have to write out the new file
2859 immediately, otherwise other operations will see the empty database and then
2860 try to load the file, and complain that it doesn't exist.  Now they will see
2861 the empty database and redundantly load the empty file. */
2862 
CreateDatabaseFile(char * ErrorMessage)2863 status_t ABSApp::CreateDatabaseFile (char *ErrorMessage)
2864 {
2865   MakeDatabaseEmpty ();
2866   m_DatabaseHasChanged = true;
2867   return SaveDatabaseIfNeeded (ErrorMessage); /* Make it now. */
2868 }
2869 
2870 
2871 /* Set the settings to the defaults.  Needed in case there isn't a settings
2872 file or it is obsolete. */
2873 
2874 void
DefaultSettings()2875 ABSApp::DefaultSettings ()
2876 {
2877   status_t ErrorCode;
2878   BPath    DatabasePath (m_SettingsDirectoryPath);
2879   char     TempString [PATH_MAX];
2880 
2881   /* The default database file is in the settings directory. */
2882 
2883   ErrorCode = DatabasePath.Append (g_DefaultDatabaseFileName);
2884   if (ErrorCode != B_OK)
2885     strcpy (TempString, g_DefaultDatabaseFileName); /* Unlikely to happen. */
2886   else
2887     strcpy (TempString, DatabasePath.Path ());
2888   m_DatabaseFileName.SetTo (TempString);
2889 
2890   // Users need to be allowed to undo their mistakes...
2891   m_IgnorePreviousClassification = true;
2892   g_ServerMode = true;
2893   m_PurgeAge = 2000;
2894   m_PurgePopularity = 2;
2895   m_ScoringMode = SM_CHISQUARED;
2896   m_TokenizeMode = TM_ANY_TEXT_HEADER;
2897 
2898   m_SettingsHaveChanged = true;
2899 }
2900 
2901 
2902 /* Deletes the database file, and the backup file, and clears the database but
2903 marks it as not changed so that it doesn't get written out when the program
2904 exits. */
2905 
DeleteDatabaseFile(char * ErrorMessage)2906 status_t ABSApp::DeleteDatabaseFile (char *ErrorMessage)
2907 {
2908   BEntry   FileEntry;
2909   status_t ErrorCode;
2910   int      i;
2911   char     TempString [PATH_MAX+20];
2912 
2913   /* Clear the in-memory database. */
2914 
2915   MakeDatabaseEmpty ();
2916   m_DatabaseHasChanged = false;
2917 
2918   /* Delete the backup files first.  Don't care if it fails. */
2919 
2920   for (i = 0; i < g_MaxBackups; i++)
2921   {
2922     strcpy (TempString, m_DatabaseFileName.String ());
2923     sprintf (TempString + strlen (TempString), g_BackupSuffix, i);
2924     ErrorCode = FileEntry.SetTo (TempString);
2925     if (ErrorCode == B_OK)
2926       FileEntry.Remove ();
2927   }
2928 
2929   /* Delete the main database file. */
2930 
2931   strcpy (TempString, m_DatabaseFileName.String ());
2932   ErrorCode = FileEntry.SetTo (TempString);
2933   if (ErrorCode != B_OK)
2934   {
2935     sprintf (ErrorMessage, "While deleting, failed to make BEntry for "
2936       "\"%s\" (does the directory exist?)", TempString);
2937     return ErrorCode;
2938   }
2939 
2940   ErrorCode = FileEntry.Remove ();
2941   if (ErrorCode != B_OK)
2942     sprintf (ErrorMessage, "While deleting, failed to remove file "
2943       "\"%s\"", TempString);
2944 
2945   return ErrorCode;
2946 }
2947 
2948 
2949 /* Evaluate the given file as being a spam message, and tag it with the
2950 resulting spam probability ratio.  If it also has an e-mail subject attribute,
2951 remove the [Spam 99.9%] prefix since the number usually changes. */
2952 
EvaluateFile(const char * PathName,BMessage * ReplyMessagePntr,char * ErrorMessage)2953 status_t ABSApp::EvaluateFile (
2954   const char *PathName,
2955   BMessage *ReplyMessagePntr,
2956   char *ErrorMessage)
2957 {
2958   status_t ErrorCode;
2959   float    TempFloat;
2960   BFile    TextFile;
2961 
2962   /* Open the specified file. */
2963 
2964   ErrorCode = TextFile.SetTo (PathName, B_READ_ONLY);
2965   if (ErrorCode != B_OK)
2966   {
2967     sprintf (ErrorMessage, "Problems opening file \"%s\" for evaluating",
2968       PathName);
2969     return ErrorCode;
2970   }
2971 
2972   ErrorCode =
2973     EvaluatePositionIO (&TextFile, PathName, ReplyMessagePntr, ErrorMessage);
2974 
2975   if (ErrorCode == B_OK &&
2976   ReplyMessagePntr->FindFloat (g_ResultName, &TempFloat) == B_OK)
2977   {
2978     TextFile.WriteAttr (g_AttributeNameSpamRatio, B_FLOAT_TYPE,
2979       0 /* offset */, &TempFloat, sizeof (TempFloat));
2980     /* Don't know the spam cutoff ratio, that's in the e-mail filter, so just
2981     blindly remove the prefix, which would have the wrong percentage. */
2982     RemoveSpamPrefixFromSubjectAttribute (&TextFile);
2983   }
2984 
2985   return ErrorCode;
2986 }
2987 
2988 
2989 /* Evaluate a given file or memory buffer (a BPositionIO handles both cases)
2990 for spaminess.  The output is added to the ReplyMessagePntr message, with the
2991 probability ratio stored in "result" (0.0 means genuine and 1.0 means spam).
2992 It also adds the most significant words (used in the ratio calculation) to the
2993 array "words" and the associated per-word probability ratios in "ratios".  If
2994 it fails, an error code is returned and an error message written to the
2995 ErrorMessage string (which is at least PATH_MAX + 1024 bytes long).
2996 OptionalFileName is only used in the error message.
2997 
2998 The math used for combining the individual word probabilities in my method is
2999 based on Gary Robinson's method (formerly it was a variation of Paul Graham's
3000 method) or the Chi-Squared method.  Its input is the database of words that
3001 has a count of the number of spam and number of genuine messages each word
3002 appears in (doesn't matter if it appears more than once in a message, it still
3003 counts as 1).
3004 
3005 The spam word count is divided by the total number of spam e-mail messages
3006 in the database to get the probability of spam and probability of genuineness
3007 is similarly computed for a particular word.  The spam probability is divided
3008 by the sum of the spam and genuine probabilities to get the Raw Spam Ratio for
3009 the word.  It's nearer to 0.0 for genuine and nearer to 1.0 for spam, and can
3010 be exactly zero or one too.
3011 
3012 To avoid multiplying later results by zero, and to compensate for a lack of
3013 data points, the Raw Spam Ratio is adjusted towards the 0.5 halfway point.  The
3014 0.5 is combined with the raw spam ratio, with a weight of 0.45 (determined to
3015 be a good value by the "spambayes" mailing list tests) messages applied to the
3016 half way point and a weight of the number of spam + genuine messages applied to
3017 the raw spam ratio.  This gives you the compensated spam ratio for the word.
3018 
3019 The top N (150 was good in the spambayes tests) extreme words are selected by
3020 the distance of each word's compensated spam ratio from 0.5.  Then the ratios
3021 of the words are combined.
3022 
3023 The Gary Robinson combining (scoring) method gets one value from the Nth root
3024 of the product of all the word ratios.  The other is the Nth root of the
3025 product of (1 - ratio) for all the words.  The final result is the first value
3026 divided by the sum of the two values.  The Nth root helps spread the resulting
3027 range of values more evenly between 0.0 and 1.0, otherwise the values all clump
3028 together at 0 or 1.  Also you can think of the Nth root as a kind of average
3029 for products; it's like a generic word probability which when multiplied by
3030 itself N times gives you the same result as the N separate actual word
3031 probabilities multiplied together.
3032 
3033 The Chi-Squared combining (scoring) method assumes that the spam word
3034 probabilities are uniformly distributed and computes an error measurement
3035 (called chi squared - see http://bmj.com/collections/statsbk/8.shtml for a good
3036 tutorial) and then sees how likely that error value would be observed in
3037 practice.  If it's rare to observe, then the words are likely not just randomly
3038 occurring and it's spammy.  The same is done for genuine words.  The two
3039 resulting unlikelynesses are compared to see which is more unlikely, if neither
3040 is, then the method says it can't decide.  The SpamBayes notes (see the
3041 classifier.py file in CVS in http://sourceforge.net/projects/spambayes) say:
3042 
3043 "Across vectors of length n, containing random uniformly-distributed
3044 probabilities, -2*sum(ln(p_i)) follows the chi-squared distribution with 2*n
3045 degrees of freedom.  This has been proven (in some appropriate sense) to be the
3046 most sensitive possible test for rejecting the hypothesis that a vector of
3047 probabilities is uniformly distributed.  Gary Robinson's original scheme was
3048 monotonic *with* this test, but skipped the details.  Turns out that getting
3049 closer to the theoretical roots gives a much sharper classification, with a
3050 very small (in # of msgs), but also very broad (in range of scores), "middle
3051 ground", where most of the mistakes live.  In particular, this scheme seems
3052 immune to all forms of "cancellation disease": if there are many strong ham
3053 *and* spam clues, this reliably scores close to 0.5.  Most other schemes are
3054 extremely certain then -- and often wrong."
3055 
3056 I did a test with 448 example genuine messages including personal mail (some
3057 with HTML attachments) and mailing lists, and 267 spam messages for 27471 words
3058 total.  Test messages were more recent messages in the same groups.  Out of 100
3059 test genuine messages, with Gary Robinson (0.56 cutoff limit), 1 (1%) was
3060 falsely identified as spam and 8 of 73 (11%) spam messages were incorrectly
3061 classified as genuine.  With my variation of Paul Graham's scheme (0.90 cutoff)
3062 I got 6 of 100 (6%) genuine messages incorrectly marked as spam and 2 of 73
3063 (3%) spam messages were incorrectly classified as genuine.  Pretty close, but
3064 Robinson's values are more evenly spread out so you can tell just how spammy it
3065 is by looking at the number. */
3066 
/* One candidate word for the spam calculation: a pointer to the word's text
and its spam ratio.  The structure also doubles as its own ordering function,
so it can be given to a priority_queue as the comparison type. */

struct WordAndRatioStruct
{
  double        probabilityRatio; /* Actually the compensated ratio. */
  const string *wordPntr;

  /* Less-than test for sorting: ItemA comes before ItemB when ItemA's ratio is
  strictly nearer to the neutral 0.5 mark.  In a priority_queue that puts the
  most extreme ratio (furthest from 0.5) on top. */

  bool operator() (
    const WordAndRatioStruct &ItemA,
    const WordAndRatioStruct &ItemB) const
  {
    return
      (fabs (ItemB.probabilityRatio - 0.5) >
      fabs (ItemA.probabilityRatio - 0.5));
  }
};
3081 
EvaluatePositionIO(BPositionIO * PositionIOPntr,const char * OptionalFileName,BMessage * ReplyMessagePntr,char * ErrorMessage)3082 status_t ABSApp::EvaluatePositionIO (
3083   BPositionIO *PositionIOPntr,
3084   const char *OptionalFileName,
3085   BMessage *ReplyMessagePntr,
3086   char *ErrorMessage)
3087 {
3088   StatisticsMap::iterator            DataEndIter;
3089   StatisticsMap::iterator            DataIter;
3090   status_t                           ErrorCode;
3091   double                             GenuineProbability;
3092   uint32                             GenuineSpamSum;
3093   int                                i;
3094   priority_queue<
3095     WordAndRatioStruct /* Data type stored in the queue */,
3096     vector<WordAndRatioStruct> /* Underlying container */,
3097     WordAndRatioStruct /* Function for comparing elements */>
3098                                      PriorityQueue;
3099   double                             ProductGenuine;
3100   double                             ProductLogGenuine;
3101   double                             ProductLogSpam;
3102   double                             ProductSpam;
3103   double                             RawProbabilityRatio;
3104   float                              ResultRatio;
3105   double                             SpamProbability;
3106   StatisticsPointer                  StatisticsPntr;
3107   double                             TempDouble;
3108   double                             TotalGenuine;
3109   double                             TotalSpam;
3110   WordAndRatioStruct                 WordAndRatio;
3111   set<string>::iterator              WordEndIter;
3112   set<string>::iterator              WordIter;
3113   const WordAndRatioStruct          *WordRatioPntr;
3114   set<string>                        WordSet;
3115 
3116   /* Get the list of unique words in the file / memory buffer. */
3117 
3118   ErrorCode = GetWordsFromPositionIO (PositionIOPntr, OptionalFileName,
3119     WordSet, ErrorMessage);
3120   if (ErrorCode != B_OK)
3121     return ErrorCode;
3122 
3123   /* Prepare a few variables.  Mostly these are stored double values of some of
3124   the numbers involved (to avoid the overhead of multiple conversions from
3125   integer to double), with extra precautions to avoid divide by zero. */
3126 
3127   if (m_TotalGenuineMessages <= 0)
3128     TotalGenuine = 1.0;
3129   else
3130     TotalGenuine = m_TotalGenuineMessages;
3131 
3132   if (m_TotalSpamMessages <= 0)
3133     TotalSpam = 1.0;
3134   else
3135     TotalSpam = m_TotalSpamMessages;
3136 
3137   /* Look up the words in the database and calculate their compensated spam
3138   ratio.  The results are stored in a priority queue so that we can later find
3139   the top g_MaxInterestingWords for doing the actual determination. */
3140 
3141   WordEndIter = WordSet.end ();
3142   DataEndIter = m_WordMap.end ();
3143   for (WordIter = WordSet.begin (); WordIter != WordEndIter; WordIter++)
3144   {
3145     WordAndRatio.wordPntr = &(*WordIter);
3146 
3147     if ((DataIter = m_WordMap.find (*WordIter)) != DataEndIter)
3148     {
3149       StatisticsPntr = &DataIter->second;
3150 
3151       /* Calculate the probability the word is spam and the probability it is
3152       genuine.  Then the raw probability ratio. */
3153 
3154       SpamProbability = StatisticsPntr->spamCount / TotalSpam;
3155       GenuineProbability = StatisticsPntr->genuineCount / TotalGenuine;
3156 
3157       if (SpamProbability + GenuineProbability > 0)
3158         RawProbabilityRatio =
3159         SpamProbability / (SpamProbability + GenuineProbability);
3160       else /* Word with zero statistics, perhaps due to reclassification. */
3161         RawProbabilityRatio = 0.5;
3162 
3163       /* The compensated ratio leans towards 0.5 (g_RobinsonX) more for fewer
3164       data points, with a weight of 0.45 (g_RobinsonS). */
3165 
3166       GenuineSpamSum =
3167         StatisticsPntr->spamCount + StatisticsPntr->genuineCount;
3168 
3169       WordAndRatio.probabilityRatio =
3170         (g_RobinsonS * g_RobinsonX + GenuineSpamSum * RawProbabilityRatio) /
3171         (g_RobinsonS + GenuineSpamSum);
3172     }
3173     else /* Unknown word. With N=0, compensated ratio equation is RobinsonX. */
3174       WordAndRatio.probabilityRatio = g_RobinsonX;
3175 
3176      PriorityQueue.push (WordAndRatio);
3177   }
3178 
3179   /* Compute the combined probability (multiply them together) of the top few
3180   words.  To avoid numeric underflow (doubles can only get as small as 1E-300),
3181   logarithms are also used.  But avoid the logarithms (sum of logs of numbers
3182   is the same as the product of numbers) as much as possible due to reduced
3183   accuracy and slowness. */
3184 
3185   ProductGenuine = 1.0;
3186   ProductLogGenuine = 0.0;
3187   ProductSpam = 1.0;
3188   ProductLogSpam = 0.0;
3189   for (i = 0;
3190   i < g_MaxInterestingWords && !PriorityQueue.empty();
3191   i++, PriorityQueue.pop())
3192   {
3193     WordRatioPntr = &PriorityQueue.top();
3194     ProductSpam *= WordRatioPntr->probabilityRatio;
3195     ProductGenuine *= 1.0 - WordRatioPntr->probabilityRatio;
3196 
3197     /* Check for the numbers getting dangerously small, close to underflowing.
3198     If they are, move the value into the logarithm storage part. */
3199 
3200     if (ProductSpam < m_SmallestUseableDouble)
3201     {
3202       ProductLogSpam += log (ProductSpam);
3203       ProductSpam = 1.0;
3204     }
3205 
3206     if (ProductGenuine < m_SmallestUseableDouble)
3207     {
3208       ProductLogGenuine += log (ProductGenuine);
3209       ProductGenuine = 1.0;
3210     }
3211 
3212     ReplyMessagePntr->AddString ("words", WordRatioPntr->wordPntr->c_str ());
3213     ReplyMessagePntr->AddFloat ("ratios", WordRatioPntr->probabilityRatio);
3214   }
3215 
3216   /* Get the resulting log of the complete products. */
3217 
3218   if (i > 0)
3219   {
3220     ProductLogSpam += log (ProductSpam);
3221     ProductLogGenuine += log (ProductGenuine);
3222   }
3223 
3224   if (m_ScoringMode == SM_ROBINSON)
3225   {
3226     /* Apply Gary Robinson's scoring method where we take the Nth root of the
3227     products.  This is easiest in logarithm form. */
3228 
3229     if (i > 0)
3230     {
3231       ProductSpam = exp (ProductLogSpam / i);
3232       ProductGenuine = exp (ProductLogGenuine / i);
3233       ResultRatio = ProductSpam / (ProductGenuine + ProductSpam);
3234     }
3235     else /* Somehow got no words! */
3236       ResultRatio = g_RobinsonX;
3237   }
3238   else if (m_ScoringMode == SM_CHISQUARED)
3239   {
3240     /* From the SpamBayes notes: "We compute two chi-squared statistics, one
3241     for ham and one for spam.  The sum-of-the-logs business is more sensitive
3242     to probs near 0 than to probs near 1, so the spam measure uses 1-p (so that
3243     high-spamprob words have greatest effect), and the ham measure uses p
3244     directly (so that lo-spamprob words have greatest effect)."  That means we
3245     just reversed the meaning of the previously calculated spam and genuine
3246     products!  Oh well. */
3247 
3248     TempDouble = ProductLogSpam;
3249     ProductLogSpam = ProductLogGenuine;
3250     ProductLogGenuine = TempDouble;
3251 
3252     if (i > 0)
3253     {
3254       ProductSpam =
3255         1.0 - ChiSquaredProbability (-2.0 * ProductLogSpam, 2 * i);
3256       ProductGenuine =
3257         1.0 - ChiSquaredProbability (-2.0 * ProductLogGenuine, 2 * i);
3258 
3259       /* The SpamBayes notes say: "How to combine these into a single spam
3260       score?  We originally used (S-H)/(S+H) scaled into [0., 1.], which equals
3261       S/(S+H).  A systematic problem is that we could end up being near-certain
3262       a thing was (for example) spam, even if S was small, provided that H was
3263       much smaller.  Rob Hooft stared at these problems and invented the
3264       measure we use now, the simpler S-H, scaled into [0., 1.]." */
3265 
3266       ResultRatio = (ProductSpam - ProductGenuine + 1.0) / 2.0;
3267     }
3268     else /* No words to analyse. */
3269       ResultRatio = 0.5;
3270   }
3271   else /* Unknown scoring mode. */
3272   {
3273     strcpy (ErrorMessage, "Unknown scoring mode specified in settings");
3274     return B_BAD_VALUE;
3275   }
3276 
3277   ReplyMessagePntr->AddFloat (g_ResultName, ResultRatio);
3278   return B_OK;
3279 }
3280 
3281 
3282 /* Just evaluate the given string as being spam text. */
3283 
EvaluateString(const char * BufferPntr,ssize_t BufferSize,BMessage * ReplyMessagePntr,char * ErrorMessage)3284 status_t ABSApp::EvaluateString (
3285   const char *BufferPntr,
3286   ssize_t BufferSize,
3287   BMessage *ReplyMessagePntr,
3288   char *ErrorMessage)
3289 {
3290   BMemoryIO MemoryIO (BufferPntr, BufferSize);
3291 
3292   return EvaluatePositionIO (&MemoryIO, "Memory Buffer",
3293     ReplyMessagePntr, ErrorMessage);
3294 }
3295 
3296 
3297 /* Tell other programs about the scripting commands we support.  Try this
3298 command: "hey application/x-vnd.agmsmith.spamdbm getsuites" to
3299 see it in action (this program has to be already running for it to work). */
3300 
GetSupportedSuites(BMessage * MessagePntr)3301 status_t ABSApp::GetSupportedSuites (BMessage *MessagePntr)
3302 {
3303   BPropertyInfo TempPropInfo (g_ScriptingPropertyList);
3304 
3305   MessagePntr->AddString ("suites", "suite/x-vnd.agmsmith.spamdbm");
3306   MessagePntr->AddFlat ("messages", &TempPropInfo);
3307   return BApplication::GetSupportedSuites (MessagePntr);
3308 }
3309 
3310 
3311 /* Add all the words in the given file or memory buffer to the supplied set.
3312 The file name is only there for error messages, it assumes you have already
3313 opened the PositionIO to the right file.  If things go wrong, a non-zero error
3314 code will be returned and an explanation written to ErrorMessage (assumed to be
3315 at least PATH_MAX + 1024 bytes long). */
3316 
GetWordsFromPositionIO(BPositionIO * PositionIOPntr,const char * OptionalFileName,set<string> & WordSet,char * ErrorMessage)3317 status_t ABSApp::GetWordsFromPositionIO (
3318   BPositionIO *PositionIOPntr,
3319   const char *OptionalFileName,
3320   set<string> &WordSet,
3321   char *ErrorMessage)
3322 {
3323   status_t ErrorCode;
3324 
3325   if (m_TokenizeMode == TM_WHOLE)
3326     ErrorCode = TokenizeWhole (PositionIOPntr, OptionalFileName,
3327       WordSet, ErrorMessage);
3328   else
3329     ErrorCode = TokenizeParts (PositionIOPntr, OptionalFileName,
3330       WordSet, ErrorMessage);
3331 
3332   if (ErrorCode == B_OK && WordSet.empty ())
3333   {
3334     /* ENOMSG usually means no message found in queue, but I'm using it to show
3335     no words, a good indicator of spam which is pure HTML. */
3336 
3337     sprintf (ErrorMessage, "No words were found in \"%s\"", OptionalFileName);
3338     ErrorCode = ENOMSG;
3339   }
3340 
3341   return ErrorCode;
3342 }
3343 
3344 
3345 /* Set up indices for attributes MAIL:classification (string) and
3346 MAIL:ratio_spam (float) on all mounted disk volumes that support queries.  Also
3347 tell the system to make those attributes visible to the user (so they can see
3348 them in Tracker) and associate them with e-mail messages.  Also set up the
3349 database file MIME type (provide a description and associate it with this
3350 program so that it picks up the right icon).  And register the names for our
3351 sound effects. */
3352 
InstallThings(char * ErrorMessage)3353 status_t ABSApp::InstallThings (char *ErrorMessage)
3354 {
3355   int32       Cookie;
3356   dev_t       DeviceID;
3357   status_t    ErrorCode = B_OK;
3358   fs_info     FSInfo;
3359   int32       i;
3360   int32       iClassification;
3361   int32       iProbability;
3362   int32       j;
3363   index_info  IndexInfo;
3364   BMimeType   MimeType;
3365   BMessage    Parameters;
3366   const char *StringPntr;
3367   bool        TempBool;
3368   int32       TempInt32;
3369 
3370   /* Iterate through all mounted devices and try to make the indices on each
3371   one.  Don't bother if the index exists or the device doesn't support indices
3372   (actually queries). */
3373 
3374   Cookie = 0;
3375   while ((DeviceID = next_dev (&Cookie)) >= 0)
3376   {
3377     if (!fs_stat_dev (DeviceID, &FSInfo) && (FSInfo.flags & B_FS_HAS_QUERY))
3378     {
3379       if (fs_stat_index (DeviceID, g_AttributeNameClassification, &IndexInfo)
3380       && errno == B_ENTRY_NOT_FOUND)
3381       {
3382         if (fs_create_index (DeviceID, g_AttributeNameClassification,
3383         B_STRING_TYPE, 0 /* flags */))
3384         {
3385           ErrorCode = errno;
3386           sprintf (ErrorMessage, "Unable to make string index %s on "
3387             "volume #%d, volume name \"%s\", file system type \"%s\", "
3388             "on device \"%s\"", g_AttributeNameClassification,
3389             (int) DeviceID, FSInfo.volume_name, FSInfo.fsh_name,
3390             FSInfo.device_name);
3391         }
3392       }
3393 
3394       if (fs_stat_index (DeviceID, g_AttributeNameSpamRatio,
3395       &IndexInfo) && errno == B_ENTRY_NOT_FOUND)
3396       {
3397         if (fs_create_index (DeviceID, g_AttributeNameSpamRatio,
3398         B_FLOAT_TYPE, 0 /* flags */))
3399         {
3400           ErrorCode = errno;
3401           sprintf (ErrorMessage, "Unable to make float index %s on "
3402             "volume #%d, volume name \"%s\", file system type \"%s\", "
3403             "on device \"%s\"", g_AttributeNameSpamRatio,
3404             (int) DeviceID, FSInfo.volume_name, FSInfo.fsh_name,
3405             FSInfo.device_name);
3406         }
3407       }
3408     }
3409   }
3410   if (ErrorCode != B_OK)
3411     return ErrorCode;
3412 
3413   /* Set up the MIME types for the classification attributes, associate them
3414   with e-mail and make them visible to the user (but not editable).  First need
3415   to get the existing MIME settings, then add ours to them (otherwise the
3416   existing ones get wiped out). */
3417 
3418   ErrorCode = MimeType.SetTo ("text/x-email");
3419   if (ErrorCode != B_OK || !MimeType.IsInstalled ())
3420   {
3421     sprintf (ErrorMessage, "No e-mail MIME type (%s) in the system, can't "
3422       "update it to add our special attributes, and without e-mail this "
3423       "program is useless!", MimeType.Type ());
3424     if (ErrorCode == B_OK)
3425       ErrorCode = -1;
3426     return ErrorCode;
3427   }
3428 
3429   ErrorCode = MimeType.GetAttrInfo (&Parameters);
3430   if (ErrorCode != B_OK)
3431   {
3432     sprintf (ErrorMessage, "Unable to retrieve list of attributes "
3433       "associated with e-mail messages in the MIME database");
3434     return ErrorCode;
3435   }
3436 
3437   for (i = 0, iClassification = -1, iProbability = -1;
3438   i < 1000 && (iClassification < 0 || iProbability < 0);
3439   i++)
3440   {
3441     ErrorCode = Parameters.FindString ("attr:name", i, &StringPntr);
3442     if (ErrorCode != B_OK)
3443       break; /* Reached the end of the attributes. */
3444     if (strcmp (StringPntr, g_AttributeNameClassification) == 0)
3445       iClassification = i;
3446     else if (strcmp (StringPntr, g_AttributeNameSpamRatio) == 0)
3447       iProbability = i;
3448   }
3449 
3450   /* Add extra default settings for those programs which previously didn't
3451   update the MIME database with all the attributes that exist (so our new
3452   additions don't show up at the wrong index). */
3453 
3454   i--; /* Set i to index of last valid attribute. */
3455 
3456   for (j = 0; j <= i; j++)
3457   {
3458     if (Parameters.FindString ("attr:public_name", j, &StringPntr) ==
3459     B_BAD_INDEX)
3460     {
3461       if (Parameters.FindString ("attr:name", j, &StringPntr) != B_OK)
3462         StringPntr = "None!";
3463       Parameters.AddString ("attr:public_name", StringPntr);
3464     }
3465   }
3466 
3467   while (Parameters.FindInt32 ("attr:type", i, &TempInt32) == B_BAD_INDEX)
3468     Parameters.AddInt32 ("attr:type", B_STRING_TYPE);
3469 
3470   while (Parameters.FindBool ("attr:viewable", i, &TempBool) == B_BAD_INDEX)
3471     Parameters.AddBool ("attr:viewable", true);
3472 
3473   while (Parameters.FindBool ("attr:editable", i, &TempBool) == B_BAD_INDEX)
3474     Parameters.AddBool ("attr:editable", false);
3475 
3476   while (Parameters.FindInt32 ("attr:width", i, &TempInt32) == B_BAD_INDEX)
3477     Parameters.AddInt32 ("attr:width", 60);
3478 
3479   while (Parameters.FindInt32 ("attr:alignment", i, &TempInt32) == B_BAD_INDEX)
3480     Parameters.AddInt32 ("attr:alignment", B_ALIGN_LEFT);
3481 
3482   while (Parameters.FindBool ("attr:extra", i, &TempBool) == B_BAD_INDEX)
3483     Parameters.AddBool ("attr:extra", false);
3484 
3485   /* Add our new attributes to e-mail related things, if not already there. */
3486 
3487   if (iClassification < 0)
3488   {
3489     Parameters.AddString ("attr:name", g_AttributeNameClassification);
3490     Parameters.AddString ("attr:public_name", "Classification Group");
3491     Parameters.AddInt32 ("attr:type", B_STRING_TYPE);
3492     Parameters.AddBool ("attr:viewable", true);
3493     Parameters.AddBool ("attr:editable", false);
3494     Parameters.AddInt32 ("attr:width", 45);
3495     Parameters.AddInt32 ("attr:alignment", B_ALIGN_LEFT);
3496     Parameters.AddBool ("attr:extra", false);
3497   }
3498 
3499   if (iProbability < 0)
3500   {
3501     Parameters.AddString ("attr:name", g_AttributeNameSpamRatio);
3502     Parameters.AddString ("attr:public_name", "Spam/Genuine Estimate");
3503     Parameters.AddInt32 ("attr:type", B_FLOAT_TYPE);
3504     Parameters.AddBool ("attr:viewable", true);
3505     Parameters.AddBool ("attr:editable", false);
3506     Parameters.AddInt32 ("attr:width", 50);
3507     Parameters.AddInt32 ("attr:alignment", B_ALIGN_LEFT);
3508     Parameters.AddBool ("attr:extra", false);
3509   }
3510 
3511   if (iClassification < 0 || iProbability < 0)
3512   {
3513     ErrorCode = MimeType.SetAttrInfo (&Parameters);
3514     if (ErrorCode != B_OK)
3515     {
3516       sprintf (ErrorMessage, "Unable to associate the classification "
3517         "attributes with e-mail messages in the MIME database");
3518       return ErrorCode;
3519     }
3520   }
3521 
3522   /* Set up the MIME type for the database file. */
3523 
3524   sprintf (ErrorMessage, "Problems with setting up MIME type (%s) for "
3525     "the database files", g_ABSDatabaseFileMIMEType); /* A generic message. */
3526 
3527   ErrorCode = MimeType.SetTo (g_ABSDatabaseFileMIMEType);
3528   if (ErrorCode != B_OK)
3529     return ErrorCode;
3530 
3531   MimeType.Delete ();
3532   ErrorCode = MimeType.Install ();
3533   if (ErrorCode != B_OK)
3534   {
3535     sprintf (ErrorMessage, "Failed to install MIME type (%s) in the system",
3536       MimeType.Type ());
3537     return ErrorCode;
3538   }
3539 
3540   MimeType.SetShortDescription ("Spam Database");
3541   MimeType.SetLongDescription ("Bayesian Statistical Database for "
3542     "Classifying Junk E-Mail");
3543   sprintf (ErrorMessage, "1.0 ('%s')", g_DatabaseRecognitionString);
3544   MimeType.SetSnifferRule (ErrorMessage);
3545   MimeType.SetPreferredApp (g_ABSAppSignature);
3546 
3547   /* Set up the names of the sound effects.  Later on the user can associate
3548   sound files with the names by using the Sounds preferences panel or the
3549   installsound command.  The MDR add-on filter will trigger these sounds. */
3550 
3551   add_system_beep_event (g_BeepGenuine);
3552   add_system_beep_event (g_BeepSpam);
3553   add_system_beep_event (g_BeepUncertain);
3554 
3555   return B_OK;
3556 }
3557 
3558 
3559 /* Load the database if it hasn't been loaded yet.  Otherwise do nothing. */
3560 
LoadDatabaseIfNeeded(char * ErrorMessage)3561 status_t ABSApp::LoadDatabaseIfNeeded (char *ErrorMessage)
3562 {
3563   if (m_WordMap.empty ())
3564     return LoadSaveDatabase (true /* DoLoad */, ErrorMessage);
3565 
3566   return B_OK;
3567 }
3568 
3569 
3570 /* Either load the database of spam words (DoLoad is TRUE) from the file
3571 specified in the settings, or write (DoLoad is FALSE) the database to it.  If
3572 it doesn't exist (and its parent directories do exist) then it will be created
3573 when saving.  If it doesn't exist when loading, the in-memory database will be
3574 set to an empty one and an error will be returned with an explanation put into
3575 ErrorMessage (should be big enough for a path name and a couple of lines of
3576 text).
3577 
3578 The database file format is a UTF-8 text file (well, there could be some
3579 latin-1 characters and other junk in there - it just copies the bytes from the
3580 e-mail messages directly), with tab characters to separate fields (so that you
3581 can also load it into a spreadsheet).  The first line identifies the overall
3582 file type.  The second lists pairs of classifications plus the number of
3583 messages in each class.  Currently it is just Genuine and Spam, but for future
3584 compatability, that could be followed by more classification pairs.  The
3585 remaining lines each contain a word, the date it was last updated (actually
3586 it's the number of messages in the database when the word was added, smaller
3587 numbers mean it was updated longer ago), the genuine count and the spam count.
3588 */
3589 
LoadSaveDatabase(bool DoLoad,char * ErrorMessage)3590 status_t ABSApp::LoadSaveDatabase (bool DoLoad, char *ErrorMessage)
3591 {
3592   time_t                             CurrentTime;
3593   FILE                              *DatabaseFile = NULL;
3594   BNode                              DatabaseNode;
3595   BNodeInfo                          DatabaseNodeInfo;
3596   StatisticsMap::iterator            DataIter;
3597   StatisticsMap::iterator            EndIter;
3598   status_t                           ErrorCode;
3599   int                                i;
3600   pair<StatisticsMap::iterator,bool> InsertResult;
3601   char                               LineString [10240];
3602   StatisticsRecord                   Statistics;
3603   const char                        *StringPntr;
3604   char                              *TabPntr;
3605   const char                        *WordPntr;
3606 
3607   if (DoLoad)
3608   {
3609     MakeDatabaseEmpty ();
3610     m_DatabaseHasChanged = false; /* In case of early error exit. */
3611   }
3612   else /* Saving the database, backup the old version on disk. */
3613   {
3614     ErrorCode = MakeBackup (ErrorMessage);
3615     if (ErrorCode != B_OK) /* Usually because the directory isn't there. */
3616       return ErrorCode;
3617   }
3618 
3619   DatabaseFile = fopen (m_DatabaseFileName.String (), DoLoad ? "rb" : "wb");
3620   if (DatabaseFile == NULL)
3621   {
3622     ErrorCode = errno;
3623     sprintf (ErrorMessage, "Can't open database file \"%s\" for %s",
3624       m_DatabaseFileName.String (), DoLoad ? "reading" : "writing");
3625     goto ErrorExit;
3626   }
3627 
3628   /* Process the first line, which identifies the file. */
3629 
3630   if (DoLoad)
3631   {
3632     sprintf (ErrorMessage, "Can't read first line of database file \"%s\", "
3633       "expected it to start with \"%s\"",
3634       m_DatabaseFileName.String (), g_DatabaseRecognitionString);
3635     ErrorCode = -1;
3636 
3637     if (fgets (LineString, sizeof (LineString), DatabaseFile) == NULL)
3638       goto ErrorExit;
3639     if (strncmp (LineString, g_DatabaseRecognitionString,
3640     strlen (g_DatabaseRecognitionString)) != 0)
3641       goto ErrorExit;
3642   }
3643   else /* Saving */
3644   {
3645     CurrentTime = time (NULL);
3646     if (fprintf (DatabaseFile, "%s V1 (word, age, genuine count, spam count)\t"
3647     "Written by SpamDBM $Revision: 30630 $\t"
3648     "Compiled on " __DATE__ " at " __TIME__ "\tThis file saved on %s",
3649     g_DatabaseRecognitionString, ctime (&CurrentTime)) <= 0)
3650     {
3651       ErrorCode = errno;
3652       sprintf (ErrorMessage, "Problems when writing to database file \"%s\"",
3653         m_DatabaseFileName.String ());
3654       goto ErrorExit;
3655     }
3656   }
3657 
3658   /* The second line lists the different classifications.  We just check to see
3659   that the first two are Genuine and Spam.  If there are others, they'll be
3660   ignored and lost when the database is saved. */
3661 
3662   if (DoLoad)
3663   {
3664     sprintf (ErrorMessage, "Can't read second line of database file \"%s\", "
3665       "expected it to list classifications %s and %s along with their totals",
3666       m_DatabaseFileName.String (), g_ClassifiedGenuine, g_ClassifiedSpam);
3667     ErrorCode = B_BAD_VALUE;
3668 
3669     if (fgets (LineString, sizeof (LineString), DatabaseFile) == NULL)
3670       goto ErrorExit;
3671     i = strlen (LineString);
3672     if (i > 0 && LineString[i-1] == '\n')
3673       LineString[i-1] = 0; /* Remove trailing line feed character. */
3674 
3675     /* Look for the title word at the start of the line. */
3676 
3677     TabPntr = LineString;
3678     for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3679       ;
3680     if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3681 
3682     if (strncmp (StringPntr, "Classifications", 15) != 0)
3683       goto ErrorExit;
3684 
3685     /* Look for the Genuine class and count. */
3686 
3687     for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3688       ;
3689     if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3690 
3691     if (strcmp (StringPntr, g_ClassifiedGenuine) != 0)
3692       goto ErrorExit;
3693 
3694     for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3695       ;
3696     if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3697 
3698     m_TotalGenuineMessages = atoll (StringPntr);
3699 
3700     /* Look for the Spam class and count. */
3701 
3702     for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3703       ;
3704     if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3705 
3706     if (strcmp (StringPntr, g_ClassifiedSpam) != 0)
3707       goto ErrorExit;
3708 
3709     for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3710       ;
3711     if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3712 
3713     m_TotalSpamMessages = atoll (StringPntr);
3714   }
3715   else /* Saving */
3716   {
3717     fprintf (DatabaseFile,
3718       "Classifications and total messages:\t%s\t%" B_PRIu32
3719         "\t%s\t%" B_PRIu32 "\n",
3720       g_ClassifiedGenuine, m_TotalGenuineMessages,
3721       g_ClassifiedSpam, m_TotalSpamMessages);
3722   }
3723 
3724   /* The remainder of the file is the list of words and statistics.  Each line
3725   has a word, a tab, the time when the word was last changed in the database
3726   (sequence number of message addition, starts at 0 and goes up by one for each
3727   message added to the database), a tab then the number of messages in the
3728   first class (genuine) that had that word, then a tab, then the number of
3729   messages in the second class (spam) with that word, and so on. */
3730 
3731   if (DoLoad)
3732   {
3733     while (!feof (DatabaseFile))
3734     {
3735       if (fgets (LineString, sizeof (LineString), DatabaseFile) == NULL)
3736       {
3737         ErrorCode = errno;
3738         if (feof (DatabaseFile))
3739           break;
3740         if (ErrorCode == B_OK)
3741           ErrorCode = -1;
3742         sprintf (ErrorMessage, "Error while reading words and statistics "
3743           "from database file \"%s\"", m_DatabaseFileName.String ());
3744         goto ErrorExit;
3745       }
3746 
3747       i = strlen (LineString);
3748       if (i > 0 && LineString[i-1] == '\n')
3749         LineString[i-1] = 0; /* Remove trailing line feed character. */
3750 
3751       /* Get the word at the start of the line, save in WordPntr. */
3752 
3753       TabPntr = LineString;
3754       for (WordPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3755         ;
3756       if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3757 
3758       /* Get the date stamp.  Actually a sequence number, not a date. */
3759 
3760       for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3761         ;
3762       if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3763 
3764       Statistics.age = atoll (StringPntr);
3765 
3766       /* Get the Genuine count. */
3767 
3768       for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3769         ;
3770       if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3771 
3772       Statistics.genuineCount = atoll (StringPntr);
3773 
3774       /* Get the Spam count. */
3775 
3776       for (StringPntr = TabPntr; *TabPntr != 0 && *TabPntr != '\t'; TabPntr++)
3777         ;
3778       if (*TabPntr == '\t') *TabPntr++ = 0; /* Stringify up to next tab. */
3779 
3780       Statistics.spamCount = atoll (StringPntr);
3781 
3782       /* Ignore empty words, totally unused words and ones which are too long
3783       (avoids lots of length checking everywhere). */
3784 
3785       if (WordPntr[0] == 0 || strlen (WordPntr) > g_MaxWordLength ||
3786       (Statistics.genuineCount <= 0 && Statistics.spamCount <= 0))
3787         continue; /* Ignore this line of text, start on next one. */
3788 
3789       /* Add the combination to the database. */
3790 
3791       InsertResult = m_WordMap.insert (
3792         StatisticsMap::value_type (WordPntr, Statistics));
3793       if (InsertResult.second == false)
3794       {
3795         ErrorCode = B_BAD_VALUE;
3796         sprintf (ErrorMessage, "Error while inserting word \"%s\" from "
3797           "database \"%s\", perhaps it is a duplicate",
3798           WordPntr, m_DatabaseFileName.String ());
3799         goto ErrorExit;
3800       }
3801       m_WordCount++;
3802 
3803       /* And the hunt for the oldest word. */
3804 
3805       if (Statistics.age < m_OldestAge)
3806         m_OldestAge = Statistics.age;
3807     }
3808   }
3809   else /* Saving, dump all words and statistics to the file. */
3810   {
3811     EndIter = m_WordMap.end ();
3812     for (DataIter = m_WordMap.begin (); DataIter != EndIter; DataIter++)
3813     {
3814       if (fprintf (DatabaseFile,
3815       "%s\t%" B_PRIu32 "\t%" B_PRIu32 "\t%" B_PRIu32 "\n",
3816       DataIter->first.c_str (), DataIter->second.age,
3817       DataIter->second.genuineCount, DataIter->second.spamCount) <= 0)
3818       {
3819         ErrorCode = errno;
3820         sprintf (ErrorMessage, "Error while writing word \"%s\" to "
3821           "database \"%s\"",
3822           DataIter->first.c_str(), m_DatabaseFileName.String ());
3823         goto ErrorExit;
3824       }
3825     }
3826   }
3827 
3828   /* Set the file type so that the new file gets associated with this program,
3829   and picks up the right icon. */
3830 
3831   if (!DoLoad)
3832   {
3833     sprintf (ErrorMessage, "Unable to set attributes (file type) of database "
3834       "file \"%s\"", m_DatabaseFileName.String ());
3835     ErrorCode = DatabaseNode.SetTo (m_DatabaseFileName.String ());
3836     if (ErrorCode != B_OK)
3837       goto ErrorExit;
3838     DatabaseNodeInfo.SetTo (&DatabaseNode);
3839     ErrorCode = DatabaseNodeInfo.SetType (g_ABSDatabaseFileMIMEType);
3840     if (ErrorCode != B_OK)
3841       goto ErrorExit;
3842   }
3843 
3844   /* Success! */
3845   m_DatabaseHasChanged = false;
3846   ErrorCode = B_OK;
3847 
3848 ErrorExit:
3849   if (DatabaseFile != NULL)
3850     fclose (DatabaseFile);
3851   return ErrorCode;
3852 }
3853 
3854 
3855 /* Either load the settings (DoLoad is TRUE) from the configuration file or
3856 write them (DoLoad is FALSE) to it.  The configuration file is a flattened
3857 BMessage containing the various program settings.  If it doesn't exist (and its
3858 parent directories don't exist) then it will be created when saving.  If it
3859 doesn't exist when loading, the settings will be set to default values. */
3860 
LoadSaveSettings(bool DoLoad)3861 status_t ABSApp::LoadSaveSettings (bool DoLoad)
3862 {
3863   status_t    ErrorCode;
3864   const char *NamePntr;
3865   BMessage    Settings;
3866   BDirectory  SettingsDirectory;
3867   BFile       SettingsFile;
3868   const char *StringPntr;
3869   bool        TempBool;
3870   int32       TempInt32;
3871   char        TempString [PATH_MAX + 100];
3872 
3873   /* Preset things to default values if loading, in case of an error or it's an
3874   older version of the settings file which doesn't have every field defined. */
3875 
3876   if (DoLoad)
3877     DefaultSettings ();
3878 
3879   /* Look for our settings directory.  When saving we can try to create it. */
3880 
3881   ErrorCode = SettingsDirectory.SetTo (m_SettingsDirectoryPath.Path ());
3882   if (ErrorCode != B_OK)
3883   {
3884     if (DoLoad || ErrorCode != B_ENTRY_NOT_FOUND)
3885     {
3886       sprintf (TempString, "Can't find settings directory \"%s\"",
3887         m_SettingsDirectoryPath.Path ());
3888       goto ErrorExit;
3889     }
3890     ErrorCode = create_directory (m_SettingsDirectoryPath.Path (), 0755);
3891     if (ErrorCode == B_OK)
3892       ErrorCode = SettingsDirectory.SetTo (m_SettingsDirectoryPath.Path ());
3893     if (ErrorCode != B_OK)
3894     {
3895       sprintf (TempString, "Can't create settings directory \"%s\"",
3896         m_SettingsDirectoryPath.Path ());
3897       goto ErrorExit;
3898     }
3899   }
3900 
3901   ErrorCode = SettingsFile.SetTo (&SettingsDirectory, g_SettingsFileName,
3902     DoLoad ? B_READ_ONLY : B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE);
3903   if (ErrorCode != B_OK)
3904   {
3905     sprintf (TempString, "Can't open settings file \"%s\" in directory \"%s\" "
3906       "for %s", g_SettingsFileName, m_SettingsDirectoryPath.Path(),
3907       DoLoad ? "reading" : "writing");
3908     goto ErrorExit;
3909   }
3910 
3911   if (DoLoad)
3912   {
3913     ErrorCode = Settings.Unflatten (&SettingsFile);
3914     if (ErrorCode != 0 || Settings.what != g_SettingsWhatCode)
3915     {
3916       sprintf (TempString, "Corrupt data detected while reading settings "
3917         "file \"%s\" in directory \"%s\", will revert to defaults",
3918         g_SettingsFileName, m_SettingsDirectoryPath.Path());
3919       goto ErrorExit;
3920     }
3921   }
3922 
3923   /* Transfer the settings between the BMessage and our various global
3924   variables.  For loading, if the setting isn't present, leave it at the
3925   default value.  Note that loading and saving are intermingled here to make
3926   code maintenance easier (less chance of forgetting to update it if load and
3927   save were separate functions). */
3928 
3929   ErrorCode = B_OK; /* So that saving settings can record an error. */
3930 
3931   NamePntr = "DatabaseFileName";
3932   if (DoLoad)
3933   {
3934     if (Settings.FindString (NamePntr, &StringPntr) == B_OK)
3935       m_DatabaseFileName.SetTo (StringPntr);
3936   }
3937   else if (ErrorCode == B_OK)
3938     ErrorCode = Settings.AddString (NamePntr, m_DatabaseFileName);
3939 
3940   NamePntr = "ServerMode";
3941   if (DoLoad)
3942   {
3943     if (Settings.FindBool (NamePntr, &TempBool) == B_OK)
3944       g_ServerMode = TempBool;
3945   }
3946   else if (ErrorCode == B_OK)
3947     ErrorCode = Settings.AddBool (NamePntr, g_ServerMode);
3948 
3949   NamePntr = "IgnorePreviousClassification";
3950   if (DoLoad)
3951   {
3952     if (Settings.FindBool (NamePntr, &TempBool) == B_OK)
3953       m_IgnorePreviousClassification = TempBool;
3954   }
3955   else if (ErrorCode == B_OK)
3956     ErrorCode = Settings.AddBool (NamePntr, m_IgnorePreviousClassification);
3957 
3958   NamePntr = "PurgeAge";
3959   if (DoLoad)
3960   {
3961     if (Settings.FindInt32 (NamePntr, &TempInt32) == B_OK)
3962       m_PurgeAge = TempInt32;
3963   }
3964   else if (ErrorCode == B_OK)
3965     ErrorCode = Settings.AddInt32 (NamePntr, m_PurgeAge);
3966 
3967   NamePntr = "PurgePopularity";
3968   if (DoLoad)
3969   {
3970     if (Settings.FindInt32 (NamePntr, &TempInt32) == B_OK)
3971       m_PurgePopularity = TempInt32;
3972   }
3973   else if (ErrorCode == B_OK)
3974     ErrorCode = Settings.AddInt32 (NamePntr, m_PurgePopularity);
3975 
3976   NamePntr = "ScoringMode";
3977   if (DoLoad)
3978   {
3979     if (Settings.FindInt32 (NamePntr, &TempInt32) == B_OK)
3980       m_ScoringMode = (ScoringModes) TempInt32;
3981     if (m_ScoringMode < 0 || m_ScoringMode >= SM_MAX)
3982       m_ScoringMode = (ScoringModes) 0;
3983   }
3984   else if (ErrorCode == B_OK)
3985     ErrorCode = Settings.AddInt32 (NamePntr, m_ScoringMode);
3986 
3987   NamePntr = "TokenizeMode";
3988   if (DoLoad)
3989   {
3990     if (Settings.FindInt32 (NamePntr, &TempInt32) == B_OK)
3991       m_TokenizeMode = (TokenizeModes) TempInt32;
3992     if (m_TokenizeMode < 0 || m_TokenizeMode >= TM_MAX)
3993       m_TokenizeMode = (TokenizeModes) 0;
3994   }
3995   else if (ErrorCode == B_OK)
3996     ErrorCode = Settings.AddInt32 (NamePntr, m_TokenizeMode);
3997 
3998   if (ErrorCode != B_OK)
3999   {
4000     strcpy (TempString, "Unable to stuff the program settings into a "
4001       "temporary BMessage, settings not saved");
4002     goto ErrorExit;
4003   }
4004 
4005   /* Save the settings BMessage to the settings file. */
4006 
4007   if (!DoLoad)
4008   {
4009     Settings.what = g_SettingsWhatCode;
4010     ErrorCode = Settings.Flatten (&SettingsFile);
4011     if (ErrorCode != 0)
4012     {
4013       sprintf (TempString, "Problems while writing settings file \"%s\" in "
4014         "directory \"%s\"", g_SettingsFileName,
4015         m_SettingsDirectoryPath.Path ());
4016       goto ErrorExit;
4017     }
4018   }
4019 
4020   m_SettingsHaveChanged = false;
4021   return B_OK;
4022 
4023 ErrorExit: /* Error message in TempString, code in ErrorCode. */
4024   DisplayErrorMessage (TempString, ErrorCode, DoLoad ?
4025     "Loading Settings Error" : "Saving Settings Error");
4026   return ErrorCode;
4027 }
4028 
4029 
4030 void
MessageReceived(BMessage * MessagePntr)4031 ABSApp::MessageReceived (BMessage *MessagePntr)
4032 {
4033   const char           *PropertyName;
4034   struct property_info *PropInfoPntr;
4035   int32                 SpecifierIndex;
4036   int32                 SpecifierKind;
4037   BMessage              SpecifierMessage;
4038 
4039   /* See if it is a scripting message that applies to the database or one of
4040   the other operations this program supports.  Pass on other scripting messages
4041   to the inherited parent MessageReceived function (they're usually scripting
4042   messages for the BApplication). */
4043 
4044   switch (MessagePntr->what)
4045   {
4046     case B_GET_PROPERTY:
4047     case B_SET_PROPERTY:
4048     case B_COUNT_PROPERTIES:
4049     case B_CREATE_PROPERTY:
4050     case B_DELETE_PROPERTY:
4051     case B_EXECUTE_PROPERTY:
4052       if (MessagePntr->GetCurrentSpecifier (&SpecifierIndex, &SpecifierMessage,
4053       &SpecifierKind, &PropertyName) == B_OK &&
4054       SpecifierKind == B_DIRECT_SPECIFIER)
4055       {
4056         for (PropInfoPntr = g_ScriptingPropertyList + 0; true; PropInfoPntr++)
4057         {
4058           if (PropInfoPntr->name == 0)
4059             break; /* Ran out of commands. */
4060 
4061           if (PropInfoPntr->commands[0] == MessagePntr->what &&
4062           strcasecmp (PropInfoPntr->name, PropertyName) == 0)
4063           {
4064             ProcessScriptingMessage (MessagePntr, PropInfoPntr);
4065             return;
4066           }
4067         }
4068       }
4069       break;
4070   }
4071 
4072   /* Pass the unprocessed message to the inherited function, maybe it knows
4073   what to do.  This includes replies to messages we sent ourselves. */
4074 
4075   BApplication::MessageReceived (MessagePntr);
4076 }
4077 
4078 
4079 /* Rename the existing database file to a backup file name, potentially
4080 replacing an older backup.  If something goes wrong, returns an error code and
4081 puts an explanation in ErrorMessage. */
4082 
MakeBackup(char * ErrorMessage)4083 status_t ABSApp::MakeBackup (char *ErrorMessage)
4084 {
4085   BEntry   Entry;
4086   status_t ErrorCode;
4087   int      i;
4088   char     LeafName [NAME_MAX];
4089   char     NewName [PATH_MAX+20];
4090   char     OldName [PATH_MAX+20];
4091 
4092   ErrorCode = Entry.SetTo (m_DatabaseFileName.String ());
4093   if (ErrorCode != B_OK)
4094   {
4095     sprintf (ErrorMessage, "While making backup, failed to make a BEntry for "
4096       "\"%s\" (maybe the directory doesn't exist?)",
4097       m_DatabaseFileName.String ());
4098     return ErrorCode;
4099   }
4100   if (!Entry.Exists ())
4101     return B_OK; /* No existing file to worry about overwriting. */
4102   Entry.GetName (LeafName);
4103 
4104   /* Find the first hole (no file) where we will stop the renaming chain. */
4105 
4106   for (i = 0; i < g_MaxBackups - 1; i++)
4107   {
4108     strcpy (OldName, m_DatabaseFileName.String ());
4109     sprintf (OldName + strlen (OldName), g_BackupSuffix, i);
4110     Entry.SetTo (OldName);
4111     if (!Entry.Exists ())
4112       break;
4113   }
4114 
4115   /* Move the files down by one to fill in the hole in the name series. */
4116 
4117   for (i--; i >= 0; i--)
4118   {
4119     strcpy (OldName, m_DatabaseFileName.String ());
4120     sprintf (OldName + strlen (OldName), g_BackupSuffix, i);
4121     Entry.SetTo (OldName);
4122     strcpy (NewName, LeafName);
4123     sprintf (NewName + strlen (NewName), g_BackupSuffix, i + 1);
4124     ErrorCode = Entry.Rename (NewName, true /* clobber */);
4125   }
4126 
4127   Entry.SetTo (m_DatabaseFileName.String ());
4128   strcpy (NewName, LeafName);
4129   sprintf (NewName + strlen (NewName), g_BackupSuffix, 0);
4130   ErrorCode = Entry.Rename (NewName, true /* clobber */);
4131   if (ErrorCode != B_OK)
4132     sprintf (ErrorMessage, "While making backup, failed to rename "
4133       "\"%s\" to \"%s\"", m_DatabaseFileName.String (), NewName);
4134 
4135   return ErrorCode;
4136 }
4137 
4138 
4139 void
MakeDatabaseEmpty()4140 ABSApp::MakeDatabaseEmpty ()
4141 {
4142   m_WordMap.clear (); /* Sets the map to empty, deallocating any old data. */
4143   m_WordCount = 0;
4144   m_TotalGenuineMessages = 0;
4145   m_TotalSpamMessages = 0;
4146   m_OldestAge = (uint32) -1 /* makes largest number possible */;
4147 }
4148 
4149 
/* Do what the scripting command says.  A reply message will be sent back with
several fields: "error" containing the numerical error code (0 for success),
"CommandText" with a text representation of the command, "result" with the
resulting data for a get or count command.  If it isn't understood, then rather
than a B_REPLY kind of message, it will be a B_MESSAGE_NOT_UNDERSTOOD message
with an "error" number and a "message" string with a description. */
4156 
4157 void
ProcessScriptingMessage(BMessage * MessagePntr,struct property_info * PropInfoPntr)4158 ABSApp::ProcessScriptingMessage (
4159   BMessage *MessagePntr,
4160   struct property_info *PropInfoPntr)
4161 {
4162   bool        ArgumentBool = false;
4163   bool        ArgumentGotBool = false;
4164   bool        ArgumentGotInt32 = false;
4165   bool        ArgumentGotString = false;
4166   int32       ArgumentInt32 = 0;
4167   const char *ArgumentString = NULL;
4168   BString     CommandText;
4169   status_t    ErrorCode;
4170   int         i;
4171   BMessage    ReplyMessage (B_MESSAGE_NOT_UNDERSTOOD);
4172   ssize_t     StringBufferSize;
4173   BMessage    TempBMessage;
4174   BPath       TempPath;
4175   char        TempString [PATH_MAX + 1024];
4176 
4177   if (g_QuitCountdown >= 0 && !g_CommandLineMode)
4178   {
4179     g_QuitCountdown = -1;
4180     cerr << "Quit countdown aborted due to a scripting command arriving.\n";
4181   }
4182 
4183   if (g_BusyCursor != NULL)
4184     SetCursor (g_BusyCursor);
4185 
4186   ErrorCode = MessagePntr->FindData (g_DataName, B_STRING_TYPE,
4187     (const void **) &ArgumentString, &StringBufferSize);
4188   if (ErrorCode == B_OK)
4189   {
4190     if (PropInfoPntr->extra_data != PN_EVALUATE_STRING &&
4191     PropInfoPntr->extra_data != PN_SPAM_STRING &&
4192     PropInfoPntr->extra_data != PN_GENUINE_STRING &&
4193     strlen (ArgumentString) >= PATH_MAX)
4194     {
4195       sprintf (TempString, "\"data\" string of a scripting message is too "
4196         "long, for SET %s action", PropInfoPntr->name);
4197       ErrorCode = B_NAME_TOO_LONG;
4198       goto ErrorExit;
4199     }
4200     ArgumentGotString = true;
4201   }
4202   else if (MessagePntr->FindBool (g_DataName, &ArgumentBool) == B_OK)
4203     ArgumentGotBool = true;
4204   else if (MessagePntr->FindInt32 (g_DataName, &ArgumentInt32) == B_OK)
4205     ArgumentGotInt32 = true;
4206 
4207   /* Prepare a Human readable description of the scripting command. */
4208 
4209   switch (PropInfoPntr->commands[0])
4210   {
4211     case B_SET_PROPERTY:
4212       CommandText.SetTo ("Set ");
4213       break;
4214 
4215     case B_GET_PROPERTY:
4216       CommandText.SetTo ("Get ");
4217       break;
4218 
4219     case B_COUNT_PROPERTIES:
4220       CommandText.SetTo ("Count ");
4221       break;
4222 
4223     case B_CREATE_PROPERTY:
4224       CommandText.SetTo ("Create ");
4225       break;
4226 
4227     case B_DELETE_PROPERTY:
4228       CommandText.SetTo ("Delete ");
4229       break;
4230 
4231     case B_EXECUTE_PROPERTY:
4232       CommandText.SetTo ("Execute ");
4233       break;
4234 
4235     default:
4236       sprintf (TempString, "Bug: scripting command for \"%s\" has an unknown "
4237         "action code %d", PropInfoPntr->name,
4238         (int) PropInfoPntr->commands[0]);
4239       ErrorCode = -1;
4240       goto ErrorExit;
4241   }
4242   CommandText.Append (PropInfoPntr->name);
4243 
4244   /* Add on the argument value to our readable command, if there is one. */
4245 
4246   if (ArgumentGotString)
4247   {
4248     CommandText.Append (" \"");
4249     CommandText.Append (ArgumentString);
4250     CommandText.Append ("\"");
4251   }
4252   if (ArgumentGotBool)
4253     CommandText.Append (ArgumentBool ? " true" : " false");
4254   if (ArgumentGotInt32)
4255   {
4256     sprintf (TempString, " %" B_PRId32, ArgumentInt32);
4257     CommandText.Append (TempString);
4258   }
4259 
4260   /* From now on the scripting command has been recognized and is in the
4261   correct format, so it always returns a B_REPLY message.  A readable version
4262   of the command is also added to make debugging easier. */
4263 
4264   ReplyMessage.what = B_REPLY;
4265   ReplyMessage.AddString ("CommandText", CommandText);
4266 
4267   /* Now actually do the command.  First prepare a default error message. */
4268 
4269   sprintf (TempString, "Operation code %d (get, set, count, etc) "
4270     "unsupported for property %s",
4271     (int) PropInfoPntr->commands[0], PropInfoPntr->name);
4272   ErrorCode = B_BAD_INDEX;
4273 
4274   switch (PropInfoPntr->extra_data)
4275   {
4276     case PN_DATABASE_FILE:
4277       switch (PropInfoPntr->commands[0])
4278       {
4279         case B_GET_PROPERTY: /* Get the database file name. */
4280           ReplyMessage.AddString (g_ResultName, m_DatabaseFileName);
4281           break;
4282 
4283         case B_SET_PROPERTY: /* Set the database file name to a new one. */
4284           if (!ArgumentGotString)
4285           {
4286             ErrorCode = B_BAD_TYPE;
4287             sprintf (TempString, "You need to specify a string for the "
4288               "SET %s command", PropInfoPntr->name);
4289             goto ErrorExit;
4290           }
4291           ErrorCode = TempPath.SetTo (ArgumentString, NULL /* leaf */,
4292             true /* normalize - verifies parent directories exist */);
4293           if (ErrorCode != B_OK)
4294           {
4295             sprintf (TempString, "New database path name of \"%s\" is invalid "
4296               "(parent directories must exist)", ArgumentString);
4297             goto ErrorExit;
4298           }
4299           if ((ErrorCode = SaveDatabaseIfNeeded (TempString)) != B_OK)
4300             goto ErrorExit;
4301           MakeDatabaseEmpty (); /* So that the new one gets loaded if used. */
4302 
4303           if (strlen (TempPath.Leaf ()) > NAME_MAX-strlen(g_BackupSuffix)-1)
4304           {
4305             /* Truncate the name so that there is enough space for the backup
4306             extension.  Approximately. */
4307             strcpy (TempString, TempPath.Leaf ());
4308             TempString [NAME_MAX - strlen (g_BackupSuffix) - 1] = 0;
4309             TempPath.GetParent (&TempPath);
4310             TempPath.Append (TempString);
4311           }
4312           m_DatabaseFileName.SetTo (TempPath.Path ());
4313           m_SettingsHaveChanged = true;
4314           break;
4315 
4316         case B_CREATE_PROPERTY: /* Make a new database file plus more. */
4317           if ((ErrorCode = CreateDatabaseFile (TempString)) != B_OK)
4318             goto ErrorExit;
4319           break;
4320 
4321         case B_DELETE_PROPERTY: /* Delete the file and its backups too. */
4322           if ((ErrorCode = DeleteDatabaseFile (TempString)) != B_OK)
4323             goto ErrorExit;
4324           break;
4325 
4326         case B_COUNT_PROPERTIES:
4327           if ((ErrorCode = LoadDatabaseIfNeeded (TempString)) != B_OK)
4328             goto ErrorExit;
4329           ReplyMessage.AddInt32 (g_ResultName, m_WordCount);
4330           break;
4331 
4332         default: /* Unknown operation code, error message already set. */
4333           goto ErrorExit;
4334       }
4335       break;
4336 
4337     case PN_SPAM:
4338     case PN_SPAM_STRING:
4339     case PN_GENUINE:
4340     case PN_GENUINE_STRING:
4341     case PN_UNCERTAIN:
4342       switch (PropInfoPntr->commands[0])
4343       {
4344         case B_COUNT_PROPERTIES: /* Get the number of spam/genuine messages. */
4345           if ((ErrorCode = LoadDatabaseIfNeeded (TempString)) != B_OK)
4346             goto ErrorExit;
4347           if (PropInfoPntr->extra_data == PN_SPAM ||
4348           PropInfoPntr->extra_data == PN_SPAM_STRING)
4349             ReplyMessage.AddInt32 (g_ResultName, m_TotalSpamMessages);
4350           else
4351             ReplyMessage.AddInt32 (g_ResultName, m_TotalGenuineMessages);
4352           break;
4353 
4354         case B_SET_PROPERTY: /* Add spam/genuine/uncertain to database. */
4355           if (!ArgumentGotString)
4356           {
4357             ErrorCode = B_BAD_TYPE;
4358             sprintf (TempString, "You need to specify a string (%s) "
4359               "for the SET %s command",
4360               (PropInfoPntr->extra_data == PN_GENUINE_STRING ||
4361               PropInfoPntr->extra_data == PN_SPAM_STRING)
4362               ? "text of the message to be added"
4363               : "pathname of the file containing the text to be added",
4364               PropInfoPntr->name);
4365             goto ErrorExit;
4366           }
4367           if ((ErrorCode = LoadDatabaseIfNeeded (TempString)) != B_OK)
4368             goto ErrorExit;
4369           if (PropInfoPntr->extra_data == PN_GENUINE ||
4370           PropInfoPntr->extra_data == PN_SPAM ||
4371           PropInfoPntr->extra_data == PN_UNCERTAIN)
4372             ErrorCode = AddFileToDatabase (
4373               (PropInfoPntr->extra_data == PN_SPAM) ? CL_SPAM :
4374               ((PropInfoPntr->extra_data == PN_GENUINE) ? CL_GENUINE :
4375               CL_UNCERTAIN),
4376               ArgumentString, TempString /* ErrorMessage */);
4377           else
4378             ErrorCode = AddStringToDatabase (
4379               (PropInfoPntr->extra_data == PN_SPAM_STRING) ?
4380               CL_SPAM : CL_GENUINE,
4381               ArgumentString, TempString /* ErrorMessage */);
4382           if (ErrorCode != B_OK)
4383             goto ErrorExit;
4384           break;
4385 
4386         default: /* Unknown operation code, error message already set. */
4387           goto ErrorExit;
4388       }
4389       break;
4390 
4391     case PN_IGNORE_PREVIOUS_CLASSIFICATION:
4392       switch (PropInfoPntr->commands[0])
4393       {
4394         case B_GET_PROPERTY:
4395           ReplyMessage.AddBool (g_ResultName, m_IgnorePreviousClassification);
4396           break;
4397 
4398         case B_SET_PROPERTY:
4399           if (!ArgumentGotBool)
4400           {
4401             ErrorCode = B_BAD_TYPE;
4402             sprintf (TempString, "You need to specify a boolean (true/yes, "
4403               "false/no) for the SET %s command", PropInfoPntr->name);
4404             goto ErrorExit;
4405           }
4406           m_IgnorePreviousClassification = ArgumentBool;
4407           m_SettingsHaveChanged = true;
4408           break;
4409 
4410         default: /* Unknown operation code, error message already set. */
4411           goto ErrorExit;
4412       }
4413       break;
4414 
4415     case PN_SERVER_MODE:
4416       switch (PropInfoPntr->commands[0])
4417       {
4418         case B_GET_PROPERTY:
4419           ReplyMessage.AddBool (g_ResultName, g_ServerMode);
4420           break;
4421 
4422         case B_SET_PROPERTY:
4423           if (!ArgumentGotBool)
4424           {
4425             ErrorCode = B_BAD_TYPE;
4426             sprintf (TempString, "You need to specify a boolean (true/yes, "
4427               "false/no) for the SET %s command", PropInfoPntr->name);
4428             goto ErrorExit;
4429           }
4430           g_ServerMode = ArgumentBool;
4431           m_SettingsHaveChanged = true;
4432           break;
4433 
4434         default: /* Unknown operation code, error message already set. */
4435           goto ErrorExit;
4436       }
4437       break;
4438 
4439     case PN_FLUSH:
4440       if (PropInfoPntr->commands[0] == B_EXECUTE_PROPERTY &&
4441       (ErrorCode = SaveDatabaseIfNeeded (TempString)) == B_OK)
4442         break;
4443       goto ErrorExit;
4444 
4445     case PN_PURGE_AGE:
4446       switch (PropInfoPntr->commands[0])
4447       {
4448         case B_GET_PROPERTY:
4449           ReplyMessage.AddInt32 (g_ResultName, m_PurgeAge);
4450           break;
4451 
4452         case B_SET_PROPERTY:
4453           if (!ArgumentGotInt32)
4454           {
4455             ErrorCode = B_BAD_TYPE;
4456             sprintf (TempString, "You need to specify a 32 bit integer "
4457               "for the SET %s command", PropInfoPntr->name);
4458             goto ErrorExit;
4459           }
4460           m_PurgeAge = ArgumentInt32;
4461           m_SettingsHaveChanged = true;
4462           break;
4463 
4464         default: /* Unknown operation code, error message already set. */
4465           goto ErrorExit;
4466       }
4467       break;
4468 
4469     case PN_PURGE_POPULARITY:
4470       switch (PropInfoPntr->commands[0])
4471       {
4472         case B_GET_PROPERTY:
4473           ReplyMessage.AddInt32 (g_ResultName, m_PurgePopularity);
4474           break;
4475 
4476         case B_SET_PROPERTY:
4477           if (!ArgumentGotInt32)
4478           {
4479             ErrorCode = B_BAD_TYPE;
4480             sprintf (TempString, "You need to specify a 32 bit integer "
4481               "for the SET %s command", PropInfoPntr->name);
4482             goto ErrorExit;
4483           }
4484           m_PurgePopularity = ArgumentInt32;
4485           m_SettingsHaveChanged = true;
4486           break;
4487 
4488         default: /* Unknown operation code, error message already set. */
4489           goto ErrorExit;
4490       }
4491       break;
4492 
4493     case PN_PURGE:
4494       if (PropInfoPntr->commands[0] == B_EXECUTE_PROPERTY &&
4495       (ErrorCode = LoadDatabaseIfNeeded (TempString)) == B_OK &&
4496       (ErrorCode = PurgeOldWords (TempString)) == B_OK)
4497         break;
4498       goto ErrorExit;
4499 
4500     case PN_OLDEST:
4501       if (PropInfoPntr->commands[0] == B_GET_PROPERTY &&
4502       (ErrorCode = LoadDatabaseIfNeeded (TempString)) == B_OK)
4503       {
4504         ReplyMessage.AddInt32 (g_ResultName, m_OldestAge);
4505         break;
4506       }
4507       goto ErrorExit;
4508 
4509     case PN_EVALUATE:
4510     case PN_EVALUATE_STRING:
4511       if (PropInfoPntr->commands[0] == B_SET_PROPERTY)
4512       {
4513         if (!ArgumentGotString)
4514         {
4515           ErrorCode = B_BAD_TYPE;
4516           sprintf (TempString, "You need to specify a string for the "
4517             "SET %s command", PropInfoPntr->name);
4518           goto ErrorExit;
4519         }
4520         if ((ErrorCode = LoadDatabaseIfNeeded (TempString)) == B_OK)
4521         {
4522           if (PropInfoPntr->extra_data == PN_EVALUATE)
4523           {
4524             if ((ErrorCode = EvaluateFile (ArgumentString, &ReplyMessage,
4525             TempString)) == B_OK)
4526               break;
4527           }
4528           else /* PN_EVALUATE_STRING */
4529           {
4530             if ((ErrorCode = EvaluateString (ArgumentString, StringBufferSize,
4531             &ReplyMessage, TempString)) == B_OK)
4532               break;
4533           }
4534         }
4535       }
4536       goto ErrorExit;
4537 
4538     case PN_RESET_TO_DEFAULTS:
4539       if (PropInfoPntr->commands[0] == B_EXECUTE_PROPERTY)
4540       {
4541         DefaultSettings ();
4542         break;
4543       }
4544       goto ErrorExit;
4545 
4546     case PN_INSTALL_THINGS:
4547       if (PropInfoPntr->commands[0] == B_EXECUTE_PROPERTY &&
4548       (ErrorCode = InstallThings (TempString)) == B_OK)
4549         break;
4550       goto ErrorExit;
4551 
4552     case PN_SCORING_MODE:
4553       switch (PropInfoPntr->commands[0])
4554       {
4555         case B_GET_PROPERTY:
4556           ReplyMessage.AddString (g_ResultName,
4557             g_ScoringModeNames[m_ScoringMode]);
4558           break;
4559 
4560         case B_SET_PROPERTY:
4561           i = SM_MAX;
4562           if (ArgumentGotString)
4563             for (i = 0; i < SM_MAX; i++)
4564             {
4565               if (strcasecmp (ArgumentString, g_ScoringModeNames [i]) == 0)
4566               {
4567                 m_ScoringMode = (ScoringModes) i;
4568                 m_SettingsHaveChanged = true;
4569                 break;
4570               }
4571             }
4572           if (i >= SM_MAX) /* Didn't find a valid scoring mode word. */
4573           {
4574             ErrorCode = B_BAD_TYPE;
4575             sprintf (TempString, "You used the unrecognized \"%s\" as "
4576               "a scoring mode for the SET %s command.  Should be one of: ",
4577               ArgumentGotString ? ArgumentString : "not specified",
4578               PropInfoPntr->name);
4579             for (i = 0; i < SM_MAX; i++)
4580             {
4581               strcat (TempString, g_ScoringModeNames [i]);
4582               if (i < SM_MAX - 1)
4583                 strcat (TempString, ", ");
4584             }
4585             goto ErrorExit;
4586           }
4587           break;
4588 
4589         default: /* Unknown operation code, error message already set. */
4590           goto ErrorExit;
4591       }
4592       break;
4593 
4594     case PN_TOKENIZE_MODE:
4595       switch (PropInfoPntr->commands[0])
4596       {
4597         case B_GET_PROPERTY:
4598           ReplyMessage.AddString (g_ResultName,
4599             g_TokenizeModeNames[m_TokenizeMode]);
4600           break;
4601 
4602         case B_SET_PROPERTY:
4603           i = TM_MAX;
4604           if (ArgumentGotString)
4605             for (i = 0; i < TM_MAX; i++)
4606             {
4607               if (strcasecmp (ArgumentString, g_TokenizeModeNames [i]) == 0)
4608               {
4609                 m_TokenizeMode = (TokenizeModes) i;
4610                 m_SettingsHaveChanged = true;
4611                 break;
4612               }
4613             }
4614           if (i >= TM_MAX) /* Didn't find a valid tokenize mode word. */
4615           {
4616             ErrorCode = B_BAD_TYPE;
4617             sprintf (TempString, "You used the unrecognized \"%s\" as "
4618               "a tokenize mode for the SET %s command.  Should be one of: ",
4619               ArgumentGotString ? ArgumentString : "not specified",
4620               PropInfoPntr->name);
4621             for (i = 0; i < TM_MAX; i++)
4622             {
4623               strcat (TempString, g_TokenizeModeNames [i]);
4624               if (i < TM_MAX - 1)
4625                 strcat (TempString, ", ");
4626             }
4627             goto ErrorExit;
4628           }
4629           break;
4630 
4631         default: /* Unknown operation code, error message already set. */
4632           goto ErrorExit;
4633       }
4634       break;
4635 
4636     default:
4637       sprintf (TempString, "Bug!  Unrecognized property identification "
4638         "number %d (should be between 0 and %d).  Fix the entry in "
4639         "the g_ScriptingPropertyList array!",
4640         (int) PropInfoPntr->extra_data, PN_MAX - 1);
4641       goto ErrorExit;
4642   }
4643 
4644   /* Success. */
4645 
4646   ReplyMessage.AddInt32 ("error", B_OK);
4647   ErrorCode = MessagePntr->SendReply (&ReplyMessage,
4648     this /* Reply's reply handler */, 500000 /* send timeout */);
4649   if (ErrorCode != B_OK)
4650     cerr << "ProcessScriptingMessage failed to send a reply message, code " <<
4651     ErrorCode << " (" << strerror (ErrorCode) << ")" << " for " <<
4652     CommandText.String () << endl;
4653   SetCursor (B_CURSOR_SYSTEM_DEFAULT);
4654   return;
4655 
4656 ErrorExit: /* Error message in TempString, return code in ErrorCode. */
4657   ReplyMessage.AddInt32 ("error", ErrorCode);
4658   ReplyMessage.AddString ("message", TempString);
4659   DisplayErrorMessage (TempString, ErrorCode);
4660   ErrorCode = MessagePntr->SendReply (&ReplyMessage,
4661     this /* Reply's reply handler */, 500000 /* send timeout */);
4662   if (ErrorCode != B_OK)
4663     cerr << "ProcessScriptingMessage failed to send an error message, code " <<
4664     ErrorCode << " (" << strerror (ErrorCode) << ")" << " for " <<
4665     CommandText.String () << endl;
4666   SetCursor (B_CURSOR_SYSTEM_DEFAULT);
4667 }
4668 
4669 
4670 /* Since quitting stops the program before the results of a script command are
4671 received, we use a time delay to do the quit and make sure there are no pending
4672 commands being processed by the auxiliary looper which is sending us commands.
4673 Also, we have a countdown which can be interrupted by an incoming scripting
4674 message in case one client tells us to quit while another one is still using us
4675 (happens when you have two or more e-mail accounts).  But if the system is
4676 shutting down, quit immediately! */
4677 
4678 void
Pulse()4679 ABSApp::Pulse ()
4680 {
4681   if (g_QuitCountdown == 0)
4682   {
4683     if (g_CommanderLooperPntr == NULL ||
4684     !g_CommanderLooperPntr->IsBusy ())
4685       PostMessage (B_QUIT_REQUESTED);
4686   }
4687   else if (g_QuitCountdown > 0)
4688   {
4689     cerr << "SpamDBM quitting in " << g_QuitCountdown << ".\n";
4690     g_QuitCountdown--;
4691   }
4692 }
4693 
4694 
4695 /* A quit request message has come in.  If the quit countdown has reached zero,
4696 allow the request, otherwise reject it (and start the countdown if it hasn't
4697 been started). */
4698 
4699 bool
QuitRequested()4700 ABSApp::QuitRequested ()
4701 {
4702   BMessage  *QuitMessage;
4703   team_info  RemoteInfo;
4704   BMessenger RemoteMessenger;
4705   team_id    RemoteTeam;
4706 
4707   /* See if the quit is from the system shutdown command (which goes through
4708   the registrar server), if so, quit immediately. */
4709 
4710   QuitMessage = CurrentMessage ();
4711   if (QuitMessage != NULL && QuitMessage->IsSourceRemote ())
4712   {
4713     RemoteMessenger = QuitMessage->ReturnAddress ();
4714     RemoteTeam = RemoteMessenger.Team ();
4715     if (get_team_info (RemoteTeam, &RemoteInfo) == B_OK &&
4716     strstr (RemoteInfo.args, "registrar") != NULL)
4717       g_QuitCountdown = 0;
4718   }
4719 
4720   if (g_QuitCountdown == 0)
4721     return BApplication::QuitRequested ();
4722 
4723   if (g_QuitCountdown < 0)
4724 //    g_QuitCountdown = 10; /* Start the countdown. */
4725     g_QuitCountdown = 5; /* Quit more quickly */
4726 
4727   return false;
4728 }
4729 
4730 
4731 /* Go through the current database and delete words which are too old (time is
4732 equivalent to the number of messages added to the database) and too unpopular
4733 (words not used by many messages).  Hopefully this will get rid of words which
4734 are just hunks of binary or other garbage.  The database has been loaded
4735 elsewhere. */
4736 
4737 status_t
PurgeOldWords(char * ErrorMessage)4738 ABSApp::PurgeOldWords (char *ErrorMessage)
4739 {
4740   uint32                  CurrentTime;
4741   StatisticsMap::iterator CurrentIter;
4742   StatisticsMap::iterator EndIter;
4743   StatisticsMap::iterator NextIter;
4744   char                    TempString [80];
4745 
4746   strcpy (ErrorMessage, "Purge can't fail"); /* So argument gets used. */
4747   CurrentTime = m_TotalGenuineMessages + m_TotalSpamMessages - 1;
4748   m_OldestAge = (uint32) -1 /* makes largest number possible */;
4749 
4750   EndIter = m_WordMap.end ();
4751   NextIter = m_WordMap.begin ();
4752   while (NextIter != EndIter) {
4753     CurrentIter = NextIter++;
4754 
4755     if (CurrentTime - CurrentIter->second.age >= m_PurgeAge &&
4756     CurrentIter->second.genuineCount + CurrentIter->second.spamCount <=
4757     m_PurgePopularity) {
4758       /* Delete this word, it is unpopular and old.  Sob. */
4759 
4760       m_WordMap.erase (CurrentIter);
4761       if (m_WordCount > 0)
4762         m_WordCount--;
4763 
4764       m_DatabaseHasChanged = true;
4765     }
4766     else /* This word is still in the database.  Update oldest age. */
4767     {
4768       if (CurrentIter->second.age < m_OldestAge)
4769         m_OldestAge = CurrentIter->second.age;
4770     }
4771   }
4772 
4773   /* Just a little bug check here.  Just in case. */
4774 
4775   if (m_WordCount != m_WordMap.size ()) {
4776     sprintf (TempString, "Our word count of %" B_PRIu32 " doesn't match the "
4777       "size of the database, %lu", m_WordCount, m_WordMap.size());
4778     DisplayErrorMessage (TempString, -1, "Bug!");
4779     m_WordCount = m_WordMap.size ();
4780   }
4781 
4782   return B_OK;
4783 }
4784 
4785 
4786 void
ReadyToRun()4787 ABSApp::ReadyToRun ()
4788 {
4789   DatabaseWindow *DatabaseWindowPntr;
4790   float           JunkFloat;
4791   BButton        *TempButtonPntr;
4792   BCheckBox      *TempCheckBoxPntr;
4793   font_height     TempFontHeight;
4794   BMenuBar       *TempMenuBarPntr;
4795   BMenuItem      *TempMenuItemPntr;
4796   BPopUpMenu     *TempPopUpMenuPntr;
4797   BRadioButton   *TempRadioButtonPntr;
4798   BRect           TempRect;
4799   const char     *TempString = "Testing My Things";
4800   BStringView    *TempStringViewPntr;
4801   BTextControl   *TempTextPntr;
4802   BWindow        *TempWindowPntr;
4803 
4804   /* This batch of code gets some measurements which will be used for laying
4805   out controls and other GUI elements.  Set the spacing between buttons and
4806   other controls to the width of the letter "M" in the user's desired font. */
4807 
4808  g_MarginBetweenControls = (int) be_plain_font->StringWidth ("M");
4809 
4810   /* Also find out how much space a line of text uses. */
4811 
4812   be_plain_font->GetHeight (&TempFontHeight);
4813   g_LineOfTextHeight = ceilf (
4814     TempFontHeight.ascent + TempFontHeight.descent + TempFontHeight.leading);
4815 
4816   /* Start finding out the height of various user interface gadgets, which can
4817   vary based on the current font size.  Make a temporary gadget, which is
4818   attached to our window, then resize it to its prefered size so that it
4819   accomodates the font size and other frills it needs. */
4820 
4821   TempWindowPntr = new (std::nothrow) BWindow (BRect (10, 20, 200, 200),
4822 	"Temporary Window", B_DOCUMENT_WINDOW,
4823 	B_NO_WORKSPACE_ACTIVATION | B_ASYNCHRONOUS_CONTROLS);
4824   if (TempWindowPntr == NULL) {
4825     DisplayErrorMessage ("Unable to create temporary window for finding "
4826       "sizes of controls.");
4827     g_QuitCountdown = 0;
4828     return;
4829   }
4830 
4831   TempRect = TempWindowPntr->Bounds ();
4832 
4833   /* Find the height of a single line of text in a BStringView. */
4834 
4835   TempStringViewPntr = new (std::nothrow) BStringView (TempRect, TempString, TempString);
4836   if (TempStringViewPntr != NULL) {
4837     TempWindowPntr->Lock();
4838     TempWindowPntr->AddChild (TempStringViewPntr);
4839     TempStringViewPntr->GetPreferredSize (&JunkFloat, &g_StringViewHeight);
4840     TempWindowPntr->RemoveChild (TempStringViewPntr);
4841     TempWindowPntr->Unlock();
4842     delete TempStringViewPntr;
4843   }
4844 
4845   /* Find the height of a button, which seems to be larger than a text
4846   control and can make life difficult.  Make a temporary button, which
4847   is attached to our window so that it resizes to accomodate the font size. */
4848 
4849   TempButtonPntr = new (std::nothrow) BButton (TempRect, TempString, TempString, NULL);
4850   if (TempButtonPntr != NULL) {
4851     TempWindowPntr->Lock();
4852     TempWindowPntr->AddChild (TempButtonPntr);
4853     TempButtonPntr->GetPreferredSize (&JunkFloat, &g_ButtonHeight);
4854     TempWindowPntr->RemoveChild (TempButtonPntr);
4855     TempWindowPntr->Unlock();
4856     delete TempButtonPntr;
4857   }
4858 
4859   /* Find the height of a text box. */
4860 
4861   TempTextPntr = new (std::nothrow) BTextControl (TempRect, TempString, NULL /* label */,
4862     TempString, NULL);
4863   if (TempTextPntr != NULL) {
4864     TempWindowPntr->Lock ();
4865     TempWindowPntr->AddChild (TempTextPntr);
4866     TempTextPntr->GetPreferredSize (&JunkFloat, &g_TextBoxHeight);
4867     TempWindowPntr->RemoveChild (TempTextPntr);
4868     TempWindowPntr->Unlock ();
4869     delete TempTextPntr;
4870   }
4871 
4872   /* Find the height of a checkbox control. */
4873 
4874   TempCheckBoxPntr = new (std::nothrow) BCheckBox (TempRect, TempString, TempString, NULL);
4875   if (TempCheckBoxPntr != NULL) {
4876     TempWindowPntr->Lock ();
4877     TempWindowPntr->AddChild (TempCheckBoxPntr);
4878     TempCheckBoxPntr->GetPreferredSize (&JunkFloat, &g_CheckBoxHeight);
4879     TempWindowPntr->RemoveChild (TempCheckBoxPntr);
4880     TempWindowPntr->Unlock ();
4881     delete TempCheckBoxPntr;
4882   }
4883 
4884   /* Find the height of a radio button control. */
4885 
4886   TempRadioButtonPntr =
4887     new (std::nothrow) BRadioButton (TempRect, TempString, TempString, NULL);
4888   if (TempRadioButtonPntr != NULL) {
4889     TempWindowPntr->Lock ();
4890     TempWindowPntr->AddChild (TempRadioButtonPntr);
4891     TempRadioButtonPntr->GetPreferredSize (&JunkFloat, &g_RadioButtonHeight);
4892     TempWindowPntr->RemoveChild (TempRadioButtonPntr);
4893     TempWindowPntr->Unlock ();
4894     delete TempRadioButtonPntr;
4895   }
4896 
4897   /* Find the height of a pop-up menu. */
4898 
4899   TempMenuBarPntr = new (std::nothrow) BMenuBar (TempRect, TempString,
4900     B_FOLLOW_LEFT | B_FOLLOW_TOP, B_ITEMS_IN_COLUMN,
4901     true /* resize to fit items */);
4902   TempPopUpMenuPntr = new (std::nothrow) BPopUpMenu (TempString);
4903   TempMenuItemPntr = new (std::nothrow) BMenuItem (TempString, new BMessage (12345), 'g');
4904 
4905   if (TempMenuBarPntr != NULL && TempPopUpMenuPntr != NULL &&
4906   TempMenuItemPntr != NULL)
4907   {
4908     TempPopUpMenuPntr->AddItem (TempMenuItemPntr);
4909     TempMenuBarPntr->AddItem (TempPopUpMenuPntr);
4910 
4911     TempWindowPntr->Lock ();
4912     TempWindowPntr->AddChild (TempMenuBarPntr);
4913     TempMenuBarPntr->GetPreferredSize (&JunkFloat, &g_PopUpMenuHeight);
4914     TempWindowPntr->RemoveChild (TempMenuBarPntr);
4915     TempWindowPntr->Unlock ();
4916     delete TempMenuBarPntr; // It will delete contents too.
4917   }
4918 
4919   TempWindowPntr->Lock ();
4920   TempWindowPntr->Quit ();
4921 
4922   SetPulseRate (500000);
4923 
4924   if (g_CommandLineMode)
4925     g_QuitCountdown = 0; /* Quit as soon as queued up commands done. */
4926   else /* GUI mode, make a window. */
4927   {
4928     DatabaseWindowPntr = new (std::nothrow) DatabaseWindow ();
4929     if (DatabaseWindowPntr == NULL) {
4930       DisplayErrorMessage ("Unable to create window.");
4931       g_QuitCountdown = 0;
4932     } else {
4933       DatabaseWindowPntr->Show (); /* Starts the window's message loop. */
4934     }
4935   }
4936 
4937   g_AppReadyToRunCompleted = true;
4938 }
4939 
4940 
4941 /* Given a mail component (body text, attachment, whatever), look for words in
4942 it.  If the tokenize mode specifies that it isn't one of the ones we are
4943 looking for, just skip it.  For container type components, recursively examine
4944 their contents, up to the maximum depth specified. */
4945 
4946 status_t
RecursivelyTokenizeMailComponent(BMailComponent * ComponentPntr,const char * OptionalFileName,set<string> & WordSet,char * ErrorMessage,int RecursionLevel,int MaxRecursionLevel)4947 ABSApp::RecursivelyTokenizeMailComponent (
4948   BMailComponent *ComponentPntr,
4949   const char *OptionalFileName,
4950   set<string> &WordSet,
4951   char *ErrorMessage,
4952   int RecursionLevel,
4953   int MaxRecursionLevel)
4954 {
4955   char                        AttachmentName [B_FILE_NAME_LENGTH];
4956   BMailAttachment            *AttachmentPntr;
4957   BMimeType                   ComponentMIMEType;
4958   BMailContainer             *ContainerPntr;
4959   BMallocIO                   ContentsIO;
4960   const char                 *ContentsBufferPntr;
4961   size_t                      ContentsBufferSize;
4962   status_t                    ErrorCode;
4963   bool                        ExamineComponent;
4964   const char                 *HeaderKeyPntr;
4965   const char                 *HeaderValuePntr;
4966   int                         i;
4967   int                         j;
4968   const char                 *NameExtension;
4969   int                         NumComponents;
4970   BMimeType                   TextAnyMIMEType ("text");
4971   BMimeType                   TextPlainMIMEType ("text/plain");
4972 
4973   if (ComponentPntr == NULL)
4974     return B_OK;
4975 
4976   /* Add things in the sub-headers that might be useful.  Things like the file
4977   name of attachments, the encoding type, etc. */
4978 
4979   if (m_TokenizeMode == TM_PLAIN_TEXT_HEADER ||
4980   m_TokenizeMode == TM_ANY_TEXT_HEADER ||
4981   m_TokenizeMode == TM_ALL_PARTS_HEADER ||
4982   m_TokenizeMode == TM_JUST_HEADER)
4983   {
4984     for (i = 0; i < 1000; i++)
4985     {
4986       HeaderKeyPntr = ComponentPntr->HeaderAt (i);
4987       if (HeaderKeyPntr == NULL)
4988         break;
4989       AddWordsToSet (HeaderKeyPntr, strlen (HeaderKeyPntr),
4990         'H' /* Prefix for Headers, uppercase unlike normal words. */, WordSet);
4991       for (j = 0; j < 1000; j++)
4992       {
4993         HeaderValuePntr = ComponentPntr->HeaderField (HeaderKeyPntr, j);
4994         if (HeaderValuePntr == NULL)
4995           break;
4996         AddWordsToSet (HeaderValuePntr, strlen (HeaderValuePntr),
4997           'H', WordSet);
4998       }
4999     }
5000   }
5001 
5002   /* Check the MIME type of the thing.  It's used to decide if the contents are
5003   worth examining for words. */
5004 
5005   ErrorCode = ComponentPntr->MIMEType (&ComponentMIMEType);
5006   if (ErrorCode != B_OK)
5007   {
5008     sprintf (ErrorMessage, "ABSApp::RecursivelyTokenizeMailComponent: "
5009       "Unable to get MIME type at level %d in \"%s\"",
5010       RecursionLevel, OptionalFileName);
5011     return ErrorCode;
5012   }
5013   if (ComponentMIMEType.Type() == NULL)
5014   {
5015     /* Have to make up a MIME type for things which don't have them, such as
5016     the main body text, otherwise it would get ignored. */
5017 
5018     if (NULL != dynamic_cast<BTextMailComponent *>(ComponentPntr))
5019       ComponentMIMEType.SetType ("text/plain");
5020   }
5021   if (!TextAnyMIMEType.Contains (&ComponentMIMEType) &&
5022   NULL != (AttachmentPntr = dynamic_cast<BMailAttachment *>(ComponentPntr)))
5023   {
5024     /* Sometimes spam doesn't give a text MIME type for text when they do an
5025     attachment (which is often base64 encoded).  Use the file name extension to
5026     see if it really is text. */
5027     NameExtension = NULL;
5028     if (AttachmentPntr->FileName (AttachmentName) >= 0)
5029       NameExtension = strrchr (AttachmentName, '.');
5030     if (NameExtension != NULL)
5031     {
5032       if (strcasecmp (NameExtension, ".txt") == 0)
5033         ComponentMIMEType.SetType ("text/plain");
5034       else if (strcasecmp (NameExtension, ".htm") == 0 ||
5035       strcasecmp (NameExtension, ".html") == 0)
5036         ComponentMIMEType.SetType ("text/html");
5037     }
5038   }
5039 
5040   switch (m_TokenizeMode)
5041   {
5042     case TM_PLAIN_TEXT:
5043     case TM_PLAIN_TEXT_HEADER:
5044       ExamineComponent = TextPlainMIMEType.Contains (&ComponentMIMEType);
5045       break;
5046 
5047     case TM_ANY_TEXT:
5048     case TM_ANY_TEXT_HEADER:
5049       ExamineComponent = TextAnyMIMEType.Contains (&ComponentMIMEType);
5050       break;
5051 
5052     case TM_ALL_PARTS:
5053     case TM_ALL_PARTS_HEADER:
5054       ExamineComponent = true;
5055       break;
5056 
5057     default:
5058       ExamineComponent = false;
5059       break;
5060   }
5061 
5062   if (ExamineComponent)
5063   {
5064     /* Get the contents of the component.  This will be UTF-8 text (converted
5065     from whatever encoding was used) for text attachments.  For other ones,
5066     it's just the raw data, or perhaps decoded from base64 encoding. */
5067 
5068     ContentsIO.SetBlockSize (16 * 1024);
5069     ErrorCode = ComponentPntr->GetDecodedData (&ContentsIO);
5070     if (ErrorCode == B_OK) /* Can fail for container components: no data. */
5071     {
5072       /* Look for words in the decoded data. */
5073 
5074       ContentsBufferPntr = (const char *) ContentsIO.Buffer ();
5075       ContentsBufferSize = ContentsIO.BufferLength ();
5076       if (ContentsBufferPntr != NULL /* can be empty */)
5077         AddWordsToSet (ContentsBufferPntr, ContentsBufferSize,
5078           0 /* no prefix character, this is body text */, WordSet);
5079     }
5080   }
5081 
5082   /* Examine any sub-components in the message. */
5083 
5084   if (RecursionLevel + 1 <= MaxRecursionLevel &&
5085   NULL != (ContainerPntr = dynamic_cast<BMailContainer *>(ComponentPntr)))
5086   {
5087     NumComponents = ContainerPntr->CountComponents ();
5088 
5089     for (i = 0; i < NumComponents; i++)
5090     {
5091       ComponentPntr = ContainerPntr->GetComponent (i);
5092 
5093       ErrorCode = RecursivelyTokenizeMailComponent (ComponentPntr,
5094         OptionalFileName, WordSet, ErrorMessage, RecursionLevel + 1,
5095         MaxRecursionLevel);
5096       if (ErrorCode != B_OK)
5097         break;
5098     }
5099   }
5100 
5101   return ErrorCode;
5102 }
5103 
5104 
5105 /* The user has tried to open a file or several files with this application,
5106 via Tracker's open-with menu item.  If it is a database type file, then change
5107 the database file name to it.  Otherwise, ask the user whether they want to
5108 classify it as spam or non-spam.  There will be at most around 100 files, BeOS
5109 R5.0.3's Tracker crashes if it tries to pass on more than that many using Open
5110 With... etc.  The command is sent to an intermediary thread where it is
5111 asynchronously converted into a scripting message(s) that are sent back to this
5112 BApplication.  The intermediary is needed since we can't recursively execute
5113 scripting messages while processing a message (this RefsReceived one). */
5114 
5115 void
RefsReceived(BMessage * MessagePntr)5116 ABSApp::RefsReceived (BMessage *MessagePntr)
5117 {
5118   if (g_CommanderLooperPntr != NULL)
5119     g_CommanderLooperPntr->CommandReferences (MessagePntr);
5120 }
5121 
5122 
5123 /* A scripting command is looking for something to execute it.  See if it is
5124 targetted at our database. */
5125 
ResolveSpecifier(BMessage * MessagePntr,int32 Index,BMessage * SpecifierMsgPntr,int32 SpecificationKind,const char * PropertyPntr)5126 BHandler * ABSApp::ResolveSpecifier (
5127   BMessage *MessagePntr,
5128   int32 Index,
5129   BMessage *SpecifierMsgPntr,
5130   int32 SpecificationKind,
5131   const char *PropertyPntr)
5132 {
5133   int i;
5134 
5135   /* See if it is one of our commands. */
5136 
5137   if (SpecificationKind == B_DIRECT_SPECIFIER)
5138   {
5139     for (i = PN_MAX - 1; i >= 0; i--)
5140     {
5141       if (strcasecmp (PropertyPntr, g_PropertyNames [i]) == 0)
5142         return this; /* Found it!  Return the Handler (which is us). */
5143     }
5144   }
5145 
5146   /* Handle an unrecognized scripting command, let the parent figure it out. */
5147 
5148   return BApplication::ResolveSpecifier (
5149     MessagePntr, Index, SpecifierMsgPntr, SpecificationKind, PropertyPntr);
5150 }
5151 
5152 
5153 /* Save the database if it hasn't been saved yet.  Otherwise do nothing. */
5154 
SaveDatabaseIfNeeded(char * ErrorMessage)5155 status_t ABSApp::SaveDatabaseIfNeeded (char *ErrorMessage)
5156 {
5157   if (m_DatabaseHasChanged)
5158     return LoadSaveDatabase (false /* DoLoad */, ErrorMessage);
5159 
5160   return B_OK;
5161 }
5162 
5163 
5164 /* Presumably the file is an e-mail message (or at least the header portion of
5165 one).  Break it into parts: header, body and MIME components.  Then add the
5166 words in the portions that match the current tokenization settings to the set
5167 of words. */
5168 
TokenizeParts(BPositionIO * PositionIOPntr,const char * OptionalFileName,set<string> & WordSet,char * ErrorMessage)5169 status_t ABSApp::TokenizeParts (
5170   BPositionIO *PositionIOPntr,
5171   const char *OptionalFileName,
5172   set<string> &WordSet,
5173   char *ErrorMessage)
5174 {
5175   status_t        ErrorCode = B_OK;
5176   BEmailMessage   WholeEMail;
5177 
5178   sprintf (ErrorMessage, "ABSApp::TokenizeParts: While getting e-mail "
5179     "headers, had problems with \"%s\"", OptionalFileName);
5180 
5181   ErrorCode = WholeEMail.SetToRFC822 (
5182     PositionIOPntr /* it does its own seeking to the start */,
5183     -1 /* length */, true /* parse_now */);
5184   if (ErrorCode < 0) goto ErrorExit;
5185 
5186   ErrorCode = RecursivelyTokenizeMailComponent (&WholeEMail,
5187     OptionalFileName, WordSet, ErrorMessage, 0 /* Initial recursion level */,
5188     (m_TokenizeMode == TM_JUST_HEADER) ? 0 : 500 /* Max recursion level */);
5189 
5190 ErrorExit:
5191   return ErrorCode;
5192 }
5193 
5194 
5195 /* Add all the words in the whole file or memory buffer to the supplied set.
5196 The file doesn't have to be an e-mail message since it isn't parsed for e-mail
5197 headers or MIME headers or anything.  It blindly adds everything that looks
5198 like a word, though it does convert quoted printable codes to the characters
5199 they represent.  See also AddWordsToSet which does something more advanced. */
5200 
TokenizeWhole(BPositionIO * PositionIOPntr,const char * OptionalFileName,set<string> & WordSet,char * ErrorMessage)5201 status_t ABSApp::TokenizeWhole (
5202   BPositionIO *PositionIOPntr,
5203   const char *OptionalFileName,
5204   set<string> &WordSet,
5205   char *ErrorMessage)
5206 {
5207   string                AccumulatedWord;
5208   uint8                 Buffer [16 * 1024];
5209   uint8                *BufferCurrentPntr = Buffer + 0;
5210   uint8                *BufferEndPntr = Buffer + 0;
5211   const char           *IOErrorString =
5212                           "TokenizeWhole: Error %ld while reading \"%s\"";
5213   size_t                Length;
5214   int                   Letter = ' ';
5215   char                  HexString [4];
5216   int                   NextLetter = ' ';
5217   int                   NextNextLetter = ' ';
5218 
5219   /* Use a buffer since reading single characters from a BFile is so slow.
5220   BufferCurrentPntr is the position of the next character to be read.  When it
5221   reaches BufferEndPntr, it is time to fill the buffer again. */
5222 
5223 #define ReadChar(CharVar) \
5224   { \
5225     if (BufferCurrentPntr < BufferEndPntr) \
5226       CharVar = *BufferCurrentPntr++; \
5227     else /* Try to fill the buffer. */ \
5228     { \
5229       ssize_t AmountRead; \
5230       AmountRead = PositionIOPntr->Read (Buffer, sizeof (Buffer)); \
5231       if (AmountRead < 0) \
5232       { \
5233         sprintf (ErrorMessage, IOErrorString, AmountRead, OptionalFileName); \
5234         return AmountRead; \
5235       } \
5236       else if (AmountRead == 0) \
5237         CharVar = EOF; \
5238       else \
5239       { \
5240         BufferEndPntr = Buffer + AmountRead; \
5241         BufferCurrentPntr = Buffer + 0; \
5242         CharVar = *BufferCurrentPntr++; \
5243       } \
5244     } \
5245   }
5246 
5247   /* Read all the words in the file and add them to our local set of words.  A
5248   set is used since we don't care how many times a word occurs. */
5249 
5250   while (true)
5251   {
5252     /* We read two letters ahead so that we can decode quoted printable
5253     characters (an equals sign followed by two hex digits or a new line).  Note
5254     that Letter can become EOF (-1) when end of file is reached. */
5255 
5256     Letter = NextLetter;
5257     NextLetter = NextNextLetter;
5258     ReadChar (NextNextLetter);
5259 
5260     /* Decode quoted printable codes first, so that the rest of the code just
5261     sees an ordinary character.  Or even nothing, if it is the hidden line
5262     break combination.  This may falsely corrupt stuff following an equals
5263     sign, but usually won't. */
5264 
5265     if (Letter == '=')
5266     {
5267       if ((NextLetter == '\r' && NextNextLetter == '\n') ||
5268       (NextLetter == '\n' && NextNextLetter == '\r'))
5269       {
5270         /* Make the "=\r\n" pair disappear.  It's not even white space. */
5271         ReadChar (NextLetter);
5272         ReadChar (NextNextLetter);
5273         continue;
5274       }
5275       if (NextLetter == '\n' || NextLetter == '\r')
5276       {
5277         /* Make the "=\n" pair disappear.  It's not even white space. */
5278         NextLetter = NextNextLetter;
5279         ReadChar (NextNextLetter);
5280         continue;
5281       }
5282       if (NextNextLetter != EOF &&
5283       isxdigit (NextLetter) && isxdigit (NextNextLetter))
5284       {
5285         /* Convert the hex code to a letter. */
5286         HexString[0] = NextLetter;
5287         HexString[1] = NextNextLetter;
5288         HexString[2] = 0;
5289         Letter = strtoul (HexString, NULL, 16 /* number system base */);
5290         ReadChar (NextLetter);
5291         ReadChar (NextNextLetter);
5292       }
5293     }
5294 
5295     /* Convert to lower case to improve word matches.  Of course this loses a
5296     bit of information, such as MONEY vs Money, an indicator of spam.  Well,
5297     apparently that isn't all that useful a distinction, so do it. */
5298 
5299     if (Letter >= 'A' && Letter < 'Z')
5300       Letter = Letter + ('a' - 'A');
5301 
5302     /* See if it is a letter we treat as white space - all control characters
5303     and all punctuation except for: apostrophe (so "it's" and possessive
5304     versions of words get stored), dash (for hyphenated words), dollar sign
5305     (for cash amounts), period (for IP addresses, we later remove trailing
5306     (periods).  Note that codes above 127 are UTF-8 characters, which we
5307     consider non-space. */
5308 
5309     if (Letter < 0 /* EOF */ || (Letter < 128 && g_SpaceCharacters[Letter]))
5310     {
5311       /* That space finished off a word.  Remove trailing periods... */
5312 
5313       while ((Length = AccumulatedWord.size()) > 0 &&
5314       AccumulatedWord [Length-1] == '.')
5315         AccumulatedWord.resize (Length - 1);
5316 
5317       /* If there's anything left in the word, add it to the set.  Also ignore
5318       words which are too big (it's probably some binary encoded data).  But
5319       leave room for supercalifragilisticexpialidoceous.  According to one web
5320       site, pneumonoultramicroscopicsilicovolcanoconiosis is the longest word
5321       currently in English.  Note that some uuencoded data was seen with a 60
5322       character line length. */
5323 
5324       if (Length > 0 && Length <= g_MaxWordLength)
5325         WordSet.insert (AccumulatedWord);
5326 
5327       /* Empty out the string to get ready for the next word. */
5328 
5329       AccumulatedWord.resize (0);
5330     }
5331     else /* Not a space-like character, add it to the word. */
5332       AccumulatedWord.append (1 /* one copy of the char */, (char) Letter);
5333 
5334     /* Stop at end of file or error.  Don't care which.  Exit here so that last
5335     word got processed. */
5336 
5337     if (Letter == EOF)
5338       break;
5339   }
5340 
5341   return B_OK;
5342 }
5343 
5344 
5345 
/******************************************************************************
 * Implementation of the ClassificationChoicesWindow class, constructor,
 * destructor and the rest of the member functions in mostly alphabetical
 * order.
 */
5351 
ClassificationChoicesWindow(BRect FrameRect,const char * FileName,int NumberOfFiles)5352 ClassificationChoicesWindow::ClassificationChoicesWindow (
5353   BRect FrameRect,
5354   const char *FileName,
5355   int NumberOfFiles)
5356 : BWindow (FrameRect, "Classification Choices", B_TITLED_WINDOW,
5357     B_NOT_ZOOMABLE | B_NOT_RESIZABLE | B_ASYNCHRONOUS_CONTROLS),
5358   m_BulkModeSelectedPntr (NULL),
5359   m_ChoosenClassificationPntr (NULL)
5360 {
5361   ClassificationChoicesView *SubViewPntr;
5362 
5363   SubViewPntr = new ClassificationChoicesView (Bounds(),
5364     FileName, NumberOfFiles);
5365   AddChild (SubViewPntr);
5366   SubViewPntr->ResizeToPreferred ();
5367   ResizeTo (SubViewPntr->Frame().Width(), SubViewPntr->Frame().Height());
5368 }
5369 
5370 
5371 void
MessageReceived(BMessage * MessagePntr)5372 ClassificationChoicesWindow::MessageReceived (BMessage *MessagePntr)
5373 {
5374   BControl *ControlPntr;
5375 
5376   if (MessagePntr->what >= MSG_CLASS_BUTTONS &&
5377   MessagePntr->what < MSG_CLASS_BUTTONS + CL_MAX)
5378   {
5379     if (m_ChoosenClassificationPntr != NULL)
5380       *m_ChoosenClassificationPntr =
5381         (ClassificationTypes) (MessagePntr->what - MSG_CLASS_BUTTONS);
5382     PostMessage (B_QUIT_REQUESTED); // Close and destroy the window.
5383     return;
5384   }
5385 
5386   if (MessagePntr->what == MSG_BULK_CHECKBOX)
5387   {
5388     if (m_BulkModeSelectedPntr != NULL &&
5389     MessagePntr->FindPointer ("source", (void **) &ControlPntr) == B_OK)
5390       *m_BulkModeSelectedPntr = (ControlPntr->Value() == B_CONTROL_ON);
5391     return;
5392   }
5393 
5394   if (MessagePntr->what == MSG_CANCEL_BUTTON)
5395   {
5396     PostMessage (B_QUIT_REQUESTED); // Close and destroy the window.
5397     return;
5398   }
5399 
5400   BWindow::MessageReceived (MessagePntr);
5401 }
5402 
5403 
5404 void
Go(bool * BulkModeSelectedPntr,ClassificationTypes * ChoosenClassificationPntr)5405 ClassificationChoicesWindow::Go (
5406   bool *BulkModeSelectedPntr,
5407   ClassificationTypes *ChoosenClassificationPntr)
5408 {
5409   status_t  ErrorCode = 0;
5410   BView    *MainViewPntr;
5411   thread_id WindowThreadID;
5412 
5413   m_BulkModeSelectedPntr = BulkModeSelectedPntr;
5414   m_ChoosenClassificationPntr = ChoosenClassificationPntr;
5415   if (m_ChoosenClassificationPntr != NULL)
5416     *m_ChoosenClassificationPntr = CL_MAX;
5417 
5418   Show (); // Starts the window thread running.
5419 
5420   /* Move the window to the center of the screen it is now being displayed on
5421   (have to wait for it to be showing). */
5422 
5423   Lock ();
5424   MainViewPntr = FindView ("ClassificationChoicesView");
5425   if (MainViewPntr != NULL)
5426   {
5427     BRect   TempRect;
5428     BScreen TempScreen (this);
5429     float   X;
5430     float   Y;
5431 
5432     TempRect = TempScreen.Frame ();
5433     X = TempRect.Width() / 2;
5434     Y = TempRect.Height() / 2;
5435     TempRect = MainViewPntr->Frame();
5436     X -= TempRect.Width() / 2;
5437     Y -= TempRect.Height() / 2;
5438     MoveTo (ceilf (X), ceilf (Y));
5439   }
5440   Unlock ();
5441 
5442   /* Wait for the window to go away. */
5443 
5444   WindowThreadID = Thread ();
5445   if (WindowThreadID >= 0)
5446     // Delay until the window thread has died, presumably window deleted now.
5447     wait_for_thread (WindowThreadID, &ErrorCode);
5448 }
5449 
5450 
5451 
5452 /******************************************************************************
5453  * Implementation of the ClassificationChoicesView class, constructor,
5454  * destructor and the rest of the member functions in mostly alphabetical
5455  * order.
5456  */
5457 
ClassificationChoicesView(BRect FrameRect,const char * FileName,int NumberOfFiles)5458 ClassificationChoicesView::ClassificationChoicesView (
5459   BRect FrameRect,
5460   const char *FileName,
5461   int NumberOfFiles)
5462 : BView (FrameRect, "ClassificationChoicesView",
5463     B_FOLLOW_TOP | B_FOLLOW_LEFT, B_WILL_DRAW | B_NAVIGABLE_JUMP),
5464   m_FileName (FileName),
5465   m_NumberOfFiles (NumberOfFiles),
5466   m_PreferredBottomY (ceilf (g_ButtonHeight * 10))
5467 {
5468 }
5469 
5470 
5471 void
AttachedToWindow()5472 ClassificationChoicesView::AttachedToWindow ()
5473 {
5474   BButton            *ButtonPntr;
5475   BCheckBox          *CheckBoxPntr;
5476   ClassificationTypes Classification;
5477   float               Margin;
5478   float               RowHeight;
5479   float               RowTop;
5480   BTextView          *TextViewPntr;
5481   BRect               TempRect;
5482   char                TempString [2048];
5483   BRect               TextRect;
5484   float               X;
5485 
5486   SetViewColor (ui_color (B_PANEL_BACKGROUND_COLOR));
5487 
5488   RowHeight = g_ButtonHeight;
5489   if (g_CheckBoxHeight > RowHeight)
5490     RowHeight = g_CheckBoxHeight;
5491   RowHeight = ceilf (RowHeight * 1.1);
5492 
5493   TempRect = Bounds ();
5494   RowTop = TempRect.top;
5495 
5496   /* Show the file name text. */
5497 
5498   Margin = ceilf ((RowHeight - g_StringViewHeight) / 2);
5499   TempRect = Bounds ();
5500   TempRect.top = RowTop + Margin;
5501   TextRect = TempRect;
5502   TextRect.OffsetTo (0, 0);
5503   TextRect.InsetBy (g_MarginBetweenControls, 2);
5504   sprintf (TempString, "How do you want to classify the file named \"%s\"?",
5505     m_FileName);
5506   TextViewPntr = new BTextView (TempRect, "FileText", TextRect,
5507     B_FOLLOW_TOP | B_FOLLOW_LEFT, B_WILL_DRAW | B_FULL_UPDATE_ON_RESIZE);
5508   AddChild (TextViewPntr);
5509   TextViewPntr->SetText (TempString);
5510   TextViewPntr->MakeEditable (false);
5511   TextViewPntr->SetViewColor (ui_color (B_PANEL_BACKGROUND_COLOR));
5512   TextViewPntr->ResizeTo (TempRect.Width (),
5513     3 + TextViewPntr->TextHeight (0, sizeof (TempString)));
5514   RowTop = TextViewPntr->Frame().bottom + Margin;
5515 
5516   /* Make the classification buttons. */
5517 
5518   Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
5519   TempRect = Bounds ();
5520   TempRect.top = RowTop + Margin;
5521   X = Bounds().left + g_MarginBetweenControls;
5522   for (Classification = (ClassificationTypes) 0; Classification < CL_MAX;
5523   Classification = (ClassificationTypes) ((int) Classification + 1))
5524   {
5525     TempRect = Bounds ();
5526     TempRect.top = RowTop + Margin;
5527     TempRect.left = X;
5528     sprintf (TempString, "%s Button",
5529       g_ClassificationTypeNames [Classification]);
5530     ButtonPntr = new BButton (TempRect, TempString,
5531       g_ClassificationTypeNames [Classification], new BMessage (
5532       ClassificationChoicesWindow::MSG_CLASS_BUTTONS + Classification));
5533     AddChild (ButtonPntr);
5534     ButtonPntr->ResizeToPreferred ();
5535     X = ButtonPntr->Frame().right + 3 * g_MarginBetweenControls;
5536   }
5537   RowTop += ceilf (RowHeight * 1.2);
5538 
5539   /* Make the Cancel button. */
5540 
5541   Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
5542   TempRect = Bounds ();
5543   TempRect.top = RowTop + Margin;
5544   TempRect.left += g_MarginBetweenControls;
5545   ButtonPntr = new BButton (TempRect, "Cancel Button",
5546     "Cancel", new BMessage (ClassificationChoicesWindow::MSG_CANCEL_BUTTON));
5547   AddChild (ButtonPntr);
5548   ButtonPntr->ResizeToPreferred ();
5549   X = ButtonPntr->Frame().right + g_MarginBetweenControls;
5550 
5551   /* Make the checkbox for bulk operations. */
5552 
5553   if (m_NumberOfFiles > 1)
5554   {
5555     Margin = ceilf ((RowHeight - g_CheckBoxHeight) / 2);
5556     TempRect = Bounds ();
5557     TempRect.top = RowTop + Margin;
5558     TempRect.left = X;
5559     sprintf (TempString, "Mark all %d remaining messages the same way.",
5560       m_NumberOfFiles - 1);
5561     CheckBoxPntr = new BCheckBox (TempRect, "BulkBox", TempString,
5562       new BMessage (ClassificationChoicesWindow::MSG_BULK_CHECKBOX));
5563     AddChild (CheckBoxPntr);
5564     CheckBoxPntr->ResizeToPreferred ();
5565   }
5566   RowTop += RowHeight;
5567 
5568   m_PreferredBottomY = RowTop;
5569 }
5570 
5571 
5572 void
GetPreferredSize(float * width,float * height)5573 ClassificationChoicesView::GetPreferredSize (float *width, float *height)
5574 {
5575   if (width != NULL)
5576     *width = Bounds().Width();
5577   if (height != NULL)
5578     *height = m_PreferredBottomY;
5579 }
5580 
5581 
5582 
5583 /******************************************************************************
5584  * Implementation of the CommanderLooper class, constructor, destructor and the
5585  * rest of the member functions in mostly alphabetical order.
5586  */
5587 
CommanderLooper()5588 CommanderLooper::CommanderLooper ()
5589 : BLooper ("CommanderLooper", B_NORMAL_PRIORITY),
5590   m_IsBusy (false)
5591 {
5592 }
5593 
5594 
~CommanderLooper()5595 CommanderLooper::~CommanderLooper ()
5596 {
5597   g_CommanderLooperPntr = NULL;
5598   delete g_CommanderMessenger;
5599   g_CommanderMessenger = NULL;
5600 }
5601 
5602 
5603 /* Process some command line arguments.  Basically just send a message to this
5604 looper itself to do the work later.  That way the caller can continue doing
5605 whatever they're doing, particularly if it's the BApplication. */
5606 
5607 void
CommandArguments(int argc,char ** argv)5608 CommanderLooper::CommandArguments (int argc, char **argv)
5609 {
5610   int      i;
5611   BMessage InternalMessage;
5612 
5613   InternalMessage.what = MSG_COMMAND_ARGUMENTS;
5614   for (i = 0; i < argc; i++)
5615     InternalMessage.AddString ("arg", argv[i]);
5616 
5617   PostMessage (&InternalMessage);
5618 }
5619 
5620 
5621 /* Copy the refs out of the given message and stuff them into an internal
5622 message to ourself (so that the original message can be returned to the caller,
5623 and if it is Tracker, it can close the file handles it has open).  Optionally
5624 allow preset classification rather than asking the user (set BulkMode to TRUE
5625 and specify the class with BulkClassification). */
5626 
5627 void
CommandReferences(BMessage * MessagePntr,bool BulkMode,ClassificationTypes BulkClassification)5628 CommanderLooper::CommandReferences (
5629   BMessage *MessagePntr,
5630   bool BulkMode,
5631   ClassificationTypes BulkClassification)
5632 {
5633   entry_ref EntryRef;
5634   int       i;
5635   BMessage  InternalMessage;
5636 
5637   InternalMessage.what = MSG_COMMAND_FILE_REFS;
5638   for (i = 0; MessagePntr->FindRef ("refs", i, &EntryRef) == B_OK; i++)
5639     InternalMessage.AddRef ("refs", &EntryRef);
5640   InternalMessage.AddBool ("BulkMode", BulkMode);
5641   InternalMessage.AddInt32 ("BulkClassification", BulkClassification);
5642 
5643   PostMessage (&InternalMessage);
5644 }
5645 
5646 
5647 /* This function is called by other threads to see if the CommanderLooper is
5648 busy working on something. */
5649 
5650 bool
IsBusy()5651 CommanderLooper::IsBusy ()
5652 {
5653   if (m_IsBusy)
5654     return true;
5655 
5656   if (IsLocked () || !MessageQueue()->IsEmpty ())
5657     return true;
5658 
5659   return false;
5660 }
5661 
5662 
5663 void
5664 
MessageReceived(BMessage * MessagePntr)5665 CommanderLooper::MessageReceived (BMessage *MessagePntr)
5666 {
5667   m_IsBusy = true;
5668 
5669   if (MessagePntr->what == MSG_COMMAND_ARGUMENTS)
5670     ProcessArgs (MessagePntr);
5671   else if (MessagePntr->what == MSG_COMMAND_FILE_REFS)
5672     ProcessRefs (MessagePntr);
5673   else
5674     BLooper::MessageReceived (MessagePntr);
5675 
5676   m_IsBusy = false;
5677 }
5678 
5679 
5680 /* Process the command line by converting it into a series of scripting
5681 messages (possibly thousands) and sent them to the BApplication synchronously
5682 (so we can print the result). */
5683 
5684 void
ProcessArgs(BMessage * MessagePntr)5685 CommanderLooper::ProcessArgs (BMessage *MessagePntr)
5686 {
5687   int32                 argc = 0;
5688   const char          **argv = NULL;
5689   int                   ArgumentIndex;
5690   uint32                CommandCode;
5691   const char           *CommandWord;
5692   status_t              ErrorCode;
5693   const char           *ErrorTitle = "ProcessArgs";
5694   char                 *EndPntr;
5695   int32                 i;
5696   BMessage              ReplyMessage;
5697   BMessage              ScriptMessage;
5698   struct property_info *PropInfoPntr;
5699   const char           *PropertyName;
5700   bool                  TempBool;
5701   float                 TempFloat;
5702   int32                 TempInt32;
5703   const char           *TempStringPntr;
5704   type_code             TypeCode;
5705   const char           *ValuePntr;
5706 
5707   /* Get the argument count and pointers to arguments out of the message and
5708   into our argc and argv. */
5709 
5710   ErrorCode = MessagePntr->GetInfo ("arg", &TypeCode, &argc);
5711   if (ErrorCode != B_OK || TypeCode != B_STRING_TYPE)
5712   {
5713     DisplayErrorMessage ("Unable to find argument strings in message",
5714       ErrorCode, ErrorTitle);
5715     goto ErrorExit;
5716   }
5717 
5718   if (argc < 2)
5719   {
5720     cerr << PrintUsage;
5721     DisplayErrorMessage ("You need to specify a command word, like GET, SET "
5722       "and so on followed by a property, like DatabaseFile, and maybe "
5723       "followed by a value of some sort", -1, ErrorTitle);
5724     goto ErrorExit;
5725   }
5726 
5727   argv = (const char **) malloc (sizeof (char *) * argc);
5728   if (argv == NULL)
5729   {
5730     DisplayErrorMessage ("Out of memory when allocating argv array",
5731       ENOMEM, ErrorTitle);
5732     goto ErrorExit;
5733   }
5734 
5735   for (i = 0; i < argc; i++)
5736   {
5737     if ((ErrorCode = MessagePntr->FindString ("arg", i, &argv[i])) != B_OK)
5738     {
5739       DisplayErrorMessage ("Unable to find argument in the BMessage",
5740         ErrorCode, ErrorTitle);
5741       goto ErrorExit;
5742     }
5743   }
5744 
5745   CommandWord = argv[1];
5746 
5747   /* Special case for the Quit command since it isn't a scripting command. */
5748 
5749   if (strcasecmp (CommandWord, "quit") == 0)
5750   {
5751     g_QuitCountdown = 10;
5752     goto ErrorExit;
5753   }
5754 
5755   /* Find the corresponding scripting command. */
5756 
5757   if (strcasecmp (CommandWord, "set") == 0)
5758     CommandCode = B_SET_PROPERTY;
5759   else if (strcasecmp (CommandWord, "get") == 0)
5760     CommandCode = B_GET_PROPERTY;
5761   else if (strcasecmp (CommandWord, "count") == 0)
5762     CommandCode = B_COUNT_PROPERTIES;
5763   else if (strcasecmp (CommandWord, "create") == 0)
5764     CommandCode = B_CREATE_PROPERTY;
5765   else if (strcasecmp (CommandWord, "delete") == 0)
5766     CommandCode = B_DELETE_PROPERTY;
5767   else
5768     CommandCode = B_EXECUTE_PROPERTY;
5769 
5770   if (CommandCode == B_EXECUTE_PROPERTY)
5771   {
5772     PropertyName = CommandWord;
5773     ArgumentIndex = 2; /* Arguments to the command start at this index. */
5774   }
5775   else
5776   {
5777     if (CommandCode == B_SET_PROPERTY)
5778     {
5779       /* SET commands require at least one argument value. */
5780       if (argc < 4)
5781       {
5782         cerr << PrintUsage;
5783         DisplayErrorMessage ("SET commands require at least one "
5784           "argument value after the property name", -1, ErrorTitle);
5785         goto ErrorExit;
5786       }
5787     }
5788     else
5789       if (argc < 3)
5790       {
5791         cerr << PrintUsage;
5792         DisplayErrorMessage ("You need to specify a property to act on",
5793           -1, ErrorTitle);
5794         goto ErrorExit;
5795       }
5796     PropertyName = argv[2];
5797     ArgumentIndex = 3;
5798   }
5799 
5800   /* See if it is one of our commands. */
5801 
5802   for (PropInfoPntr = g_ScriptingPropertyList + 0; true; PropInfoPntr++)
5803   {
5804     if (PropInfoPntr->name == 0)
5805     {
5806       cerr << PrintUsage;
5807       DisplayErrorMessage ("The property specified isn't known or "
5808         "doesn't support the requested action (usually means it is an "
5809         "unknown command)", -1, ErrorTitle);
5810       goto ErrorExit; /* Unrecognized command. */
5811     }
5812 
5813     if (PropInfoPntr->commands[0] == CommandCode &&
5814     strcasecmp (PropertyName, PropInfoPntr->name) == 0)
5815       break;
5816   }
5817 
5818   /* Make the equivalent command message.  For commands with multiple
5819   arguments, repeat the message for each single argument and just change the
5820   data portion for each extra argument.  Send the command and wait for a reply,
5821   which we'll print out. */
5822 
5823   ScriptMessage.MakeEmpty ();
5824   ScriptMessage.what = CommandCode;
5825   ScriptMessage.AddSpecifier (PropertyName);
5826   while (true)
5827   {
5828     if (ArgumentIndex < argc) /* If there are arguments to be added. */
5829     {
5830       ValuePntr = argv[ArgumentIndex];
5831 
5832       /* Convert the value into the likely kind of data. */
5833 
5834       if (strcasecmp (ValuePntr, "yes") == 0 ||
5835       strcasecmp (ValuePntr, "true") == 0)
5836         ScriptMessage.AddBool (g_DataName, true);
5837       else if (strcasecmp (ValuePntr, "no") == 0 ||
5838       strcasecmp (ValuePntr, "false") == 0)
5839         ScriptMessage.AddBool (g_DataName, false);
5840       else
5841       {
5842         /* See if it is a number. */
5843         i = strtol (ValuePntr, &EndPntr, 0);
5844         if (*EndPntr == 0)
5845           ScriptMessage.AddInt32 (g_DataName, i);
5846         else /* Nope, it's just a string. */
5847           ScriptMessage.AddString (g_DataName, ValuePntr);
5848       }
5849     }
5850 
5851     ErrorCode = be_app_messenger.SendMessage (&ScriptMessage, &ReplyMessage);
5852     if (ErrorCode != B_OK)
5853     {
5854       DisplayErrorMessage ("Unable to send scripting command",
5855         ErrorCode, ErrorTitle);
5856       goto ErrorExit;
5857     }
5858 
5859     /* Print the reply to the scripting command.  Even in server mode.  To
5860     standard output. */
5861 
5862     if (ReplyMessage.FindString ("CommandText", &TempStringPntr) == B_OK)
5863     {
5864       TempInt32 = -1;
5865       if (ReplyMessage.FindInt32 ("error", &TempInt32) == B_OK &&
5866       TempInt32 == B_OK)
5867       {
5868         /* It's a successful reply to one of our scripting messages.  Print out
5869         the returned values code for command line users to see. */
5870 
5871         cout << "Result of command to " << TempStringPntr << " is:\t";
5872         if (ReplyMessage.FindString (g_ResultName, &TempStringPntr) == B_OK)
5873           cout << "\"" << TempStringPntr << "\"";
5874         else if (ReplyMessage.FindInt32 (g_ResultName, &TempInt32) == B_OK)
5875           cout << TempInt32;
5876         else if (ReplyMessage.FindFloat (g_ResultName, &TempFloat) == B_OK)
5877           cout << TempFloat;
5878         else if (ReplyMessage.FindBool (g_ResultName, &TempBool) == B_OK)
5879           cout << (TempBool ? "true" : "false");
5880         else
5881           cout << "just plain success";
5882         if (ReplyMessage.FindInt32 ("count", &TempInt32) == B_OK)
5883           cout << "\t(count " << TempInt32 << ")";
5884         for (i = 0; (i < 50) &&
5885         ReplyMessage.FindString ("words", i, &TempStringPntr) == B_OK &&
5886         ReplyMessage.FindFloat ("ratios", i, &TempFloat) == B_OK;
5887         i++)
5888         {
5889           if (i == 0)
5890             cout << "\twith top words:\t";
5891           else
5892             cout << "\t";
5893           cout << TempStringPntr << "/" << TempFloat;
5894         }
5895         cout << endl;
5896       }
5897       else /* An error reply, print out the error, even in server mode. */
5898       {
5899         cout << "Failure of command " << TempStringPntr << ", error ";
5900         cout << TempInt32 << " (" << strerror (TempInt32) << ")";
5901         if (ReplyMessage.FindString ("message", &TempStringPntr) == B_OK)
5902           cout << ", message: " << TempStringPntr;
5903         cout << "." << endl;
5904       }
5905     }
5906 
5907     /* Advance to the next argument and its scripting message. */
5908 
5909     ScriptMessage.RemoveName (g_DataName);
5910     if (++ArgumentIndex >= argc)
5911       break;
5912   }
5913 
5914 ErrorExit:
5915   free (argv);
5916 }
5917 
5918 
5919 /* Given a bunch of references to files, open the files.  If it's a database
5920 file, switch to using it as a database.  Otherwise, treat them as text files
5921 and add them to the database.  Prompt the user for the spam or genuine or
5922 uncertain (declassification) choice, with the option to bulk mark many files at
5923 once. */
5924 
5925 void
ProcessRefs(BMessage * MessagePntr)5926 CommanderLooper::ProcessRefs (BMessage *MessagePntr)
5927 {
5928   bool                         BulkMode = false;
5929   ClassificationTypes          BulkClassification = CL_GENUINE;
5930   ClassificationChoicesWindow *ChoiceWindowPntr;
5931   BEntry                       Entry;
5932   entry_ref                    EntryRef;
5933   status_t                     ErrorCode;
5934   const char                  *ErrorTitle = "CommanderLooper::ProcessRefs";
5935   int32                        NumberOfRefs = 0;
5936   BPath                        Path;
5937   int                          RefIndex;
5938   BMessage                     ReplyMessage;
5939   BMessage                     ScriptingMessage;
5940   bool                         TempBool;
5941   BFile                        TempFile;
5942   int32                        TempInt32;
5943   char                         TempString [PATH_MAX + 1024];
5944   type_code                    TypeCode;
5945 
5946   // Wait for ReadyToRun to finish initializing the globals with the sizes of
5947   // the controls, since they are needed when we show the custom alert box for
5948   // choosing the message type.
5949 
5950   TempInt32 = 0;
5951   while (!g_AppReadyToRunCompleted && TempInt32++ < 10)
5952     snooze (200000);
5953 
5954   ErrorCode = MessagePntr->GetInfo ("refs", &TypeCode, &NumberOfRefs);
5955   if (ErrorCode != B_OK || TypeCode != B_REF_TYPE || NumberOfRefs <= 0)
5956   {
5957     DisplayErrorMessage ("Unable to get refs from the message",
5958       ErrorCode, ErrorTitle);
5959     return;
5960   }
5961 
5962   if (MessagePntr->FindBool ("BulkMode", &TempBool) == B_OK)
5963     BulkMode = TempBool;
5964   if (MessagePntr->FindInt32 ("BulkClassification", &TempInt32) == B_OK &&
5965   TempInt32 >= 0 && TempInt32 < CL_MAX)
5966     BulkClassification = (ClassificationTypes) TempInt32;
5967 
5968   for (RefIndex = 0;
5969   MessagePntr->FindRef ("refs", RefIndex, &EntryRef) == B_OK;
5970   RefIndex++)
5971   {
5972     ScriptingMessage.MakeEmpty ();
5973     ScriptingMessage.what = 0; /* Haven't figured out what to do yet. */
5974 
5975     /* See if the entry is a valid file or directory or other thing. */
5976 
5977     ErrorCode = Entry.SetTo (&EntryRef, true /* traverse symbolic links */);
5978     if (ErrorCode != B_OK ||
5979     ((ErrorCode = /* assignment */ B_ENTRY_NOT_FOUND) != 0 /* this pacifies
5980     mwcc -nwhitehorn */ && !Entry.Exists ()) ||
5981     ((ErrorCode = Entry.GetPath (&Path)) != B_OK))
5982     {
5983       DisplayErrorMessage ("Bad entry reference encountered, will skip it",
5984         ErrorCode, ErrorTitle);
5985       BulkMode = false;
5986       continue; /* Bad file reference, try the next one. */
5987     }
5988 
5989     /* If it's a file, check if it is a spam database file.  Go by the magic
5990     text at the start of the file, in case someone has edited the file with a
5991     spreadsheet or other tool and lost the MIME type. */
5992 
5993     if (Entry.IsFile ())
5994     {
5995       ErrorCode = TempFile.SetTo (&Entry, B_READ_ONLY);
5996       if (ErrorCode != B_OK)
5997       {
5998         sprintf (TempString, "Unable to open file \"%s\" for reading, will "
5999           "skip it", Path.Path ());
6000         DisplayErrorMessage (TempString, ErrorCode, ErrorTitle);
6001         BulkMode = false;
6002         continue;
6003       }
6004       if (TempFile.Read (TempString, strlen (g_DatabaseRecognitionString)) ==
6005       (int) strlen (g_DatabaseRecognitionString) && strncmp (TempString,
6006       g_DatabaseRecognitionString, strlen (g_DatabaseRecognitionString)) == 0)
6007       {
6008         ScriptingMessage.what = B_SET_PROPERTY;
6009         ScriptingMessage.AddSpecifier (g_PropertyNames[PN_DATABASE_FILE]);
6010         ScriptingMessage.AddString (g_DataName, Path.Path ());
6011       }
6012       TempFile.Unset ();
6013     }
6014 
6015     /* Not a database file.  Could be a directory or a file.  Submit it as
6016     something to be marked spam or genuine. */
6017 
6018     if (ScriptingMessage.what == 0)
6019     {
6020       if (!Entry.IsFile ())
6021       {
6022         sprintf (TempString, "\"%s\" is not a file, can't do anything with it",
6023           Path.Path ());
6024         DisplayErrorMessage (TempString, -1, ErrorTitle);
6025         BulkMode = false;
6026         continue;
6027       }
6028 
6029       if (!BulkMode) /* Have to ask the user. */
6030       {
6031         ChoiceWindowPntr = new ClassificationChoicesWindow (
6032           BRect (40, 40, 40 + 50 * g_MarginBetweenControls,
6033           40 + g_ButtonHeight * 5), Path.Path (), NumberOfRefs - RefIndex);
6034         ChoiceWindowPntr->Go (&BulkMode, &BulkClassification);
6035         if (BulkClassification == CL_MAX)
6036           break; /* Cancel was picked. */
6037       }
6038 
6039       /* Format the command for classifying the file. */
6040 
6041       ScriptingMessage.what = B_SET_PROPERTY;
6042 
6043       if (BulkClassification == CL_GENUINE)
6044         ScriptingMessage.AddSpecifier (g_PropertyNames[PN_GENUINE]);
6045       else if (BulkClassification == CL_SPAM)
6046         ScriptingMessage.AddSpecifier (g_PropertyNames[PN_SPAM]);
6047       else if (BulkClassification == CL_UNCERTAIN)
6048         ScriptingMessage.AddSpecifier (g_PropertyNames[PN_UNCERTAIN]);
6049       else /* Broken code */
6050         break;
6051       ScriptingMessage.AddString (g_DataName, Path.Path ());
6052     }
6053 
6054     /* Tell the BApplication to do the work, and wait for it to finish.  The
6055     BApplication will display any error messages for us. */
6056 
6057     ErrorCode =
6058       be_app_messenger.SendMessage (&ScriptingMessage, &ReplyMessage);
6059     if (ErrorCode != B_OK)
6060     {
6061       DisplayErrorMessage ("Unable to send scripting command",
6062         ErrorCode, ErrorTitle);
6063       return;
6064     }
6065 
6066     /* If there was an error, allow the user to stop by switching off bulk
6067     mode.  The message will already have been displayed in an alert box, if
6068     server mode is off. */
6069 
6070     if (ReplyMessage.FindInt32 ("error", &TempInt32) != B_OK ||
6071     TempInt32 != B_OK)
6072       BulkMode = false;
6073   }
6074 }
6075 
6076 
6077 
6078 /******************************************************************************
6079  * Implementation of the ControlsView class, constructor, destructor and the
6080  * rest of the member functions in mostly alphabetical order.
6081  */
6082 
ControlsView(BRect NewBounds)6083 ControlsView::ControlsView (BRect NewBounds)
6084 : BView (NewBounds, "ControlsView", B_FOLLOW_TOP | B_FOLLOW_LEFT_RIGHT,
6085     B_WILL_DRAW | B_PULSE_NEEDED | B_NAVIGABLE_JUMP | B_FRAME_EVENTS),
6086   m_AboutButtonPntr (NULL),
6087   m_AddExampleButtonPntr (NULL),
6088   m_BrowseButtonPntr (NULL),
6089   m_BrowseFilePanelPntr (NULL),
6090   m_CreateDatabaseButtonPntr (NULL),
6091   m_DatabaseFileNameTextboxPntr (NULL),
6092   m_DatabaseLoadDone (false),
6093   m_EstimateSpamButtonPntr (NULL),
6094   m_EstimateSpamFilePanelPntr (NULL),
6095   m_GenuineCountTextboxPntr (NULL),
6096   m_IgnorePreviousClassCheckboxPntr (NULL),
6097   m_InstallThingsButtonPntr (NULL),
6098   m_PurgeAgeTextboxPntr (NULL),
6099   m_PurgeButtonPntr (NULL),
6100   m_PurgePopularityTextboxPntr (NULL),
6101   m_ResetToDefaultsButtonPntr (NULL),
6102   m_ScoringModeMenuBarPntr (NULL),
6103   m_ScoringModePopUpMenuPntr (NULL),
6104   m_ServerModeCheckboxPntr (NULL),
6105   m_SpamCountTextboxPntr (NULL),
6106   m_TimeOfLastPoll (0),
6107   m_TokenizeModeMenuBarPntr (NULL),
6108   m_TokenizeModePopUpMenuPntr (NULL),
6109   m_WordCountTextboxPntr (NULL)
6110 {
6111 }
6112 
6113 
~ControlsView()6114 ControlsView::~ControlsView ()
6115 {
6116   if (m_BrowseFilePanelPntr != NULL)
6117   {
6118     delete m_BrowseFilePanelPntr;
6119     m_BrowseFilePanelPntr = NULL;
6120   }
6121 
6122   if (m_EstimateSpamFilePanelPntr != NULL)
6123   {
6124     delete m_EstimateSpamFilePanelPntr;
6125     m_EstimateSpamFilePanelPntr = NULL;
6126   }
6127 }
6128 
6129 
6130 void
AttachedToWindow()6131 ControlsView::AttachedToWindow ()
6132 {
6133   float         BigPurgeButtonTop;
6134   BMessage      CommandMessage;
6135   const char   *EightDigitsString = " 12345678 ";
6136   float         Height;
6137   float         Margin;
6138   float         RowHeight;
6139   float         RowTop;
6140   ScoringModes  ScoringMode;
6141   const char   *StringPntr;
6142   BMenuItem    *TempMenuItemPntr;
6143   BRect         TempRect;
6144   char          TempString [PATH_MAX];
6145   TokenizeModes TokenizeMode;
6146   float         Width;
6147   float         X;
6148 
6149   SetViewColor (ui_color (B_PANEL_BACKGROUND_COLOR));
6150 
6151   TempRect = Bounds ();
6152   X = TempRect.right;
6153   RowTop = TempRect.top;
6154   RowHeight = g_ButtonHeight;
6155   if (g_TextBoxHeight > RowHeight)
6156     RowHeight = g_TextBoxHeight;
6157   RowHeight = ceilf (RowHeight * 1.1);
6158 
6159   /* Make the Create button at the far right of the first row of controls,
6160   which are all database file related. */
6161 
6162   Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6163   TempRect = Bounds ();
6164   TempRect.top = RowTop + Margin;
6165   TempRect.bottom = TempRect.top + g_ButtonHeight;
6166 
6167   CommandMessage.MakeEmpty ();
6168   CommandMessage.what = B_CREATE_PROPERTY;
6169   CommandMessage.AddSpecifier (g_PropertyNames[PN_DATABASE_FILE]);
6170   m_CreateDatabaseButtonPntr = new BButton (TempRect, "Create Button",
6171     "Create", new BMessage (CommandMessage), B_FOLLOW_RIGHT | B_FOLLOW_TOP);
6172   if (m_CreateDatabaseButtonPntr == NULL) goto ErrorExit;
6173   AddChild (m_CreateDatabaseButtonPntr);
6174   m_CreateDatabaseButtonPntr->SetTarget (be_app);
6175   m_CreateDatabaseButtonPntr->ResizeToPreferred ();
6176   m_CreateDatabaseButtonPntr->GetPreferredSize (&Width, &Height);
6177   m_CreateDatabaseButtonPntr->MoveTo (X - Width, TempRect.top);
6178   X -= Width + g_MarginBetweenControls;
6179 
6180   /* Make the Browse button, middle of the first row. */
6181 
6182   Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6183   TempRect = Bounds ();
6184   TempRect.top = RowTop + Margin;
6185   TempRect.bottom = TempRect.top + g_ButtonHeight;
6186 
6187   m_BrowseButtonPntr = new BButton (TempRect, "Browse Button",
6188     "Browse…", new BMessage (MSG_BROWSE_BUTTON), B_FOLLOW_RIGHT | B_FOLLOW_TOP);
6189   if (m_BrowseButtonPntr == NULL) goto ErrorExit;
6190   AddChild (m_BrowseButtonPntr);
6191   m_BrowseButtonPntr->SetTarget (this);
6192   m_BrowseButtonPntr->ResizeToPreferred ();
6193   m_BrowseButtonPntr->GetPreferredSize (&Width, &Height);
6194   m_BrowseButtonPntr->MoveTo (X - Width, TempRect.top);
6195   X -= Width + g_MarginBetweenControls;
6196 
6197   /* Fill the rest of the space on the first row with the file name box. */
6198 
6199   Margin = ceilf ((RowHeight - g_TextBoxHeight) / 2);
6200   TempRect = Bounds ();
6201   TempRect.top = RowTop + Margin;
6202   TempRect.bottom = TempRect.top + g_TextBoxHeight;
6203   TempRect.right = X;
6204 
6205   StringPntr = "Word Database:";
6206   strcpy (m_DatabaseFileNameCachedValue, "Unknown...");
6207   m_DatabaseFileNameTextboxPntr = new BTextControl (TempRect,
6208     "File Name",
6209     StringPntr /* label */,
6210     m_DatabaseFileNameCachedValue /* text */,
6211     new BMessage (MSG_DATABASE_NAME),
6212     B_FOLLOW_LEFT_RIGHT | B_FOLLOW_TOP,
6213     B_WILL_DRAW | B_NAVIGABLE | B_NAVIGABLE_JUMP);
6214   AddChild (m_DatabaseFileNameTextboxPntr);
6215   m_DatabaseFileNameTextboxPntr->SetTarget (this);
6216   m_DatabaseFileNameTextboxPntr->SetDivider (
6217     be_plain_font->StringWidth (StringPntr) + g_MarginBetweenControls);
6218 
6219   /* Second row contains the purge age, and a long line explaining it.  There
6220   is space to the right where the top half of the big purge button will go. */
6221 
6222   RowTop += RowHeight /* previous row's RowHeight */;
6223   BigPurgeButtonTop = RowTop;
6224   TempRect = Bounds ();
6225   X = TempRect.left;
6226   RowHeight = g_TextBoxHeight;
6227   RowHeight = ceilf (RowHeight * 1.1);
6228 
6229   StringPntr = "Number of occurrences needed to store a word:";
6230   m_PurgeAgeCachedValue = 12345678;
6231 
6232   Margin = ceilf ((RowHeight - g_TextBoxHeight) / 2);
6233   TempRect.top = RowTop + Margin;
6234   TempRect.bottom = TempRect.top + g_TextBoxHeight;
6235   TempRect.left = X;
6236   TempRect.right = TempRect.left +
6237     be_plain_font->StringWidth (StringPntr) +
6238     be_plain_font->StringWidth (EightDigitsString) +
6239     3 * g_MarginBetweenControls;
6240 
6241   sprintf (TempString, "%d", (int) m_PurgeAgeCachedValue);
6242   m_PurgeAgeTextboxPntr = new BTextControl (TempRect,
6243     "Purge Age",
6244     StringPntr /* label */,
6245     TempString /* text */,
6246     new BMessage (MSG_PURGE_AGE),
6247     B_FOLLOW_LEFT | B_FOLLOW_TOP,
6248     B_WILL_DRAW | B_NAVIGABLE);
6249   AddChild (m_PurgeAgeTextboxPntr);
6250   m_PurgeAgeTextboxPntr->SetTarget (this);
6251   m_PurgeAgeTextboxPntr->SetDivider (
6252     be_plain_font->StringWidth (StringPntr) + g_MarginBetweenControls);
6253 
6254   /* Third row contains the purge popularity and bottom half of the purge
6255   button. */
6256 
6257   RowTop += RowHeight /* previous row's RowHeight */;
6258   TempRect = Bounds ();
6259   X = TempRect.left;
6260   RowHeight = g_TextBoxHeight;
6261   RowHeight = ceilf (RowHeight * 1.1);
6262 
6263   StringPntr = "Number of messages to store words from:";
6264   m_PurgePopularityCachedValue = 87654321;
6265   Margin = ceilf ((RowHeight - g_TextBoxHeight) / 2);
6266   TempRect.top = RowTop + Margin;
6267   TempRect.bottom = TempRect.top + g_TextBoxHeight;
6268   TempRect.left = X;
6269   TempRect.right = TempRect.left +
6270     be_plain_font->StringWidth (StringPntr) +
6271     be_plain_font->StringWidth (EightDigitsString) +
6272     3 * g_MarginBetweenControls;
6273   X = TempRect.right + g_MarginBetweenControls;
6274 
6275   sprintf (TempString, "%d", (int) m_PurgePopularityCachedValue);
6276   m_PurgePopularityTextboxPntr = new BTextControl (TempRect,
6277     "Purge Popularity",
6278     StringPntr /* label */,
6279     TempString /* text */,
6280     new BMessage (MSG_PURGE_POPULARITY),
6281     B_FOLLOW_LEFT | B_FOLLOW_TOP,
6282     B_WILL_DRAW | B_NAVIGABLE);
6283   AddChild (m_PurgePopularityTextboxPntr);
6284   m_PurgePopularityTextboxPntr->SetTarget (this);
6285   m_PurgePopularityTextboxPntr->SetDivider (
6286     be_plain_font->StringWidth (StringPntr) + g_MarginBetweenControls);
6287 
6288   /* Make the purge button, which will take up space in the 2nd and 3rd rows,
6289   on the right side.  Twice as tall as a regular button too. */
6290 
6291   StringPntr = "Remove Old Words";
6292   Margin = ceilf ((((RowTop + RowHeight) - BigPurgeButtonTop) -
6293     2 * g_TextBoxHeight) / 2);
6294   TempRect.top = BigPurgeButtonTop + Margin;
6295   TempRect.bottom = TempRect.top + 2 * g_TextBoxHeight;
6296   TempRect.left = X;
6297   TempRect.right = X + ceilf (2 * be_plain_font->StringWidth (StringPntr));
6298 
6299   CommandMessage.MakeEmpty ();
6300   CommandMessage.what = B_EXECUTE_PROPERTY;
6301   CommandMessage.AddSpecifier (g_PropertyNames[PN_PURGE]);
6302   m_PurgeButtonPntr = new BButton (TempRect, "Purge Button",
6303     StringPntr, new BMessage (CommandMessage), B_FOLLOW_LEFT | B_FOLLOW_TOP);
6304   if (m_PurgeButtonPntr == NULL) goto ErrorExit;
6305   m_PurgeButtonPntr->ResizeToPreferred();
6306   AddChild (m_PurgeButtonPntr);
6307   m_PurgeButtonPntr->SetTarget (be_app);
6308 
6309   /* The fourth row contains the ignore previous classification checkbox. */
6310 
6311   RowTop += RowHeight /* previous row's RowHeight */;
6312   TempRect = Bounds ();
6313   X = TempRect.left;
6314   RowHeight = g_CheckBoxHeight;
6315   RowHeight = ceilf (RowHeight * 1.1);
6316 
6317   StringPntr = "Allow Retraining on a Message";
6318   m_IgnorePreviousClassCachedValue = false;
6319 
6320   Margin = ceilf ((RowHeight - g_CheckBoxHeight) / 2);
6321   TempRect.top = RowTop + Margin;
6322   TempRect.bottom = TempRect.top + g_CheckBoxHeight;
6323   TempRect.left = X;
6324   m_IgnorePreviousClassCheckboxPntr = new BCheckBox (TempRect,
6325     "Ignore Check",
6326     StringPntr,
6327     new BMessage (MSG_IGNORE_CLASSIFICATION),
6328     B_FOLLOW_TOP | B_FOLLOW_LEFT);
6329   if (m_IgnorePreviousClassCheckboxPntr == NULL) goto ErrorExit;
6330   AddChild (m_IgnorePreviousClassCheckboxPntr);
6331   m_IgnorePreviousClassCheckboxPntr->SetTarget (this);
6332   m_IgnorePreviousClassCheckboxPntr->ResizeToPreferred ();
6333   m_IgnorePreviousClassCheckboxPntr->GetPreferredSize (&Width, &Height);
6334   X += Width + g_MarginBetweenControls;
6335 
6336   /* The fifth row contains the server mode checkbox. */
6337 
6338   RowTop += RowHeight /* previous row's RowHeight */;
6339   TempRect = Bounds ();
6340   RowHeight = g_CheckBoxHeight;
6341   RowHeight = ceilf (RowHeight * 1.1);
6342 
6343   StringPntr = "Print errors to Terminal";
6344   m_ServerModeCachedValue = false;
6345 
6346   Margin = ceilf ((RowHeight - g_CheckBoxHeight) / 2);
6347   TempRect.top = RowTop + Margin;
6348   TempRect.bottom = TempRect.top + g_CheckBoxHeight;
6349   m_ServerModeCheckboxPntr = new BCheckBox (TempRect,
6350     "ServerMode Check",
6351     StringPntr,
6352     new BMessage (MSG_SERVER_MODE),
6353     B_FOLLOW_TOP | B_FOLLOW_LEFT);
6354   if (m_ServerModeCheckboxPntr == NULL) goto ErrorExit;
6355   AddChild (m_ServerModeCheckboxPntr);
6356   m_ServerModeCheckboxPntr->SetTarget (this);
6357   m_ServerModeCheckboxPntr->ResizeToPreferred ();
6358   m_ServerModeCheckboxPntr->GetPreferredSize (&Width, &Height);
6359 
6360   /* This row just contains a huge pop-up menu which shows the tokenize mode
6361   and an explanation of what each mode does. */
6362 
6363   RowTop += RowHeight /* previous row's RowHeight */;
6364   TempRect = Bounds ();
6365   RowHeight = g_PopUpMenuHeight;
6366   RowHeight = ceilf (RowHeight * 1.1);
6367 
6368   Margin = ceilf ((RowHeight - g_PopUpMenuHeight) / 2);
6369   TempRect.top = RowTop + Margin;
6370   TempRect.bottom = TempRect.top + g_PopUpMenuHeight;
6371 
6372   m_TokenizeModeCachedValue = TM_MAX; /* Illegal value will force redraw. */
6373   m_TokenizeModeMenuBarPntr = new BMenuBar (TempRect, "TokenizeModeMenuBar",
6374     B_FOLLOW_LEFT_RIGHT | B_FOLLOW_TOP, B_ITEMS_IN_COLUMN,
6375     false /* resize to fit items */);
6376   if (m_TokenizeModeMenuBarPntr == NULL) goto ErrorExit;
6377   m_TokenizeModePopUpMenuPntr = new BPopUpMenu ("TokenizeModePopUpMenu");
6378   if (m_TokenizeModePopUpMenuPntr == NULL) goto ErrorExit;
6379 
6380   for (TokenizeMode = (TokenizeModes) 0;
6381   TokenizeMode < TM_MAX;
6382   TokenizeMode = (TokenizeModes) ((int) TokenizeMode + 1))
6383   {
6384     /* Each different tokenize mode gets its own menu item.  Selecting the item
6385     will send a canned command to the application to switch to the appropriate
6386     tokenize mode.  An optional explanation of each mode is added to the mode
6387     name string. */
6388 
6389     CommandMessage.MakeEmpty ();
6390     CommandMessage.what = B_SET_PROPERTY;
6391     CommandMessage.AddSpecifier (g_PropertyNames[PN_TOKENIZE_MODE]);
6392     CommandMessage.AddString (g_DataName, g_TokenizeModeNames[TokenizeMode]);
6393     strcpy (TempString, g_TokenizeModeNames[TokenizeMode]);
6394     switch (TokenizeMode)
6395     {
6396       case TM_WHOLE:
6397         strcat (TempString, " - Scan everything");
6398         break;
6399 
6400       case TM_PLAIN_TEXT:
6401         strcat (TempString, " - Scan e-mail body text except rich text");
6402         break;
6403 
6404       case TM_PLAIN_TEXT_HEADER:
6405         strcat (TempString, " - Scan entire e-mail text except rich text");
6406         break;
6407 
6408       case TM_ANY_TEXT:
6409         strcat (TempString, " - Scan e-mail body text and text attachments");
6410         break;
6411 
6412       case TM_ANY_TEXT_HEADER:
6413        strcat (TempString, " - Scan entire e-mail text and text attachments (recommended)");
6414         break;
6415 
6416       case TM_ALL_PARTS:
6417         strcat (TempString, " - Scan e-mail body and all attachments");
6418         break;
6419 
6420       case TM_ALL_PARTS_HEADER:
6421         strcat (TempString, " - Scan all parts of the e-mail");
6422         break;
6423 
6424       case TM_JUST_HEADER:
6425         strcat (TempString, " - Scan just the header (mail routing information)");
6426         break;
6427 
6428       default:
6429         break;
6430     }
6431     TempMenuItemPntr =
6432       new BMenuItem (TempString, new BMessage (CommandMessage));
6433     if (TempMenuItemPntr == NULL) goto ErrorExit;
6434     TempMenuItemPntr->SetTarget (be_app);
6435     m_TokenizeModePopUpMenuPntr->AddItem (TempMenuItemPntr);
6436   }
6437   m_TokenizeModeMenuBarPntr->AddItem (m_TokenizeModePopUpMenuPntr);
6438   AddChild (m_TokenizeModeMenuBarPntr);
6439 
6440   /* This row just contains a huge pop-up menu which shows the scoring mode
6441   and an explanation of what each mode does. */
6442 
6443   RowTop += RowHeight /* previous row's RowHeight */;
6444   TempRect = Bounds ();
6445   RowHeight = g_PopUpMenuHeight;
6446   RowHeight = ceilf (RowHeight * 1.1);
6447 
6448   Margin = ceilf ((RowHeight - g_PopUpMenuHeight) / 2);
6449   TempRect.top = RowTop + Margin;
6450   TempRect.bottom = TempRect.top + g_PopUpMenuHeight;
6451 
6452   m_ScoringModeCachedValue = SM_MAX; /* Illegal value will force redraw. */
6453   m_ScoringModeMenuBarPntr = new BMenuBar (TempRect, "ScoringModeMenuBar",
6454     B_FOLLOW_LEFT_RIGHT | B_FOLLOW_TOP, B_ITEMS_IN_COLUMN,
6455     false /* resize to fit items */);
6456   if (m_ScoringModeMenuBarPntr == NULL) goto ErrorExit;
6457   m_ScoringModePopUpMenuPntr = new BPopUpMenu ("ScoringModePopUpMenu");
6458   if (m_ScoringModePopUpMenuPntr == NULL) goto ErrorExit;
6459 
6460   for (ScoringMode = (ScoringModes) 0;
6461   ScoringMode < SM_MAX;
6462   ScoringMode = (ScoringModes) ((int) ScoringMode + 1))
6463   {
6464     /* Each different scoring mode gets its own menu item.  Selecting the item
6465     will send a canned command to the application to switch to the appropriate
6466     scoring mode.  An optional explanation of each mode is added to the mode
6467     name string. */
6468 
6469     CommandMessage.MakeEmpty ();
6470     CommandMessage.what = B_SET_PROPERTY;
6471     CommandMessage.AddSpecifier (g_PropertyNames[PN_SCORING_MODE]);
6472     CommandMessage.AddString (g_DataName, g_ScoringModeNames[ScoringMode]);
6473 /*
6474     strcpy (TempString, g_ScoringModeNames[ScoringMode]);
6475     switch (ScoringMode)
6476     {
6477       case SM_ROBINSON:
6478         strcat (TempString, " - Learning Method 1: Naive Bayesian");
6479         break;
6480 
6481       case SM_CHISQUARED:
6482         strcat (TempString, " - Learning Method 2: Chi-Squared");
6483         break;
6484 
6485       default:
6486         break;
6487     }
6488 */
6489     switch (ScoringMode)
6490     {
6491       case SM_ROBINSON:
6492         strcpy (TempString, "Learning method 1: Naive Bayesian");
6493         break;
6494 
6495       case SM_CHISQUARED:
6496         strcpy (TempString, "Learning method 2: Chi-Squared");
6497         break;
6498 
6499       default:
6500         break;
6501     }
6502     TempMenuItemPntr =
6503       new BMenuItem (TempString, new BMessage (CommandMessage));
6504     if (TempMenuItemPntr == NULL) goto ErrorExit;
6505     TempMenuItemPntr->SetTarget (be_app);
6506     m_ScoringModePopUpMenuPntr->AddItem (TempMenuItemPntr);
6507   }
6508   m_ScoringModeMenuBarPntr->AddItem (m_ScoringModePopUpMenuPntr);
6509   AddChild (m_ScoringModeMenuBarPntr);
6510 
6511   /* The next row has the install MIME types button and the reset to defaults
6512   button, one on the left and the other on the right. */
6513 
6514   RowTop += RowHeight /* previous row's RowHeight */;
6515   TempRect = Bounds ();
6516   RowHeight = g_ButtonHeight;
6517   RowHeight = ceilf (RowHeight * 1.1);
6518 
6519   Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6520   TempRect.top = RowTop + Margin;
6521   TempRect.bottom = TempRect.top + g_ButtonHeight;
6522 
6523   CommandMessage.MakeEmpty ();
6524   CommandMessage.what = B_EXECUTE_PROPERTY;
6525   CommandMessage.AddSpecifier (g_PropertyNames[PN_INSTALL_THINGS]);
6526   m_InstallThingsButtonPntr = new BButton (TempRect, "Install Button",
6527     "Install spam types",
6528     new BMessage (CommandMessage),
6529     B_FOLLOW_LEFT | B_FOLLOW_TOP);
6530   if (m_InstallThingsButtonPntr == NULL) goto ErrorExit;
6531   AddChild (m_InstallThingsButtonPntr);
6532   m_InstallThingsButtonPntr->SetTarget (be_app);
6533   m_InstallThingsButtonPntr->ResizeToPreferred ();
6534 
6535   /* The Reset to Defaults button.  On the right side of the row. */
6536 
6537   Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6538   TempRect = Bounds ();
6539   TempRect.top = RowTop + Margin;
6540   TempRect.bottom = TempRect.top + g_ButtonHeight;
6541 
6542   CommandMessage.MakeEmpty ();
6543   CommandMessage.what = B_EXECUTE_PROPERTY;
6544   CommandMessage.AddSpecifier (g_PropertyNames[PN_RESET_TO_DEFAULTS]);
6545   m_ResetToDefaultsButtonPntr = new BButton (TempRect, "Reset Button",
6546     "Default settings", new BMessage (CommandMessage),
6547     B_FOLLOW_RIGHT | B_FOLLOW_TOP);
6548   if (m_ResetToDefaultsButtonPntr == NULL) goto ErrorExit;
6549   AddChild (m_ResetToDefaultsButtonPntr);
6550   m_ResetToDefaultsButtonPntr->SetTarget (be_app);
6551   m_ResetToDefaultsButtonPntr->ResizeToPreferred ();
6552   m_ResetToDefaultsButtonPntr->GetPreferredSize (&Width, &Height);
6553   m_ResetToDefaultsButtonPntr->MoveTo (TempRect.right - Width, TempRect.top);
6554 
6555   /* The next row contains the Estimate, Add Examples and About buttons. */
6556 
6557   RowTop += RowHeight /* previous row's RowHeight */;
6558   TempRect = Bounds ();
6559   X = TempRect.left;
6560   RowHeight = g_ButtonHeight;
6561   RowHeight = ceilf (RowHeight * 1.1);
6562 
6563   Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6564   TempRect.top = RowTop + Margin;
6565   TempRect.bottom = TempRect.top + g_ButtonHeight;
6566   TempRect.left = X;
6567 
6568   m_EstimateSpamButtonPntr = new BButton (TempRect, "Estimate Button",
6569     "Scan a message",
6570     new BMessage (MSG_ESTIMATE_BUTTON),
6571     B_FOLLOW_LEFT | B_FOLLOW_TOP);
6572   if (m_EstimateSpamButtonPntr == NULL) goto ErrorExit;
6573   AddChild (m_EstimateSpamButtonPntr);
6574   m_EstimateSpamButtonPntr->SetTarget (this);
6575   m_EstimateSpamButtonPntr->ResizeToPreferred ();
6576   X = m_EstimateSpamButtonPntr->Frame().right + g_MarginBetweenControls;
6577 
6578   /* The Add Example button in the middle.  Does the same as the browse button,
6579   but don't tell anyone that! */
6580 
6581   Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6582   TempRect.top = RowTop + Margin;
6583   TempRect.bottom = TempRect.top + g_ButtonHeight;
6584   TempRect.left = X;
6585 
6586   m_AddExampleButtonPntr = new BButton (TempRect, "Example Button",
6587     "Train spam filter on a message",
6588     new BMessage (MSG_BROWSE_BUTTON),
6589     B_FOLLOW_LEFT_RIGHT | B_FOLLOW_TOP,
6590     B_WILL_DRAW | B_NAVIGABLE | B_FULL_UPDATE_ON_RESIZE);
6591   if (m_AddExampleButtonPntr == NULL) goto ErrorExit;
6592   AddChild (m_AddExampleButtonPntr);
6593   m_AddExampleButtonPntr->SetTarget (this);
6594   m_AddExampleButtonPntr->ResizeToPreferred ();
6595   X = m_AddExampleButtonPntr->Frame().right + g_MarginBetweenControls;
6596 
6597   /* Add the About button on the right. */
6598 
6599   Margin = ceilf ((RowHeight - g_ButtonHeight) / 2);
6600   TempRect = Bounds ();
6601   TempRect.top = RowTop + Margin;
6602   TempRect.bottom = TempRect.top + g_ButtonHeight;
6603   TempRect.left = X;
6604 
6605   m_AboutButtonPntr = new BButton (TempRect, "About Button",
6606     "About…",
6607     new BMessage (B_ABOUT_REQUESTED),
6608     B_FOLLOW_RIGHT | B_FOLLOW_TOP);
6609   if (m_AboutButtonPntr == NULL) goto ErrorExit;
6610   AddChild (m_AboutButtonPntr);
6611   m_AboutButtonPntr->SetTarget (be_app);
6612 
6613   /* This row displays various counters.  Starting with the genuine messages
6614   count on the left. */
6615 
6616   RowTop += RowHeight /* previous row's RowHeight */;
6617   TempRect = Bounds ();
6618   RowHeight = g_TextBoxHeight;
6619   RowHeight = ceilf (RowHeight * 1.1);
6620 
6621   StringPntr = "Genuine messages:";
6622   m_GenuineCountCachedValue = 87654321;
6623   sprintf (TempString, "%d", (int) m_GenuineCountCachedValue);
6624 
6625   Margin = ceilf ((RowHeight - g_TextBoxHeight) / 2);
6626   TempRect = Bounds ();
6627   TempRect.top = RowTop + Margin;
6628   TempRect.bottom = TempRect.top + g_TextBoxHeight;
6629   TempRect.right = TempRect.left +
6630     be_plain_font->StringWidth (StringPntr) +
6631     be_plain_font->StringWidth (TempString) +
6632     3 * g_MarginBetweenControls;
6633 
6634   m_GenuineCountTextboxPntr = new BTextControl (TempRect,
6635     "Genuine count",
6636     StringPntr /* label */,
6637     TempString /* text */,
6638     NULL /* no message */,
6639     B_FOLLOW_LEFT | B_FOLLOW_TOP,
6640     B_WILL_DRAW /* not B_NAVIGABLE */);
6641   AddChild (m_GenuineCountTextboxPntr);
6642   m_GenuineCountTextboxPntr->SetTarget (this); /* Not that it matters. */
6643   m_GenuineCountTextboxPntr->SetDivider (
6644     be_plain_font->StringWidth (StringPntr) + g_MarginBetweenControls);
6645   m_GenuineCountTextboxPntr->SetEnabled (false); /* For display only. */
6646 
6647   /* The word count in the center. */
6648 
6649   StringPntr = "Word count:";
6650   m_WordCountCachedValue = 87654321;
6651   sprintf (TempString, "%d", (int) m_WordCountCachedValue);
6652 
6653   Margin = ceilf ((RowHeight - g_TextBoxHeight) / 2);
6654   TempRect = Bounds ();
6655   TempRect.top = RowTop + Margin;
6656   TempRect.bottom = TempRect.top + g_TextBoxHeight;
6657   Width = be_plain_font->StringWidth (StringPntr) +
6658     be_plain_font->StringWidth (TempString) +
6659     3 * g_MarginBetweenControls;
6660   TempRect.left = ceilf ((TempRect.right - TempRect.left) / 2 - Width / 2);
6661   TempRect.right = TempRect.left + Width;
6662 
6663   m_WordCountTextboxPntr = new BTextControl (TempRect,
6664     "Word count",
6665     StringPntr /* label */,
6666     TempString /* text */,
6667     NULL /* no message */,
6668     B_FOLLOW_H_CENTER | B_FOLLOW_TOP,
6669     B_WILL_DRAW /* not B_NAVIGABLE */);
6670   AddChild (m_WordCountTextboxPntr);
6671   m_WordCountTextboxPntr->SetTarget (this); /* Not that it matters. */
6672   m_WordCountTextboxPntr->SetDivider (
6673     be_plain_font->StringWidth (StringPntr) + g_MarginBetweenControls);
6674   m_WordCountTextboxPntr->SetEnabled (false); /* For display only. */
6675 
6676   /* The spam count on the far right. */
6677 
6678   StringPntr = "Spam messages:";
6679   m_SpamCountCachedValue = 87654321;
6680   sprintf (TempString, "%d", (int) m_SpamCountCachedValue);
6681 
6682   Margin = ceilf ((RowHeight - g_TextBoxHeight) / 2);
6683   TempRect = Bounds ();
6684   TempRect.top = RowTop + Margin;
6685   TempRect.bottom = TempRect.top + g_TextBoxHeight;
6686   TempRect.left = TempRect.right -
6687     be_plain_font->StringWidth (StringPntr) -
6688     be_plain_font->StringWidth (TempString) -
6689     3 * g_MarginBetweenControls;
6690 
6691   m_SpamCountTextboxPntr = new BTextControl (TempRect,
6692     "Spam count",
6693     StringPntr /* label */,
6694     TempString /* text */,
6695     NULL /* no message */,
6696     B_FOLLOW_RIGHT | B_FOLLOW_TOP,
6697     B_WILL_DRAW /* not B_NAVIGABLE */);
6698   AddChild (m_SpamCountTextboxPntr);
6699   m_SpamCountTextboxPntr->SetTarget (this); /* Not that it matters. */
6700   m_SpamCountTextboxPntr->SetDivider (
6701     be_plain_font->StringWidth (StringPntr) + g_MarginBetweenControls);
6702   m_SpamCountTextboxPntr->SetEnabled (false); /* For display only. */
6703 
6704   /* Change the size of our view so it only takes up the space needed by the
6705   buttons. */
6706 
6707   RowTop += RowHeight /* previous row's RowHeight */;
6708   ResizeTo (Bounds().Width(), RowTop - Bounds().top + 1);
6709 
6710   return; /* Successful. */
6711 
6712 ErrorExit:
6713   DisplayErrorMessage ("Unable to initialise the controls view.");
6714 }
6715 
6716 
6717 void
BrowseForDatabaseFile()6718 ControlsView::BrowseForDatabaseFile ()
6719 {
6720   if (m_BrowseFilePanelPntr == NULL)
6721   {
6722     BEntry      DirectoryEntry;
6723     entry_ref   DirectoryEntryRef;
6724     BMessage    GetDatabasePathCommand;
6725     BMessage    GetDatabasePathResult;
6726     const char *StringPntr = NULL;
6727 
6728     /* Create a new file panel.  First set up the entry ref stuff so that the
6729     file panel can open to show the initial directory (the one where the
6730     database file currently is).  Note that we have to create it after the
6731     window and view are up and running, otherwise the BMessenger won't point to
6732     a valid looper/handler.  First find out the current database file name to
6733     use as a starting point. */
6734 
6735     GetDatabasePathCommand.what = B_GET_PROPERTY;
6736     GetDatabasePathCommand.AddSpecifier (g_PropertyNames[PN_DATABASE_FILE]);
6737     be_app_messenger.SendMessage (&GetDatabasePathCommand,
6738       &GetDatabasePathResult, 5000000 /* delivery timeout */,
6739       5000000 /* reply timeout */);
6740     if (GetDatabasePathResult.FindString (g_ResultName, &StringPntr) != B_OK ||
6741     DirectoryEntry.SetTo (StringPntr) != B_OK ||
6742     DirectoryEntry.GetParent (&DirectoryEntry) != B_OK)
6743       DirectoryEntry.SetTo ("."); /* Default directory if we can't find it. */
6744     if (DirectoryEntry.GetRef (&DirectoryEntryRef) != B_OK)
6745     {
6746       DisplayErrorMessage (
6747         "Unable to set up the file requestor starting directory.  Sorry.");
6748       return;
6749     }
6750 
6751     m_BrowseFilePanelPntr = new BFilePanel (
6752       B_OPEN_PANEL /* mode */,
6753       &be_app_messenger /* target for event messages */,
6754       &DirectoryEntryRef /* starting directory */,
6755       B_FILE_NODE,
6756       true /* true for multiple selections */,
6757       NULL /* canned message */,
6758       NULL /* ref filter */,
6759       false /* true for modal */,
6760       true /* true to hide when done */);
6761   }
6762 
6763   if (m_BrowseFilePanelPntr != NULL)
6764     m_BrowseFilePanelPntr->Show (); /* Answer returned later in RefsReceived. */
6765 }
6766 
6767 
6768 void
BrowseForFileToEstimate()6769 ControlsView::BrowseForFileToEstimate ()
6770 {
6771   if (m_EstimateSpamFilePanelPntr == NULL)
6772   {
6773     BEntry      DirectoryEntry;
6774     entry_ref   DirectoryEntryRef;
6775     status_t    ErrorCode;
6776     BMessenger  MessengerToSelf (this);
6777     BPath       PathToMailDirectory;
6778 
6779     /* Create a new file panel.  First set up the entry ref stuff so that the
6780     file panel can open to show the initial directory (the user's mail
6781     directory).  Note that we have to create the panel after the window and
6782     view are up and running, otherwise the BMessenger won't point to a valid
6783     looper/handler. */
6784 
6785     ErrorCode = find_directory (B_USER_DIRECTORY, &PathToMailDirectory);
6786     if (ErrorCode == B_OK)
6787     {
6788       PathToMailDirectory.Append ("mail");
6789       ErrorCode = DirectoryEntry.SetTo (PathToMailDirectory.Path(),
6790         true /* traverse symbolic links*/);
6791       if (ErrorCode != B_OK || !DirectoryEntry.Exists ())
6792       {
6793         /* If no mail directory, try home directory. */
6794         find_directory (B_USER_DIRECTORY, &PathToMailDirectory);
6795         ErrorCode = DirectoryEntry.SetTo (PathToMailDirectory.Path(), true);
6796       }
6797     }
6798     if (ErrorCode != B_OK)
6799       PathToMailDirectory.SetTo (".");
6800 
6801     DirectoryEntry.SetTo (PathToMailDirectory.Path(), true);
6802     if (DirectoryEntry.GetRef (&DirectoryEntryRef) != B_OK)
6803     {
6804       DisplayErrorMessage (
6805         "Unable to set up the file requestor starting directory.  Sorry.");
6806       return;
6807     }
6808 
6809     m_EstimateSpamFilePanelPntr = new BFilePanel (
6810       B_OPEN_PANEL /* mode */,
6811       &MessengerToSelf /* target for event messages */,
6812       &DirectoryEntryRef /* starting directory */,
6813       B_FILE_NODE,
6814       true /* true for multiple selections */,
6815       new BMessage (MSG_ESTIMATE_FILE_REFS) /* canned message */,
6816       NULL /* ref filter */,
6817       false /* true for modal */,
6818       true /* true to hide when done */);
6819   }
6820 
6821   if (m_EstimateSpamFilePanelPntr != NULL)
6822     m_EstimateSpamFilePanelPntr->Show (); /* Answer sent via a message. */
6823 }
6824 
6825 
6826 /* The display has been resized.  Have to manually adjust the popup menu bar to
6827 show the new size (the sub-items need to be resized too).  Then make it redraw.
6828 Well, actually just resetting the mark on the current item will resize it
6829 properly. */
6830 
6831 void
FrameResized(float,float)6832 ControlsView::FrameResized (float, float)
6833 {
6834   m_ScoringModeCachedValue = SM_MAX; /* Force it to reset the mark. */
6835   m_TokenizeModeCachedValue = TM_MAX; /* Force it to reset the mark. */
6836 }
6837 
6838 
6839 void
MessageReceived(BMessage * MessagePntr)6840 ControlsView::MessageReceived (BMessage *MessagePntr)
6841 {
6842   BMessage CommandMessage;
6843   bool     TempBool;
6844   uint32   TempUint32;
6845 
6846   switch (MessagePntr->what)
6847   {
6848     case MSG_BROWSE_BUTTON:
6849       BrowseForDatabaseFile ();
6850       break;
6851 
6852     case MSG_DATABASE_NAME:
6853       if (strcmp (m_DatabaseFileNameCachedValue,
6854       m_DatabaseFileNameTextboxPntr->Text ()) != 0)
6855         SubmitCommandString (PN_DATABASE_FILE, B_SET_PROPERTY,
6856         m_DatabaseFileNameTextboxPntr->Text ());
6857       break;
6858 
6859     case MSG_ESTIMATE_BUTTON:
6860       BrowseForFileToEstimate ();
6861       break;
6862 
6863     case MSG_ESTIMATE_FILE_REFS:
6864       EstimateRefFilesAndDisplay (MessagePntr);
6865       break;
6866 
6867     case MSG_IGNORE_CLASSIFICATION:
6868       TempBool = (m_IgnorePreviousClassCheckboxPntr->Value() == B_CONTROL_ON);
6869       if (m_IgnorePreviousClassCachedValue != TempBool)
6870         SubmitCommandBool (PN_IGNORE_PREVIOUS_CLASSIFICATION,
6871         B_SET_PROPERTY, TempBool);
6872       break;
6873 
6874     case MSG_PURGE_AGE:
6875       TempUint32 = strtoul (m_PurgeAgeTextboxPntr->Text (), NULL, 10);
6876       if (m_PurgeAgeCachedValue != TempUint32)
6877         SubmitCommandInt32 (PN_PURGE_AGE, B_SET_PROPERTY, TempUint32);
6878       break;
6879 
6880     case MSG_PURGE_POPULARITY:
6881       TempUint32 = strtoul (m_PurgePopularityTextboxPntr->Text (), NULL, 10);
6882       if (m_PurgePopularityCachedValue != TempUint32)
6883         SubmitCommandInt32 (PN_PURGE_POPULARITY, B_SET_PROPERTY, TempUint32);
6884       break;
6885 
6886     case MSG_SERVER_MODE:
6887       TempBool = (m_ServerModeCheckboxPntr->Value() == B_CONTROL_ON);
6888       if (m_ServerModeCachedValue != TempBool)
6889         SubmitCommandBool (PN_SERVER_MODE, B_SET_PROPERTY, TempBool);
6890       break;
6891 
6892     default:
6893       BView::MessageReceived (MessagePntr);
6894   }
6895 }
6896 
6897 
6898 /* Check the server for changes in the state of the database, and if there are
6899 any changes, update the displayed values.  Since this is a read only
6900 examination of the server, we go directly to the application rather than
6901 sending it messages.  Also, when sending messages, we can't find out what it is
6902 doing while it is busy with a batch of spam additions (all the spam add
6903 commands will be in the queue ahead of our requests for info).  Instead, we
6904 lock the BApplication (so it isn't changing things while we're looking) and
6905 retrieve our values. */
6906 
6907 void
PollServerForChanges()6908 ControlsView::PollServerForChanges ()
6909 {
6910   ABSApp     *MyAppPntr;
6911   BMenuItem  *TempMenuItemPntr;
6912   char        TempString [PATH_MAX];
6913   BWindow    *WindowPntr;
6914 
6915   /* We need a pointer to our window, for changing the title etc. */
6916 
6917   WindowPntr = Window ();
6918   if (WindowPntr == NULL)
6919     return; /* No window, no point in updating the display! */
6920 
6921   /* Check the server mode flag.  If the mode is off, then the window has to be
6922   minimized.  Similarly, if it gets turned on, maximize the window.  Note that
6923   the user can maximize the window manually, even while still in server mode.
6924   */
6925 
6926   if (g_ServerMode != m_ServerModeCachedValue &&
6927   m_ServerModeCheckboxPntr != NULL)
6928   {
6929     m_ServerModeCachedValue = g_ServerMode;
6930     m_ServerModeCheckboxPntr->SetValue (
6931       m_ServerModeCachedValue ? B_CONTROL_ON : B_CONTROL_OFF);
6932     WindowPntr->Minimize (m_ServerModeCachedValue);
6933   }
6934 
6935   if (WindowPntr->IsMinimized ())
6936     return; /* Window isn't visible, don't waste time updating it. */
6937 
6938   /* So that people don't stare at a blank screen, request a database load if
6939   nothing is there.  But only do it once, so the user doesn't get a lot of
6940   invalid database messages if one doesn't exist yet.  In server mode, we never
6941   get this far so it is only loaded when the user wants to see something. */
6942 
6943   if (!m_DatabaseLoadDone)
6944   {
6945     m_DatabaseLoadDone = true;
6946     /* Counting the number of words will load the database. */
6947     SubmitCommandString (PN_DATABASE_FILE, B_COUNT_PROPERTIES, "");
6948   }
6949 
6950   /* Check various read only values, which can be read from the BApplication
6951   without having to lock it.  This is useful for displaying the number of words
6952   as it is changing.  First up is the purge age setting. */
6953 
6954   MyAppPntr = dynamic_cast<ABSApp *> (be_app);
6955   if (MyAppPntr == NULL)
6956     return; /* Doesn't exist or is the wrong class.  Not likely! */
6957 
6958   if (MyAppPntr->m_PurgeAge != m_PurgeAgeCachedValue &&
6959   m_PurgeAgeTextboxPntr != NULL)
6960   {
6961     m_PurgeAgeCachedValue = MyAppPntr->m_PurgeAge;
6962     sprintf (TempString, "%" B_PRIu32, m_PurgeAgeCachedValue);
6963     m_PurgeAgeTextboxPntr->SetText (TempString);
6964   }
6965 
6966   /* Check the purge popularity. */
6967 
6968   if (MyAppPntr->m_PurgePopularity != m_PurgePopularityCachedValue &&
6969   m_PurgePopularityTextboxPntr != NULL)
6970   {
6971     m_PurgePopularityCachedValue = MyAppPntr->m_PurgePopularity;
6972     sprintf (TempString, "%" B_PRIu32, m_PurgePopularityCachedValue);
6973     m_PurgePopularityTextboxPntr->SetText (TempString);
6974   }
6975 
6976   /* Check the Ignore Previous Classification flag. */
6977 
6978   if (MyAppPntr->m_IgnorePreviousClassification !=
6979   m_IgnorePreviousClassCachedValue &&
6980   m_IgnorePreviousClassCheckboxPntr != NULL)
6981   {
6982     m_IgnorePreviousClassCachedValue =
6983       MyAppPntr->m_IgnorePreviousClassification;
6984     m_IgnorePreviousClassCheckboxPntr->SetValue (
6985       m_IgnorePreviousClassCachedValue ? B_CONTROL_ON : B_CONTROL_OFF);
6986   }
6987 
6988   /* Update the genuine count. */
6989 
6990   if (MyAppPntr->m_TotalGenuineMessages != m_GenuineCountCachedValue &&
6991   m_GenuineCountTextboxPntr != NULL)
6992   {
6993     m_GenuineCountCachedValue = MyAppPntr->m_TotalGenuineMessages;
6994     sprintf (TempString, "%" B_PRIu32, m_GenuineCountCachedValue);
6995     m_GenuineCountTextboxPntr->SetText (TempString);
6996   }
6997 
6998   /* Update the spam count. */
6999 
7000   if (MyAppPntr->m_TotalSpamMessages != m_SpamCountCachedValue &&
7001   m_SpamCountTextboxPntr != NULL)
7002   {
7003     m_SpamCountCachedValue = MyAppPntr->m_TotalSpamMessages;
7004     sprintf (TempString, "%" B_PRIu32, m_SpamCountCachedValue);
7005     m_SpamCountTextboxPntr->SetText (TempString);
7006   }
7007 
7008   /* Update the word count. */
7009 
7010   if (MyAppPntr->m_WordCount != m_WordCountCachedValue &&
7011   m_WordCountTextboxPntr != NULL)
7012   {
7013     m_WordCountCachedValue = MyAppPntr->m_WordCount;
7014     sprintf (TempString, "%" B_PRIu32, m_WordCountCachedValue);
7015     m_WordCountTextboxPntr->SetText (TempString);
7016   }
7017 
7018   /* Update the tokenize mode pop-up menu. */
7019 
7020   if (MyAppPntr->m_TokenizeMode != m_TokenizeModeCachedValue &&
7021   m_TokenizeModePopUpMenuPntr != NULL)
7022   {
7023     m_TokenizeModeCachedValue = MyAppPntr->m_TokenizeMode;
7024     TempMenuItemPntr =
7025       m_TokenizeModePopUpMenuPntr->ItemAt ((int) m_TokenizeModeCachedValue);
7026     if (TempMenuItemPntr != NULL)
7027       TempMenuItemPntr->SetMarked (true);
7028   }
7029 
7030   /* Update the scoring mode pop-up menu. */
7031 
7032   if (MyAppPntr->m_ScoringMode != m_ScoringModeCachedValue &&
7033   m_ScoringModePopUpMenuPntr != NULL)
7034   {
7035     m_ScoringModeCachedValue = MyAppPntr->m_ScoringMode;
7036     TempMenuItemPntr =
7037       m_ScoringModePopUpMenuPntr->ItemAt ((int) m_ScoringModeCachedValue);
7038     if (TempMenuItemPntr != NULL)
7039       TempMenuItemPntr->SetMarked (true);
7040   }
7041 
7042   /* Lock the application.  This will stop it from processing any further
7043   messages until we are done.  Or if it is busy, the lock will fail. */
7044 
7045   if (MyAppPntr->LockWithTimeout (100000) != B_OK)
7046     return; /* It's probably busy doing something. */
7047 
7048   /* See if the database file name has changed. */
7049 
7050   if (strcmp (MyAppPntr->m_DatabaseFileName.String (),
7051   m_DatabaseFileNameCachedValue) != 0 &&
7052   m_DatabaseFileNameTextboxPntr != NULL)
7053   {
7054     strcpy (m_DatabaseFileNameCachedValue,
7055       MyAppPntr->m_DatabaseFileName.String ());
7056     m_DatabaseFileNameTextboxPntr->SetText (m_DatabaseFileNameCachedValue);
7057     WindowPntr->SetTitle (m_DatabaseFileNameCachedValue);
7058   }
7059 
7060   /* Done.  Let the BApplication continue processing messages. */
7061 
7062   MyAppPntr->Unlock ();
7063 }
7064 
7065 
7066 void
Pulse()7067 ControlsView::Pulse ()
7068 {
7069   if (system_time () > m_TimeOfLastPoll + 200000)
7070   {
7071     PollServerForChanges ();
7072     m_TimeOfLastPoll = system_time ();
7073   }
7074 }
7075 
7076 
7077 
7078 /******************************************************************************
7079  * Implementation of the DatabaseWindow class, constructor, destructor and the
7080  * rest of the member functions in mostly alphabetical order.
7081  */
7082 
DatabaseWindow()7083 DatabaseWindow::DatabaseWindow ()
7084 : BWindow (BRect (30, 30, 620, 400),
7085     "Haiku spam filter server",
7086     B_DOCUMENT_WINDOW, B_ASYNCHRONOUS_CONTROLS)
7087 {
7088   BRect TempRect;
7089 
7090   /* Add the controls view. */
7091 
7092   m_ControlsViewPntr = new ControlsView (Bounds ());
7093   if (m_ControlsViewPntr == NULL)
7094     goto ErrorExit;
7095   AddChild (m_ControlsViewPntr);
7096 
7097   /* Add the word view in the remaining space under the controls view. */
7098 
7099 
7100   TempRect = Bounds ();
7101   TempRect.top = m_ControlsViewPntr->Frame().bottom + 1;
7102   m_WordsViewPntr = new WordsView (TempRect);
7103   if (m_WordsViewPntr == NULL)
7104     goto ErrorExit;
7105   AddChild (m_WordsViewPntr);
7106 
7107  /* Minimize the window if we are starting up in server mode.  This is done
7108 	before the window is open so it doesn't flash onto the screen, and possibly
7109 	steal a keystroke or two.  The ControlsView will further update the minimize
7110 	mode when it detects changes in the server mode. */
7111   Minimize (g_ServerMode);
7112 
7113   return;
7114 
7115 ErrorExit:
7116   DisplayErrorMessage ("Unable to initialise the window contents.");
7117 }
7118 
7119 
7120 void
MessageReceived(BMessage * MessagePntr)7121 DatabaseWindow::MessageReceived (BMessage *MessagePntr)
7122 {
7123   if (MessagePntr->what == B_MOUSE_WHEEL_CHANGED)
7124   {
7125     /* Pass the mouse wheel stuff down to the words view, since that's the only
7126     one which does scrolling so we don't need to worry about whether it has
7127     focus or not. */
7128 
7129     if (m_WordsViewPntr != NULL)
7130       m_WordsViewPntr->MessageReceived (MessagePntr);
7131   }
7132   else
7133     BWindow::MessageReceived (MessagePntr);
7134 }
7135 
7136 
7137 bool
QuitRequested()7138 DatabaseWindow::QuitRequested ()
7139 {
7140   be_app->PostMessage (B_QUIT_REQUESTED);
7141   return true;
7142 }
7143 
7144 
7145 
7146 /******************************************************************************
7147  * Implementation of the word display view.
7148  */
7149 
WordsView(BRect NewBounds)7150 WordsView::WordsView (BRect NewBounds)
7151 : BView (NewBounds, "WordsView", B_FOLLOW_ALL_SIDES,
7152     B_WILL_DRAW | B_FULL_UPDATE_ON_RESIZE | B_NAVIGABLE | B_PULSE_NEEDED),
7153   m_ArrowLineDownPntr (NULL),
7154   m_ArrowLineUpPntr (NULL),
7155   m_ArrowPageDownPntr (NULL),
7156   m_ArrowPageUpPntr (NULL),
7157   m_LastTimeAKeyWasPressed (0)
7158 {
7159   font_height TempFontHeight;
7160 
7161   GetFont (&m_TextFont); /* Modify the default font to be our own. */
7162   m_TextFont.SetSize (ceilf (m_TextFont.Size() * 1.1));
7163   m_TextFont.GetHeight (&TempFontHeight);
7164   SetFont (&m_TextFont);
7165 
7166   m_LineHeight = ceilf (TempFontHeight.ascent +
7167     TempFontHeight.descent + TempFontHeight.leading);
7168   m_AscentHeight = ceilf (TempFontHeight.ascent);
7169   m_TextHeight = ceilf (TempFontHeight.ascent +
7170     TempFontHeight.descent);
7171 
7172   m_FocusedColour.red = 255;
7173   m_FocusedColour.green = 255;
7174   m_FocusedColour.blue = 255;
7175   m_FocusedColour.alpha = 255;
7176 
7177   m_UnfocusedColour.red = 245;
7178   m_UnfocusedColour.green = 245;
7179   m_UnfocusedColour.blue = 255;
7180   m_UnfocusedColour.alpha = 255;
7181 
7182   m_BackgroundColour = m_UnfocusedColour;
7183   SetViewColor (m_BackgroundColour);
7184   SetLowColor (m_BackgroundColour);
7185   SetHighColor (0, 0, 0);
7186 
7187   strcpy (m_FirstDisplayedWord, "a");
7188 }
7189 
7190 
7191 void
AttachedToWindow()7192 WordsView::AttachedToWindow ()
7193 {
7194   BPolygon        DownLinePolygon (g_DownLinePoints,
7195                     sizeof (g_DownLinePoints) /
7196                     sizeof (g_DownLinePoints[0]));
7197 
7198   BPolygon        DownPagePolygon (g_DownPagePoints,
7199                     sizeof (g_DownPagePoints) /
7200                     sizeof (g_DownPagePoints[0]));
7201 
7202   BPolygon        UpLinePolygon (g_UpLinePoints,
7203                     sizeof (g_UpLinePoints) /
7204                     sizeof (g_UpLinePoints[0]));
7205 
7206   BPolygon        UpPagePolygon (g_UpPagePoints,
7207                     sizeof (g_UpPagePoints) /
7208                     sizeof (g_UpPagePoints[0]));
7209 
7210   BPicture        TempOffPicture;
7211   BPicture        TempOnPicture;
7212   BRect           TempRect;
7213 
7214   /* Make the buttons and associated polygon images for the forward and
7215   backwards a word or a page of words buttons.  They're the width of the scroll
7216   bar area on the right, but twice as tall as usual, since there is no scroll
7217   bar and that will make it easier to use them.  First the up a line button. */
7218 
7219   SetHighColor (0, 0, 0);
7220   BeginPicture (&TempOffPicture);
7221   FillPolygon (&UpLinePolygon);
7222   SetHighColor (180, 180, 180);
7223   StrokePolygon (&UpLinePolygon);
7224   EndPicture ();
7225 
7226   SetHighColor (128, 128, 128);
7227   BeginPicture (&TempOnPicture);
7228   FillPolygon (&UpLinePolygon);
7229   EndPicture ();
7230 
7231   TempRect = Bounds ();
7232   TempRect.bottom = TempRect.top + 2 * B_H_SCROLL_BAR_HEIGHT;
7233   TempRect.left = TempRect.right - B_V_SCROLL_BAR_WIDTH;
7234   m_ArrowLineUpPntr = new BPictureButton (TempRect, "Up Line",
7235     &TempOffPicture, &TempOnPicture,
7236     new BMessage (MSG_LINE_UP), B_ONE_STATE_BUTTON,
7237     B_FOLLOW_RIGHT | B_FOLLOW_TOP, B_WILL_DRAW | B_NAVIGABLE);
7238   if (m_ArrowLineUpPntr == NULL) goto ErrorExit;
7239   AddChild (m_ArrowLineUpPntr);
7240   m_ArrowLineUpPntr->SetTarget (this);
7241 
7242   /* Up a page button. */
7243 
7244   SetHighColor (0, 0, 0);
7245   BeginPicture (&TempOffPicture);
7246   FillPolygon (&UpPagePolygon);
7247   SetHighColor (180, 180, 180);
7248   StrokePolygon (&UpPagePolygon);
7249   EndPicture ();
7250 
7251   SetHighColor (128, 128, 128);
7252   BeginPicture (&TempOnPicture);
7253   FillPolygon (&UpPagePolygon);
7254   EndPicture ();
7255 
7256   TempRect = Bounds ();
7257   TempRect.top += 2 * B_H_SCROLL_BAR_HEIGHT + 1;
7258   TempRect.bottom = TempRect.top + 2 * B_H_SCROLL_BAR_HEIGHT;
7259   TempRect.left = TempRect.right - B_V_SCROLL_BAR_WIDTH;
7260   m_ArrowPageUpPntr = new BPictureButton (TempRect, "Up Page",
7261     &TempOffPicture, &TempOnPicture,
7262     new BMessage (MSG_PAGE_UP), B_ONE_STATE_BUTTON,
7263     B_FOLLOW_RIGHT | B_FOLLOW_TOP, B_WILL_DRAW | B_NAVIGABLE);
7264   if (m_ArrowPageUpPntr == NULL) goto ErrorExit;
7265   AddChild (m_ArrowPageUpPntr);
7266   m_ArrowPageUpPntr->SetTarget (this);
7267 
7268   /* Down a page button. */
7269 
7270   SetHighColor (0, 0, 0);
7271   BeginPicture (&TempOffPicture);
7272   FillPolygon (&DownPagePolygon);
7273   SetHighColor (180, 180, 180);
7274   StrokePolygon (&DownPagePolygon);
7275   EndPicture ();
7276 
7277   SetHighColor (128, 128, 128);
7278   BeginPicture (&TempOnPicture);
7279   FillPolygon (&DownPagePolygon);
7280   EndPicture ();
7281 
7282   TempRect = Bounds ();
7283   TempRect.bottom -= 3 * B_H_SCROLL_BAR_HEIGHT + 1;
7284   TempRect.top = TempRect.bottom - 2 * B_H_SCROLL_BAR_HEIGHT;
7285   TempRect.left = TempRect.right - B_V_SCROLL_BAR_WIDTH;
7286   m_ArrowPageDownPntr = new BPictureButton (TempRect, "Down Page",
7287     &TempOffPicture, &TempOnPicture,
7288     new BMessage (MSG_PAGE_DOWN), B_ONE_STATE_BUTTON,
7289     B_FOLLOW_RIGHT | B_FOLLOW_BOTTOM, B_WILL_DRAW | B_NAVIGABLE);
7290   if (m_ArrowPageDownPntr == NULL) goto ErrorExit;
7291   AddChild (m_ArrowPageDownPntr);
7292   m_ArrowPageDownPntr->SetTarget (this);
7293 
7294   /* Down a line button. */
7295 
7296   SetHighColor (0, 0, 0);
7297   BeginPicture (&TempOffPicture);
7298   FillPolygon (&DownLinePolygon);
7299   SetHighColor (180, 180, 180);
7300   StrokePolygon (&DownLinePolygon);
7301   EndPicture ();
7302 
7303   SetHighColor (128, 128, 128);
7304   BeginPicture (&TempOnPicture);
7305   FillPolygon (&DownLinePolygon);
7306   EndPicture ();
7307 
7308   TempRect = Bounds ();
7309   TempRect.bottom -= B_H_SCROLL_BAR_HEIGHT;
7310   TempRect.top = TempRect.bottom - 2 * B_H_SCROLL_BAR_HEIGHT;
7311   TempRect.left = TempRect.right - B_V_SCROLL_BAR_WIDTH;
7312   m_ArrowLineDownPntr = new BPictureButton (TempRect, "Down Line",
7313     &TempOffPicture, &TempOnPicture,
7314     new BMessage (MSG_LINE_DOWN), B_ONE_STATE_BUTTON,
7315     B_FOLLOW_RIGHT | B_FOLLOW_BOTTOM, B_WILL_DRAW | B_NAVIGABLE);
7316   if (m_ArrowLineDownPntr == NULL) goto ErrorExit;
7317   AddChild (m_ArrowLineDownPntr);
7318   m_ArrowLineDownPntr->SetTarget (this);
7319 
7320   return;
7321 
7322 ErrorExit:
7323   DisplayErrorMessage ("Problems while making view displaying the words.");
7324 }
7325 
7326 
7327 /* Draw the words starting with the one at or after m_FirstDisplayedWord.  This
7328 requires looking at the database in the BApplication, which may or may not be
7329 available (if it isn't, don't draw, a redraw will usually be requested by the
7330 Pulse member function when it keeps on noticing that the stuff on the display
7331 doesn't match the database). */
7332 
7333 void
Draw(BRect UpdateRect)7334 WordsView::Draw (BRect UpdateRect)
7335 {
7336   float                   AgeDifference;
7337   float                   AgeProportion;
7338   float                   CenterX;
7339   float                   ColumnLeftCenterX;
7340   float                   ColumnMiddleCenterX;
7341   float                   ColumnRightCenterX;
7342   float                   CompensatedRatio;
7343   StatisticsMap::iterator DataIter;
7344   StatisticsMap::iterator EndIter;
7345   rgb_color               FillColour;
7346   float                   GenuineProportion;
7347   uint32                  GenuineSpamSum;
7348   float                   HeightPixels;
7349   float                   HeightProportion;
7350   float                   LeftBounds;
7351   ABSApp                 *MyAppPntr;
7352   uint32                  NewestAge;
7353   uint32                  OldestAge;
7354   float                   OneFifthTotalGenuine;
7355   float                   OneFifthTotalSpam;
7356   double                  RawProbabilityRatio;
7357   float                   RightBounds;
7358   float                   SpamProportion;
7359   StatisticsPointer       StatisticsPntr;
7360   BRect                   TempRect;
7361   char                    TempString [PATH_MAX];
7362   float                   TotalGenuineMessages = 1.0; /* Avoid divide by 0. */
7363   float                   TotalSpamMessages = 1.0;
7364   float                   Width;
7365   float                   Y;
7366 
7367   /* Lock the application.  This will stop it from processing any further
7368   messages until we are done.  Or if it is busy, the lock will fail. */
7369 
7370   MyAppPntr = dynamic_cast<ABSApp *> (be_app);
7371   if (MyAppPntr == NULL || MyAppPntr->LockWithTimeout (100000) != B_OK)
7372     return; /* It's probably busy doing something. */
7373 
7374   /* Set up various loop invariant variables. */
7375 
7376   if (MyAppPntr->m_TotalGenuineMessages > 0)
7377     TotalGenuineMessages = MyAppPntr->m_TotalGenuineMessages;
7378   OneFifthTotalGenuine = TotalGenuineMessages / 5;
7379 
7380   if (MyAppPntr->m_TotalSpamMessages > 0)
7381     TotalSpamMessages = MyAppPntr->m_TotalSpamMessages;
7382   OneFifthTotalSpam = TotalSpamMessages / 5;
7383 
7384   EndIter = MyAppPntr->m_WordMap.end ();
7385 
7386   OldestAge = MyAppPntr->m_OldestAge;
7387   NewestAge = /* actually newest age plus one */
7388     MyAppPntr->m_TotalGenuineMessages + MyAppPntr->m_TotalSpamMessages;
7389 
7390   if (NewestAge == 0)
7391     goto NormalExit; /* No words to display, or something is badly wrong. */
7392 
7393   NewestAge--; /* The newest message has age NewestAge. */
7394   AgeDifference = NewestAge - OldestAge; /* Can be zero if just one message. */
7395 
7396   LeftBounds = Bounds().left;
7397   RightBounds = Bounds().right - B_V_SCROLL_BAR_WIDTH;
7398   Width = RightBounds - LeftBounds;
7399   FillColour.alpha = 255;
7400 
7401   CenterX = ceilf (LeftBounds + Width * 0.5);
7402   ColumnLeftCenterX = ceilf (LeftBounds + Width * 0.05);
7403   ColumnMiddleCenterX = CenterX;
7404   ColumnRightCenterX = ceilf (LeftBounds + Width * 0.95);
7405 
7406   for (DataIter = MyAppPntr->m_WordMap.lower_bound (m_FirstDisplayedWord),
7407   Y = Bounds().top;
7408   DataIter != EndIter && Y < UpdateRect.bottom;
7409   DataIter++, Y += m_LineHeight)
7410   {
7411     if (Y + m_LineHeight < UpdateRect.top)
7412       continue; /* Not in the visible area yet, don't actually draw. */
7413 
7414     /* Draw the colour bar behind the word.  It reflects the spamness or
7415     genuineness of that particular word, plus the importance of the word and
7416     the age of the word.
7417 
7418     First calculate the compensated spam ratio (described elsewhere).  It is
7419     close to 0.0 for genuine words and close to 1.0 for pure spam.  It is drawn
7420     as a blue bar to the left of center if it is less than 0.5, and a red bar
7421     on the right of center if it is greater than 0.5.  At exactly 0.5 nothing
7422     is drawn; the word is worthless as an indicator.
7423 
7424     The height of the bar corresponds to the number of messages the word was
7425     found in.  Make the height proportional to the total of spam and genuine
7426     messages for the word divided by the sum of the most extreme spam and
7427     genuine counts in the database.
7428 
7429     The staturation of the colour corresponds to the age of the word, with old
7430     words being almost white rather than solid blue or red. */
7431 
7432     StatisticsPntr = &DataIter->second;
7433 
7434     SpamProportion = StatisticsPntr->spamCount / TotalSpamMessages;
7435     GenuineProportion = StatisticsPntr->genuineCount / TotalGenuineMessages;
7436     if (SpamProportion + GenuineProportion > 0.0f)
7437       RawProbabilityRatio =
7438       SpamProportion / (SpamProportion + GenuineProportion);
7439     else
7440       RawProbabilityRatio = g_RobinsonX;
7441 
7442     /* The compensated ratio leans towards 0.5 (RobinsonX) more for fewer
7443     data points, with a weight of 0.45 (RobinsonS). */
7444 
7445     GenuineSpamSum =
7446       StatisticsPntr->spamCount + StatisticsPntr->genuineCount;
7447     CompensatedRatio =
7448       (g_RobinsonS * g_RobinsonX + GenuineSpamSum * RawProbabilityRatio) /
7449       (g_RobinsonS + GenuineSpamSum);
7450 
7451     /* Used to use the height based on the most frequent word, but some words,
7452     like "From", show up in all messages which made most other words just
7453     appear as a thin line.  I did a histogram plot of the sizes in my test
7454     database, and figured that you get better coverage of 90% of the messages
7455     if you use 1/5 of the total number as the count which gives you 100%
7456     height.  The other 10% get a full height bar, but most people wouldn't care
7457     that they're super frequently used. */
7458 
7459     HeightProportion = 0.5f * (StatisticsPntr->genuineCount /
7460       OneFifthTotalGenuine + StatisticsPntr->spamCount / OneFifthTotalSpam);
7461 
7462     if (HeightProportion > 1.0f)
7463       HeightProportion = 1.0f;
7464     HeightPixels = ceilf (HeightProportion * m_TextHeight);
7465 
7466     if (AgeDifference <= 0.0f)
7467       AgeProportion = 1.0; /* New is 1.0, old is 0.0 */
7468     else
7469       AgeProportion = (StatisticsPntr->age - OldestAge) / AgeDifference;
7470 
7471     TempRect.top = ceilf (Y + m_TextHeight / 2 - HeightPixels / 2);
7472     TempRect.bottom = TempRect.top + HeightPixels;
7473 
7474     if (CompensatedRatio < 0.5f)
7475     {
7476       TempRect.left = ceilf (
7477         CenterX - 1.6f * (0.5f - CompensatedRatio) * (CenterX - LeftBounds));
7478       TempRect.right = CenterX;
7479       FillColour.red = 230 - (int) (AgeProportion * 230.0f);
7480       FillColour.green = FillColour.red;
7481       FillColour.blue = 255;
7482     }
7483     else /* Ratio >= 0.5, red spam block. */
7484     {
7485       TempRect.left = CenterX;
7486       TempRect.right = ceilf (
7487         CenterX + 1.6f * (CompensatedRatio - 0.5f) * (RightBounds - CenterX));
7488       FillColour.blue = 230 - (int) (AgeProportion * 230.0f);
7489       FillColour.green = FillColour.blue;
7490       FillColour.red = 255;
7491     }
7492     SetHighColor (FillColour);
7493     SetDrawingMode (B_OP_COPY);
7494     FillRect (TempRect);
7495 
7496     /* Print the text centered in columns of various widths.  The number of
7497     genuine messages in the left 10% of the width, the word in the middle 80%,
7498     and the number of spam messages using the word in the right 10%. */
7499 
7500     SetHighColor (0, 0, 0);
7501     SetDrawingMode (B_OP_OVER); /* So that antialiased text mixes better. */
7502 
7503     sprintf (TempString, "%" B_PRIu32, StatisticsPntr->genuineCount);
7504     Width = m_TextFont.StringWidth (TempString);
7505     MovePenTo (ceilf (ColumnLeftCenterX - Width / 2), Y + m_AscentHeight);
7506     DrawString (TempString);
7507 
7508     strcpy (TempString, DataIter->first.c_str ());
7509     Width = m_TextFont.StringWidth (TempString);
7510     MovePenTo (ceilf (ColumnMiddleCenterX - Width / 2), Y + m_AscentHeight);
7511     DrawString (TempString);
7512 
7513     sprintf (TempString, "%" B_PRIu32, StatisticsPntr->spamCount);
7514     Width = m_TextFont.StringWidth (TempString);
7515     MovePenTo (ceilf (ColumnRightCenterX - Width / 2), Y + m_AscentHeight);
7516     DrawString (TempString);
7517   }
7518 
7519   /* Draw the first word (the one which the user types in to select the first
7520   displayed word) on the right, in the scroll bar margin, rotated 90 degrees to
7521   fit between the page up and page down buttons. */
7522 
7523   Width = m_TextFont.StringWidth (m_FirstDisplayedWord);
7524   if (Width > 0)
7525   {
7526     TempRect = Bounds ();
7527     TempRect.top += 4 * B_H_SCROLL_BAR_HEIGHT + 1;
7528     TempRect.bottom -= 5 * B_H_SCROLL_BAR_HEIGHT + 1;
7529 
7530     MovePenTo (TempRect.right - m_TextHeight + m_AscentHeight - 1,
7531       ceilf ((TempRect.bottom + TempRect.top) / 2 + Width / 2));
7532     m_TextFont.SetRotation (90);
7533     SetFont (&m_TextFont, B_FONT_ROTATION);
7534     DrawString (m_FirstDisplayedWord);
7535     m_TextFont.SetRotation (0);
7536     SetFont (&m_TextFont, B_FONT_ROTATION);
7537   }
7538 
7539 NormalExit:
7540 
7541   /* Successfully finished drawing.  Update the cached values to match what we
7542   have drawn. */
7543   m_CachedTotalGenuineMessages = MyAppPntr->m_TotalGenuineMessages;
7544   m_CachedTotalSpamMessages = MyAppPntr->m_TotalSpamMessages;
7545   m_CachedWordCount = MyAppPntr->m_WordCount;
7546 
7547   /* Done.  Let the BApplication continue processing messages. */
7548   MyAppPntr->Unlock ();
7549 }
7550 
7551 
7552 /* When the user presses keys, they select the first word to be displayed in
7553 the view (it's the word at or lexicographically after the word typed in).  The
7554 keys are appended to the starting word, until the user stops typing for a
7555 while, then the next key will be the first letter of a new starting word. */
7556 
7557 void
KeyDown(const char * BufferPntr,int32 NumBytes)7558 WordsView::KeyDown (const char *BufferPntr, int32 NumBytes)
7559 {
7560   int32          CharLength;
7561   bigtime_t      CurrentTime;
7562   char           TempString [40];
7563 
7564   CurrentTime = system_time ();
7565 
7566   if (NumBytes < (int32) sizeof (TempString))
7567   {
7568     memcpy (TempString, BufferPntr, NumBytes);
7569     TempString [NumBytes] = 0;
7570     CharLength = strlen (TempString); /* So NUL bytes don't get through. */
7571 
7572     /* Check for arrow keys, which move the view up and down. */
7573 
7574     if (CharLength == 1 &&
7575     (TempString[0] == B_UP_ARROW ||
7576     TempString[0] == B_DOWN_ARROW ||
7577     TempString[0] == B_PAGE_UP ||
7578     TempString[0] == B_PAGE_DOWN))
7579     {
7580       MoveTextUpOrDown ((TempString[0] == B_UP_ARROW) ? MSG_LINE_UP :
7581         ((TempString[0] == B_DOWN_ARROW) ? MSG_LINE_DOWN :
7582         ((TempString[0] == B_PAGE_UP) ? MSG_PAGE_UP : MSG_PAGE_DOWN)));
7583     }
7584     else if (CharLength > 1 ||
7585     (CharLength == 1 && 32 <= (uint8) TempString[0]))
7586     {
7587       /* Have a non-control character, or some sort of multibyte char.  Add it
7588       to the word and mark things for redisplay starting at the resulting word.
7589       */
7590 
7591       if (CurrentTime - m_LastTimeAKeyWasPressed >= 1000000 /* microseconds */)
7592         strcpy (m_FirstDisplayedWord, TempString); /* Starting a new word. */
7593       else if (strlen (m_FirstDisplayedWord) + CharLength <= g_MaxWordLength)
7594         strcat (m_FirstDisplayedWord, TempString); /* Append to existing. */
7595 
7596       Invalidate ();
7597     }
7598   }
7599 
7600   m_LastTimeAKeyWasPressed = CurrentTime;
7601   BView::KeyDown (BufferPntr, NumBytes);
7602 }
7603 
7604 
7605 /* Change the background colour to show that we have the focus.  When we have
7606 it, keystrokes will select the word to be displayed at the top of the list. */
7607 
7608 void
MakeFocus(bool Focused)7609 WordsView::MakeFocus (bool Focused)
7610 {
7611   if (Focused)
7612     m_BackgroundColour = m_FocusedColour;
7613   else
7614     m_BackgroundColour = m_UnfocusedColour;
7615   SetViewColor (m_BackgroundColour);
7616   SetLowColor (m_BackgroundColour);
7617 
7618   /* Also need to set the background colour for the scroll buttons, since they
7619   can't be made transparent. */
7620 
7621   if (m_ArrowLineDownPntr != NULL)
7622   {
7623     m_ArrowLineDownPntr->SetViewColor (m_BackgroundColour);
7624     m_ArrowLineDownPntr->Invalidate ();
7625   }
7626 
7627   if (m_ArrowLineUpPntr != NULL)
7628   {
7629     m_ArrowLineUpPntr->SetViewColor (m_BackgroundColour);
7630     m_ArrowLineUpPntr->Invalidate ();
7631   }
7632 
7633   if (m_ArrowPageDownPntr != NULL)
7634   {
7635     m_ArrowPageDownPntr->SetViewColor (m_BackgroundColour);
7636     m_ArrowPageDownPntr->Invalidate ();
7637   }
7638 
7639   if (m_ArrowPageUpPntr != NULL)
7640   {
7641     m_ArrowPageUpPntr->SetViewColor (m_BackgroundColour);
7642     m_ArrowPageUpPntr->Invalidate ();
7643   }
7644 
7645   Invalidate ();
7646 
7647   BView::MakeFocus (Focused);
7648 }
7649 
7650 
7651 void
MessageReceived(BMessage * MessagePntr)7652 WordsView::MessageReceived (BMessage *MessagePntr)
7653 {
7654   int32     CountFound;
7655   float     DeltaY; /* Usually -1.0, 0.0 or +1.0. */
7656   type_code TypeFound;
7657 
7658   switch (MessagePntr->what)
7659   {
7660     case B_MOUSE_WHEEL_CHANGED:
7661       if (MessagePntr->FindFloat ("be:wheel_delta_y", &DeltaY) != 0) break;
7662       if (DeltaY < 0)
7663         MoveTextUpOrDown (MSG_LINE_UP);
7664       else if (DeltaY > 0)
7665         MoveTextUpOrDown (MSG_LINE_DOWN);
7666       break;
7667 
7668     case MSG_LINE_DOWN:
7669     case MSG_LINE_UP:
7670     case MSG_PAGE_DOWN:
7671     case MSG_PAGE_UP:
7672       MoveTextUpOrDown (MessagePntr->what);
7673       break;
7674 
7675     case B_SIMPLE_DATA: /* Something has been dropped in our view. */
7676       if (MessagePntr->GetInfo ("refs", &TypeFound, &CountFound) == B_OK &&
7677       CountFound > 0 && TypeFound == B_REF_TYPE)
7678       {
7679         RefsDroppedHere (MessagePntr);
7680         break;
7681       }
7682       /* Else fall through to the default case, in case it is something else
7683       dropped that the system knows about. */
7684 
7685     default:
7686       BView::MessageReceived (MessagePntr);
7687   }
7688 }
7689 
7690 
7691 /* If the user clicks on our view, take over the focus. */
7692 
7693 void
MouseDown(BPoint)7694 WordsView::MouseDown (BPoint)
7695 {
7696   if (!IsFocus ())
7697     MakeFocus (true);
7698 }
7699 
7700 
7701 void
MoveTextUpOrDown(uint32 MovementType)7702 WordsView::MoveTextUpOrDown (uint32 MovementType)
7703 {
7704   StatisticsMap::iterator  DataIter;
7705   int                      i;
7706   ABSApp                  *MyAppPntr;
7707   int                      PageSize;
7708 
7709   /* Lock the application.  This will stop it from processing any further
7710   messages until we are done (we need to look at the word list directly).  Or
7711   if it is busy, the lock will fail. */
7712 
7713   MyAppPntr = dynamic_cast<ABSApp *> (be_app);
7714   if (MyAppPntr == NULL || MyAppPntr->LockWithTimeout (2000000) != B_OK)
7715     return; /* It's probably busy doing something. */
7716 
7717   PageSize = (int) (Bounds().Height() / m_LineHeight - 1);
7718   if (PageSize < 1)
7719     PageSize = 1;
7720 
7721   DataIter = MyAppPntr->m_WordMap.lower_bound (m_FirstDisplayedWord);
7722 
7723   switch (MovementType)
7724   {
7725     case MSG_LINE_UP:
7726       if (DataIter != MyAppPntr->m_WordMap.begin ())
7727         DataIter--;
7728       break;
7729 
7730     case MSG_LINE_DOWN:
7731       if (DataIter != MyAppPntr->m_WordMap.end ())
7732         DataIter++;
7733       break;
7734 
7735     case MSG_PAGE_UP:
7736       for (i = 0; i < PageSize; i++)
7737       {
7738         if (DataIter == MyAppPntr->m_WordMap.begin ())
7739           break;
7740         DataIter--;
7741       }
7742       break;
7743 
7744     case MSG_PAGE_DOWN:
7745       for (i = 0; i < PageSize; i++)
7746       {
7747         if (DataIter == MyAppPntr->m_WordMap.end ())
7748           break;
7749         DataIter++;
7750       }
7751       break;
7752   }
7753 
7754   if (DataIter != MyAppPntr->m_WordMap.end ())
7755     strcpy (m_FirstDisplayedWord, DataIter->first.c_str ());
7756 
7757   Invalidate ();
7758 
7759   MyAppPntr->Unlock ();
7760 }
7761 
7762 
7763 /* This function periodically polls the BApplication to see if anything has
7764 changed.  If the word list is different or the display has changed in some
7765 other way, it will then try to refresh the display, repeating the attempt until
7766 it gets successfully drawn. */
7767 
7768 void
Pulse()7769 WordsView::Pulse ()
7770 {
7771   ABSApp *MyAppPntr;
7772 
7773   /* Probe the BApplication to see if it has changed. */
7774 
7775   MyAppPntr = dynamic_cast<ABSApp *> (be_app);
7776   if (MyAppPntr == NULL)
7777     return; /* Something is wrong, give up. */
7778 
7779   if (MyAppPntr->m_TotalGenuineMessages != m_CachedTotalGenuineMessages ||
7780   MyAppPntr->m_TotalSpamMessages != m_CachedTotalSpamMessages ||
7781   MyAppPntr->m_WordCount != m_CachedWordCount)
7782     Invalidate ();
7783 }
7784 
7785 
7786 /* The user has dragged and dropped some file references on the words view.  If
7787 it is in the left third, add the file(s) as examples of genuine messages, right
7788 third for spam messages and if it is in the middle third then evaluate the
7789 file(s) for spaminess. */
7790 
7791 void
RefsDroppedHere(BMessage * MessagePntr)7792 WordsView::RefsDroppedHere (BMessage *MessagePntr)
7793 {
7794   float  Left;
7795   bool   SpamExample = true; /* TRUE if example is of spam, FALSE genuine. */
7796   float  Third;
7797   BPoint WhereDropped;
7798 
7799   /* Find out which third of the view it was dropped into. */
7800 
7801   if (MessagePntr->FindPoint ("_drop_point_", &WhereDropped) != B_OK)
7802     return;  /* Need to know where it was dropped. */
7803   ConvertFromScreen (&WhereDropped);
7804   Third = Bounds().Width() / 3;
7805   Left = Bounds().left;
7806   if (WhereDropped.x < Left + Third)
7807     SpamExample = false;
7808   else if (WhereDropped.x < Left + 2 * Third)
7809   {
7810     /* In the middle third, evaluate all files for spaminess. */
7811     EstimateRefFilesAndDisplay (MessagePntr);
7812     return;
7813   }
7814 
7815   if (g_CommanderLooperPntr != NULL)
7816     g_CommanderLooperPntr->CommandReferences (
7817     MessagePntr, true /* BulkMode */, SpamExample ? CL_SPAM : CL_GENUINE);
7818 }
7819 
7820 
7821 
7822 /******************************************************************************
7823  * Finally, the main program which drives it all.
7824  */
7825 
main(int argc,char **)7826 int main (int argc, char**)
7827 {
7828   g_CommandLineMode = (argc > 1);
7829   if (!g_CommandLineMode)
7830     cout << PrintUsage; /* In case no arguments specified. */
7831 
7832   g_CommanderLooperPntr = new CommanderLooper;
7833   if (g_CommanderLooperPntr != NULL)
7834   {
7835     g_CommanderMessenger = new BMessenger (NULL, g_CommanderLooperPntr);
7836     g_CommanderLooperPntr->Run ();
7837   }
7838 
7839   ABSApp MyApp;
7840 
7841   if (MyApp.InitCheck () == 0)
7842   {
7843     MyApp.LoadSaveSettings (true /* DoLoad */);
7844     MyApp.Run ();
7845   }
7846 
7847   if (g_CommanderLooperPntr != NULL)
7848   {
7849     g_CommanderLooperPntr->PostMessage (B_QUIT_REQUESTED);
7850     snooze (100000); /* Let the CommanderLooper thread run so it quits. */
7851   }
7852 
7853   cerr << "SpamDBM shutting down..." << endl;
7854   return 0; /* And implicitly destroys MyApp, which writes out the database. */
7855 }
7856