]> code.delx.au - gnu-emacs/blob - lisp/mh-e/mh-junk.el
Update copyright year to 2016
[gnu-emacs] / lisp / mh-e / mh-junk.el
1 ;;; mh-junk.el --- MH-E interface to anti-spam measures
2
3 ;; Copyright (C) 2003-2016 Free Software Foundation, Inc.
4
5 ;; Author: Satyaki Das <satyaki@theforce.stanford.edu>,
6 ;; Bill Wohler <wohler@newt.com>
7 ;; Maintainer: Bill Wohler <wohler@newt.com>
8 ;; Keywords: mail, spam
9
10 ;; This file is part of GNU Emacs.
11
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation, either version 3 of the License, or
15 ;; (at your option) any later version.
16
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
24
25 ;;; Commentary:
26
27 ;; Spam handling in MH-E.
28
29 ;;; Change Log:
30
31 ;;; Code:
32
33 (require 'mh-e)
34 (require 'mh-scan)
35 (mh-require-cl)
36
37 ;;;###mh-autoload
(defun mh-junk-blacklist (range)
  "Blacklist RANGE as spam.

This command trains the spam program in use (see the option
`mh-junk-program') with the content of RANGE and then handles the
message(s) as specified by the option `mh-junk-disposition'.

Check the documentation of `mh-interactive-range' to see how RANGE is
read in interactive use.

For more information about using your particular spam fighting
program, see:

  - `mh-spamassassin-blacklist'
  - `mh-bogofilter-blacklist'
  - `mh-spamprobe-blacklist'"
  (interactive (list (mh-interactive-range "Blacklist")))
  ;; Mark each message of RANGE in turn; passing nil makes the helper
  ;; operate on the message at point.
  (mh-iterate-on-range () range
    (mh-blacklist-a-msg nil))
  ;; Step to the next message only when the line at point now carries
  ;; the blacklisted notation.
  (when (looking-at mh-scan-blacklisted-msg-regexp)
    (mh-next-msg)))
58
(defun mh-blacklist-a-msg (message)
  "Blacklist MESSAGE.
If MESSAGE is nil then the message at point is blacklisted.
The hook `mh-blacklist-msg-hook' is called after you mark a message
for blacklisting.

An error is signaled if the message is already marked as refiled,
deleted or whitelisted.  A message that is already blacklisted is
left untouched."
  (save-excursion
    (if (numberp message)
        (mh-goto-msg message nil t)
      ;; No number given: work on the scan line at point and look up
      ;; its message number for the bookkeeping below.
      (beginning-of-line)
      (setq message (mh-get-msg-num t)))
    (cond ((looking-at mh-scan-refiled-msg-regexp)
           (error "Message %d is refiled; undo refile before blacklisting"
                  message))
          ((looking-at mh-scan-deleted-msg-regexp)
           (error "Message %d is deleted; undo delete before blacklisting"
                  message))
          ((looking-at mh-scan-whitelisted-msg-regexp)
           (error "Message %d is whitelisted; undo before blacklisting"
                  message))
          ;; Already blacklisted: nothing to do.
          ((looking-at mh-scan-blacklisted-msg-regexp) nil)
          (t
           (mh-set-folder-modified-p t)
           (push message mh-blacklist)
           ;; Record the message in the seen list exactly once.
           (unless (memq message mh-seen-list)
             (push message mh-seen-list))
           (mh-notate nil mh-note-blacklisted mh-cmd-note)
           (run-hooks 'mh-blacklist-msg-hook)))))
86
87 ;;;###mh-autoload
(defun mh-junk-blacklist-disposition ()
  "Translate the option `mh-junk-disposition' into a folder name.
Return nil when the option is nil and \"+\" when it is the empty
string.  A value that already starts with \"+\" is returned as is; a
value starting with \"@\" is appended, without the \"@\", to
`mh-current-folder' after a \"/\"; any other value gets \"+\"
prepended."
  (let ((disposition mh-junk-disposition))
    (when disposition
      (if (equal disposition "")
          "+"
        (let ((lead (aref disposition 0)))
          (cond ((eq lead ?+)
                 disposition)
                ((eq lead ?@)
                 (concat mh-current-folder "/" (substring disposition 1)))
                (t
                 (concat "+" disposition))))))))
98
99 ;;;###mh-autoload
(defun mh-junk-process-blacklist (range)
  "Train the spam program in use with every message in RANGE as spam.
The training function is the second element of the entry for
`mh-junk-choice' in `mh-junk-function-alist'; it is called with each
message number in turn.  Signal an error if no such function is
configured (see the option `mh-junk-program')."
  (let ((trainer (cadr (assoc mh-junk-choice mh-junk-function-alist))))
    (or trainer
        (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      (message "Blacklisting message %d..." msg)
      (funcall (symbol-function trainer) msg)
      (message "Blacklisting message %d...done" msg))
    (mh-next-msg)))
113
114 ;;;###mh-autoload
(defun mh-junk-whitelist (range)
  "Whitelist RANGE as ham.

This command reclassifies the RANGE as ham if it was incorrectly
classified as spam (see the option `mh-junk-program').  It then
refiles the message into the \"+inbox\" folder.

Check the documentation of `mh-interactive-range' to see how
RANGE is read in interactive use."
  (interactive (list (mh-interactive-range "Whitelist")))
  ;; Mark each message of RANGE in turn; passing nil makes the helper
  ;; operate on the message at point.
  (mh-iterate-on-range () range
    (mh-junk-whitelist-a-msg nil))
  ;; Step to the next message only when the line at point now carries
  ;; the whitelisted notation.
  (when (looking-at mh-scan-whitelisted-msg-regexp)
    (mh-next-msg)))
128
(defun mh-junk-whitelist-a-msg (message)
  "Mark MESSAGE for whitelisting.
When MESSAGE is not a number, the message at point is used.  An
error is signaled if the message is already marked as refiled,
deleted or blacklisted; a message that is already whitelisted is
left alone.  The hook `mh-whitelist-msg-hook' is run after a
message has been marked."
  (save-excursion
    (if (numberp message)
        (mh-goto-msg message nil t)
      ;; Work on the scan line at point and fetch its message number.
      (beginning-of-line)
      (setq message (mh-get-msg-num t)))
    (cond ((looking-at mh-scan-refiled-msg-regexp)
           (error "Message %d is refiled; undo refile before whitelisting"
                  message))
          ((looking-at mh-scan-deleted-msg-regexp)
           (error "Message %d is deleted; undo delete before whitelisting"
                  message))
          ((looking-at mh-scan-blacklisted-msg-regexp)
           (error "Message %d is blacklisted; undo before whitelisting"
                  message))
          ;; Anything not yet whitelisted gets marked; an already
          ;; whitelisted line falls through and yields nil.
          ((not (looking-at mh-scan-whitelisted-msg-regexp))
           (mh-set-folder-modified-p t)
           (push message mh-whitelist)
           (mh-notate nil mh-note-whitelisted mh-cmd-note)
           (run-hooks 'mh-whitelist-msg-hook)))))
154
155 ;;;###mh-autoload
(defun mh-junk-process-whitelist (range)
  "Train the spam program in use with every message in RANGE as ham.
The training function is the third element of the entry for
`mh-junk-choice' in `mh-junk-function-alist'; it is called with each
message number in turn.  Signal an error if no such function is
configured (see the option `mh-junk-program')."
  (let ((trainer (car (nthcdr 2 (assoc mh-junk-choice
                                       mh-junk-function-alist)))))
    (or trainer
        (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      (message "Whitelisting message %d..." msg)
      (funcall (symbol-function trainer) msg)
      (message "Whitelisting message %d...done" msg))
    (mh-next-msg)))
169
170 \f
171
172 ;; SpamAssassin Interface
173
(defvar mh-spamassassin-executable
  (executable-find "spamassassin")
  "Result of `executable-find' for \"spamassassin\" when this file was loaded.
It is nil if the program was not found.")

(defvar mh-sa-learn-executable
  (executable-find "sa-learn")
  "Result of `executable-find' for \"sa-learn\" when this file was loaded.
It is nil if the program was not found.")
176
177 ;;;###mh-autoload
(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG with SpamAssassin.

SpamAssassin is one of the more popular spam filtering programs.
Get it from your local distribution or from the SpamAssassin web
site at URL `http://spamassassin.org/'.

To use SpamAssassin, add the following recipes to
\".procmailrc\":

    PATH=$PATH:/usr/bin/mh
    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamAssassin.
    :0fw
    | spamc

    # Anything with a spam level of 10 or more is junked immediately.
    :0:
    * ^X-Spam-Level: ..........
    /dev/null

    :0:
    * ^X-Spam-Status: Yes
    spam/.

If you don't use \"spamc\", use \"spamassassin -P -a\".

Note that one of the recipes above throws away messages with a
score greater than or equal to 10.  Here's how you can determine a
value that works best for you.

First, run \"spamassassin -t\" on every mail message in your
archive and use Gnumeric to verify that the average plus the
standard deviation of good mail is under 5, the SpamAssassin
default for \"spam\".

Using Gnumeric, sort the messages by score and view the messages
with the highest score.  Determine the score which encompasses all
of your interesting messages and add a couple of points to be
conservative.  Add that many dots to the \"X-Spam-Level:\" header
field above to send messages with that score down the drain.

In the example above, messages with a score of 5-9 are set aside
in the \"+spam\" folder for later review.  The major weakness of
rules-based filters is a plethora of false positives so it is
worthwhile to check.

If SpamAssassin classifies a message incorrectly, or is unsure,
you can use the MH-E commands \\[mh-junk-blacklist] and
\\[mh-junk-whitelist].

The command \\[mh-junk-blacklist] adds a \"blacklist_from\" entry
to \"~/.spamassassin/user_prefs\", deletes the message, and sends
the message to the Razor, so that others might not see this spam.
If the \"sa-learn\" command is available, the message is also
recategorized as spam.

The command \\[mh-junk-whitelist] adds a \"whitelist_from\" rule
to the \"~/.spamassassin/user_prefs\" file.  If the \"sa-learn\"
command is available, the message is also recategorized as ham.

Over time, you'll observe that the same host or domain occurs
repeatedly in the \"blacklist_from\" entries, so you might think
that you could avoid future spam by blacklisting all mail from a
particular domain.  The utility function
`mh-spamassassin-identify-spammers' helps you do precisely that.
This function displays a frequency count of the hosts and domains
in the \"blacklist_from\" entries from the last blank line in
\"~/.spamassassin/user_prefs\" to the end of the file.  This
information can be used so that you can replace multiple
\"blacklist_from\" entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

In versions of SpamAssassin (2.50 and on) that support a Bayesian
classifier, \\[mh-junk-blacklist] uses the program \"sa-learn\"
to recategorize the message as spam.  Neither MH-E, nor
SpamAssassin, rebuilds the database after adding words, so you
will need to run \"sa-learn --rebuild\" periodically.  This can be
done by adding the following to your crontab:

    0 * * * * sa-learn --rebuild > /dev/null 2>&1"
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  ;; Capture the folder name now; the scan further down runs while a
  ;; different buffer is current.
  (let ((current-folder mh-current-folder)
        (msg-file (mh-msg-filename msg mh-current-folder))
        sender)
    (message "Reporting message %d..." msg)
    (mh-truncate-log-buffer)
    ;; Put call-process output in log buffer if we are saving it
    ;; (this happens if mh-junk-background is t).
    (with-current-buffer mh-log-buffer
      (call-process mh-spamassassin-executable msg-file mh-junk-background nil
                    ;;"--report" "--remove-from-whitelist"
                    "-r" "-R")          ; spamassassin V2.20
      (when mh-sa-learn-executable
        (message "Recategorizing message %d as spam..." msg)
        (mh-truncate-log-buffer)
        (call-process mh-sa-learn-executable msg-file mh-junk-background nil
                      "--single" "--spam" "--local" "--no-rebuild")))
    (message "Blacklisting sender of message %d..." msg)
    (with-current-buffer (get-buffer-create mh-temp-buffer)
      (erase-buffer)
      ;; Per the format string, the scan prints the sender's address
      ;; unless the message is from one of the user's own mailboxes,
      ;; in which case it prints nothing.
      (call-process (expand-file-name mh-scan-prog mh-progs)
                    nil t nil
                    (format "%d" msg) current-folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      (if (search-forward-regexp "^\\(.+\\)$" nil t)
          (progn
            (setq sender (match-string 0))
            (mh-spamassassin-add-rule "blacklist_from" sender)
            (message "Blacklisting sender of message %d...done" msg))
        (message "Blacklisting sender of message %d...not done (from my address)" msg)))))
293
294 ;;;###mh-autoload
(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG with SpamAssassin.

The \\[mh-junk-whitelist] command adds a \"whitelist_from\" rule to
the \"~/.spamassassin/user_prefs\" file.  If the \"sa-learn\" command
is available, the message is also recategorized as ham.

See `mh-spamassassin-blacklist' for more information."
  (or mh-spamassassin-executable
      (error "Unable to find the spamassassin executable"))
  (let ((ham-file (mh-msg-filename msg mh-current-folder))
        (shown (get-buffer mh-show-buffer))
        address)
    (with-current-buffer (get-buffer-create mh-temp-buffer)
      (erase-buffer)
      (message "Removing spamassassin markup from message %d..." msg)
      ;; Collect the cleaned-up message in this buffer ...
      (call-process mh-spamassassin-executable ham-file t nil
                    ;; "--remove-markup"
                    "-d")               ; spamassassin V2.20
      (when shown
        (kill-buffer shown))
      ;; ... and write it back over the message file.
      (write-file ham-file)
      (when mh-sa-learn-executable
        (message "Recategorizing message %d as ham..." msg)
        (mh-truncate-log-buffer)
        ;; Put call-process output in log buffer if we are saving it
        ;; (this happens if mh-junk-background is t).
        (with-current-buffer mh-log-buffer
          (call-process mh-sa-learn-executable ham-file mh-junk-background nil
                        "--single" "--ham" "--local" "--no-rebuild")))
      (message "Whitelisting sender of message %d..." msg)
      (setq address
            (car (mh-funcall-if-exists
                  ietf-drums-parse-address (mh-get-header-field "From:"))))
      (kill-buffer nil)
      ;; Only add a rule when a non-empty sender address was found.
      (when (and address (not (equal address "")))
        (mh-spamassassin-add-rule "whitelist_from" address))
      (message "Whitelisting sender of message %d...done" msg))))
333
(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to \"~/.spamassassin/user_prefs\".
The name of the rule is RULE and its body is BODY.

The rule is written as RULE, a tab and BODY on a line of its own at
the end of the file, and the file is saved.  Nothing is added if an
identical line (ignoring case) already exists anywhere in the file.
If the file was not being visited before the call, its buffer is
killed afterwards."
  (save-window-excursion
    (let* ((entry (format "%s\t%s" rule body))
           (case-fold-search t)
           (file (expand-file-name "~/.spamassassin/user_prefs"))
           (buffer-exists (find-buffer-visiting file)))
      (find-file file)
      ;; When the file is already being visited, point can be anywhere,
      ;; so search from the top of the buffer and put point back when
      ;; done.  Matching a whole line (rather than a preceding newline)
      ;; also finds a rule that sits on the very first line.
      (save-excursion
        (goto-char (point-min))
        (unless (re-search-forward (concat "^" (regexp-quote entry) "$")
                                   nil t)
          (goto-char (point-max))
          (insert (if (bolp) "" "\n") entry "\n")
          (save-buffer)))
      (unless buffer-exists
        (kill-buffer nil)))))
350
351 ;;;###mh-autoload
(defun mh-spamassassin-identify-spammers ()
  "Identify spammers who are repeat offenders.

This function displays a frequency count of the hosts and domains
in the \"blacklist_from\" entries from the last blank line in
\"~/.spamassassin/user_prefs\" to the end of the file.  If the file
contains no blank line, all of its entries are counted.  This
information can be used so that you can replace multiple
\"blacklist_from\" entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

Only names that occur more than twice are listed, most frequent
first, in the buffer \"*MH-E Spammer Frequencies*\"."
  (interactive)
  (let ((file (expand-file-name "~/.spamassassin/user_prefs"))
        (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.
    ;; Start one character before the end so that the empty line after
    ;; a final newline is not taken for the blank line.
    (goto-char (1- (point-max)))
    (unless (search-backward-regexp "^$" nil t)
      ;; No blank line at all: count the whole file instead of failing.
      (goto-char (point-min)))
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        ;; Drop the top-level domain, then count every remaining host
        ;; and domain component.
        (dolist (part (cdr (reverse (split-string (match-string 2) "\\."))))
          (puthash part (1+ (gethash part domains 0)) domains))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    (maphash (lambda (key value)
               (when (> value 2)
                 (insert (format "%s %s\n" key value))))
             domains)
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))
394
395 \f
396
397 ;; Bogofilter Interface
398
(defvar mh-bogofilter-executable
  (executable-find "bogofilter")
  "Result of `executable-find' for \"bogofilter\" when this file was loaded.
It is nil if the program was not found.")
400
401 ;;;###mh-autoload
(defun mh-bogofilter-blacklist (msg)
  "Blacklist MSG with bogofilter.

Bogofilter is a Bayesian spam filtering program.  Get it from your
local distribution or from the bogofilter web site at URL
`http://bogofilter.sourceforge.net/'.

Bogofilter is taught by running:

    bogofilter -n < good-message

on every good message, and

    bogofilter -s < spam-message

on every spam message.  This is called a full training; three other
training methods are described in the FAQ that is distributed with
bogofilter.  Note that most Bayesian filters need 1000 to 5000 of each
type of message to start doing a good job.

To use bogofilter, add the following recipes to \".procmailrc\":

    PATH=$PATH:/usr/bin/mh
    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with bogofilter.
    :0fw
    | bogofilter -3 -e -p

    :0:
    * ^X-Bogosity: Yes, tests=bogofilter
    spam/.

    :0:
    * ^X-Bogosity: Unsure, tests=bogofilter
    spam/unsure/.

If bogofilter classifies a message incorrectly, or is unsure, you can
use the MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist]
to update bogofilter's training.

The \"Bogofilter FAQ\" suggests that you run the following
occasionally to shrink the database:

    bogoutil -d wordlist.db | bogoutil -l wordlist.db.new
    mv wordlist.db wordlist.db.prv
    mv wordlist.db.new wordlist.db

The \"Bogofilter tuning HOWTO\" describes how you can fine-tune Bogofilter."
  (or mh-bogofilter-executable
      (error "Unable to find the bogofilter executable"))
  (let ((spam-file (mh-msg-filename msg mh-current-folder)))
    (mh-truncate-log-buffer)
    ;; Put call-process output in log buffer if we are saving it
    ;; (this happens if mh-junk-background is t).
    (with-current-buffer mh-log-buffer
      ;; The message file is the program's standard input; "-s" is the
      ;; spam-training switch described in the docstring.
      (call-process mh-bogofilter-executable spam-file mh-junk-background
                    nil "-s"))))
460
461 ;;;###mh-autoload
(defun mh-bogofilter-whitelist (msg)
  "Whitelist MSG with bogofilter.

See `mh-bogofilter-blacklist' for more information."
  (or mh-bogofilter-executable
      (error "Unable to find the bogofilter executable"))
  (let ((ham-file (mh-msg-filename msg mh-current-folder)))
    (mh-truncate-log-buffer)
    ;; Put call-process output in log buffer if we are saving it
    ;; (this happens if mh-junk-background is t).
    (with-current-buffer mh-log-buffer
      ;; The message file is the program's standard input.
      (call-process mh-bogofilter-executable ham-file mh-junk-background
                    nil "-n"))))
475
476 \f
477
478 ;; SpamProbe Interface
479
(defvar mh-spamprobe-executable
  (executable-find "spamprobe")
  "Result of `executable-find' for \"spamprobe\" when this file was loaded.
It is nil if the program was not found.")
481
482 ;;;###mh-autoload
(defun mh-spamprobe-blacklist (msg)
  "Blacklist MSG with SpamProbe.

SpamProbe is a Bayesian spam filtering program.  Get it from your
local distribution or from the SpamProbe web site at URL
`http://spamprobe.sourceforge.net'.

To use SpamProbe, add the following recipes to \".procmailrc\":

    PATH=$PATH:/usr/bin/mh
    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamProbe.
    :0
    SCORE=| spamprobe receive

    :0 wf
    | formail -I \"X-SpamProbe: $SCORE\"

    :0:
    *^X-SpamProbe: SPAM
    spam/.

If SpamProbe classifies a message incorrectly, you can use the
MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist] to
update SpamProbe's training."
  (or mh-spamprobe-executable
      (error "Unable to find the spamprobe executable"))
  (let ((spam-file (mh-msg-filename msg mh-current-folder)))
    (mh-truncate-log-buffer)
    ;; Put call-process output in log buffer if we are saving it
    ;; (this happens if mh-junk-background is t).
    (with-current-buffer mh-log-buffer
      ;; The message file is the program's standard input.
      (call-process mh-spamprobe-executable spam-file mh-junk-background
                    nil "spam"))))
518
519 ;;;###mh-autoload
(defun mh-spamprobe-whitelist (msg)
  "Whitelist MSG with SpamProbe.

See `mh-spamprobe-blacklist' for more information."
  (or mh-spamprobe-executable
      (error "Unable to find the spamprobe executable"))
  (let ((ham-file (mh-msg-filename msg mh-current-folder)))
    (mh-truncate-log-buffer)
    ;; Put call-process output in log buffer if we are saving it
    ;; (this happens if mh-junk-background is t).
    (with-current-buffer mh-log-buffer
      ;; The message file is the program's standard input.
      (call-process mh-spamprobe-executable ham-file mh-junk-background
                    nil "good"))))
533
534 (provide 'mh-junk)
535
536 ;; Local Variables:
537 ;; indent-tabs-mode: nil
538 ;; sentence-end-double-space: nil
539 ;; End:
540
541 ;;; mh-junk.el ends here