19 December 2007

How to make nice looking diffs

Yesterday I was wondering how to make nice-looking diff patch files, as plain "diff" gave really cryptic output that was not very user-friendly. I IM'ed Aizat, who happened to be online in Chile. He just said to use "svn diff". I told him that I was working on files local to my machine, so svn was not appropriate.

Googling didn't help much. So I just submitted the standard diff output as my patch.

Then this morning, Ow had a blog post about his patch, and he included his command line. The answer is "diff -Nau"!

So here is the patch for the archivemail-dspam script:

yky@x1407:~/dspam$ diff -Nau archivemail archivemail-dspam
--- archivemail 2007-12-18 19:13:34.000000000 +0800
+++ archivemail-dspam 2007-12-18 19:02:47.000000000 +0800
@@ -187,6 +187,8 @@
min_size = None
verbose = 0
warn_duplicates = 0
+ """ 071218 yky DSPAM-Confidence setting """
+ spam_confidence = 0.00

def parse_args(self, args, usage):
"""Set our runtime options from the command-line arguments.
@@ -206,7 +208,7 @@
"filter-append=", "pwfile=", "dont-mangle",
"archive-name=",
"preserve-unread", "quiet", "size=", "suffix=",
- "verbose", "version", "warn-duplicate"])
+ "verbose", "version", "warn-duplicate", "spam=" ])
except getopt.error, msg:
user_error(msg)

@@ -256,6 +258,8 @@
self.verbose = 1
if o == '--archive-name':
self.archive_name = a;
+ if o == '--spam':
+ self.spam_confidence = float(a)
if o in ('-V', '--version'):
print __version__ + "\n\n" + __copyright__
sys.exit(0)
@@ -265,7 +269,7 @@
"""Complain bitterly about our options now rather than later"""
if self.output_dir:
check_sane_destdir(self.output_dir)
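- if self.days_old_max < 1:
+ if self.days_old_max < 0:
user_error("--days argument must be greater than zero")
if self.days_old_max >= 10000: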
user_error("--days argument must be less than 10000")
@@ -661,6 +665,7 @@
--include-flagged messages flagged important can also be archived
--no-compress do not compress archives with gzip
--warn-duplicate warn about duplicate Message-IDs in the same mailbox
+ --spam=FLOAT SPAM Confidence levels ( e.g. 0.80 )
-v, --verbose report lots of extra debugging information
-q, --quiet quiet mode - print no statistics (suitable for crontab)
-V, --version display version information
@@ -737,6 +742,22 @@
mbox_from = "From %s %s\n" % (address, date_string)
return mbox_from

+
+def get_spam_confidence(message):
+ """Returns the DSPAM_Confidence from the message headers. Zero by default"""
+ """ 071218 yky Created """
+
+ assert(message != None)
+
+ for header in ('X-DSPAM-Confidence', 'SPAM-Confidence'):
+ confidence = message.get(header)
+ if confidence:
+ confidence_val = float( confidence )
+ if confidence_val:
+ vprint("Spam Confidence: %f " % confidence_val)
+ return confidence_val
+
+ return 0.0

def guess_return_path(message):
"""Return a guess at the Return Path address of an rfc822 message"""
@@ -987,6 +1008,11 @@
return 0
if options.preserve_unread and is_unread(message):
return 0
+
+ # 071218 yky Filtering by SPAM Confidence
+ if (options.spam_confidence > 0) and (options.spam_confidence > get_spam_confidence(message)):
+ return 0
+
return 1


@@ -1019,7 +1045,7 @@
max_days -- maximum number of days before message is considered old

"""
- assert(max_days >= 1)
+ assert(max_days >= 0)

time_now = time.time()
if time_message > time_now:


Thanks Ow!

yk.