diff options
author | rmanfredi <rmanfredi@ce11206b-0fef-46ef-8c6b-32a851c6a20e> | 2008-06-13 13:50:11 +0000 |
---|---|---|
committer | rmanfredi <rmanfredi@ce11206b-0fef-46ef-8c6b-32a851c6a20e> | 2008-06-13 13:50:11 +0000 |
commit | 96df6268757f4e72903e73de4247c5c53e60d0b0 (patch) | |
tree | f6e402b7f307948c968664fd52dfaaf19b0c5816 | |
parent | c1130e6651d031d18e736ca1200213278f5e6bf3 (diff) |
Added minimal MIME handling for biffing to extract the text/plain part of
the message, if available. If only text/html is available, the HTML is
stripped for compact emission on the terminal.
git-svn-id: svn://svn.code.sf.net/p/mailagent/code/trunk/mailagent@40 ce11206b-0fef-46ef-8c6b-32a851c6a20e
-rw-r--r-- | MANIFEST | 3 | ||||
-rw-r--r-- | agent/pl/biff.pl | 174 | ||||
-rw-r--r-- | agent/test/actions | 1 | ||||
-rw-r--r-- | agent/test/cmd/biff.t | 16 | ||||
-rw-r--r-- | agent/test/mime | 38 | ||||
-rw-r--r-- | revision.h | 2 |
6 files changed, 230 insertions, 4 deletions
@@ -267,6 +267,7 @@ agent/test/filter/range.t Selector range tests agent/test/filter/status.t Action status updating tests agent/test/level Default logging level for tests agent/test/mail The mail used by testing routines +agent/test/mime Sample MIME email with HTML and plain parts agent/test/misc/ Directory for miscellaneous tests agent/test/misc/compress.t Folder compression checks agent/test/misc/mh.t MH-style folder checks @@ -301,7 +302,7 @@ agent/test/pl/mail.pl Modifies mail components agent/test/pl/misc.pl Set up for miscellaneous tests agent/test/pl/mta.pl Trivial MTA and NTA for tests agent/test/qp Sample quoted-printable encoded email -agent/test/rules Rules used by filtering tests +agent/test/rules Rules used by filtering tests bin/ Directory for uninstalled binaries bin/perload The dataloading/autoloading perl translator bin/svn-revision Updates the top "revision.h" file if needed diff --git a/agent/pl/biff.pl b/agent/pl/biff.pl index e51523c..85a2a57 100644 --- a/agent/pl/biff.pl +++ b/agent/pl/biff.pl @@ -230,7 +230,7 @@ sub body { local($trim) = @_; # Whether top reply text should be trimmed local($len) = defined $cf'bifflen ? $cf'bifflen : 560; local($lines) = defined $cf'bifflines ? $cf'bifflines : 7; - local(@body) = split(/\n/, $'Header{'Body'}); + local(@body) = split(/\n/, ${$'Header{'=Body='}}); local($skipnl) = $cf'biffnl =~ /OFF/i; # Skip blank lines? 
local($_); local($res) = ''; @@ -238,6 +238,13 @@ sub body { # Setting bifflen or bifflines to 0 means no body return '' if $len == 0 || $lines == 0; + my $content; + $content = unmime(\@body) if $'Header{'Mime-Version'}; + + &'add_log("retained content for biffing is $content") + if length($content) && $'loglvl > 8; + + strip_html(\@body) if $content =~ /html\b/; &trim(*body) if $trim; # Smart trim of leading reply text &mh(*body, $len) if $cf'biffmh =~ /^on/i; @@ -424,5 +431,170 @@ sub format { push(@ary, $body); # Remaining information on one line } +# Un-MIME the body by removing all the MIME headers and looking for the +# first text entity in the message. +# The supplied array is updated in-place and will contain on return the +# lines of the MIME entity that was retained. +# Returns the type of the retained MIME entity. +# NB: if no text part is found, the array will be empty upon return. +sub unmime { + my ($aref) = @_; + my $content = lc($'Header{'Content-Type'}); + $content =~ s/\(.*?\)\s*//g; # Removed allowed RFC822 comments + + &'add_log("global MIME content-type is $content") if $'loglvl > 16; + return $content unless $content =~ m|^multipart/|; + + my ($boundary) = $content =~ /boundary=(\S+);/; + ($boundary) = $content =~ /boundary=(\S+)/ unless length $boundary; + $boundary = $1 if $boundary =~ /^"(.*)"/ || $boundary =~ /^'(.*)'/; + + # We do not perform a recursive MIME parsing here + + my $entity_content; + my $header; + + &'add_log("searching text part for biffing, boundary=$boundary") + if $'loglvl > 16; + + my @entity; + my $grabbed = 0; + + for (;;) { + unless ($grabbed) { + return undef unless skip_past($aref, $boundary); + } + $grabbed = 0; + $header = parse_header($aref); + $entity_content = lc($header->{'Content-Type'}); + $entity_content =~ s/\(.*?\)\s*//g; + &'add_log("parsed entity header: content is $entity_content") + if $'loglvl > 19; + if ($entity_content =~ m|^text/|) { + # We found (another) text part, collect it... 
+ @entity = (); + my $end = !skip_past($aref, $boundary, \@entity); + $grabbed = 1; # Avoid skipping at next loop iteration + last if $entity_content eq "text/plain"; # We found the best one + last if $end; + } + } + + &'add_log("kept entity $entity_content for biffing") if $'loglvl > 18; + + # Maybe the entity bears a transfer encoding? + my $entity_encoding = $header->{'Content-Transfer-Encoding'}; + $entity_encoding =~ s/\(.*?\)\s*//g; + + # XXX code duplication with body_check(), factorize some day... + my $output; + my $error; + + if ($entity_encoding =~ /^base64\s*$/i) { + base64'reset(length $'Header{'Body'}); + foreach my $d (@entity) { + base64'decode($d); + } + $error = base64'error_msg(); + $output = base64'output(); + } elsif ($entity_encoding =~ /^quoted-printable\s*$/i) { + qp'reset(length $'Header{'Body'}); + foreach my $d (@entity) { + qp'decode($d); + } + $error = qp'error_msg(); + $output = qp'output(); + } else { + $error = "no encoding"; + } + + &'add_log("decoded entity ($entity_encoding), error=$error") + if $'loglvl > 18; + + if (length $error) { + @$aref = @entity; + } else { + @$aref = split(/\r?\n/, $$output); + } + return $entity_content; +} + +# Skip past named boundary in the supplied array +# If $collect is a defined ARRAY ref, push there all the lines we see until +# the next boundary. +# Return false when we see the LAST boundary in the message, meaning there +# are no more parts to consider. 
+sub skip_past { + my ($aref, $boundary, $collect) = @_; + my $l; + while (defined ($l = shift @$aref)) { + return 0 if $l eq "--$boundary--"; + return 1 if $l eq "--$boundary"; + push(@$collect, $l) if defined $collect; + } + return undef; # Not found +} + +# Parse embedded MIME headers, returning hash ref +sub parse_header { + my ($aref) = @_; + my %header; + my $val; + my $last_header; + my $l; + my $saw_something = 0; + while (defined ($l = shift @$aref)) { + last if $l =~ /^$/ && $saw_something; + $saw_something++; + if ($l =~ /^\s/) { + $l =~ s/^\s+/ /; + $header{$last_header} .= $l if length $last_header; + } elsif (my ($field, $value) = $l =~ /^([!-9;-~\w-]+):\s*(.*)/) { + $last_header = header'normalize($field); + if ($header{$last_header} ne '') { + $header{$last_header} .= "\n" . $value; + } else { + $header{$last_header} = $value; + } + } + } + return \%header; +} + +# Strip HTML in-place and remove spurious blank lines +# This is done only on a best-effort basis to make the biff output nice +sub strip_html { + my ($aref) = @_; + my @out; + my $in_style = 0; + my $is_nl; + my $last_was_nl = 0; + my $l; + + while (defined ($l = shift @$aref)) { + $in_style++ while $l =~ s/<style\b.*?>//; + $in_style-- while $l =~ s|</style>||; + next if $in_style; + $l =~ s/<[^\0]*?>//g; + $l =~ s/&(\w)cedil;/$1/g; + $l =~ s/&(\w)acute;/$1/g; + $l =~ s/&(\w)grave;/$1/g; + $l =~ s/&(\w)circ;/$1/g; + $l =~ s/&(\w)uml;/$1/g; + $l =~ s/&quot;/'/g; + $l =~ s/&nbsp;/ /g; + $l =~ s/&#160;/ /g; # Same as &nbsp; + $l =~ s/&#(\d+);/chr($1)/g; # Corect only for the ASCII part... 
+ $l =~ s/&amp;/&/g; # Must come last + $l =~ s/^\s*//; + $is_nl = 0 == length($l); + next if $last_was_nl && $is_nl; + $last_was_nl = $is_nl; + push(@out, $l); + } + + @$aref = @out; +} + package main; diff --git a/agent/test/actions b/agent/test/actions index 8f34ed7..981ec90 100644 --- a/agent/test/actions +++ b/agent/test/actions @@ -145,6 +145,7 @@ X-Tag: /begin/ X-Tag: /biff 1/ { BIFF off; LEAVE; BIFF on; SAVE ok }; X-Tag: /biff 2/ { BIFF bfmt; SAVE ok; BIFF -l off; LEAVE }; +X-Tag: /biff 3/ { BIFF bfmt; SAVE ok; }; X-Tag: /bounce 1/ { BOUNCE nobody }; X-Tag: /bounce 2/ { BOUNCE "list" }; diff --git a/agent/test/cmd/biff.t b/agent/test/cmd/biff.t index bef180f..27c2e45 100644 --- a/agent/test/cmd/biff.t +++ b/agent/test/cmd/biff.t @@ -74,7 +74,21 @@ $? == 0 || print "21\n"; &check_log('^\rTo: ram', 31) == 1 || print "32\n"; &check_log('^Got mail in ~/ok', 33) == 1 || print "34\n"; &check_log('^\r####', 35) == 1 || print "36\n"; -&not_log('^\r----', 37); +&check_log('moderated usenet', 37) == 1 || print "38\n"; +&not_log('^\r----', 39); +&cleanup; + +cp_mail("../mime"); +&add_header('X-Tag: biff 3'); +&make_tty(0, 0777, 40); # 40 & 41 +`$cmd`; +$? 
== 0 || print "41\n"; +-f 'ok' || print "42\n"; +-s 'tty0' || print "43\n"; +&get_log(44, 'tty0'); +&not_log('--foo', 45); +&check_log('^Got mail in ~/ok', 46) == 1 || print "47\n"; +&check_log('successfully decoded', 48) == 1 || print "49\n"; &cleanup; unlink 'mail'; diff --git a/agent/test/mime b/agent/test/mime new file mode 100644 index 0000000..8e05599 --- /dev/null +++ b/agent/test/mime @@ -0,0 +1,38 @@ +From raphael_manfredi@pobox.com Tue Jun 10 17:44:12 2008 +Received: from tours.ram.loc (fetchmail@localhost [127.0.0.1]) + by tours.ram.loc (8.14.3/8.13.8/Debian-3) with ESMTP id m5AFiCJq002957 + for <ram@localhost>; Tue, 10 Jun 2008 17:44:12 +0200 +From: "Raphael Manfredi" <Raphael_Manfredi@pobox.com> +To: "Raphael Manfredi" <Raphael_Manfredi@pobox.com> +Date: Tue, 10 Jun 2008 15:35:21 +0000 +Subject: Sample MIME message +Message-ID: <D42E3A26BC29C94DB4CC3DA3569AE82B0157AE42DE@GVW1088EXB.pobox.com> +MIME-Version: 1.0 +Content-Type: multipart/alternative; boundary="foo" +Status: RO +Content-Length: 609 +Lines: 22 + +--foo +Content-Type: text/html; charset="iso8859-1" + +<html> +<head> + <title>Sample HTML part</title> +</head> +<body> + <p>Sole paragraph</p> +</body> +</html> + +--foo +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: base64 +Content-Length: 346 + +VGhpcyBtZXNzYWdlIGlzIG9uZSBiaWcgTUlNRSBwYXJ0IHRoYXQgaGFzIGJlZW4gYmFzZTY0LWVu +Y29kZWQuDQoNClRoZSBtYWlsYWdlbnQgdGVzdCBzdWl0ZSBpcyBnb2luZyB0byBsb29rIGZvciB0 +aGUgZm9sbG93aW5nIGxpbmU6DQoNCiAgICAgICAgKioqIFlFUywgc3VjY2Vzc2Z1bGx5IGRlY29k +ZWQgKioqDQoNCmluIHRoZSBkZWNvZGVkIGJvZHkgYXMgcHJvb2YgdGhhdCB0aGUgYmFzZTY0IGRl +Y29kaW5nIGxvZ2ljIGlzIHdvcmtpbmcuDQo= +--foo-- @@ -4,4 +4,4 @@ * Generated by ./bin/svn-revision. */ -#define REVISION 37 +#define REVISION 39 |