summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorrmanfredi <rmanfredi@ce11206b-0fef-46ef-8c6b-32a851c6a20e>2008-06-13 13:50:11 +0000
committerrmanfredi <rmanfredi@ce11206b-0fef-46ef-8c6b-32a851c6a20e>2008-06-13 13:50:11 +0000
commit96df6268757f4e72903e73de4247c5c53e60d0b0 (patch)
treef6e402b7f307948c968664fd52dfaaf19b0c5816
parentc1130e6651d031d18e736ca1200213278f5e6bf3 (diff)
Added minimal MIME handling for biffing to extract the text/plain part of
the message, if available. If only text/html is available, the HTML is stripped for compact emission on the terminal. git-svn-id: svn://svn.code.sf.net/p/mailagent/code/trunk/mailagent@40 ce11206b-0fef-46ef-8c6b-32a851c6a20e
-rw-r--r--MANIFEST3
-rw-r--r--agent/pl/biff.pl174
-rw-r--r--agent/test/actions1
-rw-r--r--agent/test/cmd/biff.t16
-rw-r--r--agent/test/mime38
-rw-r--r--revision.h2
6 files changed, 230 insertions, 4 deletions
diff --git a/MANIFEST b/MANIFEST
index 5394ba4..75ec678 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -267,6 +267,7 @@ agent/test/filter/range.t Selector range tests
agent/test/filter/status.t Action status updating tests
agent/test/level Default logging level for tests
agent/test/mail The mail used by testing routines
+agent/test/mime Sample MIME email with HTML and plain parts
agent/test/misc/ Directory for miscellaneous tests
agent/test/misc/compress.t Folder compression checks
agent/test/misc/mh.t MH-style folder checks
@@ -301,7 +302,7 @@ agent/test/pl/mail.pl Modifies mail components
agent/test/pl/misc.pl Set up for miscellaneous tests
agent/test/pl/mta.pl Trivial MTA and NTA for tests
agent/test/qp Sample quoted-printable encoded email
-agent/test/rules Rules used by filtering tests
+agent/test/rules Rules used by filtering tests
bin/ Directory for uninstalled binaries
bin/perload The dataloading/autoloading perl translator
bin/svn-revision Updates the top "revision.h" file if needed
diff --git a/agent/pl/biff.pl b/agent/pl/biff.pl
index e51523c..85a2a57 100644
--- a/agent/pl/biff.pl
+++ b/agent/pl/biff.pl
@@ -230,7 +230,7 @@ sub body {
local($trim) = @_; # Whether top reply text should be trimmed
local($len) = defined $cf'bifflen ? $cf'bifflen : 560;
local($lines) = defined $cf'bifflines ? $cf'bifflines : 7;
- local(@body) = split(/\n/, $'Header{'Body'});
+ local(@body) = split(/\n/, ${$'Header{'=Body='}});
local($skipnl) = $cf'biffnl =~ /OFF/i; # Skip blank lines?
local($_);
local($res) = '';
@@ -238,6 +238,13 @@ sub body {
# Setting bifflen or bifflines to 0 means no body
return '' if $len == 0 || $lines == 0;
+ my $content;
+ $content = unmime(\@body) if $'Header{'Mime-Version'};
+
+ &'add_log("retained content for biffing is $content")
+ if length($content) && $'loglvl > 8;
+
+ strip_html(\@body) if $content =~ /html\b/;
&trim(*body) if $trim; # Smart trim of leading reply text
&mh(*body, $len) if $cf'biffmh =~ /^on/i;
@@ -424,5 +431,170 @@ sub format {
push(@ary, $body); # Remaining information on one line
}
+# Un-MIME the body by removing all the MIME headers and looking for the
+# first text entity in the message.
+#
+# $aref is a reference to the array of body lines.  It is updated in-place
+# and will contain on return the lines of the MIME entity that was retained,
+# decoded when its transfer encoding is base64 or quoted-printable and the
+# decoder reported no error.  A non-multipart message is left untouched.
+#
+# A text/plain part is preferred and stops the search as soon as it is seen;
+# otherwise the last text/* part found in the message is kept.
+#
+# Returns the lower-cased Content-Type of the retained MIME entity (the
+# global Content-Type when the message is not multipart), or undef when no
+# text part could be found.
+# NB: if no text part is found, the array will be empty upon return.
+sub unmime {
+	my ($aref) = @_;
+	my $raw = $'Header{'Content-Type'};
+	$raw =~ s/\(.*?\)\s*//g;		# Remove allowed RFC822 comments
+	my $content = lc($raw);
+
+	&'add_log("global MIME content-type is $content") if $'loglvl > 16;
+	return $content unless $content =~ m|^multipart/|;
+
+	# The boundary is compared verbatim against the body lines and MIME
+	# boundaries are case-sensitive, so it must be taken from the header
+	# as written, not from its lower-cased copy.
+	my ($boundary) =
+		$raw =~ /\bboundary\s*=\s*("[^"]*"|'[^']*'|[^\s;]+)/i;
+	$boundary = '' unless defined $boundary;
+	$boundary =~ s/^(["'])(.*)\1$/$2/;	# Strip surrounding quotes
+
+	unless (length $boundary) {
+		@$aref = ();				# Malformed multipart: nothing usable
+		return undef;
+	}
+
+	# We do not perform a recursive MIME parsing here
+
+	&'add_log("searching text part for biffing, boundary=$boundary")
+		if $'loglvl > 16;
+
+	my @entity;					# Lines of the retained text part
+	my $header;					# Parsed MIME header of that part
+	my $entity_content;			# Its content type, undef until one is found
+
+	# Skip the preamble, then walk the parts one by one.  $more becomes
+	# false once the closing boundary was seen or the body is exhausted.
+	my $more = skip_past($aref, $boundary);
+
+	while ($more) {
+		my $part_header = parse_header($aref);
+		my $part_content = lc($part_header->{'Content-Type'});
+		$part_content =~ s/\(.*?\)\s*//g;
+		&'add_log("parsed entity header: content is $part_content")
+			if $'loglvl > 19;
+
+		unless ($part_content =~ m|^text/|) {
+			$more = skip_past($aref, $boundary);	# Not text, discard it
+			next;
+		}
+
+		# We found (another) text part, collect it.  Its header and type
+		# are recorded along with the lines so that a later non-text part
+		# cannot clobber what was retained.
+		my @lines;
+		$more = skip_past($aref, $boundary, \@lines);
+		@entity = @lines;
+		$header = $part_header;
+		$entity_content = $part_content;
+
+		# The type usually carries parameters ("text/plain; charset=..."),
+		# hence a prefix match to recognize the best candidate.
+		last if $entity_content =~ m{^text/plain\s*(?:;|$)};
+	}
+
+	unless (defined $entity_content) {
+		@$aref = ();				# No text part: leave an empty body
+		return undef;
+	}
+
+	&'add_log("kept entity $entity_content for biffing") if $'loglvl > 18;
+
+	# Maybe the entity bears a transfer encoding?
+	my $entity_encoding = $header->{'Content-Transfer-Encoding'};
+	$entity_encoding =~ s/\(.*?\)\s*//g;
+
+	# XXX code duplication with body_check(), factorize some day...
+	my $output;
+	my $error;
+
+	# NOTE(review): reset() is given the length of the whole message body,
+	# presumably as a sizing hint for the decoder -- confirm in base64/qp.
+	if ($entity_encoding =~ /^base64\s*$/i) {
+		base64'reset(length $'Header{'Body'});
+		foreach my $d (@entity) {
+			base64'decode($d);
+		}
+		$error = base64'error_msg();
+		$output = base64'output();
+	} elsif ($entity_encoding =~ /^quoted-printable\s*$/i) {
+		qp'reset(length $'Header{'Body'});
+		foreach my $d (@entity) {
+			qp'decode($d);
+		}
+		$error = qp'error_msg();
+		$output = qp'output();
+	} else {
+		$error = "no encoding";		# Makes us keep the raw lines below
+	}
+
+	&'add_log("decoded entity ($entity_encoding), error=$error")
+		if $'loglvl > 18;
+
+	if (length $error) {
+		@$aref = @entity;			# Undecoded (or undecodable) lines
+	} else {
+		@$aref = split(/\r?\n/, $$output);	# $output is dereferenced as a scalar ref
+	}
+	return $entity_content;
+}
+
+# Skip past named boundary in the supplied array.
+#
+# Lines are shifted out of @$aref until a delimiter line for $boundary is
+# consumed.  If $collect is a defined ARRAY ref, all the lines seen before
+# that delimiter are pushed there, unmodified.
+#
+# Trailing white space on a delimiter line is ignored: RFC 2046 allows
+# transport padding after the boundary, and a CR may linger when the body
+# used CRLF line endings.
+#
+# Returns 1 when a regular boundary was found, 0 (false) when we see the
+# LAST boundary in the message, meaning there are no more parts to consider,
+# and undef when the array was exhausted without finding any boundary.
+sub skip_past {
+	my ($aref, $boundary, $collect) = @_;
+	my $open = "--$boundary";
+	my $close = "--$boundary--";
+	while (defined (my $line = shift @$aref)) {
+		(my $probe = $line) =~ s/[ \t\r]+$//;	# Compare without padding
+		return 0 if $probe eq $close;
+		return 1 if $probe eq $open;
+		push(@$collect, $line) if defined $collect;
+	}
+	return undef;	# Not found
+}
+
+# Parse embedded MIME headers, returning hash ref.
+#
+# Lines are shifted out of @$aref up to and including the blank line that
+# terminates the header; a blank line in first position does not count as
+# a terminator and is skipped.
+#
+# The returned hash is keyed by the field name as returned by
+# header'normalize().  Continuation lines are appended to the value of the
+# preceding field with their leading white space squeezed to one space, and
+# the values of a repeated field are joined with a newline.
+sub parse_header {
+	my ($aref) = @_;
+	my %fields;
+	my $current;		# Normalized name of the field last seen
+	my $consumed = 0;	# Number of lines read so far
+
+	while (defined (my $line = shift @$aref)) {
+		last if $consumed && $line =~ /^$/;
+		$consumed++;
+
+		if ($line =~ /^\s/) {
+			# Continuation of the previous field, if there was one
+			$line =~ s/^\s+/ /;
+			$fields{$current} .= $line if length $current;
+			next;
+		}
+
+		# Anything that does not look like "Field: value" is ignored
+		next unless $line =~ /^([!-9;-~\w-]+):\s*(.*)/;
+		my ($field, $value) = ($1, $2);
+
+		$current = header'normalize($field);
+		$fields{$current} = $fields{$current} ne ''
+			? $fields{$current} . "\n" . $value
+			: $value;
+	}
+
+	return \%fields;
+}
+
+# Strip HTML in-place and remove spurious blank lines.
+# This is done only on a best-effort basis to make the biff output nice.
+#
+# $aref is a reference to the array of lines, which is rewritten with:
+# - the content of <style> elements dropped (tag names match in any case),
+# - all the tags opening and closing on the same line removed,
+# - accented-letter entities replaced by their base letter, &quot; by a
+#   single quote, non-breaking spaces by a space, numeric entities by the
+#   corresponding character and &amp; by an ampersand,
+# - leading white space trimmed and runs of blank lines squeezed to one.
+sub strip_html {
+	my ($aref) = @_;
+	my @out;
+	my $in_style = 0;		# Non-zero whilst within a <style> element
+	my $last_was_nl = 0;	# Whether previously emitted line was blank
+
+	while (defined (my $line = shift @$aref)) {
+		# A style sheet opened and closed on the same line vanishes whole
+		$line =~ s|<style\b[^>]*>.*?</style\s*>||gi;
+		$in_style++ while $line =~ s/<style\b.*?>//i;
+		$in_style-- while $line =~ s|</style\s*>||i;
+		# A stray </style> must not make us swallow the rest of the text
+		$in_style = 0 if $in_style < 0;
+		next if $in_style;
+		$line =~ s/<[^\0]*?>//g;
+		$line =~ s/&(\w)(?:cedil|acute|grave|circ|uml);/$1/g;
+		$line =~ s/&quot;/'/g;
+		$line =~ s/&nbsp;/ /g;
+		$line =~ s/&#160;/ /g;			# Same as &nbsp;
+		# The /e modifier is required for chr() to be evaluated instead of
+		# being inserted literally.  Correct only for the ASCII part...
+		$line =~ s/&#(\d+);/chr($1)/ge;
+		$line =~ s/&amp;/&/g;			# Must come last
+		$line =~ s/^\s*//;
+		my $is_nl = 0 == length($line);
+		next if $last_was_nl && $is_nl;
+		$last_was_nl = $is_nl;
+		push(@out, $line);
+	}
+
+	@$aref = @out;
+}
+
package main;
diff --git a/agent/test/actions b/agent/test/actions
index 8f34ed7..981ec90 100644
--- a/agent/test/actions
+++ b/agent/test/actions
@@ -145,6 +145,7 @@ X-Tag: /begin/
X-Tag: /biff 1/ { BIFF off; LEAVE; BIFF on; SAVE ok };
X-Tag: /biff 2/ { BIFF bfmt; SAVE ok; BIFF -l off; LEAVE };
+X-Tag: /biff 3/ { BIFF bfmt; SAVE ok; };
X-Tag: /bounce 1/ { BOUNCE nobody };
X-Tag: /bounce 2/ { BOUNCE "list" };
diff --git a/agent/test/cmd/biff.t b/agent/test/cmd/biff.t
index bef180f..27c2e45 100644
--- a/agent/test/cmd/biff.t
+++ b/agent/test/cmd/biff.t
@@ -74,7 +74,21 @@ $? == 0 || print "21\n";
&check_log('^\rTo: ram', 31) == 1 || print "32\n";
&check_log('^Got mail in ~/ok', 33) == 1 || print "34\n";
&check_log('^\r####', 35) == 1 || print "36\n";
-&not_log('^\r----', 37);
+&check_log('moderated usenet', 37) == 1 || print "38\n";
+&not_log('^\r----', 39);
+&cleanup;
+
+cp_mail("../mime");
+&add_header('X-Tag: biff 3');
+&make_tty(0, 0777, 40); # 40 & 41
+`$cmd`;
+$? == 0 || print "41\n";
+-f 'ok' || print "42\n";
+-s 'tty0' || print "43\n";
+&get_log(44, 'tty0');
+&not_log('--foo', 45);
+&check_log('^Got mail in ~/ok', 46) == 1 || print "47\n";
+&check_log('successfully decoded', 48) == 1 || print "49\n";
&cleanup;
unlink 'mail';
diff --git a/agent/test/mime b/agent/test/mime
new file mode 100644
index 0000000..8e05599
--- /dev/null
+++ b/agent/test/mime
@@ -0,0 +1,38 @@
+From raphael_manfredi@pobox.com Tue Jun 10 17:44:12 2008
+Received: from tours.ram.loc (fetchmail@localhost [127.0.0.1])
+ by tours.ram.loc (8.14.3/8.13.8/Debian-3) with ESMTP id m5AFiCJq002957
+ for <ram@localhost>; Tue, 10 Jun 2008 17:44:12 +0200
+From: "Raphael Manfredi" <Raphael_Manfredi@pobox.com>
+To: "Raphael Manfredi" <Raphael_Manfredi@pobox.com>
+Date: Tue, 10 Jun 2008 15:35:21 +0000
+Subject: Sample MIME message
+Message-ID: <D42E3A26BC29C94DB4CC3DA3569AE82B0157AE42DE@GVW1088EXB.pobox.com>
+MIME-Version: 1.0
+Content-Type: multipart/alternative; boundary="foo"
+Status: RO
+Content-Length: 609
+Lines: 22
+
+--foo
+Content-Type: text/html; charset="iso8859-1"
+
+<html>
+<head>
+ <title>Sample HTML part</title>
+</head>
+<body>
+ <p>Sole paragraph</p>
+</body>
+</html>
+
+--foo
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+Content-Length: 346
+
+VGhpcyBtZXNzYWdlIGlzIG9uZSBiaWcgTUlNRSBwYXJ0IHRoYXQgaGFzIGJlZW4gYmFzZTY0LWVu
+Y29kZWQuDQoNClRoZSBtYWlsYWdlbnQgdGVzdCBzdWl0ZSBpcyBnb2luZyB0byBsb29rIGZvciB0
+aGUgZm9sbG93aW5nIGxpbmU6DQoNCiAgICAgICAgKioqIFlFUywgc3VjY2Vzc2Z1bGx5IGRlY29k
+ZWQgKioqDQoNCmluIHRoZSBkZWNvZGVkIGJvZHkgYXMgcHJvb2YgdGhhdCB0aGUgYmFzZTY0IGRl
+Y29kaW5nIGxvZ2ljIGlzIHdvcmtpbmcuDQo=
+--foo--
diff --git a/revision.h b/revision.h
index 885bad8..8d3e430 100644
--- a/revision.h
+++ b/revision.h
@@ -4,4 +4,4 @@
* Generated by ./bin/svn-revision.
*/
-#define REVISION 37
+#define REVISION 39