diff options
author | Raphael Manfredi <Raphael_Manfredi@pobox.com> | 2020-04-08 10:16:01 +0200 |
---|---|---|
committer | Raphael Manfredi <Raphael_Manfredi@pobox.com> | 2020-04-09 16:16:46 +0200 |
commit | 9a14a1512556db68543bfeebac2138ce1be0b0fd (patch) | |
tree | 07b18f90e51052742f61a23a542f97fe9d17e7f2 | |
parent | 3b6d126355db75f9f5015175cefd0819b8bff131 (diff) |
Biffing for unknown charsets is now forced to ASCII.
Output is converted into ASCII characters, with controls swallowed
silently and non-ASCII characters dumped as '.'. This allows biffing to
occur even when the body of the message is encoded with an unknown
charset, the assumption being that ASCII is a firm common ground for all
such charsets!
-rw-r--r-- | agent/pl/biff.pl | 28 |
1 files changed, 26 insertions, 2 deletions
diff --git a/agent/pl/biff.pl b/agent/pl/biff.pl index 6346440..f48c971 100644 --- a/agent/pl/biff.pl +++ b/agent/pl/biff.pl @@ -267,6 +267,16 @@ sub is_blank { return $l =~ /^[\W_]*$/; # Contains only non-words and underscores } +# Keep only printable ASCII chars from biffable lines in specified body array +# Control chars are swallowed, non-ASCII chars converted to '.'. +sub to_ascii { + my ($aref, $lines) = @_; # Body as array ref, amount of lines to convert + my $n = $lines > @{$aref} ? @{$aref} : $lines; + for (my $i = 0; $i < $n; $i++) { + $aref->[$i] =~ s/(.)/mangle_ascii($1)/ge; + } +} + # Print first $cf'bifflines lines or $cf'bifflen charaters, whichever # comes first. Assumes TTY already opened correctly # Also known as the %-B macro if called body(0), or %-T if called body(1). @@ -285,6 +295,7 @@ sub body { my ($content, $entity, $enc, $biffenc); ($content, $entity) = unmime(\@body) if $'Header{'Mime-Version'}; + my $convert_to_ascii = 0; if (length($content)) { &'add_log("biffing $entity entity is $content") if $'loglvl > 8; my $charset; @@ -292,9 +303,9 @@ sub body { if (defined $charset) { $enc = Encode::find_encoding($charset); unless (ref $enc) { - &'add_log("WARNING unknown charset '$charset', no body shown") + &'add_log("WARNING unknown charset '$charset', handling as ASCII") if $'loglvl > 1; - @body = ("[body hidden: unknown charset '$charset']"); + $convert_to_ascii = 1; } # If the encoding is the same as the one used in the terminal, @@ -313,6 +324,7 @@ sub body { strip_html(\@body) if $content =~ /html\b/; &trim(*body) if $trim; # Smart trim of leading reply text + to_ascii(\@body, $lines) if $convert_to_ascii; &mh(*body, $len) if $cf'biffmh =~ /^on/i; my $reformat = $cf'biffnice =~ /^on/i; @@ -518,6 +530,18 @@ sub format { # Perload OFF +# Mangle given character to ASCII, or swallow it if CTRL char +# MUST NOT be dataloaded (would mess $1 in the regexp) +sub mangle_ascii { + my ($x) = @_; + my $c = unpack("U", $x); # Read as Unicode + return 
'' if $c <= 8; # Invisible + # Chars 9 and 10 are \t and \n in ASCII + return '' if $c >= 11 && $c < 32; # Invisible + return '.' if $c >= 127; # Outside the ASCII range + return pack("C", $c); # Write as a byte (ASCII) +} + # Quoted-printable decoder # MUST NOT be dataloaded (would mess $1 in the regexp) sub to_txt { |