##---------------------------------------------------------------------------## ## File: ## @(#) mhtxtplain.pl 2.8 99/08/15 22:19:04 ## Author: ## Earl Hood mhonarc@pobox.com ## Description: ## Library defines routine to filter text/plain body parts to HTML ## for MHonArc. ## Filter routine can be registered with the following: ## ## text/plain:m2h_text_plain'filter:mhtxtplain.pl ## ##---------------------------------------------------------------------------## ## MHonArc -- Internet mail-to-HTML converter ## Copyright (C) 1995-1999 Earl Hood, mhonarc@pobox.com ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ## 02111-1307, USA ##---------------------------------------------------------------------------## package m2h_text_plain; $Url = '(http://|https://|ftp://|afs://|wais://|telnet://|ldap://' . '|gopher://|news:|nntp:|mid:|cid:|mailto:|prospero:)'; $UrlExp = $Url . q%[^\s\(\)\|<>"']*[^\.?!;,"'\|\[\]\(\)\s<>]%; $HUrlExp = $Url . q%(?:&(?![gl]t;)|[^\s\(\)\|<>"'\&])+% . q%[^\.?!;,"'\|\[\]\(\)\s<>\&]%; $QuoteChars = '[>\|\]+:]'; $HQuoteChars = '>|[\|\]+:]'; ##---------------------------------------------------------------------------## ## Text/plain filter for mhonarc. The following filter arguments ## are recognized ($args): ## ## asis=set1:set2:... ## Colon separated lists of charsets to leave as-is. ## Only HTML special characters will be converted into ## entities. The default value is "us-ascii:iso-8859-1". ## ## default=set Default charset to use if not set. ## ## inlineexts="ext1,ext2,..." ## A comma separated list of message specified filename ## extensions to treat as inline data. ## Applicable only when uudecode options specified. ## ## keepspace Preserve whitespace if nonfixed ## ## nourl Do hyperlink URLs ## ## nonfixed Use normal typeface ## ## maxwidth=# Set the maximum width of lines. Lines exceeding ## the maxwidth will be broken up across multiple lines. ## ## quote Italicize quoted message text ## ## target=name Set TARGET attribute for links if converting URLs ## to links. Defaults to _top. ## ## uudecode Decoded any embedded uuencoded data. ## ## All arguments should be separated by at least one space ## sub filter { local($header, *fields, *data, $isdecode, $args) = @_; local($_); ## Parse arguments $args = "" unless defined($args); ## Check if decoding uuencoded data. The implementation chosen here ## for decoding uuencoded data was done so when uudecode is not ## specified, there is no extra overhead (besides the $args check for ## uudecode). However, when uudecode is specified, more overhead may ## exist over other potential implementations. ## I.e. We only try to penalize performance when uudecode is specified. if ($args =~ s/\buudecode\b//ig) { # $args has uudecode stripped out for recursive calls # Make sure we have needed routines require 'base64.pl'; require 'mhmimetypes.pl'; # Grab any filename extensions that imply inlining my $inlineexts = ''; if ($args =~ /\binlineexts=(\S+)/) { $inlineexts = ',' . lc($1) . ','; $inlineexts =~ s/['"]//g; } local($pdata); # have to use local() since typeglobs used my($inext, $uddata, $file, $urlfile); my @files = ( ); my $ret = ""; my $i = 0; # Split on uuencoded data. For text portions, recursively call # filter to convert text data: makes it easier to handle all # the various formatting options. foreach $pdata (split(/^(begin \d\d\d \S+\n[!-M].*?\nend\n)/sm, $data)) { if ($i % 2) { # uuencoded data # extract filename extension ($file) = $pdata =~ /^begin \d\d\d (\S+)/; if ($file =~ /\.(\w+)$/) { $inext = $1; } else { $inext = ""; } # decode data $uddata = base64::uudecode($pdata); # save to file push(@files, mhonarc::write_attachment( 'application/octet-stream', \$uddata, '', '', $inext)); $urlfile = mhonarc::htmlize($files[$#files]); # create link to file if (index($inlineexts, ','.lc($inext).',') >= $[) { $ret .= qq|| . qq|
\n|; } else { $ret .= qq|| . mhonarc::htmlize($file) . qq|
\n|; } } else { # plain text $ret .= &filter($header, *fields, *pdata, $isdecode, $args); } ++$i; } ## Done with uudecode return ($ret, @files); } my($charset, $nourl, $doquote, $igncharset, $nonfixed, $keepspace, $maxwidth, $target, $defset); my(%asis) = ( 'us-ascii' => 1, 'iso-8859-1' => 1, ); $nourl = ($mhonarc::NOURL || ($args =~ /\bnourl\b/i)); $doquote = ($args =~ /\bquote\b/i); $nonfixed = ($args =~ /\bnonfixed\b/i); $keepspace = ($args =~ /\bkeepspace\b/i); if ($args =~ /\bmaxwidth=(\d+)/i) { $maxwidth = $1; } else { $maxwidth = 0; } if ($args =~ /\bdefault=(\S+)/i) { $defset = lc $1; } else { $defset = 'us-ascii'; } $target = ""; if ($args =~ /\btarget="([^"]+)"/i) { $target = $1; } elsif ($args =~ /\btarget=(\S+)/i) { $target = $1; } $target =~ s/['"]//g; if ($target) { $target = qq/TARGET="$target"/; } $defset =~ s/['"\s]//g; ## Grab charset parameter (if defined) if ( defined($fields{'content-type'}) and $fields{'content-type'} =~ /\bcharset\s*=\s*([^\s;]+)/i ) { $charset = lc $1; $charset =~ s/['";\s]//g; } else { $charset = $defset; } ## Check if certain charsets should be left alone if ($args =~ /\basis=(\S+)/i) { my $t = lc $1; $t =~ s/['"]//g; %asis = ('us-ascii' => 1); # should us-ascii always be "as-is"? local($_); foreach (split(':', $t)) { $asis{$_} = 1; } } ## Check MIMECharSetConverters if charset should be left alone if (defined($readmail::MIMECharSetConverters{$charset}) and $readmail::MIMECharSetConverters{$charset} eq "-decode-") { $asis{$charset} = 1; } ## Check if max-width set if ($maxwidth) { $data =~ s/^(.*)$/&break_line($1, $maxwidth)/gem; } ## Convert data according to charset if (!$asis{$charset}) { # Japanese if ($charset =~ /iso-2022-jp/) { require "iso2022jp.pl"; return (&iso_2022_jp::jp2022_to_html($data, $nourl)); # Latin 1-6, Greek, Hebrew, Arabic } elsif ($charset =~ /\biso-8859-(10|[1-9])\b/ || $charset =~ /\blatin[1-6]\b/) { require "iso8859.pl"; $data = &iso_8859::str2sgml($data, $charset); # Other } else { warn qq/Warning: Unrecognized character set: $charset\n/; &esc_chars_inplace(\$data); } } else { &esc_chars_inplace(\$data); } ## Check for quoting if ($doquote) { $data =~ s@^( ?${HQuoteChars})(.*)$@$1$2@gom; } ## Check if using nonfixed font if ($nonfixed) { $data =~ s/(\r?\n)/
$1/g; if ($keepspace) { $data =~ s/^(.*)$/&preserve_space($1)/gem; } } else { $data = "
\n" . $data . "
\n"; } ## Convert URLs to hyperlinks $data =~ s@($HUrlExp)@$1@gio unless $nourl; ($data); } ##---------------------------------------------------------------------------## sub esc_chars_inplace { my($foo) = shift; $$foo =~ s/&/&/g; $$foo =~ s//>/g; 1; } ##---------------------------------------------------------------------------## sub preserve_space { my($str) = shift; 1 while $str =~ s/^([^\t]*)(\t+)/$1 . ' ' x (length($2) * 8 - length($1) % 8)/e; $str =~ s/ /\ /g; $str; } ##---------------------------------------------------------------------------## sub break_line { my($str) = shift; my($width) = shift; my($q, $new) = ('', ''); my($try, $trywidth, $len); ## Translate tabs to spaces 1 while $str =~ s/^([^\t]*)(\t+)/$1 . ' ' x (length($2) * 8 - length($1) % 8)/e; ## Do nothing if str <= width return $str if length($str) <= $width; ## See if str begins with a quote char if ($str =~ s/^( ?$QuoteChars)//o) { $q = $1; --$width; } ## Create new string by breaking up str while ($str ne "") { # If $str less than width, break out if (length($str) <= $width) { $new .= $q . $str; last; } # handle case where no-whitespace line larger than width if (($str =~ /^(\S+)/) && (($len = length($1)) >= $width)) { $new .= $q . $1; substr($str, 0, $len) = ""; next; } # Break string at whitespace $try = ''; $trywidth = $width; $try = substr($str, 0, $trywidth); if ($try =~ /(\S+)$/) { $trywidth -= length($1); $new .= $q . substr($str, 0, $trywidth); } else { $new .= $q . $try; } substr($str, 0, $trywidth) = ''; } continue { $new .= "\n" if $str; } $new; } ##---------------------------------------------------------------------------## 1;