Re: Re: [PATCH] http://lists.insecure.org and changes to hypetombox.pl

---------

From: Peter C. McCluskey (pcm@rahul.net)
Date: Tue Oct 03 2000 - 19:16:31 CDT


 ashley@pcraft.com ("Ashley M. Kirchner") writes:
> Could you regenerate a diff patch based on what's on CVS right now, and I'll
>look into applying it later tonight and test it before committing it.

--- hypetombox.pl.old Sun May 28 09:43:31 2000
+++ hypetombox.pl Tue Oct 3 15:50:44 2000
@@ -8,19 +8,33 @@
 #
 # Scott Rose, rose@cs.[wisc,washington].edu.
 #
-# With modifications by Kent Landfield (kent@landfield.com).
+# With modifications by Kent Landfield.
 # Further modifications by Peter McCluskey (pcm@rahul.net).
 # If it's broke, it's my fault.
+# For the latest version, check http://www.rahul.net/pcm/hypetombox.pl
+# There should also be a version at http://www.hypermail.org, but it
+# may be older.
+#
+# A few more mods by Fyodor (fyodor@insecure.org) [9/24/00]:
+# * Added -H (additional header) option (useful for filtering messages)
+# * Extended the date formats accepted in "Received:" headers
+# * Added 'a' option to append to mailbox rather than clobbering it
+# * Updated usage info
 #
 # Usage:
 #
-# hypetombox.pl [-d <directory>] [-m <output_filename>]
+# hypetombox.pl [-a] [-H <header(s) to add to each msg>] [-d <directory>] [-m <output_filename>] [-n <to_address>] [-R]
+# -a means append to mailbox instead of clobbering it
+# -H adds the header(s) you specify. If more than one, put the three
+# characters %0A between them.
+# -R removes carriage returns (\r) from the end of lines; they sometimes get
+# added for reasons I haven't been able to explain.
 #
-# $Header: /CVS/hypermail/contrib/hypetombox.pl,v 1.1.1.1 2000/05/28 16:43:31 cvsdev Exp $
+# $Header: /home/pcm/CVS/hypetombox/hypetombox.pl,v 1.6 2000/10/03 22:50:44 pcm Exp $
 
 require 5.000;
 use Getopt::Std;
-getopts('d:m:');
+getopts('ad:H:m:n:R');
 
 # This is a list of the fields in the comment header of each message.
 
@@ -33,11 +47,24 @@
 
 @msgs = sort glob($fpat);
 
+$to_address = $opt_n || 'bogus';
+
 # Open the output file for write.
 
 $mbox_name = $opt_m || 'mbox';
 
-(open MBOX, ">$mbox_name") || die "can't open $mbox_name";
+if ($opt_H) {
+ $opt_H =~ s/%0A/\n/g;
+ chomp($opt_H);
+ $opt_H .= "\n";
+}
+
+if ($opt_a) {
+ $openflag = ">>";
+} else {
+ $openflag = ">";
+}
+(open MBOX, "$openflag$mbox_name") || die "can't open $mbox_name";
 
 # Loop on the input files.
 
@@ -49,6 +76,10 @@
 
     # Open the message file for read.
 
+ # reset the header count for each message.
+ # added by Erik Peterson 1/3/00.
+ $header_count=0;
+
     $cntr += 1;
 
     (open M, $msg) || die "can't open $msg";
@@ -56,6 +87,10 @@
     # Loop on lines in the file
 
     $state = 'HeaderComments';
+ foreach $fld (@fields)
+ {
+ $$fld = '';
+ }
 
     my $line;
 
@@ -63,9 +98,10 @@
 
         # This is a header comment; save the value in a variable with the
         # same name.
+ $line =~ s|\r(\n)$|\n| if($opt_R);
 
         if($state eq 'HeaderComments'
- && ($line =~ /^<!-- (\w+)="([^\"]+)" -->$/)) {
+ && ($line =~ /^<!-- (\w+)="(.+)" -->(\s*)$/)) {
             $key = $1;
             $value = $2;
             $value =~ s/&amp;/&/g;
@@ -84,6 +120,9 @@
                 {
                     $date = $1;
                 }
+ elsif($received =~ /^(\w{3}), (\d{1,2}) (\w{3}) (\d{4}) (\d{2}:\d{2}:\d{2})/) {
+ $date = sprintf("%s %s %.2d %s %s", $1, $3, $2, $5, $4);
+ }
                 else {
                     $date = 'Bogus date';
                     $boguscntr += 1;
@@ -92,9 +131,12 @@
                 print MBOX "From $email $date\n";
                 print MBOX "Date: $sent\n";
                 print MBOX "Message-Id: <$id>\n";
- print MBOX "To: bogus\n";
+ print MBOX "To: $to_address\n";
                 print MBOX "From: $email ($name)\n";
                 print MBOX "Subject: $subject\n";
+ if ($opt_H) {
+ print MBOX $opt_H;
+ }
                 if ($inreplyto) {
                     print MBOX "In-Reply-To: <$inreplyto>\n";
                 }
@@ -108,16 +150,16 @@
 
             # This is a body line.
 
- next if($line =~ /^<br>$/i);
- next if($line =~ m|^</EM><BR>$|i);
- $line =~ s/<br>$//; # lose the trailing <br>
- $line =~ s/<BR>$//; # lose the trailing <br>
- $line =~ s/<pre>$//; # lose the <pre>formatted tags
- $line =~ s/<PRE>$//; # lose the <PRE>formatted tags
- $line =~ s/<\/pre>$//; # lose the </pre>formatted tags
- $line =~ s/<\/PRE>$//; # lose the </PRE>formatted tags
- $line =~ s/<P>$//; # lose the paragraph tags
- $line =~ s/<p>$//; # lose the paragraph tags
+ next if($line =~ /^<br>\s*$/i);
+ next if($line =~ m|^</EM><BR>\s*$|i);
+ $line =~ s/<br>$\s*//; # lose the trailing <br>
+ $line =~ s/<BR>$\s*//; # lose the trailing <br>
+ $line =~ s/<pre>$\s*//; # lose the <pre>formatted tags
+ $line =~ s/<PRE>$\s*//; # lose the <PRE>formatted tags
+ $line =~ s/<\/pre>$\s*//; # lose the </pre>formatted tags
+ $line =~ s/<\/PRE>$\s*//; # lose the </PRE>formatted tags
+ $line =~ s/<P>$\s*//; # lose the paragraph tags
+ $line =~ s/<p>$\s*//; # lose the paragraph tags
             $line =~ s%<a href=[^>]+>([^<]+)</a>%\1%g; # lose hyperlinks
             $line =~ s%<A HREF=[^>]+>([^<]+)</A>%\1%g; # lose hyperlinks
             $line =~ s/&lt;/</g; # reverse map special characters
@@ -141,7 +183,7 @@
     (close M) || die "can't close $msg";
 
 
-# ... then, the body:
+# ... then, the body: (this doesn't seem to have any function - pcm 2000-10-03)
 
     print MBOX @body;
 


---------

This archive was generated by hypermail 2.1.5.