From: pdc@lunch.asd.sgi.com (Paul Close) Subject: Digest code diffs for 1.90 Date: Thu, 21 Apr 1994 17:56:22 -0700 (PDT) Here are my changes to digest which support config file settings for specifying the digest size in lines and/or the maximum age of the oldest article, in days. Also support a new flag, -p (for "push"), intended for use by cron jobs. It checks to see if a digest should be sent, and sends it if it should (pretty well because an article is too old, but you could use this to send all the time). A few comments on the code. In &should_be_sent, I calculate how big the article would be if the headers were stripped, both in bytes and in lines. I add in a bit of a fudge factor for mail headers, just so we don't get too close to maxlength bytes before sending. Typically, the line count will cause a digest to be sent before the byte count would (I see the maxlength count as more of a mailer issue than a digest issue). The old digest code had a strange construct: s/\n+$/\n/; I assumed that this was to trim newlines off the end of the string, but multi-line regexps don't work that way. The only way I could get this to work is: $len = length($body) - 1; $len-- while (substr($body,$len,1) eq "\n"); substr($body,$len+1) = ""; Any clever hacks appreciated. In the same area, I changed the code that reads the body of the message to read the whole thing at once (undef $/) rather than do multiple string concatenations. Seems more efficient. I also added a ^From escaper, using enough of a real "From " line pattern, that it shouldn't match just any line beginning with From. Under the heading of random perl lore, to count the number of newlines in a multi-line string, I used: $lines += ($body =~ s/\n/\n/g); seems pretty straightforward, but I have the nagging suspicion there's an easier way. Finally, I made digest safe past the year 2000, by printing $year+1900 rather than 19$year. Whoopee! Comments welcome! The code is based on 1.90b2, which is the latest I have. Index: digest/digest *** digest/digest.old Sun Mar 6 14:47:06 1994 --- digest/digest Thu Apr 21 17:35:33 1994 *************** *** 63,72 **** if (defined($opt_r)) { &receive_message; } elsif (defined($opt_m)) { &make_digest; } else { ! &abort("Usage: digest {-r|-m} [-c config|(-C -l list)]\nStopped"); } &free_lock; --- 63,79 ---- if (defined($opt_r)) { &receive_message; + if (&should_be_sent()) { + &make_digest; + } } elsif (defined($opt_m)) { &make_digest; + } elsif (defined($opt_p)) { + if (&should_be_sent()) { + &make_digest; + } } else { ! &abort("Usage: digest {-r|-m|-p} [-c config|(-C -l list)]\nStopped"); } &free_lock; *************** *** 73,97 **** exit(0); sub receive_message { - $sum = 0; $i = 0; do { ! $i++; ! $file = sprintf("%s/%03d", $V{'INCOMING'}, $i); ! $sum += (-s $file); } until (! -e $file); print STDERR "Receiving $i\n"; open(MSG, ">$file") || &abort("open(MSG, \">$file\"): $!"); while () { print MSG $_; } close(MSG); - $sum += (-s $file); - if ($sum > $V{'DIGEST_SIZE'}) { - &make_digest; - } - return(1); } --- 80,146 ---- exit(0); + sub should_be_sent { + # fudge factors for headers and footers + $sum = 600 + length($HEADER) + length($HEADERS) + length($TRAILER); + $lines = 25; + $i = 0; + while (1) { + $file = sprintf("%s/%03d", $V{'INCOMING'}, ++$i); + last unless (-e $file); + open(COUNT, "<$file") || &abort("open(COUNT, \"<$file\"): $!"); + + $/ = ''; # grab the header + $head = ; + + # only count From/Date/Subject header fields to get a + # more accurate size and line count. + $head =~ s/\n\s+/ /g; + $head =~ /^(From:\s+.*)/i && ($sum += length($1)+1, $lines++); + $head =~ /^(Subject:\s+.*)/i && ($sum += length($1)+1, $lines++); + $head =~ /^(Date:\s+.*)/i && ($sum += length($1)+1, $lines++); + $sum++, $lines++; + + # count the body of the message + undef $/; + $body = ; + $sum += length($body); + $lines += ($body =~ s/\n/\n/g); # count newlines + + $/ = "\n"; + close(COUNT); + $sum += length($EB) + 2, $lines += 2; # account for message delimiter + + if ($V{'DIGEST_SIZE'} && $sum > $V{'DIGEST_SIZE'}) { + return(1); + } + if ($V{'DIGEST_LINES'} && $lines > $V{'DIGEST_LINES'}) { + return(1); + } + if ($V{'MAX_AGE'} && (-M $file) > $V{'MAX_AGE'}) { + return(1); + } + } + print "don't send. sum = $sum, lines = $lines\n"; + + return(0); + } + sub receive_message { $i = 0; do { ! $file = sprintf("%s/%03d", $V{'INCOMING'}, ++$i); } until (! -e $file); + print STDERR "Receiving $i\n"; open(MSG, ">$file") || &abort("open(MSG, \">$file\"): $!"); + + # copy the message while () { print MSG $_; } + close(MSG); } *************** *** 111,129 **** $head = ; $head =~ s/\n\s+/ /g; $body = ""; ! ($subj) = ($head =~ /^subject:\s+(.*)/i); ! $subj = "[none]" unless $subj; ! ($from) = ($head =~ /^from:\s+(.*)/i); ! ($date) = ($head =~ /^date:\s+(.*)/i); ! $/ = "\n"; ! while () { ! s/^-/- -/; #escape encapsulation boundaries in message ! $body .= $_; ! } close(message); ! $body =~ s/\n+$/\n/; push(@subj,$subj); print TEMP <; $head =~ s/\n\s+/ /g; $body = ""; ! $subj = ($head =~ /^Subject:\s+(.*)/i)? $1: "[none]"; ! ($from) = $head =~ /^From:\s+(.*)/i; ! ($date) = $head =~ /^Date:\s+(.*)/i; ! undef $/; ! $body = ; close(message); ! ! # escape ^From ... ! $body =~ ! s/^From (\S+\s+\w{3}\s+\w{3}\s+\d+\s+\d+:\d+:\d+)/>From $1/g; ! $body =~ s/^-/- -/g; # escape encapsulation boundaries in message ! # trim trailing \n's ! $len = length($body) - 1; ! $len-- while (substr($body,$len,1) eq "\n"); ! substr($body,$len+1) = ""; + $/ = "\n"; + push(@subj,$subj); print TEMP <))[7]; chdir($HOME); ! &getopt("rmc:Cl:") || ! &abort("Usage: digest {-r|-m} [-c config|(-C -l list)]\nStopped"); $config = $opt_c || "$HOME/.digestrc"; $TEMP = "/tmp/digest.$$"; $SIG{'INT'} = 'cleanup'; --- 260,267 ---- $* = 1; $HOME = $ENV{"HOME"} || (getpwuid($>))[7]; chdir($HOME); ! &getopt("rmpc:Cl:") || ! &abort("Usage: digest {-r|-m|-p} [-c config|(-C -l list)]\nStopped"); $config = $opt_c || "$HOME/.digestrc"; $TEMP = "/tmp/digest.$$"; $SIG{'INT'} = 'cleanup'; *************** *** 245,252 **** $NUMBER = $config_opts{$opt_l,"digest_issue"}; $Precedence = $config_opts{$opt_l,"precedence"}; $Precedence = "bulk" if ($Precedence eq ""); ! $V{'ARCHIVE'} = "$filedir/$opt_l$filedirsuffix"; $V{'DIGEST_SIZE'} = $config_opts{$opt_l, "maxlength"}; $V{'ERRORS-TO'} = $config_opts{$opt_l,"sender"}; $V{'FROM'} = $config_opts{$opt_l, "sender"}; $V{'INCOMING'} = "$digest_work_dir/$opt_l"; --- 301,310 ---- $NUMBER = $config_opts{$opt_l,"digest_issue"}; $Precedence = $config_opts{$opt_l,"precedence"}; $Precedence = "bulk" if ($Precedence eq ""); ! $V{'ARCHIVE'} = "$filedir/$opt_l$filedir_suffix"; $V{'DIGEST_SIZE'} = $config_opts{$opt_l, "maxlength"}; + $V{'DIGEST_LINES'} = $config_opts{$opt_l, "digest_maxlines"}; + $V{'MAX_AGE'} = $config_opts{$opt_l, "digest_maxdays"}; $V{'ERRORS-TO'} = $config_opts{$opt_l,"sender"}; $V{'FROM'} = $config_opts{$opt_l, "sender"}; $V{'INCOMING'} = "$digest_work_dir/$opt_l"; *************** *** 327,333 **** sub getdate { local($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); ! return($DAYS[$wday] . ", $mday " . $MONTHS[$mon] . " 19$year"); } sub set_lock { --- 385,392 ---- sub getdate { local($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); ! $year += 1900; ! return("$DAYS[$wday], $mday $MONTHS[$mon] $year"); } sub set_lock { Index: config_parse.pl *** config_parse.pl.old Thu Apr 21 07:32:50 1994 --- config_parse.pl Thu Apr 21 07:41:33 1994 *************** *** 128,133 **** --- 128,135 ---- 'digest_archive', '', 'digest_rm_footer', '', 'digest_rm_fronter', '', + 'digest_maxlines', '', + 'digest_maxdays', '', # general stuff below 'comments', '', # comments about config file ); *************** *** 331,336 **** --- 333,346 ---- Just like digest_rm_footer, it is also non-operative.', ); + 'digest_maxlines', + "automatically generate a new digest when the size of the digest exceeds + this number of lines.", + + 'digest_maxdays', + "automatically generate a new digest when the age of the oldest article in + the queue exceeds this number of days.", + # match commands to their subsystem, by default only 4 subsystems # exist, majordomo, resend, digest and config. %subsystem = ( *************** *** 372,377 **** --- 382,389 ---- 'digest_archive', 'digest', 'digest_rm_footer', 'digest', 'digest_rm_fronter', 'digest', + 'digest_maxlines', 'digest', + 'digest_maxdays', 'digest', # general stuff here 'comments', 'config', ); *************** *** 418,423 **** --- 430,437 ---- 'digest_archive', 'grab_absolute_dir', 'digest_rm_footer', 'grab_word', 'digest_rm_fronter', 'grab_word', + 'digest_maxlines', 'grab_integer', + 'digest_maxdays', 'grab_integer', # general stuff below 'comments', 'grab_string_array', ); -- Paul Close pdc@sgi.com ...!{ames, decwrl, uunet}!sgi!pdc No fate but what we make