#!/usr/bin/perl
#
# ReportLog
#
# Usage: ReportLog <HTML_DIR> <LOG_NAME> <LABEL> <COUNT> <PATTERN>
#
# Extract information from files in <HTML_DIR> and store it in the log
# file <LOG_NAME>.  Each input file is named after a date/time string of
# the form yyyy-mm-dd-hh:mm followed by ".html" (ex: 2001-08-17-12:30.html);
# that string determines the time of the file's data.  ReportLog scans the
# files in chronological order, in 30 minute steps, and stores the
# extracted numbers in the log file - one line in the log file corresponds
# to the data extracted from one HTML file.
#
# When called, ReportLog scans the log file <LOG_NAME> for the last entry
# logged, and proceeds to search for files with later date/times.  If the
# log has no entry yet, the search starts two weeks in the past.
#
# The arguments LABEL, COUNT and PATTERN determine which data is extracted
# from the HTML file.  They work as follows:
#   LABEL   - has the form LABEL1 or LABEL1/LABEL2.
#             LABEL1 tells the program to ignore all input lines before
#             the first line matching LABEL1.
#             LABEL2 (optional) tells the program to ignore the first line
#             matching LABEL2 and everything after it.
#             Both labels are matched as perl regular expressions.
#   COUNT   - actually consists of three numbers, COUNT,SKIP0,SKIP
#             separated by commas with _no_blanks_.  If SKIP0 and SKIP
#             are omitted, they are taken to be zero.  COUNT is the number
#             of values to record, SKIP0 is the number of values to skip
#             before the first recorded value, SKIP is the number of
#             values to skip between the remaining recorded values.
#   PATTERN - perl match expression, applied to each input line, whose
#             captured values are the values to record
#             (ex: '/>([0-9,]+)</g').  Quote it so that the shell does not
#             interpret characters such as [ ] > and <.
#             NOTE: PATTERN is compiled as perl code, so it must only ever
#             come from a trusted operator.
#

use strict;
use warnings;
use POSIX;

# Run the program only when executed as a script; this keeps the
# subroutines below loadable (e.g. by a test) without side effects.
main() unless caller();

#
# Main program: bring the log file up to date.
#
sub main {
    die "Need arguments\n" unless @ARGV >= 5;
    my ($HTML_DIR, $LOG_NAME, $LABEL, $COUNT, $PATTERN) = @ARGV;

    # Final date to search for: 30 minutes ago, rounded down to a half-hour
    my $LastDateTime = date("", -30, 30);

    # First date/time to use (two weeks previous if no previous entry)
    my $StartDateTime = _last_logged_datetime($LOG_NAME);
    $StartDateTime = date("", -60 * 24 * 14, 30) if $StartDateTime eq "";

    my ($CurDate, $CurTime) =
        ($StartDateTime =~ /(\d{4}-\d\d-\d\d)-(\d\d:\d\d)/);
    die "$0: cannot parse date/time '$StartDateTime' from last entry of $LOG_NAME\n"
        unless defined $CurDate && defined $CurTime;
    my $CurDateTime = $CurDate . "-" . $CurTime;

    open(my $log, '>>', $LOG_NAME)
        or die "$0: Cannot open $LOG_NAME for appending: $!";

    # The stamps are fixed-width and zero-padded, so string order equals
    # chronological order.  Using "lt" (rather than "ne") guarantees the
    # loop ends even if the current stamp steps over $LastDateTime.
    my $limit = 0;
    while ($CurDateTime lt $LastDateTime) {

        # Increase current date/time by 30 min
        $CurDateTime = add30min($CurDateTime);

        # Prevent runaway loop (this time stuff can easily go wrong)
        last if ++$limit > 1000;

        # Read info from the 30min HTML file; log it only if the first
        # requested value was actually found
        my @f = ReadFileInfo("$HTML_DIR/$CurDateTime.html",
                             $LABEL, $COUNT, $PATTERN);
        print {$log} $CurDateTime, " ", join(" ", @f), "\n"
            if @f && $f[0] ne "";
    }

    # Buffered write errors only surface at close time
    close($log) or die "$0: Error closing $LOG_NAME: $!";
    return;
}

#
# Return the first whitespace-separated field (the date/time stamp) of
# the last line in the log file that is neither blank nor a '#' comment.
# Returns "" if the log cannot be opened or has no such line.
#
sub _last_logged_datetime {
    my ($log_name) = @_;

    open(my $log, '<', $log_name) or return "";
    my $last_entry = "";
    while (my $line = <$log>) {
        next if $line =~ /^\s*#/;    # comment line
        next if $line !~ /\S/;       # blank line
        $last_entry = $line;
    }
    close($log);

    my ($stamp) = split(' ', $last_entry);
    return defined $stamp ? $stamp : "";
}

#
# Read file, search for LABEL1, and then return the COUNT values selected
# from the values captured by PATTERN.
#
#   $FileName - file to read
#   $LABEL    - "LABEL1" or "LABEL1/LABEL2" (regular expressions)
#   $COUNT    - "COUNT[,SKIP0[,SKIP]]"
#   $PATTERN  - perl match expression evaluated against each line ($_)
#
# Returns an empty list if the file cannot be opened or COUNT is not a
# positive integer.  Otherwise returns exactly COUNT values with commas
# removed; values the file did not provide are returned as "".
# Dies if PATTERN is not valid perl.
#
sub ReadFileInfo {
    my ($FileName, $LABEL, $COUNT, $PATTERN) = @_;

    # Split $COUNT into components
    #   skip0  - number of values to skip before first value read
    #   skip   - number of values to skip before second, third, ...
    #   nfield - total number of values that must be read
    my ($count, $skip0, $skip) = split(/,/, defined $COUNT ? $COUNT : "");
    return () unless defined $count && $count =~ /^\d+$/ && $count > 0;
    $skip0 = 0 unless defined $skip0 && $skip0 =~ /^\d+$/;
    $skip  = 0 unless defined $skip  && $skip  =~ /^\d+$/;
    my $nfield = $count + $skip0 + ($count - 1) * $skip;

    my ($label1, $label2) = split(/\//, defined $LABEL ? $LABEL : "");
    my $have_label1 = defined $label1 && $label1 ne "";
    my $have_label2 = defined $label2 && $label2 ne "";

    # Cannot open file, return no fields
    open(my $html, '<', $FileName) or return ();

    # Compile PATTERN once per file instead of once per line.
    # SECURITY: this executes PATTERN as perl code (string eval); it is
    # only acceptable because PATTERN is supplied by the operator.
    my $extract = eval "no strict; no warnings; return sub { $PATTERN\n};";
    die "$0: cannot compile PATTERN '$PATTERN': $@"
        unless ref $extract eq 'CODE';

    my @values;

    # An empty LABEL1 means "start at the first line".  (An empty regex
    # would otherwise silently reuse the last successful pattern.)
    my $label_found = $have_label1 ? 0 : 1;

    local $_;
    while (<$html>) {

        # Stop reading the file if LABEL2 is found
        last if $have_label2 && /$label2/;

        # Start collecting values at the line where LABEL1 is found
        # (that line itself is searched for values too)
        unless ($label_found) {
            next unless /$label1/;
            $label_found = 1;
        }

        # A run-time failure inside PATTERN just yields no values for
        # this line
        my @found = eval { $extract->() };
        push @values, @found;
        last if @values >= $nfield;
    }
    close($html);

    # Keep only $count elements: skip $skip0 values, then take every
    # ($skip+1)-th value; remove commas from numbers
    my @picked;
    for my $k (0 .. $count - 1) {
        my $value = $values[ $skip0 + $k * ($skip + 1) ];
        $value = "" unless defined $value;
        $value =~ s/,//g;
        push @picked, $value;
    }
    return @picked;
}

#
# Local date command: date(date, incr, round)
#
#   date  - start date/time in format yyyy-mm-dd-hh:mm or yyyy-mm-dd;
#           anything else (ex: "") means the current time
#   incr  - minutes to add to (or, if negative, subtract from) date
#   round - if > 0, round the resulting time down to a multiple of this
#           many minutes (computed on the epoch value)
#
# Returns the resulting local time formatted as yyyy-mm-dd-hh:mm.
#
sub date {
    my ($date, $incr, $round) = @_;
    $date  = "" unless defined $date;
    $incr  = 0  unless defined $incr;
    $round = 0  unless defined $round;

    my $time;
    if ($date =~ /^(\d{4})-(\d{1,2})-(\d{1,2})-(\d{1,2}):(\d{1,2})/) {
        # TROUBLE-TROUBLE
        # The 9th argument to mktime is the isdst (is daylight savings
        # time) flag.  When it is -1 mktime() returns unworkable time when
        # daylight time changes to standard time.  0 seems to work ok.
        # It remains to be seen how this behaves when the change reverses,
        # that is when standard time changes to daylight time (solution:
        # see add30min() below, which does not trust the time of day
        # returned from here).
        $time = mktime(0, $5, $4, $3, $2 - 1, $1 - 1900, 0, 0, 0);
    }
    elsif ($date =~ /^(\d{4})-(\d{1,2})-(\d{1,2})$/) {
        $time = mktime(0, 0, 0, $3, $2 - 1, $1 - 1900);
    }
    else {
        $time = time;
    }

    $time += $incr * 60;
    $time -= $time % (60 * $round) if $round > 0;

    return strftime("%Y-%m-%d-%H:%M", localtime($time));
}

#
# Add 30min to a date/time of the form yyyy-mm-dd-hh:mm, after rounding
# it down to a multiple of 30 minutes.  Returns the new date/time in the
# same format; dies if the argument does not have that format.
#   - this implementation avoids trouble with Daylight<->Standard time
#     conversions.
#   2002-04-11 - Still needed work ... JR
#
sub add30min {
    my ($curdate) = @_;
    $curdate = "" unless defined $curdate;

    my ($year, $month, $day, $hour, $min) =
        $curdate =~ /^(\d{4})-(\d{1,2})-(\d{1,2})-(\d{1,2}):(\d{1,2})/
        or die "add30min: malformed date/time '$curdate'\n";

    $min -= $min % 30;
    my $minofday = $hour * 60 + $min;

    # Date doesn't change, just change time
    if ($minofday < 60 * 24 - 30) {
        $minofday += 30;
        return sprintf("%04d-%02d-%02d-%02d:%02d",
                       $year, $month, $day,
                       int($minofday / 60), $minofday % 60);
    }

    # Date change, use system date routine to get date,
    # but DON'T trust the time (Daylight Savings Bug?)
    my $newdate = date($curdate, 30);
    $newdate =~ s/-\d\d:\d\d\z/-00:00/;
    return $newdate;
}

1;