#! /usr/local/bin/perl

# extracts interesting information from lsf logs

# Steve Golson -- Trilobyte Systems -- sgolson@trilobyte.com
# @(#)compile_stats	1.2 02/22/01 16:43:06

use POSIX;
use Getopt::Long;

# Command-line options:
#   --number_of_jobs_to_print=N  number of jobs listed in each ranking
#                                (default 10; 0 also selects the default)
#   --normalize                  normalize cpu times as if all jobs were run
#                                on the fastest machine
#
# GetOptions returns false when it meets an unknown or malformed option.
# Stop there rather than quietly printing a report that ignores what the
# caller asked for.
GetOptions(
    "number_of_jobs_to_print=i" => \$number_of_jobs_to_print,
    "normalize"                 => \$normalize,
) or die "Usage: $0 [--number_of_jobs_to_print=N] [--normalize] [lsf_log ...]\n";

$number_of_jobs_to_print ||= 10;

if ($normalize) {
    # normalize cpu times as if all jobs were run on the fastest machine
    #
    # On exit:
    #   $largest_cpuf      - the largest CPU factor reported by lshosts
    #   $cpuf_ratio{$host} - that host's CPU factor divided by $largest_cpuf

    # List-form pipe open: no shell is involved and the handle is lexical.
    open(my $lshosts, "-|", "lshosts", "-w")
        or die "Couldn't run lshosts\n";

    my $header = <$lshosts>;    # skip the first line header

    while (my $line = <$lshosts>) {
        # The host name is the first column, the CPU factor the fourth.
        my ($host, undef, undef, $factor) = split ' ', $line;
        next unless defined $factor;    # blank or truncated line
        $cpuf_ratio{$host} = $factor;
    }

    # For a pipe, close also reports the exit status of the command.
    close($lshosts)
        or warn $! ? "Error closing lshosts pipe: $!\n"
                   : "lshosts exited with status " . ($? >> 8) . "\n";

    $largest_cpuf = 0.0;
    for my $factor (values %cpuf_ratio) {
        $largest_cpuf = $factor if $factor > $largest_cpuf;
    }

    if (%cpuf_ratio) {
        # Without a positive factor the division below cannot be done; say
        # why instead of failing with "Illegal division by zero".
        die "lshosts reported no positive CPU factor; cannot normalize\n"
            unless $largest_cpuf > 0;

        # values() aliases the hash entries, so this rescales them in place.
        $_ /= $largest_cpuf for values %cpuf_ratio;
    }
}

##### parse the lsf logs

# Parser state carried from one log line to the next (package globals, like
# the rest of this script):
#   $job_name - job taken from the most recent "Subject:" line; "ERROR" until
#               one with a <...> part has been seen
#   $cpu_name - host taken from the most recent "Job was executed on host" line
$job_name = "ERROR";

# Convert an LSF size to megabytes.  $what ("Memory" or "Swap") only labels
# the error raised for a unit other than MB or KB.
sub _to_megabytes {
    my ($amount, $unit, $what) = @_;
    return $amount        if $unit eq "MB";
    return $amount / 1024 if $unit eq "KB";
    die "$what type is unknown";
}

# Fold one line of an LSF job report into %jobs, %cpu_time, %max_memory and
# %max_swap.  Dies when a CPU time is not given in "sec." or a size is not in
# MB or KB.
sub parse_lsf_line {
    my ($line) = @_;

    # chomp, not chop: a final line with no newline must keep its last char.
    chomp $line;

    # split in void context no longer fills @_ on current perls, so keep the
    # fields in a named array.  The field numbers used below rely on the
    # statistics lines starting with whitespace, which makes field 0 empty.
    my @fields = split /[\s:]+/, $line;

    if ($line =~ /^Subject: /) {
        # The job name runs from the first "<" to the last ">", brackets
        # included.  Capture it instead of reading $&, which after a failed
        # match still holds whatever the previous pattern matched.
        my ($name) = $line =~ /(<.*>)/;
        $job_name = defined $name ? $name : "ERROR";
        $jobs{$job_name} = 1;
        # remove values from previous runs of this job
        delete $cpu_time{$job_name};
        delete $max_memory{$job_name};
        delete $max_swap{$job_name};
    }

    if ($line =~ /^Job was executed on host/) {
        ($cpu_name) = ($fields[5] =~ /<(.*)>/);
        if ($normalize && !exists($cpuf_ratio{$cpu_name})) {
            # Not in the lshosts table: leave this host's times unscaled.
            warn "Unknown cpu name $cpu_name";
            $cpuf_ratio{$cpu_name} = 1.0;
        }
    }

    if ($line =~ /CPU time/) {
        die "Time is not seconds" unless ($fields[4] eq "sec.");
        my $seconds = $fields[3];
        $seconds *= $cpuf_ratio{$cpu_name} if ($normalize);
        $cpu_time{$job_name} = $seconds;
    }

    if ($line =~ /Max Memory/) {
        $max_memory{$job_name} = _to_megabytes($fields[3], $fields[4], "Memory");
    }

    if ($line =~ /Max Swap/) {
        $max_swap{$job_name} = _to_megabytes($fields[3], $fields[4], "Swap");
    }
}

while (my $line = <>) {
    parse_lsf_line($line);
}

##### calculate the totals

# Add up the CPU time recorded for every job that reported one.
$total_cpu_time += $_ for values %cpu_time;

# Every job registered in %jobs counts, whether or not it has statistics.
$number_of_jobs = scalar keys %jobs;

##### print the totals

# Tell the reader up front when the CPU times below have been rescaled.
print "CPU times normalized to CPU factor of $largest_cpuf\n\n" if $normalize;

# Select the summary layout for the current output handle, then emit it.
$~ = "HEADER";
write;

##### print the worst cpu times

$~ = "TIME_HEADER";
write;

$~ = "TIME";

# Jobs ordered from most to least CPU time; at most $number_of_jobs_to_print
# of them are listed.
my @cpu_ranking = sort { $cpu_time{$b} <=> $cpu_time{$a} } keys %cpu_time;
my $cpu_rows = @cpu_ranking < $number_of_jobs_to_print
    ? scalar(@cpu_ranking)
    : $number_of_jobs_to_print;

# $num, $value, $job, $tot and $pct stay package globals: the TIME format
# reads them each time write is called.
$tot = 0.0;
for my $rank (1 .. $cpu_rows) {
    my $name    = $cpu_ranking[$rank - 1];
    my $seconds = $cpu_time{$name};

    $num   = $rank;
    $value = $seconds / (60 * 60);    # seconds -> hours
    $job   = $name;    # a copy: the format's ^<<< fields eat the text of $job

    # If every job reported 0 seconds the total is 0 as well; show 0% instead
    # of dying with "Illegal division by zero".
    $pct  = $total_cpu_time ? 100 * ($seconds / $total_cpu_time) : 0;
    $tot += $pct;
    write;
}

##### print the worst memory

$~ = "MEMORY_HEADER";
write;

$~ = "MEMORY";

# Rank the jobs by recorded maximum memory, largest first.
@keys = sort { $max_memory{$b} <=> $max_memory{$a} } keys %max_memory;

# Never list more rows than there are jobs.
$max = @keys < $number_of_jobs_to_print ? scalar(@keys) : $number_of_jobs_to_print;

# $num doubles as the index of the next job and, once incremented, as the
# 1-based rank shown by the MEMORY format.
$num = 0;
while ($num < $max) {
    $job   = $keys[$num];
    $value = $max_memory{$job};
    $num++;
    write;
}

##### print the worst swap

$~ = "SWAP_HEADER";
write;

# The swap rows reuse the MEMORY layout.
$~ = "MEMORY";

# Rank the jobs by recorded maximum swap, largest first.
my @swap_ranking = sort { $max_swap{$b} <=> $max_swap{$a} } keys %max_swap;

# List every ranked job unless the requested number of rows is smaller.
my $swap_rows = @swap_ranking;
$swap_rows = $number_of_jobs_to_print
    unless $swap_rows < $number_of_jobs_to_print;

for my $rank (1 .. $swap_rows) {
    my $name = $swap_ranking[$rank - 1];
    $num   = $rank;
    $value = $max_swap{$name};
    $job   = $name;
    write;
}

##############################################################################
# formats

#-----------------------------------------------------------------------------
# HEADER: report summary.  Prints the number of LSF jobs ($number_of_jobs)
# and the total CPU time twice: $total_cpu_time divided by 60*60 as hours and
# by 60*60*24 as days, i.e. the total is taken to be in seconds.  The braces
# around the argument list let it continue onto a second line.
format HEADER =
Number of LSF jobs: @#####
                    $number_of_jobs

Total CPU time: @######.## hours = @###.## days
{               $total_cpu_time / (60 * 60),
		$total_cpu_time / (60 * 60 * 24) }
.
#-----------------------------------------------------------------------------
# TIME_HEADER: a blank line followed by the two heading lines written above
# the CPU-time ranking.  "Cumulative" and "Job" label the two percentage
# columns printed by the TIME format.  Takes no arguments.
format TIME_HEADER =

                                                      Percentage of Total:
Max CPU time:                                              Cumulative  Job
.
#-----------------------------------------------------------------------------
# TIME: one row of the CPU-time ranking.  Fields, left to right:
#   $num   - rank, right-justified
#   $value - CPU time in hours, two decimals
#   $job   - job name; the ^<<< field prints as much as fits and removes that
#            text from $job, and the ~~ line repeats until $job is empty, so
#            long names wrap onto continuation lines
#   $tot   - cumulative percentage of the total CPU time
#   $pct   - this job's percentage of the total CPU time
# All five are package globals set by the caller just before each write.
format TIME =
 @>)@###.## hours in job ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @##% @##%
 $num,$value,            $job,                                   $tot,$pct
~~                       ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
                         $job
.
#-----------------------------------------------------------------------------
# MEMORY_HEADER: a blank line and the "Max memory:" heading written above the
# memory ranking.  Takes no arguments.
format MEMORY_HEADER =

Max memory:
.
#-----------------------------------------------------------------------------
# SWAP_HEADER: a blank line and the "Max swap:" heading written above the
# swap ranking.  Takes no arguments.
format SWAP_HEADER =

Max swap:
.
#-----------------------------------------------------------------------------
# MEMORY: one row of a size ranking; the script selects it for both the
# memory and the swap lists.  Fields, left to right:
#   $num   - rank, right-justified
#   $value - size in MB, printed without decimals
#   $job   - job name; the ^<<< field prints as much as fits and removes that
#            text from $job, and the ~~ line repeats until $job is empty
# All three are package globals set by the caller just before each write.
format MEMORY =
 @>)@###### MB    in job ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 $num,$value,            $job
~~                       ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
                         $job
.
#-----------------------------------------------------------------------------
