torque_install/torque/t/07_job_arrays.t
ruoyunbai 2bb9621e30 1
2021-09-29 21:06:16 +08:00

201 lines
5.4 KiB
Perl

#!/usr/bin/perl -w
use strict;
use warnings;
use Test::More qw(no_plan);
use subs qw(check_jobs cancel_jobs qstat_info);
# Shared state used by the top-level test flow and by the helper subs below.
#
# IDs of the submitted array sub-jobs, in the form "<baseid>-<index>";
# populated right after the array has been submitted.
our @Jobs = ();
# Upper bound on qdel retries for a single job before its cancel test fails.
our $MAX_CANCEL_TRIES = 5;
# Set to twice the processor count reported by pbsnodes.
our $Jobcount = 0;
# How long each job sleeps; replaced by 300 + 30 * processor count once the
# node information has been read.
our $Joblength = 1200; # seconds (Default)
# Account the jobs are submitted as; copied from $ENV{TORQUE_TEST_USER}.
our $Testuser = undef;
diag('Testing job arrays on TORQUE - this may take several minutes');
# Validate the account that jobs will be submitted as.  The name comes from
# the TORQUE_TEST_USER environment variable; any failed check aborts the
# whole test run.
{
    my $key = 'TORQUE_TEST_USER';

    # The variable must be present in the environment at all ...
    BAIL_OUT('Test user does not exist')
        unless ok(exists $ENV{$key}, 'Test User Exists');

    # ... and it must hold a defined, non-empty value.
    BAIL_OUT('Invalid test user')
        unless ok(defined $ENV{$key}, 'Valid Test User');
    BAIL_OUT('Invalid test user')
        unless ok(length($ENV{$key}), 'Valid Test User');

    $Testuser = $ENV{$key};
}
# Ask qstat which TORQUE release is installed.  $version stays 0 when no
# "Version: x.y.z" string can be found in the output.
my $version = 0;
{
    my $about = `qstat --about 2>&1`;
    BAIL_OUT('Unable to gather TORQUE information from qstat')
        unless ok(defined $about, 'TORQUE Information from qstat');

    if ($about =~ /Version:\s+(\d+\.\d+\.\d+)/)
    {
        $version = $1;
    }
}
# Main test flow.  Skipped entirely on TORQUE releases older than 2.2.0.
#
#   1. Count processors with pbsnodes and size the job array from that.
#   2. Submit one array of $Jobcount sleep jobs as the test user.
#   3. Verify every sub-job shows up in qstat.
#   4. Wait for the first half to start, cancel it, then do the same for
#      the second half.
SKIP:
{
    # Compare major and minor as separate integers.  Treating "major.minor"
    # as a single decimal number would rank e.g. 2.10 below 2.2.
    my ($major, $minor) = ($version =~ /^(\d+)\.(\d+)\.\d+$/) ? ($1, $2) : (0, 0);
    skip "TORQUE not version 2.2.0 or higher ($version)", 1
        if $major < 2 or ($major == 2 and $minor < 2);

    # Determine Number of Jobs
    {
        my %nodes     = ();
        my $node      = undef;
        my $proccount = 0;
        my $pbsnodes  = `pbsnodes` || undef;
        ok(defined $pbsnodes, 'Node Information from pbsnodes') or
            BAIL_OUT('Unable to gather node information from pbsnodes');

        foreach my $line (split /[\r\n]+/, $pbsnodes)
        {
            # An unindented line opens a new node record; assume a single
            # processor until an "np = N" attribute says otherwise.
            if ($line =~ /^(\S+)/)
            {
                $node = $1;
                $nodes{$node} = 1;
                next;
            }

            # Indented attribute line: replace the default of 1 with np.
            $nodes{$node} += $1 - 1
                if ($line =~ /^\s+np = (\d+)/) and defined $node;
        }

        $proccount += $_ for values %nodes;
        $Jobcount = 2 * $proccount;
        ok($proccount, 'Processor Count') or
            BAIL_OUT('TORQUE reported 0 processors');
        $Joblength = 300 + ($proccount * 30); # seconds
    }

    # Submit Jobs
    {
        my $walltime = 1.1 * $Joblength;

        # Array indices run 0 .. $Jobcount-1 so that exactly $Jobcount
        # sub-jobs exist.  Every one of them is covered by the two job sets
        # below, so nothing is left behind in the queue afterwards.
        my $lastindex = $Jobcount - 1;
        my $output = `su $Testuser -c 'echo "sleep $Joblength" | qsub -k oe -l nodes=1,walltime=$walltime -t 0-$lastindex'` || undef;
        ok(defined $output, "Job Submission") or
            BAIL_OUT("Unable to submit job to TORQUE as '$Testuser' - see TORQUE docs, Section 2.1");

        # Only the leading run of digits is the numeric job ID.  Stripping
        # every non-digit from the whole string would also pull in digits
        # from the server host name ("123.node01" -> "12301").
        my ($baseid) = ($output =~ /^\s*(\d+)/);
        ok(defined $baseid, "Job Submission") or
            BAIL_OUT("Unable to submit job to TORQUE as '$Testuser' - see TORQUE docs, Section 2.1");

        @Jobs = map { "$baseid-$_" } (0..$lastindex);
    }

    # Jobs In Queue
    {
        sleep 5; # give the server a moment to register the whole array
        my %data = qstat_info();
        for my $id (@Jobs)
        {
            ok(exists $data{$id}, "Job in Queue ($id)") or
                BAIL_OUT("Submitted job ($id) does not appear in the TORQUE queue");
        }
    }

    # Job Set 1 (first half)
    {
        check_jobs(0, int($Jobcount / 2) - 1);
        cancel_jobs(0, int($Jobcount / 2) - 1);
    }

    # Job Set 2 (second half)
    {
        check_jobs(int($Jobcount / 2), $Jobcount - 1);
        cancel_jobs(int($Jobcount / 2), $Jobcount - 1);
    }
}
#------------------------------------------------------------------------------
#
# check_jobs FIRST, LAST
#
# Waits for the jobs $Jobs[FIRST] .. $Jobs[LAST] to reach state R or C in
# the qstat listing, polling once per second.  The number of polls is a
# quarter of $Joblength, but never fewer than 180.  Afterwards one test is
# emitted per job; the first job that did not start aborts the run.
#
# No prototype is declared: the sub is predeclared without one via
# "use subs", so adding one here only triggers a "Prototype mismatch"
# warning and never affects the (earlier compiled) calls.
sub check_jobs
{
    my ($first, $last) = @_;
    my $waittime = int($Joblength * .25) + 1;
    $waittime = 180 if $waittime < 180;

    # Start every job as "not started" so the final report never reads an
    # entry that was not evaluated.
    my %complete = map { ($Jobs[$_] => 0) } ($first..$last);

    # Wait up to $waittime seconds for the jobs to start
    ROUND: for my $round (1..$waittime)
    {
        sleep 1;
        my %data = qstat_info();

        # Re-evaluate the whole range each round so %complete always
        # reflects the most recent qstat snapshot.
        my $pending = 0;
        for my $i ($first..$last)
        {
            my $id = $Jobs[$i];
            $complete{$id} = ((exists $data{$id}) and ($data{$id} =~ /^[RC]$/)) ? 1 : 0;
            $pending++ unless $complete{$id};
        }
        last ROUND unless $pending;
    }

    for my $i ($first..$last)
    {
        ok($complete{$Jobs[$i]}, "Job Running ($Jobs[$i])") or
            BAIL_OUT("Submitted job ($Jobs[$i]) has failed to start within $waittime seconds - check scheduler - see TORQUE docs, Section 5.1");
    }
}
#
#------------------------------------------------------------------------------
#
# cancel_jobs FIRST, LAST
#
# Runs qdel on the jobs $Jobs[FIRST] .. $Jobs[LAST].  Any output from qdel
# counts as a failure and the same job is retried, up to $MAX_CANCEL_TRIES
# times with one second between attempts.  An "invalid state for job"
# message is accepted as success.  One test is emitted per job; a job that
# cannot be cancelled aborts the run.
#
# No prototype is declared: the sub is predeclared without one via
# "use subs", so adding one here only triggers a "Prototype mismatch"
# warning and never affects the (earlier compiled) calls.
sub cancel_jobs
{
    my ($first, $last) = @_;
    my $tries = 0;

    CANCEL: for my $i ($first..$last)
    {
        my $qdel = `qdel $Jobs[$i] 2>&1` || undef;
        if (defined $qdel)
        {
            $qdel =~ s/[\r\n]//g;

            # Output that held nothing but line breaks is not an error, and
            # neither is a job that is already in a state qdel refuses.
            $qdel = undef
                if !length($qdel) or $qdel =~ /MSG=invalid state for job/i;
        }

        if ((defined $qdel) and ($tries < $MAX_CANCEL_TRIES))
        {
            $tries++;
            sleep 1;
            diag("Failed to cancel job ($Jobs[$i]) - Retry $tries/$MAX_CANCEL_TRIES - [$qdel]");
            redo CANCEL; # same job again, loop index unchanged
        }

        ok(!defined $qdel, "Cancel Job ($Jobs[$i])") or
            BAIL_OUT("Submitted job ($Jobs[$i]) could not be cancelled");
        $tries = 0; # fresh retry budget for the next job
    }
}
#
#------------------------------------------------------------------------------
#
# qstat_info
#
# Runs qstat and returns a hash that maps each array sub-job ID of the form
# "<digits>-<digits>" to its single-character state column.  Emits one
# 'qstat Results' test per call and aborts the run when qstat prints
# nothing.
sub qstat_info
{
    my $listing = `qstat` || undef;
    BAIL_OUT('Cannot gather information from qstat')
        unless ok(defined $listing, 'qstat Results');

    # One row of the default qstat table; lines that do not fit are ignored.
    my $row = qr/^
        (\d+-\d+)\S*\s+ # Job ID
        \S+\s+ # Name
        \S+\s+ # User
        \S+\s+ # Time Use
        (\w)\s+ # State
        \S+\s* # Queue
        $/x;

    my %state_of;
    for my $entry (split /[\r\n]+/, $listing)
    {
        my ($id, $state) = ($entry =~ $row) or next;
        $state_of{$id} = $state;
    }
    return %state_of;
}
#
#------------------------------------------------------------------------------