#!/bin/sh
#
# pbs_mom	This script will start and stop the PBS Mom
#
# chkconfig: 345 95 05
# description: TORQUE/PBS is a versatile batch system for SMPs and clusters
#
### BEGIN INIT INFO
# Provides: pbs_mom
# Required-Start: $local_fs $network $syslog
# Required-Stop: $local_fs $network $syslog
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Start up the pbs_mom daemon
# Description: pbs_mom is part of a batch scheduler
#              This service starts up the compute node.
### END INIT INFO

# Raise the open-file limit for this shell; processes started from it
# inherit the limit.
ulimit -n 32768

# Source the library functions
# (presumably the provider of the daemon/success/failure helpers used by the
# actions below -- they are not defined in this file)
. /etc/rc.d/init.d/functions

# NOTE: customize these variables as needed
# The @...@ tokens look like build-time substitution placeholders; verify
# they have been replaced in the installed copy.
SBIN_PATH=@sbindir@
PBS_DAEMON="$SBIN_PATH/pbs_mom"
PBS_HOME=@PBS_HOME@
PBS_ARGS=""

SUBSYS_LOCK="/var/lock/subsys/pbs_mom"

# Optional site overrides. Sourced after the defaults so it can replace any
# of the variables above, and before MOM_LOCK is derived from PBS_HOME.
if [ -f /etc/sysconfig/pbs_mom ]; then
  . /etc/sysconfig/pbs_mom
fi

MOM_LOCK="$PBS_HOME/mom_priv/mom.lock"

# $previous is not set anywhere in this file; presumably it comes from the
# environment of whatever invokes the script.
if [ -z "$previous" ]; then
  # being run manually, don't disturb jobs
  args="$args -p"
else
  args="$args -q"
fi

pidof_pbs_mom() {
  # Echo the PID of the master pbs_mom process and return 0 if such a
  # process is running; otherwise echo nothing and return 1.
  #
  # Globals read:    MOM_LOCK (path of the lock file holding the PID)
  # Globals written: lockfile_info, parent_mom_pid, fd_path, fd_info
  #                  (no 'local' in POSIX sh; callers in this file run the
  #                  function inside a command substitution)
  #
  # A PID is accepted only if all three checks pass:
  #   1. $MOM_LOCK exists and contains a decimal PID,
  #   2. "pidof pbs_mom" lists that PID,
  #   3. that process holds an open descriptor on the very same file
  #      (same device:inode) as $MOM_LOCK.

  # device:inode of the lock file; stat failing means there is no lock file
  lockfile_info=$(stat -Lc "%d:%i" "$MOM_LOCK" 2>/dev/null) || return 1

  # now we are sure the $MOM_LOCK file exists
  parent_mom_pid=$(cat "$MOM_LOCK" 2>/dev/null)

  # An empty or non-numeric value cannot be a PID. Rejecting it here also
  # keeps arbitrary file content out of the regular expression below.
  case "$parent_mom_pid" in
    '' | *[!0-9]*) return 1 ;;
  esac

  # The recorded PID must be among the running pbs_mom processes.
  pidof pbs_mom | grep -E "(^| )${parent_mom_pid}( |\$)" >/dev/null 2>&1 ||
    return 1

  # The parent pbs_mom should hold $MOM_LOCK open, so look through its open
  # descriptors for one that resolves to the lock file.
  for fd_path in /proc/"${parent_mom_pid}"/fd/*; do
    # Descriptors can vanish between the glob and the stat (and an
    # unmatched glob stays literal); in both cases just move on.
    fd_info=$(stat -Lc "%d:%i" "$fd_path" 2>/dev/null) || continue
    if [ "$fd_info" = "$lockfile_info" ]; then
      echo "$parent_mom_pid"
      return 0
    fi
  done

  return 1
}

kill_pbs_mom() {
  # Try to terminate the master pbs_mom process if such a process is
  # running.
  #
  # Globals read:    SBIN_PATH (directory containing momctl)
  # Globals written: pid, attempt
  # Returns: 0 if no master pbs_mom is running, if it disappears while we
  #          are trying, or as soon as "momctl -s" succeeds;
  #          1 if "momctl -s" failed on all five attempts.
  pid=$(pidof_pbs_mom) || return 0

  # Written with POSIX constructs only: under a strict /bin/sh, "{1..5}"
  # is not expanded and "cmd &>/dev/null" runs cmd in the background.
  attempt=0
  while [ "$attempt" -lt 5 ]; do
    # Process already gone: nothing left to do.
    kill -0 "$pid" >/dev/null 2>&1 || return 0
    "$SBIN_PATH/momctl" -s && return 0
    sleep 1
    attempt=$((attempt + 1))
  done

  return 1
}

# how were we called
#
# Dispatch on the requested action. Every branch either exits directly or
# leaves its result in RET, which becomes the script's exit status.
case "$1" in
  start)
    echo -n "Starting TORQUE Mom: "
    # check if pbs_mom is already running
    stat "$SUBSYS_LOCK" >/dev/null 2>&1
    lock_present=$?

    if pid=$(pidof_pbs_mom); then
      if [ "$lock_present" -eq 0 ]; then
        echo -n "pbs_mom already running (pid $pid)"
      else
        # Daemon is up but the lock is missing: recreate the lock.
        touch "$SUBSYS_LOCK" && echo -n "pbs_mom running (pid $pid)"
      fi
      RET=$?
      [ "$RET" -eq 0 ] && success && echo && exit 0
    fi

    # ulimit -c unlimited  # Uncomment this to preserve core files
    # $args and $PBS_ARGS are deliberately unquoted: each may hold several
    # whitespace-separated options.
    daemon "$PBS_DAEMON" $args -d "$PBS_HOME" $PBS_ARGS
    RET=$?
    # Only claim the subsystem lock when the daemon actually started,
    # matching what the purge action does.
    [ "$RET" -eq 0 ] && touch "$SUBSYS_LOCK"
    echo
    ;;
  purge)
    [ -f "$SUBSYS_LOCK" ] && "$0" stop
    echo -n "Starting TORQUE Mom with purge: "
    daemon "$PBS_DAEMON" -r
    RET=$?
    [ "$RET" -eq 0 ] && touch "$SUBSYS_LOCK"
    echo
    ;;
  stop)
    echo -n "Shutting down TORQUE Mom: "
    # check if pbs_mom is running
    if ! pid=$(pidof_pbs_mom); then
      # Nothing to stop; drop any stale lock so status does not keep
      # reporting "dead but subsys locked".
      rm -f "$SUBSYS_LOCK"
      echo -n "pbs_mom already stopped"
      success
      echo
      exit 0
    fi

    kill_pbs_mom
    RET=$?
    if [ "$RET" -eq 0 ]; then
      success "shutdown"
      rm -f "$SUBSYS_LOCK"
    else
      # Shutdown failed, so the lock is kept.
      failure "shutdown"
    fi
    echo
    ;;
  status)
    stat "$SUBSYS_LOCK" >/dev/null 2>&1
    lock_present=$?

    # Return codes per Linux Standard Base (LSB) Core Specification 3.1
    if pid=$(pidof_pbs_mom); then
      if [ "$lock_present" -eq 0 ]; then
        echo -n "pbs_mom already running"
        success
      else
        echo -n "pbs_mom running but subsys not locked"
        failure
      fi
      echo
      exit 0
    fi

    if [ "$lock_present" -eq 0 ]; then
      echo -n "pbs_mom dead but subsys locked"
      failure
      echo
      exit 2
    fi

    echo -n "pbs_mom already stopped"
    success
    echo
    exit 3
    ;;
  restart)
    "$0" stop
    sleep 1
    "$0" start
    RET=$?
    ;;
  condrestart|try-restart)
    # Restart only when status reports success (exit 0).
    "$0" status || exit 0
    "$0" restart
    RET=$?
    ;;
  reload)
    echo -n "Re-reading TORQUE Mom config file: "
    if pid=$(pidof_pbs_mom); then
      kill -HUP "$pid"
      RET=$?
      if [ "$RET" -eq 0 ]; then
        success "HUP"
      else
        failure "HUP"
      fi
    else
      failure "HUP"
      # LSB exit status 7: program is not running.
      RET=7
    fi
    echo
    ;;
  *)
    echo "Usage: pbs_mom {start|stop|restart|condrestart|try-restart|reload|status|purge}"
    exit 1
    ;;
esac

exit "$RET"