freebsd-ports/Tools/portbuild/scripts/pdispatch
Kris Kennaway 90e209c3d9 * Cleanup
* Catch up to build ID directory changes
* Add support for ssh_cmd and scp_cmd to allow using HPN-SSH with the
  none cipher where possible (for performance)
* Lazy client setup; claim-chroot will report if the client needs to be
  set up with this buildid, and we initiate the setup and poll until
  it is complete.  This allows fast clients to begin building before
  slow ones have finished setting up.

TODO: a better solution would be to avoid trying to dispatch jobs onto
clients that are in the process of setting up, since they often have low
loads and are picked preferentially by the job scheduler.
2008-07-26 14:01:07 +00:00

211 lines
7.4 KiB
Bash
Executable file

#!/bin/sh
#
# pdispatch <arch> <branch> <buildid> <command> <package.tbz> [<args> ...]
#
# Obtain a build machine from the getmachine script and dispatch the
# job to it via the ptimeout.host script.

# All four leading arguments plus the package file are required; bail
# out with a usage message instead of failing later on a bad shift or
# a malformed configuration path.
if [ $# -lt 5 ]; then
  echo "usage: pdispatch <arch> <branch> <buildid> <command> <package.tbz> [<args> ...]" >&2
  exit 1
fi

pb=/var/portbuild
arch=$1
branch=$2
buildid=$3
command=$4
# Leave only <package.tbz> [<args> ...] in the positional parameters
shift 4

pbab=${pb}/${arch}/${branch}

# Per-architecture settings, then the shared build environment helpers
. "${pb}/${arch}/portbuild.conf"
. "${pb}/scripts/buildenv"

# wait 100 hours maximum
timeout=360000

# number of lines of log to send
loglength=1000
hdrlength=6

builddir=${pbab}/builds/${buildid}

# buildenv is provided by the buildenv script sourced above
buildenv "${pb}" "${arch}" "${branch}" "${builddir}"

# ssh -x doesn't work on some machines
unset DISPLAY

# Fall back to the stock ssh and scp unless other commands were already
# chosen (e.g. HPN-SSH, for performance)
: "${ssh_cmd:=ssh}"
: "${scp_cmd:=scp}"
# Derive the bare package name by stripping the directory part and the
# ${PKGSUFFIX} extension from the package file argument.
pkgname=$(basename "$1" "${PKGSUFFIX}")

# Reject an empty name before it is used as a grep pattern: with an
# empty, unquoted pattern grep would take the duds path as the pattern
# and read standard input instead of the file.
if [ -z "${pkgname}" ]; then
  echo "null packagename"
  exit 1
fi

# Packages whose name appears as a whole line in the duds file are
# skipped.
if grep -qxF -- "${pkgname}" "${builddir}/duds"; then
  echo "skipping ${pkgname}"
  exit 1
fi
# Keep the remaining arguments (package file first) in a variable;
# the positional parameters are overwritten later by "set --".
args=${1+"$@"}

# Build the option string handed to the remote command.  Each knob
# that is set to a non-empty value appends one option; NOCLEAN also
# clears the "clean" indicator.
flags=""
clean=1
if [ -n "${NOCLEAN}" ]; then
  clean=0
  flags="${flags} -noclean"
fi
[ -n "${NO_RESTRICTED}" ] && flags="${flags} -norestr"
[ -n "${NOPLISTCHECK}" ] && flags="${flags} -noplistcheck"
[ -n "${WANT_DISTFILES}" ] && flags="${flags} -distfiles"
[ -n "${FETCH_ORIGINAL}" ] && flags="${flags} -fetch-original"
[ -n "${TRYBROKEN}" ] && flags="${flags} -trybroken"
# Main dispatch loop: claim a chroot on a build client, run the build
# command there, pull the results back, report by mail, and release the
# client.  The loop only ends through "exit ${error}" once the fetched
# log contains the "build of ... ended at" line; otherwise the whole
# job is dispatched again.
#
# ${ssh_cmd}, ${scp_cmd}, ${sudo_cmd}, ${command}, ${flags}, ${args}
# and ${mailto} are intentionally expanded unquoted because they may
# hold several words that have to become separate arguments.
while true; do
  host=
  chroot=

  # Keep requesting machines until one of them hands out a chroot
  while [ -z "${host}" ] || [ -z "${chroot}" ]; do
    chroot=
    host=$(lockf -k "${pb}/${arch}/queue/.lock" "${pb}/scripts/getmachine" "${pb}" "${arch}" "${branch}")

    # If ulist is empty, then all build machines are busy, so try
    # again in 15 seconds.
    if [ -z "${host}" ]; then
      sleep 15
      continue
    fi

    # Reload the architecture settings, then any host-specific ones
    . "${pb}/${arch}/portbuild.conf"
    test -f "${pb}/${arch}/portbuild.${host}" && . "${pb}/${arch}/portbuild.${host}"

    # stderr is captured as well, so ssh or shell errors end up in
    # chrootdata too
    chrootdata=$(${ssh_cmd} -a -n "${client_user}@${host}" ${sudo_cmd} "${pb}/scripts/claim-chroot" "${arch}" "${branch}" "${buildid}" "${pkgname}" 2>&1)
    if [ -z "${chrootdata}" ]; then
      echo "Failed to claim chroot on ${host}"
    fi

    case "${chrootdata}" in
    */var/portbuild/scripts/claim-chroot*)
      # Error executing script, assume system is booting
      chrootdata="wait boot"
      ;;
    esac

    echo "Got ${chrootdata} from ${host}"

    # Split the reply into words: $1 is the verb, $2 its argument
    set -- ${chrootdata}
    understood=0
    if [ $# -ge 2 ]; then
      case "$1" in
      chroot)
        chroot=$2
        understood=1
        ;;
      setup)
        echo "Setting up ${arch}/${branch} build ID ${buildid} on ${host}"

        # Run in the background so we can potentially
        # claim a slot on another machine.  In
        # practice I think we often end up trying
        # again on the same machine though.

        # Make sure to close stdin/stderr in the child
        # or make will hang until the child process
        # exits
        # XXX Revert to >&- once this is confirmed as working
        "${pb}/scripts/dosetupnode" "${arch}" "${branch}" "${buildid}" "${host}" >/tmp/setupnode.$$ 2>&1 &
        understood=1
        ;;
      error)
        echo "Error reported by ${host}: $2"
        sleep 60
        understood=1
        ;;
      wait)
        echo "Waiting for setup to finish"
        sleep 60
        understood=1
        ;;
      esac
    fi

    # An empty or unrecognised reply (for instance an ssh error
    # message) would otherwise be retried immediately; pause first so
    # a broken client is not hammered in a tight loop.
    if [ "${understood}" -eq 0 ]; then
      echo "No usable reply from ${host}, retrying in 15 seconds"
      sleep 15
    fi

    # No chroot obtained: hand the machine back before trying again
    if [ -z "${chroot}" ]; then
      lockf -k "${pb}/${arch}/queue/.lock" "${pb}/scripts/releasemachine" "${arch}" "${host}"
    fi
  done

  . "${pb}/${arch}/portbuild.conf"
  test -f "${pb}/${arch}/portbuild.${host}" && . "${pb}/${arch}/portbuild.${host}"

  # Remove logs left over from an earlier attempt at this package
  rm -f "${builddir}/logs/${pkgname}.log" "${builddir}/logs/${pkgname}.log.bz2"
  rm -f "${builddir}/errors/${pkgname}.log" "${builddir}/errors/${pkgname}.log.bz2"

  # The escaped quotes around the *D variables are passed through
  # literally as part of the remote command line.
  echo "dispatching: ${ssh_cmd} -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${buildid} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args}"
  "${pb}/scripts/ptimeout.host" "${timeout}" ${ssh_cmd} -a -t -n "${client_user}@${host}" ${sudo_cmd} ${command} "${arch}" "${branch}" "${buildid}" "${chroot}" ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args} 2>&1
  error=$?

  #if grep -q " failed unexpectedly on " ${builddir}/logs/${pkgname}.pre.log; then
  #  cat ${builddir}/logs/${pkgname}.pre.log | mail -s "${pkgname} failed uncleanly on ${arch} ${branch}" ${mailto}
  #else
  #  rm ${builddir}/logs/${pkgname}.pre.log
  #fi

  # Pull in the results of the build from the client
  ${scp_cmd} "${client_user}@${host}:${chroot}/tmp/${pkgname}.log" "${builddir}/logs/${pkgname}.log"
  if ${ssh_cmd} -a -n "${client_user}@${host}" test -f "${chroot}/tmp/work.tbz"; then
    ${scp_cmd} "${client_user}@${host}:${chroot}/tmp/work.tbz" "${builddir}/wrkdirs/${pkgname}.tbz"
  fi

  # XXX Set dirty flag if any of the scp's fail

  # Copy the distfiles directory of the chroot into a per-package
  # staging directory and mark it complete with a .done file
  mkdir -p "${builddir}/distfiles/.pbtmp/${pkgname}"
  ${ssh_cmd} -a -n "${client_user}@${host}" tar -C "${chroot}/tmp/distfiles" -cf - . |
    tar --unlink -C "${builddir}/distfiles/.pbtmp/${pkgname}" -xvf -
  touch "${builddir}/distfiles/.pbtmp/${pkgname}/.done"

  if [ "${error}" = 0 ]; then
    # Success: fetch the packages tree from the chroot
    ${ssh_cmd} -a -n "${client_user}@${host}" tar -C "${chroot}/tmp" -cf - packages |
      tar --unlink -C "${builddir}" -xvf -
    test -f "${builddir}/packages/All/${pkgname}${PKGSUFFIX}" &&
      touch "${builddir}/packages/All/${pkgname}${PKGSUFFIX}"
    rm -f "${builddir}/errors/${pkgname}.log" &&
      touch "${builddir}/errors/.force"
    lockf -k "${pbab}/failure.lock" "${pb}/scripts/buildsuccess" "${arch}" "${branch}" "${buildid}" "${pkgname}"

    log=${builddir}/logs/${pkgname}.log
    if grep -q "even though it is marked BROKEN" "${log}"; then
      echo | mail -s "${pkgname} BROKEN but built on ${arch} ${branch}" ${mailto}
    fi
    if grep -q "^list of .*file" "${log}"; then
      buildlogdir=$(realpath "${builddir}/logs/")
      baselogdir=$(basename "${buildlogdir}")
      # Mail the log header, a link to the full log, and everything
      # after the filesystem state check marker
      (
        sed -e '/^build started/,$d' "${log}"
        echo
        echo "For the full build log, see"
        echo
        echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename "${log}")"
        echo
        sed -e '1,/^=== Checking filesystem state/d' "${log}"
      ) | mail -s "${pkgname} pkg-plist errors on ${arch} ${branch}" ${mailto}
    fi
  else
    # Failure: fetch the log into the errors directory, or mail a
    # listing of the chroot's tmp directory if it cannot be fetched
    log=${builddir}/errors/${pkgname}.log
    if ! ${scp_cmd} "${client_user}@${host}:${chroot}/tmp/${pkgname}.log" "${log}"; then
      (
        echo "${chroot}@${host}"
        ${ssh_cmd} -a -n "${client_user}@${host}" ls -laR "${chroot}/tmp"
      ) | mail -s "${pkgname} logfile not found" ${mailto}
    fi

    if ! grep -q "even though it is marked BROKEN" "${log}"; then
      buildlogdir=$(realpath "${builddir}/logs/")
      baselogdir=$(basename "${buildlogdir}")
      loglines=$(wc -l "${log}" | awk '{print $1}')
      maxlines=$((loglength + hdrlength))
      # Short logs are mailed whole; longer ones are cut down to the
      # header plus the last ${loglength} lines
      if [ "${loglines}" -le "${maxlines}" ]; then
        (
          echo "You can also find this build log at"
          echo
          echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename "${log}")"
          echo
          cat "${log}"
        ) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto}
      else
        (
          echo "Excerpt from the build log at"
          echo
          echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename "${log}")"
          echo
          sed -e '/^build started/,$d' "${log}"
          echo
          echo " [... lines trimmed ...]"
          echo
          tail -n "${loglength}" "${log}"
        ) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto}
      fi
    fi
    lockf -k "${pbab}/failure.lock" "${pb}/scripts/buildfailure" "${arch}" "${branch}" "${buildid}" "${pkgname}"
  fi

  # Clean up the chroot on the client and give the machine back
  ${ssh_cmd} -a -n "${client_user}@${host}" ${sudo_cmd} "${pb}/scripts/clean-chroot" "${arch}" "${branch}" "${buildid}" "${chroot}" "${clean}"
  lockf -k "${pb}/${arch}/queue/.lock" "${pb}/scripts/releasemachine" "${arch}" "${host}"

  # XXX Set a dirty variable earlier and check here
  if grep -q "^build of .*ended at" "${builddir}/logs/${pkgname}.log"; then
    exit "${error}"
  else
    echo "Build of ${pkgname} in ${host}:/${chroot} failed uncleanly, rebuilding"
    sleep 120
  fi
done