freebsd-ports/Tools/portbuild/scripts/pollmachine
2009-05-21 16:17:15 +00:00

301 lines
8.9 KiB
Python
Executable file

#!/usr/bin/env python
#
# pollmachine
#
# Monitors build machines and notifies qmgr of changes
#
# pollmachine [options] [arch] ...
# - update every machine in the mlist file for [arch]
#
# pollmachine [options] [arch/mach] ...
# - update individual machine(s) for specified architecture
#
# options are:
# -daemon : poll repeatedly
#
# TODO:
# XXX qmgr notification of new/removed machines
# XXX counter before declaring a machine as dead
# Declares a machine as online if it reports 0 data from infoseek?
# * Deal with machines that change OS/kernel version
# - ACL list might change!
# - take machine offline, update ACL/arch/etc, reboot, bring online
import sys, threading, socket
from time import sleep
import os, subprocess, logging
def parse_args(argv):
    """Split the command-line arguments into the polling configuration.

    argv is the argument list without the program name.  Each argument is
    one of:
      -daemon    : poll repeatedly (sets the poll delay to 180 seconds)
      arch/mach  : poll only the named machine of that architecture
      arch       : poll the architecture (machines come from its mlist file)

    Returns a tuple (arches, mlist, polldelay):
      arches    - set of every architecture mentioned
      mlist     - dict mapping arch -> set of explicitly named machines
      polldelay - seconds between polls, 0 meaning "poll once"
    """
    arches = set()
    mlist = {}
    polldelay = 0

    for arg in argv:
        if arg == "-daemon":
            polldelay = 180
            continue

        if "/" in arg:
            # Only the first "/" separates arch from machine name
            arch, _, mach = arg.partition("/")
            arches.add(arch)
            mlist.setdefault(arch, set()).add(mach)
        else:
            arches.add(arg)

    return arches, mlist, polldelay


arches, mlist, polldelay = parse_args(sys.argv[1:])

# Without at least one architecture there is nothing to poll (and in
# -daemon mode the main loop would spin forever doing nothing).
if not arches:
    print("Usage: %s <arch> [<arch> ...]" % sys.argv[0])
    sys.exit(1)

pb = "/var/portbuild"

# set of machines for each arch
machines = dict((arch, set()) for arch in arches)

# Mapping from machine names to monitor threads
pollthreads = {}
class MachinePoll(threading.Thread):
    """ Poll a machine regularly

    Each poll connects to host:port, reads the "name=value" lines the
    machine reports into self.vars, tracks whether the machine is online,
    cleans up build environments that are no longer active, re-runs the
    node setup script when the report is empty or malformed, and records
    the reported jobs/load in the per-arch loads directory.
    """

    mach = None      # Which machine name to poll
    arch = None      # Which arch is this assigned to

    # Which host/port to poll for this machine status (might be SSH
    # tunnel endpoint)
    host = None
    port = 414

    timeout = None   # How often to poll (seconds); false value = poll once
    shutdown = False # Exit at next poll wakeup

    # State variables tracked
    online = False

    # Dictionary of variables reported by the client
    vars = None

    def __init__(self, mach, arch, timeout, host, port):
        """Create a poller for machine `mach` of architecture `arch`.

        timeout is the delay in seconds between polls (a false value
        makes run() poll exactly once); host/port is the address polled.
        """
        super(MachinePoll, self).__init__()
        self.mach = mach
        self.arch = arch
        self.timeout = timeout
        self.host = host
        self.port = port

        # How many times the connection timed out since last success
        self.timeouts = 0

        self.vars = {}

        self.setDaemon(True)

    def run(self):
        """Poll until asked to shut down, sleeping `timeout` between polls."""
        while not self.shutdown:
            self.poll()

            if not self.timeout:
                break
            sleep(self.timeout)

    def poll(self):
        """ Poll the status of this machine """
        nowonline, lines = self._fetch_report()

        if nowonline != self.online:
            logging.info("[%s] Now %s" % (self.mach, "online" if nowonline else "OFFLINE"))
            self.online = nowonline
            if self.online:
                self.timeouts = 0
            # XXX inform qmgr of state change

        # An online machine that answered with no data at all: reportload
        # script is missing.  A timed-out poll also yields no lines but
        # must not trigger setup, hence the timeouts check.
        dosetup = bool(self.online and not lines and not self.timeouts)

        # Malformed report lines also mean the client needs setting up
        if self._parse_report(lines):
            dosetup = True
        # XXX update qmgr

        self._cleanup_stale_builds()

        if dosetup:
            logging.info("[%s] Setting up machine" % (self.mach))
            (err, out) = self.setup("-", "-")
            if err:
                logging.info("[%s] Error from setup" % (self.mach))
                self._log_output(out)
            logging.info("[%s] Setup complete" % (self.mach))

        # Validate that arch has not changed (e.g. i386 -> amd64)
        reported = self.vars.get('arch')
        if reported is not None and self.arch != reported:
            logging.info("[%s] Unexpected arch: %s -> %s" % \
                (self.mach, self.arch, reported))

        self._record_load()

    def _fetch_report(self):
        """Read the machine's status report over TCP.

        Returns (nowonline, lines) where lines are the non-blank report
        lines with trailing whitespace removed.  Up to two consecutive
        timeouts keep the previous online state; any other failure
        reports the machine as offline.
        """
        nowonline = False
        lines = []
        s = None
        try:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.settimeout(60)
            s.connect((self.host, self.port))

            # Read until EOF, but stop once 64k has been received
            chunks = []
            received = 0
            while received < 65536:
                chunk = s.recv(8192)
                if not chunk:
                    break
                chunks.append(chunk)
                received += len(chunk)

            # Drop blank lines so that an empty report yields no lines
            for line in "".join(chunks).split("\n"):
                line = line.rstrip()
                if line:
                    lines.append(line)

            nowonline = True
            self.timeouts = 0
        except socket.timeout:
            if self.online:
                logging.info("[%s] Connection timeout" % self.mach)
            self.timeouts += 1
            if self.timeouts < 3:
                nowonline = self.online
        except Exception:
            # Any other failure (refused, unreachable, bad address, ...)
            # simply means the machine is treated as offline.
            logging.debug("[%s] Poll failed: %s" % (self.mach, sys.exc_info()[1]))
        finally:
            if s is not None:
                try:
                    s.close()
                except socket.error:
                    pass

        return (nowonline, lines)

    def _parse_report(self, lines):
        """Merge "name=value" report lines into self.vars.

        Returns True when at least one line was malformed (no "=" or an
        empty name); such lines are logged and not stored.
        """
        badinput = False
        for line in lines:
            line = line.rstrip()
            if not line:
                continue

            (name, sep, value) = line.partition('=')
            if sep != '=' or not name:
                logging.info("[%s] Bad input: %s" % (self.mach, line))
                # Assume client needs setting up
                badinput = True
                continue

            # A missing variable compares equal to "", so a first-seen
            # empty value is not stored.
            if self.vars.get(name, "") != value:
                self.vars[name] = value

        return badinput

    def _cleanup_stale_builds(self):
        """Run cleanup for every reported buildenv lacking a .active file."""
        envs = self.vars.get('buildenvs')
        if envs is None:
            return

        for e in envs.split():
            try:
                (arch, branch, buildid) = e.split("/")
            except ValueError:
                # Not of the form arch/branch/buildid; skip it rather
                # than let the exception end the poll thread.
                logging.info("[%s] Bad buildenv: %s" % (self.mach, e))
                continue

            f = "/var/portbuild/%s/%s/builds/%s/.active" % \
                (arch, branch, buildid)
            if os.path.exists(f):
                continue

            # Clean up a stale buildenv
            logging.info("[%s] Cleaning up stale build: %s" % (self.mach, e))
            (err, out) = self.setup(branch, buildid, "-nocopy -full")
            if err:
                logging.info("[%s] Error from cleanup" % (self.mach))
                self._log_output(out)

    def _log_output(self, out):
        """Log each non-empty line of command output, tagged with the machine."""
        for l in out.split("\n"):
            if l == "":
                continue
            logging.info("[%s] %s" % (self.mach, l))

    def _record_load(self):
        """Record current system load in <pb>/<arch>/loads/<mach>.

        The file is truncated first; if 'jobs' or 'load' has not been
        reported it is left empty.  Failure to open the file is ignored.
        """
        try:
            f = open("%s/%s/loads/%s" % (pb, self.arch, self.mach), "w")
        except (IOError, OSError):
            return

        try:
            try:
                f.write("%s %s\n" % (self.vars['jobs'], self.vars['load']))
            except (KeyError, IOError, OSError):
                pass
        finally:
            f.close()

    def setup(self, branch, buildid, args = ""):
        """Run dosetupnode for this machine as user ports-<arch>.

        Returns (err, out): the command's exit status and its combined
        stdout/stderr output.
        """
        cmd = "su ports-%s -c \"/var/portbuild/scripts/dosetupnode %s %s %s %s %s\""\
            % (self.arch, self.arch, branch, buildid, self.mach, args)
        child = subprocess.Popen(cmd, shell=True, stderr = subprocess.STDOUT,
                                 stdout = subprocess.PIPE)
        # communicate() drains the pipe while waiting; calling wait()
        # first can deadlock once the child fills the pipe buffer.
        out = child.communicate()[0]
        err = child.returncode
        return (err, out)
# Send INFO-and-above records to /var/log/pollmachine.log (opened with
# mode 'w') and attach a stream handler that emits the same records in
# the same format.
log_format = '[%(asctime)s] %(message)s'
log_datefmt = '%d %b %Y %H:%M:%S'

logging.basicConfig(filename='/var/log/pollmachine.log', filemode='w',
                    level=logging.INFO,
                    format=log_format, datefmt=log_datefmt)

formatter = logging.Formatter(log_format, datefmt=log_datefmt)
log_console = logging.StreamHandler()
log_console.setFormatter(formatter)
log_console.setLevel(logging.INFO)
logging.getLogger('').addHandler(log_console)
# Main loop: work out which machines each arch currently has, stop the
# pollers of machines that went away and start pollers for new ones.
# NOTE(review): pollthreads is keyed by machine name only, so the same
# name under two architectures would collide -- confirm names are unique.
while True:
    for arch in arches:
        # Machines named on the command line take the place of the
        # arch's mlist file.
        try:
            now = mlist[arch]
        except KeyError:
            mlistfile = "%s/%s/mlist" % (pb, arch)
            # A missing/unreadable mlist is fatal: let the error propagate
            f = open(mlistfile, "r")
            try:
                names = [line.rstrip() for line in f.readlines()]
            finally:
                f.close()
            # Blank lines would otherwise become a machine named ""
            now = set(name for name in names if name)

        gone = machines[arch].difference(now)
        new = now.difference(machines[arch])
        machines[arch] = now

        for mach in gone:
            logging.info("Removing machine %s/%s" % (arch, mach))
            # XXX disable from qmgr
            pollthreads[mach].shutdown = True
            del pollthreads[mach]

        for mach in new:
            logging.info("Adding machine %s/%s" % (arch, mach))
            # XXX set up qmgr

            # Source the arch-wide and per-machine config files (when
            # present) in a shell and print the poll endpoint they define.
            pc = "%s/%s/portbuild.conf" % (pb, arch)
            pch = "%s/%s/portbuild.%s" % (pb, arch, mach)
            cmd = "test -f %s && . %s; test -f %s && . %s; echo $infoseek_host; echo $infoseek_port" % (pc, pc, pch, pch)
            config = subprocess.Popen(cmd, shell = True,
                                      stdout = subprocess.PIPE)
            # communicate() reads all output and reaps the child, so no
            # zombie process is left behind for each machine added.
            output = config.communicate()[0].split("\n") + ["", ""]

            host = output[0].rstrip()
            if not host:
                host = mach
            port = output[1].rstrip()
            try:
                port = int(port)
            except (TypeError, ValueError):
                port = 414

            pollthreads[mach] = MachinePoll(mach, arch, polldelay, host, port)
            pollthreads[mach].start()

    if not polldelay:
        # One-shot mode: the pollers are daemon threads and would be
        # killed when the main thread exits, so wait for each of them to
        # finish its single poll first.
        for poller in list(pollthreads.values()):
            poller.join()
        break

    sleep(polldelay)