#!/usr/bin/env python
#
# pollmachine
#
# Monitors build machines and notifies qmgr of changes

#
# pollmachine [options] [arch] ...
#  - update every machine in the mlist file for [arch]
#
# pollmachine [options] [arch/mach] ...
#  - update individual machine(s) for the specified architecture
#
# options are:
#  -daemon : poll repeatedly

#
# TODO:
# XXX qmgr notification of new/removed machines
# XXX counter before declaring a machine as dead
#     Declares a machine as online even if it reports no data from infoseek?

# * Deal with machines changing OS/kernel version
#   - ACL list might change!
#   - take machine offline, update ACL/arch/etc, reboot, bring online

import sys, threading, socket
from time import sleep
import os, subprocess, logging

if len(sys.argv) < 2:
    print "Usage: %s [-daemon] <arch>[/<mach>] ..." % sys.argv[0]
    sys.exit(1)

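# Parse arguments: a bare arch polls every machine in that arch's mlist
# file; an arch/mach pair pins an explicit machine set, so the mlist file
# is never read for that arch.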
arches=set()
mlist={}
polldelay=0
for i in sys.argv[1:]:
    if i == "-daemon":
        polldelay = 180
        continue

    if "/" in i:
        item=i.partition("/")
        arch=item[0]
        mach=item[2]
        arches.add(arch)
        try:
            mlist[arch].add(mach)
        except KeyError:
            mlist[arch] = set((mach,))
    else:
        arches.add(i)

pb="/var/portbuild"

# set of machines for each arch
machines={}
for i in arches:
    machines[i]=set()

# Mapping from machine names to monitor threads
pollthreads={}

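# Each MachinePoll thread connects to its machine's reportload/infoseek
# service, parses the name=value status it returns, records the machine's
# load, and runs dosetupnode when the client looks unconfigured or is
# holding stale build environments.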
class MachinePoll(threading.Thread):
    """ Poll a machine regularly """

    mach = None             # Which machine name to poll
    arch = None             # Which arch is this assigned to

    # Which host/port to poll for this machine status (might be SSH
    # tunnel endpoint)
    host = None
    port = 414

    timeout = None          # How often to poll
    shutdown = False        # Exit at next poll wakeup

    # State variables tracked
    online = False

    # Dictionary of variables reported by the client
    vars = None

    def __init__(self, mach, arch, timeout, host, port):
        super(MachinePoll, self).__init__()
        self.mach = mach
        self.arch = arch
        self.timeout = timeout
        self.host = host
        self.port = port

        # How many times the connection timed out since last success
        self.timeouts = 0

        self.vars = {}

        self.setDaemon(True)

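    # Poll once immediately, then every self.timeout seconds; a timeout of
    # zero (non-daemon mode) means poll a single time and exit.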
    def run(self):
        while True:
            if self.shutdown:
                break

            self.poll()

            if not self.timeout:
                break
            else:
                sleep(self.timeout)

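    # poll() reads at most 64KB of "name=value" lines (e.g. arch, jobs,
    # load, buildenvs) from the machine's status port and parses them once
    # the remote side closes the connection.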
    def poll(self):
        """ Poll the status of this machine """

        nowonline = False
        lines = []
        try:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.settimeout(60)
            s.connect((self.host, self.port))

            data = ""
            while len(data) < 65536:
                chunk = s.recv(8192)
                if not chunk:
                    break
                data += chunk

            nowonline = True
            self.timeouts = 0
            lines = data.split("\n")
        except socket.timeout:
            if self.online:
                logging.info("[%s] Connection timeout" % self.mach)
            self.timeouts += 1
            if self.timeouts < 3:
                nowonline = self.online
        except:
            pass
        finally:
            try:
                s.close()
            except:
                pass

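        # A previously-online machine is only marked offline after three
        # consecutive connection timeouts; any other connection error marks
        # it offline immediately.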
        if nowonline != self.online:
            logging.info("[%s] Now %s" % (self.mach, "online" if nowonline else "OFFLINE"))
            self.online = nowonline
            if self.online:
                self.timeouts = 0
            # XXX inform qmgr of state change

        if self.online and not lines and not self.timeouts:
            # reportload script is missing
            dosetup=1
        else:
            dosetup=0

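        # Update self.vars from the reported name=value pairs; a malformed
        # line usually means the client is missing its reportload script,
        # so schedule a setup.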
        for line in lines:
            if line == "":
                continue
            line=line.rstrip()
            part=line.partition('=')
            if part[1] != '=' or not part[0]:
                # if "No such file or directory" in line:
                #     # Client may require setting up post-boot
                #     dosetup=1
                logging.info("[%s] Bad input: %s" % (self.mach, line))
                # Assume client needs setting up
                dosetup=1
                continue
            try:
                old = self.vars[part[0]]
            except KeyError:
                old = ""
            if old != part[2]:
                self.vars[part[0]] = part[2]
                # logging.info("%s@%s: \"%s\" -> \"%s\"" % (part[0], self.mach, old, part[2]))
                # XXX update qmgr

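        # A machine may still advertise build environments whose build is no
        # longer active on the server; tear those down via dosetupnode.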
        try:
            envs = self.vars['buildenvs']
            for e in envs.split():
                (arch, branch, buildid) = e.split("/")
                f = "/var/portbuild/%s/%s/builds/%s/.active" % \
                    (arch, branch, buildid)
                if os.path.exists(f):
                    continue
                # Clean up a stale buildenv
                logging.info("[%s] Cleaning up stale build: %s" % (self.mach, e))
                (err, out) = self.setup(branch, buildid, "-nocopy -full")
                if err:
                    logging.info("[%s] Error from cleanup" % (self.mach))
                    for l in out.split("\n"):
                        if l == "":
                            continue
                        logging.info("[%s] %s" % (self.mach, l))

        except KeyError:
            pass

        if dosetup:
            logging.info("[%s] Setting up machine" % (self.mach))
            (err, out) = self.setup("-", "-")
            if err:
                logging.info("[%s] Error from setup" % (self.mach))
                for l in out.split("\n"):
                    if l == "":
                        continue
                    logging.info("[%s] %s" % (self.mach, l))
            logging.info("[%s] Setup complete" % (self.mach))

        # Validate that arch has not changed (e.g. i386 -> amd64)
        try:
            if self.arch != self.vars['arch']:
                logging.info("[%s] Unexpected arch: %s -> %s" % \
                             (self.mach, self.arch, self.vars['arch']))
        except KeyError:
            pass

        # Record current system load
        try:
            f = file("%s/%s/loads/%s" % (pb, self.arch, self.mach), "w")
        except:
            return
        try:
            f.write("%s %s\n" % (self.vars['jobs'], self.vars['load']))
        except:
            pass
        f.close()

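    # Run dosetupnode on the client as the per-arch ports-<arch> user and
    # return its exit status together with combined stdout/stderr.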
    def setup(self, branch, buildid, args = ""):
        cmd = "su ports-%s -c \"/var/portbuild/scripts/dosetupnode %s %s %s %s %s\"" \
              % (self.arch, self.arch, branch, buildid, self.mach, args)
        child = subprocess.Popen(cmd, shell=True, stderr = subprocess.STDOUT,
                                 stdout = subprocess.PIPE)
        # communicate() drains the pipe before waiting, avoiding a deadlock
        # if dosetupnode produces more output than the pipe buffer holds
        out = child.communicate()[0]
        err = child.returncode
        return (err, out)

logging.basicConfig(level=logging.INFO,
                    format='[%(asctime)s] %(message)s',
                    datefmt='%d %b %Y %H:%M:%S',
                    filename='/var/log/pollmachine.log', filemode='w')

log_console = logging.StreamHandler()
log_console.setLevel(logging.INFO)
formatter = logging.Formatter('[%(asctime)s] %(message)s',
                              datefmt = '%d %b %Y %H:%M:%S')
log_console.setFormatter(formatter)
logging.getLogger('').addHandler(log_console)

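# Main loop: (re)read each arch's mlist file (unless an explicit machine set
# was given on the command line), start a MachinePoll thread for every new
# machine, and shut down the threads of machines that have disappeared.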
while True:
    for arch in arches:
        try:
            now = mlist[arch]
        except KeyError:
            mlistfile="%s/%s/mlist" % (pb, arch)
            try:
                f = file(mlistfile, "r")
            except OSError, error:
                raise

            now=set(mach.rstrip() for mach in f.readlines())
            f.close()

        gone = machines[arch].difference(now)
        new = now.difference(machines[arch])

        machines[arch]=now

        for mach in gone:
            logging.info("Removing machine %s/%s" % (arch, mach))
            # XXX disable from qmgr
            pollthreads[mach].shutdown=True
            del pollthreads[mach]

        for mach in new:
            logging.info("Adding machine %s/%s" % (arch, mach))
            # XXX set up qmgr

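            # Source portbuild.conf and the per-machine portbuild.<mach>
            # file in a shell to pick up infoseek_host/infoseek_port
            # overrides; fall back to the machine name and port 414.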
            pc="%s/%s/portbuild.conf" % (pb, arch)
            pch="%s/%s/portbuild.%s" % (pb, arch, mach)
            cmd = "test -f %s && . %s; test -f %s && . %s; echo $infoseek_host; echo $infoseek_port" % (pc, pc, pch, pch)
            config = subprocess.Popen(cmd, shell = True,
                                      stdout = subprocess.PIPE)
            host=config.stdout.readline().rstrip()
            if not host:
                host = mach
            port=config.stdout.readline().rstrip()
            try:
                port = int(port)
            except (TypeError, ValueError):
                port = 414

            pollthreads[mach] = MachinePoll(mach, arch, polldelay, host, port)
            pollthreads[mach].start()

    if not polldelay:
        break

    sleep(polldelay)