forked from hornet/lainmonitor
Merge: 'Rewritten from scratch' from Refactor2.0 into main
Reviewed-on: #1
This commit is contained in:
commit
ce133c03ee
1 changed files with 148 additions and 67 deletions
215
lainmonitor.py
215
lainmonitor.py
|
|
@ -1,75 +1,156 @@
|
||||||
#description: telegram bot for monitoring the system
|
#!/usr/bin/env python3
|
||||||
#dependencies: telebot
|
|
||||||
#usage: python3 lainmonitor.py | or run it as a service
|
# --------------------------------------------------------------------------
|
||||||
#author: hornetmaidan
|
# Description: A Telegram bot for monitoring critical infrastructure services
|
||||||
|
# Dependencies: telebot
|
||||||
|
# Usage: python3 lainmonitor.py | or run it as a service
|
||||||
|
# Author: h@x
|
||||||
|
# Version: 2.0
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import telebot
|
import telebot
|
||||||
#define the variables
|
import paramiko
|
||||||
status, hostname, uptime, zerotier, prosody, postgres, tailscale, disk, ping = 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown'
|
import requests
|
||||||
#telegram bot token
|
import time
|
||||||
TOKEN = 'PLACE_YOUR_TOKEN_HERE'
|
import logging
|
||||||
|
import ssl
|
||||||
|
import os
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
import config
|
||||||
|
|
||||||
#bot init
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
|
||||||
bot = telebot.TeleBot(TOKEN)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
#get system info
|
CERT_DIR = os.path.join(os.path.dirname(__file__), 'certs')
|
||||||
def getinfo():
|
os.makedirs(CERT_DIR, exist_ok=True)
|
||||||
global status, hostname, uptime, zerotier, prosody, postgres, tailscale, disk
|
|
||||||
hostname = subprocess.check_output(['hostname']).decode().strip()
|
|
||||||
uptime = subprocess.check_output(['uptime', '-p']).decode().strip()
|
|
||||||
#systemd-only services
|
|
||||||
zerotier = subprocess.Popen("sudo systemctl status zerotier-one | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip()
|
|
||||||
prosody = subprocess.Popen("sudo systemctl status prosody | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip()
|
|
||||||
postgres = subprocess.Popen("sudo systemctl status postgresql | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip()
|
|
||||||
tailscale = subprocess.Popen("sudo systemctl status tailscaled | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip()
|
|
||||||
disk = subprocess.check_output(['df', '-h']).decode().strip()
|
|
||||||
if hostname == 'unknown':
|
|
||||||
status = 'offline'
|
|
||||||
else:
|
|
||||||
status = 'online'
|
|
||||||
return hostname, uptime, zerotier, prosody, postgres, tailscale, disk
|
|
||||||
|
|
||||||
#ping tailscale (change the IP address to the one you want or add more)
|
bot = telebot.TeleBot(config.TOKEN)
|
||||||
def check_tailscale():
|
ALLOWED_CHATS = set(config.ALLOWED_CHATS)
|
||||||
global ping
|
|
||||||
ping = subprocess.Popen("ping TAILSCALE_IP -c 1 | grep '1 packets'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip()
|
|
||||||
if '1 received' in ping:
|
|
||||||
ping = 'connected'
|
|
||||||
else:
|
|
||||||
ping = 'unreachable'
|
|
||||||
return ping
|
|
||||||
|
|
||||||
#debug handler
|
def run_cmd(cmd, timeout=5):
|
||||||
def check():
|
try:
|
||||||
global status, hostname, uptime, zerotier, prosody, postgres, tailscale, disk
|
result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
|
||||||
getinfo()
|
return result.stdout.strip()
|
||||||
print('system status:', status)
|
except subprocess.TimeoutExpired:
|
||||||
print('hostname:', hostname)
|
return 'timeout'
|
||||||
print('uptime:', uptime)
|
except OSError as e:
|
||||||
print('zerotier:', zerotier)
|
logger.error("OS error running %s: %s", cmd, e)
|
||||||
print('prosody:', prosody)
|
return 'error'
|
||||||
print('postgres:', postgres)
|
|
||||||
print('tailscale:', tailscale)
|
|
||||||
print('disk:', disk)
|
|
||||||
return status, hostname, uptime, zerotier, prosody, postgres, tailscale, disk
|
|
||||||
|
|
||||||
#message handling
|
def get_local_info():
|
||||||
@bot.message_handler(commands=['start', 'help', 'status', 'reboot', 'ping'])
|
try:
|
||||||
def handle(message):
|
hostname = run_cmd(['hostname'])
|
||||||
if message.text == '/start':
|
uptime = run_cmd(['uptime', '-p'])
|
||||||
bot.reply_to(message, 'lainmonitor v1.0 --- standing by...')
|
load_line = run_cmd(['uptime'])
|
||||||
elif message.text == '/help':
|
load_avg = load_line.split('load average:')[-1].strip() if 'load average:' in load_line else 'unknown'
|
||||||
bot.reply_to(message, 'commands: /start, /help, /status, /reboot, /ping')
|
memory = run_cmd(['free', '-h'])
|
||||||
elif message.text == '/status':
|
disk = run_cmd(['df', '-h'])
|
||||||
check()
|
status = 'online' if hostname and hostname not in ('error', 'timeout') else 'offline'
|
||||||
status_message = f'hostname: {hostname}\nsystem status: {status}\nuptime: {uptime}\nzerotier: {zerotier}\nprosody: {prosody}\npostgres: {postgres}\ntailscale: {tailscale}'
|
return {'hostname': hostname, 'uptime': uptime, 'load_avg': load_avg, 'memory': memory, 'disk': disk, 'status': status}
|
||||||
bot.reply_to(message, status_message)
|
except Exception as e:
|
||||||
bot.reply_to(message, f'filesystem info for {hostname}: \n\n{disk}')
|
logger.error("Local info error: %s", e)
|
||||||
elif message.text == '/reboot':
|
return {'hostname': 'error', 'uptime': 'error', 'load_avg': 'error', 'memory': 'error', 'disk': 'error', 'status': 'error'}
|
||||||
bot.reply_to(message, 'work in progress...')
|
|
||||||
elif message.text == '/ping':
|
|
||||||
check_tailscale()
|
|
||||||
bot.reply_to(message, f'ping status: {ping}')
|
|
||||||
|
|
||||||
#polling
|
def fetch_certificate(host, port):
|
||||||
bot.polling()
|
cert_path = os.path.join(CERT_DIR, f"{host}.pem")
|
||||||
|
if os.path.isfile(cert_path):
|
||||||
|
return cert_path
|
||||||
|
try:
|
||||||
|
cert = ssl.get_server_certificate((host, port))
|
||||||
|
with open(cert_path, 'w') as f:
|
||||||
|
f.write(cert)
|
||||||
|
return cert_path
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Certificate fetch error for %s: %s", host, e)
|
||||||
|
return False
|
||||||
|
|
||||||
|
def get_ssh_info(ip, cfg, timeout=5):
    """Collect basic health information from a host over SSH.

    Args:
        ip: Address of the host to connect to; returned unchanged as the
            first element of the result.
        cfg: Host configuration mapping; ``ssh_user`` and ``ssh_pass`` are read.
        timeout: Seconds allowed for establishing the connection and for the
            channel I/O of each remote command.

    Returns:
        A ``(ip, info)`` tuple. When the connection succeeds, ``info`` holds
        ``hostname``, ``uptime``, ``load_avg``, ``memory`` and ``disk`` (each
        the stripped command output, or ``'error'`` if that command failed)
        plus ``status`` set to ``'online'``. If connecting fails for any
        reason, ``info`` is ``{'status': 'unreachable'}``.
    """
    commands = {
        'hostname': 'hostname',
        'uptime': 'uptime -p',
        'load_avg': 'uptime',
        'memory': 'free -h',
        'disk': 'df -h',
    }
    # Bound before the try so the finally clause never touches an unbound name.
    client = None
    try:
        client = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts any unknown host key, so the
        # password below can be sent to an impersonating host. Consider loading
        # known host keys and using RejectPolicy instead.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(ip, username=cfg['ssh_user'], password=cfg['ssh_pass'], timeout=timeout)
        info = {}
        for key, cmd in commands.items():
            try:
                # The timeout keeps a hung remote command from blocking forever.
                _stdin, stdout, _stderr = client.exec_command(cmd, timeout=timeout)
                out = stdout.read().decode().strip()
            except Exception as e:
                logger.error("SSH cmd error %s on %s: %s", cmd, ip, e)
                info[key] = 'error'
                continue
            if key == 'load_avg' and 'load average:' in out:
                # Keep only the figures that follow the "load average:" label.
                out = out.split('load average:')[-1].strip()
            info[key] = out
        info['status'] = 'online'
    except Exception as e:
        logger.error("SSH connection error to %s: %s", ip, e)
        info = {'status': 'unreachable'}
    finally:
        if client is not None:
            try:
                client.close()
            except Exception as e:
                # Closing is best-effort; record the failure instead of hiding it.
                logger.warning("SSH close error for %s: %s", ip, e)
    return ip, info
|
||||||
|
|
||||||
|
def get_opnsense_info(ip, cfg):
    """Query an OPNsense host's health endpoint over its HTTP API.

    Args:
        ip: Identifier of the host; returned unchanged as the first element
            of the result.
        cfg: Host configuration mapping; ``api_url``, ``api_key`` and
            ``api_secret`` are read.

    Returns:
        A ``(ip, info)`` tuple. ``info`` contains ``status``, ``uptime``,
        ``memory``, ``load_avg`` and ``disk`` taken from the ``health`` object
        of the JSON response, or ``{'status': 'unreachable'}`` if anything
        fails (missing config key, unparsable URL, network error, HTTP error
        status, invalid JSON).
    """
    # Imported locally so the block does not rely on a module-level import.
    from urllib.parse import urlsplit

    try:
        url = cfg['api_url']
        # urlsplit handles IPv6 literals, userinfo and paths that naive
        # str.split parsing gets wrong; a scheme-less value is still accepted.
        parsed = urlsplit(url if '//' in url else f'//{url}')
        host = parsed.hostname
        if not host:
            raise ValueError(f"no host in api_url {url!r}")
        port = parsed.port or 443
        verify = fetch_certificate(host, port)
        if not verify:
            # fetch_certificate returns False on failure, which requests
            # interprets as "do not verify TLS". Make that visible in the log.
            logger.warning("No certificate available for %s; TLS verification disabled for this request", host)
        resp = requests.get(
            f"{url}/core/get/health",
            auth=(cfg['api_key'], cfg['api_secret']),
            verify=verify,
            timeout=5,
        )
        resp.raise_for_status()
        data = resp.json().get('health', {})
        return ip, {
            'status': data.get('health', 'unknown'),
            'uptime': data.get('uptime', 'unknown'),
            'memory': f"{data.get('mem_used', '?')}MB/{data.get('mem_total', '?')}MB",
            'load_avg': data.get('load_avg', 'unknown'),
            'disk': f"{data.get('disk_used', '?')}%/{data.get('disk_total', '?')}%",
        }
    except Exception as e:
        logger.error("OPNsense API error for %s: %s", ip, e)
        return ip, {'status': 'unreachable'}
|
||||||
|
|
||||||
|
def gather_clients(concurrency=5):
    """Probe every host in ``config.HOSTS`` in parallel.

    Hosts whose config ``type`` is ``'generic'`` are probed with
    ``get_ssh_info``; all others with ``get_opnsense_info``.

    Args:
        concurrency: Maximum number of worker threads.

    Returns:
        A dict mapping each host key to the info dict its probe returned, or
        to ``{'status': 'error'}`` if the probe itself raised.
    """
    pending = {}
    collected = {}
    with ThreadPoolExecutor(max_workers=concurrency) as pool:
        for address, host_cfg in config.HOSTS.items():
            if host_cfg['type'] == 'generic':
                probe = get_ssh_info
            else:
                probe = get_opnsense_info
            pending[pool.submit(probe, address, host_cfg)] = address
        # Handle results in completion order so one slow host does not hold up the rest.
        for finished in as_completed(pending):
            address = pending[finished]
            try:
                key, details = finished.result()
            except Exception as e:
                logger.error("Gather error for %s: %s", address, e)
                key, details = address, {'status': 'error'}
            collected[key] = details
    return collected
|
||||||
|
|
||||||
|
@bot.message_handler(commands=['status', 'ping'])
def handle_status(msg):
    """Reply to /status and /ping with a local and per-client status report.

    Chats whose id is not in ``ALLOWED_CHATS`` get an 'Unauthorized access'
    reply and nothing else is collected.
    """
    if msg.chat.id not in ALLOWED_CHATS:
        bot.reply_to(msg, 'Unauthorized access')
        return

    local = get_local_info()
    clients = gather_clients()

    # Local section first, then one "<ip>: <status>" line per client.
    report = [
        f"Local: {local['hostname']} ({local['status']})",
        f"Uptime: {local['uptime']}",
        f"Load Avg: {local['load_avg']}",
        f"Memory:\n{local['memory']}",
        f"Disk:\n{local['disk']}",
        "Clients:",
    ]
    report.extend(
        f"{ip}: {info.get('status', 'unknown')}" for ip, info in clients.items()
    )
    bot.reply_to(msg, '\n'.join(report))
|
||||||
|
|
||||||
|
# Keep the bot polling indefinitely; if polling raises, log the error and
# retry after a 5 second pause.
# NOTE(review): when bot.polling() returns without raising, this loop calls it
# again immediately with no pause. Confirm that is intended for the telebot
# version in use.
while True:
    try:
        bot.polling()
    except Exception as exc:
        logger.error("Polling error: %s", exc)
        time.sleep(5)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue