Merge: 'Rewritten from scratch' from Refactor2.0 into main

Reviewed-on: #1
This commit is contained in:
h@x 2025-07-22 09:12:38 +00:00
commit ce133c03ee

View file

@ -1,75 +1,156 @@
#description: telegram bot for monitoring the system
#dependencies: telebot
#usage: python3 lainmonitor.py | or run it as a service
#author: hornetmaidan
#!/usr/bin/env python3
# --------------------------------------------------------------------------
# Description: A Telegram bot for monitoring critical infrastructure services
# Dependencies: telebot
# Usage: python3 lainmonitor.py | or run it as a service
# Author: h@x
# Version: 2.0
# --------------------------------------------------------------------------
import subprocess
import telebot
# Module-level state shared by the handlers below. Every field starts as
# 'unknown' until one of the probe functions overwrites it.
(status, hostname, uptime, zerotier, prosody,
 postgres, tailscale, disk, ping) = ('unknown',) * 9
# Telegram bot token (placeholder; replace with the real token)
TOKEN = 'PLACE_YOUR_TOKEN_HERE'
import paramiko
import requests
import time
import logging
import ssl
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import config
# Bot init: create the Telegram bot client from the module-level TOKEN.
bot = telebot.TeleBot(TOKEN)
# Log at INFO and above; each record carries timestamp, level, logger name and message.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
#get system info
def _command_output(cmd):
    """Return the stripped stdout of ``cmd`` (an argv list), or 'unknown' on failure."""
    try:
        return subprocess.check_output(cmd).decode().strip()
    except (OSError, subprocess.CalledProcessError):
        # Missing binary or non-zero exit: report 'unknown' instead of raising.
        return 'unknown'


def _service_active_line(service):
    """Return the line(s) containing 'Active' from ``sudo systemctl status <service>``."""
    try:
        # No shell and no grep: run() waits for the child, so no unreaped
        # process or open pipe is left behind. The exit code is ignored on
        # purpose; only the printed 'Active' line matters here.
        proc = subprocess.run(
            ['sudo', 'systemctl', 'status', service],
            stdout=subprocess.PIPE,
        )
    except OSError:
        return 'unknown'
    matches = [line for line in proc.stdout.decode().splitlines() if 'Active' in line]
    return '\n'.join(matches).strip()


def getinfo():
    """Collect host and service information into the module-level globals.

    Updates ``status``, ``hostname``, ``uptime``, ``zerotier``, ``prosody``,
    ``postgres``, ``tailscale`` and ``disk``. A command that cannot be run
    yields 'unknown' for its field instead of raising, so ``status`` becomes
    'offline' when the hostname lookup fails.

    Returns:
        tuple: (hostname, uptime, zerotier, prosody, postgres, tailscale, disk)
    """
    global status, hostname, uptime, zerotier, prosody, postgres, tailscale, disk
    hostname = _command_output(['hostname'])
    uptime = _command_output(['uptime', '-p'])
    #systemd-only services
    zerotier = _service_active_line('zerotier-one')
    prosody = _service_active_line('prosody')
    postgres = _service_active_line('postgresql')
    tailscale = _service_active_line('tailscaled')
    disk = _command_output(['df', '-h'])
    status = 'offline' if hostname == 'unknown' else 'online'
    return hostname, uptime, zerotier, prosody, postgres, tailscale, disk
# Directory next to this file where fetched server certificates are cached.
# abspath() anchors it to the file's real location even when __file__ is a
# relative path, so the cache does not depend on the working directory.
CERT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'certs')
os.makedirs(CERT_DIR, exist_ok=True)
#ping tailscale (change the IP address to the one you want or add more)
def check_tailscale(target='TAILSCALE_IP', timeout=15):
    """Send one ICMP echo to ``target`` and record the result in the global ``ping``.

    Args:
        target: Host or IP address to ping. Defaults to the 'TAILSCALE_IP'
            placeholder, which must be replaced with a real address.
        timeout: Seconds to wait for the ``ping`` process before giving up.

    Returns:
        str: 'connected' if the summary line reports '1 packets' and
        '1 received', otherwise 'unreachable' (including when ``ping``
        cannot be started or times out).
    """
    global ping
    try:
        # argv list instead of a shell pipeline: nothing is interpolated into
        # a shell, and run() reaps the child process.
        proc = subprocess.run(
            ['ping', '-c', '1', target],
            stdout=subprocess.PIPE,
            timeout=timeout,
        )
        output = proc.stdout.decode(errors='replace')
    except (OSError, subprocess.TimeoutExpired):
        output = ''
    # Same test as the former "grep '1 packets'" followed by "'1 received' in ...".
    reachable = any(
        '1 packets' in line and '1 received' in line
        for line in output.splitlines()
    )
    ping = 'connected' if reachable else 'unreachable'
    return ping
# Create the Telegram bot client using the token from the config module.
bot = telebot.TeleBot(config.TOKEN)
# Chat ids permitted to use the bot; stored as a set for membership tests in the handlers.
ALLOWED_CHATS = set(config.ALLOWED_CHATS)
#debug handler
def check():
    """Refresh the system info via getinfo() and print every field to stdout.

    Returns:
        tuple: (status, hostname, uptime, zerotier, prosody, postgres,
        tailscale, disk) as currently held in the module globals.
    """
    getinfo()
    # Label/value pairs in the order they are printed.
    report = (
        ('system status:', status),
        ('hostname:', hostname),
        ('uptime:', uptime),
        ('zerotier:', zerotier),
        ('prosody:', prosody),
        ('postgres:', postgres),
        ('tailscale:', tailscale),
        ('disk:', disk),
    )
    for label, value in report:
        print(label, value)
    return status, hostname, uptime, zerotier, prosody, postgres, tailscale, disk
def run_cmd(cmd, timeout=5):
    """Run ``cmd`` (an argv list) and return its stripped standard output.

    Args:
        cmd: Command and arguments passed to subprocess.run.
        timeout: Seconds to wait before the command is abandoned.

    Returns:
        str: The command's stdout with surrounding whitespace removed,
        'timeout' if it did not finish in time, or 'error' if it could not
        be started (the OS error is logged).
    """
    try:
        completed = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return 'timeout'
    except OSError as exc:
        logger.error("OS error running %s: %s", cmd, exc)
        return 'error'
    return completed.stdout.strip()
#message handling
@bot.message_handler(commands=['start', 'help', 'status', 'reboot', 'ping'])
def handle(message):
    """Reply to the /start, /help, /status, /reboot and /ping commands.

    The command is taken from the first word of the message with any
    '@BotName' suffix removed, so '/status@MyBot' (the form used in group
    chats) and '/status extra args' are handled like '/status' instead of
    being silently ignored.

    Args:
        message: Incoming message object; its ``text`` holds the command.
    """
    words = (message.text or '').split()
    command = words[0].partition('@')[0] if words else ''
    if command == '/start':
        bot.reply_to(message, 'lainmonitor v1.0 --- standing by...')
    elif command == '/help':
        bot.reply_to(message, 'commands: /start, /help, /status, /reboot, /ping')
    elif command == '/status':
        # check() refreshes the module globals read by the f-strings below.
        check()
        status_message = f'hostname: {hostname}\nsystem status: {status}\nuptime: {uptime}\nzerotier: {zerotier}\nprosody: {prosody}\npostgres: {postgres}\ntailscale: {tailscale}'
        bot.reply_to(message, status_message)
        bot.reply_to(message, f'filesystem info for {hostname}: \n\n{disk}')
    elif command == '/reboot':
        bot.reply_to(message, 'work in progress...')
    elif command == '/ping':
        # check_tailscale() updates the global ``ping`` used in the reply.
        check_tailscale()
        bot.reply_to(message, f'ping status: {ping}')
def get_local_info():
    """Gather basic health data about the local machine via run_cmd().

    Returns:
        dict: Keys 'hostname', 'uptime', 'load_avg', 'memory', 'disk' and
        'status'. 'status' is 'online' when the hostname lookup produced a
        real value, otherwise 'offline'. If anything unexpected fails, every
        key is set to 'error'.
    """
    try:
        host = run_cmd(['hostname'])
        pretty_uptime = run_cmd(['uptime', '-p'])
        raw_uptime = run_cmd(['uptime'])
        # The load averages follow the 'load average:' marker in plain `uptime` output.
        marker = 'load average:'
        if marker in raw_uptime:
            load = raw_uptime.split(marker)[-1].strip()
        else:
            load = 'unknown'
        mem = run_cmd(['free', '-h'])
        filesystems = run_cmd(['df', '-h'])
        # run_cmd reports failures as the sentinel strings 'error' / 'timeout'.
        if host and host not in ('error', 'timeout'):
            state = 'online'
        else:
            state = 'offline'
        return {
            'hostname': host,
            'uptime': pretty_uptime,
            'load_avg': load,
            'memory': mem,
            'disk': filesystems,
            'status': state,
        }
    except Exception as exc:
        logger.error("Local info error: %s", exc)
        failed_keys = ('hostname', 'uptime', 'load_avg', 'memory', 'disk', 'status')
        return dict.fromkeys(failed_keys, 'error')
# Polling: start receiving updates from Telegram.
# NOTE(review): this call sits before the function definitions that follow and
# the file also ends with a `while True` polling loop; if bot.polling() blocks,
# nothing below runs until it returns. Confirm this call is still intended.
bot.polling()
def fetch_certificate(host, port):
    """Return the path of a cached PEM certificate for ``host``, fetching it if needed.

    The certificate is stored as ``<CERT_DIR>/<host>.pem``. A fresh download
    is written to a temporary file and moved into place atomically, so an
    interrupted write cannot leave a truncated file that later calls would
    treat as a valid cache entry. An empty cached file is ignored and
    re-fetched.

    Args:
        host: Server host name or address; also used as the cache file name.
        port: TCP port of the TLS service.

    Returns:
        str | bool: Path to the PEM file, or False when the certificate
        could not be fetched or stored (the error is logged).
    """
    import tempfile  # local import: only this function needs it

    cert_path = os.path.join(CERT_DIR, f"{host}.pem")
    # A zero-byte file is the residue of a failed write, not a usable certificate.
    if os.path.isfile(cert_path) and os.path.getsize(cert_path) > 0:
        return cert_path
    try:
        cert = ssl.get_server_certificate((host, port))
        fd, tmp_path = tempfile.mkstemp(dir=CERT_DIR, suffix='.tmp')
        try:
            with os.fdopen(fd, 'w') as f:
                f.write(cert)
            # Atomic rename: readers see either the old state or the full file.
            os.replace(tmp_path, cert_path)
        except Exception:
            # Do not leave stray temp files behind on a failed write.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise
        return cert_path
    except Exception as e:
        logger.error("Certificate fetch error for %s: %s", host, e)
        # NOTE(review): get_opnsense_info passes this value to requests as
        # `verify`, so False turns TLS verification off for that request.
        # Confirm that this fallback is intended.
        return False
def get_ssh_info(ip, cfg, timeout=5):
    """Collect host information from ``ip`` over SSH.

    Args:
        ip: Address of the host to connect to.
        cfg: Host configuration; reads the 'ssh_user' and 'ssh_pass' keys.
        timeout: Seconds allowed for the connection and for each command's
            channel, so a stalled command cannot block the worker forever.

    Returns:
        tuple: ``(ip, info)``. On success ``info`` has 'hostname', 'uptime',
        'load_avg', 'memory', 'disk' (each the command output, or 'error' if
        that command failed) and 'status' == 'online'. If the connection
        fails, ``info`` is ``{'status': 'unreachable'}``.
    """
    # Bound before the try so the finally block can always test it.
    client = None
    try:
        client = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts host keys that are not already
        # known; confirm this is acceptable for the monitored hosts.
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(ip, username=cfg['ssh_user'], password=cfg['ssh_pass'], timeout=timeout)
        info = {}
        cmds = {'hostname':'hostname','uptime':'uptime -p','load_avg':'uptime','memory':'free -h','disk':'df -h'}
        for key, cmd in cmds.items():
            try:
                _stdin, stdout, _stderr = client.exec_command(cmd, timeout=timeout)
                out = stdout.read().decode().strip()
                # Plain `uptime` output: keep only the part after 'load average:'.
                if key == 'load_avg' and 'load average:' in out:
                    out = out.split('load average:')[-1].strip()
                info[key] = out
            except Exception as e:
                logger.error("SSH cmd error %s on %s: %s", cmd, ip, e)
                info[key] = 'error'
        info['status'] = 'online'
    except Exception as e:
        logger.error("SSH connection error to %s: %s", ip, e)
        info = {'status': 'unreachable'}
    finally:
        if client is not None:
            try:
                client.close()
            except Exception as e:
                # Best effort: a failed close must not mask the collected result.
                logger.debug("SSH close error for %s: %s", ip, e)
    return ip, info
def get_opnsense_info(ip, cfg):
    """Query the health endpoint of an OPNsense host through its HTTP API.

    Args:
        ip: Address used as the key for this host in the results.
        cfg: Host configuration; reads 'api_url', 'api_key' and 'api_secret'.

    Returns:
        tuple: ``(ip, info)`` where ``info`` has 'status', 'uptime',
        'memory', 'load_avg' and 'disk' taken from the response's 'health'
        object, or ``{'status': 'unreachable'}`` if anything fails (the
        error is logged).
    """
    from urllib.parse import urlsplit  # local import: only this function needs it

    try:
        # Drop trailing slashes so the request path does not become '//core/...'.
        url = cfg['api_url'].rstrip('/')
        # urlsplit copes with IPv6 literals and userinfo, which manual
        # splitting on ':' and '/' does not.
        target = urlsplit(url)
        host = target.hostname
        if not host:
            raise ValueError(f"api_url has no host component: {url!r}")
        port = target.port or 443
        # Path to the cached certificate, or False if it could not be fetched.
        verify = fetch_certificate(host, port)
        resp = requests.get(f"{url}/core/get/health", auth=(cfg['api_key'], cfg['api_secret']), verify=verify, timeout=5)
        resp.raise_for_status()
        data = resp.json().get('health', {})
        return ip, {
            'status': data.get('health', 'unknown'),
            'uptime': data.get('uptime', 'unknown'),
            'memory': f"{data.get('mem_used', '?')}MB/{data.get('mem_total', '?')}MB",
            'load_avg': data.get('load_avg', 'unknown'),
            'disk': f"{data.get('disk_used', '?')}%/{data.get('disk_total', '?')}%"
        }
    except Exception as e:
        logger.error("OPNsense API error for %s: %s", ip, e)
        return ip, {'status': 'unreachable'}
def gather_clients(concurrency=5):
    """Probe every host in config.HOSTS in parallel and collect the results.

    Hosts whose config 'type' is 'generic' are probed with get_ssh_info;
    all others with get_opnsense_info.

    Args:
        concurrency: Maximum number of worker threads.

    Returns:
        dict: Maps each host address to its info dict. A probe that raises
        is logged and recorded as ``{'status': 'error'}``.
    """
    results = {}
    with ThreadPoolExecutor(max_workers=concurrency) as pool:
        # Remember which address each future belongs to for error reporting.
        pending = {}
        for address, host_cfg in config.HOSTS.items():
            if host_cfg['type'] == 'generic':
                probe = get_ssh_info
            else:
                probe = get_opnsense_info
            pending[pool.submit(probe, address, host_cfg)] = address
        for done in as_completed(pending):
            origin = pending[done]
            try:
                ip, info = done.result()
            except Exception as exc:
                logger.error("Gather error for %s: %s", origin, exc)
                ip, info = origin, {'status': 'error'}
            results[ip] = info
    return results
@bot.message_handler(commands=['status', 'ping'])
def handle_status(msg):
    """Reply with a status report for the local machine and all configured clients.

    Chats whose id is not in ALLOWED_CHATS receive 'Unauthorized access'
    and nothing else.

    Args:
        msg: Incoming message object; ``msg.chat.id`` is checked for access.
    """
    # Guard clause: refuse chats that are not on the allow-list.
    if msg.chat.id not in ALLOWED_CHATS:
        bot.reply_to(msg, 'Unauthorized access')
        return
    local = get_local_info()
    clients = gather_clients()
    report = [
        f"Local: {local['hostname']} ({local['status']})",
        f"Uptime: {local['uptime']}",
        f"Load Avg: {local['load_avg']}",
        f"Memory:\n{local['memory']}",
        f"Disk:\n{local['disk']}",
        "Clients:"
    ]
    # One "<ip>: <status>" line per client.
    report.extend(
        f"{ip}: {info.get('status', 'unknown')}" for ip, info in clients.items()
    )
    bot.reply_to(msg, '\n'.join(report))
# Keep the bot running: restart polling after any error, pausing 5 seconds
# first. `except Exception` does not catch KeyboardInterrupt or SystemExit,
# so the process can still be stopped normally.
while True:
    try:
        bot.polling()
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # which a plain logger.error call would discard.
        logger.exception("Polling error: %s", e)
        time.sleep(5)