lainmonitor/lainmonitor.py
hax 435c481720 Add: /ping and /status separated
- /ping can now be used individually to check against any IP address.
- /status will bring up inline keyboard, where you can select either a general status request or per machine

Signed-off-by: hax <hax@lainlounge.org>
2025-07-22 09:53:21 +00:00

188 lines
7 KiB
Python

#!/usr/bin/env python3
# --------------------------------------------------------------------------
# Description: A Telegram bot for monitoring critical infrastructure services
# Dependencies: telebot, paramiko, requests
# Usage: python3 lainmonitor.py | or run it as a service
# Author: h@x
# Version: 2.1.0
# --------------------------------------------------------------------------
import subprocess
import telebot
import paramiko
import requests
import time
import socket
import logging
import ssl
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from telebot import types
import config
# Logging setup: timestamped records at INFO level and above
tlogging_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
logging.basicConfig(format=tlogging_format, level=logging.INFO)
logger = logging.getLogger(__name__)

# Directory next to this file where fetched server certificates are stored.
# exist_ok=True makes the call a no-op when the directory is already present.
CERT_DIR = os.path.join(os.path.dirname(__file__), 'certs')
os.makedirs(CERT_DIR, exist_ok=True)

# Telegram bot client and the chat ids that are allowed to issue commands
bot = telebot.TeleBot(config.TOKEN)
ALLOWED_CHATS = set(config.ALLOWED_CHATS)
# Utility for command execution with timeout
def run_cmd(cmd, timeout=5):
    """Run *cmd* and return its stripped standard output.

    Args:
        cmd: Argument list handed to ``subprocess.run`` (no shell is used).
        timeout: Seconds to wait before giving up on the command.

    Returns:
        The command's stdout with surrounding whitespace removed, the string
        ``'timeout'`` when the time limit is exceeded, or ``'error'`` when
        running the command raises ``OSError``. The exit status is not
        inspected.
    """
    try:
        completed = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as exc:
        logger.warning(f"Command {cmd} timed out: {exc}")
        return 'timeout'
    except OSError as exc:
        logger.error(f"OS error running {cmd}: {exc}")
        return 'error'
    return completed.stdout.strip()
# Local system info
def get_local_info():
    """Collect basic health figures for the machine the bot runs on.

    Runs ``hostname``, ``uptime -p``, ``uptime``, ``free -h`` and ``df -h``
    through ``run_cmd``.

    Returns:
        A dict with the keys ``hostname``, ``uptime``, ``load_avg``,
        ``memory``, ``disk`` and ``status``. ``status`` is ``'online'``
        unless the hostname lookup came back empty or as one of the
        ``run_cmd`` failure markers.
    """
    marker = 'load average:'

    hostname = run_cmd(['hostname'])
    uptime = run_cmd(['uptime', '-p'])
    raw_uptime = run_cmd(['uptime'])
    memory = run_cmd(['free', '-h'])
    disk = run_cmd(['df', '-h'])

    # Everything after the last marker is the load-average triple
    _, found, tail = raw_uptime.rpartition(marker)
    load_avg = tail.strip() if found else 'unknown'

    if hostname in ('', 'error', 'timeout'):
        status = 'offline'
    else:
        status = 'online'

    return {
        'hostname': hostname,
        'uptime': uptime,
        'load_avg': load_avg,
        'memory': memory,
        'disk': disk,
        'status': status,
    }
# Fetch and store SSL certificate once
def fetch_certificate(host, port):
    """Return the path of the cached PEM certificate for *host*, fetching it once.

    The certificate is stored as ``<CERT_DIR>/<host>.pem``. If that file
    already exists it is returned without contacting the server, so a cached
    certificate is never refreshed by this function.

    Args:
        host: Server name; also used as the cache file name.
        port: TCP port to fetch the certificate from.

    Returns:
        The path of the PEM file on success, or ``True`` when the certificate
        could not be fetched or stored. The value is meant to be passed as the
        ``verify`` argument of ``requests``; ``True`` selects its default
        certificate verification.
    """
    cert_path = os.path.join(CERT_DIR, f"{host}.pem")
    if os.path.isfile(cert_path):
        return cert_path

    # Write to a temporary name first and rename afterwards, so an interrupted
    # or failed write can never leave a truncated file that later calls would
    # mistake for a valid cached certificate.
    tmp_path = f"{cert_path}.tmp"
    try:
        cert = ssl.get_server_certificate((host, port))
        with open(tmp_path, 'w') as f:
            f.write(cert)
        os.replace(tmp_path, cert_path)
    except Exception as e:
        logger.error(f"Failed to fetch certificate for {host}: {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            # Best-effort cleanup: the temporary file may never have been created
            pass
        return True

    logger.info(f"Saved certificate for {host} to {cert_path}")
    return cert_path
# SSH-based info gathering
def get_ssh_info(ip, cfg):
    """Collect health figures from a host over SSH.

    Args:
        ip: Address to connect to; returned unchanged as the first tuple item.
        cfg: Host configuration; ``cfg['ssh_user']`` and ``cfg['ssh_pass']``
            are used as login credentials.

    Returns:
        ``(ip, info)``. On success ``info`` holds ``hostname``, ``uptime``,
        ``load_avg``, ``memory`` and ``disk`` (each the command output, or
        ``'error'`` if that single command failed) plus ``status='online'``.
        If the connection cannot be established ``info`` is
        ``{'status': 'unreachable'}``.
    """
    client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts any unknown host key, so the first
    # connection to a host is not protected against impersonation.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(ip, username=cfg['ssh_user'], password=cfg['ssh_pass'], timeout=5)
        info = {}
        cmds = {'hostname': 'hostname', 'uptime': 'uptime -p', 'load_avg': 'uptime', 'memory': 'free -h', 'disk': 'df -h'}
        for key, cmd in cmds.items():
            try:
                stdin, stdout, stderr = client.exec_command(cmd, timeout=5)
                # errors='replace' keeps odd bytes in command output from
                # aborting the whole report
                out = stdout.read().decode(errors='replace').strip()
                if key == 'load_avg' and 'load average:' in out:
                    out = out.split('load average:')[-1].strip()
                info[key] = out
            except (socket.timeout, paramiko.SSHException) as e:
                logger.error(f"SSH command {cmd} on {ip} failed: {e}")
                info[key] = 'error'
        info['status'] = 'online'
    except (paramiko.SSHException, OSError) as e:
        # OSError covers refused connections, unreachable networks, DNS
        # failures and timeouts; SSHException covers authentication and
        # protocol errors. Without OSError a host that is simply down would
        # raise out of this function instead of being reported.
        logger.error(f"SSH connection to {ip} failed: {e}")
        info = {'status': 'unreachable'}
    finally:
        try:
            client.close()
        except Exception as e:
            logger.warning(f"Error closing SSH to {ip}: {e}")
    return ip, info
# OPNsense API-based info gathering
def get_opnsense_info(ip, cfg):
    """Collect health figures from an OPNsense host through its HTTP API.

    Requests ``<api_url>/core/get/health`` with ``cfg['api_key']`` and
    ``cfg['api_secret']`` as credentials, verifying TLS against the
    certificate returned by ``fetch_certificate``.

    Args:
        ip: Identifier of the host; returned unchanged as the first tuple item.
        cfg: Host configuration providing ``api_url``, ``api_key`` and
            ``api_secret``.

    Returns:
        ``(ip, info)`` where ``info`` holds ``status``, ``uptime``, ``memory``,
        ``load_avg`` and ``disk`` taken from the ``health`` object of the
        response, or ``{'status': 'unreachable'}`` if the request fails or the
        body is not valid JSON.

    Raises:
        ValueError: If ``api_url`` has no host component or an invalid port.
    """
    # Local import keeps this function self-contained
    from urllib.parse import urlsplit

    url = cfg['api_url']
    # urlsplit copes with IPv6 literals, userinfo and missing ports, which the
    # previous manual string splitting did not. Note that it lower-cases the
    # host name.
    parts = urlsplit(url)
    host = parts.hostname
    if not host:
        raise ValueError(f"api_url for {ip} has no host component: {url!r}")
    port = parts.port or 443
    verify = fetch_certificate(host, port)
    try:
        resp = requests.get(f"{url}/core/get/health", auth=(cfg['api_key'], cfg['api_secret']), verify=verify, timeout=5)
        resp.raise_for_status()
        data = resp.json().get('health', {})
    except (requests.RequestException, ValueError) as e:
        # ValueError: body was not JSON (not a RequestException on older
        # versions of requests)
        logger.error(f"OPNsense API call for {ip} failed: {e}")
        return ip, {'status': 'unreachable'}
    return ip, {
        'status': data.get('health', 'unknown'),
        'uptime': data.get('uptime', 'unknown'),
        'memory': f"{data.get('mem_used','?')}MB/{data.get('mem_total','?')}MB",
        'load_avg': data.get('load_avg', 'unknown'),
        'disk': f"{data.get('disk_used','?')}%/{data.get('disk_total','?')}%",
    }
# Gather info for given host or all hosts
def gather_host(ip=None):
    """Gather status for one configured host, or for all of them.

    Args:
        ip: Key into ``config.HOSTS``. When it is empty or not a configured
            host, the result of ``gather_clients()`` is returned instead.

    Returns:
        For a configured host, a one-element list holding the ``(ip, info)``
        tuple from the matching collector: SSH for hosts of type
        ``'generic'``, the OPNsense API for every other type. Otherwise
        whatever ``gather_clients()`` returns.
    """
    # NOTE(review): gather_clients is not defined in the visible part of this
    # file - confirm it exists elsewhere.
    if not ip or ip not in config.HOSTS:
        # all hosts
        return gather_clients()

    host_cfg = config.HOSTS[ip]
    if host_cfg['type'] == 'generic':
        entry = get_ssh_info(ip, host_cfg)
    else:
        entry = get_opnsense_info(ip, host_cfg)
    return [entry]
# Ping utility
def ping_ip(ip):
    """Send a single ping to *ip* and classify the outcome.

    Returns:
        ``'reachable'`` if the ping summary reports one packet sent and one
        received, ``'timeout'`` or ``'error'`` if ``run_cmd`` reported that
        failure, and ``'unreachable'`` in every other case.
    """
    # Two spellings of the summary line are accepted
    success_markers = (
        '1 packets transmitted, 1 received',
        '1 packets transmitted, 1 packets received',
    )
    output = run_cmd(['ping', '-c', '1', ip], timeout=3)
    if any(marker in output for marker in success_markers):
        return 'reachable'
    return output if output in ('timeout', 'error') else 'unreachable'
# Access control decorator
def restricted(func):
    """Decorator that lets a handler run only for chats in ``ALLOWED_CHATS``.

    Works for both kinds of update this file registers handlers for:
    messages, which carry the chat directly as ``.chat``, and callback
    queries, which carry it on the originating ``.message``. Reading
    ``.chat`` unconditionally fails on callback queries, which have no such
    attribute.

    Args:
        func: Handler taking the update as its first positional argument.

    Returns:
        A wrapper with the same call signature. For an allowed chat it returns
        whatever ``func`` returns. Otherwise it notifies the sender with
        ``'Unauthorized access'`` and returns ``None`` without calling ``func``.
    """
    # Local import keeps this decorator self-contained
    import functools

    @functools.wraps(func)
    def wrapper(msg, *args, **kwargs):
        is_message = hasattr(msg, 'chat')
        source = msg if is_message else getattr(msg, 'message', None)
        chat = getattr(source, 'chat', None)
        # A missing chat (e.g. a callback without an originating message) is
        # treated as unauthorized rather than raising
        if chat is None or chat.id not in ALLOWED_CHATS:
            if is_message:
                bot.reply_to(msg, 'Unauthorized access')
            else:
                bot.answer_callback_query(msg.id, 'Unauthorized access')
            return
        return func(msg, *args, **kwargs)
    return wrapper
# /status: show menu of available hosts
@bot.message_handler(commands=['status'])
@restricted
def handle_status(msg):
    """Reply to /status with an inline keyboard for choosing a host.

    One button is offered per key of ``config.HOSTS`` followed by an ``All``
    button. Each button's callback data is ``status:<host>``, or
    ``status:all`` for the last one.
    """
    buttons = [
        types.InlineKeyboardButton(host, callback_data=f'status:{host}')
        for host in config.HOSTS
    ]
    buttons.append(types.InlineKeyboardButton('All', callback_data='status:all'))

    markup = types.InlineKeyboardMarkup()
    # One add() call per button, as in the per-host menu layout
    for button in buttons:
        markup.add(button)
    bot.send_message(msg.chat.id, 'Select host for status:', reply_markup=markup)
# Callback handler for inline menu
@bot.callback_query_handler(func=lambda c: bool(c.data) and c.data.startswith('status:'))
@restricted
def callback_status(call):
    """Handle a press on one of the /status inline buttons.

    The part of the callback data after ``status:`` selects the host; the
    value ``all`` requests every host. The report lists each host's status
    and, for hosts that are ``online``, its uptime, load average, memory and
    disk figures. It is sent to the chat the button was pressed in.
    """
    # Acknowledge the button press right away, before the potentially slow
    # data gathering, so the client stops showing its progress indicator
    bot.answer_callback_query(call.id)

    _, key = call.data.split(':', 1)
    if key == 'all':
        # presumably a mapping of host -> info dict (it is iterated with
        # .items() below); gather_clients is not visible here - verify
        entries = gather_clients()
    else:
        entries = dict(gather_host(key))

    lines = []
    for ip, info in entries.items():
        lines.append(f"{ip}: {info.get('status','unknown')}")
        if info.get('status') == 'online':
            for field in ('uptime', 'load_avg', 'memory', 'disk'):
                lines.append(f" {field}: {info.get(field,'-')}")

    # Telegram limits the size of a single message; split long reports into
    # several messages instead of letting the send fail
    for chunk in telebot.util.smart_split('\n'.join(lines)):
        if chunk:
            bot.send_message(call.message.chat.id, chunk)
# /ping <IP>
@bot.message_handler(commands=['ping'])
@restricted
def handle_ping(msg):
    """Handle ``/ping <IP>``: ping the given target once and report the result.

    Registered through ``commands=['ping']``, like the /status handler, so
    only the /ping command itself triggers it and not any text that merely
    starts with ``/ping``.

    Replies with a usage hint unless exactly one target is supplied.
    """
    parts = msg.text.split()
    # A target beginning with '-' would be read by ping as a command-line
    # option rather than an address, so it is rejected as invalid usage
    if len(parts) != 2 or parts[1].startswith('-'):
        bot.reply_to(msg, 'Usage: /ping <IP>')
        return
    ip = parts[1]
    status = ping_ip(ip)
    bot.reply_to(msg, f"Ping {ip}: {status}")
# Run polling with retry
while True:
    # Restart polling whenever it stops; after a failure, log it and wait
    # five seconds before the next attempt.
    # NOTE(review): the source indentation was lost - the pause is assumed to
    # belong to the error path; confirm against the original file.
    try:
        bot.polling()
    except Exception as exc:
        logger.error(f"Polling error: {exc}")
        time.sleep(5)