forked from hornet/lainmonitor
- /ping can now be used individually to check against any IP address. - /status will bring up inline keyboard, where you can select either a general status request or per machine Signed-off-by: hax <hax@lainlounge.org>
188 lines
7 KiB
Python
188 lines
7 KiB
Python
#!/usr/bin/env python3
|
|
|
|
# --------------------------------------------------------------------------
|
|
# Description: A Telegram bot for monitoring critical infrastructure services
|
|
# Dependencies: telebot
|
|
# Usage: python3 lainmonitor.py | or run it as a service
|
|
# Author: h@x
|
|
# Version: 2.1.0
|
|
# --------------------------------------------------------------------------
|
|
|
|
import subprocess
|
|
import telebot
|
|
import paramiko
|
|
import requests
|
|
import time
|
|
import socket
|
|
import logging
|
|
import ssl
|
|
import os
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from telebot import types
|
|
import config
|
|
|
|
# Logging: timestamped, level-tagged records at INFO and above.
tlogging_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
logging.basicConfig(format=tlogging_format, level=logging.INFO)
logger = logging.getLogger(__name__)

# Directory (alongside this script) where fetched server certificates are
# stored; created at startup when it does not exist yet.
CERT_DIR = os.path.join(os.path.dirname(__file__), 'certs')
os.makedirs(CERT_DIR, exist_ok=True)

# Telegram bot client, and the chat IDs that are allowed to use it.
bot = telebot.TeleBot(config.TOKEN)
ALLOWED_CHATS = set(config.ALLOWED_CHATS)
|
|
|
|
# Utility for command execution with timeout
|
|
def run_cmd(cmd, timeout=5):
    """Run *cmd* and return its stripped standard output.

    Args:
        cmd: Command and arguments, passed unchanged to ``subprocess.run``.
        timeout: Seconds to wait before the command is abandoned.

    Returns:
        The captured stdout with surrounding whitespace removed, the literal
        ``'timeout'`` when the command ran longer than *timeout*, or the
        literal ``'error'`` when running it raised ``OSError``. The exit
        status of the command is not inspected.
    """
    try:
        completed = subprocess.run(
            cmd,
            timeout=timeout,
            text=True,
            capture_output=True,
        )
    except subprocess.TimeoutExpired as e:
        logger.warning(f"Command {cmd} timed out: {e}")
        return 'timeout'
    except OSError as e:
        logger.error(f"OS error running {cmd}: {e}")
        return 'error'
    return completed.stdout.strip()
|
|
|
|
# Local system info
|
|
def get_local_info():
    """Collect hostname, uptime, load, memory and disk usage of this machine.

    Every value is the raw result of ``run_cmd`` for the matching system
    command, so it may also be one of ``run_cmd``'s failure markers.

    Returns:
        A dict with the keys ``hostname``, ``uptime``, ``load_avg``,
        ``memory``, ``disk`` and ``status``. ``load_avg`` is the text after
        ``'load average:'`` in the ``uptime`` output, or ``'unknown'`` when
        that marker is absent. ``status`` is ``'offline'`` when the hostname
        lookup produced an empty string, ``'error'`` or ``'timeout'``, and
        ``'online'`` otherwise.
    """
    report = {'hostname': run_cmd(['hostname'])}
    report['uptime'] = run_cmd(['uptime', '-p'])

    # Keep only what follows the last "load average:" marker, if present.
    _, marker, tail = run_cmd(['uptime']).rpartition('load average:')
    report['load_avg'] = tail.strip() if marker else 'unknown'

    report['memory'] = run_cmd(['free', '-h'])
    report['disk'] = run_cmd(['df', '-h'])

    if report['hostname'] in ('', 'error', 'timeout'):
        report['status'] = 'offline'
    else:
        report['status'] = 'online'
    return report
|
|
|
|
# Fetch and store SSL certificate once
|
|
def fetch_certificate(host, port):
    """Return the path of the locally stored PEM certificate for *host*.

    The certificate is downloaded from ``host:port`` only when
    ``<CERT_DIR>/<host>.pem`` does not exist yet; an existing file is reused
    without contacting the server again.

    Args:
        host: Server name; also used as the file name stem.
        port: TCP port the certificate is fetched from.

    Returns:
        The path to the PEM file, or ``True`` when fetching or writing the
        certificate failed (the failure is logged).
    """
    pem_path = os.path.join(CERT_DIR, f"{host}.pem")
    if not os.path.isfile(pem_path):
        try:
            pem_text = ssl.get_server_certificate((host, port))
            with open(pem_path, 'w') as out:
                out.write(pem_text)
            logger.info(f"Saved certificate for {host} to {pem_path}")
        except Exception as e:
            logger.error(f"Failed to fetch certificate for {host}: {e}")
            return True
    return pem_path
|
|
|
|
# SSH-based info gathering
|
|
def get_ssh_info(ip, cfg):
    """Collect basic system information from a host over SSH.

    Args:
        ip: Address of the host to connect to.
        cfg: Host settings; ``cfg['ssh_user']`` and ``cfg['ssh_pass']`` are
            used as login credentials.

    Returns:
        A tuple ``(ip, info)``. When the connection succeeds, ``info`` holds
        ``'status': 'online'`` plus ``hostname``, ``uptime``, ``load_avg``,
        ``memory`` and ``disk``; an individual value is ``'error'`` when
        that one command failed. When the connection cannot be established,
        ``info`` is ``{'status': 'unreachable'}``.
    """
    cmds = {
        'hostname': 'hostname',
        'uptime': 'uptime -p',
        'load_avg': 'uptime',
        'memory': 'free -h',
        'disk': 'df -h',
    }
    info = {'status': 'unreachable'}
    client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts any unknown host key without
    # verification; consider loading known host keys instead.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(ip, username=cfg['ssh_user'], password=cfg['ssh_pass'], timeout=5)
        collected = {}
        for key, cmd in cmds.items():
            try:
                _stdin, stdout, _stderr = client.exec_command(cmd, timeout=5)
                out = stdout.read().decode().strip()
                if key == 'load_avg' and 'load average:' in out:
                    # Keep only the figures after the "load average:" marker.
                    out = out.split('load average:')[-1].strip()
                collected[key] = out
            except (socket.timeout, paramiko.SSHException) as e:
                logger.error(f"SSH command {cmd} on {ip} failed: {e}")
                collected[key] = 'error'
        collected['status'] = 'online'
        info = collected
    except (paramiko.SSHException, OSError) as e:
        # OSError covers socket.timeout as well as refused, unroutable and
        # unresolvable connections, which connect() raises as plain socket
        # errors; AuthenticationException is a subclass of SSHException.
        logger.error(f"SSH connection to {ip} failed: {e}")
        info = {'status': 'unreachable'}
    finally:
        try:
            client.close()
        except Exception as e:
            logger.warning(f"Error closing SSH to {ip}: {e}")
    return ip, info
|
|
|
|
# OPNsense API-based info gathering
|
|
def get_opnsense_info(ip, cfg):
    """Query the health endpoint of an OPNsense host through its HTTP API.

    Args:
        ip: Key identifying the host; returned unchanged in the result.
        cfg: Host settings providing ``api_url``, ``api_key`` and
            ``api_secret``.

    Returns:
        A tuple ``(ip, info)``. On success ``info`` contains ``status``,
        ``uptime``, ``memory``, ``load_avg`` and ``disk`` built from the
        ``health`` object of the JSON response, with placeholder values for
        missing fields. If the URL cannot be parsed, the request fails, or
        the body is not JSON, ``info`` is ``{'status': 'unreachable'}``.
    """
    # Local import so this block does not depend on the file's import list.
    from urllib.parse import urlsplit

    url = cfg['api_url']
    try:
        parts = urlsplit(url)
        host = parts.hostname
        # As before, fall back to 443 when the URL names no port.
        port = parts.port or 443
    except ValueError as e:
        logger.error(f"Invalid OPNsense API URL for {ip}: {e}")
        return ip, {'status': 'unreachable'}
    if not host:
        logger.error(f"OPNsense API URL for {ip} has no host: {url!r}")
        return ip, {'status': 'unreachable'}

    # Either a path to the stored server certificate or True.
    verify = fetch_certificate(host, port)
    try:
        resp = requests.get(
            f"{url}/core/get/health",
            auth=(cfg['api_key'], cfg['api_secret']),
            verify=verify,
            timeout=5,
        )
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError: the body was not valid JSON (not a RequestException
        # on older requests releases).
        logger.error(f"OPNsense API call for {ip} failed: {e}")
        return ip, {'status': 'unreachable'}

    data = payload.get('health') if isinstance(payload, dict) else None
    if not isinstance(data, dict):
        # Unexpected payload shape: report placeholders instead of raising.
        data = {}
    return ip, {
        'status': data.get('health', 'unknown'),
        'uptime': data.get('uptime', 'unknown'),
        'memory': f"{data.get('mem_used','?')}MB/{data.get('mem_total','?')}MB",
        'load_avg': data.get('load_avg', 'unknown'),
        'disk': f"{data.get('disk_used','?')}%/{data.get('disk_total','?')}%",
    }
|
|
|
|
# Gather info for given host or all hosts
|
|
def _probe_host(ip, cfg):
    """Run the collector matching ``cfg['type']`` and return ``(ip, info)``."""
    if cfg['type'] == 'generic':
        return get_ssh_info(ip, cfg)
    return get_opnsense_info(ip, cfg)


def gather_clients():
    """Collect info for every host in ``config.HOSTS`` concurrently.

    This function is called by ``gather_host`` and ``callback_status`` but
    had no definition in the file, so requesting all hosts raised
    ``NameError``.

    Returns:
        A dict mapping each configured host key to its info dict, in
        configuration order. A host whose collector raised is reported as
        ``{'status': 'error'}`` so one failure does not hide the others.
    """
    hosts = dict(config.HOSTS)
    results = {}
    if not hosts:
        return results
    with ThreadPoolExecutor(max_workers=min(len(hosts), 16)) as pool:
        pending = {ip: pool.submit(_probe_host, ip, cfg) for ip, cfg in hosts.items()}
        # Read results in submission order to keep the output stable.
        for ip, future in pending.items():
            try:
                _, info = future.result()
            except Exception as e:
                logger.error(f"Gathering info for {ip} failed: {e}")
                info = {'status': 'error'}
            results[ip] = info
    return results


def gather_host(ip=None):
    """Gather info for one configured host, or for all hosts.

    Args:
        ip: Key of a host in ``config.HOSTS``. When it is empty or not a
            configured host, all hosts are gathered instead.

    Returns:
        A one-element list ``[(ip, info)]`` for a configured host,
        otherwise the dict returned by ``gather_clients``. Both forms can be
        passed to ``dict()``.
    """
    if ip and ip in config.HOSTS:
        return [_probe_host(ip, config.HOSTS[ip])]
    # all hosts
    return gather_clients()
|
|
|
|
# Ping utility
|
|
def ping_ip(ip):
    """Send a single ICMP echo request to *ip* and classify the outcome.

    Args:
        ip: Target address as typed by the chat user.

    Returns:
        ``'reachable'`` when ping reports one packet sent and received,
        ``'timeout'`` or ``'error'`` when ``run_cmd`` reported that failure,
        ``'error'`` for an empty or option-like target, and
        ``'unreachable'`` otherwise.
    """
    # The target is chat input: refuse empty values and anything ping would
    # parse as a command-line option instead of a destination.
    if not ip or ip.startswith('-'):
        return 'error'

    res = run_cmd(['ping', '-c', '1', ip], timeout=3)
    if res in ('timeout', 'error'):
        return res

    # The summary line is worded differently across ping implementations.
    success_markers = (
        '1 packets transmitted, 1 received',
        '1 packets transmitted, 1 packets received',
    )
    if any(marker in res for marker in success_markers):
        return 'reachable'
    return 'unreachable'
|
|
|
|
# Access control decorator
|
|
def restricted(func):
    """Decorator that runs a handler only for chats in ``ALLOWED_CHATS``.

    Works for message handlers and for callback-query handlers. A message
    carries its chat directly; a callback query has no ``chat`` attribute,
    so its chat is read from the message the pressed button belongs to.

    Args:
        func: Handler taking the update object as its first argument.

    Returns:
        A wrapper that calls *func* for allowed chats and otherwise sends
        'Unauthorized access' and returns ``None``.
    """
    from functools import wraps

    @wraps(func)
    def wrapper(msg, *args, **kwargs):
        chat = getattr(msg, 'chat', None)
        is_callback = chat is None
        if is_callback:
            # Callback query: the chat hangs off the originating message,
            # which may itself be missing.
            chat = getattr(getattr(msg, 'message', None), 'chat', None)

        if chat is None or chat.id not in ALLOWED_CHATS:
            if is_callback:
                bot.answer_callback_query(msg.id, 'Unauthorized access')
            else:
                bot.reply_to(msg, 'Unauthorized access')
            return None
        return func(msg, *args, **kwargs)

    return wrapper
|
|
|
|
# /status: show menu of available hosts
|
|
@bot.message_handler(commands=['status'])
@restricted
def handle_status(msg):
    """Answer /status with an inline keyboard to pick a host.

    The keyboard has one button per key of ``config.HOSTS`` followed by an
    'All' button; each button carries ``status:<key>`` (or ``status:all``)
    as its callback data.
    """
    choices = [(ip, f'status:{ip}') for ip in config.HOSTS]
    choices.append(('All', 'status:all'))

    keyboard = types.InlineKeyboardMarkup()
    for label, payload in choices:
        keyboard.add(types.InlineKeyboardButton(label, callback_data=payload))

    bot.send_message(msg.chat.id, 'Select host for status:', reply_markup=keyboard)
|
|
|
|
# Callback handler for inline menu
|
|
@bot.callback_query_handler(func=lambda c: bool(c.data) and c.data.startswith('status:'))
@restricted
def callback_status(call):
    """Handle a press on the /status keyboard and send the report.

    The callback data has the form ``status:<key>``. ``all`` reports every
    host; any other key is passed to ``gather_host``. Each host gets a
    ``<ip>: <status>`` line, followed by uptime, load, memory and disk
    lines when its status is ``'online'``.
    """
    # Acknowledge the press first so the client stops showing its progress
    # indicator while the hosts are being queried.
    bot.answer_callback_query(call.id)

    _, key = call.data.split(':', 1)
    entries = dict(gather_clients() if key == 'all' else gather_host(key))

    lines = []
    for ip, info in entries.items():
        lines.append(f"{ip}: {info.get('status','unknown')}")
        if info.get('status') == 'online':
            for field in ('uptime', 'load_avg', 'memory', 'disk'):
                lines.append(f" {field}: {info.get(field,'-')}")

    # An empty message text cannot be sent, so say so explicitly.
    text = '\n'.join(lines) if lines else 'No hosts configured'
    bot.send_message(call.message.chat.id, text)
|
|
|
|
# /ping <IP>
|
|
@bot.message_handler(func=lambda m: m.text and m.text.startswith('/ping'))
@restricted
def handle_ping(msg):
    """Handle ``/ping <IP>``: ping the given target and reply with the result.

    The message must consist of exactly two whitespace-separated tokens;
    anything else gets the usage hint.
    """
    tokens = msg.text.split()
    if len(tokens) == 2:
        target = tokens[1]
        outcome = ping_ip(target)
        bot.reply_to(msg, f"Ping {target}: {outcome}")
        return
    bot.reply_to(msg, 'Usage: /ping <IP>')
|
|
|
|
# Run polling with retry
|
|
# Keep the bot running: whenever polling stops, whether by returning or by
# raising, log any error and start polling again after a short pause.
while True:
    try:
        bot.polling()
    except Exception as e:
        logger.error(f"Polling error: {e}")
    # Pause before reconnecting so a persistent failure cannot spin the loop.
    time.sleep(5)
|