"""Telegram bot that reports the health of the hosts listed in ``config.HOSTS``.

Hosts of type ``generic`` are queried over SSH; every other host is queried
through the OPNsense HTTP API.  Only chats listed in ``config.ALLOWED_CHATS``
may use the bot.
"""
# --------------------------------------------------------------------------
# Dependencies: telebot, paramiko, requests
# Usage: python3 lainmonitor.py | or run it as a service
# Author: h@x
# Version: 2.1.0
# --------------------------------------------------------------------------

import functools
import logging
import os
import ssl
import subprocess
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urlsplit

import paramiko
import requests
import telebot
from telebot import types

import config

# Configure logging
LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
logger = logging.getLogger(__name__)

# Directory where fetched server certificates are stored.
CERT_DIR = os.path.join(os.path.dirname(__file__), 'certs')
os.makedirs(CERT_DIR, exist_ok=True)

# Kept a little below Telegram's 4096-character message limit.
MAX_MESSAGE_LEN = 4000

bot = telebot.TeleBot(config.TOKEN)
ALLOWED_CHATS = set(config.ALLOWED_CHATS)


def run_cmd(cmd, timeout=5):
    """Run *cmd* (an argument list) and return its stripped stdout.

    The exit status is not inspected.  Returns the sentinel ``'timeout'`` when
    the command exceeds *timeout* seconds and ``'error'`` when it cannot be
    started.
    """
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired as e:
        logger.warning("Command %s timed out: %s", cmd, e)
        return 'timeout'
    except OSError as e:
        logger.error("OS error running %s: %s", cmd, e)
        return 'error'
    return result.stdout.strip()


def get_local_info():
    """Collect hostname, uptime, load average, memory and disk usage locally.

    Returns a dict with the keys ``hostname``, ``uptime``, ``load_avg``,
    ``memory``, ``disk`` and ``status``.  ``status`` is ``'online'`` unless the
    hostname lookup produced nothing or one of the ``run_cmd`` sentinels.
    """
    hostname = run_cmd(['hostname'])
    load_line = run_cmd(['uptime'])
    if 'load average:' in load_line:
        load_avg = load_line.split('load average:')[-1].strip()
    else:
        load_avg = 'unknown'
    status = 'online' if hostname not in ('', 'error', 'timeout') else 'offline'
    return {
        'hostname': hostname,
        'uptime': run_cmd(['uptime', '-p']),
        'load_avg': load_avg,
        'memory': run_cmd(['free', '-h']),
        'disk': run_cmd(['df', '-h']),
        'status': status,
    }


def fetch_certificate(host, port):
    """Return a value suitable for the ``verify`` argument of ``requests``.

    The server certificate of ``host:port`` is downloaded once and stored as
    ``<CERT_DIR>/<host>.pem``; later calls reuse that file.  When the download
    or the write fails, ``True`` is returned so that ``requests`` verifies
    against its default CA bundle and verification is never switched off.
    """
    cert_path = os.path.join(CERT_DIR, f"{host}.pem")
    if os.path.isfile(cert_path):
        return cert_path
    tmp_path = f"{cert_path}.tmp"
    try:
        cert = ssl.get_server_certificate((host, port))
        # Write to a temporary file first: a partially written PEM must never
        # be picked up (and then reused) by the isfile() check above.
        with open(tmp_path, 'w') as f:
            f.write(cert)
        os.replace(tmp_path, cert_path)
    except (OSError, ValueError) as e:
        logger.error("Failed to fetch certificate for %s: %s", host, e)
        return True
    logger.info("Saved certificate for %s to %s", host, cert_path)
    return cert_path


def get_ssh_info(ip, cfg):
    """Query a host over SSH and return ``(ip, info)``.

    *cfg* must provide ``ssh_user`` and ``ssh_pass``.  On success ``info``
    holds ``hostname``, ``uptime``, ``load_avg``, ``memory``, ``disk`` (each
    ``'error'`` if its command failed) and ``status == 'online'``.  When the
    connection fails ``info`` is ``{'status': 'unreachable'}``; when *cfg* is
    incomplete it is ``{'status': 'error'}``.
    """
    cmds = {
        'hostname': 'hostname',
        'uptime': 'uptime -p',
        'load_avg': 'uptime',
        'memory': 'free -h',
        'disk': 'df -h',
    }
    client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts any unknown host key, so the first
    # connection to a host is not protected against man-in-the-middle attacks.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(ip, username=cfg['ssh_user'], password=cfg['ssh_pass'], timeout=5)
        info = {}
        for key, cmd in cmds.items():
            try:
                _stdin, stdout, _stderr = client.exec_command(cmd, timeout=5)
                out = stdout.read().decode(errors='replace').strip()
            except (paramiko.SSHException, OSError) as e:
                logger.error("SSH command %s on %s failed: %s", cmd, ip, e)
                info[key] = 'error'
                continue
            if key == 'load_avg' and 'load average:' in out:
                out = out.split('load average:')[-1].strip()
            info[key] = out
        info['status'] = 'online'
    except KeyError as e:
        logger.error("Missing SSH setting %s for %s", e, ip)
        info = {'status': 'error'}
    except (paramiko.SSHException, OSError) as e:
        # OSError covers timeouts, refused connections and unreachable
        # networks, none of which are SSHException subclasses.
        logger.error("SSH connection to %s failed: %s", ip, e)
        info = {'status': 'unreachable'}
    finally:
        try:
            client.close()
        except Exception as e:  # best-effort cleanup must not mask the result
            logger.warning("Error closing SSH to %s: %s", ip, e)
    return ip, info


def get_opnsense_info(ip, cfg):
    """Query the OPNsense health endpoint and return ``(ip, info)``.

    *cfg* must provide ``api_url``, ``api_key`` and ``api_secret``.  ``info``
    holds ``status``, ``uptime``, ``memory``, ``load_avg`` and ``disk`` on
    success, ``{'status': 'unreachable'}`` when the request fails and
    ``{'status': 'error'}`` when *cfg* is incomplete or the URL is malformed.
    """
    try:
        url = cfg['api_url']
        auth = (cfg['api_key'], cfg['api_secret'])
        parts = urlsplit(url)
        host = parts.hostname
        port = parts.port or 443
    except (KeyError, ValueError) as e:
        logger.error("Invalid OPNsense configuration for %s: %s", ip, e)
        return ip, {'status': 'error'}
    if not host:
        logger.error("OPNsense api_url for %s has no host part: %r", ip, url)
        return ip, {'status': 'error'}

    verify = fetch_certificate(host, port)
    try:
        resp = requests.get(f"{url}/core/get/health", auth=auth, verify=verify, timeout=5)
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError: body was not JSON (older requests versions raise it bare).
        logger.error("OPNsense API call for %s failed: %s", ip, e)
        return ip, {'status': 'unreachable'}

    data = payload.get('health', {}) if isinstance(payload, dict) else {}
    if not isinstance(data, dict):
        data = {}
    return ip, {
        'status': data.get('health', 'unknown'),
        'uptime': data.get('uptime', 'unknown'),
        'memory': f"{data.get('mem_used', '?')}MB/{data.get('mem_total', '?')}MB",
        'load_avg': data.get('load_avg', 'unknown'),
        'disk': f"{data.get('disk_used', '?')}%/{data.get('disk_total', '?')}%",
    }


def _collector_for(cfg):
    """Return the info-gathering function matching the host's ``type``."""
    return get_ssh_info if cfg.get('type') == 'generic' else get_opnsense_info


def gather_clients(concurrency=5):
    """Query every host in ``config.HOSTS`` in parallel.

    Returns a dict mapping each host key to its info dict.  A host whose
    collector raises unexpectedly is reported as ``{'status': 'error'}``.
    """
    results = {}
    with ThreadPoolExecutor(max_workers=concurrency) as executor:
        futures = {
            executor.submit(_collector_for(cfg), ip, cfg): ip
            for ip, cfg in config.HOSTS.items()
        }
        for future in as_completed(futures):
            host = futures[future]
            try:
                ip, info = future.result()
            except Exception as e:  # one bad host must not abort the others
                logger.error("Gather error for %s: %s", host, e)
                ip, info = host, {'status': 'error'}
            results[ip] = info
    return results


def gather_host(ip=None):
    """Gather info for one configured host, or for all hosts.

    Returns a one-element list ``[(ip, info)]`` when *ip* is a key of
    ``config.HOSTS``; otherwise returns the dict produced by
    ``gather_clients()``.  Both forms are accepted by ``dict()``.
    """
    if ip and ip in config.HOSTS:
        cfg = config.HOSTS[ip]
        return [_collector_for(cfg)(ip, cfg)]
    return gather_clients()


def ping_ip(ip):
    """Send one ICMP echo request to *ip*.

    Returns ``'reachable'``, ``'unreachable'``, or the ``run_cmd`` sentinels
    ``'timeout'`` / ``'error'``.  Arguments that are empty or start with ``-``
    are rejected with ``'error'`` so that chat input cannot be interpreted as
    a ``ping`` option.
    """
    if not ip or ip.startswith('-'):
        return 'error'
    res = run_cmd(['ping', '-c', '1', ip], timeout=3)
    if res in ('timeout', 'error'):
        return res
    received = ('1 packets transmitted, 1 received',
                '1 packets transmitted, 1 packets received')
    if any(marker in res for marker in received):
        return 'reachable'
    return 'unreachable'


def restricted(func):
    """Decorator allowing a handler to run only for chats in ``ALLOWED_CHATS``.

    Works for both message handlers and callback-query handlers: a callback
    query carries its chat on ``call.message`` rather than on itself.
    """
    @functools.wraps(func)
    def wrapper(update, *args, **kwargs):
        is_callback = isinstance(update, types.CallbackQuery)
        message = update.message if is_callback else update
        chat = getattr(message, 'chat', None)
        chat_id = getattr(chat, 'id', None)
        if chat_id not in ALLOWED_CHATS:
            logger.warning("Rejected request from unauthorized chat %s", chat_id)
            if is_callback:
                bot.answer_callback_query(update.id, 'Unauthorized access')
            else:
                bot.reply_to(update, 'Unauthorized access')
            return None
        return func(update, *args, **kwargs)
    return wrapper


def _split_message(text, limit=MAX_MESSAGE_LEN):
    """Split *text* into chunks of at most *limit* characters, on line breaks
    where possible."""
    chunks = []
    current = ''
    for line in text.split('\n'):
        # A single over-long line has to be cut mid-line.
        while len(line) > limit:
            if current:
                chunks.append(current)
                current = ''
            chunks.append(line[:limit])
            line = line[limit:]
        candidate = f"{current}\n{line}" if current else line
        if len(candidate) > limit:
            chunks.append(current)
            current = line
        else:
            current = candidate
    if current:
        chunks.append(current)
    return chunks


# /status: show menu of available hosts
@bot.message_handler(commands=['status'])
@restricted
def handle_status(msg):
    """Reply with an inline keyboard listing every configured host plus 'All'."""
    keyboard = types.InlineKeyboardMarkup()
    for ip in config.HOSTS:
        keyboard.add(types.InlineKeyboardButton(ip, callback_data=f'status:{ip}'))
    keyboard.add(types.InlineKeyboardButton('All', callback_data='status:all'))
    bot.send_message(msg.chat.id, 'Select host for status:', reply_markup=keyboard)


# Callback handler for inline menu
@bot.callback_query_handler(func=lambda c: bool(c.data) and c.data.startswith('status:'))
@restricted
def callback_status(call):
    """Send the status of the host (or all hosts) chosen in the inline menu."""
    # Acknowledge first so the client stops showing its loading indicator
    # while the (possibly slow) queries run.
    bot.answer_callback_query(call.id)
    chat_id = call.message.chat.id
    _, key = call.data.split(':', 1)
    if key == 'all':
        entries = gather_clients()
    elif key in config.HOSTS:
        entries = dict(gather_host(key))
    else:
        # Button from an old menu whose host is no longer configured.
        bot.send_message(chat_id, f"Unknown host: {key}")
        return

    lines = []
    for ip, info in entries.items():
        lines.append(f"{ip}: {info.get('status', 'unknown')}")
        if info.get('status') == 'online':
            for field in ('uptime', 'load_avg', 'memory', 'disk'):
                lines.append(f" {field}: {info.get(field, '-')}")
    if not lines:
        bot.send_message(chat_id, 'No hosts configured')
        return
    for chunk in _split_message('\n'.join(lines)):
        bot.send_message(chat_id, chunk)


# /ping
@bot.message_handler(commands=['ping'])
@restricted
def handle_ping(msg):
    """Handle ``/ping <host>`` by replying with the result of ``ping_ip``."""
    parts = msg.text.split()
    if len(parts) != 2:
        bot.reply_to(msg, 'Usage: /ping <host>')
        return
    ip = parts[1]
    status = ping_ip(ip)
    bot.reply_to(msg, f"Ping {ip}: {status}")


# Run polling with retry
while True:
    try:
        bot.polling()
    except Exception as e:  # top-level boundary: log, back off, reconnect
        logger.error("Polling error: %s", e)
        time.sleep(5)