diff --git a/lainmonitor.py b/lainmonitor.py index 2550725..906de4a 100644 --- a/lainmonitor.py +++ b/lainmonitor.py @@ -1,85 +1,127 @@ -#description: telegram bot for monitoring the system -#dependencies: telebot -#usage: python3 lainmonitor.py | or run it as a service -#authors: hornet +# --/usr/bin/env python3 -- # +# description: telegram bot for monitoring the system +# dependencies: telebot +# usage: python3 lainmonitor.py | or run it as a service +# author: hornetmaidan +# contributors: h@x +# version: 1.1.6 +import os import subprocess import threading +import queue from time import sleep -from telebot import * +import telebot +import logging +# Setup logging +logging.basicConfig(filename='lainmonitor.log', level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s') -#define the variables -status, hostname, uptime, zerotier, prosody, postgres, tailscale, nginx, disk, ping = 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown' -nodes, hostnames, reach, threads = [], [], [], [] +# Load environment variables and config files securely +script_dir = os.path.dirname(os.path.realpath(__file__)) +env_path = os.path.join(script_dir, '.env') +auth_users_path = os.path.join(script_dir, '.authorized_users') -#change this to your instance's hostname -host = subprocess.check_output(['hostname']).decode().strip() -#print ('host:', host) # debug +# Load the token +try: + with open(env_path, 'r') as f: + token = f.read().strip() +except FileNotFoundError: + logging.error('Token file not found. Exiting...') + exit(1) -#load the token -token = open('.env', 'r').read().strip() +# Load the authorized users +try: + authorized_users = [line.strip() for line in open(auth_users_path, 'r').readlines()] +except FileNotFoundError: + logging.error('Authorized users file not found. Exiting...') + exit(1) -#load the authorized users -authorized_users = [line.strip() for line in open('.authorized_users', 'r').readlines()] -#print('authorized users:', authorized_users) # debug - -#bot init +# Initialize the bot bot = telebot.TeleBot(token) -updater = bot.update_listener -#get system info -def getinfo(): - global status, hostname, uptime, zerotier, prosody, postgres, tailscale, disk - hostname = subprocess.check_output(['hostname']).decode().strip() - uptime = subprocess.check_output(['uptime', '-p']).decode().strip() - #systemd-only services - zerotier = subprocess.Popen("sudo systemctl status zerotier-one | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip() - prosody = subprocess.Popen("sudo systemctl status prosody | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip() - postgres = subprocess.Popen("sudo systemctl status postgresql | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip() - tailscale = subprocess.Popen("sudo systemctl status tailscaled | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip() - nginx = subprocess.Popen("sudo systemctl status nginx | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip() - disk = subprocess.check_output(['df', '-h']).decode().strip() - if hostname == 'unknown': +# Define status variables +status, hostname, uptime = 'unknown', 'unknown', 'unknown' +zerotier, prosody, postgres, tailscale, nginx, disk = ['unknown'] * 6 +nodes, hostnames, threads = [], [], [] +reach_queue = queue.Queue() + +# Get basic system info +def get_system_info(): + global hostname, uptime, zerotier, prosody, postgres, tailscale, nginx, disk + try: + hostname = subprocess.check_output(['hostname']).decode().strip() + uptime = subprocess.check_output(['uptime', '-p']).decode().strip() + + services = ['zerotier-one', 'prosody', 'postgresql', 'tailscaled', 'nginx'] + status_results = [] + for service in services: + status_results.append(get_service_status(service)) + zerotier, prosody, postgres, tailscale, nginx = status_results + + disk = subprocess.check_output(['df', '-h']).decode().strip() + except subprocess.CalledProcessError as e: + logging.error(f"Error fetching system info: {e}") status = 'offline' else: status = 'online' - return hostname, uptime, zerotier, prosody, postgres, tailscale, nginx, disk -#function to ping tailscale nodes +# Helper function to get service status +def get_service_status(service): + try: + subprocess.run(['sudo', 'systemctl', 'is-active', '--quiet', service], check=True) + return f'{service} is active' + except subprocess.CalledProcessError: + return f'{service} is inactive' + +# Function to ping a Tailscale node def ping_node(node, hostname): - ping = subprocess.Popen(f"ping {node} -c 1 | grep '1 packets'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip() - if '1 received' in ping: - reach.append(f'{node}/{hostname} is reachable') - else: - reach.append(f'{node}/{hostname} is unreachable') + try: + ping = subprocess.run(['ping', '-c', '1', node], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) + reach_queue.put(f'{node}/{hostname} is reachable') + except subprocess.CalledProcessError: + reach_queue.put(f'{node}/{hostname} is unreachable') -#ping tailscale nodes -def check_tailscale(): - global nodes, hostnames, reach, threads, ping - nodes_output = subprocess.Popen("tailscale status | grep '100'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip() - nodes = [line.split()[0] for line in nodes_output.split('\n') if line] - hostnames = [line.split()[1] for line in nodes_output.split('\n') if line] +# Check Tailscale nodes +def check_tailscale_nodes(): + global nodes, hostnames, threads + try: + nodes_output = subprocess.check_output("tailscale status | grep '100'", shell=True).decode().strip() + nodes = [line.split()[0] for line in nodes_output.split('\n') if line] + hostnames = [line.split()[1] for line in nodes_output.split('\n') if line] - for node, hostname in zip(nodes, hostnames): + for node, hostname in zip(nodes, hostnames): thread = threading.Thread(target=ping_node, args=(node, hostname)) threads.append(thread) thread.start() - for thread in threads: - thread.join() + for thread in threads: + thread.join() - return reach + reach = [] + while not reach_queue.empty(): + reach.append(reach_queue.get()) -#restart services + return reach + except subprocess.CalledProcessError as e: + logging.error(f"Error checking Tailscale status: {e}") + return ['Error checking Tailscale status'] + +# Function to restart a service def restart_service(service): - print(f'restarting {service}...') - subprocess.Popen(f'sudo systemctl restart {service}', shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip() - sleep(3) - service_status = subprocess.Popen(f'sudo systemctl status {service} | grep "Active"', shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip() - status_message = f'{service} restarted! status: {service_status}' - return status_message + logging.info(f'Restarting {service}...') + try: + subprocess.run(['sudo', 'systemctl', 'restart', service], check=True) + sleep(3) + service_status = get_service_status(service) + status_message = f'{service} restarted! Status: {service_status}' + logging.info(status_message) + return status_message + except subprocess.CalledProcessError as e: + logging.error(f"Error restarting {service}: {e}") + return f'Error restarting {service}' -#restart services menu +# Restart services menu def restart_menu(): keyboard = [ [telebot.types.InlineKeyboardButton('zerotier-one', callback_data='zerotier-one')], @@ -92,7 +134,7 @@ def restart_menu(): reply_markup = telebot.types.InlineKeyboardMarkup(keyboard) return reply_markup -#callback query handler +# Callback query handler for service restart @bot.callback_query_handler(func=lambda call: True) def callback_query(call): service = call.data @@ -101,57 +143,50 @@ def callback_query(call): bot.send_message(call.message.chat.id, status_message) else: bot.edit_message_reply_markup(call.message.chat.id, call.message.message_id, reply_markup=None) - bot.send_message(call.message.chat.id, 'canceled') + bot.send_message(call.message.chat.id, 'Canceled') +# Reboot system function def reboot(): - subprocess.Popen('sudo reboot', shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip() + logging.info('Rebooting system...') + subprocess.run(['sudo', 'reboot'], check=True) -#debug handler -def check(): - global status, hostname, uptime, zerotier, prosody, postgres, tailscale, nginx, disk - getinfo() - print('system status:', status) - print('hostname:', hostname) - print('uptime:', uptime) - print('zerotier:', zerotier) - print('prosody:', prosody) - print('postgres:', postgres) - print('tailscale:', tailscale) - print('disk:', disk) - return status, hostname, uptime, zerotier, prosody, postgres, tailscale, nginx, disk - -#message handling +# Message handlers @bot.message_handler(commands=['start', 'help', 'status', 'restart', 'reboot', 'ping']) def handle(message): + user_id = str(message.from_user.id) if message.text == '/start': bot.reply_to(message, 'lainmonitor v1.0 --- standing by...') elif message.text == '/help': bot.reply_to(message, 'commands: /start, /help, /status, /restart, /reboot, /ping') + bot.reply_to(message, 'commands: /start, /help, /status, /restart, /reboot, /ping') elif message.text == '/status': - check() - status_message = f'hostname: {hostname}\nsystem status: {status}\nuptime: {uptime}\nzerotier: {zerotier}\nprosody: {prosody}\npostgres: {postgres}\ntailscale: {tailscale}\nnginx: {nginx}' + get_system_info() + status_message = ( + f'hostname: {hostname}\n' + f'system status: {status}\n' + f'uptime: {uptime}\n' + f'zerotier: {zerotier}\n' + f'prosody: {prosody}\n' + f'postgres: {postgres}\n' + f'tailscale: {tailscale}\n' + f'nginx: {nginx}' + ) bot.reply_to(message, status_message) - bot.reply_to(message, f'filesystem info for {hostname}: \n\n{disk}') - elif message.text == f'/restart {host}': - if message.text == f'/restart {host}' and str(message.from_user.id) in authorized_users: - bot.send_message(message.chat.id, 'select a service to restart:', reply_markup=restart_menu()) - else: - bot.reply_to(message, 'you are not authorized to restart services on this host') - elif message.text == f'/reboot {host}': - if message.text == f'/reboot {host}' and str(message.from_user.id) in authorized_users: - bot.reply_to(message, f'rebooting {host}...') - reboot() - else: - bot.reply_to(message, 'you are not authorized to reboot this host') - elif message.text == '/ping': - if message.text == f'/restart {host}' and str(message.from_user.id) in authorized_users: - check_tailscale() - ping_status = '\n'.join(reach) - bot.reply_to(message, f'ping status:\n\n{ping_status}') - ping_status = '' - reach.clear() - else: - bot.reply_to(message, 'you are not authorized to view ping status') + bot.reply_to(message, f'Filesystem info for {hostname}:\n\n{disk}') + elif message.text == f'/restart {hostname}' and user_id in authorized_users: + bot.send_message(message.chat.id, 'Select a service to restart:', reply_markup=restart_menu()) + elif message.text == f'/reboot {hostname}' and user_id in authorized_users: + bot.reply_to(message, f'Rebooting {hostname}...') + reboot() + elif message.text == '/ping' and user_id in authorized_users: + reach = check_tailscale_nodes() + bot.reply_to(message, f'Ping status:\n\n{"\n".join(reach)}') + else: + bot.reply_to(message, 'You are not authorized for this action') + +# Polling with timeout and error handling +try: + bot.polling(none_stop=True, timeout=60, long_polling_timeout=60) +except Exception as e: + logging.error(f'Polling error: {e}') -#polling -bot.polling() \ No newline at end of file