#!/usr/bin/env python3
# description: telegram bot for monitoring the system
# dependencies: telebot
# usage: python3 lainmonitor.py | or run it as a service
# author: hornetmaidan
# contributors: h@x
# version: 1.1.6
import logging
import os
import queue
import subprocess
import threading
from time import sleep

import telebot

# Setup logging
logging.basicConfig(filename='lainmonitor.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Config files live next to this script
script_dir = os.path.dirname(os.path.realpath(__file__))
env_path = os.path.join(script_dir, '.env')
auth_users_path = os.path.join(script_dir, '.authorized_users')

# Load the token (the whole content of the .env file is used as the token)
try:
    with open(env_path, 'r') as f:
        token = f.read().strip()
except FileNotFoundError:
    logging.error('Token file not found. Exiting...')
    # SystemExit instead of exit(): exit() is a `site` helper and is not
    # guaranteed to exist in every interpreter setup.
    raise SystemExit(1)

# Load the authorized users (one Telegram user id per line, blank lines ignored)
try:
    with open(auth_users_path, 'r') as f:
        authorized_users = [line.strip() for line in f if line.strip()]
except FileNotFoundError:
    logging.error('Authorized users file not found. Exiting...')
    raise SystemExit(1)

# Initialize the bot
bot = telebot.TeleBot(token)

# systemd units that are reported by /status and may be restarted via /restart
SERVICES = ('zerotier-one', 'prosody', 'postgresql', 'tailscaled', 'nginx')


def _read_hostname():
    """Return the output of the `hostname` command, stripped."""
    return subprocess.check_output(['hostname']).decode().strip()


# Define status variables
status, uptime = 'unknown', 'unknown'
zerotier, prosody, postgres, tailscale, nginx, disk = ['unknown'] * 6
nodes, hostnames, threads = [], [], []
reach_queue = queue.Queue()

# Resolve the hostname at startup: '/restart <hostname>' and '/reboot <hostname>'
# are matched against it, so it must be known before /status has ever been run.
try:
    hostname = _read_hostname()
except (subprocess.CalledProcessError, OSError) as e:
    logging.error(f"Error fetching hostname: {e}")
    hostname = 'unknown'


# Get basic system info
def get_system_info():
    """Refresh the module-level status variables.

    Updates hostname, uptime, the per-service status strings and the `df -h`
    output. Sets `status` to 'online' when every command succeeded and to
    'offline' when one of them failed or could not be started.
    """
    # `status` must be listed here, otherwise the assignments below only
    # create a local variable and /status keeps reporting 'unknown'.
    global status, hostname, uptime, zerotier, prosody, postgres, tailscale, nginx, disk
    try:
        hostname = _read_hostname()
        uptime = subprocess.check_output(['uptime', '-p']).decode().strip()
        zerotier, prosody, postgres, tailscale, nginx = [
            get_service_status(service) for service in SERVICES
        ]
        disk = subprocess.check_output(['df', '-h']).decode().strip()
    except (subprocess.CalledProcessError, OSError) as e:
        logging.error(f"Error fetching system info: {e}")
        status = 'offline'
    else:
        status = 'online'


# Helper function to get service status
def get_service_status(service):
    """Return '<service> is active' or '<service> is inactive'.

    The answer comes from the exit status of `systemctl is-active --quiet`.
    """
    try:
        subprocess.run(['sudo', 'systemctl', 'is-active', '--quiet', service], check=True)
        return f'{service} is active'
    except subprocess.CalledProcessError:
        return f'{service} is inactive'


# Function to ping a Tailscale node
def ping_node(node, hostname):
    """Ping `node` once and put a reachable/unreachable line on `reach_queue`.

    `hostname` is only used to label the result line.
    """
    try:
        subprocess.run(['ping', '-c', '1', node], stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL, check=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing `ping` binary; without it the thread would
        # die and the node would silently vanish from the report.
        reach_queue.put(f'{node}/{hostname} is unreachable')
    else:
        reach_queue.put(f'{node}/{hostname} is reachable')


def _parse_tailscale_peers(status_output):
    """Return (ip, hostname) pairs for `tailscale status` lines starting with '100.'."""
    peers = []
    for line in status_output.splitlines():
        fields = line.split()
        # Skip blank lines, notices and anything without an IP + name column.
        if len(fields) >= 2 and fields[0].startswith('100.'):
            peers.append((fields[0], fields[1]))
    return peers


# Check Tailscale nodes
def check_tailscale_nodes():
    """Ping every peer listed by `tailscale status` in parallel.

    Returns a list of '<ip>/<hostname> is reachable|unreachable' strings in
    completion order, or a one-element list holding an error message when
    `tailscale status` fails or cannot be started.
    """
    global nodes, hostnames, threads
    try:
        # Argument list instead of a shell pipeline: a failing `tailscale`
        # is no longer hidden behind grep's exit status.
        status_output = subprocess.check_output(['tailscale', 'status']).decode()
    except (subprocess.CalledProcessError, OSError) as e:
        logging.error(f"Error checking Tailscale status: {e}")
        return ['Error checking Tailscale status']

    peers = _parse_tailscale_peers(status_output)
    nodes = [ip for ip, _ in peers]
    hostnames = [name for _, name in peers]

    # Fresh list on every call so finished threads from earlier /ping
    # requests are not kept around and re-joined forever.
    threads = [threading.Thread(target=ping_node, args=peer) for peer in peers]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    reach = []
    while not reach_queue.empty():
        reach.append(reach_queue.get())
    return reach


# Function to restart a service
def restart_service(service):
    """Restart `service` through systemctl and return a status message.

    Waits three seconds after the restart before querying the service state.
    Returns 'Error restarting <service>' when the restart command fails.
    """
    logging.info(f'Restarting {service}...')
    try:
        subprocess.run(['sudo', 'systemctl', 'restart', service], check=True)
        sleep(3)
        service_status = get_service_status(service)
        status_message = f'{service} restarted! Status: {service_status}'
        logging.info(status_message)
        return status_message
    except (subprocess.CalledProcessError, OSError) as e:
        logging.error(f"Error restarting {service}: {e}")
        return f'Error restarting {service}'


# Restart services menu
def restart_menu():
    """Build the inline keyboard: one button per service plus 'cancel'."""
    button = telebot.types.InlineKeyboardButton
    keyboard = [[button(name, callback_data=name)] for name in SERVICES + ('cancel',)]
    return telebot.types.InlineKeyboardMarkup(keyboard)


# Callback query handler for service restart
@bot.callback_query_handler(func=lambda call: True)
def callback_query(call):
    """Handle a button press from the restart menu."""
    chat_id = call.message.chat.id
    # The menu may be visible to other chat members, so the presser is
    # checked again here and not only when the menu was requested.
    if str(call.from_user.id) not in authorized_users:
        logging.warning(f'Unauthorized callback from user {call.from_user.id}')
        bot.answer_callback_query(call.id, 'You are not authorized for this action')
        return
    bot.answer_callback_query(call.id)

    service = call.data
    if service == 'cancel':
        bot.edit_message_reply_markup(chat_id, call.message.message_id, reply_markup=None)
        bot.send_message(chat_id, 'Canceled')
    elif service in SERVICES:
        bot.send_message(chat_id, restart_service(service))
    else:
        # Never hand unknown callback data to `sudo systemctl restart`.
        logging.warning(f'Ignoring unknown callback data: {service!r}')


# Reboot system function
def reboot():
    """Reboot the machine via `sudo reboot`.

    Raises subprocess.CalledProcessError when the command exits non-zero.
    """
    logging.info('Rebooting system...')
    subprocess.run(['sudo', 'reboot'], check=True)


# Message handlers
@bot.message_handler(commands=['start', 'help', 'status', 'restart', 'reboot', 'ping'])
def handle(message):
    """Dispatch a bot command.

    /start, /help and /status are open to everyone. '/restart <hostname>',
    '/reboot <hostname>' and /ping require the sender's id to be listed in
    `authorized_users`; anything else gets the 'not authorized' reply.
    """
    user_id = str(message.from_user.id)
    authorized = user_id in authorized_users
    if message.text == '/start':
        bot.reply_to(message, 'lainmonitor v1.0 --- standing by...')
    elif message.text == '/help':
        bot.reply_to(message, 'commands: /start, /help, /status, /restart, /reboot, /ping')
    elif message.text == '/status':
        get_system_info()
        status_message = (
            f'hostname: {hostname}\n'
            f'system status: {status}\n'
            f'uptime: {uptime}\n'
            f'zerotier: {zerotier}\n'
            f'prosody: {prosody}\n'
            f'postgres: {postgres}\n'
            f'tailscale: {tailscale}\n'
            f'nginx: {nginx}'
        )
        bot.reply_to(message, status_message)
        bot.reply_to(message, f'Filesystem info for {hostname}:\n\n{disk}')
    elif message.text == f'/restart {hostname}' and authorized:
        bot.send_message(message.chat.id, 'Select a service to restart:', reply_markup=restart_menu())
    elif message.text == f'/reboot {hostname}' and authorized:
        bot.reply_to(message, f'Rebooting {hostname}...')
        reboot()
    elif message.text == '/ping' and authorized:
        reach = check_tailscale_nodes()
        # Joined outside the f-string: a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12.
        ping_report = '\n'.join(reach)
        bot.reply_to(message, f'Ping status:\n\n{ping_report}')
    else:
        if not authorized:
            logging.warning(f'Unauthorized command from user {user_id}: {message.text}')
        bot.reply_to(message, 'You are not authorized for this action')


# Polling with timeout and error handling
try:
    bot.polling(none_stop=True, timeout=60, long_polling_timeout=60)
except Exception as e:
    logging.error(f'Polling error: {e}')