lainmonitor/lainmonitor.py

197 lines
7.3 KiB
Python

# --/usr/bin/env python3 -- #
# description: telegram bot for monitoring the system
# dependencies: telebot
# usage: python3 lainmonitor.py | or run it as a service
# author: hornetmaidan
# contributors: h@x
# version: 1.2
import os
import subprocess
import threading
import queue
from time import sleep
import telebot
import logging
# Setup logging. The log file name is relative, so the file is created in
# the current working directory of the process.
logging.basicConfig(
    filename='lainmonitor.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Config files are looked up next to this script, regardless of the
# current working directory.
script_dir = os.path.dirname(os.path.realpath(__file__))
env_path = os.path.join(script_dir, '.env')
auth_users_path = os.path.join(script_dir, '.authorized_users')

# Load the token: the whole stripped content of the .env file is the token.
try:
    with open(env_path, 'r') as f:
        token = f.read().strip()
except FileNotFoundError:
    logging.error('Token file not found. Exiting...')
    # SystemExit works even when the interactive `exit()` helper from the
    # `site` module is unavailable.
    raise SystemExit(1)

# Load the authorized users: one Telegram user id per line.
try:
    # The context manager closes the file; blank lines are skipped so the
    # list never contains empty entries.
    with open(auth_users_path, 'r') as f:
        authorized_users = [line.strip() for line in f if line.strip()]
except FileNotFoundError:
    logging.error('Authorized users file not found. Exiting...')
    raise SystemExit(1)
# Create the bot client from the loaded token
bot = telebot.TeleBot(token)

# Module-level status fields; all start as 'unknown' until
# get_system_info() fills them in.
status = hostname = uptime = 'unknown'
zerotier = prosody = postgres = tailscale = nginx = disk = 'unknown'

# State shared by the Tailscale ping check
nodes = []
hostnames = []
threads = []
reach_queue = queue.Queue()
# Get basic system info
def get_system_info():
    """Refresh the module-level system status variables.

    Runs ``hostname``, ``uptime -p`` and ``df -h`` and queries each
    monitored service through get_service_status(), storing the results in
    the module globals. ``status`` is set to 'online' when every command
    succeeds and to 'offline' when one of them fails.
    """
    # `status` must be declared global here; otherwise the assignments
    # below only create a local and the module-level value never changes.
    global status, hostname, uptime, zerotier, prosody, postgres, tailscale, nginx, disk
    try:
        hostname = subprocess.check_output(['hostname']).decode().strip()
        uptime = subprocess.check_output(['uptime', '-p']).decode().strip()
        services = ['zerotier-one', 'prosody', 'postgresql', 'tailscaled', 'nginx']
        zerotier, prosody, postgres, tailscale, nginx = [
            get_service_status(service) for service in services
        ]
        disk = subprocess.check_output(['df', '-h']).decode().strip()
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a command that is missing or not executable.
        logging.error(f"Error fetching system info: {e}")
        status = 'offline'
    else:
        status = 'online'
# Helper function to get service status
def get_service_status(service):
    """Return a one-line activity report for a systemd service.

    Args:
        service: systemd unit name passed to ``systemctl is-active``.

    Returns:
        '<service> is active' when the command exits with status 0,
        otherwise '<service> is inactive/not present'. The latter is also
        returned when the command itself cannot be started (for example
        ``sudo`` is not installed), instead of raising.
    """
    try:
        subprocess.run(['sudo', 'systemctl', 'is-active', '--quiet', service], check=True)
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: non-zero exit status.
        # OSError: the executable could not be launched at all.
        return f'{service} is inactive/not present'
    return f'{service} is active'
# Function to ping a Tailscale node
def ping_node(node, hostname):
    """Ping a node once and queue a reachability line for it.

    Exactly one message is always put on ``reach_queue``, so the caller
    gets a result for every node it started a thread for.

    Args:
        node: address passed to ``ping``.
        hostname: label shown next to the address in the message.
    """
    try:
        # The ping output is not used, so discard it instead of buffering it.
        subprocess.run(
            ['ping', '-c', '1', node],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except subprocess.CalledProcessError:
        reach_queue.put(f'{node}/{hostname} is unreachable')
    except OSError as e:
        # ping could not be launched; still report a result for this node
        # rather than letting the worker thread die silently.
        logging.error(f"Could not run ping for {node}: {e}")
        reach_queue.put(f'{node}/{hostname} is unreachable')
    else:
        reach_queue.put(f'{node}/{hostname} is reachable')
# Check Tailscale nodes
def check_tailscale_nodes():
    """Ping every node listed by ``tailscale status`` in parallel.

    Lines of the status output that contain '100' are treated as nodes:
    the first field is pinged and the second is used as its label. One
    thread per node runs ping_node(); the messages they put on
    ``reach_queue`` are collected after all threads have finished.

    Returns:
        A list of reachability messages, or
        ['Error checking Tailscale status'] when the command cannot be run
        or no node line is found.
    """
    global nodes, hostnames, threads
    try:
        # Run the command directly instead of through a shell pipeline.
        # The exit status is deliberately not checked: only the presence
        # of matching lines decides success, as with the former grep.
        result = subprocess.run(['tailscale', 'status'], stdout=subprocess.PIPE)
    except OSError as e:
        logging.error(f"Error checking Tailscale status: {e}")
        return ['Error checking Tailscale status']

    peers = []
    for line in result.stdout.decode().splitlines():
        if '100' not in line:
            continue
        fields = line.split()
        # Skip lines without both an address and a name column.
        if len(fields) >= 2:
            peers.append((fields[0], fields[1]))

    if not peers:
        logging.error("Error checking Tailscale status: no node lines found")
        return ['Error checking Tailscale status']

    nodes = [address for address, _ in peers]
    hostnames = [name for _, name in peers]
    # Rebind the global list on every call so finished threads from
    # earlier calls do not accumulate.
    threads = [threading.Thread(target=ping_node, args=peer) for peer in peers]
    for worker in threads:
        worker.start()
    for worker in threads:
        worker.join()

    reach = []
    while not reach_queue.empty():
        reach.append(reach_queue.get())
    return reach
# Function to restart a service
def restart_service(service):
    """Restart a systemd service and report the outcome.

    Args:
        service: unit name passed to ``sudo systemctl restart``.

    Returns:
        A message with the service status after the restart, or an error
        message when the restart command exits with a non-zero status.
    """
    logging.info(f'Restarting {service}...')
    restart_cmd = ['sudo', 'systemctl', 'restart', service]
    try:
        subprocess.run(restart_cmd, check=True)
    except subprocess.CalledProcessError as err:
        logging.error(f"Error restarting {service}: {err}")
        return f'Error restarting {service}'

    # Wait 3 seconds before querying the state (presumably to let the
    # service finish starting).
    sleep(3)
    outcome = f'{service} restarted! Status: {get_service_status(service)}'
    logging.info(outcome)
    return outcome
# Restart services menu
def restart_menu():
    """Build the inline keyboard used to pick a service to restart.

    Each button sits on its own row and sends its own label as callback
    data; the last row is a 'cancel' button.
    """
    labels = ('zerotier-one', 'prosody', 'postgresql', 'tailscaled', 'nginx', 'cancel')
    rows = [
        [telebot.types.InlineKeyboardButton(label, callback_data=label)]
        for label in labels
    ]
    return telebot.types.InlineKeyboardMarkup(rows)
# Callback query handler for service restart
@bot.callback_query_handler(func=lambda call: True)
def callback_query(call):
    """Handle a button press on the restart menu.

    Only users listed in ``authorized_users`` may trigger an action, and
    only the services offered by restart_menu() are ever restarted; any
    other callback data is logged and ignored. 'cancel' removes the
    keyboard from the menu message.
    """
    # Same check as the message handler: button presses must be authorized
    # too, since anyone who can see the menu message can press a button.
    if str(call.from_user.id) not in authorized_users:
        logging.warning(f'Unauthorized callback from user {call.from_user.id}')
        bot.answer_callback_query(call.id, 'You are not authorized for this action')
        return

    # Acknowledge the press right away so the client stops showing its
    # progress indicator while the restart runs.
    bot.answer_callback_query(call.id)

    restartable = ('zerotier-one', 'prosody', 'postgresql', 'tailscaled', 'nginx')
    chat_id = call.message.chat.id
    service = call.data
    if service == 'cancel':
        bot.edit_message_reply_markup(chat_id, call.message.message_id, reply_markup=None)
        bot.send_message(chat_id, 'Canceled')
    elif service in restartable:
        bot.send_message(chat_id, restart_service(service))
    else:
        # Never pass unexpected callback data on to `sudo systemctl`.
        logging.warning(f'Ignoring unexpected callback data: {service!r}')
# Reboot system function
def reboot():
    """Log the request, then reboot the machine with ``sudo reboot``.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    logging.info('Rebooting system...')
    reboot_cmd = ['sudo', 'reboot']
    subprocess.run(reboot_cmd, check=True)
# Populate the status variables once at startup
get_system_info()
# Message handlers
@bot.message_handler(commands=['start', 'help', 'status', 'restart', 'reboot', 'ping'])
def handle(message):
    """Dispatch a bot command sent by an authorized user.

    Users not listed in ``authorized_users`` get a refusal reply. The
    message text must match a command exactly; /restart and /reboot must
    additionally be followed by this machine's hostname. Any other text
    is ignored.
    """
    user_id = str(message.from_user.id)
    if user_id not in authorized_users:
        bot.reply_to(message, 'You are not authorized for this action')
        return

    text = message.text
    if text == '/start':
        bot.reply_to(message, 'lainmonitor v1.2 --- standing by...')
    elif text == '/help':
        # Sent once; the reply used to be duplicated.
        bot.reply_to(message, 'commands: /start, /help, /status, /restart, /reboot, /ping')
    elif text == '/status':
        # Refresh the module-level status variables before reporting them.
        get_system_info()
        status_message = (
            f'hostname: {hostname}\n'
            f'system status: {status}\n'
            f'uptime: {uptime}\n'
            f'zerotier: {zerotier}\n'
            f'prosody: {prosody}\n'
            f'postgres: {postgres}\n'
            f'tailscale: {tailscale}\n'
            f'nginx: {nginx}'
        )
        bot.reply_to(message, status_message)
        bot.reply_to(message, f'Filesystem info for {hostname}:\n\n{disk}')
    elif text == f'/restart {hostname}':
        bot.send_message(message.chat.id, 'Select a service to restart:', reply_markup=restart_menu())
    elif text == f'/reboot {hostname}':
        bot.reply_to(message, f'Rebooting {hostname}...')
        reboot()
    elif text == '/ping':
        reach = check_tailscale_nodes()
        # Join outside the f-string: a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12.
        ping_report = '\n'.join(reach)
        bot.reply_to(message, f'Ping status:\n\n{ping_report}')
# Start polling for updates; an exception escaping the poll loop is logged
# instead of propagating.
try:
    bot.polling(
        none_stop=True,
        timeout=60,
        long_polling_timeout=60,
    )
except Exception as err:
    logging.error(f'Polling error: {err}')