Merge branch 'hax-dev' into dev
This commit is contained in:
commit
1b52ceb881
1 changed files with 135 additions and 100 deletions
235
lainmonitor.py
235
lainmonitor.py
|
|
@ -1,85 +1,127 @@
|
|||
#description: telegram bot for monitoring the system
|
||||
#dependencies: telebot
|
||||
#usage: python3 lainmonitor.py | or run it as a service
|
||||
#authors: hornet
|
||||
# --/usr/bin/env python3 -- #
|
||||
# description: telegram bot for monitoring the system
|
||||
# dependencies: telebot
|
||||
# usage: python3 lainmonitor.py | or run it as a service
|
||||
# author: hornetmaidan
|
||||
# contributors: h@x
|
||||
# version: 1.1.6
|
||||
import os
|
||||
import subprocess
|
||||
import threading
|
||||
import queue
|
||||
from time import sleep
|
||||
from telebot import *
|
||||
import telebot
|
||||
import logging
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(filename='lainmonitor.log', level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
|
||||
#define the variables
|
||||
status, hostname, uptime, zerotier, prosody, postgres, tailscale, nginx, disk, ping = 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown', 'unknown'
|
||||
nodes, hostnames, reach, threads = [], [], [], []
|
||||
# Load environment variables and config files securely
|
||||
script_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
env_path = os.path.join(script_dir, '.env')
|
||||
auth_users_path = os.path.join(script_dir, '.authorized_users')
|
||||
|
||||
#change this to your instance's hostname
|
||||
host = subprocess.check_output(['hostname']).decode().strip()
|
||||
#print ('host:', host) # debug
|
||||
# Load the token
|
||||
# Read the bot token from the .env file that sits next to this script.
# The path is resolved through env_path, so the working directory the bot is
# started from does not matter. The handle is closed by the ``with`` block.
try:
    with open(env_path, 'r') as f:
        token = f.read().strip()
except FileNotFoundError:
    logging.error('Token file not found. Exiting...')
    # SystemExit is used directly: the ``exit`` helper is injected by the
    # ``site`` module and is not guaranteed to exist in every interpreter.
    raise SystemExit(1)
|
||||
# Load the authorized users
|
||||
# Read the Telegram user ids that may run privileged commands, one id per
# line, from the .authorized_users file that sits next to this script.
try:
    with open(auth_users_path, 'r') as f:
        # Blank lines are dropped so an empty string never ends up in the list.
        authorized_users = [line.strip() for line in f if line.strip()]
except FileNotFoundError:
    logging.error('Authorized users file not found. Exiting...')
    # SystemExit is used directly: the ``exit`` helper is injected by the
    # ``site`` module and is not guaranteed to exist in every interpreter.
    raise SystemExit(1)
|
||||
#print('authorized users:', authorized_users) # debug
|
||||
|
||||
#bot init
|
||||
# Initialize the bot
|
||||
bot = telebot.TeleBot(token)
|
||||
updater = bot.update_listener
|
||||
|
||||
#get system info
|
||||
def getinfo():
|
||||
global status, hostname, uptime, zerotier, prosody, postgres, tailscale, disk
|
||||
hostname = subprocess.check_output(['hostname']).decode().strip()
|
||||
uptime = subprocess.check_output(['uptime', '-p']).decode().strip()
|
||||
#systemd-only services
|
||||
zerotier = subprocess.Popen("sudo systemctl status zerotier-one | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip()
|
||||
prosody = subprocess.Popen("sudo systemctl status prosody | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip()
|
||||
postgres = subprocess.Popen("sudo systemctl status postgresql | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip()
|
||||
tailscale = subprocess.Popen("sudo systemctl status tailscaled | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip()
|
||||
nginx = subprocess.Popen("sudo systemctl status nginx | grep 'Active'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip()
|
||||
disk = subprocess.check_output(['df', '-h']).decode().strip()
|
||||
if hostname == 'unknown':
|
||||
# Define status variables
|
||||
status, hostname, uptime = 'unknown', 'unknown', 'unknown'
|
||||
zerotier, prosody, postgres, tailscale, nginx, disk = ['unknown'] * 6
|
||||
nodes, hostnames, threads = [], [], []
|
||||
reach_queue = queue.Queue()
|
||||
|
||||
# Get basic system info
|
||||
def get_system_info():
    """Collect hostname, uptime, service states and disk usage for this host.

    Refreshes the module-level ``status``, ``hostname``, ``uptime``,
    ``zerotier``, ``prosody``, ``postgres``, ``tailscale``, ``nginx`` and
    ``disk`` variables. ``status`` is set to ``'online'`` when every command
    succeeded and to ``'offline'`` when one of them failed; in the failure
    case the values gathered before the error are kept and the remaining
    ones retain whatever they held before the call.

    Returns:
        tuple: ``(hostname, uptime, zerotier, prosody, postgres, tailscale,
        nginx, disk)``.
    """
    # ``status`` has to be listed here too: without it the assignments below
    # bind a function-local name and the module-level value never changes.
    global status, hostname, uptime, zerotier, prosody, postgres, tailscale, nginx, disk
    try:
        hostname = subprocess.check_output(['hostname']).decode().strip()
        uptime = subprocess.check_output(['uptime', '-p']).decode().strip()

        services = ('zerotier-one', 'prosody', 'postgresql', 'tailscaled', 'nginx')
        zerotier, prosody, postgres, tailscale, nginx = [
            get_service_status(service) for service in services
        ]

        disk = subprocess.check_output(['df', '-h']).decode().strip()
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a command that could not be executed at all
        # (for example a missing binary), not just a non-zero exit status.
        logging.error(f"Error fetching system info: {e}")
        status = 'offline'
    else:
        status = 'online'
    return hostname, uptime, zerotier, prosody, postgres, tailscale, nginx, disk
|
||||
|
||||
#function to ping tailscale nodes
|
||||
# Helper function to get service status
|
||||
def get_service_status(service):
    """Report whether a systemd unit is currently active.

    Runs ``sudo systemctl is-active --quiet <service>`` and maps the outcome
    to a short human-readable string.

    Args:
        service: Name of the systemd unit to query.

    Returns:
        str: ``'<service> is active'`` when the command exits with status 0,
        ``'<service> is inactive'`` when it exits with a non-zero status, and
        ``'<service> status unknown'`` when the command could not be
        executed at all.
    """
    try:
        subprocess.run(['sudo', 'systemctl', 'is-active', '--quiet', service], check=True)
    except subprocess.CalledProcessError:
        return f'{service} is inactive'
    except OSError as e:
        # The command itself could not be started (e.g. sudo or systemctl is
        # not installed); report that instead of crashing the caller.
        logging.error(f"Error querying {service}: {e}")
        return f'{service} status unknown'
    return f'{service} is active'
|
||||
|
||||
# Function to ping a Tailscale node
|
||||
def ping_node(node, hostname):
    """Ping one node a single time and record the outcome on ``reach_queue``.

    Used as a thread target by ``check_tailscale_nodes``; the result is put
    on the module-level queue instead of being returned.

    Args:
        node: Address passed to ``ping``.
        hostname: Label shown next to the address in the result line.
    """
    try:
        # Argument list, no shell: ``node`` is never interpreted by a shell.
        # The command output is not needed, only the exit status.
        subprocess.run(
            ['ping', '-c', '1', node],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # A non-zero exit status or a ping binary that cannot be executed
        # both mean the node could not be reached from here.
        reach_queue.put(f'{node}/{hostname} is unreachable')
    else:
        reach_queue.put(f'{node}/{hostname} is reachable')
|
||||
|
||||
#ping tailscale nodes
|
||||
def check_tailscale():
|
||||
global nodes, hostnames, reach, threads, ping
|
||||
nodes_output = subprocess.Popen("tailscale status | grep '100'", shell=True, stdout=subprocess.PIPE).stdout.read().decode().strip()
|
||||
nodes = [line.split()[0] for line in nodes_output.split('\n') if line]
|
||||
hostnames = [line.split()[1] for line in nodes_output.split('\n') if line]
|
||||
# Check Tailscale nodes
|
||||
def check_tailscale_nodes():
    """Ping every node listed by ``tailscale status`` and collect the results.

    Lines of the ``tailscale status`` output that contain ``100`` are treated
    as nodes; the first two whitespace-separated fields of each line are used
    as address and host name. Every node is pinged in its own thread through
    ``ping_node``. The module-level ``nodes``, ``hostnames`` and ``threads``
    variables are replaced with the values of this run.

    Returns:
        list[str]: One result line per node, in the order the pings finished,
        or ``['Error checking Tailscale status']`` when the command could not
        be run or listed no node.
    """
    global nodes, hostnames, threads
    try:
        result = subprocess.run(['tailscale', 'status'], stdout=subprocess.PIPE)
    except OSError as e:
        logging.error(f"Error checking Tailscale status: {e}")
        return ['Error checking Tailscale status']

    # Same filter the former ``grep '100'`` pipeline applied, done in Python
    # so no shell is involved. Lines with fewer than two fields are skipped
    # rather than raising IndexError.
    peers = [
        fields
        for fields in (
            line.split()
            for line in result.stdout.decode().splitlines()
            if '100' in line
        )
        if len(fields) >= 2
    ]
    if not peers:
        logging.error("Error checking Tailscale status: no nodes listed")
        return ['Error checking Tailscale status']

    nodes = [fields[0] for fields in peers]
    hostnames = [fields[1] for fields in peers]

    # Start from a fresh list on every call so finished threads from earlier
    # calls are not kept around and joined again.
    threads = [
        threading.Thread(target=ping_node, args=(node, peer_name))
        for node, peer_name in zip(nodes, hostnames)
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    # All workers have finished, so the queue can be drained without blocking.
    reach = []
    while not reach_queue.empty():
        reach.append(reach_queue.get())
    return reach
|
||||
|
||||
# Function to restart a service
|
||||
def restart_service(service):
    """Restart a systemd unit and report its state afterwards.

    Runs ``sudo systemctl restart <service>``, waits three seconds and then
    queries the unit through ``get_service_status``.

    Args:
        service: Name of the systemd unit to restart.

    Returns:
        str: ``'<service> restarted! Status: <state>'`` on success, or
        ``'Error restarting <service>'`` when the restart command failed or
        could not be executed.
    """
    logging.info(f'Restarting {service}...')
    try:
        # Argument list, no shell: ``service`` is passed as a single argument
        # and is never interpreted by a shell.
        subprocess.run(['sudo', 'systemctl', 'restart', service], check=True)
        # Give the unit a moment to settle before asking for its state.
        sleep(3)
        service_status = get_service_status(service)
        status_message = f'{service} restarted! Status: {service_status}'
        logging.info(status_message)
        return status_message
    except (subprocess.CalledProcessError, OSError) as e:
        logging.error(f"Error restarting {service}: {e}")
        return f'Error restarting {service}'
|
||||
|
||||
#restart services menu
|
||||
# Restart services menu
|
||||
def restart_menu():
|
||||
keyboard = [
|
||||
[telebot.types.InlineKeyboardButton('zerotier-one', callback_data='zerotier-one')],
|
||||
|
|
@ -92,7 +134,7 @@ def restart_menu():
|
|||
reply_markup = telebot.types.InlineKeyboardMarkup(keyboard)
|
||||
return reply_markup
|
||||
|
||||
#callback query handler
|
||||
# Callback query handler for service restart
|
||||
@bot.callback_query_handler(func=lambda call: True)
|
||||
def callback_query(call):
|
||||
service = call.data
|
||||
|
|
@ -101,57 +143,50 @@ def callback_query(call):
|
|||
bot.send_message(call.message.chat.id, status_message)
|
||||
else:
|
||||
bot.edit_message_reply_markup(call.message.chat.id, call.message.message_id, reply_markup=None)
|
||||
bot.send_message(call.message.chat.id, 'canceled')
|
||||
bot.send_message(call.message.chat.id, 'Canceled')
|
||||
|
||||
# Reboot system function
|
||||
def reboot():
    """Reboot the machine by running ``sudo reboot``.

    The request is logged before the command is issued. A failure is logged
    and then re-raised so the caller still sees it.

    Raises:
        subprocess.CalledProcessError: ``sudo reboot`` exited with a non-zero
            status.
        OSError: ``sudo`` could not be executed.
    """
    logging.info('Rebooting system...')
    try:
        subprocess.run(['sudo', 'reboot'], check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        logging.error(f'Reboot failed: {e}')
        raise
|
||||
|
||||
#debug handler
|
||||
def check():
    """Refresh the cached system information and print it for debugging.

    Calls ``getinfo()`` and then prints each module-level status variable on
    its own line.

    Returns:
        tuple: ``(status, hostname, uptime, zerotier, prosody, postgres,
        tailscale, nginx, disk)`` as currently held at module level.
    """
    global status, hostname, uptime, zerotier, prosody, postgres, tailscale, nginx, disk
    getinfo()
    print('system status:', status)
    print('hostname:', hostname)
    print('uptime:', uptime)
    print('zerotier:', zerotier)
    print('prosody:', prosody)
    print('postgres:', postgres)
    print('tailscale:', tailscale)
    # nginx is part of the returned tuple, so it is shown with the rest.
    print('nginx:', nginx)
    print('disk:', disk)
    return status, hostname, uptime, zerotier, prosody, postgres, tailscale, nginx, disk
|
||||
|
||||
#message handling
|
||||
# Message handlers
|
||||
def _local_hostname():
    """Return this machine's host name as printed by the ``hostname`` command."""
    return subprocess.check_output(['hostname']).decode().strip()


@bot.message_handler(commands=['start', 'help', 'status', 'restart', 'reboot', 'ping'])
def handle(message):
    """Dispatch a bot command to the matching action and reply to the sender.

    ``/start``, ``/help`` and ``/status`` are answered for every sender.
    ``/ping``, ``/restart <hostname>`` and ``/reboot <hostname>`` are only
    carried out when the sender's id is listed in ``authorized_users``; the
    latter two additionally require ``<hostname>`` to equal this machine's
    host name. Every other request is answered with a refusal.

    Args:
        message: The incoming Telegram message.
    """
    user_id = str(message.from_user.id)
    text = message.text

    if text == '/start':
        bot.reply_to(message, 'lainmonitor v1.0 --- standing by...')
        return

    if text == '/help':
        bot.reply_to(message, 'commands: /start, /help, /status, /restart, /reboot, /ping')
        return

    if text == '/status':
        # Use the values returned by this call for everything it returns;
        # ``status`` is not part of the tuple and is read from module level.
        (hostname, uptime, zerotier, prosody,
         postgres, tailscale, nginx, disk) = get_system_info()
        status_message = (
            f'hostname: {hostname}\n'
            f'system status: {status}\n'
            f'uptime: {uptime}\n'
            f'zerotier: {zerotier}\n'
            f'prosody: {prosody}\n'
            f'postgres: {postgres}\n'
            f'tailscale: {tailscale}\n'
            f'nginx: {nginx}'
        )
        bot.reply_to(message, status_message)
        bot.reply_to(message, f'Filesystem info for {hostname}:\n\n{disk}')
        return

    # Everything below is privileged.
    if user_id not in authorized_users:
        bot.reply_to(message, 'You are not authorized for this action')
        return

    if text == '/ping':
        # Join outside the f-string: a backslash inside an f-string
        # expression is a syntax error before Python 3.12.
        ping_status = '\n'.join(check_tailscale_nodes())
        bot.reply_to(message, f'Ping status:\n\n{ping_status}')
        return

    # Resolve the host name here instead of relying on a module-level value
    # that is only filled in once a /status request has been served.
    local_host = _local_hostname()
    if text == f'/restart {local_host}':
        bot.send_message(message.chat.id, 'Select a service to restart:', reply_markup=restart_menu())
    elif text == f'/reboot {local_host}':
        bot.reply_to(message, f'Rebooting {local_host}...')
        reboot()
    else:
        bot.reply_to(message, 'You are not authorized for this action')
|
||||
|
||||
# Polling with timeout and error handling
|
||||
# Start long polling exactly once. Any exception that escapes the polling
# loop is written to the log file instead of ending the script with a bare
# traceback.
try:
    bot.polling(none_stop=True, timeout=60, long_polling_timeout=60)
except Exception as e:
    logging.error(f'Polling error: {e}')
|
||||
Loading…
Add table
Reference in a new issue