mirror of
https://github.com/alexbelgium/hassio-addons.git
synced 2026-06-12 02:21:28 +02:00
Update 30-monitoring.sh
This commit is contained in:
@@ -1,7 +1,6 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# shellcheck shell=bash
|
# shellcheck shell=bash
|
||||||
# Improved BirdNET-Pi Monitoring Script
|
# Improved BirdNET-Pi Monitoring Script with Recovery Alerts
|
||||||
# Adapted and enhanced based on your original script.
|
|
||||||
|
|
||||||
HOME="/home/pi"
|
HOME="/home/pi"
|
||||||
|
|
||||||
@@ -21,7 +20,7 @@ set +u
|
|||||||
source /etc/birdnet/birdnet.conf
|
source /etc/birdnet/birdnet.conf
|
||||||
|
|
||||||
########################################
|
########################################
|
||||||
# Wait 1 minutes for system stabilization
|
# Wait 1 minute for system stabilization
|
||||||
########################################
|
########################################
|
||||||
sleep 1m
|
sleep 1m
|
||||||
|
|
||||||
@@ -32,7 +31,6 @@ log_green "Starting service: throttlerecording"
|
|||||||
########################################
|
########################################
|
||||||
INGEST_DIR="${RECS_DIR/StreamData:-$HOME/BirdSongs/StreamData}"
|
INGEST_DIR="${RECS_DIR/StreamData:-$HOME/BirdSongs/StreamData}"
|
||||||
ANALYZING_NOW_FILE="$INGEST_DIR/analyzing_now.txt"
|
ANALYZING_NOW_FILE="$INGEST_DIR/analyzing_now.txt"
|
||||||
# Create the file if it does not exist.
|
|
||||||
touch "$ANALYZING_NOW_FILE"
|
touch "$ANALYZING_NOW_FILE"
|
||||||
|
|
||||||
# Ensure directories and set permissions
|
# Ensure directories and set permissions
|
||||||
@@ -47,14 +45,14 @@ ANALYZER_SERVICE="birdnet_analysis"
|
|||||||
# Notification settings
|
# Notification settings
|
||||||
NOTIFICATION_INTERVAL=1800 # seconds (30 minutes)
|
NOTIFICATION_INTERVAL=1800 # seconds (30 minutes)
|
||||||
last_notification_time=0
|
last_notification_time=0
|
||||||
|
issue_reported=0 # 1 = an issue was reported, 0 = system is normal
|
||||||
|
|
||||||
# Disk usage threshold (percentage)
|
# Disk usage threshold (percentage)
|
||||||
DISK_USAGE_THRESHOLD=95
|
DISK_USAGE_THRESHOLD=95
|
||||||
|
|
||||||
# "Analyzing" file check variables
|
# "Analyzing" file check variables
|
||||||
same_file_counter=0
|
same_file_counter=0
|
||||||
SAME_FILE_THRESHOLD=10 # number of iterations to consider the file as "stuck"
|
SAME_FILE_THRESHOLD=10
|
||||||
# Initialize the content of analyzing_now from the file
|
|
||||||
if [[ -f "$ANALYZING_NOW_FILE" ]]; then
|
if [[ -f "$ANALYZING_NOW_FILE" ]]; then
|
||||||
analyzing_now=$(cat "$ANALYZING_NOW_FILE")
|
analyzing_now=$(cat "$ANALYZING_NOW_FILE")
|
||||||
else
|
else
|
||||||
@@ -65,14 +63,13 @@ fi
|
|||||||
# Functions
|
# Functions
|
||||||
########################################
|
########################################
|
||||||
|
|
||||||
# Send a notification using Apprise.
|
# Send an issue notification
|
||||||
apprisealert() {
|
apprisealert() {
|
||||||
local issue_message="$1"
|
local issue_message="$1"
|
||||||
local current_time
|
local current_time
|
||||||
current_time=$(date +%s)
|
current_time=$(date +%s)
|
||||||
local time_diff=$(( current_time - last_notification_time ))
|
local time_diff=$(( current_time - last_notification_time ))
|
||||||
|
|
||||||
# Throttle notifications
|
|
||||||
if (( time_diff < NOTIFICATION_INTERVAL )); then
|
if (( time_diff < NOTIFICATION_INTERVAL )); then
|
||||||
log_yellow "Notification suppressed (last sent ${time_diff} seconds ago)"
|
log_yellow "Notification suppressed (last sent ${time_diff} seconds ago)"
|
||||||
return
|
return
|
||||||
@@ -81,7 +78,7 @@ apprisealert() {
|
|||||||
local notification=""
|
local notification=""
|
||||||
local stopped_service="<br><b>Stopped services:</b> "
|
local stopped_service="<br><b>Stopped services:</b> "
|
||||||
|
|
||||||
# Check for stopped services (add or remove services as needed)
|
# Check for stopped services
|
||||||
local services=(birdnet_analysis chart_viewer spectrogram_viewer icecast2 birdnet_recording birdnet_log birdnet_stats)
|
local services=(birdnet_analysis chart_viewer spectrogram_viewer icecast2 birdnet_recording birdnet_log birdnet_stats)
|
||||||
for service in "${services[@]}"; do
|
for service in "${services[@]}"; do
|
||||||
if [[ "$(systemctl is-active "$service")" != "active" ]]; then
|
if [[ "$(systemctl is-active "$service")" != "active" ]]; then
|
||||||
@@ -89,7 +86,6 @@ apprisealert() {
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# Build the notification message in HTML format.
|
|
||||||
notification+="<b>Issue:</b> $issue_message"
|
notification+="<b>Issue:</b> $issue_message"
|
||||||
notification+="$stopped_service"
|
notification+="$stopped_service"
|
||||||
notification+="<br><b>System:</b> ${SITE_NAME:-$(hostname)}"
|
notification+="<br><b>System:</b> ${SITE_NAME:-$(hostname)}"
|
||||||
@@ -100,54 +96,78 @@ apprisealert() {
|
|||||||
if [[ -f "$HOME/BirdNET-Pi/birdnet/bin/apprise" && -s "$HOME/BirdNET-Pi/apprise.txt" ]]; then
|
if [[ -f "$HOME/BirdNET-Pi/birdnet/bin/apprise" && -s "$HOME/BirdNET-Pi/apprise.txt" ]]; then
|
||||||
"$HOME/BirdNET-Pi/birdnet/bin/apprise" -vv -t "$TITLE" -b "$notification" --input-format=html --config="$HOME/BirdNET-Pi/apprise.txt"
|
"$HOME/BirdNET-Pi/birdnet/bin/apprise" -vv -t "$TITLE" -b "$notification" --input-format=html --config="$HOME/BirdNET-Pi/apprise.txt"
|
||||||
last_notification_time=$current_time
|
last_notification_time=$current_time
|
||||||
|
issue_reported=1 # Mark that an issue was reported
|
||||||
else
|
else
|
||||||
log_red "Apprise not configured or missing!"
|
log_red "Apprise not configured or missing!"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check and restart a given service if it is not active.
|
# Send a "System is back to normal" notification
|
||||||
|
apprisealert_recovery() {
|
||||||
|
if (( issue_reported == 1 )); then
|
||||||
|
log_green "$(date) INFO: System is back to normal. Sending recovery notification."
|
||||||
|
|
||||||
|
local TITLE="BirdNET-Pi System Recovered"
|
||||||
|
local notification="<b>All monitored services are back to normal.</b><br>"
|
||||||
|
notification+="<b>System:</b> ${SITE_NAME:-$(hostname)}<br>"
|
||||||
|
notification+="Available disk space: $(df -h "$HOME/BirdSongs" | awk 'NR==2 {print $4}')"
|
||||||
|
|
||||||
|
if [[ -f "$HOME/BirdNET-Pi/birdnet/bin/apprise" && -s "$HOME/BirdNET-Pi/apprise.txt" ]]; then
|
||||||
|
"$HOME/BirdNET-Pi/birdnet/bin/apprise" -vv -t "$TITLE" -b "$notification" --input-format=html --config="$HOME/BirdNET-Pi/apprise.txt"
|
||||||
|
fi
|
||||||
|
issue_reported=0 # Reset issue tracker
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Restart a service if inactive
|
||||||
check_and_restart_service() {
|
check_and_restart_service() {
|
||||||
local service_name="$1"
|
local service_name="$1"
|
||||||
local state
|
local state
|
||||||
state=$(systemctl is-active "$service_name")
|
state=$(systemctl is-active "$service_name")
|
||||||
|
|
||||||
if [[ "$state" != "active" ]]; then
|
if [[ "$state" != "active" ]]; then
|
||||||
log_yellow "$(date) INFO: Restarting $service_name"
|
log_yellow "$(date) INFO: Restarting $service_name"
|
||||||
sudo systemctl restart "$service_name"
|
sudo systemctl restart "$service_name"
|
||||||
sleep 61
|
sleep 61
|
||||||
state=$(systemctl is-active "$service_name")
|
state=$(systemctl is-active "$service_name")
|
||||||
|
|
||||||
if [[ "$state" != "active" ]]; then
|
if [[ "$state" != "active" ]]; then
|
||||||
log_red "$(date) WARNING: $service_name could not restart"
|
log_red "$(date) WARNING: $service_name could not restart"
|
||||||
apprisealert "$service_name cannot restart ! Your system seems stuck."
|
apprisealert "$service_name cannot restart! Your system seems stuck."
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check disk usage and send a notification if above threshold.
|
# Check disk usage
|
||||||
check_disk_space() {
|
check_disk_space() {
|
||||||
local current_usage
|
local current_usage
|
||||||
current_usage=$(df -h "$HOME/BirdSongs" | awk 'NR==2 {print $5}' | sed 's/%//')
|
current_usage=$(df -h "$HOME/BirdSongs" | awk 'NR==2 {print $5}' | sed 's/%//')
|
||||||
|
|
||||||
if (( current_usage >= DISK_USAGE_THRESHOLD )); then
|
if (( current_usage >= DISK_USAGE_THRESHOLD )); then
|
||||||
log_red "$(date) WARNING: Disk usage is at ${current_usage}% (threshold is ${DISK_USAGE_THRESHOLD}%)"
|
log_red "$(date) WARNING: Disk usage is at ${current_usage}%"
|
||||||
apprisealert "Disk usage critical: ${current_usage}%"
|
apprisealert "Disk usage critical: ${current_usage}%"
|
||||||
|
else
|
||||||
|
apprisealert_recovery
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Handle the file queue (number of .wav files in the ingest directory).
|
# Handle queue size
|
||||||
handle_queue() {
|
handle_queue() {
|
||||||
local wav_count="$1"
|
local wav_count="$1"
|
||||||
|
|
||||||
if (( wav_count > 50 )); then
|
if (( wav_count > 50 )); then
|
||||||
log_red "$(date) WARNING: Too many files in queue (>50). Pausing ${RECORDER_SERVICE} and restarting ${ANALYZER_SERVICE}"
|
log_red "$(date) WARNING: Queue >50. Pausing ${RECORDER_SERVICE} and restarting ${ANALYZER_SERVICE}"
|
||||||
apprisealert "Queue >50: ${RECORDER_SERVICE} paused, ${ANALYZER_SERVICE} restarted"
|
apprisealert "Queue >50: ${RECORDER_SERVICE} paused, ${ANALYZER_SERVICE} restarted"
|
||||||
sudo systemctl stop "$RECORDER_SERVICE"
|
sudo systemctl stop "$RECORDER_SERVICE"
|
||||||
sudo systemctl restart "$ANALYZER_SERVICE"
|
sudo systemctl restart "$ANALYZER_SERVICE"
|
||||||
elif (( wav_count > 30 )); then
|
elif (( wav_count > 30 )); then
|
||||||
log_red "$(date) WARNING: Queue growing (>30). Restarting ${ANALYZER_SERVICE}"
|
log_red "$(date) WARNING: Queue >30. Restarting ${ANALYZER_SERVICE}"
|
||||||
apprisealert "Queue >30: ${ANALYZER_SERVICE} restarted"
|
apprisealert "Queue >30: ${ANALYZER_SERVICE} restarted"
|
||||||
sudo systemctl restart "$ANALYZER_SERVICE"
|
sudo systemctl restart "$ANALYZER_SERVICE"
|
||||||
else
|
else
|
||||||
# If the queue is normal, check both services.
|
|
||||||
check_and_restart_service "$RECORDER_SERVICE"
|
check_and_restart_service "$RECORDER_SERVICE"
|
||||||
check_and_restart_service "$ANALYZER_SERVICE"
|
check_and_restart_service "$ANALYZER_SERVICE"
|
||||||
|
apprisealert_recovery
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -156,11 +176,8 @@ handle_queue() {
|
|||||||
########################################
|
########################################
|
||||||
while true; do
|
while true; do
|
||||||
sleep 61
|
sleep 61
|
||||||
|
|
||||||
# Check disk space usage first.
|
|
||||||
check_disk_space
|
check_disk_space
|
||||||
|
|
||||||
# Check the content of the analyzing_now file to see if it has changed.
|
|
||||||
current_file=$(cat "$ANALYZING_NOW_FILE" 2>/dev/null)
|
current_file=$(cat "$ANALYZING_NOW_FILE" 2>/dev/null)
|
||||||
if [[ "$current_file" == "$analyzing_now" ]]; then
|
if [[ "$current_file" == "$analyzing_now" ]]; then
|
||||||
(( same_file_counter++ ))
|
(( same_file_counter++ ))
|
||||||
@@ -170,17 +187,13 @@ while true; do
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if (( same_file_counter >= SAME_FILE_THRESHOLD )); then
|
if (( same_file_counter >= SAME_FILE_THRESHOLD )); then
|
||||||
log_yellow "$(date) WARNING: 'analyzing_now' unchanged for ${SAME_FILE_THRESHOLD} iterations, restarting services"
|
log_yellow "$(date) WARNING: 'analyzing_now' unchanged for ${SAME_FILE_THRESHOLD} iterations"
|
||||||
apprisealert "No change in analyzing_now for ${SAME_FILE_THRESHOLD} iterations"
|
apprisealert "No change in analyzing_now for ${SAME_FILE_THRESHOLD} iterations"
|
||||||
"$HOME/BirdNET-Pi/scripts/restart_services.sh"
|
"$HOME/BirdNET-Pi/scripts/restart_services.sh"
|
||||||
same_file_counter=0
|
same_file_counter=0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Count the number of .wav files in the ingest directory.
|
|
||||||
wav_count=$(find -L "$INGEST_DIR" -maxdepth 1 -name '*.wav' | wc -l)
|
wav_count=$(find -L "$INGEST_DIR" -maxdepth 1 -name '*.wav' | wc -l)
|
||||||
log_green "$(date) INFO: ${wav_count} wav files waiting in ${INGEST_DIR}"
|
log_green "$(date) INFO: ${wav_count} wav files waiting in ${INGEST_DIR}"
|
||||||
|
|
||||||
# Handle queue size conditions and service health.
|
|
||||||
handle_queue "$wav_count"
|
handle_queue "$wav_count"
|
||||||
|
|
||||||
done
|
done
|
||||||
|
|||||||
Reference in New Issue
Block a user