#!/bin/bash
set -euo pipefail
#######################################
# Probe a TCP port by opening a connection through bash's /dev/tcp
# redirection and report the result on stdout.
# Arguments:
#   $1 - host name or address
#   $2 - port
#   $3 - seconds to wait before giving up (optional, default 3)
# Outputs:
#   "OK: host:port" if the connection opened, otherwise "FAIL: host:port"
# Returns:
#   0 in both cases, so callers running under `set -e` are not aborted
#   by an unreachable port.
#######################################
check_port() {
  local host=$1 port=$2 wait_secs=${3:-3}
  # host and port are handed to the child shell as positional parameters
  # rather than spliced into the command string, so shell metacharacters
  # in them cannot be executed as code.
  # stderr is silenced on purpose: a refused connection is an expected
  # outcome and is reported through the FAIL line instead.
  if timeout "$wait_secs" bash -c 'echo > "/dev/tcp/$1/$2"' _ \
    "$host" "$port" 2>/dev/null; then
    echo "OK: $host:$port"
  else
    echo "FAIL: $host:$port"
  fi
}
#######################################
# Issue an HTTP request with curl and report whether the response status
# is exactly 200.
# Arguments:
#   $1 - URL to request
# Outputs:
#   "OK: url (code)" when the status is 200, otherwise "FAIL: url (code)".
#   When curl produces no status at all, the code is shown as 000.
# Returns:
#   0 in both cases, so callers running under `set -e` are not aborted
#   by a failing endpoint.
#######################################
check_http() {
  local url=$1
  local code
  # Declaration and assignment are separate so `local` does not hide
  # curl's exit status. A curl failure is tolerated deliberately: it is
  # reported through the FAIL line below, not by aborting the caller.
  code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "$url") || true
  # If curl printed nothing (e.g. it could not be started), fall back to
  # 000 so the report never shows an empty "()".
  code=${code:-000}
  if [[ "$code" == "200" ]]; then
    echo "OK: $url ($code)"
  else
    echo "FAIL: $url ($code)"
  fi
}
# Checklist of host:port endpoints to probe.
SERVICES=("localhost:22" "localhost:80" "localhost:3306")

# Split each entry at its first colon and probe the resulting endpoint.
for svc in "${SERVICES[@]}"; do
  host=${svc%%:*}
  port=${svc#*:}
  check_port "$host" "$port"
done

# Finally, probe the HTTP health endpoint.
check_http "http://localhost/health"
#!/bin/bash
set -euo pipefail
# Log rotation for myapp.
#   1. Compress every *.log file under LOG_DIR whose mtime is older than
#      RETENTION_DAYS into ARCHIVE_DIR as <basename>-<YYYYMMDD>.gz, then
#      delete the original.
#   2. Delete archives older than ARCHIVE_RETENTION_DAYS.
# Any failure aborts the run (set -e / pipefail) before the source log is
# removed.
LOG_DIR="/var/log/myapp"
RETENTION_DAYS=30
ARCHIVE_RETENTION_DAYS=90
ARCHIVE_DIR="$LOG_DIR/archive"
mkdir -p "$ARCHIVE_DIR"
# Computed once so every file archived in this run carries the same date
# suffix, even if the run crosses midnight.
stamp=$(date +%Y%m%d)
find "$LOG_DIR" -type f -name "*.log" -mtime "+$RETENTION_DAYS" -print0 |
  while IFS= read -r -d '' f; do
    dest="$ARCHIVE_DIR/$(basename -- "$f")-$stamp.gz"
    # Two logs with the same basename (in different subdirectories) map to
    # the same archive name. Never overwrite an existing archive and then
    # delete the source; leave the log in place for a later run instead.
    if [[ -e "$dest" ]]; then
      echo "Skipped (archive already exists): $f -> $dest" >&2
      continue
    fi
    # Compress to a temporary name first so a failed or interrupted gzip
    # never leaves a truncated file under the final *.gz name.
    if ! gzip -c -- "$f" > "$dest.partial"; then
      rm -f -- "$dest.partial"
      echo "ERROR: failed to compress $f" >&2
      exit 1
    fi
    mv -- "$dest.partial" "$dest"
    rm -- "$f"
    echo "Archived: $f"
  done
# Remove archives older than ARCHIVE_RETENTION_DAYS days.
find "$ARCHIVE_DIR" -type f -name "*.gz" -mtime "+$ARCHIVE_RETENTION_DAYS" -delete
echo "Cleanup done."
#!/bin/bash
set -euo pipefail
# Run one command on every host listed in a file, sequentially, over SSH.
# Usage: <script> [hosts_file] [command]
#   hosts_file  one host per line; empty lines and lines starting with '#'
#               are skipped (default: hosts.txt)
#   command     command line executed on each host (default: uptime)
# Exits 1 if the hosts file does not exist. A failure on one host is
# reported and does not stop the remaining hosts.
HOSTS_FILE="${1:-hosts.txt}"
COMMAND="${2:-uptime}"
if [[ ! -f "$HOSTS_FILE" ]]; then
  echo "Hosts file not found" >&2
  exit 1
fi
# `|| [[ -n "$host" ]]` keeps a final line that lacks a trailing newline.
while IFS= read -r host || [[ -n "$host" ]]; do
  [[ -z "$host" || "$host" =~ ^# ]] && continue
  echo "=== $host ==="
  rc=0
  # -n: ssh must not read stdin. Without it ssh swallows the rest of the
  # hosts file that feeds this loop, and only the first host is processed.
  # NOTE(review): StrictHostKeyChecking=no disables host key verification
  # and allows man-in-the-middle attacks; kept as in the original, but
  # consider "accept-new" or a managed known_hosts file.
  ssh -n -o ConnectTimeout=5 -o StrictHostKeyChecking=no \
    "$host" "$COMMAND" 2>&1 || rc=$?
  # ssh itself exits with 255 on connection/protocol errors; any other
  # non-zero value is the exit status of the remote command.
  if ((rc == 255)); then
    echo "FAILED to connect"
  elif ((rc != 0)); then
    echo "FAILED: command exited with status $rc"
  fi
  echo
done < "$HOSTS_FILE"
#!/bin/bash
# Disk usage alert: mail ALERT_EMAIL for every mounted filesystem whose
# usage percentage is above THRESHOLD.
THRESHOLD=85
ALERT_EMAIL="sre@example.com"

#######################################
# Read `df -P` style output on stdin and send one mail per filesystem
# whose capacity exceeds the threshold.
# Globals:
#   THRESHOLD (read), ALERT_EMAIL (read)
# Arguments:
#   None
# Inputs:
#   stdin - a header line followed by rows of six columns:
#           filesystem, blocks, used, available, capacity%, mount point
# Returns:
#   0; a failed mail is reported on stderr and does not stop the scan.
#######################################
check_disk_usage() {
  local _fs _blocks _used _avail capacity mount pct
  # The first line is the column header.
  read -r _ || return 0
  # `read` assigns the remainder of the line to the last variable, so
  # mount points containing spaces are kept intact.
  while read -r _fs _blocks _used _avail capacity mount; do
    pct=${capacity%\%}
    # Skip rows whose capacity is not a plain number (e.g. "-"); they
    # would otherwise break the arithmetic comparison.
    [[ $pct =~ ^[0-9]+$ ]] || continue
    if ((pct > THRESHOLD)); then
      echo "WARNING: $mount is ${pct}% full" |
        mail -s "Disk Alert: $mount" "$ALERT_EMAIL" ||
        echo "ERROR: could not send alert for $mount" >&2
    fi
  done
}

# -P selects the POSIX output format: exactly one line per filesystem,
# so long device names cannot wrap and shift the columns.
df -P | check_disk_usage
#!/bin/bash
# Process watchdog: every CHECK_INTERVAL seconds, check whether a process
# named exactly PROCESS_NAME is running. If it is not, run RESTART_CMD.
# After MAX_RESTART restart attempts, the next detected outage sends a
# critical mail and the watchdog exits with status 1.
PROCESS_NAME="myapp"
# Held as an array so each word reaches the command as its own argument
# without relying on unquoted word splitting.
RESTART_CMD=(systemctl restart myapp)
MAX_RESTART=3
CHECK_INTERVAL=10
# NOTE(review): this counter is never reset once the process is healthy
# again, so MAX_RESTART caps restarts over the watchdog's whole lifetime.
# Confirm that this is intended.
restart_count=0
while true; do
  if ! pgrep -x "$PROCESS_NAME" > /dev/null; then
    echo "[$(date)] $PROCESS_NAME is DOWN"
    if ((restart_count < MAX_RESTART)); then
      # Plain assignment instead of ((restart_count++)): the latter returns
      # status 1 when the old value is 0, which kills the script under
      # `set -e`.
      restart_count=$((restart_count + 1))
      if "${RESTART_CMD[@]}"; then
        echo "Restarted (attempt $restart_count/$MAX_RESTART)"
      else
        echo "Restart command failed (attempt $restart_count/$MAX_RESTART)" >&2
      fi
    else
      # A mail failure must not change the exit path below.
      echo "FATAL: Max restart attempts reached" |
        mail -s "CRITICAL: $PROCESS_NAME failed" sre@example.com ||
        echo "ERROR: could not send critical alert" >&2
      exit 1
    fi
  fi
  sleep "$CHECK_INTERVAL"
done