网站首页学无止境LINUX
服务器自动监控Shell脚本
发布时间:2014-10-24 22:48:14编辑:songlin阅读(102)
-
一、使用方法:
- git clone git://gist.github.com/1216837.git gist-1216837
- vim gist-1216837/sys-mon.sh //修改内存、CPU等预设阈值
- mkdir /var/script
- mv gist-1216837/sys-mon.sh /var/script
- crontab -e
- * * * * * /bin/bash /var/script/sys-mon.sh
#!/bin/bash
#====================================================================
# sys-mon.sh
#
# Copyright (c) 2011, WangYan
# All rights reserved.
# Distributed under the GNU General Public License, version 3.0.
#
# Monitor system mem and load, if too high, restart some service.
# Also probes MY_URL and reports when the website does not answer 200.
# Intended to be run from cron once a minute.
#
# Requires: bc, awk, ps, pgrep/pkill, killall, curl, mail
#
# See: http://wangyan.org/blog/sys-mon-shell-script.html
#
# V 0.5, Date: 2011-12-08
#====================================================================

# Need to monitor the service name
# Must be in /etc/init.d folder exists
NAME_LIST="httpd nginx mysql"

# Single process to allow the maximum CPU (%)
PID_CPU_MAX="25"

# The maximum allowed memory (%)
PID_MEM_SUM_MAX="95"

# The maximum allowed system load
SYS_LOAD_MAX="6"

# Log path settings
LOG_PATH="/var/log/sys-mon.log"

# Timestamp used as the prefix of every log/mail line
# (evaluated once, when the script starts)
DATA_TIME=$(date +"%y-%m-%d %H:%M:%S")

# Your email address
EMAIL="webmaster@example.com"

# Your website url
MY_URL="http://106.187.38.210/p.php"

#====================================================================

# Summary of the most recently inspected service ("CPU:.. MEM:.. LOAD:..").
# Written by monitor_service, read by the log/mail messages.
SYS_MON=""

# Print a timestamped message to stdout and append it to LOG_PATH.
log() {
  printf '%s\n' "${DATA_TIME}: $*" | tee -a "$LOG_PATH"
}

# Numeric comparison helper: num_cmp A OP B  (OP is e.g. ">" or ">=").
# Returns 0 when the comparison holds. Values are passed with -v instead of
# being spliced into the awk program, so an empty value is treated as 0
# rather than producing an awk syntax error.
num_cmp() {
  awk -v a="$1" -v b="$3" "BEGIN { exit !((a + 0) $2 (b + 0)) }"
}

# Print the sum of two decimal numbers.
add() {
  echo "$1 + $2" | bc
}

# Succeed when at least one process whose name matches $1 exists.
is_running() {
  pgrep "$1" >/dev/null
}

# Print one "PID %CPU %MEM" line for every process whose name matches $1
# and which is not owned by root (root-owned processes are skipped, as in
# the original "grep -v root" filter). pgrep is used instead of
# "ps aux | grep" so that neither the grep itself nor unrelated command
# lines containing the name are counted.
worker_procs() {
  local pids
  pids=$(pgrep -d, "$1") || return 0
  ps -o user= -o pid= -o pcpu= -o pmem= -p "$pids" \
    | awk '$1 != "root" { print $2, $3, $4 }'
}

# Print the HTTP status code returned by URL $1 (000 when unreachable).
# The timeout keeps a hung request from stacking up cron runs.
http_status() {
  curl -o /dev/null -s -m 10 -w '%{http_code}' "$1"
}

# Handle one process that may be using too much CPU.
#   $1 service name, $2 pid, $3 CPU usage (%) as sampled by ps
# php-fpm/httpd processes are re-sampled after 5 seconds and killed if they
# are still above PID_CPU_MAX; for other services only a warning is logged.
check_process_cpu() {
  local name=$1 pid=$2 cpu=$3

  num_cmp "$cpu" ">=" "$PID_CPU_MAX" || return 0

  if [[ "$name" == "php-fpm" || "$name" == "httpd" ]]; then
    sleep 5
    # Take a fresh sample; empty output means the process already exited.
    cpu=$(ps -o pcpu= -p "$pid" | tr -d ' ')
    if [[ -n "$cpu" ]] && num_cmp "$cpu" ">=" "$PID_CPU_MAX"; then
      if kill "$pid"; then
        log "kill ${name}($pid) successful (CPU:$cpu)"
      else
        log "[WARNING!] kill ${name}($pid) failed (CPU:$cpu)"
      fi
    fi
  else
    log "[WARNING!] ${name}($pid) cpu usage is too high! (CPU:$cpu)"
  fi
}

# Stop and start service $1 through its /etc/init.d script, retrying each
# phase up to three times, and mail EMAIL if it is still down afterwards.
restart_service() {
  local name=$1 attempt msg

  "/etc/init.d/$name" stop
  sleep 5
  for ((attempt = 1; attempt < 4; attempt++)); do
    if ! is_running "$name"; then
      log "Stop $name successful! ($SYS_MON)"
      break
    fi
    log "[WARNING!] Stop $name failed[$attempt]! ($SYS_MON)"
    pkill "$name" && killall "$name"
    # Give the signalled processes a moment to exit before re-checking.
    sleep 2
  done

  "/etc/init.d/$name" start
  sleep 5
  for ((attempt = 1; attempt < 4; attempt++)); do
    if is_running "$name"; then
      log "Start $name successful!"
      return 0
    fi
    log "[WARNING!] Start $name failed[$attempt]! ($SYS_MON)"
    "/etc/init.d/$name" start
    sleep 5
  done

  # Only report an error when the service is really not running.
  if ! is_running "$name"; then
    msg="${DATA_TIME}: [ERROR!] Start $name failed! ($SYS_MON)"
    printf '%s\n' "$msg" | tee -a "$LOG_PATH" | mail -s "Start $name failed" "$EMAIL"
  fi
}

# Inspect every non-root process of service $1, sum up CPU/MEM usage,
# and restart the service when the system load or the summed memory
# usage exceeds the configured limits.
monitor_service() {
  local name=$1 cpu_sum=0 mem_sum=0 pid cpu mem load

  # The process list is read from fd 3 so commands inside the loop
  # cannot accidentally consume it from stdin.
  while read -r pid cpu mem <&3; do
    cpu_sum=$(add "$cpu_sum" "$cpu")
    mem_sum=$(add "$mem_sum" "$mem")
    check_process_cpu "$name" "$pid" "$cpu"
  done 3< <(worker_procs "$name")

  # Third field from the end of uptime's output, minus its trailing comma.
  load=$(LC_ALL=C uptime | awk '{ v = $(NF-2); sub(/,$/, "", v); print v }')
  SYS_MON="CPU:$cpu_sum MEM:$mem_sum LOAD:$load"

  if num_cmp "$load" ">" "$SYS_LOAD_MAX" \
    || num_cmp "$mem_sum" ">" "$PID_MEM_SUM_MAX"; then
    restart_service "$name"
  fi
}

# Probe MY_URL; if it fails twice (3 seconds apart) log and mail a warning.
check_website() {
  local status msg

  status=$(http_status "$MY_URL")
  [[ "$status" == "200" ]] && return 0

  sleep 3
  status=$(http_status "$MY_URL")
  if [[ "$status" != "200" ]]; then
    msg="[WARNING!] Website Downtime! ($SYS_MON)"
    log "$msg"
    printf '%s\n' "${DATA_TIME}: $msg" | mail -s "Website Downtime" "$EMAIL"
  fi
}

main() {
  local name

  # Fail early with a clear message instead of a stream of
  # "bc: command not found" / "unary operator expected" errors.
  if ! command -v bc >/dev/null 2>&1; then
    echo "sys-mon.sh: bc is required but not installed (e.g. yum -y install bc)" >&2
    exit 1
  fi

  # NAME_LIST is a space-separated list; word splitting is intentional.
  for name in $NAME_LIST; do
    monitor_service "$name"
  done

  check_website
}

main "$@"
- ./xxx.sh # xxx为自命名
- ./pid_auto_reboot.sh: line 57: bc: command not found
- ./pid_auto_reboot.sh: line 59: bc: command not found
- ./pid_auto_reboot.sh: line 59: [: -eq: unary operator expected
- ./pid_auto_reboot.sh: line 56: bc: command not found
- ./pid_auto_reboot.sh: line 57: bc: command not found
- ./pid_auto_reboot.sh: line 59: bc: command not found
- ./pid_auto_reboot.sh: line 59: [: -eq: unary operator expected
- ./pid_auto_reboot.sh: line 56: bc: command not found
- ./pid_auto_reboot.sh: line 57: bc: command not found
- ./pid_auto_reboot.sh: line 59: bc: command not found
- ./pid_auto_reboot.sh: line 59: [: -eq: unary operator expected
- ./pid_auto_reboot.sh: line 56: bc: command not found
- ./pid_auto_reboot.sh: line 57: bc: command not found
- ./pid_auto_reboot.sh: line 59: bc: command not found
- ./pid_auto_reboot.sh: line 59: [: -eq: unary operator expected
- awk: BEGIN{print(>95)}
- awk: ^ syntax error
- awk: BEGIN{print(>95)}
- awk: ^ syntax error
- ./pid_auto_reboot.sh: line 56: bc: command not found
- ./pid_auto_reboot.sh: line 57: bc: command not found
- ./pid_auto_reboot.sh: line 59: bc: command not found
- ./pid_auto_reboot.sh: line 59: [: -eq: unary operator expected
- yum -y install bc
设置每分钟执行一次
二、Shell脚本内容
脚本内容不难理解,原理解释可参考《Linux 进程自动监控shell脚本》
三、注意事项
1、NAME_LIST 指定的监控程序必须在/etc/init.d 文件夹中存在,并且支持stop和start操作
2、PID_CPU_MAX 指的是单个进程的CPU占用,只针对php-fpm或httpd。
3、PID_MEM_SUM_MAX 指的是该程序所有进程实际内存占用,而并非系统总内存。
4、EMAIL 在程序启动失败或网站无法访问时,你会收到邮件提醒。
四、调试(补充,以上为WangYan Blog 原文章)
脚本写完之后让我们运行一下,当然脚本里面的 EMAIL 和 MY_URL 还是需要改成你自己的,结果:
什么问题?哦,原来是没有 bc 命令。没关系,没有我们就安装一下嘛,我用的是 CentOS,直接用 yum(yum -y install bc)
安装一下,再运行就搞定了。
本文来源地址:http://wangyan.org/blog/sys-mon-shell-script.html