Article
nagios监控主机
# 更新
- 2016-1-20
- 2016-4-20 nrpe编译好后,在编译目录有 init-script 服务脚本。
# 正文
和Cacti查看时间序列图形不同,Nagios更多的是状态的预警。
- 下载应用
到sourceforge下载最新版的应用。
[root@cu2 nagios]# cat /etc/redhat-release
CentOS release 6.6 (Final)
[root@cu2 nagios]# ll
-rw-r--r-- 1 root root 11206656 Sep 23 12:47 nagios-4.1.1.tar.gz
-rw-r--r-- 1 root root 2677352 Sep 23 12:47 nagios-plugins-2.1.1.tar.gz
-rw-r--r-- 1 root root 419695 Sep 23 15:18 nrpe-2.15.tar.gz
- 新增nagios用户
[root@cu2 nagios]# useradd nagios
[root@cu2 nagios]# groupadd nagcmd
[root@cu2 nagios]# usermod -G nagcmd nagios
[root@cu2 nagios]# usermod -G nagcmd apache
# 如果已经安装了httpd,先查看httpd进程是以哪个用户运行的,把该用户加入到nagcmd组。修改后需要重启httpd
- 编译Nagios服务端
由于前面安装Cacti已经把依赖都安装了,如gcc、gd、httpd、php php-devel php-mysql php-pear php-common php-gd php-mbstring php-cli。
[root@cu2 nagios]# cd nagios-4.1.1/
[root@cu2 nagios-4.1.1]# ./configure --with-command-group=nagcmd
...
Creating sample config files in sample-config/ ...
*** Configuration summary for nagios 4.1.1 08-19-2015 ***:
General Options:
-------------------------
Nagios executable: nagios
Nagios user/group: nagios,nagios
Command user/group: nagios,nagcmd
Event Broker: yes
Install ${prefix}: /usr/local/nagios
Install ${includedir}: /usr/local/nagios/include/nagios
Lock file: ${prefix}/var/nagios.lock
Check result directory: ${prefix}/var/spool/checkresults
Init directory: /etc/rc.d/init.d
Apache conf.d directory: /etc/httpd/conf.d
Mail program: /bin/mail
Host OS: linux-gnu
IOBroker Method: epoll
Web Interface Options:
------------------------
HTML URL: http://localhost/nagios/
CGI URL: http://localhost/nagios/cgi-bin/
Traceroute (used by WAP):
Review the options above for accuracy. If they look okay,
type 'make all' to compile the main program and CGIs.
# 编译
[root@cu2 nagios-4.1.1]# make all
...
*** Compile finished ***
If the main program and CGIs compiled without any errors, you
can continue with installing Nagios as follows (type 'make'
without any arguments for a list of all possible options):
make install
- This installs the main program, CGIs, and HTML files
make install-init
- This installs the init script in /etc/rc.d/init.d
make install-commandmode
- This installs and configures permissions on the
directory for holding the external command file
make install-config
- This installs *SAMPLE* config files in /usr/local/nagios/etc
You'll have to modify these sample files before you can
use Nagios. Read the HTML documentation for more info
on doing this. Pay particular attention to the docs on
object configuration files, as they determine what/how
things get monitored!
make install-webconf
- This installs the Apache config file for the Nagios
web interface
make install-exfoliation
- This installs the Exfoliation theme for the Nagios
web interface
make install-classicui
- This installs the classic theme for the Nagios
web interface
*** Support Notes ***
If you have questions about configuring or running Nagios,
please make sure that you:
- Look at the sample config files
- Read the documentation on the Nagios Library at:
https://library.nagios.com
before you post a question to one of the mailing lists.
Also make sure to include pertinent information that could
help others help you. This might include:
- What version of Nagios you are using
- What version of the plugins you are using
- Relevant snippets from your config files
- Relevant error messages from the Nagios log file
For more information on obtaining support for Nagios, visit:
https://support.nagios.com
*************************************************************
Enjoy.
# 安装
[root@cu2 nagios-4.1.1]# make install
...
*** Main program, CGIs and HTML files installed ***
You can continue with installing Nagios as follows (type 'make'
without any arguments for a list of all possible options):
make install-init
- This installs the init script in /etc/rc.d/init.d
make install-commandmode
- This installs and configures permissions on the
directory for holding the external command file
make install-config
- This installs sample config files in /usr/local/nagios/etc
make[1]: Leaving directory `/data/nagios/nagios-4.1.1'
# 初始化
[root@cu2 nagios-4.1.1]# make install-init
/usr/bin/install -c -m 755 -d -o root -g root /etc/rc.d/init.d
/usr/bin/install -c -m 755 -o root -g root daemon-init /etc/rc.d/init.d/nagios
*** Init script installed ***
# 安装/初始化配置
[root@cu2 nagios-4.1.1]# make install-config
...
*** Config files installed ***
Remember, these are *SAMPLE* config files. You'll need to read
the documentation for more information on how to actually define
services, hosts, etc. to fit your particular needs.
# 修改权限
[root@cu2 nagios-4.1.1]# make install-commandmode
/usr/bin/install -c -m 775 -o nagios -g nagcmd -d /usr/local/nagios/var/rw
chmod g+s /usr/local/nagios/var/rw
*** External command directory configured ***
# 配置
[root@cu2 nagios-4.1.1]# vi /usr/local/nagios/etc/objects/contacts.cfg
...修改define contact定义中的mail为你邮件。
define contact{
contact_name nagiosadmin ; Short name of user
use generic-contact ; Inherit default values from generic-contact template (defined above)
alias Nagios Admin ; Full name of user
email 1234@XXX.cn ; <<* CHANGE THIS TO YOUR EMAIL ADDRESS **
}
# httpd配置
[root@cu2 nagios-4.1.1]# make install-webconf
/usr/bin/install -c -m 644 sample-config/httpd.conf /etc/httpd/conf.d/nagios.conf
*** Nagios/Apache conf file installed ***
# 权限/密码
[root@cu2 nagios-4.1.1]# htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
New password:
Re-type new password:
Adding password for user nagiosadmin
[root@cu-omc1 nagios-4.1.1]# chmod 644 /usr/local/nagios/etc/htpasswd.users
# 重启HTTPD(应该可以不用重启)
[root@cu2 nagios-4.1.1]# service httpd restart
Stopping httpd: [ OK ]
Starting httpd: httpd: Could not reliably determine the server's fully qualified domain name, using 192.168.0.214 for ServerName
[ OK ]
- 编译nagios-plugin
如果configure没有mysql和ssl,先安装依赖
yum -y install mysql-devel openssl
[root@cu2 nagios]# cd nagios-plugins-2.1.1/
[root@cu2 nagios-plugins-2.1.1]# ./configure --with-nagios-user=nagios --with-nagios-group=nagios
...
--with-apt-get-command:
--with-ping6-command: /bin/ping6 -n -U -w %d -c %d %s
--with-ping-command: /bin/ping -n -U -w %d -c %d %s
--with-ipv6: yes
--with-mysql: /usr/bin/mysql_config
--with-openssl: yes
--with-gnutls: no
--enable-extra-opts: yes
--with-perl: /usr/bin/perl
--enable-perl-modules: no
--with-cgiurl: /nagios/cgi-bin
--with-trusted-path: /usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
--enable-libtap: no
[root@cu2 nagios-plugins-2.1.1]# make
[root@cu2 nagios-plugins-2.1.1]# make install
打开浏览器,使用nagiosadmin和刚刚用htpasswd设置的密码登录,就可以查看localhost的状态了。
手动制造一点异常,如登录用户超过50个。然后看刚刚设置的提醒邮箱是否收到告警。
[root@cu2 nagios]# service nagios status
nagios (pid 53764) is running...
[root@cu2 nagios]# lsof -p 53764
nagios 53764 nagios 4u REG 8,3 12715 280762 /usr/local/nagios/var/nagios.log
# 日志目录,看看是否有错误,然后做相应的处理
[root@cu2 nagios]# less /usr/local/nagios/var/nagios.log
# 如,机器可能没有安装mail等
[root@cu2 nagios]# yum install mail -y
- 被监控机器安装nrpe
程序编译都在cu2上面操作,编译完后,把编译安装的程序直接scp到其他机器就可以了。被监控机器需要nagios-plugin和nrpe。
编译之前可以先看看nrpe-2.15/docs/NRPE.pdf,讲的很详细和清楚。
[root@cu2 nagios-plugins-2.1.1]# cd ../nrpe-2.15
[root@cu2 nrpe-2.15]# ./configure
...
*** Configuration summary for nrpe 2.15 09-06-2013 ***:
General Options:
-------------------------
NRPE port: 5666
NRPE user: nagios
NRPE group: nagios
Nagios user: nagios
Nagios group: nagios
Review the options above for accuracy. If they look okay,
type 'make all' to compile the NRPE daemon and client.
[root@cu2 nrpe-2.15]# make all
cd ./src/; make ; cd ..
make[1]: Entering directory `/data/nagios/nrpe-2.15/src'
gcc -g -O2 -I/usr/include/openssl -I/usr/include -DHAVE_CONFIG_H -I ../include -I ./../include -o nrpe ./nrpe.c ./utils.c ./acl.c -L/usr/lib64 -lssl -lcrypto -lnsl -lwrap
gcc -g -O2 -I/usr/include/openssl -I/usr/include -DHAVE_CONFIG_H -I ../include -I ./../include -o check_nrpe ./check_nrpe.c ./utils.c -L/usr/lib64 -lssl -lcrypto -lnsl
make[1]: Leaving directory `/data/nagios/nrpe-2.15/src'
*** Compile finished ***
If the NRPE daemon and client compiled without any errors, you
can continue with the installation or upgrade process.
Read the PDF documentation (NRPE.pdf) for information on the next
steps you should take to complete the installation or upgrade.
[root@cu2 nrpe-2.15]# make install-plugin
cd ./src/ && make install-plugin
make[1]: Entering directory `/data/nagios/nrpe-2.15/src'
/usr/bin/install -c -m 775 -o nagios -g nagios -d /usr/local/nagios/libexec
/usr/bin/install -c -m 775 -o nagios -g nagios check_nrpe /usr/local/nagios/libexec
make[1]: Leaving directory `/data/nagios/nrpe-2.15/src'
[root@cu2 nrpe-2.15]# make install-daemon
cd ./src/ && make install-daemon
make[1]: Entering directory `/data/nagios/nrpe-2.15/src'
/usr/bin/install -c -m 775 -o nagios -g nagios -d /usr/local/nagios/bin
/usr/bin/install -c -m 775 -o nagios -g nagios nrpe /usr/local/nagios/bin
make[1]: Leaving directory `/data/nagios/nrpe-2.15/src'
[root@cu2 nrpe-2.15]# make install-daemon-config
/usr/bin/install -c -m 775 -o nagios -g nagios -d /usr/local/nagios/etc
/usr/bin/install -c -m 644 -o nagios -g nagios sample-config/nrpe.cfg /usr/local/nagios/etc
# 启动nrpe服务
[root@cu2 nrpe-2.15]# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
[root@cu2 nrpe-2.15]# /usr/local/nagios/libexec/check_nrpe -H localhost
CHECK_NRPE: Error - Could not complete SSL handshake.
[root@cu2 nrpe-2.15]# /usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
NRPE v2.15
注意: 如果需要启用传递参数功能,执行./configure时需要添加参数--enable-command-args,同时在nrpe.cfg中修改配置dont_blame_nrpe=1。参考 http://www.cppblog.com/fwxjj/archive/2011/10/28/159262.aspx
~~更新201604:(2016-4-20 13:10:02)
编译好nrpe后,在编译目录下面有 init-script 脚本,直接把脚本拷贝到 /etc/init.d 目录就可以把 nrpe 作为服务来管理了。
[root@cu2 nrpe-2.15]# cp init-script /etc/init.d/nrpe
[root@cu2 nrpe-2.15]# chmod +x /etc/init.d/nrpe
[root@cu2 nrpe-2.15]# service nrpe start
Starting nrpe: [ OK ]
[root@cu2 nrpe-2.15]# service nrpe status
nrpe (pid 30791) is running...
这样就可以用服务的方式来管理nrpe的启动。把脚本同步到其他机器后,就可以用 service nrpe start 来代替启动命令了。
~~更新201604END
拷贝程序到其他机器:
[root@cu2 local]# scp -r nagios cu3:/usr/local/
[root@cu3 ~]# cd /usr/local/nagios/
[root@cu3 nagios]# bin/nrpe -c etc/nrpe.cfg -d
[root@cu3 nagios]#
[root@cu3 nagios]# libexec/check_nrpe -H 127.0.0.1
NRPE v2.15
修改配置,添加可以访问nrpe的白名单:
[root@cu3 nagios]# vi etc/nrpe.cfg
...修改
allowed_hosts=127.0.0.1,192.168.0.0/24
...查看
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/hda1
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
nrpe.cfg最下面是nrpe的command,nagios配置中会用到
[root@cu3 nagios]# pkill nrpe
[root@cu3 nagios]# bin/nrpe -c etc/nrpe.cfg -d
注意:如果你用xinetd的话: ** only_from = 127.0.0.1 192.168.0.0/16 **
再回到cu2,把cu3加入nagios管理列表中:
首先,nagios的配置入口为etc/nagios.cfg。其他配置文件都是通过cfg_file去定位的。
[root@cu2 nagios]# vi etc/nagios.cfg
...添加
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg
cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
cfg_file=/usr/local/nagios/etc/objects/cu3.cfg
[root@cu2 nagios]# vi etc/objects/commands.cfg
...添加
# 'check_nrpe' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
[root@cu2 nagios]# vi etc/objects/cu3.cfg
...新增
define host{
use linux-server ; Name of host template to use
; This host definition will inherit all variables that are defined
; in (or inherited by) the linux-server host template definition.
host_name cu3
alias cu3
address 192.168.0.148
}
define service{
use generic-service ; Name of service template to use
host_name cu3
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
define service{
use generic-service ; Name of service template to use
host_name cu3
service_description Root Partition
check_command check_nrpe!check_hda1
}
define service{
use generic-service ; Name of service template to use
host_name cu3
service_description Current Users
check_command check_nrpe!check_users
}
define service{
use generic-service ; Name of service template to use
host_name cu3
service_description Total Processes
check_command check_nrpe!check_total_procs
}
define service{
use generic-service ; Name of service template to use
host_name cu3
service_description Current Load
check_command check_nrpe!check_load
}
[root@cu2 nagios]# vi etc/objects/hosts.cfg
... 新增
define hostgroup{
hostgroup_name linux-servers ; The name of the hostgroup
alias Linux Servers ; Long name of the group
members localhost,cu3,cu4,cu5,cu1 ; Comma separated list of hosts that belong to this group
}
配置完后,校验配置,然后重启nagios。然后就可以打开浏览器查看cu3状态。
[root@cu2 nagios]# bin/nagios -v etc/nagios.cfg
Nagios Core 4.1.1
Copyright (c) 2009-present Nagios Core Development Team and Community Contributors
Copyright (c) 1999-2009 Ethan Galstad
Last Modified: 08-19-2015
License: GPL
Website: https://www.nagios.org
Reading configuration data...
Read main config file okay...
Read object config files okay...
Running pre-flight check on configuration data...
Checking objects...
Checked 23 services.
Checked 4 hosts.
Checked 1 host groups.
Checked 0 service groups.
Checked 1 contacts.
Checked 1 contact groups.
Checked 25 commands.
Checked 5 time periods.
Checked 0 host escalations.
Checked 0 service escalations.
Checking for circular paths...
Checked 4 hosts
Checked 0 service dependencies
Checked 0 host dependencies
Checked 5 timeperiods
Checking global event handlers...
Checking obsessive compulsive processor commands...
Checking misc settings...
Total Warnings: 0
Total Errors: 0
Things look okay - No serious problems were detected during the pre-flight check
[root@cu2 nagios]# service nagios restart
Running configuration check...
Stopping nagios: done.
Starting nagios: done.
# 后记
技巧:配置服务的时候,指定host_name可以使用正则表达式,一个服务定义通吃多台主机。对于功能类似的机器,可以减少很多工作量:
vi etc/nagios.cfg
... 修改
use_regexp_matching=1
use_true_regexp_matching=1
vi cu.cfg
...
define service{
use generic-service ; Name of service template to use
host_name cu.*
service_description Current Load
check_command check_nrpe!check_load
}
基本功能就算配置好了,如果出现异常就能得到邮件提醒。
# 后后后记(2015-12-7 21:42:03)
tar zxvf nagios-plugins-2.1.1.tar.gz
tar zxvf nrpe-2.15.tar.gz
ps axu|grep httpd
useradd nagios
groupadd nagcmd
usermod -G nagcmd nagios
usermod -G nagcmd apache
yum -y install mysql-devel openssl
cd nagios-plugins-2.1.1/
./configure --with-nagios-user=nagios --with-nagios-group=nagios
make && make install
cd ../nrpe-2.15
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
cd /usr/local/
for h in `cat /etc/hosts | grep hadoop | awk '{print $2}'` ; do rsync -vaz nagios root@$h:/usr/local/ ; done
rsync --dry-run -vaz nagios nagios-client > nagios-client.list
rsync --dry-run --include-from=nagios-client.list --exclude=* -vaz nagios nagios-clint
for h in `cat /etc/hosts | grep "-" | grep -v omc1 | awk '{print $2}'` ; do
rsync --include-from=nagios-client.list --exclude=* -vaz nagios $h:/usr/local/ ;
ssh $h "pkill nrpe; cd /usr/local/nagios ; bin/nrpe -c etc/nrpe.cfg -d" ;
echo $h; ssh $h "if ! ps aux|grep nrpe | grep -v grep | grep nrpe ; then cd /usr/local/nagios ; bin/nrpe -c etc/nrpe.cfg -d ; fi" ;
done
# nrpe下面有init-script,拷贝到/etc/init.d/下面就可以用service来进行启动了。
cp init-script /etc/init.d/nrpe
chmod +x /etc/init.d/nrpe
service nrpe start
chkconfig --add nrpe
--
umask 0022
cd
tar zxvf nagios-4.1.1.tar.gz
cd nagios-4.1.1
./configure --with-command-group=nagcmd
make all
make install
make install-init
make install-config
make install-commandmode
make install-webconf
htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
chmod 644 /usr/local/nagios/etc/htpasswd.users
service httpd restart
service nagios start
##======================================================================
修改 vi nagios.cfg
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg
cfg_file=/usr/local/nagios/etc/objects/cu.cfg
use_regexp_matching=1
添加 for h in `cat /etc/hosts | grep "-" | grep -v omc1 | awk '{print $2}'` ; do echo "
> define host {
> use linux-server
> host_name HN-${h#*-}
> alias HN-${h#*-}
> address $h
> }
> " ; done >hosts.cfg
define hostgroup{
hostgroup_name cu servers
alias cu servers
members HN-omc*, HN-uc*, HN-ud*, HN-db*
}
define hostgroup{
hostgroup_name hadoop slavers
alias hadoop slavers
members HN-slaver*, HN-master*
}
添加 vi commands.cfg
# 'check_nrpe' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
编辑 vi /etc/mail.rc
set from=esw_notify@189.cn
set smtp=smtp.189.cn
set smtp-auth-user=XXX
set smtp-auth-password=XXX
set smtp-auth=login
echo test | /bin/mail -s "** Service Alert **" XXX@189.cn
##-----------------------------------
[root@dr01 ~]# umount /root/.gvfs
command_line /usr/lib/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -u GB -A -i .gvfs
# 参考
- http://nagios-cn.sourceforge.net/nagios-cn/beginning.html#quickstart-fedora
- http://skypegnu1.blog.51cto.com/8991766/1532948
- http://nagios-cn.sourceforge.net/nagios-cn/beginning.html#monitoring-linux
–END
Related
Related posts
-
logstash采集网站的访问日志
2018-01-20
-
使用Puppet安装配置Ganglia
2016-06-17
-
flamegraphs java cpu
2016-05-06
-
RPM打包
2016-04-04