Files
openstack-helm-images/nagios/plugins/check_update_prometheus_hosts.py
Steve Wilkerson e1a1faec28 Nagios: Make scripts and plugins executable
This makes the entrypoint script and the plugins included with
the nagios image executable.

Change-Id: Iaeb2fad62ac213b74637dadc329e7ea304602ab8
Signed-off-by: Steve Wilkerson <sw5822@att.com>
2019-06-26 17:35:55 -05:00

203 lines
6.6 KiB
Python
Executable File

#!/usr/bin/env python
import argparse
import sys
import requests
import os
import signal
import time
# Template for a single Nagios "define host" object. It is filled in with
# str.format() using the host_ip, host_name and hostgroups fields; the doubled
# braces are str.format() escapes that render as literal "{" and "}".
NAGIOS_HOST_FORMAT = """
define host {{
use linux-server
address {host_ip}
host_name {host_name}
hostgroups {hostgroups}
notifications_enabled 1
max_check_attempts 1
notification_period 24x7
contact_groups snmp_and_http_notifying_contact_group
}}
"""
# Template for a single Nagios "define hostgroup" object, filled in with
# str.format() using the hostgroup field.
NAGIOS_HOSTGROUP_FORMAT = """
define hostgroup {{
hostgroup_name {hostgroup}
}}
"""
# Process exit codes passed to sys.exit() by this script to report success
# and failure respectively.
NAGIOS_OK = 0
NAGIOS_CRITICAL = 2
def main():
    """Parse command-line options and run the requested action.

    Exactly one of the following happens, checked in this order:

    * ``--hosts`` given: print the generated host definitions to stdout.
    * ``--hostgroups`` given: print the generated hostgroup definitions
      to stdout.
    * otherwise (``--object_file_loc`` has a default, so it is normally
      set): write the object file. With ``-d`` this repeats forever,
      sleeping ``--update_seconds`` between updates. Without ``-d`` the
      file is written once, unless it already exists and is non-empty, in
      which case the script exits with NAGIOS_OK without querying anything.

    Any exception raised while updating the object file is reported on
    stdout and the process exits with NAGIOS_CRITICAL.
    """
    parser = argparse.ArgumentParser(
        description='Queries Prometheus and Keeps Nagios config Updated.')
    parser.add_argument(
        '--prometheus_api',
        metavar='prometheus_api',
        type=str,
        required=True,
        help='Prometheus query API with scheme, host and port')
    parser.add_argument(
        '--update_seconds',
        metavar='update_seconds',
        type=int,
        required=False,
        default=60,
        help='When run as daemon, sleep time')
    parser.add_argument(
        '--hosts',
        metavar='get_nagios_hosts',
        type=str,
        required=False,
        help='Output Nagios Host definition to stdout')
    parser.add_argument(
        '--hostgroups',
        metavar='get_nagios_hostgroups',
        type=str,
        required=False,
        help='Output Nagios Hostgroup definition to stdout')
    parser.add_argument(
        '--object_file_loc',
        metavar='object_file_loc',
        type=str,
        required=False,
        default="/opt/nagios/etc/conf.d/prometheus_discovery_objects.cfg",
        help='Path of the Nagios object file that the discovered host and '
             'hostgroup definitions are written to')
    parser.add_argument(
        '-d',
        action='store_true',
        help="Flag to run as a daemon")
    # Unrecognised arguments are tolerated on purpose (parse_known_args).
    args, _unknown = parser.parse_known_args()

    if args.hosts:
        print(get_nagios_hosts(args.prometheus_api))
    elif args.hostgroups:
        print(get_nagios_hostgroups(args.prometheus_api))
    elif args.object_file_loc:
        if args.d:
            while True:
                try:
                    update_config_file(
                        args.prometheus_api, args.object_file_loc)
                    time.sleep(args.update_seconds)
                except Exception as e:
                    # Include the cause so the failure can be diagnosed.
                    print("Error updating nagios config: {}".format(e))
                    sys.exit(NAGIOS_CRITICAL)
        else:
            # One-shot mode only populates a missing or empty object file.
            if (os.path.exists(args.object_file_loc)
                    and os.path.getsize(args.object_file_loc) > 0):
                print("OK- Nagios host configuration already updated.")
                sys.exit(NAGIOS_OK)
            try:
                update_config_file(args.prometheus_api, args.object_file_loc)
            except Exception as e:
                print("Error updating nagios config: {}".format(e))
                sys.exit(NAGIOS_CRITICAL)
            print("Nagios hosts have been successfully updated")
            sys.exit(NAGIOS_OK)
def update_config_file(prometheus_api, object_file_loc):
    """Regenerate the Nagios object file and ask Nagios to reload it.

    Host and hostgroup definitions are generated via get_nagios_hosts() and
    get_nagios_hostgroups(). If the host definitions come back empty, a
    message is printed and the process exits with NAGIOS_CRITICAL without
    touching the file. Otherwise both definitions are written to
    ``object_file_loc`` (replacing its contents) and reload_nagios() is
    called.
    """
    host_definitions = get_nagios_hosts(prometheus_api)
    hostgroup_definitions = get_nagios_hostgroups(prometheus_api)

    if not host_definitions:
        print("no hosts discovered. Either prometheus is unreachable or is not collecting node metrics.")
        sys.exit(NAGIOS_CRITICAL)

    with open(object_file_loc, 'w+') as config_handle:
        config_handle.write(
            "{} \n {}".format(host_definitions, hostgroup_definitions))

    # Signal Nagios only after the file has been closed.
    reload_nagios()
def reload_nagios():
    """Send SIGHUP to the grandparent of this process.

    The parent's parent PID is looked up by running ``ps`` on the parent
    PID, and that process is sent SIGHUP. On any failure, the PIDs
    involved are printed and the process exits with NAGIOS_CRITICAL.
    """
    worker_pid = os.getpid()
    # Bound before the try block so the failure report below can always be
    # formatted, even if the lookup itself raises.
    grandparent_pid = None
    try:
        # NOTE(srwilkers): We need the worker PIDs parent, which is the
        # grandparent of the running process
        with os.popen("ps -p %d -oppid=" % os.getppid()) as ps_output:
            grandparent_pid = ps_output.read().strip()
        os.kill(int(grandparent_pid), signal.SIGHUP)
    except Exception:
        print('Unable to reload Nagios with new host configuration')
        print('Nagios worker PID: {}. Nagios worker grandparent PID: {}'.format(worker_pid, grandparent_pid))
        sys.exit(NAGIOS_CRITICAL)
def get_nagios_hostgroups(prometheus_api):
    """Return Nagios hostgroup definitions for every discovered label.

    The label sets of all hosts returned by
    get_nagios_hostgroups_dictionary() are merged, and one
    NAGIOS_HOSTGROUP_FORMAT entry is rendered per distinct label.

    Returns:
        The rendered definitions joined with newlines; an empty string when
        no labels were discovered.
    """
    hostgroup_labels = set()
    # .values() works on both Python 2 and 3 (dict.iteritems() is Python 2
    # only), and the host names are not needed here.
    for labels in get_nagios_hostgroups_dictionary(prometheus_api).values():
        hostgroup_labels.update(labels)

    # Sorted so the generated configuration is stable between runs.
    return "\n".join(
        NAGIOS_HOSTGROUP_FORMAT.format(hostgroup=label)
        for label in sorted(hostgroup_labels))
def get_nagios_hostgroups_dictionary(prometheus_api):
    """Map each node name to the set of its label names.

    Queries Prometheus for ``kube_node_labels``. For every result, the
    metric's ``node`` value becomes the key, and every metric key starting
    with ``label_`` contributes its name (with that prefix removed) to the
    node's set.

    If anything goes wrong while querying or reading the result, a message
    is printed and the process exits with NAGIOS_CRITICAL.
    """
    labels_by_node = {}
    try:
        response = query_prometheus(prometheus_api, 'kube_node_labels')
        for series in response['data']['result']:
            metric = series['metric']
            node = metric['node']
            # key[6:] drops the leading "label_" prefix.
            labels_by_node[node] = set(
                key[6:] for key in metric if key.startswith('label_'))
    except Exception:
        print("Unable to query prometheus at {} to retrieve hosts".format(prometheus_api))
        sys.exit(NAGIOS_CRITICAL)
    return labels_by_node
def get_nagios_hosts(prometheus_api):
    """Return Nagios host definitions for every node Prometheus reports.

    Queries ``node_uname_info`` and renders one NAGIOS_HOST_FORMAT entry
    per result. The host name is the metric's ``nodename``; the address is
    the part of ``instance`` before the first ``:``. Every host joins the
    ``all`` and ``base-os`` hostgroups plus one hostgroup per label found
    by get_nagios_hostgroups_dictionary(). Hosts whose hostgroup list
    contains ``promenade_genesis`` additionally join ``prometheus-hosts``.

    A node that has no entry in the label dictionary simply gets the
    default hostgroups instead of aborting discovery for all nodes.

    Returns:
        The rendered definitions joined with newlines; an empty string when
        no nodes were returned.

    If anything goes wrong while querying or reading the result, a message
    is printed and the process exits with NAGIOS_CRITICAL.
    """
    nagios_hosts = []
    try:
        unames_json = query_prometheus(prometheus_api, 'node_uname_info')
        hostgroup_dictionary = get_nagios_hostgroups_dictionary(prometheus_api)
        for uname in unames_json['data']['result']:
            metric = uname['metric']
            host_name = metric['nodename']
            host_ip = metric['instance'].split(':')[0]
            hostgroups = 'all,base-os'
            # .get(): a node may be missing from kube_node_labels.
            node_labels = hostgroup_dictionary.get(host_name)
            if node_labels:
                # Sorted so the generated configuration is stable.
                hostgroups = hostgroups + "," + ",".join(sorted(node_labels))
            if "promenade_genesis" in hostgroups:
                hostgroups = hostgroups + ",prometheus-hosts"
            nagios_hosts.append(NAGIOS_HOST_FORMAT.format(
                host_name=host_name, host_ip=host_ip, hostgroups=hostgroups))
    except Exception:
        print("Unable to query prometheus at {} to retrieve hosts".format(prometheus_api))
        sys.exit(NAGIOS_CRITICAL)
    return "\n".join(nagios_hosts)
def query_prometheus(prometheus_api, query, timeout=30):
    """Run an instant query against Prometheus and return the decoded JSON.

    Args:
        prometheus_api: Prometheus address; ``http://`` is prepended by
            include_schema() when no scheme is present.
        query: Query expression sent as the ``query`` parameter.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive Prometheus cannot hang the caller indefinitely.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.exceptions.RequestException: on connection failure,
            timeout, or an HTTP error status.
        ValueError: if the response body is not valid JSON.
    """
    url = "{}/api/v1/query".format(include_schema(prometheus_api))
    response = requests.get(
        url,
        headers={"Accept": "application/json"},
        params={"query": query},
        timeout=timeout)
    # Surface HTTP failures here rather than as a confusing missing-key
    # error in the caller.
    response.raise_for_status()
    return response.json()
def include_schema(prometheus_api):
    """Return ``prometheus_api`` with a URL scheme guaranteed.

    Addresses that already start with ``http://`` or ``https://`` are
    returned unchanged; anything else is prefixed with ``http://``.
    """
    if prometheus_api.startswith(("http://", "https://")):
        return prometheus_api
    return "http://{}".format(prometheus_api)
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())