ES节点简单监控

    xiaoxiao2021-03-25  81

    本文通过 ES 提供的 RESTful API,对各节点的 JVM 堆内存使用率及系统负载做简单监控。

    #!/usr/bin/python #coding:utf8 import sys, re, time, datetime, socket, smtplib import os, urllib, urllib2, json, string DOMAIN_NAME = "es.op.xxx.com" NODES_URL = "http://"+DOMAIN_NAME+":9200/_cluster/state/nodes,master_node" NODE_STAT_URL = "http://"+DOMAIN_NAME+":9200/_nodes/%s/stats?human=true" HEAP_THRESHOLD = 90 LOAD_THRESHOLD = 30 def main(): nodes_name = get_nodes() for node_id in nodes_name : node_info = get_node_info(node_id) indices, jvm, load = extract_node_info(node_info) if jvm['heap_used_percent'] < HEAP_THRESHOLD and load[2] < LOAD_THRESHOLD: continue; print time.ctime(), nodes_name[node_id], jvm, load, indices content = "机器名: ".decode('utf-8') + nodes_name[node_id] + "\n"; content += "当前索引: ".decode('utf-8') + str(indices['current_index']) + "\n"; content += "当前查询: ".decode('utf-8') + str(indices['current_query']) + "\n"; content += "当前合并: ".decode('utf-8') + str(indices['current_merge']) + "\n"; content += "堆使用: ".decode('utf-8') + str(jvm['heap_used']) + " (" + str(jvm['heap_used_percent']) + "%)" + "\n"; content += "Old GC: ".decode('utf-8') + str(jvm['old_gc_count']) + "\n"; content += "Young GC: ".decode('utf-8') + str(jvm['young_gc_count']) + "\n"; content += "OS Load: ".decode('utf-8') + ','.join(str(x) for x in load) + "\n"; mailSender(content) ######################################## # 获取ES集群所有节点:host_name node_id ######################################## def get_nodes(): page = urllib2.urlopen(NODES_URL) res = json.loads(page.read()) nodes = res.get("nodes") nodes_name = {} for id, node_info in nodes.iteritems(): nodes_name[id] = node_info.get('name').decode('utf-8') return nodes_name ######################################### # 获取ES集群节点状态 ######################################### def get_node_info(node_id): node_info_url = NODE_STAT_URL % node_id page = urllib2.urlopen(node_info_url) res = json.loads(page.read()) node_info = res.get("nodes", {}).get(node_id) return node_info ########################################## # 提取单个节点的状态信息 
########################################## def extract_node_info(node_info): indices = {} indices['current_index'] = node_info.get('indices',{}).get('indexing',{}).get('index_current',0); indices['current_query'] = node_info.get('indices',{}).get('search',{}).get('query_current',0); indices['current_merge'] = node_info.get('indices',{}).get('merges',{}).get('current',0); jvm = {} org_jvm_info = node_info.get('jvm',{}) jvm['heap_used'] = org_jvm_info.get('mem',{}).get('heap_used',0); jvm['heap_used_percent'] = org_jvm_info.get('mem',{}).get('heap_used_percent',0); jvm['non_heap_used'] = org_jvm_info.get('mem',{}).get('non_heap_used',0); jvm['old_used'] = org_jvm_info.get('mem',{}).get('pools',{}).get('old',{}).get('used',0); jvm['young_used'] = org_jvm_info.get('mem',{}).get('pools',{}).get('young',{}).get('used',0); jvm['survivor_used'] = org_jvm_info.get('mem',{}).get('pools',{}).get('survivor',{}).get('used',0); jvm['young_gc_count'] = org_jvm_info.get('gc',{}).get('collectors',{}).get('young',{}).get('collection_count',0); jvm['old_gc_count'] = org_jvm_info.get('gc',{}).get('collectors',{}).get('old',{}).get('collection_count',0); load = node_info.get('os',{}).get('load_average',[]); return indices, jvm, load if __name__ == "__main__": main()

    对于堆内存使用率或 Load 过高的机器,发送报警邮件:

    from email.mime.text import MIMEText
    from email.header import Header

    # Alert mail settings. This section relies on smtplib, HEAP_THRESHOLD and
    # LOAD_THRESHOLD already being available in the module.
    sender = 'monitor@xxx.com'
    receiver = ['lz@xxx.com', 'xxx@xxx.com', 'xxx@xxx.com']
    subject = ('ES Cluster Monitor: Heap(' + str(HEAP_THRESHOLD) + '%)'
               + ' Load(' + str(LOAD_THRESHOLD) + ')')
    smtpServer = 'smtp.exmail.qq.com'
    userName = 'monitor@xxx.com'
    # NOTE(review): the credential is hard-coded in the source; consider loading
    # it from the environment or a protected config file instead.
    password = 'xxx'
    # NOTE(review): text containing characters outside GB2312 presumably cannot
    # be encoded with this charset -- confirm, and consider 'utf-8'.
    charset = 'gb2312'


    def mailSender(content):
        """Send *content* as a plain-text alert mail to all receivers.

        Args:
            content: text of the mail body.

        Raises:
            smtplib.SMTPException (or a socket error) when connecting,
            authenticating or sending fails.
        """
        msg = MIMEText(content, 'plain', charset)
        msg['From'] = sender
        msg['To'] = ','.join(receiver)
        msg['Subject'] = Header(subject, charset)

        smtp = smtplib.SMTP()
        smtp.connect(smtpServer)
        try:
            smtp.login(userName, password)
            smtp.sendmail(sender, receiver, msg.as_string())
            smtp.quit()
        finally:
            # Release the socket even when login/sendmail raised; close() is a
            # no-op after a successful quit().
            smtp.close()

    报警邮件如下:

    机器名: idc02-xxx-es-06
    当前索引: 2360563
    当前查询: 0
    当前合并: 2
    堆使用: 29.1gb (92%)
    Old GC: 5523
    Young GC: 1360285
    OS Load: 2.95,2.82,2.21

    转载请注明原文地址: https://ju.6miu.com/read-23830.html

    最新回复(0)