前言
技术栈
Elasticsearch 7.17.2、Python 3.8、httpx 0.22.0、loguru 0.6.0
- 有时更新 ES 分词器或远程词典后，不确定每个节点是否都已更新到位，没找到直接的命令来校验，故写了一份 Python 脚本来做校验。
- 代码创建了 `test_{nodeName}` 的索引，测试完后需手动删除：`DELETE test_*`
代码
# encoding: utf8
# author: qbit
# date: 202-06-16 (sic; presumably 2022-06-16 -- TODO confirm)
# summary: visit every ES data node and verify its analyzer output
import pprint

import httpx
from loguru import logger

coordnode = 'http://192.168.2.67:9200'  # address of the ES coordinating node
esuser = 'elastic'  # ES cluster user name
espwd = 'xxxx'  # ES cluster password
analyzer = "hao_index_mode"  # analyzer under test
intext = "燕雀安知鸿鹄之志"  # text sent to the analyzer
outtext = "燕雀;安;知;鸿鹄之志;鸿鹄"  # expected tokens, joined with ';'


def GetNodeList():
    r"""
    Fetch the list of cluster nodes from the ``_cat/nodes`` API.

    Returns:
        The decoded JSON response: a list of dicts carrying the requested
        columns ``name``, ``ip``, ``master`` and ``node.role``, sorted by name.

    Raises:
        httpx.HTTPStatusError: if the coordinating node answers with a 4xx/5xx
            status (e.g. bad credentials).
    """
    url = f'{coordnode}/_cat/nodes?v=true&h=name,ip,master,node.role&s=name&format=json'
    r = httpx.get(url, auth=(esuser, espwd))
    # Fail loudly here: an error payload is a JSON object, not a node list,
    # and would otherwise break later with an unrelated TypeError.
    r.raise_for_status()
    result = r.json()
    for node in result:
        logger.debug(node)
    return result


def CheckOneNodeAnalyzer(nodeDict: dict, expected: str):
    r"""
    Create a one-shard index pinned to a single node and test the analyzer there.

    The index is named ``test_<node name>`` (lower-cased) and is NOT deleted
    by this script; remove it afterwards with ``DELETE test_*``.

    Args:
        nodeDict: one entry of the list returned by ``GetNodeList``; its
            ``name`` and ``ip`` keys are read.
        expected: the expected tokens joined with ``';'``.

    Returns:
        ``['ok', nodeName, nodeIP, tokenLine]`` when the analyzer output
        equals ``expected``, otherwise ``['no', nodeName, nodeIP, tokenLine]``.
        If the ``_analyze`` request itself fails, the last element holds the
        HTTP status and response body instead of a token line.
    """
    nodeName = nodeDict['name']
    nodeIP = nodeDict['ip']
    # Elasticsearch only accepts lower-case index names.
    indexName = f"test_{nodeName}".lower()
    url = f"{coordnode}/{indexName}"
    logger.info(f"{nodeName}, {nodeIP}, {indexName}")

    # No replicas and an IP allocation filter, so the single shard is
    # requested to live on the node under test.
    createBody = {
        "settings": {
            "index": {
                "number_of_shards": 1,
                "number_of_replicas": 0,
                "routing.allocation.include._ip": nodeIP
            }
        }
    }
    r = httpx.put(url, auth=(esuser, espwd), json=createBody)  # create the index
    logger.debug(r)
    if r.status_code >= 400:
        # Not fatal: the index may simply be left over from an earlier run.
        logger.warning(f"create index {indexName} failed: {r.status_code} {r.text}")

    url = f"{coordnode}/{indexName}/_analyze"
    analyzeBody = {
        "analyzer": analyzer,
        "text": intext
    }
    r = httpx.post(url, auth=(esuser, espwd), json=analyzeBody)  # run the analyzer
    logger.debug(r)
    if r.status_code >= 400:
        # Report this node as failed rather than aborting the whole run
        # with a KeyError on the missing 'tokens' key.
        failure = f"HTTP {r.status_code}: {r.text}"
        logger.error(f"analyze on {indexName} failed: {failure}")
        return ['no', nodeName, nodeIP, failure]

    tokenLine = ';'.join(tok['token'] for tok in r.json()['tokens'])
    logger.info(tokenLine)
    status = 'ok' if tokenLine == expected else 'no'
    return [status, nodeName, nodeIP, tokenLine]


if __name__ == '__main__':
    nodeList = GetNodeList()
    okList = list()
    noList = list()
    for node in nodeList:
        if 'd' in node['node.role']:  # data node
            result = CheckOneNodeAnalyzer(node, outtext)
            if result[0] == 'ok':
                okList.append(result)
            else:
                noList.append(result)
    print('------')
    logger.info(f"okList size: {len(okList)}")
    pprint.pprint(okList)
    logger.info(f"noList size: {len(noList)}")
    pprint.pprint(noList)
qbit snap