共计 1810 个字符,预计需要花费 5 分钟才能阅读完成。
前言
-
技术栈
Elasticsearch 7.17.2 python 3.8 httpx 0.22.0 loguru 0.6.0
- 有时更新 ES 分词器或远程词典后,不确定每个节点是否都已更新到位,没找到直接的命令来校验,故写了一份 Python 脚本来做校验
- 代码创建了 test_{nodeName} 的索引,测试完后需手动删除:DELETE test_*
代码
# encoding: utf8
# author: qbit
# date: 2022-06-16
# summary: iterate over the ES data nodes and verify the analyzer output on each
import pprint
import httpx
from loguru import logger
coordnode = 'http://192.168.2.67:9200'  # address of the ES coordinating node
esuser = 'elastic'  # ES cluster user name
espwd = 'xxxx'  # ES cluster password
analyzer = "hao_index_mode"  # analyzer under test
intext = "燕雀安知鸿鹄之志"  # text sent to the analyzer
# Expected analyzer output: the tokens joined with a bare ';' (no spaces),
# which is the format CheckOneNodeAnalyzer produces with ';'.join(...).
outtext = "燕雀;安;知;鸿鹄之志;鸿鹄"
def GetNodeList():
    r"""Fetch the node list of the ES cluster.

    Calls the ``_cat/nodes`` API on the coordinating node (``coordnode``),
    requesting the ``name``, ``ip``, ``master`` and ``node.role`` columns in
    JSON format, sorted by name. Every entry is logged at debug level.

    Returns:
        The decoded JSON body of the response (callers treat it as a list of
        per-node dicts keyed by the requested column names).

    Raises:
        httpx.HTTPStatusError: if the cluster answers with a 4xx/5xx status,
            e.g. because of wrong credentials.
    """
    url = f'{coordnode}/_cat/nodes?v=true&h=name,ip,master,node.role&s=name&format=json'
    resp = httpx.get(url, auth=(esuser, espwd))
    # Fail loudly on an error response; otherwise the JSON error object would
    # be handed back and misread as a node list by the caller.
    resp.raise_for_status()
    nodes = resp.json()
    for node in nodes:
        logger.debug(node)
    return nodes
def _NormalizeTokenLine(line: str) -> str:
    r"""Strip whitespace around every ';'-separated token of *line*."""
    return ';'.join(part.strip() for part in line.split(';'))


def CheckOneNodeAnalyzer(nodeDict: dict, expected: str) -> list:
    r"""Create a test index pinned to one node and check the analyzer there.

    An index named ``test_{nodeName}`` is created with a single shard, no
    replicas and an allocation filter on the node's IP. The module-level
    ``intext`` is then analyzed through that index with the module-level
    ``analyzer`` and the resulting tokens are compared with *expected*.

    Args:
        nodeDict: node description; the ``'name'`` and ``'ip'`` keys are read.
        expected: expected tokens joined with ``';'``. Whitespace around the
            separators is ignored, so ``"a; b"`` and ``"a;b"`` are equivalent.

    Returns:
        ``[status, nodeName, nodeIP, tokenLine]`` where ``status`` is ``'ok'``
        when the tokens match *expected* and ``'no'`` otherwise, and
        ``tokenLine`` is the actual tokens joined with ``';'``.

    Raises:
        httpx.HTTPStatusError: if the ``_analyze`` request returns a 4xx/5xx
            status.
    """
    nodeName = nodeDict['name']
    nodeIP = nodeDict['ip']
    indexName = f"test_{nodeName}"
    logger.info(f"{nodeName}, {nodeIP}, {indexName}")

    createBody = {
        "settings": {
            "index": {
                "number_of_shards": 1,
                "number_of_replicas": 0,
                "routing.allocation.include._ip": nodeIP,
            }
        }
    }
    # Create the index. The response is only logged, not checked: on a rerun
    # the index presumably still exists and the error reply must not abort
    # the check.
    createResp = httpx.put(f"{coordnode}/{indexName}", auth=(esuser, espwd), json=createBody)
    logger.debug(createResp)

    # Run the analyzer through the node-pinned index.
    analyzeBody = {
        "analyzer": analyzer,
        "text": intext,
    }
    analyzeResp = httpx.post(
        f"{coordnode}/{indexName}/_analyze", auth=(esuser, espwd), json=analyzeBody
    )
    logger.debug(analyzeResp)
    # Surface an error reply as an HTTP error instead of a KeyError on 'tokens'.
    analyzeResp.raise_for_status()

    tokenList = [item['token'] for item in analyzeResp.json()['tokens']]
    tokenLine = ';'.join(tokenList)
    logger.info(tokenLine)

    # Compare whitespace-insensitively so an expected string written as
    # "a; b; c" still matches the ';'-joined token line.
    matched = _NormalizeTokenLine(tokenLine) == _NormalizeTokenLine(expected)
    return ['ok' if matched else 'no', nodeName, nodeIP, tokenLine]
if __name__ == '__main__':
    # Check every data node and sort the results into passed / failed lists.
    okList = []
    noList = []
    # A 'd' in the node.role string marks a data node.
    dataNodes = (n for n in GetNodeList() if 'd' in n['node.role'])
    for node in dataNodes:
        outcome = CheckOneNodeAnalyzer(node, outtext)
        bucket = okList if outcome[0] == 'ok' else noList
        bucket.append(outcome)

    # NOTE(review): the source lost its indentation; the separator is assumed
    # to be printed once, after the loop and before the summary - confirm.
    print('------')
    logger.info(f"okList size: {len(okList)}")
    pprint.pprint(okList)
    logger.info(f"noList size: {len(noList)}")
    pprint.pprint(noList)
qbit snap
正文完
发表至: elasticsearch
2022-06-16