共计 5785 个字符,预计需要花费 15 分钟才能阅读完成。
准备数据:
POST /forum/_bulk
{"index": { "_id": 1}}
{"articleID" : "XHDK-A-1293-#fJ3", "userID" : 1, "hidden": false, "postDate": "2017-01-01"}
{"index": { "_id": 2}}
{"articleID" : "KDKE-B-9947-#kL5", "userID" : 1, "hidden": false, "postDate": "2017-01-02"}
{"index": { "_id": 3}}
{"articleID" : "JODL-X-1937-#pV7", "userID" : 2, "hidden": false, "postDate": "2017-01-01"}
{"index": { "_id": 4}}
{"articleID" : "QQPX-R-3956-#aD8", "userID" : 2, "hidden": true, "postDate": "2017-01-02"}
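1、为帖子数据增加标题字段
注意:上面的准备数据中只有 _id 为 1~4 的文档。如果索引中不存在 _id 为 5 的文档,下面 bulk 中针对它的那条 update 会单独返回 document_missing_exception(404),但不会影响其余 4 条更新。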
POST /forum/_bulk
{"update": { "_id": "1"} }
{"doc" : {"title" : "this is java and elasticsearch blog"} }
{"update": { "_id": "2"} }
{"doc" : {"title" : "this is java blog"} }
{"update": { "_id": "3"} }
{"doc" : {"title" : "this is elasticsearch blog"} }
{"update": { "_id": "4"} }
{"doc" : {"title" : "this is java, elasticsearch, hadoop blog"} }
{"update": { "_id": "5"} }
{"doc" : {"title" : "this is spark blog"} }
2、搜索标题中包含 java 或 elasticsearch 的 blog
这个就跟之前的那个 term filter/query 不一样了。不是搜索 exact value,而是进行 full text 全文搜索。
match query 负责进行全文检索。当然,如果要检索的 field 是 not_analyzed 的(在 5.x 及之后的版本中对应 keyword 类型),那么 match query 不会对搜索词进行分词,效果也就相当于 term query
GET /forum/_search
{
"query": {
"match": {"title": "java elasticsearch"}
}
}
{
"took" : 1139,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 0.97797304,
"hits" : [
{
"_index" : "forum",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.97797304,
"_source" : {
"articleID" : "XHDK-A-1293-#fJ3",
"userID" : 1,
"hidden" : false,
"postDate" : "2017-01-01",
"tag" : [
"java",
"hadoop"
],
"tag_cnt" : 2,
"view_cnt" : 30,
"title" : "this is java and elasticsearch blog"
}
},
{
"_index" : "forum",
"_type" : "_doc",
"_id" : "4",
"_score" : 0.97797304,
"_source" : {
"articleID" : "QQPX-R-3956-#aD8",
"userID" : 2,
"hidden" : true,
"postDate" : "2017-01-02",
"tag" : [
"java",
"elasticsearch"
],
"tag_cnt" : 2,
"view_cnt" : 80,
"title" : "this is java, elasticsearch, hadoop blog"
}
},
{
"_index" : "forum",
"_type" : "_doc",
"_id" : "2",
"_score" : 0.57843524,
"_source" : {
"articleID" : "KDKE-B-9947-#kL5",
"userID" : 1,
"hidden" : false,
"postDate" : "2017-01-02",
"tag" : ["java"],
"tag_cnt" : 1,
"view_cnt" : 50,
"title" : "this is java blog"
}
},
{
"_index" : "forum",
"_type" : "_doc",
"_id" : "3",
"_score" : 0.57843524,
"_source" : {
"articleID" : "JODL-X-1937-#pV7",
"userID" : 2,
"hidden" : false,
"postDate" : "2017-01-01",
"tag" : ["hadoop"],
"tag_cnt" : 1,
"view_cnt" : 100,
"title" : "this is elasticsearch blog"
}
}
]
}
}
3、搜索标题中包含 java 和 elasticsearch 的 blog
搜索结果精确控制的第一步,就是灵活使用 match query 的 operator 参数:operator 默认是 or,只要匹配任意一个关键字即可;如果希望所有的搜索关键字都必须匹配,就把 operator 设置为 and,这样可以实现默认 match query 无法实现的效果
GET /forum/_search
{
"query": {
"match": {
"title": {
"query": "java elasticsearch",
"operator": "and"
}
}
}
}
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.97797304,
"hits" : [
{
"_index" : "forum",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.97797304,
"_source" : {
"articleID" : "XHDK-A-1293-#fJ3",
"userID" : 1,
"hidden" : false,
"postDate" : "2017-01-01",
"tag" : [
"java",
"hadoop"
],
"tag_cnt" : 2,
"view_cnt" : 30,
"title" : "this is java and elasticsearch blog"
}
},
{
"_index" : "forum",
"_type" : "_doc",
"_id" : "4",
"_score" : 0.97797304,
"_source" : {
"articleID" : "QQPX-R-3956-#aD8",
"userID" : 2,
"hidden" : true,
"postDate" : "2017-01-02",
"tag" : [
"java",
"elasticsearch"
],
"tag_cnt" : 2,
"view_cnt" : 80,
"title" : "this is java, elasticsearch, hadoop blog"
}
}
]
}
}
4、搜索包含 java、elasticsearch、spark、hadoop,4 个关键字中至少 3 个的 blog
控制搜索结果的精确度的第二步就是指定一些关键字中,必须至少匹配其中的多少个关键字,才能作为结果返回
GET /forum/_search
{
"query": {
"match": {
"title": {
"query": "java elasticsearch spark hadoop",
"minimum_should_match": 3
}
}
}
}
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 2.2356422,
"hits" : [
{
"_index" : "forum",
"_type" : "_doc",
"_id" : "4",
"_score" : 2.2356422,
"_source" : {
"articleID" : "QQPX-R-3956-#aD8",
"userID" : 2,
"hidden" : true,
"postDate" : "2017-01-02",
"tag" : [
"java",
"elasticsearch"
],
"tag_cnt" : 2,
"view_cnt" : 80,
"title" : "this is java, elasticsearch, hadoop blog"
}
}
]
}
}
5、用 bool 组合多个搜索条件,来搜索 title
GET /forum/_search
{
"query": {
"bool": {
"must": [
{
"match": {"title": "java"}
}
],
"must_not": [
{
"match": {"title": "spark"}
}
],
"should": [
{
"match": {"title": "hadoop"}
},
{
"match": {"title": "elasticsearch"}
}
]
}
}
}
{
"took" : 12,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 2.2356422,
"hits" : [
{
"_index" : "forum",
"_type" : "_doc",
"_id" : "4",
"_score" : 2.2356422,
"_source" : {
"articleID" : "QQPX-R-3956-#aD8",
"userID" : 2,
"hidden" : true,
"postDate" : "2017-01-02",
"tag" : [
"java",
"elasticsearch"
],
"tag_cnt" : 2,
"view_cnt" : 80,
"title" : "this is java, elasticsearch, hadoop blog"
}
},
{
"_index" : "forum",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.97797304,
"_source" : {
"articleID" : "XHDK-A-1293-#fJ3",
"userID" : 1,
"hidden" : false,
"postDate" : "2017-01-01",
"tag" : [
"java",
"hadoop"
],
"tag_cnt" : 2,
"view_cnt" : 30,
"title" : "this is java and elasticsearch blog"
}
},
{
"_index" : "forum",
"_type" : "_doc",
"_id" : "2",
"_score" : 0.57843524,
"_source" : {
"articleID" : "KDKE-B-9947-#kL5",
"userID" : 1,
"hidden" : false,
"postDate" : "2017-01-02",
"tag" : ["java"],
"tag_cnt" : 1,
"view_cnt" : 50,
"title" : "this is java blog"
}
}
]
}
}
6、bool 组合多个搜索条件,如何计算 relevance score
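把匹配到的 must 和 should 子句各自算出的分数加起来,就是 document 的 relevance score。旧版本还会再乘以 coord 系数(匹配到的子句个数除以 must 和 should 子句的总个数),新版本已经去掉了这一步:比如上面 _id 为 1 的文档得分 0.97797304,与第 2 节中直接 match "java elasticsearch" 的得分相同,正是各匹配子句分数之和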
所以排在第一位的是:包含 java、hadoop、elasticsearch
排在第二位的是:包含 java、elasticsearch
排在第三位的是:包含 java
should 是可以影响相关度分数的
must 用来保证 document 必须包含这个关键字,同时会根据这个 must 条件计算出 document 对该搜索条件的 relevance score。在满足 must 的基础上,should 中的条件不匹配也是可以的,但是匹配得越多,document 的 relevance score 就越高。
7、should 实现搜索四个关键字中至少包含三个关键字
默认情况下,should 是可以一个都不匹配的;但是有一个例外:如果 bool 中没有 must(也没有 filter),那么 should 中必须至少匹配一个才可以。如果想精确控制 should 中至少要匹配几个,可以像下面这样通过 minimum_should_match 参数来指定
GET /forum/_search
{
"query": {
"bool": {
"should": [
{
"match": {"title": "java"}
},
{
"match": {"title": "elasticsearch"}
},
{
"match": {"title": "hadoop"}
},
{
"match": {"title": "spark"}
}
],
"minimum_should_match": 3
}
}
}
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 2.2356422,
"hits" : [
{
"_index" : "forum",
"_type" : "_doc",
"_id" : "4",
"_score" : 2.2356422,
"_source" : {
"articleID" : "QQPX-R-3956-#aD8",
"userID" : 2,
"hidden" : true,
"postDate" : "2017-01-02",
"tag" : [
"java",
"elasticsearch"
],
"tag_cnt" : 2,
"view_cnt" : 80,
"title" : "this is java, elasticsearch, hadoop blog"
}
}
]
}
}
正文完
发表至:无分类
2019-06-01