关于后端:elasticsearch之metric聚合

31次阅读

共计 6987 个字符,预计需要花费 18 分钟才能阅读完成。

1、背景

此篇文章简略的记录一下 elasticsearchmetric 聚合操作。比方求 平均值、最大值、最小值、求和、总计、去重总计等。

2、筹备数据

2.1 筹备 mapping

PUT /index_person
{
  "settings": {"number_of_shards": 1},
  "mappings": {
    "properties": {
      "id":{"type": "long"},
      "name": {"type": "keyword"},
      "age": {"type": "integer"},
      "class":{
        "type": "text",
        "fielddata": true
      },
      "province":{"type": "keyword"}
    }
  }
}

2.2 筹备数据

PUT /index_person/_bulk
{"index":{"_id":1}}
{"id":1, "name":"张三","age":18,"class":"大一班","province":"湖北"}
{"index":{"_id":2}}
{"id":2, "name":"李四","age":19,"class":"大一班","province":"湖北"}
{"index":{"_id":3}}
{"id":3, "name":"王武","age":20,"class":"大二班","province":"北京"}
{"index":{"_id":4}}
{"id":4, "name":"赵六","age":21,"class":"大三班技术班","province":"北京"}
{"index":{"_id":5}}
{"id":5, "name":"钱七","age":22,"class":"大三班","province":"湖北"}

3、metric 聚合

3.1 max 平均值

3.1.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {"match_all": {}
  },
  "aggs": {
    "agg_01": {
      "max": {
        "field": "age",
        "missing": 10
      }
    }
  }
}


POST /index_person/_search
{
  "size": 0,
  "query": {"match_all": {}
  },
  "aggs": {
    "agg_01": {
      "max": {
        "script": {
          "lang": "painless",
          "source": """doc.age"""
        }
      }
    }
  }
}


POST /index_person/_search
{
  "size": 0,
  "query": {"match_all": {}
  },
  "aggs": {
    "agg_01": {
      "max": {
        "field": "age", 
        "script": {
          "lang": "painless",
          "source": """_value * params.a""",
          "params": {"a": 2}
        }
      }
    }
  }
}

3.1.2 java 代码

@Test
@DisplayName("最大值聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.max(max ->
                                    // 聚合的字段
                                    max.field("age")
                                            // 如果聚合的文档缺失这个字段,则给 10
                                            .missing(10)
                            )
                    )
    );
    System.out.println("request:" + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response:" + response);
}

@Test
@DisplayName("脚本聚合")
public void test02() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.max(max ->
                                    max.script(script ->
                                            script.inline(inline ->
                                                    inline.lang(ScriptLanguage.Painless)
                                                            // 脚本表达式
                                                            .source("doc.age")
                                            )
                                    )
                            )
                    )
    );
    System.out.println("request:" + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response:" + response);
}

@Test
@DisplayName("值脚本聚合")
public void test03() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.max(max ->
                                    // 指定参加聚合的字段
                                    max.field("age")
                                            .script(script ->
                                                    script.inline(inline ->
                                                            inline.lang(ScriptLanguage.Painless)
                                                                    // 脚本表达式
                                                                    .source("_value * params.plus")
                                                                    // 参数
                                                                    .params("plus", JsonData.of(2))
                                                    )
                                            )
                            )
                    )
    );
    System.out.println("request:" + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response:" + response);
}

3.2 min 最小值

3.2.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {"match_all": {}
  },
  "aggs": {
    "agg_01": {
      "min": {
        "field": "age",
        "missing": 10
      }
    }
  }
}

3.2.2 java

POST /index_person/_search
{
  "size": 0,
  "query": {"match_all": {}
  },
  "aggs": {
    "agg_01": {
      "min": {
        "field": "age",
        "missing": 10
      }
    }
  }
}

3.3 min 最小值

3.3.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {"match_all": {}
  },
  "aggs": {
    "agg_01": {
      "avg": {
        "field": "age",
        "missing": 10
      }
    }
  }
}

3.3.2 java

@Test
@DisplayName("平均值聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.avg(avg ->
                                    // 聚合的字段
                                    avg.field("age")
                                            // 如果聚合的文档缺失这个字段,则给 10
                                            .missing(10)
                            )
                    )
    );
    System.out.println("request:" + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response:" + response);
}

3.4 min 最小值

3.4.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {"match_all": {}
  },
  "aggs": {
    "agg_01": {
      "sum": {
        "field": "age",
        "missing": 10
      }
    }
  }
}

3.4.2 java

@Test
@DisplayName("求和聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.sum(sum ->
                                    // 聚合的字段
                                    sum.field("age")
                                            // 如果聚合的文档缺失这个字段,则给 10
                                            .missing(10)
                            )
                    )
    );
    System.out.println("request:" + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response:" + response);
}

3.5 count(*)

3.5.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {"match_all": {}
  },
  "aggs": {
    "agg_01": {
      "value_count": {
        "field": "province",
        "missing": 10
      }
    }
  }
}

3.5.2 java

@Test
@DisplayName("count(*)聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.valueCount(valueCount ->
                                    // 聚合的字段
                                    valueCount.field("age")
                                            // 如果聚合的文档缺失这个字段,则给 10
                                            .missing(10)
                            )
                    )
    );
    System.out.println("request:" + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response:" + response);
}

3.6 count(distinct)

3.6.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {"match_all": {}
  },
  "aggs": {
    "agg_01": {
      "cardinality": {
        "field": "province",
        "missing": 10
      }
    }
  }
}

3.6.2 java

@Test
@DisplayName("count(distinct)聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.cardinality(cardinality ->
                                    // 聚合的字段
                                    cardinality.field("province")
                                            // 如果聚合的文档缺失这个字段,则给 10
                                            .missing(10)
                            )
                    )
    );
    System.out.println("request:" + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response:" + response);
}

3.7 stat (max,min,avg,count,sum)

3.7.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {"match_all": {}
  },
  "aggs": {
    "agg_01": {
      "stats": {
        "field": "avg",
        "missing": 10
      }
    }
  }
}

3.7.2 java

@Test
@DisplayName("stat 聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.stats(stats ->
                                    // 聚合的字段
                                    stats.field("age")
                                            // 如果聚合的文档缺失这个字段,则给 10
                                            .missing(10)
                            )
                    )
    );
    System.out.println("request:" + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response:" + response);
}

3.8 聚合后返回每个聚合波及的文档

3.8.1 需要

依据 province进行 terms 聚合,而后获取每个 terms 聚合 age最大的那个文档。

3.8.2 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {
    "range": {
      "age": {"gte": 10}
    }
  },
  "aggs": {
    "agg_01": {
      "terms": {"field": "province"},
      "aggs": {
        "agg_02": {
          "top_hits": {
            "from": 0,
            "size": 1,
            "sort": [
              {"age": {"order": "desc"}
              }
            ],
            "_source": {"includes": ["id","age","name"]
            }
          }
        }
      }
    }
  }
}

3.8.3 java

@Test
@DisplayName("top hits 聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .query(query -> query.range(range -> range.field("age").gt(JsonData.of(10))))
                    .aggregations("agg_01", agg ->
                            agg.terms(terms ->
                                            terms.field("province")
                                    )
                                    .aggregations("agg_02", subAgg ->
                                            subAgg.topHits(topHits ->
                                                    topHits.from(0)
                                                            .size(1)
                                                            .sort(sort -> sort.field(field -> field.field("age").order(SortOrder.Desc)))
                                                            .source(source -> source.filter(filter -> filter.includes(Arrays.asList("id", "age", "name"))))
                                            )
                                    )
                    )
    );
    System.out.println("request:" + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response:" + response);
}

3.8.4 运行后果

4、残缺代码

https://gitee.com/huan1993/spring-cloud-parent/tree/master/es/es8-api/src/main/java/com/huan/es8/aggregations/metric

5、参考文档

1、https://www.elastic.co/guide/en/elasticsearch/reference/7.17/search-aggregations-metrics-max-aggregation.html

正文完
 0