elasticsearch之metric聚合

2022-12-27 15:02:40

1、背景

此篇文章簡單的記錄一下 elasticsearchmetric聚合操作。比如求 平均值、最大值、最小值、求和、總計、去重總計等。

2、準備資料

2.1 準備mapping

PUT /index_person
{
  "settings": {
    "number_of_shards": 1
  },
  "mappings": {
    "properties": {
      "id":{
        "type": "long"
      },
      "name": {
        "type": "keyword"
      },
      "age": {
        "type": "integer"
      },
      "class":{
        "type": "text",
        "fielddata": true
      },
      "province":{
        "type": "keyword"
      }
    }
  }
}

2.2 準備資料

PUT /index_person/_bulk
{"index":{"_id":1}}
{"id":1, "name":"張三","age":18,"class":"大一班","province":"湖北"}
{"index":{"_id":2}}
{"id":2, "name":"李四","age":19,"class":"大一班","province":"湖北"}
{"index":{"_id":3}}
{"id":3, "name":"王武","age":20,"class":"大二班","province":"北京"}
{"index":{"_id":4}}
{"id":4, "name":"趙六","age":21,"class":"大三班技術班","province":"北京"}
{"index":{"_id":5}}
{"id":5, "name":"錢七","age":22,"class":"大三班","province":"湖北"}

3、metric聚合

3.1 max 平均值

3.1.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "agg_01": {
      "max": {
        "field": "age",
        "missing": 10
      }
    }
  }
}


POST /index_person/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "agg_01": {
      "max": {
        "script": {
          "lang": "painless",
          "source": """
            doc.age
          """
        }
      }
    }
  }
}


POST /index_person/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "agg_01": {
      "max": {
        "field": "age", 
        "script": {
          "lang": "painless",
          "source": """
            _value * params.a
          """,
          "params": {
            "a": 2
          }
        }
      }
    }
  }
}

3.1.2 java程式碼

@Test
@DisplayName("最大值聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.max(max ->
                                    // 聚合的欄位
                                    max.field("age")
                                            // 如果聚合的檔案缺失這個欄位,則給10
                                            .missing(10)
                            )
                    )
    );
    System.out.println("request: " + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response: " + response);
}

@Test
@DisplayName("指令碼聚合")
public void test02() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.max(max ->
                                    max.script(script ->
                                            script.inline(inline ->
                                                    inline.lang(ScriptLanguage.Painless)
                                                            // 指令碼錶示式
                                                            .source("doc.age")
                                            )
                                    )
                            )
                    )
    );
    System.out.println("request: " + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response: " + response);
}

@Test
@DisplayName("值指令碼聚合")
public void test03() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.max(max ->
                                    // 指定參與聚合的欄位
                                    max.field("age")
                                            .script(script ->
                                                    script.inline(inline ->
                                                            inline.lang(ScriptLanguage.Painless)
                                                                    // 指令碼錶示式
                                                                    .source("_value * params.plus")
                                                                    // 引數
                                                                    .params("plus", JsonData.of(2))
                                                    )
                                            )
                            )
                    )
    );
    System.out.println("request: " + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response: " + response);
}

3.2 min最小值

3.2.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "agg_01": {
      "min": {
        "field": "age",
        "missing": 10
      }
    }
  }
}

3.2.2 java

POST /index_person/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "agg_01": {
      "min": {
        "field": "age",
        "missing": 10
      }
    }
  }
}

3.3 min最小值

3.3.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "agg_01": {
      "avg": {
        "field": "age",
        "missing": 10
      }
    }
  }
}

3.3.2 java

@Test
@DisplayName("平均值聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.avg(avg ->
                                    // 聚合的欄位
                                    avg.field("age")
                                            // 如果聚合的檔案缺失這個欄位,則給10
                                            .missing(10)
                            )
                    )
    );
    System.out.println("request: " + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response: " + response);
}

3.4 min最小值

3.4.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "agg_01": {
      "sum": {
        "field": "age",
        "missing": 10
      }
    }
  }
}

3.4.2 java

@Test
@DisplayName("求和聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.sum(sum ->
                                    // 聚合的欄位
                                    sum.field("age")
                                            // 如果聚合的檔案缺失這個欄位,則給10
                                            .missing(10)
                            )
                    )
    );
    System.out.println("request: " + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response: " + response);
}

3.5 count(*)

3.5.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "agg_01": {
      "value_count": {
        "field": "province",
        "missing": 10
      }
    }
  }
}

3.5.2 java

@Test
@DisplayName("count(*)聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.valueCount(valueCount ->
                                    // 聚合的欄位
                                    valueCount.field("age")
                                            // 如果聚合的檔案缺失這個欄位,則給10
                                            .missing(10)
                            )
                    )
    );
    System.out.println("request: " + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response: " + response);
}

3.6 count(distinct)

3.6.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "agg_01": {
      "cardinality": {
        "field": "province",
        "missing": 10
      }
    }
  }
}

3.6.2 java

@Test
@DisplayName("count(distinct)聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.cardinality(cardinality ->
                                    // 聚合的欄位
                                    cardinality.field("province")
                                            // 如果聚合的檔案缺失這個欄位,則給10
                                            .missing(10)
                            )
                    )
    );
    System.out.println("request: " + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response: " + response);
}

3.7 stat (max,min,avg,count,sum)

3.7.1 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "agg_01": {
      "stats": {
        "field": "avg",
        "missing": 10
      }
    }
  }
}

3.7.2 java

@Test
@DisplayName("stat聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .aggregations("agg_01", agg ->
                            agg.stats(stats ->
                                    // 聚合的欄位
                                    stats.field("age")
                                            // 如果聚合的檔案缺失這個欄位,則給10
                                            .missing(10)
                            )
                    )
    );
    System.out.println("request: " + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response: " + response);
}

3.8 聚合後返回每個聚合涉及的檔案

3.8.1 需求

根據 province進行terms聚合,然後獲取每個terms聚合 age最大的那個檔案。

3.8.2 dsl

POST /index_person/_search
{
  "size": 0,
  "query": {
    "range": {
      "age": {
        "gte": 10
      }
    }
  },
  "aggs": {
    "agg_01": {
      "terms": {
        "field": "province"
      },
      "aggs": {
        "agg_02": {
          "top_hits": {
            "from": 0,
            "size": 1,
            "sort": [
              {
                "age": {"order": "desc"}
              }
            ],
            "_source": {
              "includes": ["id","age","name"]
            }
          }
        }
      }
    }
  }
}

3.8.3 java

@Test
@DisplayName("top hits 聚合")
public void test01() throws IOException {
    SearchRequest request = SearchRequest.of(searchRequest ->
            searchRequest.index("index_person")
                    .size(0)
                    .query(query -> query.range(range -> range.field("age").gt(JsonData.of(10))))
                    .aggregations("agg_01", agg ->
                            agg.terms(terms ->
                                            terms.field("province")
                                    )
                                    .aggregations("agg_02", subAgg ->
                                            subAgg.topHits(topHits ->
                                                    topHits.from(0)
                                                            .size(1)
                                                            .sort(sort -> sort.field(field -> field.field("age").order(SortOrder.Desc)))
                                                            .source(source -> source.filter(filter -> filter.includes(Arrays.asList("id", "age", "name"))))
                                            )
                                    )
                    )
    );
    System.out.println("request: " + request);
    SearchResponse<String> response = client.search(request, String.class);
    System.out.println("response: " + response);
}

3.8.4 執行結果

4、完整程式碼

https://gitee.com/huan1993/spring-cloud-parent/tree/master/es/es8-api/src/main/java/com/huan/es8/aggregations/metric

5、參考檔案

1、https://www.elastic.co/guide/en/elasticsearch/reference/7.17/search-aggregations-metrics-max-aggregation.html