es 聚合操作(二)
书接上文,示例数据在上一篇,这里就不展示了
一、Pipeline Aggregation
支持对聚合分析的结果,再次进行聚合分析。
Pipeline 的分析结果会输出到原结果中,根据位置的不同,分为两类:
- Sibling - 结果和现有分析结果同级
- Max,min,Avg & Sum Bucket
- Stats,Extended Status Bucket
- Percentiles Bucket
- Parent -结果内嵌到现有的聚合分析结果之中
- Derivative(求导)
- Cumultive Sum(累计求和)
- Moving Function(移动平均值 )
min_bucket
在员工数最多的工种里,找出平均工资最低的工种
#在员工数最多的工种里,找出平均工资最低的工种
POST /employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field": "job.keyword",
"size": 10
},
"aggs": {
"avg_salary": {
"avg": {
"field": "salary"
}
}
}
},
"min_salary_by_job": {
"min_bucket": {
"buckets_path": "jobs>avg_salary"
}
}
}
}
结果:
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 7,
"avg_salary" : {
"value" : 25571.428571428572
}
},
{
"key" : "Javascript Programmer",
"doc_count" : 4,
"avg_salary" : {
"value" : 19250.0
}
},
{
"key" : "QA",
"doc_count" : 3,
"avg_salary" : {
"value" : 21000.0
}
},
{
"key" : "DBA",
"doc_count" : 2,
"avg_salary" : {
"value" : 25000.0
}
},
{
"key" : "Web Designer",
"doc_count" : 2,
"avg_salary" : {
"value" : 20000.0
}
},
{
"key" : "Dev Manager",
"doc_count" : 1,
"avg_salary" : {
"value" : 50000.0
}
},
{
"key" : "Product Manager",
"doc_count" : 1,
"avg_salary" : {
"value" : 35000.0
}
}
]
},
"min_salary_by_job" : {
"value" : 19250.0,
"keys" : [
"Javascript Programmer"
]
}
}
}
可以看到上面所有工种 里面 最低薪资如下:
- min_salary_by_job 结果和 jobs 的聚合同级
- min_bucket 求之前结果的最小值
- 通过 bucket_path 关键字指定路径
stats_bucket
所有工种里面 薪资的统计数据
#所有工种里面 薪资的统计数据
POST /employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field": "job.keyword",
"size": 10
},
"aggs": {
"avg_salary": {
"avg": {
"field": "salary"
}
}
}
},
"stats_salary_by_job": {
"stats_bucket": {
"buckets_path": "jobs>avg_salary"
}
}
}
}
结果:
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 7,
"avg_salary" : {
"value" : 25571.428571428572
}
},
{
"key" : "Javascript Programmer",
"doc_count" : 4,
"avg_salary" : {
"value" : 19250.0
}
},
{
"key" : "QA",
"doc_count" : 3,
"avg_salary" : {
"value" : 21000.0
}
},
{
"key" : "DBA",
"doc_count" : 2,
"avg_salary" : {
"value" : 25000.0
}
},
{
"key" : "Web Designer",
"doc_count" : 2,
"avg_salary" : {
"value" : 20000.0
}
},
{
"key" : "Dev Manager",
"doc_count" : 1,
"avg_salary" : {
"value" : 50000.0
}
},
{
"key" : "Product Manager",
"doc_count" : 1,
"avg_salary" : {
"value" : 35000.0
}
}
]
},
"stats_salary_by_job" : {
"count" : 7,
"min" : 19250.0,
"max" : 50000.0,
"avg" : 27974.48979591837,
"sum" : 195821.42857142858
}
}
}
可以看到薪资的统计值:
percentiles
平均工资的百分位数
#平均工资的百分位数
POST /employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field": "job.keyword",
"size": 10
},
"aggs": {
"avg_salary": {
"avg": {
"field": "salary"
}
}
}
},
"percentiles_salary_by_job": {
"percentiles_bucket": {
"buckets_path": "jobs>avg_salary"
}
}
}
}
结果:
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 7,
"avg_salary" : {
"value" : 25571.428571428572
}
},
{
"key" : "Javascript Programmer",
"doc_count" : 4,
"avg_salary" : {
"value" : 19250.0
}
},
{
"key" : "QA",
"doc_count" : 3,
"avg_salary" : {
"value" : 21000.0
}
},
{
"key" : "DBA",
"doc_count" : 2,
"avg_salary" : {
"value" : 25000.0
}
},
{
"key" : "Web Designer",
"doc_count" : 2,
"avg_salary" : {
"value" : 20000.0
}
},
{
"key" : "Dev Manager",
"doc_count" : 1,
"avg_salary" : {
"value" : 50000.0
}
},
{
"key" : "Product Manager",
"doc_count" : 1,
"avg_salary" : {
"value" : 35000.0
}
}
]
},
"percentiles_salary_by_job" : {
"values" : {
"1.0" : 19250.0,
"5.0" : 19250.0,
"25.0" : 21000.0,
"50.0" : 25000.0,
"75.0" : 35000.0,
"95.0" : 50000.0,
"99.0" : 50000.0
}
}
}
}
Cumulative_sum
累计求和
# 累计求和
POST /employees/_search
{
"size": 0,
"aggs": {
"age": {
"histogram": {
"field": "age",
"min_doc_count": 0,
"interval": 1
},
"aggs": {
"avg_salary": {
"avg": {
"field": "salary"
}
},
"cumulative_salary": {
"cumulative_sum": {
"buckets_path": "avg_salary"
}
}
}
}
}
}
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"age" : {
"buckets" : [
{
"key" : 20.0,
"doc_count" : 1,
"avg_salary" : {
"value" : 9000.0
},
"cumulative_salary" : {
"value" : 9000.0
}
},
{
"key" : 21.0,
"doc_count" : 1,
"avg_salary" : {
"value" : 16000.0
},
"cumulative_salary" : {
"value" : 25000.0
}
},
{
"key" : 22.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 25000.0
}
},
{
"key" : 23.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 25000.0
}
},
{
"key" : 24.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 25000.0
}
},
{
"key" : 25.0,
"doc_count" : 3,
"avg_salary" : {
"value" : 17333.333333333332
},
"cumulative_salary" : {
"value" : 42333.33333333333
}
},
{
"key" : 26.0,
"doc_count" : 1,
"avg_salary" : {
"value" : 22000.0
},
"cumulative_salary" : {
"value" : 64333.33333333333
}
},
{
"key" : 27.0,
"doc_count" : 2,
"avg_salary" : {
"value" : 20000.0
},
"cumulative_salary" : {
"value" : 84333.33333333333
}
},
{
"key" : 28.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 84333.33333333333
}
},
{
"key" : 29.0,
"doc_count" : 2,
"avg_salary" : {
"value" : 20000.0
},
"cumulative_salary" : {
"value" : 104333.33333333333
}
},
{
"key" : 30.0,
"doc_count" : 2,
"avg_salary" : {
"value" : 30000.0
},
"cumulative_salary" : {
"value" : 134333.3333333333
}
},
{
"key" : 31.0,
"doc_count" : 2,
"avg_salary" : {
"value" : 28500.0
},
"cumulative_salary" : {
"value" : 162833.3333333333
}
},
{
"key" : 32.0,
"doc_count" : 3,
"avg_salary" : {
"value" : 27333.333333333332
},
"cumulative_salary" : {
"value" : 190166.66666666666
}
},
{
"key" : 33.0,
"doc_count" : 1,
"avg_salary" : {
"value" : 28000.0
},
"cumulative_salary" : {
"value" : 218166.66666666666
}
},
{
"key" : 34.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 218166.66666666666
}
},
{
"key" : 35.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 218166.66666666666
}
},
{
"key" : 36.0,
"doc_count" : 1,
"avg_salary" : {
"value" : 38000.0
},
"cumulative_salary" : {
"value" : 256166.66666666666
}
},
{
"key" : 37.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 256166.66666666666
}
},
{
"key" : 38.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 256166.66666666666
}
},
{
"key" : 39.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 256166.66666666666
}
},
{
"key" : 40.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 256166.66666666666
}
},
{
"key" : 41.0,
"doc_count" : 1,
"avg_salary" : {
"value" : 50000.0
},
"cumulative_salary" : {
"value" : 306166.6666666666
}
}
]
}
}
}
二、聚合的作用范围
默认情况下,es 会对 query 中所有范围进行聚合操作
也可以指定范围:
Filter
找到所有工种,并计算年龄大于35的平均薪资
#找到所有工种,并计算年龄大于35的平均薪资
POST /employees/_search
{
"size": 0,
"aggs": {
"older_person": {
"filter": {
"range": {
"age": {
"gte": 35
}
}
},
"aggs": {
"avg_salary": {
"avg": {
"field": "salary"
}
}
}
},
"all_jobs": {
"terms": {
"field": "job.keyword"
}
}
}
}
结果:
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"older_person" : {
"doc_count" : 2,
"avg_salary" : {
"value" : 44000.0
}
},
"all_jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 7
},
{
"key" : "Javascript Programmer",
"doc_count" : 4
},
{
"key" : "QA",
"doc_count" : 3
},
{
"key" : "DBA",
"doc_count" : 2
},
{
"key" : "Web Designer",
"doc_count" : 2
},
{
"key" : "Dev Manager",
"doc_count" : 1
},
{
"key" : "Product Manager",
"doc_count" : 1
}
]
}
}
}
Post field
找到所有工种,并匹配符合条件的工种
POST /employees/_search
{
"aggs": {
"jobs": {
"terms": {
"field": "job.keyword",
"size": 10
}
}
},
"post_filter": {
"match": {
"job.keyword": "Dev Manager"
}
}
}
结果:
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"name" : "Underwood",
"age" : 41,
"job" : "Dev Manager",
"gender" : "male",
"salary" : 50000
}
}
]
},
"aggregations" : {
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 7
},
{
"key" : "Javascript Programmer",
"doc_count" : 4
},
{
"key" : "QA",
"doc_count" : 3
},
{
"key" : "DBA",
"doc_count" : 2
},
{
"key" : "Web Designer",
"doc_count" : 2
},
{
"key" : "Dev Manager",
"doc_count" : 1
},
{
"key" : "Product Manager",
"doc_count" : 1
}
]
}
}
}
global
默认基于 query 条件聚合,可以使用 global 进行全部数据聚合
POST employees/_search
{
"size": 0,
"query": {
"range": {
"age": {
"gte": 40
}
}
},
"aggs": {
"jobs": {
"terms": {
"field":"job.keyword"
}
},
"all":{
"global":{},
"aggs":{
"salary_avg":{
"avg":{
"field":"salary"
}
}
}
}
}
}
可以看到 大于 40 的工种只有一个,但是平均薪资是计算的全部员工
三、排序
指定order,按照count和key进行排序:
- 默认情况,按照count降序排序
- 指定size,就能返回相应的桶
找到所有工种,先以 Count 升序,如果 count 一样,然后再根据 key 降序
POST /employees/_search
{
"size": 0,
"query": {
"range": {
"age": {
"gte": 20
}
}
},
"aggs": {
"jobs": {
"terms": {
"field": "job.keyword",
"order": [
{"_count": "asc"},
{"_key": "desc"}
]
}
}
}
}
根据平均薪资降序,并统计平均薪资
POST employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field":"job.keyword",
"order":[ {
"avg_salary":"desc"
}]
},
"aggs": {
"avg_salary": {
"avg": {
"field":"salary"
}
}
}
}
}
}
结果:
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Dev Manager",
"doc_count" : 1,
"avg_salary" : {
"value" : 50000.0
}
},
{
"key" : "Product Manager",
"doc_count" : 1,
"avg_salary" : {
"value" : 35000.0
}
},
{
"key" : "Java Programmer",
"doc_count" : 7,
"avg_salary" : {
"value" : 25571.428571428572
}
},
{
"key" : "DBA",
"doc_count" : 2,
"avg_salary" : {
"value" : 25000.0
}
},
{
"key" : "QA",
"doc_count" : 3,
"avg_salary" : {
"value" : 21000.0
}
},
{
"key" : "Web Designer",
"doc_count" : 2,
"avg_salary" : {
"value" : 20000.0
}
},
{
"key" : "Javascript Programmer",
"doc_count" : 4,
"avg_salary" : {
"value" : 19250.0
}
}
]
}
}
}
对工种分类,并根据最低薪资降序,并统计薪资
POST employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field":"job.keyword",
"order":[ {
"stats_salary.min":"desc"
}]
},
"aggs": {
"stats_salary": {
"stats": {
"field":"salary"
}
}
}
}
}
}
结果:
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Dev Manager",
"doc_count" : 1,
"stats_salary" : {
"count" : 1,
"min" : 50000.0,
"max" : 50000.0,
"avg" : 50000.0,
"sum" : 50000.0
}
},
{
"key" : "Product Manager",
"doc_count" : 1,
"stats_salary" : {
"count" : 1,
"min" : 35000.0,
"max" : 35000.0,
"avg" : 35000.0,
"sum" : 35000.0
}
},
{
"key" : "DBA",
"doc_count" : 2,
"stats_salary" : {
"count" : 2,
"min" : 20000.0,
"max" : 30000.0,
"avg" : 25000.0,
"sum" : 50000.0
}
},
{
"key" : "QA",
"doc_count" : 3,
"stats_salary" : {
"count" : 3,
"min" : 18000.0,
"max" : 25000.0,
"avg" : 21000.0,
"sum" : 63000.0
}
},
{
"key" : "Web Designer",
"doc_count" : 2,
"stats_salary" : {
"count" : 2,
"min" : 18000.0,
"max" : 22000.0,
"avg" : 20000.0,
"sum" : 40000.0
}
},
{
"key" : "Javascript Programmer",
"doc_count" : 4,
"stats_salary" : {
"count" : 4,
"min" : 16000.0,
"max" : 25000.0,
"avg" : 19250.0,
"sum" : 77000.0
}
},
{
"key" : "Java Programmer",
"doc_count" : 7,
"stats_salary" : {
"count" : 7,
"min" : 9000.0,
"max" : 38000.0,
"avg" : 25571.428571428572,
"sum" : 179000.0
}
}
]
}
}
}
感谢观看!!!感兴趣的小伙伴可以关注收藏,持续更新中~~~