Elasticsearch analytics percent

2019-05-11 01:04发布

问题:

I am using Elasticsearch 1.7.3 to accumulate data for analytics reports.

I have an index that holds documents where each document has a numeric field called 'duration' (how many milliseconds the request took), and a string field called 'component'. There can be many documents with the same component name.

Eg.

{"component": "A", "duration": 10}
{"component": "B", "duration": 27}
{"component": "A", "duration": 5}
{"component": "C", "duration": 2}

I would like to produce a report that states for each component:

The sum of all 'duration' fields for this component.

A: 15
B: 27
C: 2

The percentage of this sum out of the total sum of duration of all documents. In my example

A: (10+5) / (10+27+5+2) * 100
B: 27 / (10+27+5+2) * 100
C: 2 / (10+27+5+2) * 100

The percentage of the documents for each component, out of the total components.

A: 2 / 4 * 100
B: 1 / 4 * 100
C: 1 / 4 * 100

How do I do that with Elasticsearch 1.7.3?

回答1:

With ES 1.7.3, there is no way to compute data based on the results of two different aggregations, this is something that can be done in ES 2.0 with pipeline aggregations, though.

However, what you're asking is not too complicated to do on the client-side with 1.7.3. If you use the query below, you'll get all you need to get the figures you expect:

POST components/_search
{
   "size": 0,
   "aggs": {
      "total_duration": {
         "sum": {
            "field": "duration"
         }
      },
      "components": {
         "terms": {
            "field": "component"
         },
         "aggs": {
            "duration_sum": {
               "sum": {
                  "field": "duration"
               }
            }
         }
      }
   }
}

The results would look like this:

{
   "took": 1,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 4,
      "max_score": 0,
      "hits": []
   },
   "aggregations": {
      "total_duration": {
         "value": 44
      },
      "components": {
         "doc_count_error_upper_bound": 0,
         "sum_other_doc_count": 0,
         "buckets": [
            {
               "key": "a",
               "doc_count": 2,
               "duration_sum": {
                  "value": 15
               }
            },
            {
               "key": "b",
               "doc_count": 1,
               "duration_sum": {
                  "value": 27
               }
            },
            {
               "key": "c",
               "doc_count": 1,
               "duration_sum": {
                  "value": 2
               }
            }
         ]
      }
   }
}

Now all you need to do would be the following. I'm using JavaScript, but you can do it in any other language that can read JSON.

var response = ...the JSON response above...
var total_duration = response.aggregations.total_duration.value;
var total_docs = response.hits.total;

response.aggregations.components.buckets.forEach(function(comp_stats) {
    // total duration for the component
    var total_duration_comp = comp_stats.duration_sum.value;

    // percentage duration of the component
    var perc_duration_comp = total_duration_comp / total_duration * 100;

    // percentage documents for the component
    var perc_doc_comp = comp_stats.doc_count / total_docs * 100;
});


回答2:

In ElasticSearch[2.x], You can use the bucket script aggregation, which is perfectly meet your needs!

eg:

{
    "bucket_script": {
        "buckets_path": {
            "my_var1": "the_sum", 
            "my_var2": "the_value_count"
        },
        "script": "my_var1 / my_var2"
    }
}

detail:

POST /sales/_search
{
    "size": 0,
    "aggs" : {
        "sales_per_month" : {
            "date_histogram" : {
                "field" : "date",
                "interval" : "month"
            },
            "aggs": {
                "total_sales": {
                    "sum": {
                        "field": "price"
                    }
                },
                "t-shirts": {
                  "filter": {
                    "term": {
                      "type": "t-shirt"
                    }
                  },
                  "aggs": {
                    "sales": {
                      "sum": {
                        "field": "price"
                      }
                    }
                  }
                },
                "t-shirt-percentage": {
                    "bucket_script": {
                        "buckets_path": {
                          "tShirtSales": "t-shirts>sales",
                          "totalSales": "total_sales"
                        },
                        "script": "params.tShirtSales / params.totalSales * 100"
                    }
                }
            }
        }
    }
}