Pandas for GrimoireLab indexes
Pandas is one of the most common libraries used in data analytics with Python. It can be very useful when dealing with GrimoireLab indexes. In this section, we will explore how to create dataframes (one of the most interesting data structures provided by Pandas) from GrimoireLab indexes, and how to work with them.
Building a dataframe from an index
To start exploring how to use Pandas with GrimoireLab, we will create a simple program that creates a couple of CSV files from information in an index (pandas_1.py).
First things first: let’s import the modules we’re going to need. In addition to datetime, we will use elasticsearch and elasticsearch_dsl for accessing the ElasticSearch instance where our index lives, and of course Pandas.
from datetime import datetime
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
import pandas as pd
Then we create an object for accessing the ElasticSearch instance. In this case, we’ll assume that it is running on our computer, with its REST interface available on port 9200 (the default port used by ElasticSearch). The verify_certs parameter is not strictly necessary, but you may need it if you’re connecting to an ElasticSearch instance over TLS (https) with a bad certificate. In any case, if you don’t need it, better not to use it (just remove it from the call to the Elasticsearch constructor).
es = Elasticsearch('http://localhost:9200', verify_certs=False)
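If you want to check that the connection actually works before querying, a quick sanity check could be something like the following (just a sketch: info returns some basic data about the ElasticSearch instance, and the call will fail if it is not reachable):
# Ask the instance for its basic information (raises an error if unreachable)
print(es.info())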
Now we can build the query, using the facilities provided by elasticsearch_dsl. The query will be on the index named git (which should be a GrimoireLab enriched git index). These indexes store one document per commit, with some fields such as author_name (name of the author of the commit) and author_date (date of authorship of the commit).
The query builds buckets of commits, grouped by author name (field author_name), aggregated as the first commit for each of these authors (minimum author_date for all the documents in each bucket).
s = Search(using=es, index='git')
s.aggs.bucket('by_authors', 'terms', field='author_name', size=10000) \
    .metric('first_commit', 'min', field='author_date')
s = s.sort("author_date")
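By the way, if you are curious about the actual query that will be sent to ElasticSearch, elasticsearch_dsl can show it as a plain dictionary. A quick way of inspecting it (just a convenience, not needed for the rest of the program):
from pprint import pprint
# Show the query, in ElasticSearch query DSL, that s will send
pprint(s.to_dict())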
Now, we can execute the query:
result = s.execute()
Note that we have specified a size of 10,000 for the buckets, which should be enough in most situations. But feel free to make it larger if you’re working with a really large index.
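If you want to be sure that no authors were left out, the terms aggregation reports the number of documents that did not fit into any of the returned buckets. A possible check could be the following sketch, relying on the sum_other_doc_count field that ElasticSearch includes in terms aggregation results:
# Number of commits whose author did not fit in the requested buckets.
# If this is not 0, increase the size parameter of the terms aggregation.
left_out = result['aggregations']['by_authors']['sum_other_doc_count']
print("Commits by authors left out of the buckets:", left_out)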
And the moment for creating a Pandas dataframe has arrived! Dataframes are somewhat like tables. In this case, we’re going to have one row in that table per bucket (author), with the author name and the date of their first commit as columns.
We will first create a list with all the buckets received (buckets). It is just a matter of extracting the relevant parts from the result of executing the query:
buckets_result = result['aggregations']['by_authors']['buckets']
buckets = []
for bucket in buckets_result:
    # author_date comes as milliseconds since the epoch: convert to seconds
    first_commit = bucket['first_commit']['value']/1000
    buckets.append(
        {'first_commit': datetime.utcfromtimestamp(first_commit),
         'author': bucket['key']}
    )
If this code is not clear, you can insert the following two lines right after the execution of the query. They will print the JSON document received from ElasticSearch, in all its glory.
from pprint import pprint
pprint(result.to_dict())
It will print something akin to:
{'_shards': {'failed': 0, 'successful': 5, 'total': 5},
 'aggregations': {'by_authors': {'buckets': [{'doc_count': 1345,
                                              'first_commit': {'value': 1443636916000.0,
                                                               'value_as_string': '2015-09-30T18:15:16.000Z'},
                                              'key': 'Alvaro del Castillo'},
                                             {'doc_count': 557,
                                              'first_commit': {'value': 1439921307000.0,
                                                               'value_as_string': '2015-08-18T18:08:27.000Z'},
                                              'key': 'Santiago Dueñas'},
Once we have the list ready, we can create a Pandas dataframe out of it:
authors = pd.DataFrame.from_records(buckets)
There are better and more efficient ways of creating a dataframe out of the results of a query, but this one is probably the clearest, so let’s stick with it for now. We have our first dataframe: authors.
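For instance, one of those more efficient alternatives could build the columns directly from the buckets, letting Pandas convert all the timestamps (which come as milliseconds since the epoch) in one go. This is just a sketch, using the same buckets_result obtained above:
# Build the dataframe directly from the buckets, converting all timestamps
# at once with pd.to_datetime (ElasticSearch reports them in milliseconds)
authors_alt = pd.DataFrame({
    'author': [bucket['key'] for bucket in buckets_result],
    'first_commit': pd.to_datetime(
        [bucket['first_commit']['value'] for bucket in buckets_result],
        unit='ms')
})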
Now, we can start using the magic of Pandas. For example, we can order the dataframe (that is, the rows in the dataframe, each corresponding to one author) as follows:
authors.sort_values(by='first_commit', ascending=False, inplace=True)
If you want to see the nice dataframe you have, you can use the pprint trick again:
pprint(authors)
This will produce something like:
                        author        first_commit
...
7   Jesus M. Gonzalez-Barahona 2015-12-31 19:16:25
0          Alvaro del Castillo 2015-09-30 18:15:16
1              Santiago Dueñas 2015-08-18 18:08:27
Each line in this output corresponds to a row in the dataframe. The first column is the index (which is not in ascending order because we reordered the dataframe by first commit date).
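If the out-of-order index bothers you, it is easy to produce a fresh one after sorting. This step is optional, and nothing below depends on it:
# Renumber the rows after sorting, dropping the old index
authors.reset_index(drop=True, inplace=True)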
And some more Pandas magic: let’s produce a new dataframe with the number of new authors per month. In this case, each row in the dataframe will correspond to a month.
by_month = authors['first_commit'] \
    .groupby([authors.first_commit.dt.year,
              authors.first_commit.dt.month]) \
    .agg('count')
We first select the first_commit column. The groupby method will produce groups by year / month, and the agg method will then aggregate them, counting the rows in each group. We can use once again the good old pprint trick to see the resulting by_month object (strictly speaking a Pandas series, since it has a single column):
first_commit  first_commit
2015          8                1
              9                1
              12               1
2016          2                1
              3                4
              4                1
              7                3
              11               2
2017          1                1
Name: first_commit, dtype: int64
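As an aside, there is a more compact (arguably more idiomatic) way of getting the same counts, using Pandas periods instead of a year / month pair. Just a sketch, producing one entry per month with a period index:
# Convert each first commit date to its month, and count how often each month appears
by_month_alt = authors['first_commit'].dt.to_period('M').value_counts().sort_index()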
And we’re ready for the final fireworks: producing CSV files from both data structures:
by_month.to_csv('authors_per_month.csv')
authors.to_csv('authors_first.csv',
               columns=['first_commit', 'author'],
               index=False)
The to_csv method just dumps the data to a file, using CSV conventions. We can check the files created (authors_per_month.csv and authors_first.csv):
2015,8,1
2015,9,1
2015,12,1
2016,2,1
2016,3,4
2016,4,1
2016,7,3
2016,11,2
2017,1,1
...
2015-12-31 19:16:25,Jesus M. Gonzalez-Barahona
2015-09-30 18:15:16,Alvaro del Castillo
2015-08-18 18:08:27,Santiago Dueñas
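If you want to double check the result, you can read the authors file back into a dataframe, parsing the dates again. A quick sketch (the per-month file was written without a header row, so reading it back would need a bit more care):
# Read the authors CSV back, parsing the first_commit column as dates
authors_check = pd.read_csv('authors_first.csv', parse_dates=['first_commit'])
print(authors_check.head())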