def connect_elasticsearch(host='localhost', port=9200):
    """Create an Elasticsearch client and print whether the node answers a ping.

    Args:
        host: Hostname of the Elasticsearch node (defaults to the local machine).
        port: Port of the Elasticsearch node.

    Returns:
        The ``Elasticsearch`` client. It is returned regardless of the ping
        result; the outcome of the ping is only printed, so callers that need
        to know whether the node is reachable should call ``ping()`` themselves.
    """
    es = Elasticsearch([{'host': host, 'port': port}])
    if es.ping():
        print('Connected ')
    else:
        print('it could not connect!')
    return es
# Alternative queries against the same index, kept for reference.
#
# Match every document, asking for 100 hits:
# res = es.search(index = 'kibana_sample_data_logs', body = {'query': {"match_all": {}} ,"size": 100} )
#
# Documents whose "host" field has the prefix "www":
# res = es.search(index = 'kibana_sample_data_logs', body = {"query":{"bool":{"must":[{"prefix":{"host":"www"}}],"must_not":[],"should":[]}},"from":0,"size":10,"sort":[],"aggs":{}} )
#
# query_string search on the "host" field:
# res = es.search(index = 'kibana_sample_data_logs', body ={'query':{"bool":{"must":[{"query_string":{"default_field":"host","query":"elastic-elastic-elastic.org"}}]}},"size":100} )

# Active query: a bool query whose three "must" clauses are a term clause on
# "referer", a range clause on "bytes" (gt 6000) and a match clause on
# "extension". Paging starts at offset 0 with a page size of 10.
res = es.search(
    index='kibana_sample_data_logs',
    body={
        "query": {
            "bool": {
                "must": [
                    {"term": {"referer": "http://twitter.com/success/wendy-lawrence"}},
                    {"range": {"bytes": {"gt": "6000"}}},
                    {"match": {"extension": "deb"}},
                ],
                "must_not": [],
                "should": [],
            }
        },
        "from": 0,
        "size": 10,
        "sort": [],
        "aggs": {},
    },
)
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# The search response keeps the matching documents under hits -> hits.
aim = res["hits"]["hits"]

# Extract the fields we need from each hit's "_source":
# one [clientip, agent, host] row per hit.
outter = [
    [hit['_source']['clientip'], hit['_source']['agent'], hit['_source']['host']]
    for hit in aim
]
print(outter)
[['223.87.60.27', 'Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421 Firefox/6.0a1', 'artifacts.elastic.co'], ['223.87.60.27', 'Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421 Firefox/6.0a1', 'artifacts.elastic.co']]
5. 导出内容为 CSV
1 2 3
# Build a table from the extracted rows; each row is matched positionally
# against the three column labels below.
data = pd.DataFrame(
    data=outter,
    columns=["IP地址", "访问标识", "主机名"],
)