使用Python操作ElasticSearch

摘要:使用 python 的 elasticsearch 库,连接 ES 数据库。

Python 程序示例

1. json 文件转 ndjson

import json
import os

## Input (plain JSON) and output (bulk NDJSON) file locations.
new_file_path = r"C:\\Users\\ndjson.json"
old_file_path = r"C:\\Users\\old.json"


def json_to_ndjson(old_path, new_path, index_name="my_index_name"):
    """Convert a JSON export into an Elasticsearch bulk (NDJSON) file.

    The input file must contain a list of objects under ``data`` -> ``obj``.
    For every object two lines are written to the output file: an ``index``
    action line (with a sequential string ``_id`` starting at "1") followed
    by a document line holding the object's ``kId``, ``name`` and
    ``organization`` fields.

    Args:
        old_path: Path of the source JSON file (read as UTF-8).
        new_path: Path of the NDJSON file to create or overwrite (UTF-8).
        index_name: Value written to ``_index`` in every action line.

    Returns:
        The number of records written.

    Raises:
        KeyError: If ``data``/``obj`` or one of the copied fields is missing.
    """
    # Read and validate the input first so that a bad or missing source file
    # does not leave a truncated output file behind.
    with open(old_path, "r", encoding="utf-8") as source:
        records = json.load(source)["data"]["obj"]

    with open(new_path, "w", encoding="UTF-8") as target:
        for doc_id, record in enumerate(records, start=1):
            action = {"index": {"_index": index_name, "_id": str(doc_id)}}
            document = {
                "kId": record["kId"],
                "name": record["name"],
                "organization": record["organization"],
            }
            # ensure_ascii=False keeps non-ASCII text readable while still
            # escaping quotes, backslashes and newlines, so each record stays
            # valid JSON on exactly one line.
            target.write(json.dumps(action, ensure_ascii=False) + "\n")
            target.write(json.dumps(document, ensure_ascii=False) + "\n")

    return len(records)


if __name__ == "__main__":
    json_to_ndjson(old_file_path, new_file_path)

2. 读取本地 json 文件入 ES 库

import json
import os
from datetime import datetime
from elasticsearch import Elasticsearch

## Create the Elasticsearch client.
# NOTE(review): whether the host/port keyword form is accepted depends on the
# installed elasticsearch-py version — confirm against the client in use.
es = Elasticsearch(host="localhost", port="9200")

## Location of the JSON file to import.
file_path = r"C:\\Users\\demo.json"

## Load the JSON file; the documents are read from data -> obj.
# The file is closed as soon as it has been parsed, so it is not held open
# while the documents are being sent to Elasticsearch.
with open(file_path, "r", encoding="utf-8") as oj:
    json_data = json.load(oj)

# Index every document and print the reported result with its position.
for i, doc in enumerate(json_data["data"]["obj"]):
    res = es.index(index="somebody_lawschool_list", body=doc)
    print(res['result'] + " -- " + str(i))

elasticsearch-py 安装和用例

完整文档详见: http://elasticsearch-py.readthedocs.org/

使用 pip 安装:

pip3 install elasticsearch

Elasticsearch 2.x 对应安装方法:

pip install elasticsearch2

# Elasticsearch 6.x
elasticsearch>=6.0.0,<7.0.0

# Elasticsearch 2.x
elasticsearch2

elasticsearch-py 示例代码:

>>> from datetime import datetime
>>> from elasticsearch import Elasticsearch

# With no arguments the client connects to localhost:9200.
>>> es = Elasticsearch()

# datetime values in the body are serialized when the document is indexed...
>>> es.index(index="my-index", doc_type="test-type", id=42, body={"any": "data", "timestamp": datetime.now()})
{u'_id': u'42', u'_index': u'my-index', u'_type': u'test-type', u'_version': 1, u'ok': True}

# ...but they are not deserialized on the way back: get() returns the
# timestamp as a plain string rather than a datetime object.
>>> es.get(index="my-index", doc_type="test-type", id=42)['_source']
{u'any': u'data', u'timestamp': u'2013-05-12T19:45:31.804229'}

原文地址 www.elastic.co