-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfetchCluster.py
More file actions
48 lines (41 loc) · 1.69 KB
/
fetchCluster.py
File metadata and controls
48 lines (41 loc) · 1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from conn import connElasticSearch
import pandas as pd
def fetch_cluster(size, *, cn=None):
    """Build a DataFrame pairing book summaries with the users who rated them.

    Retrieves up to ``size`` documents from the ``books`` index. For each
    book's ISBN, up to 100 rating documents are pulled from ``bratings``
    through a ``top_hits`` aggregation, and each rating's ``uid`` is resolved
    against the ``users`` index.

    Args:
        size: Maximum number of books to fetch from the ``books`` index.
        cn: Optional client object exposing a ``search(...)`` method. When
            omitted, a new one is obtained from ``connElasticSearch()``.
            Passing an existing client lets callers reuse a connection.

    Returns:
        A ``pandas.DataFrame`` with the columns ``summary`` and ``users``.
        Each ``users`` cell is a list of ``(location, age, rating)`` tuples,
        one per rating whose user was found; ratings whose user is not found
        are skipped.

    Raises:
        KeyError: If a returned document lacks one of the fields read here
            (``summary``, ``isbn``, ``uid``, ``rating``, ``location``,
            ``age``).
    """
    if cn is None:
        cn = connElasticSearch()

    # Retrieve the books; match_all selects every document, `size` caps it.
    result = cn.search(index='books', query={"match_all": {}}, size=size)
    sources = [hit['_source'] for hit in result['hits']['hits']]
    summaries = [src['summary'] for src in sources]
    isbns = [src['isbn'] for src in sources]

    # The aggregation itself is *named* "aggs" and is of type top_hits, which
    # is why the response is read from res['aggregations']['aggs'] below.
    ratings_agg = {
        "aggs": {
            "top_hits": {
                "size": 100,
                "_source": ['uid', 'rating'],
            }
        }
    }

    # The same user typically rates several books; remember each lookup
    # (including misses, stored as None) so every uid is queried only once.
    user_cache = {}

    def lookup_user(uid):
        """Return the ``_source`` of the user matching *uid*, or None."""
        if uid not in user_cache:
            response = cn.search(
                index='users', query={"match": {"uid": uid}}, size=1
            )
            found = response['hits']['hits']
            user_cache[uid] = found[0]['_source'] if found else None
        return user_cache[uid]

    users = []
    for isbn in isbns:
        # size=0: only the aggregation result is needed, not the query hits.
        res = cn.search(
            index='bratings',
            query={"match": {"isbn": isbn}},
            aggregations=ratings_agg,
            size=0,
        )
        rated_by = []
        for hit in res['aggregations']['aggs']['hits']['hits']:
            rating_doc = hit['_source']
            user = lookup_user(rating_doc['uid'])
            if user is not None:
                rated_by.append(
                    (user['location'], user['age'], rating_doc['rating'])
                )
        # One entry per book keeps `users` aligned with `summaries`.
        users.append(rated_by)

    return pd.DataFrame(
        {"summary": summaries, "users": users},
        columns=['summary', 'users'],
    )