-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtika.py
More file actions
33 lines (25 loc) · 853 Bytes
/
tika.py
File metadata and controls
33 lines (25 loc) · 853 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import asyncio
import json
class TikaCollect():
    """Merge several Tika-style responses into one combined payload.

    Each awaited task is expected to yield an indexable pair whose first
    item is a JSON document containing an ``"X-TIKA:content"`` key and whose
    second item is stored as ``request``.  What that second item represents
    is not visible from this class -- presumably the originating request
    body; confirm against the caller.  It must support item assignment and
    be JSON-serialisable, because ``get_request`` writes the collected
    content into it and dumps it.
    """

    # Concatenated "X-TIKA:content" values, in task order.
    data: str
    # Parsed JSON of the first usable response, minus "X-TIKA:content".
    headers: dict
    # Second item of the first usable response.
    request: dict

    def __init__(self):
        """Start with no collected content, metadata or request."""
        self.data = ""
        self.headers = dict()
        self.request = dict()

    async def async_collect(self, tasks: list):
        """Await all *tasks* concurrently and fold their results into self.

        Content from every parseable response is appended to ``self.data``.
        The remaining JSON keys and the paired request are captured once,
        from the first response that yields non-empty metadata.  Responses
        that cannot be parsed or lack the expected shape are skipped.

        Exceptions raised by the tasks themselves propagate out of
        ``asyncio.gather`` unchanged.
        """
        responses = await asyncio.gather(*tasks)
        for response in responses:
            try:
                tika: dict = json.loads(response[0])
                self.data += tika["X-TIKA:content"]
                # The original tested ``len(self.headers.keys)`` (method not
                # called), which always raised TypeError and was swallowed,
                # so headers/request were never filled in.
                if not self.headers:
                    del tika["X-TIKA:content"]
                    self.headers = tika
                    # NOTE(review): the source indentation was lost; it is
                    # assumed the request is captured together with the
                    # first metadata rather than on every response -- confirm.
                    self.request = response[1]
            except (TypeError, ValueError, KeyError, IndexError):
                # Deliberate best-effort: a malformed response (bad JSON,
                # missing content key, wrong shape) must not abort the rest.
                continue

    def get_request(self) -> tuple:
        """Return ``(json_body, headers)`` for the combined result.

        The collected content is written into ``self.request`` under
        ``"X-TIKA:content"`` (mutating it) before it is serialised.
        """
        self.request["X-TIKA:content"] = self.data
        return (json.dumps(self.request), self.headers)