-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspeech to text.py
More file actions
107 lines (85 loc) · 3.2 KB
/
speech to text.py
File metadata and controls
107 lines (85 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from __future__ import print_function
import time
import json
import boto3
import os.path
import requests
from pydub import AudioSegment
from pydub.utils import mediainfo
# Combining files
# Disabled example: the code below sits inside a bare string literal, so it is
# never executed. It shows two WAV files being loaded with
# AudioSegment.from_wav, concatenated with `+`, and exported as one WAV file.
'''
sound1 = AudioSegment.from_wav("filename01.wav")
sound2 = AudioSegment.from_wav("filename02.wav")
combined_sounds = sound1 + sound2
combined_sounds.export("joinedFile.wav", format = "wav")
'''
# Pre-process the local recording: load it with pydub, bring it to a
# 16000 Hz frame rate, and make sure a .wav file with that content exists on
# disk. After this block `local_file` names the file to upload.
local_file = "main.wav"
# splitext removes only the final extension, so names containing extra dots
# or a leading "./" keep their full stem (split(".")[0] truncated them).
local_file_name, local_file_ext = os.path.splitext(local_file)
change_file = local_file_name + ".wav"
sound = AudioSegment.from_file(local_file)
resampled = sound.frame_rate != 16000
if resampled:
    sound = sound.set_frame_rate(16000)
# Export when the container is not WAV *or* when the audio was resampled;
# previously a resampled .wav input was never written back, so the file that
# got uploaded still had its original frame rate.
# NOTE: for a .wav input this overwrites the input file in place.
if resampled or local_file_ext.lower() != ".wav":
    sound.export(change_file, format="wav")
    local_file = change_file
# Upload the local file to the S3 bucket, using the file name as object key.
bucket_name = 'speech.to.text'
bucket_file = local_file
s3 = boto3.client('s3')
s3.upload_file(Filename=local_file, Bucket=bucket_name, Key=bucket_file)
# Save the bucket object back to the local path (replaces the local copy).
s3.download_file(Bucket=bucket_name, Key=bucket_file, Filename=local_file)
### Transcribe the bucket file to text
# Starts an Amazon Transcribe job for the uploaded object (Korean, WAV, with
# speaker labels for up to 4 speakers) and blocks until the job reaches a
# terminal state. `status` holds the last get_transcription_job response.
transcribe = boto3.client('transcribe')
# NOTE(review): the job name is just the file name, so running the script
# again for the same file reuses the name - confirm how reruns should be
# handled, since Transcribe expects job names to be unique.
job_name = bucket_file
# Derive the URI from bucket_name so it cannot drift from the bucket the file
# was actually uploaded to.
job_uri = 's3://' + bucket_name + '/' + job_name
transcribe.start_transcription_job(
    TranscriptionJobName=job_name,
    Media={'MediaFileUri': job_uri},
    MediaFormat='wav',
    LanguageCode='ko-KR',
    Settings={'ShowSpeakerLabels': True, 'MaxSpeakerLabels': 4},
)
# Poll every 10 seconds until the job is COMPLETED or FAILED.
while True:
    status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
    job_state = status['TranscriptionJob']['TranscriptionJobStatus']
    if job_state in ('COMPLETED', 'FAILED'):
        break
    print("Not ready yet...")
    time.sleep(10)
# A failed job has no transcript to download; stop here with the reason the
# service reported instead of failing later on a missing 'Transcript' key.
if job_state == 'FAILED':
    raise RuntimeError(
        "Transcription job '" + job_name + "' failed: "
        + str(status['TranscriptionJob'].get('FailureReason', 'no reason reported'))
    )
### Save the transcript file locally
# Downloads the transcript JSON from the URI in the job status, writes it to
# origin.json, then parses that file into `json_data`.
url = status['TranscriptionJob']['Transcript']['TranscriptFileUri']
r = requests.get(url, allow_redirects=True)
# Fail on an HTTP error instead of saving an error body as origin.json and
# hitting a confusing JSON decode error afterwards.
r.raise_for_status()
# The context manager guarantees the bytes are flushed and the handle is
# closed before the file is reopened for parsing.
with open('origin.json', 'wb') as transcript_file:
    transcript_file.write(r.content)
with open("origin.json", "r", encoding="utf-8") as json_file:
    json_data = json.load(json_file)
# Gather the speaker segments (start_time, speaker_label, end_time) into
# `result`, stripping the nested 'items' list from every segment.
result = []
for segments in json_data['results']['speaker_labels']['segments']:
    segments.pop('items')
    result.append(segments)
# Convert word-level items into segment-level text.
# Walks the transcript items in order and attaches to each speaker segment in
# `result` a 'result' string made of its words, each followed by one space.
# A word belongs to the current segment while its end_time does not exceed the
# segment's end_time; the first word past that boundary opens the next segment.
count = 0     # index of the segment currently being filled
content = ""  # text accumulated so far for result[count]
for item in json_data['results']['items']:
    # Items without an 'end_time' (the non-word entries, i.e. punctuation in
    # Transcribe output) cannot be placed on the timeline and are left out of
    # the text. This replaces the former bare `except:`, which skipped them
    # as a side effect but also hid every other error.
    if 'end_time' not in item:
        continue
    word = item['alternatives'][0]['content'] + " "
    # Bounds check first: never advance past the last segment, so words that
    # end after the final segment stay attached to it instead of raising
    # IndexError.
    if (count < len(result) - 1
            and float(item['end_time']) > float(result[count]['end_time'])):
        result[count]['result'] = content
        count += 1
        content = word
    else:
        content += word
# Store the text of the last segment reached. Doing this once after the loop
# also covers a final word that opened a new segment, which was previously
# dropped because only some branches stored the text on the last item.
if result and content:
    result[count]['result'] = content
# Wrap the segment list and the transcript's 'speakers' value in one dict and
# write it to data.json with 4-space indentation.
final_result = {
    "result": result,
    "speakers": json_data['results']['speaker_labels']['speakers'],
}
with open('data.json', 'w', encoding="utf-8") as json_file:
    json.dump(final_result, json_file, indent=4)