Conformer1-Demo / helpers.py
patloeber's picture
Upload 4 files
bfb5aad
raw
history blame
2.84 kB
import requests
import time
from scipy.io.wavfile import write
import io
# AssemblyAI v2 REST endpoints used by the helper functions in this module.
# URL that audio data is POSTed to when uploading.
upload_endpoint = "https://api.assemblyai.com/v2/upload"
# URL that transcript requests are POSTed to; also the base for per-transcript
# URLs such as "<transcript_endpoint>/<id>/paragraphs".
transcript_endpoint = "https://api.assemblyai.com/v2/transcript"
def make_header(api_key):
    """Build the HTTP headers dict passed to the request helpers.

    Args:
        api_key: Value placed verbatim under the 'authorization' header.

    Returns:
        A new dict with the 'authorization' and 'content-type' headers.
    """
    header = {'authorization': api_key}
    header['content-type'] = 'application/json'
    return header
def _read_file(filename, chunk_size=5242880):
    """Yield the bytes of `filename` in pieces of at most `chunk_size` bytes.

    Helper for `upload_file()`.

    Args:
        filename: Path of the file to open in binary mode.
        chunk_size: Maximum number of bytes per yielded chunk.

    Yields:
        Consecutive `bytes` chunks until the end of the file is reached.
    """
    with open(filename, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        yield from iter(lambda: stream.read(chunk_size), b"")
def _read_array(audio, chunk_size=5242880):
    """Yield WAV-encoded audio in chunks, like `_read_file` but for in-memory data.

    The audio is written into an in-memory buffer (no file is saved to disk)
    and that buffer is then read back piece by piece.

    Args:
        audio: A `(sample_rate, samples)` pair, unpacked and passed on to
            `scipy.io.wavfile.write`.
        chunk_size: Maximum number of bytes per yielded chunk.

    Yields:
        Consecutive `bytes` chunks of the buffer's contents.
    """
    sample_rate, samples = audio
    # In-memory stand-in for a temporary file.
    wav_buffer = io.BytesIO()
    write(wav_buffer, sample_rate, samples)
    # NOTE(review): reading starts from wherever `write` leaves the buffer
    # position; this assumes scipy rewinds file-like targets - confirm.
    yield from iter(lambda: wav_buffer.read(chunk_size), b"")
def upload_file(audio_file, header, is_file=True):
    """Upload audio to AssemblyAI and return the parsed JSON response.

    Args:
        audio_file: What to upload. When `is_file` is true it is handed to
            `_read_file`; otherwise it is handed to `_read_array`.
        header: HTTP headers dict sent with the request.
        is_file: Selects which reader produces the request body.

    Returns:
        The decoded JSON body of the upload response, i.e.
        {'upload_url': <URL>}.

    Raises:
        requests.HTTPError: If the response carries a 4xx/5xx status.
    """
    if is_file:
        payload = _read_file(audio_file)
    else:
        payload = _read_array(audio_file)

    response = requests.post(upload_endpoint, headers=header, data=payload)
    # raise_for_status() does nothing for successful responses, so no
    # explicit status-code comparison is needed before calling it.
    response.raise_for_status()
    return response.json()
def request_transcript(upload_url, header):
    """Request a transcript from AssemblyAI for previously uploaded audio.

    Args:
        upload_url: Either the audio URL string, or the dict returned by
            `upload_file` (its 'upload_url' entry is used). Dict subclasses
            are accepted as well.
        header: HTTP headers dict sent with the request.

    Returns:
        The decoded JSON body of the transcript-creation response. The HTTP
        status is not checked here, so error payloads are returned as-is.
    """
    # isinstance (rather than an exact type comparison) so dict subclasses
    # are unwrapped too instead of being sent wholesale as the audio URL.
    if isinstance(upload_url, dict):
        upload_url = upload_url['upload_url']

    transcript_response = requests.post(
        transcript_endpoint,
        json={'audio_url': upload_url},
        headers=header,
    )
    return transcript_response.json()
def wait_for_completion(transcript_id, header):
    """Poll a transcript until it finishes and return `(result, error)`.

    The transcript is fetched every 5 seconds until its status is terminal.

    Args:
        transcript_id: Id string appended to the transcript URL.
        header: HTTP headers dict sent with every polling request.

    Returns:
        `(response_json, None)` once the status is 'completed', or
        `(None, "Error: ...")` when the status is 'error' or the response
        carries no status at all.
    """
    polling_endpoint = "https://api.assemblyai.com/v2/transcript/" + transcript_id
    while True:
        result = requests.get(polling_endpoint, headers=header).json()
        status = result.get('status')
        if status == 'completed':
            return result, None
        if status == 'error':
            return None, f"Error: {result['error']}"
        if status is None:
            # A body without 'status' is not a transcript object. Report it
            # through the error channel instead of failing with a KeyError.
            return None, f"Error: {result.get('error', result)}"
        # Any other status: wait and poll again.
        time.sleep(5)
def make_paragraphs_string(transc_id, header):
    """Fetch a transcript's paragraphs and join their text into one string.

    Args:
        transc_id: Transcript id string used to build the paragraphs URL.
        header: HTTP headers dict sent with the request.

    Returns:
        The 'text' of every entry under the response's 'paragraphs' key,
        separated by blank lines.
    """
    url = "/".join([transcript_endpoint, transc_id, "paragraphs"])
    response = requests.get(url, headers=header)
    paragraphs = response.json()['paragraphs']
    texts = [paragraph['text'] for paragraph in paragraphs]
    return '\n\n'.join(texts)