patloeber commited on
Commit
bfb5aad
1 Parent(s): c1dd78e

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. app.py +85 -0
  3. audio/audio.mp3 +3 -0
  4. helpers.py +98 -0
  5. images/logo.png +0 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ audio/audio.mp3 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py — Gradio front-end for the AssemblyAI Conformer-1 speech-to-text demo.
import gradio as gr
import os

# Thin wrappers around the AssemblyAI REST API (defined in helpers.py).
from helpers import make_header, upload_file, request_transcript, wait_for_completion, make_paragraphs_string


# Static HTML snippets rendered at the top of the page.
title = """<h1 align="center">🔥AssemblyAI: Conformer-1 Demo🔥</h1>"""

subtitle = """<h2 align="center">Automatic Speech Recognition using the AssemblyAI API</h2>"""
link = """<p align="center"><a href="https://www.assemblyai.com/blog/conformer-1/">Click here to learn more about the Conformer-1 model</a></p>"""
13
def submit_to_AAI(api_key,
                  radio,
                  audio_file,
                  mic_recording):
    """Upload the selected audio to AssemblyAI, transcribe it, and return the result.

    Args:
        api_key: AssemblyAI API key entered by the user.
        radio: Selected audio source — "Audio File" or "Record Audio".
        audio_file: Data from the file-upload Audio widget.
        mic_recording: Data from the microphone Audio widget.

    Returns:
        The transcript formatted as paragraphs, or an error string.
    """
    # Pick whichever audio input matches the selected source.
    if radio == "Audio File":
        audio_data = audio_file
    elif radio == "Record Audio":
        audio_data = mic_recording
    else:
        # Guard: the original code left `audio_data` unbound here and raised
        # NameError on the upload call below for an unexpected radio value.
        return f"Error: unknown audio source {radio!r}"

    header = make_header(api_key)

    # 1. Upload the audio (Gradio hands us in-memory data, not a file path)
    upload_url = upload_file(audio_data, header, is_file=False)

    # 2. Request transcript
    transcript_response = request_transcript(upload_url, header)

    transcript_id = transcript_response['id']

    # 3. Wait for the transcription to complete
    _, error = wait_for_completion(transcript_id, header)

    if error is not None:
        return error

    # 4. Fetch paragraphs of transcript
    return make_paragraphs_string(transcript_id, header)
41
+
42
+
43
def change_audio_source(radio):
    """Show the Audio widget matching the chosen source and hide the other."""
    show_file = radio == "Audio File"
    show_mic = radio == "Record Audio"
    if show_file or show_mic:
        # Returned updates map onto [audio_file, mic_recording] outputs.
        return [gr.Audio.update(visible=show_file),
                gr.Audio.update(visible=show_mic)]
50
+
51
# Build the Gradio UI; `demo` is launched at the bottom of the file.
with gr.Blocks(css = """#col_container {width: 1000px; margin-left: auto; margin-right: auto;}
#chatbot {height: 520px; overflow: auto;}""") as demo:
    # Branding / header section.
    gr.HTML('<center><a href="https://www.assemblyai.com/"><img src="file/images/logo.png" width="180px"></a></center>')
    gr.HTML(title)
    gr.HTML(subtitle)
    gr.HTML(link)
    gr.HTML('''<center><a href="https://huggingface.co/spaces/assemblyai/Conformer1-Demo?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space and run securely with your AssemblyAI API Key</center>''')

    with gr.Column(elem_id="col_container"):
        # API key entered as a password field; it is only sent to AssemblyAI.
        api_key = gr.Textbox(type='password', label="Enter your AssemblyAI API key here")

        with gr.Box():
            # Selector for audio source
            radio = gr.Radio(["Audio File", "Record Audio"], label="Audio Source", value="Audio File")
            # Audio object for both file and microphone data
            audio_file = gr.Audio()
            mic_recording = gr.Audio(source="microphone", visible=False)

        # Bundled example clip, resolved relative to this file.
        gr.Examples([os.path.join(os.path.dirname(__file__),"audio/audio.mp3")], audio_file)

        btn = gr.Button("Run")

        out = gr.Textbox(placeholder="Your formatted transcript will appear here ...", lines=10)

    # Changing audio source changes Audio input component
    radio.change(fn=change_audio_source,
                 inputs=[radio],
                 outputs=[audio_file, mic_recording])

    # Clicking "submit" uploads selected audio to AssemblyAI, performs requested analyses, and displays results
    btn.click(fn=submit_to_AAI,
              inputs=[api_key,radio,audio_file,mic_recording],
              outputs=out)

demo.launch(debug=True)
audio/audio.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37d851f5525c4b54b3c565f46fa47105f5c9533deed15eb7e6874f31b340659b
3
+ size 2353876
helpers.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# helpers.py — thin wrappers around the AssemblyAI v2 REST API used by app.py.
import requests
import time
from scipy.io.wavfile import write
import io


# AssemblyAI v2 REST endpoints.
upload_endpoint = "https://api.assemblyai.com/v2/upload"
transcript_endpoint = "https://api.assemblyai.com/v2/transcript"
11
def make_header(api_key):
    """Return the HTTP headers AssemblyAI expects on every request."""
    headers = {'content-type': 'application/json'}
    headers['authorization'] = api_key
    return headers
16
+
17
+
18
+ def _read_file(filename, chunk_size=5242880):
19
+ """Reads the file in chunks. Helper for `upload_file()`"""
20
+ with open(filename, "rb") as f:
21
+ while True:
22
+ data = f.read(chunk_size)
23
+ if not data:
24
+ break
25
+ yield data
26
+
27
+
28
+ def _read_array(audio, chunk_size=5242880):
29
+ """Like _read_file but for array - creates temporary unsaved "file" from sample rate and audio np.array"""
30
+ sr, aud = audio
31
+
32
+ # Create temporary "file" and write data to it
33
+ bytes_wav = bytes()
34
+ temp_file = io.BytesIO(bytes_wav)
35
+ write(temp_file, sr, aud)
36
+
37
+ while True:
38
+ data = temp_file.read(chunk_size)
39
+ if not data:
40
+ break
41
+ yield data
42
+
43
+
44
def upload_file(audio_file, header, is_file=True):
    """POST audio bytes to AssemblyAI's upload endpoint.

    Args:
        audio_file: Path to an audio file when `is_file` is True, otherwise a
            (sample_rate, np.ndarray) pair from Gradio.
        header: Headers from `make_header`.
        is_file: Selects the streaming helper used to read the audio.

    Returns:
        The response JSON, e.g. {'upload_url': <URL>}.
    """
    # Stream the audio so large files are never fully held in memory.
    chunks = _read_file(audio_file) if is_file else _read_array(audio_file)
    upload_response = requests.post(
        upload_endpoint,
        headers=header,
        data=chunks
    )
    # Surface HTTP errors to the caller instead of returning an error payload.
    if upload_response.status_code != 200:
        upload_response.raise_for_status()
    return upload_response.json()
55
+
56
+
57
def request_transcript(upload_url, header):
    """Request a transcription job from AssemblyAI.

    Args:
        upload_url: Either the raw upload URL string, or the dict returned by
            `upload_file` (``{'upload_url': <URL>}``).
        header: Headers from `make_header`.

    Returns:
        The job JSON from AssemblyAI; ``['id']`` identifies the transcript.
    """
    # Accept the dict returned from `upload_file` as well as a raw URL string.
    # (isinstance, not `type(...) is dict`, is the idiomatic type check.)
    if isinstance(upload_url, dict):
        upload_url = upload_url['upload_url']

    # Create request
    transcript_request = {
        'audio_url': upload_url,
    }

    # POST request
    transcript_response = requests.post(
        transcript_endpoint,
        json=transcript_request,
        headers=header
    )

    return transcript_response.json()
77
+
78
+
79
def wait_for_completion(transcript_id, header):
    """Poll AssemblyAI until the transcription completes or errors out.

    Args:
        transcript_id: ID returned by `request_transcript`.
        header: Headers from `make_header`.

    Returns:
        (response_json, None) on success, or (None, error_message) on failure.
    """
    # Reuse the module-level constant instead of re-hardcoding the URL
    # (keeps this consistent with `request_transcript`/`make_paragraphs_string`).
    polling_endpoint = transcript_endpoint + "/" + transcript_id

    while True:
        polling_response = requests.get(polling_endpoint, headers=header)
        polling_response = polling_response.json()

        if polling_response['status'] == 'completed':
            return polling_response, None
        elif polling_response['status'] == 'error':
            return None, f"Error: {polling_response['error']}"

        # Job still queued/processing; back off before polling again.
        time.sleep(5)
93
+
94
+
95
def make_paragraphs_string(transc_id, header):
    """Fetch the transcript's paragraphs and join them into one display string."""
    endpoint = "/".join([transcript_endpoint, transc_id, "paragraphs"])
    response = requests.get(endpoint, headers=header)
    paragraphs = response.json()['paragraphs']
    # Blank line between paragraphs for readability in the output textbox.
    return '\n\n'.join(p['text'] for p in paragraphs)
images/logo.png ADDED