Spaces:
Sleeping
Sleeping
IliaLarchenko
committed on
Commit
·
855dfb9
1
Parent(s):
87ae702
Using whisper without saving file
Browse files
app.py
CHANGED
@@ -13,7 +13,7 @@ def hide_settings():
|
|
13 |
audio_input = gr.Audio(
|
14 |
label="Record audio",
|
15 |
sources=["microphone"],
|
16 |
-
type="
|
17 |
waveform_options={"show_controls": False},
|
18 |
interactive=True,
|
19 |
editable=False,
|
@@ -35,7 +35,7 @@ def hide_solution():
|
|
35 |
audio_input = gr.Audio(
|
36 |
label="Record audio",
|
37 |
sources=["microphone"],
|
38 |
-
type="
|
39 |
waveform_options={"show_controls": False},
|
40 |
interactive=False,
|
41 |
editable=False,
|
@@ -93,7 +93,7 @@ with gr.Blocks() as demo:
|
|
93 |
audio_input = gr.Audio(
|
94 |
label="Record audio",
|
95 |
sources=["microphone"],
|
96 |
-
type="
|
97 |
waveform_options={"show_controls": False},
|
98 |
interactive=False,
|
99 |
editable=False,
|
@@ -124,11 +124,12 @@ with gr.Blocks() as demo:
|
|
124 |
|
125 |
audio_input.stop_recording(fn=transcribe_audio, inputs=[audio_input], outputs=[message]).then(
|
126 |
fn=return_none, inputs=None, outputs=[audio_input]
|
127 |
-
).then(
|
128 |
-
fn=send_request,
|
129 |
-
inputs=[code, previous_code, message, chat_history, chat, model_select],
|
130 |
-
outputs=[chat_history, chat, message, previous_code],
|
131 |
)
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
chat.change(fn=read_last_message, inputs=[chat], outputs=[audio_output])
|
134 |
|
|
|
13 |
audio_input = gr.Audio(
|
14 |
label="Record audio",
|
15 |
sources=["microphone"],
|
16 |
+
type="numpy",
|
17 |
waveform_options={"show_controls": False},
|
18 |
interactive=True,
|
19 |
editable=False,
|
|
|
35 |
audio_input = gr.Audio(
|
36 |
label="Record audio",
|
37 |
sources=["microphone"],
|
38 |
+
type="numpy",
|
39 |
waveform_options={"show_controls": False},
|
40 |
interactive=False,
|
41 |
editable=False,
|
|
|
93 |
audio_input = gr.Audio(
|
94 |
label="Record audio",
|
95 |
sources=["microphone"],
|
96 |
+
type="numpy",
|
97 |
waveform_options={"show_controls": False},
|
98 |
interactive=False,
|
99 |
editable=False,
|
|
|
124 |
|
125 |
audio_input.stop_recording(fn=transcribe_audio, inputs=[audio_input], outputs=[message]).then(
|
126 |
fn=return_none, inputs=None, outputs=[audio_input]
|
|
|
|
|
|
|
|
|
127 |
)
|
128 |
+
# .then(
|
129 |
+
# fn=send_request,
|
130 |
+
# inputs=[code, previous_code, message, chat_history, chat, model_select],
|
131 |
+
# outputs=[chat_history, chat, message, previous_code],
|
132 |
+
# )
|
133 |
|
134 |
chat.change(fn=read_last_message, inputs=[chat], outputs=[audio_output])
|
135 |
|
audio.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import io
|
2 |
+
import wave
|
3 |
+
|
4 |
+
|
5 |
+
def numpy_audio_to_bytes(audio_data, sample_rate=44100):
    """Encode a numpy array of PCM samples as an in-memory mono WAV file.

    Parameters:
        audio_data: numpy array of samples; the raw buffer is written via
            ``.tobytes()``, so 16-bit integer samples are assumed
            (``sampwidth=2``) — TODO confirm against the Gradio recorder output.
        sample_rate: frames per second written to the WAV header. Defaults
            to 44100, preserving the previous hard-coded value; pass the
            recorder's actual rate (e.g. ``audio[0]`` from a Gradio
            ``(sample_rate, data)`` tuple) to avoid pitch-shifted playback.

    Returns:
        bytes: a complete WAV file (RIFF header plus frames), suitable for
        uploading without touching the filesystem.
    """
    num_channels = 1  # mono — single-channel microphone capture
    sampwidth = 2  # bytes per sample (16-bit PCM)

    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wf:
        wf.setnchannels(num_channels)
        wf.setsampwidth(sampwidth)
        wf.setframerate(sample_rate)
        wf.writeframes(audio_data.tobytes())

    return buffer.getvalue()
|
llm.py
CHANGED
@@ -3,6 +3,7 @@ import json
|
|
3 |
from dotenv import load_dotenv
|
4 |
from openai import OpenAI
|
5 |
|
|
|
6 |
from prompts import coding_interviewer_prompt, grading_feedback_prompt
|
7 |
|
8 |
load_dotenv()
|
@@ -73,9 +74,10 @@ def send_request(code, previous_code, message, chat_history, chat_display, model
|
|
73 |
return chat_history, chat_display, "", code
|
74 |
|
75 |
|
76 |
-
def transcribe_audio(
|
77 |
-
|
78 |
-
|
|
|
79 |
|
80 |
return transcription
|
81 |
|
|
|
3 |
from dotenv import load_dotenv
|
4 |
from openai import OpenAI
|
5 |
|
6 |
+
from audio import numpy_audio_to_bytes
|
7 |
from prompts import coding_interviewer_prompt, grading_feedback_prompt
|
8 |
|
9 |
load_dotenv()
|
|
|
74 |
return chat_history, chat_display, "", code
|
75 |
|
76 |
|
77 |
+
def transcribe_audio(audio, client=client):
    """Transcribe recorded audio with the OpenAI Whisper endpoint.

    ``audio`` is expected to be a Gradio ``(sample_rate, numpy_array)``
    tuple; only the sample data (``audio[1]``) is forwarded.
    NOTE(review): ``audio[0]`` (the actual sample rate) is dropped here —
    the WAV encoder uses its own default rate; verify they match.
    Returns the plain-text transcription string.
    """
    # Build the multipart upload tuple: (filename, bytes, content-type).
    # No file is written to disk — the WAV is assembled in memory.
    wav_payload = ("temp.wav", numpy_audio_to_bytes(audio[1]), "audio/wav")
    result = client.audio.transcriptions.create(
        model="whisper-1",
        file=wav_payload,
        response_format="text",
    )
    return result
|
83 |
|