IliaLarchenko committed on
Commit
855dfb9
·
1 Parent(s): 87ae702

Using whisper without saving file

Browse files
Files changed (3) hide show
  1. app.py +8 -7
  2. audio.py +17 -0
  3. llm.py +5 -3
app.py CHANGED
@@ -13,7 +13,7 @@ def hide_settings():
13
  audio_input = gr.Audio(
14
  label="Record audio",
15
  sources=["microphone"],
16
- type="filepath",
17
  waveform_options={"show_controls": False},
18
  interactive=True,
19
  editable=False,
@@ -35,7 +35,7 @@ def hide_solution():
35
  audio_input = gr.Audio(
36
  label="Record audio",
37
  sources=["microphone"],
38
- type="filepath",
39
  waveform_options={"show_controls": False},
40
  interactive=False,
41
  editable=False,
@@ -93,7 +93,7 @@ with gr.Blocks() as demo:
93
  audio_input = gr.Audio(
94
  label="Record audio",
95
  sources=["microphone"],
96
- type="filepath",
97
  waveform_options={"show_controls": False},
98
  interactive=False,
99
  editable=False,
@@ -124,11 +124,12 @@ with gr.Blocks() as demo:
124
 
125
  audio_input.stop_recording(fn=transcribe_audio, inputs=[audio_input], outputs=[message]).then(
126
  fn=return_none, inputs=None, outputs=[audio_input]
127
- ).then(
128
- fn=send_request,
129
- inputs=[code, previous_code, message, chat_history, chat, model_select],
130
- outputs=[chat_history, chat, message, previous_code],
131
  )
 
 
 
 
 
132
 
133
  chat.change(fn=read_last_message, inputs=[chat], outputs=[audio_output])
134
 
 
13
  audio_input = gr.Audio(
14
  label="Record audio",
15
  sources=["microphone"],
16
+ type="numpy",
17
  waveform_options={"show_controls": False},
18
  interactive=True,
19
  editable=False,
 
35
  audio_input = gr.Audio(
36
  label="Record audio",
37
  sources=["microphone"],
38
+ type="numpy",
39
  waveform_options={"show_controls": False},
40
  interactive=False,
41
  editable=False,
 
93
  audio_input = gr.Audio(
94
  label="Record audio",
95
  sources=["microphone"],
96
+ type="numpy",
97
  waveform_options={"show_controls": False},
98
  interactive=False,
99
  editable=False,
 
124
 
125
  audio_input.stop_recording(fn=transcribe_audio, inputs=[audio_input], outputs=[message]).then(
126
  fn=return_none, inputs=None, outputs=[audio_input]
 
 
 
 
127
  )
128
+ # .then(
129
+ # fn=send_request,
130
+ # inputs=[code, previous_code, message, chat_history, chat, model_select],
131
+ # outputs=[chat_history, chat, message, previous_code],
132
+ # )
133
 
134
  chat.change(fn=read_last_message, inputs=[chat], outputs=[audio_output])
135
 
audio.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import wave
3
+
4
+
5
def numpy_audio_to_bytes(audio_data, sample_rate=44100):
    """Encode a NumPy audio array as an in-memory 16-bit PCM WAV file.

    Args:
        audio_data: Audio samples shaped ``(n_samples,)`` for mono or
            ``(n_samples, n_channels)`` for multi-channel audio.
            ``int16`` data is written unchanged. Floating-point data is
            assumed to lie in ``[-1.0, 1.0]`` and is scaled to ``int16``.
            Wider signed integers keep their 16 most significant bits.
        sample_rate: Sampling rate in Hz stored in the WAV header.
            Defaults to 44100 for backward compatibility; pass the real
            rate of the recording, otherwise playback/transcription runs
            at the wrong speed.

    Returns:
        bytes: The complete WAV file (header + frames).
    """
    import numpy as np  # local import: keeps the module's import block untouched

    samples = np.asarray(audio_data)

    if samples.dtype.kind == "f":
        # Float PCM is conventionally normalised to [-1, 1].
        samples = np.rint(np.clip(samples, -1.0, 1.0) * 32767.0)
    elif samples.dtype.kind == "i" and samples.dtype.itemsize > 2:
        # Wider signed PCM (e.g. int32): keep the top 16 bits.
        samples = samples >> (8 * (samples.dtype.itemsize - 2))

    # WAV stores little-endian samples; the header below declares 2 bytes each.
    pcm = samples.astype("<i2")

    # A 2-D array is (frames, channels); C-order bytes are already interleaved.
    num_channels = pcm.shape[1] if pcm.ndim == 2 else 1
    sampwidth = 2

    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wf:
        wf.setnchannels(num_channels)
        wf.setsampwidth(sampwidth)
        wf.setframerate(sample_rate)
        wf.writeframes(pcm.tobytes())

    return buffer.getvalue()
llm.py CHANGED
@@ -3,6 +3,7 @@ import json
3
  from dotenv import load_dotenv
4
  from openai import OpenAI
5
 
 
6
  from prompts import coding_interviewer_prompt, grading_feedback_prompt
7
 
8
  load_dotenv()
@@ -73,9 +74,10 @@ def send_request(code, previous_code, message, chat_history, chat_display, model
73
  return chat_history, chat_display, "", code
74
 
75
 
76
- def transcribe_audio(filename, client=client):
77
- with open(filename, "rb") as audio_file:
78
- transcription = client.audio.transcriptions.create(model="whisper-1", file=audio_file, response_format="text")
 
79
 
80
  return transcription
81
 
 
3
  from dotenv import load_dotenv
4
  from openai import OpenAI
5
 
6
+ from audio import numpy_audio_to_bytes
7
  from prompts import coding_interviewer_prompt, grading_feedback_prompt
8
 
9
  load_dotenv()
 
74
  return chat_history, chat_display, "", code
75
 
76
 
77
def transcribe_audio(audio, client=client):
    """Transcribe a recorded audio clip with the ``whisper-1`` model.

    The samples are encoded to WAV in memory and uploaded directly, so no
    temporary file is written to disk.

    Args:
        audio: Value of the audio input component; indexed as a sequence
            whose second item holds the NumPy samples (presumably Gradio's
            ``(sample_rate, samples)`` tuple - confirm against the
            component configuration). ``None`` means nothing was recorded.
        client: OpenAI client used to issue the transcription request.

    Returns:
        The transcription text returned by the API, or ``""`` when there
        is no audio to transcribe.
    """
    if audio is None:
        # Nothing recorded: skip the API call instead of failing on audio[1].
        return ""

    # NOTE(review): audio[0] is not forwarded, so the WAV header carries the
    # helper's default rate (44100 Hz) - confirm it matches the recording.
    wav_bytes = numpy_audio_to_bytes(audio[1])

    # A (filename, content, content_type) tuple uploads in-memory bytes.
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=("temp.wav", wav_bytes, "audio/wav"),
        response_format="text",
    )

    return transcription
83