akhaliq HF staff committed on
Commit
d6247a0
1 Parent(s): ef46ff0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -26
app.py CHANGED
@@ -6,6 +6,7 @@ import tempfile
6
  import os
7
  import base64
8
  import openai
 
9
  from dataclasses import dataclass, field
10
  from threading import Lock
11
 
@@ -21,10 +22,47 @@ def create_client(api_key):
21
  api_key=api_key
22
  )
23
 
24
- def transcribe_audio(audio):
25
- # This is a placeholder function. In a real-world scenario, you'd use a
26
- # speech-to-text service here. For now, we'll just return a dummy transcript.
27
- return "This is a dummy transcript. Please implement actual speech-to-text functionality."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def generate_response_and_audio(message, state):
30
  if state.client is None:
@@ -74,26 +112,6 @@ def chat(message, state):
74
 
75
  return generate_response_and_audio(message, state)
76
 
77
- def process_audio(audio, state):
78
- if audio is None:
79
- return "", state
80
-
81
- # Convert numpy array to wav
82
- audio_segment = AudioSegment(
83
- audio[1].tobytes(),
84
- frame_rate=audio[0],
85
- sample_width=audio[1].dtype.itemsize,
86
- channels=1 if len(audio[1].shape) == 1 else audio[1].shape[1]
87
- )
88
-
89
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
90
- audio_segment.export(temp_audio.name, format="wav")
91
- transcript = transcribe_audio(temp_audio.name)
92
-
93
- os.unlink(temp_audio.name)
94
-
95
- return transcript, state
96
-
97
  def set_api_key(api_key, state):
98
  if not api_key:
99
  raise gr.Error("Please enter a valid API key.")
@@ -111,7 +129,7 @@ with gr.Blocks() as demo:
111
 
112
  with gr.Row():
113
  with gr.Column(scale=1):
114
- audio_input = gr.Audio(source="microphone", type="numpy")
115
  with gr.Column(scale=2):
116
  chatbot = gr.Chatbot()
117
  text_input = gr.Textbox(show_label=False, placeholder="Type your message here...")
@@ -119,7 +137,11 @@ with gr.Blocks() as demo:
119
  audio_output = gr.Audio(label="Generated Audio")
120
 
121
  set_key_button.click(set_api_key, inputs=[api_key_input, state], outputs=[api_key_status, state])
122
- audio_input.change(process_audio, inputs=[audio_input, state], outputs=[text_input, state])
 
 
 
 
123
  text_input.submit(chat, inputs=[text_input, state], outputs=[chatbot, audio_output, state])
124
 
125
  demo.launch()
 
6
  import os
7
  import base64
8
  import openai
9
+ import time
10
  from dataclasses import dataclass, field
11
  from threading import Lock
12
 
 
22
  api_key=api_key
23
  )
24
 
25
def process_audio_file(audio_file, state):
    """Send an uploaded audio file to the chat model and collect its reply.

    The file is read, base64-encoded and sent as a single user message to
    ``state.client.chat.completions.create`` with streaming enabled. Text
    deltas and base64 audio chunks are gathered from the stream.

    Args:
        audio_file: The uploaded file. Either an object exposing the path as
            ``.name`` or a plain path (``str`` / ``os.PathLike``). ``None``
            is treated as "nothing to process".
        state: Application state; must expose ``client``.

    Returns:
        A ``(text, audio, state)`` tuple: the concatenated streamed text, the
        decoded audio bytes (``None`` when the stream carried no audio), and
        the unchanged ``state``.

    Raises:
        gr.Error: If no client is configured, or if the request / stream
            processing fails.
    """
    # Nothing uploaded (presumably also what a cleared upload delivers --
    # confirm against the Gradio version in use): do nothing instead of
    # crashing on ``None.name``.
    if audio_file is None:
        return "", None, state

    if state.client is None:
        raise gr.Error("Please enter a valid API key first.")

    format_ = "opus"
    bitrate = 16

    # Accept both a file wrapper (path in ``.name``) and a bare path.
    if isinstance(audio_file, (str, os.PathLike)):
        path = audio_file
    else:
        path = audio_file.name

    with open(path, "rb") as f:
        encoded_input = base64.b64encode(f.read()).decode()

    try:
        stream = state.client.chat.completions.create(
            extra_body={
                "require_audio": True,
                "tts_preset_id": "jessica",
                "tts_audio_format": format_,
                "tts_audio_bitrate": bitrate,
            },
            model="llama3.1-8b",
            messages=[
                {
                    "role": "user",
                    "content": [{"type": "audio", "data": encoded_input}],
                }
            ],
            temperature=0.5,
            max_tokens=128,
            stream=True,
        )

        text_parts = []
        audio_chunks = []

        for chunk in stream:
            # Skip chunks without choices rather than failing on choices[0].
            if not chunk.choices:
                continue
            choice = chunk.choices[0]
            if choice.delta.content:
                text_parts.append(choice.delta.content)
            chunk_audio = getattr(choice, "audio", None)
            if chunk_audio:
                audio_chunks.extend(chunk_audio)

        transcript = "".join(text_parts)
        audio_bytes = b"".join(base64.b64decode(a) for a in audio_chunks)

    except Exception as e:
        raise gr.Error(f"Error processing audio: {str(e)}") from e

    # Hand back None rather than empty bytes when no audio was received.
    return transcript, audio_bytes or None, state
66
 
67
  def generate_response_and_audio(message, state):
68
  if state.client is None:
 
112
 
113
  return generate_response_and_audio(message, state)
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  def set_api_key(api_key, state):
116
  if not api_key:
117
  raise gr.Error("Please enter a valid API key.")
 
129
 
130
  with gr.Row():
131
  with gr.Column(scale=1):
132
+ audio_file_input = gr.File(label="Upload Audio File")
133
  with gr.Column(scale=2):
134
  chatbot = gr.Chatbot()
135
  text_input = gr.Textbox(show_label=False, placeholder="Type your message here...")
 
137
  audio_output = gr.Audio(label="Generated Audio")
138
 
139
  set_key_button.click(set_api_key, inputs=[api_key_input, state], outputs=[api_key_status, state])
140
+ audio_file_input.change(
141
+ process_audio_file,
142
+ inputs=[audio_file_input, state],
143
+ outputs=[text_input, audio_output, state]
144
+ )
145
  text_input.submit(chat, inputs=[text_input, state], outputs=[chatbot, audio_output, state])
146
 
147
  demo.launch()