Spaces:
Runtime error
Runtime error
Raghavan1988
committed on
Commit
•
7bd7744
1
Parent(s):
4ffa9cc
Adding SeamlessM4TModel and a conditional check to see if the user has added audio
Browse files
app.py
CHANGED
@@ -25,6 +25,11 @@ DEFAULT_TARGET_LANGUAGE = "English"
|
|
25 |
AUDIO_SAMPLE_RATE = 16000.0
|
26 |
MAX_INPUT_AUDIO_LENGTH = 60 # in seconds
|
27 |
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
def predict(
|
30 |
task_name: str,
|
@@ -243,6 +248,15 @@ def process_and_query(text, image,audio):
|
|
243 |
# If an image is provided, process it with OpenAI and use the response as the text query for Vectara
|
244 |
if image is not None:
|
245 |
text = process_image_with_openai(image)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
|
247 |
# Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
|
248 |
vectara_response_json = query_vectara(text)
|
|
|
25 |
AUDIO_SAMPLE_RATE = 16000.0
|
26 |
MAX_INPUT_AUDIO_LENGTH = 60 # in seconds
|
27 |
|
28 |
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
29 |
+
|
30 |
+
processor = AutoProcessor.from_pretrained("ylacombe/hf-seamless-m4t-large")
|
31 |
+
model = SeamlessM4TModel.from_pretrained("ylacombe/hf-seamless-m4t-large").to(device)
|
32 |
+
|
33 |
|
34 |
def predict(
|
35 |
task_name: str,
|
|
|
248 |
# If an image is provided, process it with OpenAI and use the response as the text query for Vectara
|
249 |
if image is not None:
|
250 |
text = process_image_with_openai(image)
|
251 |
+
|
252 |
+
if audio is not None:
|
253 |
+
audio = audio[0].numpy()
|
254 |
+
audio = audio.astype(np.float32)
|
255 |
+
audio = audio / np.max(np.abs(audio))
|
256 |
+
audio = audio * 32768
|
257 |
+
audio = audio.astype(np.int16)
|
258 |
+
audio = audio.tobytes()
|
259 |
+
audio = base64.b64encode(audio).decode('utf-8')
|
260 |
|
261 |
# Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
|
262 |
vectara_response_json = query_vectara(text)
|