# Web-page residue captured together with this file (not part of the program):
#   Spaces status shown on the page: Runtime error
#   File size: 2,906 Bytes
#   Commit hashes shown alongside the file: 7a2502d, bfe831e, 0674a08
import json
import gradio as gr
import requests
import os
def is_file_larger_than_30mb(file_path):
    """Report whether the file at ``file_path`` is larger than 30 MiB.

    Args:
        file_path: Path handed to ``os.path.getsize``.

    Returns:
        ``True`` only when the size could be read and is strictly greater
        than ``30 * 1024 * 1024`` bytes. ``False`` otherwise, including when
        the size lookup raises (missing file, permission problem, or an
        invalid path such as ``None``).
    """
    limit_bytes = 30 * 1024 * 1024
    try:
        return os.path.getsize(file_path) > limit_bytes
    except Exception:
        # Deliberately best-effort: every failure to read the size is treated
        # as "not too large". A single handler replaces the previous
        # FileNotFoundError / PermissionError / Exception chain, which all
        # returned the same value.
        return False
def upload_audio(audio_path):
    """Upload a local audio file to the backend's ``/upload/`` endpoint.

    Args:
        audio_path: Local filesystem path of the audio file, or ``None`` when
            no file was provided.

    Returns:
        ``'size'`` when ``is_file_larger_than_30mb`` reports the file as too
        large (nothing is uploaded); the ``"path"`` field of the server's
        JSON reply when the upload answers with HTTP 200; ``None`` in every
        other case (no file, unreadable file, network failure or timeout,
        non-200 status, malformed reply).
    """
    if audio_path is None:
        return None
    if is_file_larger_than_30mb(audio_path):
        return 'size'
    try:
        with open(audio_path, 'rb') as audio_file:
            response = requests.post(
                'http://sls-titan-6.csail.mit.edu:8080/upload/',
                files={'audio_file': audio_file},
                # Without a timeout, requests waits forever on a dead server.
                # The value is a generous guard, not a tuned number.
                timeout=120,
            )
        if response.status_code != 200:
            return None
        return response.json()["path"]
    except (OSError, requests.RequestException, ValueError, KeyError, TypeError):
        # OSError: file cannot be opened/read; RequestException: network or
        # timeout; ValueError: reply is not JSON; KeyError/TypeError: reply
        # has no usable "path" entry. Callers treat None as "upload failed".
        return None
def predict(audio_path, question):
    """Upload the audio, send the question to the backend, return the answer.

    Args:
        audio_path: Local path of the audio file to ask about.
        question: The question text.

    Returns:
        The ``'output'`` field of the backend's JSON reply, or a
        human-readable message when the upload failed, the file is too
        large, the question is empty, or the backend did not deliver a
        usable answer.
    """
    upload_status = upload_audio(audio_path)
    if upload_status is None:
        return 'Please upload an audio file.'
    if upload_status == 'size':
        return 'This demo does not support audio file size larger than 30MB.'
    if not question:
        return 'Please ask a question.'
    print(audio_path, question)
    try:
        # NOTE(review): the request carries the local ``audio_path``, not the
        # path returned by the upload step; presumably the server resolves it
        # on its side -- confirm against the backend before changing.
        response = requests.put(
            'http://sls-titan-6.csail.mit.edu:8080/items/0',
            json={'audio_path': audio_path, 'question': question},
            # Generous guard so a hung backend cannot block the UI forever.
            timeout=300,
        )
        answer = json.loads(response.content)
        return answer['output']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network failure/timeout, non-JSON body, or a reply without
        # 'output': report it as text, like the other failure paths above.
        return 'The LTU-AS server did not return an answer. Please try again later.'
if __name__ == '__main__':
    # Link/label pairs. Only the paper pair is referenced in the description
    # below; the others are defined but not used in this block.
    link = "https://github.com/YuanGongND/ltu"
    text = "[Github]"
    paper_link = "https://www.researchgate.net/publication/374153208_Joint_Audio_and_Speech_Understanding"
    paper_text = "[Paper]"
    sample_audio_link = "https://drive.google.com/drive/folders/17yeBevX0LIS1ugt0DZDOoJolwxvncMja?usp=sharing"
    sample_audio_text = "[sample audios from AudioSet evaluation set]"

    # Description shown under the title, assembled from its pieces in order.
    description_html = "".join([
        "LTU-AS an improved version of LTU. LTU-AS is stronger in spoken text understanding and music understanding. ",
        f"<a href='{paper_link}'>{paper_text}</a> <br>",
        "LTU-AS is authored by Yuan Gong, Alexander H. Liu, Hongyin Luo, Leonid Karlinsky, and James Glass (MIT & MIT-IBM Watson AI Lab). <br>",
        "Input should be wav file sampled at 16kHz. This demo trim input audio to 10 seconds. <br>",
        "**Research Demo, No Commercial Use (Due to license of LLaMA).**",
    ])

    # Inputs: an audio file delivered to ``predict`` as a path, plus a
    # pre-filled question box. Output: a single text box.
    audio_input = gr.Audio(type="filepath")
    question_input = gr.Textbox(
        value='What can be inferred from the spoken text and sounds? Why?',
        label='Edit the textbox to ask your own questions!',
    )
    answer_output = gr.Textbox(label="LTU-AS Output")

    demo = gr.Interface(
        fn=predict,
        inputs=[audio_input, question_input],
        outputs=[answer_output],
        cache_examples=True,
        title="Demo of LTU-AS",
        description=description_html,
    )
    demo.launch(debug=False, share=False)