Spaces:

yuangongfdu
/

ltu-2

Running

App Files Files Community

yuangongfdu committed on Sep 30, 2023

Commit

b457cd5

•

1 Parent(s): 8c1e172

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -10

app.py CHANGED Viewed

@@ -26,17 +26,28 @@ def upload_audio(audio_path):
     except:
         return None
-def predict(audio_path, question, model):
-    upload_statues = upload_audio(audio_path)
-    if upload_statues == None:
-        return 'Please upload an audio file.'
-    if upload_statues == 'size':
-        return 'This demo does not support audio file size larger than 30MB.'
-    if question == '':
-        return 'Please ask a question.'
-    print(audio_path, question)
     if model == '7B (Default)':
         response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
             'audio_path': audio_path, 'question': question
         })
@@ -45,6 +56,14 @@ def predict(audio_path, question, model):
         return ans_str_7b
     if model == '13B (Beta)':
         response = requests.put('http://sls-titan-5.csail.mit.edu:8080/items/0', json={
             'audio_path': audio_path, 'question': question
         })
@@ -62,7 +81,7 @@ if __name__ == '__main__':
     demo = gr.Interface(fn=predict,
                         inputs=[gr.Audio(type="filepath"),
                                 gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?', label='Edit the textbox to ask your own questions!'),
-                                gr.Radio(["7B (Default)", "13B (Beta)"], value='7B (Default)', label="LLM size", info="All experiments in the ASRU 2023 paper are 7B LLM.")],
                         outputs=[gr.Textbox(label="LTU-AS-Output")],
                         cache_examples=True,
                         title="Demo of LTU-AS",

     except:
         return None
+def upload_audio_13b(audio_path):
+    try:
+        size = is_file_larger_than_30mb(audio_path)
+        if size == True:
+            return 'size'
+        with open(audio_path, 'rb') as audio_file:
+            response = requests.post('http://sls-titan-5.csail.mit.edu:8080/upload/', files={'audio_file': audio_file})
+        if response.status_code == 200:
+            return response.json()["path"]
+    except:
+        return None
+def predict(audio_path, question, model):
     if model == '7B (Default)':
+        upload_statues = upload_audio(audio_path)
+        if upload_statues == None:
+            return 'Please upload an audio file.'
+        if upload_statues == 'size':
+            return 'This demo does not support audio file size larger than 30MB.'
+        if question == '':
+            return 'Please ask a question.'
+        print(audio_path, question)
         response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
             'audio_path': audio_path, 'question': question
         })
         return ans_str_7b
     if model == '13B (Beta)':
+        upload_statues = upload_audio_13b(audio_path)
+        if upload_statues == None:
+            return 'Please upload an audio file.'
+        if upload_statues == 'size':
+            return 'This demo does not support audio file size larger than 30MB.'
+        if question == '':
+            return 'Please ask a question.'
+        print(audio_path, question)
         response = requests.put('http://sls-titan-5.csail.mit.edu:8080/items/0', json={
             'audio_path': audio_path, 'question': question
         })
     demo = gr.Interface(fn=predict,
                         inputs=[gr.Audio(type="filepath"),
                                 gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?', label='Edit the textbox to ask your own questions!'),
+                                gr.Radio(["7B (Default)", "13B (Beta)"], value='7B (Default)', label="LLM size", info="All experiments are 7B LLM.")],
                         outputs=[gr.Textbox(label="LTU-AS-Output")],
                         cache_examples=True,
                         title="Demo of LTU-AS",