Spaces:

yuangongfdu
/

ltu-2

Running

App Files Files Community

yuangongfdu committed on Sep 30, 2023

Commit

ad0f220

•

1 Parent(s): 9a72e52

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -14

app.py CHANGED Viewed

@@ -26,7 +26,7 @@ def upload_audio(audio_path):
     except:
         return None
-def predict(audio_path, question):
     upload_statues = upload_audio(audio_path)
     if upload_statues == None:
         return 'Please upload an audio file.'
@@ -35,18 +35,22 @@ def predict(audio_path, question):
     if question == '':
         return 'Please ask a question.'
     print(audio_path, question)
-    response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
-        'audio_path': audio_path, 'question': question
-    })
-    answer_7b = json.loads(response.content)
-    ans_str_7b = answer_7b['output']
-    response = requests.put('http://sls-titan-5.csail.mit.edu:8080/items/0', json={
-        'audio_path': audio_path, 'question': question
-    })
-    answer_13b = json.loads(response.content)
-    ans_str_13b = answer_13b['output']
-    return ans_str_7b, ans_str_13b
 if __name__ == '__main__':
     link = "https://github.com/YuanGongND/ltu"
@@ -58,8 +62,9 @@ if __name__ == '__main__':
     demo = gr.Interface(fn=predict,
                         inputs=[gr.Audio(type="filepath"),
                                 gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?',
-                                           label='Edit the textbox to ask your own questions!')],
-                        outputs=[gr.Textbox(label="LTU-AS-7B Output"), gr.Textbox(label="LTU-AS-13B Output")],
                         cache_examples=True,
                         title="Demo of LTU-AS",
                         description="LTU-AS an improved version of LTU. LTU-AS is stronger in spoken text understanding and music understanding. " + f"<a href='{paper_link}'>{paper_text}</a> <br>" +

     except:
         return None
+def predict(audio_path, question, model):
+    """Answer a question about an audio clip using the selected LTU-AS model.
+
+    Args:
+        audio_path: Audio file path; handed to upload_audio() and forwarded
+            to the inference server.
+        question: Question text; an empty string is rejected.
+        model: '7B (Default)' or '13B (Beta)'; selects which server is queried.
+
+    Returns:
+        The 'output' field of the server's JSON reply, or a prompt string
+        when upload_audio() returns None or the question is empty. Falls
+        through (returning None) if `model` matches neither label.
+    """
     upload_statues = upload_audio(audio_path)
     if upload_statues == None:
         return 'Please upload an audio file.'
     if question == '':
         return 'Please ask a question.'
     print(audio_path, question)
+    # Each model size is served by its own host.
+    if model == '7B (Default)':
+        response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
+            'audio_path': audio_path, 'question': question
+        })
+        answer_7b = json.loads(response.content)
+        ans_str_7b = answer_7b['output']
+        return ans_str_7b
+    if model == '13B (Beta)':
+        response = requests.put('http://sls-titan-5.csail.mit.edu:8080/items/0', json={
+            'audio_path': audio_path, 'question': question
+        })
+        answer_13b = json.loads(response.content)
+        ans_str_13b = answer_13b['output']
+        return ans_str_13b
 if __name__ == '__main__':
     link = "https://github.com/YuanGongND/ltu"
     demo = gr.Interface(fn=predict,
                         inputs=[gr.Audio(type="filepath"),
                                 gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?',
+                                           label='Edit the textbox to ask your own questions!'),
+                                gr.Radio(["7B (Default)", "13B (Beta)"], value='7B (Default)', label="LLM size", info="All experiments are 7B LLM.")]
+                        outputs=[gr.Textbox(label="LTU-AS-Output")],
                         cache_examples=True,
                         title="Demo of LTU-AS",
                         description="LTU-AS an improved version of LTU. LTU-AS is stronger in spoken text understanding and music understanding. " + f"<a href='{paper_link}'>{paper_text}</a> <br>" +