Spaces:

yuangongfdu
/

ltu-2

Runtime error

App Files Community

yuangongfdu committed on Sep 30, 2023

Commit

9a72e52

•

1 Parent(s): a626e80

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -5

app.py CHANGED Viewed

@@ -38,9 +38,15 @@ def predict(audio_path, question):
     response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
         'audio_path': audio_path, 'question': question
     })
-    answer = json.loads(response.content)
-    ans_str = answer['output']
-    return ans_str
 if __name__ == '__main__':
     link = "https://github.com/YuanGongND/ltu"
@@ -53,12 +59,12 @@ if __name__ == '__main__':
                         inputs=[gr.Audio(type="filepath"),
                                 gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?',
                                            label='Edit the textbox to ask your own questions!')],
-                        outputs=[gr.Textbox(label="LTU-AS Output")],
                         cache_examples=True,
                         title="Demo of LTU-AS",
                         description="LTU-AS an improved version of LTU. LTU-AS is stronger in spoken text understanding and music understanding. " + f"<a href='{paper_link}'>{paper_text}</a> <br>" +
                                     "LTU-AS is authored by Yuan Gong, Alexander H. Liu, Hongyin Luo, Leonid Karlinsky, and James Glass (MIT & MIT-IBM Watson AI Lab). <br>" +
-                                    "Input should be wav file sampled at 16kHz. This demo trims input audio to 10 seconds. <br>" +
                                     "Code of LTU-AS will be available soon at " + f"<a href='{link}'>{text}</a> <br>" +
                                     "**Research Demo, Not for Commercial Use (Due to license of LLaMA).**")
     demo.launch(debug=False, share=False)

     response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
         'audio_path': audio_path, 'question': question
     })
+    answer_7b = json.loads(response.content)
+    ans_str_7b = answer_7b['output']
+    response = requests.put('http://sls-titan-5.csail.mit.edu:8080/items/0', json={
+        'audio_path': audio_path, 'question': question
+    })
+    answer_13b = json.loads(response.content)
+    ans_str_13b = answer_13b['output']
+    return ans_str_7b, ans_str_13b
 if __name__ == '__main__':
     link = "https://github.com/YuanGongND/ltu"
                         inputs=[gr.Audio(type="filepath"),
                                 gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?',
                                            label='Edit the textbox to ask your own questions!')],
+                        outputs=[gr.Textbox(label="LTU-AS-7B Output"), gr.Textbox(label="LTU-AS-13B Output")],
                         cache_examples=True,
                         title="Demo of LTU-AS",
                         description="LTU-AS an improved version of LTU. LTU-AS is stronger in spoken text understanding and music understanding. " + f"<a href='{paper_link}'>{paper_text}</a> <br>" +
                                     "LTU-AS is authored by Yuan Gong, Alexander H. Liu, Hongyin Luo, Leonid Karlinsky, and James Glass (MIT & MIT-IBM Watson AI Lab). <br>" +
+                                    "Input should be wav file sampled at 16kHz. This demo trims input audio to 10 seconds. <br>" +
                                     "Code of LTU-AS will be available soon at " + f"<a href='{link}'>{text}</a> <br>" +
                                     "**Research Demo, Not for Commercial Use (Due to license of LLaMA).**")
     demo.launch(debug=False, share=False)