yuangongfdu committed on
Commit
9a72e52
1 Parent(s): a626e80

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -38,9 +38,15 @@ def predict(audio_path, question):
38
  response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
39
  'audio_path': audio_path, 'question': question
40
  })
41
- answer = json.loads(response.content)
42
- ans_str = answer['output']
43
- return ans_str
 
 
 
 
 
 
44
 
45
  if __name__ == '__main__':
46
  link = "https://github.com/YuanGongND/ltu"
@@ -53,12 +59,12 @@ if __name__ == '__main__':
53
  inputs=[gr.Audio(type="filepath"),
54
  gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?',
55
  label='Edit the textbox to ask your own questions!')],
56
- outputs=[gr.Textbox(label="LTU-AS Output")],
57
  cache_examples=True,
58
  title="Demo of LTU-AS",
59
  description="LTU-AS an improved version of LTU. LTU-AS is stronger in spoken text understanding and music understanding. " + f"<a href='{paper_link}'>{paper_text}</a> <br>" +
60
  "LTU-AS is authored by Yuan Gong, Alexander H. Liu, Hongyin Luo, Leonid Karlinsky, and James Glass (MIT & MIT-IBM Watson AI Lab). <br>" +
61
- "Input should be wav file sampled at 16kHz. This demo trims input audio to 10 seconds. <br>" +
62
  "Code of LTU-AS will be available soon at " + f"<a href='{link}'>{text}</a> <br>" +
63
  "**Research Demo, Not for Commercial Use (Due to license of LLaMA).**")
64
  demo.launch(debug=False, share=False)
 
38
  response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
39
  'audio_path': audio_path, 'question': question
40
  })
41
+ answer_7b = json.loads(response.content)
42
+ ans_str_7b = answer_7b['output']
43
+
44
+ response = requests.put('http://sls-titan-5.csail.mit.edu:8080/items/0', json={
45
+ 'audio_path': audio_path, 'question': question
46
+ })
47
+ answer_13b = json.loads(response.content)
48
+ ans_str_13b = answer_13b['output']
49
+ return ans_str_7b, ans_str_13b
50
 
51
  if __name__ == '__main__':
52
  link = "https://github.com/YuanGongND/ltu"
 
59
  inputs=[gr.Audio(type="filepath"),
60
  gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?',
61
  label='Edit the textbox to ask your own questions!')],
62
+ outputs=[gr.Textbox(label="LTU-AS-7B Output"), gr.Textbox(label="LTU-AS-13B Output")],
63
  cache_examples=True,
64
  title="Demo of LTU-AS",
65
  description="LTU-AS an improved version of LTU. LTU-AS is stronger in spoken text understanding and music understanding. " + f"<a href='{paper_link}'>{paper_text}</a> <br>" +
66
  "LTU-AS is authored by Yuan Gong, Alexander H. Liu, Hongyin Luo, Leonid Karlinsky, and James Glass (MIT & MIT-IBM Watson AI Lab). <br>" +
67
+ "Input should be wav file sampled at 16kHz. This demo trims input audio to 10 seconds. <br>" +
68
  "Code of LTU-AS will be available soon at " + f"<a href='{link}'>{text}</a> <br>" +
69
  "**Research Demo, Not for Commercial Use (Due to license of LLaMA).**")
70
  demo.launch(debug=False, share=False)