yuangongfdu committed on
Commit
ad0f220
1 Parent(s): 9a72e52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -14
app.py CHANGED
@@ -26,7 +26,7 @@ def upload_audio(audio_path):
26
  except:
27
  return None
28
 
29
- def predict(audio_path, question):
30
  upload_statues = upload_audio(audio_path)
31
  if upload_statues == None:
32
  return 'Please upload an audio file.'
@@ -35,18 +35,22 @@ def predict(audio_path, question):
35
  if question == '':
36
  return 'Please ask a question.'
37
  print(audio_path, question)
38
- response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
39
- 'audio_path': audio_path, 'question': question
40
- })
41
- answer_7b = json.loads(response.content)
42
- ans_str_7b = answer_7b['output']
43
 
44
- response = requests.put('http://sls-titan-5.csail.mit.edu:8080/items/0', json={
45
- 'audio_path': audio_path, 'question': question
46
- })
47
- answer_13b = json.loads(response.content)
48
- ans_str_13b = answer_13b['output']
49
- return ans_str_7b, ans_str_13b
 
 
 
 
 
 
 
 
 
50
 
51
  if __name__ == '__main__':
52
  link = "https://github.com/YuanGongND/ltu"
@@ -58,8 +62,9 @@ if __name__ == '__main__':
58
  demo = gr.Interface(fn=predict,
59
  inputs=[gr.Audio(type="filepath"),
60
  gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?',
61
- label='Edit the textbox to ask your own questions!')],
62
- outputs=[gr.Textbox(label="LTU-AS-7B Output"), gr.Textbox(label="LTU-AS-13B Output")],
 
63
  cache_examples=True,
64
  title="Demo of LTU-AS",
65
  description="LTU-AS an improved version of LTU. LTU-AS is stronger in spoken text understanding and music understanding. " + f"<a href='{paper_link}'>{paper_text}</a> <br>" +
 
26
  except:
27
  return None
28
 
29
+ def predict(audio_path, question, model):
30
  upload_statues = upload_audio(audio_path)
31
  if upload_statues == None:
32
  return 'Please upload an audio file.'
 
35
  if question == '':
36
  return 'Please ask a question.'
37
  print(audio_path, question)
 
 
 
 
 
38
 
39
+ if model == '7B (Default)':
40
+ response = requests.put('http://sls-titan-6.csail.mit.edu:8080/items/0', json={
41
+ 'audio_path': audio_path, 'question': question
42
+ })
43
+ answer_7b = json.loads(response.content)
44
+ ans_str_7b = answer_7b['output']
45
+ return ans_str_7b
46
+
47
+ if model == '13B (Beta)':
48
+ response = requests.put('http://sls-titan-5.csail.mit.edu:8080/items/0', json={
49
+ 'audio_path': audio_path, 'question': question
50
+ })
51
+ answer_13b = json.loads(response.content)
52
+ ans_str_13b = answer_13b['output']
53
+ return ans_str_13b
54
 
55
  if __name__ == '__main__':
56
  link = "https://github.com/YuanGongND/ltu"
 
62
  demo = gr.Interface(fn=predict,
63
  inputs=[gr.Audio(type="filepath"),
64
  gr.Textbox(value='What can be inferred from the spoken text and sounds? Why?',
65
+ label='Edit the textbox to ask your own questions!'),
66
+ gr.Radio(["7B (Default)", "13B (Beta)"], value='7B (Default)', label="LLM size", info="All experiments are 7B LLM.")],
67
+ outputs=[gr.Textbox(label="LTU-AS-Output")],
68
  cache_examples=True,
69
  title="Demo of LTU-AS",
70
  description="LTU-AS an improved version of LTU. LTU-AS is stronger in spoken text understanding and music understanding. " + f"<a href='{paper_link}'>{paper_text}</a> <br>" +