Rongjiehuang committed
Commit 3075f9b
1 Parent(s): 5db7a2d

update huggingface

Files changed (2):
  1. .gitignore +16 -0
  2. app.py +17 -9
.gitignore ADDED
@@ -0,0 +1,16 @@
+ # JetBrains PyCharm IDE
+ .idea/
+ .github/
+ .circleci/
+
+ # Byte-compiled / optimized / DLL files
+ *__pycache__/
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # macOS dir files
+ .DS_Store
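The new .gitignore covers PyCharm/CI metadata, Python bytecode, compiled C extensions, and macOS folder files. To confirm which rule catches a given path, git's own matcher can report it; a quick illustration in Python (the candidate paths are made up, and it assumes git is installed and the script runs at the repository root):

```python
# Ask git which ignore rule, if any, claims each candidate path.
# The paths below are illustrative only.
import subprocess

candidates = [".idea/workspace.xml", "__pycache__/app.cpython-39.pyc", "foo.so", ".DS_Store"]
proc = subprocess.run(
    ["git", "check-ignore", "-v", *candidates],
    capture_output=True,
    text=True,
)
# Each ignored path prints as "<source>:<line>:<pattern>\t<path>";
# unmatched paths are simply omitted from the output.
print(proc.stdout, end="")
```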
app.py CHANGED
@@ -5,13 +5,20 @@ from langchain.llms.openai import OpenAI
  from audio_foundation_models import *
  import gradio as gr

- AUDIO_CHATGPT_PREFIX = """Audio ChatGPT
- AUdio ChatGPT can not directly read audios, but it has a list of tools to finish different audio synthesis tasks. Each audio will have a file name formed as "audio/xxx.wav". When talking about audios, Audio ChatGPT is very strict to the file name and will never fabricate nonexistent files.
- AUdio ChatGPT is able to use tools in a sequence, and is loyal to the tool observation outputs rather than faking the audio content and audio file name. It will remember to provide the file name from the last tool observation, if a new audio is generated.
- Human may provide Audio ChatGPT with a description. Audio ChatGPT should generate audios according to this description rather than directly imagine from memory or yourself."
+ _DESCRIPTION = '# [AudioGPT](https://github.com/AIGC-Audio/AudioGPT)'
+ _DESCRIPTION += '\n<p>This is a demo to the work [AudioGPT: Sending and Receiving Speech, Sing, Audio, and Talking head during chatting](https://github.com/AIGC-Audio/AudioGPT).</p>'
+ _DESCRIPTION += '\n<p>This model can only be used for non-commercial purposes. To learn more about the model, take a look at the <a href="https://huggingface.co/damo-vilab/modelscope-damo-text-to-video-synthesis" style="text-decoration: underline;" target="_blank">model card</a>.</p>'
+
+
+ AUDIO_CHATGPT_PREFIX = """AudioGPT
+ AudioGPT can not directly read audios, but it has a list of tools to finish different speech, audio, and singing voice tasks. Each audio will have a file name formed as "audio/xxx.wav". When talking about audios, AudioGPT is very strict to the file name and will never fabricate nonexistent files.
+ AudioGPT is able to use tools in a sequence, and is loyal to the tool observation outputs rather than faking the audio content and audio file name. It will remember to provide the file name from the last tool observation, if a new audio is generated.
+ Human may provide new audios to AudioGPT with a description. The description helps AudioGPT to understand this audio, but AudioGPT should use tools to finish following tasks, rather than directly imagine from the description.
+ Overall, AudioGPT is a powerful audio dialogue assistant tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics.
+
  TOOLS:
  ------
- Audio ChatGPT has access to the following tools:"""
+ AudioGPT has access to the following tools:"""

  AUDIO_CHATGPT_FORMAT_INSTRUCTIONS = """To use a tool, please use the following format:
  ```
@@ -161,7 +168,7 @@ class ConversationBot:
  print("Inputs:", state)
  print("======>Previous memory:\n %s" % self.agent.memory)
  # inpaint = Inpaint(device="cpu")
- new_image_filename, new_audio_filename = self.models['Inpaint'].predict(audio_filename, image_filename)
+ new_image_filename, new_audio_filename = self.models['Inpaint'].predict(audio_filename, image_filename)
  AI_prompt = "Here are the predict audio and the mel spectrum." + f"*{new_audio_filename}*" + f"![](/file={new_image_filename})*{new_image_filename}*"
  self.agent.memory.buffer = self.agent.memory.buffer + 'AI: ' + AI_prompt
  print("======>Current memory:\n %s" % self.agent.memory)
@@ -188,7 +195,7 @@ class ConversationBot:



- if __name__ == '__main__':
+ if __name__ == '__main__':
  bot = ConversationBot({'ImageCaptioning': 'cuda:0',
  'T2A': 'cuda:0',
  'I2A': 'cuda:0',
@@ -203,6 +210,8 @@ if __name__ == '__main__':
  'TargetSoundDetection': 'cpu'
  })
  with gr.Blocks(css="#chatbot {overflow:auto; height:500px;}") as demo:
+ gr.Markdown(_DESCRIPTION)
+
  with gr.Row():
  openai_api_key_textbox = gr.Textbox(
  placeholder="Paste your OpenAI API key here to start AudioGPT(sk-...) and press Enter ↵️",
@@ -210,8 +219,7 @@
  lines=1,
  type="password",
  )
- with gr.Row():
- gr.Markdown("## AudioGPT")
+
  chatbot = gr.Chatbot(elem_id="chatbot", label="AudioGPT")
  state = gr.State([])
  with gr.Row(visible = False) as input_raws:
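The rewritten AUDIO_CHATGPT_PREFIX and the renamed tool line feed the agent's system prompt. Below is a minimal sketch of how prompt constants like these are typically passed to a LangChain 0.0.x conversational agent via `agent_kwargs`; the `build_agent` helper and its arguments are assumptions for illustration, not code from this commit.

```python
# Minimal sketch, not code from this commit: wiring custom prompt constants
# (e.g. AUDIO_CHATGPT_PREFIX / AUDIO_CHATGPT_FORMAT_INSTRUCTIONS from app.py)
# into a LangChain conversational agent.
from langchain.agents import initialize_agent
from langchain.llms.openai import OpenAI
from langchain.memory import ConversationBufferMemory

def build_agent(tools, openai_api_key, prefix, format_instructions):
    # `tools` would be the tool list built from audio_foundation_models.
    llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
    memory = ConversationBufferMemory(memory_key="chat_history")
    return initialize_agent(
        tools,
        llm,
        agent="conversational-react-description",
        memory=memory,
        verbose=True,
        agent_kwargs={
            "prefix": prefix,                            # AUDIO_CHATGPT_PREFIX
            "format_instructions": format_instructions,  # AUDIO_CHATGPT_FORMAT_INSTRUCTIONS
        },
    )
```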
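On the UI side, the commit drops the `gr.Markdown("## AudioGPT")` row and instead renders `_DESCRIPTION` at the top of the Blocks layout. The following is a stripped-down sketch of that layout; the `echo` handler is a stand-in for ConversationBot's real callbacks, which need the loaded models and an OpenAI key.

```python
# Stripped-down sketch of the Blocks layout after this commit; echo() is a
# placeholder, not ConversationBot's real run_text/run_audio handlers.
import gradio as gr

_DESCRIPTION = '# [AudioGPT](https://github.com/AIGC-Audio/AudioGPT)'

def echo(text, history):
    history = history + [(text, f"(stub reply to: {text})")]
    return history, history

with gr.Blocks(css="#chatbot {overflow:auto; height:500px;}") as demo:
    gr.Markdown(_DESCRIPTION)  # description banner added in this commit

    with gr.Row():
        openai_api_key_textbox = gr.Textbox(
            placeholder="Paste your OpenAI API key here to start AudioGPT (sk-...) and press Enter",
            show_label=False,
            lines=1,
            type="password",
        )

    chatbot = gr.Chatbot(elem_id="chatbot", label="AudioGPT")
    state = gr.State([])

    with gr.Row() as input_raws:
        txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter")
        txt.submit(echo, [txt, state], [chatbot, state])

if __name__ == "__main__":
    demo.launch()
```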