lmzjms committed on
Commit
335ec14
1 Parent(s): 653e975

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -71,6 +71,8 @@ class ConversationBot:
71
  if tool == "Generate Image From User Input Text" or tool == "Generate Text From The Audio" or tool == "Transcribe speech":
72
  print("======>Current memory:\n %s" % self.agent.memory)
73
  response = re.sub('(image/\S*png)', lambda m: f'![](/file={m.group(0)})*{m.group(0)}*', res['output'])
 
 
74
  state = state + [(text, response)]
75
  print("Outputs:", state)
76
  return state, state, gr.Audio.update(visible=False), gr.Image.update(visible=False), gr.Button.update(visible=False)
@@ -160,9 +162,9 @@ class ConversationBot:
160
  self.t2s = T2S(device="cpu")
161
  self.i2a = I2A(device="cuda:0")
162
  self.a2t = A2T(device="cpu")
163
- self.asr = ASR(device="cpu")
164
  self.inpaint = Inpaint(device="cuda:0")
165
- self.tts_ood = TTS_OOD(device="cpu")
166
  self.tools = [
167
  Tool(name="Generate Image From User Input Text", func=self.t2i.inference,
168
  description="useful for when you want to generate an image from a user input text and it saved it to a file. like: generate an image of an object or something, or generate an image that includes some objects. "
@@ -173,11 +175,11 @@ class ConversationBot:
173
  Tool(name="Generate Audio From User Input Text", func=self.t2a.inference,
174
  description="useful for when you want to generate an audio from a user input text and it saved it to a file."
175
  "The input to this tool should be a string, representing the text used to generate audio."),
176
- Tool(
177
- name="Generate human speech with style derived from a speech reference and user input text and save it to a file", func= self.tts_ood.inference,
178
- description="useful for when you want to generate speech samples with styles (e.g., timbre, emotion, and prosody) derived from a reference custom voice."
179
- "Like: Generate a speech with style transferred from this voice. The text is xxx., or speak using the voice of this audio. The text is xxx."
180
- "The input to this tool should be a comma seperated string of two, representing reference audio path and input text."),
181
  Tool(name="Generate singing voice From User Input Text, Note and Duration Sequence", func= self.t2s.inference,
182
  description="useful for when you want to generate a piece of singing voice (Optional: from User Input Text, Note and Duration Sequence) and save it to a file."
183
  "If Like: Generate a piece of singing voice, the input to this tool should be \"\" since there is no User Input Text, Note and Duration Sequence ."
 
71
  if tool == "Generate Image From User Input Text" or tool == "Generate Text From The Audio" or tool == "Transcribe speech":
72
  print("======>Current memory:\n %s" % self.agent.memory)
73
  response = re.sub('(image/\S*png)', lambda m: f'![](/file={m.group(0)})*{m.group(0)}*', res['output'])
74
+ image_filename = res['intermediate_steps'][0][1]
75
+ response = response + f"![](/file={image_filename})*{image_filename}*"
76
  state = state + [(text, response)]
77
  print("Outputs:", state)
78
  return state, state, gr.Audio.update(visible=False), gr.Image.update(visible=False), gr.Button.update(visible=False)
 
162
  self.t2s = T2S(device="cpu")
163
  self.i2a = I2A(device="cuda:0")
164
  self.a2t = A2T(device="cpu")
165
+ self.asr = ASR(device="cuda:0")
166
  self.inpaint = Inpaint(device="cuda:0")
167
+ # self.tts_ood = TTS_OOD(device="cpu")
168
  self.tools = [
169
  Tool(name="Generate Image From User Input Text", func=self.t2i.inference,
170
  description="useful for when you want to generate an image from a user input text and it saved it to a file. like: generate an image of an object or something, or generate an image that includes some objects. "
 
175
  Tool(name="Generate Audio From User Input Text", func=self.t2a.inference,
176
  description="useful for when you want to generate an audio from a user input text and it saved it to a file."
177
  "The input to this tool should be a string, representing the text used to generate audio."),
178
+ # Tool(
179
+ # name="Generate human speech with style derived from a speech reference and user input text and save it to a file", func= self.tts_ood.inference,
180
+ # description="useful for when you want to generate speech samples with styles (e.g., timbre, emotion, and prosody) derived from a reference custom voice."
181
+ # "Like: Generate a speech with style transferred from this voice. The text is xxx., or speak using the voice of this audio. The text is xxx."
182
+ # "The input to this tool should be a comma seperated string of two, representing reference audio path and input text."),
183
  Tool(name="Generate singing voice From User Input Text, Note and Duration Sequence", func= self.t2s.inference,
184
  description="useful for when you want to generate a piece of singing voice (Optional: from User Input Text, Note and Duration Sequence) and save it to a file."
185
  "If Like: Generate a piece of singing voice, the input to this tool should be \"\" since there is no User Input Text, Note and Duration Sequence ."