lmzjms committed on
Commit
b1e24e0
1 Parent(s): e8795d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -162,9 +162,9 @@ class ConversationBot:
162
  # self.i2t = ImageCaptioning(device="cuda:0")
163
  # self.t2a = T2A(device="cpu")
164
  self.tts = TTS(device="cpu")
165
- # self.t2s = T2S(device="cuda:0")
166
  # self.i2a = I2A(device="cpu")
167
- self.a2t = A2T(device="cpu")
168
  # self.asr = ASR(device="cuda:0")
169
  # self.inpaint = Inpaint(device="cpu")
170
  #self.tts_ood = TTS_OOD(device="cuda:0")
@@ -183,21 +183,21 @@ class ConversationBot:
183
  # description="useful for when you want to generate speech samples with styles (e.g., timbre, emotion, and prosody) derived from a reference custom voice."
184
  # "Like: Generate a speech with style transferred from this voice. The text is xxx., or speak using the voice of this audio. The text is xxx."
185
  # "The input to this tool should be a comma seperated string of two, representing reference audio path and input text."),
186
- # Tool(name="Generate singing voice From User Input Text, Note and Duration Sequence", func= self.t2s.inference,
187
- # description="useful for when you want to generate a piece of singing voice (Optional: from User Input Text, Note and Duration Sequence) and save it to a file."
188
- # "If Like: Generate a piece of singing voice, the input to this tool should be \"\" since there is no User Input Text, Note and Duration Sequence ."
189
- # "If Like: Generate a piece of singing voice. Text: xxx, Note: xxx, Duration: xxx. "
190
- # "Or Like: Generate a piece of singing voice. Text is xxx, note is xxx, duration is xxx."
191
- # "The input to this tool should be a comma seperated string of three, representing text, note and duration sequence since User Input Text, Note and Duration Sequence are all provided."),
192
  Tool(name="Synthesize Speech Given the User Input Text", func=self.tts.inference,
193
  description="useful for when you want to convert a user input text into speech audio it saved it to a file."
194
- "The input to this tool should be a string, representing the text used to be converted to speech."),
195
  # Tool(name="Generate Audio From The Image", func=self.i2a.inference,
196
  # description="useful for when you want to generate an audio based on an image."
197
  # "The input to this tool should be a string, representing the image_path. "),
198
- Tool(name="Generate Text From The Audio", func=self.a2t.inference,
199
- description="useful for when you want to describe an audio in text, receives audio_path as input."
200
- "The input to this tool should be a string, representing the audio_path.")]
201
  # Tool(name="Audio Inpainting", func=self.inpaint.show_mel_fn,
202
  # description="useful for when you want to inpaint a mel spectrum of an audio and predict this audio, this tool will generate a mel spectrum and you can inpaint it, receives audio_path as input, "
203
  # "The input to this tool should be a string, representing the audio_path.")]
 
162
  # self.i2t = ImageCaptioning(device="cuda:0")
163
  # self.t2a = T2A(device="cpu")
164
  self.tts = TTS(device="cpu")
165
+ self.t2s = T2S(device="cpu")
166
  # self.i2a = I2A(device="cpu")
167
+ # self.a2t = A2T(device="cpu")
168
  # self.asr = ASR(device="cuda:0")
169
  # self.inpaint = Inpaint(device="cpu")
170
  #self.tts_ood = TTS_OOD(device="cuda:0")
 
183
  # description="useful for when you want to generate speech samples with styles (e.g., timbre, emotion, and prosody) derived from a reference custom voice."
184
  # "Like: Generate a speech with style transferred from this voice. The text is xxx., or speak using the voice of this audio. The text is xxx."
185
  # "The input to this tool should be a comma seperated string of two, representing reference audio path and input text."),
186
+ Tool(name="Generate singing voice From User Input Text, Note and Duration Sequence", func= self.t2s.inference,
187
+ description="useful for when you want to generate a piece of singing voice (Optional: from User Input Text, Note and Duration Sequence) and save it to a file."
188
+ "If Like: Generate a piece of singing voice, the input to this tool should be \"\" since there is no User Input Text, Note and Duration Sequence ."
189
+ "If Like: Generate a piece of singing voice. Text: xxx, Note: xxx, Duration: xxx. "
190
+ "Or Like: Generate a piece of singing voice. Text is xxx, note is xxx, duration is xxx."
191
+ "The input to this tool should be a comma seperated string of three, representing text, note and duration sequence since User Input Text, Note and Duration Sequence are all provided."),
192
  Tool(name="Synthesize Speech Given the User Input Text", func=self.tts.inference,
193
  description="useful for when you want to convert a user input text into speech audio it saved it to a file."
194
+ "The input to this tool should be a string, representing the text used to be converted to speech.")]
195
  # Tool(name="Generate Audio From The Image", func=self.i2a.inference,
196
  # description="useful for when you want to generate an audio based on an image."
197
  # "The input to this tool should be a string, representing the image_path. "),
198
+ # Tool(name="Generate Text From The Audio", func=self.a2t.inference,
199
+ # description="useful for when you want to describe an audio in text, receives audio_path as input."
200
+ # "The input to this tool should be a string, representing the audio_path.")]
201
  # Tool(name="Audio Inpainting", func=self.inpaint.show_mel_fn,
202
  # description="useful for when you want to inpaint a mel spectrum of an audio and predict this audio, this tool will generate a mel spectrum and you can inpaint it, receives audio_path as input, "
203
  # "The input to this tool should be a string, representing the audio_path.")]