File size: 2,253 Bytes
8ed55d1
d79329d
 
2cbe783
 
8ed55d1
d79329d
8ed55d1
d79329d
 
 
 
 
 
 
2cbe783
a215723
d79329d
2cbe783
8ed55d1
d79329d
 
 
 
 
8ed55d1
d79329d
 
 
 
 
 
 
 
8ed55d1
d79329d
 
 
 
 
 
f229ec0
d79329d
 
c4bd36b
 
 
 
 
9d143ad
d79329d
 
 
 
c4bd36b
e6634ca
e4bda70
e2149e4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import gradio as gr
from transformers import pipeline
from espnet2.bin.tts_inference import Text2Speech
import warnings
warnings.filterwarnings('ignore')

def generateTextAndAudio(inputText, numGen):
    """Continue ``inputText`` with the text model, then synthesize speech for it.

    Relies on the module-level ``textGenerator`` and ``audioGenerator`` objects.

    Args:
        inputText: Prompt string passed to ``textGenerator``.
        numGen: Forwarded to the text generator as ``max_length`` (coerced to
            ``int`` first, because a UI slider may deliver a float).

    Returns:
        A ``(generated_text, (sampling_rate, waveform))`` tuple where
        ``waveform`` is the synthesized audio converted to a NumPy array.
    """
    # --- Generating the Text ---
    # NOTE(review): in transformers, `max_length` is documented as bounding
    # prompt + continuation together, not just the new tokens -- confirm that
    # this is the intended meaning of the slider.
    textOutput = textGenerator(inputText, max_length=int(numGen))
    # The generator output is indexed as a list of dicts; take the first entry
    # and read its `generated_text` field.
    genText = textOutput[0]['generated_text']

    print("-"*75)
    print("Input Text:", inputText)
    print("Generated Text:", genText)
    print("-"*75)

    # --- Generating the Audio ---
    audioOutput = audioGenerator(genText)
    genAudio = audioOutput['wav']

    # Ask the TTS model for its own sampling rate instead of hard-coding one:
    # a mismatched rate makes playback too fast/slow and shifts the pitch.
    # Presumably this LJSpeech-trained model is 22.05 kHz rather than 24 kHz
    # -- TODO confirm. Fall back to the previous constant when the attribute
    # is missing or None.
    fallbackRate = 24000
    sampleRate = getattr(audioGenerator, "fs", None) or fallbackRate

    # detach()/cpu() are no-ops for a plain CPU tensor, but keep .numpy() from
    # raising if the model runs on an accelerator or the tensor tracks grads.
    waveform = genAudio.detach().cpu().numpy()

    # Return two things
    # 1) Generated Text
    # 2) The sampling rate and the generated audio (wav) as numpy
    return genText, (int(sampleRate), waveform)
  
# Main
# Both models are created once at module load; generateTextAndAudio reads them
# as module-level globals.
textGenerator = pipeline('text-generation', model = 'gpt2')
audioGenerator = Text2Speech.from_pretrained("espnet/kan-bayashi_ljspeech_joint_finetune_conformer_fastspeech2_hifigan")

# --- UI components ---
input1_textbox = gr.Textbox(label="Input text")
# `value=` sets the slider's initial position. The previous `default=` keyword
# is not part of the top-level component API, so the intended starting value
# of 30 was not reliably applied.
input2_slider = gr.Slider(minimum=1, maximum=100, step=1, value=30, label="Number of words to generate")

output1_textbox = gr.Textbox(label = "Generated Text")
output2_Audio = gr.Audio(label = "Generated Audio")

title = "Generate Text and its Audio!"
description = "Provide the text, and how many subwords to generate"

# Each example row is [input text, slider value], matching the `inputs` order.
examples = [
    ["I won a", 50],
    ["My name is", 30],
    ["I have", 60],
]
article = "<p style='text-align: center'><img src='https://visitor-badge.glitch.me/badge?page_id=lilyf_generate_text_and_audio' alt='visitor badge'></p>"

# Build the interface first so `iface` names the Interface object itself
# (previously it was bound to whatever `.launch()` returned), then start it.
iface = gr.Interface(
    fn=generateTextAndAudio,
    inputs=[input1_textbox, input2_slider],
    outputs=[output1_textbox, output2_Audio],
    title=title,
    description=description,
    examples=examples,
    article=article,
)
iface.launch(debug=True)