# BedtimeStory / app.py
# Author: fffiloni
# Last commit: "Added age settings" (93396a9)
import os
import gradio as gr
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from gradio_client import Client
# gradio_client handle for the hosted ElevenLabs text-to-speech Space.
# generate_story calls eleven.predict(...) on it once per text chunk.
eleven = Client("https://elevenlabs-tts.hf.space/")
import wave
from pydub import AudioSegment
# Read the OpenAI key from the environment.
# NOTE(review): this variable is not referenced again in the visible code;
# presumably the OpenAI wrapper picks the key up from the environment on its
# own — confirm before removing.
openai_api_key = os.environ.get("OPENAI_API_KEY")
# Shared LLM instance used by generate_story to write the story text.
# NOTE(review): max_tokens=-1 presumably tells langchain to use as many
# completion tokens as the model context allows — confirm against the
# langchain OpenAI documentation.
llm = OpenAI(temperature=0.9, max_tokens=-1)
def split_text_into_sentences(text):
    """Split *text* into sentence-sized chunks.

    Words are accumulated until one ends with a sentence terminator
    (``.``, ``!``, ``?``, ``…`` or the Devanagari danda ``।``), optionally
    followed by closing quotes or brackets. Runs of whitespace (including
    newlines) collapse to single spaces inside each chunk.

    Args:
        text: The text to split.

    Returns:
        A list of sentences in their original order. Trailing text without
        a terminator is returned as the final item. Empty or
        whitespace-only input yields an empty list.
    """
    # Splitting on "." alone glued "Wow! Really? Yes." into a single chunk,
    # which makes chunks needlessly long for a length-limited TTS input.
    terminators = ('.', '!', '?', '…', '।')
    # Closing punctuation that may follow the terminator, e.g. `"Sleep."`.
    closers = "\"'”’»)]"

    sentences = []
    current_words = []
    for word in text.split():
        current_words.append(word)
        if word.rstrip(closers).endswith(terminators):
            sentences.append(' '.join(current_words))
            current_words = []

    # Keep whatever is left when the text does not end with a terminator.
    if current_words:
        sentences.append(' '.join(current_words))
    return sentences
def join_wav_files(input_files, output_file):
    """Concatenate several WAV files into one WAV file.

    The output takes its audio parameters (as returned by ``getparams()``)
    from the first input file. The frames of every input are appended in
    order exactly as stored; nothing is resampled or converted.

    Args:
        input_files: Iterable of paths (or file objects accepted by
            ``wave.open``) of the WAV files to join, in playback order.
        output_file: Path of the WAV file to create.

    Raises:
        ValueError: If ``input_files`` is empty. Raised before the output
            file is opened, so no empty file is left behind.
    """
    input_files = list(input_files)
    if not input_files:
        raise ValueError("join_wav_files() needs at least one input file")

    # The first file defines the format of the joined output.
    with wave.open(input_files[0], 'rb') as first_file:
        params = first_file.getparams()

    with wave.open(output_file, 'wb') as output:
        output.setparams(params)
        for input_file in input_files:
            # Named `source` so the builtin input() is not shadowed.
            with wave.open(input_file, 'rb') as source:
                output.writeframes(source.readframes(source.getnframes()))
def generate_story(text, lang, age):
    """Generate a bedtime story with the LLM and narrate it as an MP3 file.

    The story is written by the module-level ``llm`` through a langchain
    ``LLMChain``, split into sentences, sent sentence by sentence to the
    ``eleven`` text-to-speech client, and the resulting audio clips are
    joined and converted to MP3.

    Args:
        text: Subject of the story; inserted into the prompt.
        lang: Language the story must be written in; inserted into the prompt.
        age: Target audience age; inserted into the prompt.

    Returns:
        A ``(story_text, mp3_path)`` tuple. ``mp3_path`` is ``None`` when the
        model returned no text, because there is nothing to narrate.
    """
    # Local import so this block does not depend on an edit to the file's
    # import section.
    import tempfile

    prompt = PromptTemplate(
        input_variables=["text", "lang", "age"],
        template=(
            "\n"
            "You are a fun and seasoned storyteller.\n"
            "Generate a short bedtime story for a {age} years old audience about {text}.\n"
            "Your story must be written in {lang}.\n"
            "Use short sentences. The story is not too long, but not too short either.\n"
        ),
    )
    story = LLMChain(llm=llm, prompt=prompt)
    story_result = story.run(text=text, lang=lang, age=age)
    print(story_result)

    # Debug banner; the dashes were mis-encoded ("β€”") in the original.
    print("\n—\nCutting text in chunks\n—\n")

    text_chunks = split_text_into_sentences(story_result)
    if not text_chunks:
        # An empty story previously crashed while joining zero audio files.
        return story_result, None

    input_waves = []
    for chunk in text_chunks:
        print(chunk)
        # NOTE(review): the TTS input box is labelled "250 characters max";
        # a single very long sentence may exceed that — confirm how the
        # service reacts.
        result = eleven.predict(
            chunk,  # str representing input in 'Input Text (250 characters max)' Textbox component
            "Bella",  # str representing input in 'Voice' Dropdown component
            "eleven_multilingual_v1",  # str representing input in 'Model' Radio component
            fn_index=0
        )
        print(result)
        # Presumably a local path to the generated WAV clip; it is handed
        # to wave.open() by join_wav_files below.
        input_waves.append(result)

    # Every request gets its own output file. The original wrote to fixed
    # "output.wav"/"output.mp3" paths in the working directory, so two
    # simultaneous requests could overwrite each other's audio.
    fd, mp3_file = tempfile.mkstemp(prefix="bedtime_story_", suffix=".mp3")
    os.close(fd)

    # The intermediate WAV is only needed for the conversion, so it lives in
    # a scratch directory that is removed as soon as the MP3 is written.
    with tempfile.TemporaryDirectory() as work_dir:
        output_wav = os.path.join(work_dir, "output.wav")
        join_wav_files(input_waves, output_wav)
        wav_file = AudioSegment.from_file(output_wav, format="wav")
        wav_file.export(mp3_file, format="mp3")

    return story_result, mp3_file
def app(text, lang, age):
    """Click handler for the Submit button.

    Forwards the subject, language and age to generate_story and returns
    its result unchanged.
    """
    return generate_story(text, lang, age)
# Page-level CSS passed to gr.Blocks: limits the width of the element with id
# "col-container", centers it, and styles links.
css = """
#col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
"""
# Build the Gradio UI.
# NOTE(review): leading indentation is missing in this copy of the file, so
# the exact nesting of the layout containers below (Column / Group / Row)
# cannot be read from the text — restore it from the upstream source.
with gr.Blocks(css=css) as demo:
with gr.Column(elem_id="col-container"):
# Static title banner.
# NOTE(review): the subtitle still says "5 years old" although the target age
# is selectable through the "Age target" dropdown — consider updating the text.
gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
Bedtime Story
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Generate a bedtime story for a 5 years old audience who needs to get some sleep 😴
</p>
</div>""")
# Input controls: story subject, language and target age.
with gr.Group():
text = gr.Textbox(label="Subject: what the story should be about ?", info="Will generate and tell a story about {your text input}")
with gr.Row():
lang = gr.Dropdown(label="Pick a language", choices=["English", "French", "German", "Hindi", "Italian", "Polish", "Portuguese", "Spanish"], value="English")
# Ages are offered as strings; the selected value is passed to app() as-is.
age = gr.Dropdown(label="Age target", choices=["3","4","5","6","7"], value="5")
submit_btn = gr.Button('Submit')
# Output components: narrated audio and the story text.
audio = gr.Audio(label="The story audio told")
story = gr.Textbox(label="The story text")
# app() returns (story_text, audio_path), matching the order of `outputs`.
submit_btn.click(fn=app, inputs=[text, lang, age], outputs=[story, audio])
# Start the Gradio server when this module is executed.
demo.launch()