File size: 4,896 Bytes
9ac6b43
 
 
ce35891
9ac6b43
 
 
 
fc494d5
 
1f67c3c
ac7edd1
93d7e30
ac7edd1
09dbcf5
 
b0df48d
9ac6b43
a091d09
9ac6b43
7402605
 
 
1f67c3c
 
 
7402605
 
 
 
1f67c3c
7402605
 
1f67c3c
7402605
1f67c3c
ac7edd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a091d09
 
 
9ac6b43
 
93396a9
9ac6b43
a2bcc76
93396a9
e5d255d
7402605
 
9ac6b43
 
93396a9
a2bcc76
 
 
 
 
 
ac7edd1
7402605
 
1f67c3c
 
ac7edd1
 
 
e5d255d
ac7edd1
 
 
 
1f67c3c
5e57e56
ac7edd1
 
93d7e30
 
 
 
 
 
 
 
 
9ac6b43
b0df48d
a091d09
9ac6b43
af74651
3b9fcdd
 
 
 
 
 
43cd7f6
 
 
 
 
 
 
 
 
 
 
 
 
 
dcd0485
43cd7f6
 
cfac6d0
d9c140b
 
 
93396a9
 
 
37cc338
cfac6d0
 
 
 
b4943d8
b0df48d
b4943d8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
import gradio as gr


from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import OpenAI

from gradio_client import Client
eleven = Client("https://elevenlabs-tts.hf.space/")

import wave
from pydub import AudioSegment

global openai_api_key

#openai_api_key = os.environ.get("OPENAI_API_KEY")



def split_text_into_sentences(text):
    """Split *text* into sentence-sized chunks for the TTS API.

    Whitespace is normalized (words re-joined with single spaces) and a
    sentence ends at any word whose last character is '.', '!' or '?'.
    The original implementation only recognised '.' — exclamations and
    questions were silently merged into the next chunk, which could push
    a chunk past the TTS 250-character limit.

    :param text: arbitrary text (may be empty).
    :return: list of sentence strings; empty list for empty/blank input.
    """
    sentences = []
    current = []

    for word in text.split():
        current.append(word)
        # '!' and '?' terminate sentences too, not just '.'
        if word.endswith(('.', '!', '?')):
            sentences.append(' '.join(current))
            current = []

    # Trailing words without a terminator still form a final chunk.
    if current:
        sentences.append(' '.join(current))

    return sentences

def join_wav_files(input_files, output_file):
    """Concatenate several WAV files into one.

    The audio parameters (channels, sample width, frame rate) are taken
    from the first input file; every file is assumed to share them.

    Fixes over the original: the loop variable no longer shadows the
    builtin ``input``, and an empty ``input_files`` raises a clear
    ``ValueError`` instead of an opaque ``IndexError``.

    :param input_files: non-empty list of paths to WAV files to join, in order.
    :param output_file: path the combined WAV file is written to.
    :raises ValueError: if ``input_files`` is empty.
    """
    if not input_files:
        raise ValueError("input_files must not be empty")

    # Take the format parameters from the first file.
    with wave.open(input_files[0], 'rb') as first_file:
        params = first_file.getparams()

    # Write every file's raw frames into the output, in order.
    with wave.open(output_file, 'wb') as output:
        output.setparams(params)
        for path in input_files:
            with wave.open(path, 'rb') as src:
                output.writeframes(src.readframes(src.getnframes()))

def generate_story(text, lang, age, openai_key):
    """Generate a bedtime story with GPT-3 and narrate it to MP3.

    Pipeline: prompt the LLM for a short story, split the result into
    sentence chunks, synthesize each chunk with the ElevenLabs Space,
    join the WAV chunks, and transcode to MP3.

    :param text: subject the story should be about.
    :param lang: language the story must be written in.
    :param age: target audience age (string, e.g. "5").
    :param openai_key: the caller's OpenAI API key.
    :return: tuple ``(story_text, "output.mp3")``.
    """
    # BUG FIX: the original assigned the key to a *local* variable named
    # openai_api_key, which never reached langchain — the key must be
    # passed to the OpenAI client explicitly.
    llm = OpenAI(temperature=0.9, max_tokens=-1, openai_api_key=openai_key)
    prompt = PromptTemplate(
        input_variables=["text", "lang", "age"],
        template=""" 
        You are a fun and seasoned storyteller. 
        Generate a short bedtime story for a {age} years old audience about {text}.
        Your story must be written in {lang}.
        Use short sentences. The story is not too long, but not too short either.
        """
    )
    story = LLMChain(llm=llm, prompt=prompt)
    story_result = story.run(text=text, lang=lang, age=age)
    print(story_result)
    print("""
    β€”
    Cutting text in chunks
    β€”
    """)
    input_waves = []

    # One TTS call per sentence chunk (the Space caps input at 250 chars).
    text_chunks = split_text_into_sentences(story_result)
    for chunk in text_chunks:
        print(chunk)
        result = eleven.predict(
				chunk,	# str representing input in 'Input Text (250 characters max)' Textbox component
				"Bella",	# str representing input in 'Voice' Dropdown component
				"eleven_multilingual_v1",	# str representing input in 'Model' Radio component
				fn_index=0
        )
        print(result)
        input_waves.append(result)

    output_wav = 'output.wav'
    join_wav_files(input_waves, output_wav)

    # Load the joined WAV and transcode it to MP3 for the Gradio player.
    wav_file = AudioSegment.from_file("output.wav", format="wav")
    mp3_file = "output.mp3"
    wav_file.export(mp3_file, format="mp3")

    return story_result, 'output.mp3'

def app(text, lang, age, openai_key):
    """Gradio click handler: delegate to generate_story.

    Returns its (story_text, mp3_path) tuple, which Gradio unpacks
    into the two output components.
    """
    return generate_story(text, lang, age, openai_key)

# Page styling: center the app column and style links.
css = """
#col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
"""
# Build the Gradio UI: a title header, the story inputs, and the outputs.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Static HTML header with the app title and a short description.
        gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
                <div
                style="
                    display: inline-flex;
                    align-items: center;
                    gap: 0.8rem;
                    font-size: 1.75rem;
                "
                >
                <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
                    Bedtime Story
                </h1>
                </div>
                <p style="margin-bottom: 10px; font-size: 94%">
                Generate a bedtime story for a 5 years old audience who needs to get some sleep 😴  
                </p>
            </div>""")
        
               
        # Story subject free-text input.
        text = gr.Textbox(label="Subject: what the story should be about ?", info="Will generate and tell a story about {your text input}")
        with gr.Group():
            with gr.Row():
                # Language and age-target pickers shown side by side.
                lang = gr.Dropdown(label="Pick a language", choices=["English", "French", "German", "Hindi", "Italian", "Polish", "Portuguese", "Spanish"], value="English")
                age = gr.Dropdown(label="Age target", choices=["3","4","5","6","7"], value="5")
        # The user supplies their own OpenAI key; it is masked in the UI.
        openai_key = gr.Textbox(label="πŸ”‘ Your OpenAI API key", type="password", info="*required")
        submit_btn = gr.Button('Submit')
        
        # Outputs: the narrated audio and the generated story text.
        audio = gr.Audio(label="The story audio told")
        story = gr.Textbox(label="The story text")

    # Wire the button to the app() handler; note app returns (story, audio path).
    submit_btn.click(fn=app, inputs=[text, lang, age, openai_key], outputs=[story, audio])
        
demo.launch()