File size: 3,265 Bytes
9ac6b43
 
 
 
 
 
 
fc494d5
 
1f67c3c
ac7edd1
 
2b1d0c1
9ac6b43
 
 
7402605
 
 
1f67c3c
 
 
7402605
 
 
 
1f67c3c
7402605
 
1f67c3c
7402605
1f67c3c
ac7edd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ac6b43
 
 
 
 
a2bcc76
3b9fcdd
7402605
 
 
9ac6b43
 
1f67c3c
a2bcc76
 
 
 
 
 
ac7edd1
7402605
 
1f67c3c
 
ac7edd1
 
 
 
 
 
 
 
1f67c3c
5e57e56
ac7edd1
 
5e57e56
9ac6b43
 
 
 
af74651
3b9fcdd
 
 
 
 
 
 
b4943d8
3b9fcdd
 
b4943d8
ac7edd1
b4943d8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import gradio as gr

from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import OpenAI

from gradio_client import Client
# Remote ElevenLabs text-to-speech Space, driven via the gradio_client API.
# NOTE(review): constructed at import time — module import performs a network
# handshake with the Space and will fail offline.
eleven = Client("https://elevenlabs-tts.hf.space/")

import wave

# Read by the langchain OpenAI wrapper; may be None if the env var is unset,
# in which case the first LLM call will fail at runtime.
openai_api_key = os.environ.get("OPENAI_API_KEY")

# High temperature (0.9) for creative, varied story generation.
llm = OpenAI(temperature=0.9)

def split_text_into_sentences(text):
    """Split *text* into sentence-sized chunks on terminal punctuation.

    A word ending in '.', '!' or '?' closes the current sentence; any
    trailing words without terminal punctuation are returned as a final
    sentence. Keeping chunks sentence-sized matters downstream: the
    ElevenLabs TTS endpoint caps each request at 250 characters.

    Args:
        text: Free-form text (e.g. the generated story).

    Returns:
        list[str]: Sentences with surrounding whitespace stripped; an
        empty list for empty/whitespace-only input.
    """
    sentences = []
    current = []  # words of the sentence being accumulated

    for word in text.split():
        current.append(word)
        # Original split only on '.'; '!' and '?' also end sentences, and
        # leaving them unsplit risks oversized TTS requests.
        if word.endswith(('.', '!', '?')):
            sentences.append(' '.join(current))
            current = []

    if current:
        sentences.append(' '.join(current))

    return sentences

def join_wav_files(input_files, output_file):
    """Concatenate WAV files into a single WAV file.

    All inputs are assumed to share the audio format (channels, sample
    width, frame rate) of the first file; parameters are copied from it
    and every file's frames are appended verbatim, in order.

    Args:
        input_files: Non-empty list of paths to input .wav files.
        output_file: Path of the .wav file to create/overwrite.

    Raises:
        ValueError: If ``input_files`` is empty (the original code raised
            an opaque IndexError here).
        wave.Error: If any input is not a valid WAV file.
    """
    if not input_files:
        raise ValueError("join_wav_files: need at least one input file")

    # Take the audio parameters from the first file.
    with wave.open(input_files[0], 'rb') as first_file:
        params = first_file.getparams()

    with wave.open(output_file, 'wb') as output:
        output.setparams(params)

        # Append each input's raw frames to the output.
        # (Loop variable renamed: the original shadowed the builtin `input`.)
        for path in input_files:
            with wave.open(path, 'rb') as src:
                output.writeframes(src.readframes(src.getnframes()))

def generate_story(text):
    """Generate a bedtime story about *text* and narrate it to 'output.wav'.

    Pipeline: LangChain+OpenAI generates the story text, the story is split
    into sentence chunks (the remote TTS caps requests at 250 characters),
    each chunk is synthesized by the ElevenLabs Space, and the per-chunk
    WAV files are concatenated.

    Returns:
        tuple[str, str]: (story text, path to the joined audio file
        'output.wav').
    """
    prompt = PromptTemplate(
        input_variables=["text"],
        template=""" 
        You are a fun and seasoned storyteller. 
        Generate a short bedtime story for a 5 years old audience about {text}.
        Use short sentences. The story is not too long, but not too short either.
        Always finish your story with "The End".
        """
    )
    story = LLMChain(llm=llm, prompt=prompt)
    story_result = story.run(text=text)
    print(story_result)
    print("""

    Cutting text in chunks

    """)
    # Paths of the per-sentence WAV files returned by the TTS Space.
    input_waves = []
    
    text_chunks = split_text_into_sentences(story_result)
    for chunk in text_chunks:
        print(chunk)
        # Positional args must match the Space's fn_index=0 signature exactly.
        # NOTE(review): presumably `result` is a local filepath to a WAV file
        # downloaded by gradio_client — confirm against the Space's API.
        result = eleven.predict(
				chunk,	# str representing input in 'Input Text (250 characters max)' Textbox component
				"Bella",	# str representing input in 'Voice' Dropdown component
				"eleven_monolingual_v1",	# str representing input in 'Model' Radio component
				fn_index=0
        )
        print(result)
        input_waves.append(result)
    
    output_wav = 'output.wav'

    join_wav_files(input_waves, output_wav)
    return story_result, 'output.wav'

def app(text):
    """Gradio click handler: delegate to generate_story.

    Passes through its (story_text, audio_path) result unchanged, which
    Gradio unpacks into the two declared outputs.
    """
    return generate_story(text)

# --- Gradio UI: a single subject textbox drives story generation + narration.
css = """
#col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
"""
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        text = gr.Textbox(label="Subject", info="Will generate and tell a story about {your text input}")
        submit_btn = gr.Button('Submit')
        # BUG FIX: the string was passed positionally, where gr.Audio expects
        # the component *value* (an audio filepath/URL) — it was never shown
        # as a label. Pass it as label= instead.
        audio = gr.Audio(label="The story audio told")
        story = gr.Textbox(label="The story text")

    # app returns (story_text, audio_path), matching outputs order.
    submit_btn.click(fn=app, inputs=[text], outputs=[story, audio])

demo.launch()