import os
import wave

import gradio as gr
from gradio_client import Client

from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import OpenAI

# Client for a hosted ElevenLabs TTS Gradio Space, called remotely below
eleven = Client("https://elevenlabs-tts.hf.space/")

openai_api_key = os.environ.get("OPENAI_API_KEY")

# Pass the key explicitly so the environment dependency is visible
llm = OpenAI(temperature=0.9, openai_api_key=openai_api_key)
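
# Overall pipeline: prompt -> story text -> 250-character chunks ->
# one TTS clip per chunk via the Space -> clips joined into a single WAV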

def split_text(text, max_length):
    """Split text into word-boundary chunks of at most max_length characters."""
    chunks = []
    current_chunk = ''
    words = text.split()

    for word in words:
        # Account for the joining space when the chunk already has content
        separator = 1 if current_chunk else 0
        if len(current_chunk) + separator + len(word) <= max_length:
            current_chunk += (' ' if current_chunk else '') + word
        else:
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = word

    if current_chunk:
        chunks.append(current_chunk)

    return chunks
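
# Illustrative example of the splitter (hypothetical input):
#   split_text("the quick brown fox jumps", 10)
#   -> ['the quick', 'brown fox', 'jumps']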

def join_wav_files(input_files, output_file):
    """Concatenate WAV files with identical audio parameters into one file."""
    # Take the audio parameters (rate, width, channels) from the first file
    with wave.open(input_files[0], 'rb') as first_file:
        params = first_file.getparams()

    # Write every input's frames, in order, into the output file
    with wave.open(output_file, 'wb') as output:
        output.setparams(params)
        for input_file in input_files:
            with wave.open(input_file, 'rb') as wav_in:
                output.writeframes(wav_in.readframes(wav_in.getnframes()))
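
# Illustrative usage (hypothetical file names):
#   join_wav_files(['chunk_0.wav', 'chunk_1.wav'], 'combined.wav')
# All inputs are assumed to share the sample rate, sample width, and channel
# count of the first file, since the output parameters are copied from it.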

def generate_story(text):
    """Generate a story using the langchain library and OpenAI's GPT-3 model."""
    prompt = PromptTemplate(
        input_variables=["text"],
        template="""
        You are a fun and seasoned storyteller.
        Generate a short story for a 5-year-old audience about {text}.
        """
    )
    story = LLMChain(llm=llm, prompt=prompt)
    story_result = story.run(text=text)
    print(story_result)
    print("\nCutting text into chunks\n")

    input_waves = []
    max_length = 250  # the TTS Space accepts at most 250 characters per call
    text_chunks = split_text(story_result, max_length)
    for chunk in text_chunks:
        print(chunk)
        result = eleven.predict(
            chunk,                    # 'Input Text (250 characters max)' Textbox
            "Bella",                  # 'Voice' Dropdown
            "eleven_monolingual_v1",  # 'Model' Radio
            fn_index=0
        )
        print(result)
        input_waves.append(result)

    output_file = 'output.wav'
    join_wav_files(input_waves, output_file)
    return story_result, output_file

def app(text):
    story = generate_story(text)
    return story

with gr.Blocks() as demo:
    with gr.Column():
        text = gr.Textbox()
        submit_btn = gr.Button('Submit')
        audio = gr.Audio()
        story = gr.Textbox()

    submit_btn.click(fn=app, inputs=[text], outputs=[story, audio])
        
demo.launch()