"""Gradio app that turns a short prompt into an educational children's story.

The story is generated through the OpenAI chat-completions API. Speech
synthesis with the Bark model is implemented in ``generate_speech`` but is not
wired into the UI yet (see the TODO notes in ``text_to_speech`` and ``iface``).
"""

import functools
import os

import gradio as gr
import numpy as np
import openai
import scipy.io.wavfile as wav
import torch
from transformers import AutoModel, AutoProcessor, BitsAndBytesConfig

# Set OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")

# Check for GPU availability
device = "cuda" if torch.cuda.is_available() else "cpu"

# Quantization config for speech model (8-bit only when a GPU is present)
if device == "cuda":
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
else:
    quantization_config = None

# Checkpoint used for text-to-speech
SPEECH_MODEL_ID = "suno/bark-small"


@functools.lru_cache(maxsize=1)
def _load_speech_components():
    """Load the speech processor and model on first use and cache them.

    Loading is deferred so that importing this module (and running the
    story-only UI) does not download or initialise the speech model.

    Returns:
        A ``(processor, model)`` tuple.
    """
    processor = AutoProcessor.from_pretrained(SPEECH_MODEL_ID)
    model = AutoModel.from_pretrained(
        SPEECH_MODEL_ID,
        device_map="auto" if device == "cuda" else None,
        quantization_config=quantization_config,
    )
    # No explicit model.to(device): on CUDA, device_map="auto" already places
    # the weights, and .to() is rejected for bitsandbytes 8-bit models; on CPU
    # no move is requested at all.
    return processor, model


# Function to generate story using GPT
def generate_story(prompt):
    """Generate a short educational children's story from ``prompt``.

    Args:
        prompt: Free-text story idea supplied by the user.

    Returns:
        The story text taken from the first choice of the chat completion.
    """
    model_input = f"""You are a creative and educational storyteller for school-going children. Your task is to create an engaging, age-appropriate story that both entertains and teaches valuable lessons. Use the following prompt as inspiration for your story, but feel free to be imaginative and expand upon it. Remember to include educational elements that children can learn from, such as historical facts, scientific concepts, moral lessons, or cultural insights.

[Story Prompt Begin]
{prompt}
[Story Prompt End]

[Instruction Begin]
Generate a short, creative, and educational story based on this prompt. The story should be suitable for school-going children, entertaining, and contain clear learning points.
[Instruction End]
"""
    messages = [
        {
            "role": "system",
            "content": "You are a skilled storyteller who creates educational and engaging stories for children.",
        },
        {"role": "user", "content": model_input},
    ]

    # openai>=1.0 removed openai.ChatCompletion; its module-level client lives
    # at openai.chat.completions. Older releases only have ChatCompletion.
    if hasattr(openai, "OpenAI"):
        create_completion = openai.chat.completions.create
    else:
        create_completion = openai.ChatCompletion.create

    response = create_completion(
        model="gpt-3.5-turbo",  # or "gpt-4" if you have access
        messages=messages,
    )
    # Attribute access is used because v1 message objects are not subscriptable.
    return response.choices[0].message.content


# Function to generate speech
def generate_speech(text, speaker="v2/en_speaker_6"):
    """Synthesise ``text`` to audio with the speech model.

    Args:
        text: Text to speak.
        speaker: Voice preset passed to the processor.

    Returns:
        A ``(sample_rate, audio_array)`` tuple, where ``audio_array`` is the
        generated waveform as a squeezed NumPy array.
    """
    speech_processor, speech_model = _load_speech_components()
    inputs = speech_processor(
        text, voice_preset=speaker, return_tensors="pt"
    ).to(device)
    speech_values = speech_model.generate(**inputs, do_sample=True)
    audio_array = speech_values.cpu().numpy().squeeze()
    sample_rate = speech_model.generation_config.sample_rate
    return sample_rate, audio_array


# Gradio interface function
def text_to_speech(prompt):
    """Gradio callback: validate the prompt and return the generated story.

    Args:
        prompt: Text entered in the prompt textbox.

    Returns:
        The generated story text.

    Raises:
        gr.Error: If ``prompt`` is empty or only whitespace.
    """
    if not prompt or not prompt.strip():
        # Avoid an API call for a blank prompt.
        raise gr.Error("Please enter a story prompt.")

    story = generate_story(prompt)
    # TODO: speech output is disabled; to enable it, also return
    # generate_speech(story) and add the gr.Audio output to `iface`.
    return story


# Create Gradio interface
iface = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(label="Enter story prompt"),
    outputs=[
        gr.Textbox(label="Generated Story"),
        # TODO: gr.Audio(label="Generated Speech") once speech is enabled
    ],
    title="Story Generator and Text-to-Speech",
    description="Enter a prompt to generate a story using GPT, then convert it to speech.",
)

# Launch the app
iface.launch()