Spaces:
Configuration error
Configuration error
import gradio as gr | |
import dotenv | |
from transformers import pipeline | |
from langchain import LLMChain | |
from langchain.llms import OpenAI | |
from langchain.prompts import PromptTemplate | |
import requests | |
import os | |
dotenv.load_dotenv() | |
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") | |
# image to text | |
def imgToText(url): | |
img_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base") | |
text = img_to_text(url)[0]['generated_text'] | |
return text | |
# LLM | |
def generate_story(scenario): | |
template = """ | |
You are a story teller. | |
You can generate a short story based on a simple narrative, the story should be no more than 40 words: | |
CONTEXT: {scenario} | |
STORY: | |
""" | |
prompt = PromptTemplate(template=template, input_variables=["scenario"]) | |
story_llm = LLMChain(llm=OpenAI(model_name="gpt-3.5-turbo"), prompt=prompt, verbose=True) | |
story = story_llm.predict(scenario=scenario) | |
return story | |
def textToSpeech(story): | |
API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits" | |
headers = {"Authorization": "Bearer " + HUGGINGFACEHUB_API_TOKEN} | |
payload = {"inputs": story} | |
response = requests.post(API_URL, headers=headers, json=payload) | |
with open("story.flac", "wb") as f: | |
f.write(response.content) | |
def generate_story_and_play_audio(image): | |
scenario = imgToText(image.name) | |
story = generate_story(scenario) | |
textToSpeech(story) | |
return "story.flac" | |
iface = gr.Interface( | |
fn=generate_story_and_play_audio, | |
inputs=gr.inputs.File(label="Upload an image"), | |
outputs=gr.outputs.Audio(label="Generated Story", type="filepath") | |
) | |
iface.launch() | |