# MasonSpace / app.py
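# Image-to-story-to-speech demo: caption an uploaded image with BLIP,
# expand the caption into a short story with an OpenAI model via LangChain,
# then synthesize the story to audio through the Hugging Face Inference API.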
import os

import dotenv
import gradio as gr
import requests
from langchain import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from transformers import pipeline

# Load credentials from .env; the OpenAI LLM below also expects
# OPENAI_API_KEY to be set in the environment.
dotenv.load_dotenv()
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
# Image to text: caption the image with the BLIP base captioning model.
def imgToText(url):
    img_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    text = img_to_text(url)[0]['generated_text']
    return text
# LLM: turn the image caption into a very short story.
def generate_story(scenario):
    template = """
    You are a story teller.
    You can generate a short story based on a simple narrative, the story should be no more than 40 words:

    CONTEXT: {scenario}
    STORY:
    """
    prompt = PromptTemplate(template=template, input_variables=["scenario"])
    story_llm = LLMChain(llm=OpenAI(model_name="gpt-3.5-turbo"), prompt=prompt, verbose=True)
    story = story_llm.predict(scenario=scenario)
    return story
# Text to speech: send the story to the Hugging Face Inference API (VITS model)
# and write the returned audio bytes to story.flac.
def textToSpeech(story):
    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
    headers = {"Authorization": "Bearer " + HUGGINGFACEHUB_API_TOKEN}
    payload = {"inputs": story}
    response = requests.post(API_URL, headers=headers, json=payload)
    with open("story.flac", "wb") as f:
        f.write(response.content)
# Full pipeline: uploaded image path -> caption -> story -> narrated audio file.
def generate_story_and_play_audio(image):
    # `image` is the path to the uploaded file (gr.Image with type="filepath").
    scenario = imgToText(image)
    story = generate_story(scenario)
    textToSpeech(story)
    return "story.flac"
iface = gr.Interface(
    fn=generate_story_and_play_audio,
    # gr.inputs / gr.outputs are legacy; use the top-level components instead.
    inputs=gr.Image(type="filepath", label="Upload an image"),
    outputs=gr.Audio(label="Generated Story", type="filepath"),
)
iface.launch()