# app.py — Image to Story Generator
# Captions an uploaded image with BLIP, then expands the caption into a
# short story with GPT-2. (Header cleaned up from page-scrape residue:
# "Dendup's picture / Update app.py / 7d9e246 verified".)
import streamlit as st
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration, GPT2LMHeadModel, GPT2Tokenizer
# Model loading.
#
# In a Streamlit app, module-level code re-runs on every widget interaction.
# @st.cache_resource ensures the large BLIP and GPT-2 checkpoints are
# downloaded/loaded once per server process instead of on each rerun.
@st.cache_resource
def _load_models():
    """Load and return (blip_processor, blip_model, gpt2_tokenizer, gpt2_model)."""
    # BLIP model for image captioning
    blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
    blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
    # GPT-2 model and tokenizer for story generation
    gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
    return blip_processor, blip_model, gpt2_tokenizer, gpt2_model

# Keep the original module-level names so the rest of the script is unchanged.
processor, model, tokenizer_gpt2, model_gpt2 = _load_models()
# Streamlit UI: upload an image, caption it with BLIP, then optionally
# expand the caption into a short story with GPT-2.
# (Indentation restored — the pasted source had lost the nesting under
# both `if` statements, which is a syntax error in Python.)
st.title("Image to Story Generator")

# Uploading the image
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_image is not None:
    # BLIP expects 3-channel RGB input; uploads may be RGBA or palette PNGs,
    # so convert explicitly.
    image = Image.open(uploaded_image).convert('RGB')
    # use_container_width replaces the deprecated use_column_width parameter.
    st.image(image, caption='Uploaded Image', use_container_width=True)

    # Generate image caption. BLIP's generate() is greedy by default, so the
    # caption is deterministic across Streamlit reruns (e.g. button clicks).
    st.write("Generating caption...")
    inputs = processor(image, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)
    st.write(f"Caption: {caption}")

    # Generate story from caption
    if st.button('Generate Story from Caption'):
        st.write("Generating story...")
        story_prompt = f"Based on the image, here's a story: {caption}"
        # Encode prompt text to input ids
        input_ids = tokenizer_gpt2.encode(story_prompt, return_tensors='pt')
        # Generate text using GPT-2.
        # BUG FIX: do_sample=True is required for temperature/top_k/top_p to
        # take effect — without it generation is greedy and those arguments
        # are silently ignored (transformers emits a warning).
        story = model_gpt2.generate(
            input_ids,
            max_length=200,          # total length, including the prompt tokens
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
            temperature=0.9,
            top_k=50,
            top_p=0.95,
            pad_token_id=tokenizer_gpt2.eos_token_id,  # GPT-2 has no pad token
        )
        # Decode and display the story (output includes the prompt text).
        story_text = tokenizer_gpt2.decode(story[0], skip_special_tokens=True)
        st.text_area("Generated Story", story_text, height=250)