"""Streamlit app: caption an uploaded image with BLIP, then expand the
caption into a short story with GPT-2.

Run with: streamlit run <this_file>.py
"""

import streamlit as st
from PIL import Image
from transformers import (
    BlipForConditionalGeneration,
    BlipProcessor,
    GPT2LMHeadModel,
    GPT2Tokenizer,
)


@st.cache_resource
def load_caption_model():
    """Load the BLIP processor and model once; Streamlit re-executes this
    script on every interaction, so caching avoids reloading the large
    checkpoint on each rerun."""
    processor = BlipProcessor.from_pretrained(
        "Salesforce/blip-image-captioning-large"
    )
    model = BlipForConditionalGeneration.from_pretrained(
        "Salesforce/blip-image-captioning-large"
    )
    return processor, model


@st.cache_resource
def load_story_model():
    """Load the GPT-2 tokenizer and model once (cached across reruns)."""
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    return tokenizer, model


def generate_caption(image: Image.Image) -> str:
    """Return a BLIP-generated caption for *image* (an RGB PIL image)."""
    processor, model = load_caption_model()
    inputs = processor(image, return_tensors="pt")
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)


def generate_story(caption: str) -> str:
    """Return a GPT-2 story continuation seeded with *caption*."""
    tokenizer, model = load_story_model()
    story_prompt = f"Based on the image, here's a story: {caption}"
    input_ids = tokenizer.encode(story_prompt, return_tensors='pt')
    # BUG FIX: do_sample=True is required for temperature/top_k/top_p to
    # take effect — without it, generate() uses greedy decoding and the
    # sampling parameters below are silently ignored.
    story = model.generate(
        input_ids,
        max_length=200,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        temperature=0.9,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(story[0], skip_special_tokens=True)


# --- Streamlit page flow (top level runs on every interaction) ---
st.title("Image to Story Generator")

# Uploading the image
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_image is not None:
    image = Image.open(uploaded_image).convert('RGB')
    # NOTE(review): use_column_width is deprecated in newer Streamlit in
    # favor of use_container_width — kept as-is for compatibility; confirm
    # the deployed Streamlit version before switching.
    st.image(image, caption='Uploaded Image', use_column_width=True)

    # Generate image caption
    st.write("Generating caption...")
    caption = generate_caption(image)
    st.write(f"Caption: {caption}")

    # Generate story from caption
    if st.button('Generate Story from Caption'):
        st.write("Generating story...")
        story_text = generate_story(caption)
        st.text_area("Generated Story", story_text, height=250)