Image_Describer / app.py
kusumakar's picture
Update app.py
57068ee
raw
history blame
2.47 kB
import streamlit as st
import numpy as np
from PIL import Image
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel, GPT2Tokenizer, GPT2LMHeadModel
import torch
from transformers import BartTokenizer, BartForConditionalGeneration
# Hugging Face Hub checkpoint identifiers; each one is resolved by
# `from_pretrained` below.
_SUMMARIZER_CHECKPOINT = "facebook/bart-large-cnn"
_CAPTIONER_CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"

# BART tokenizer/model pair used by generate_paragraph.
tokenizer_2 = BartTokenizer.from_pretrained(_SUMMARIZER_CHECKPOINT)
model_2 = BartForConditionalGeneration.from_pretrained(_SUMMARIZER_CHECKPOINT)

# Image-captioning model plus the image preprocessor and tokenizer that ship
# with the same checkpoint; all three are used by generate_captions.
model = VisionEncoderDecoderModel.from_pretrained(_CAPTIONER_CHECKPOINT)
feature_extractor = ViTFeatureExtractor.from_pretrained(_CAPTIONER_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CAPTIONER_CHECKPOINT)
def generate_captions(image):
    """Return a text caption for an image.

    Args:
        image: Either something ``PIL.Image.open`` accepts (a path or a
            binary file-like object such as a Streamlit upload) or an
            already opened ``PIL.Image.Image``.

    Returns:
        The decoded caption as a ``str`` with special tokens removed and
        leading/trailing whitespace stripped.
    """
    # Accept an already decoded image so callers that hold one do not have
    # to decode the file a second time.
    if not isinstance(image, Image.Image):
        image = Image.open(image)
    image = image.convert("RGB")

    pixel_values = feature_extractor(image, return_tensors="pt").pixel_values
    # Send the input to whichever device the model is on instead of assuming
    # CPU; for a freshly loaded model this is still the CPU.
    output_ids = model.generate(pixel_values.to(model.device))

    # Let the tokenizer drop every special token rather than removing a
    # hard-coded "<|endoftext|>" string, and trim the whitespace that is
    # left behind where the end-of-text marker used to be.
    return tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
def generate_paragraph(caption):
    """Run a caption through the BART model and return the generated text.

    Args:
        caption: The input text; it is tokenized as a single-item batch and
            truncated to at most 1024 tokens.

    Returns:
        The first generated sequence decoded to a ``str`` with special
        tokens skipped.
    """
    encoded = tokenizer_2(
        [caption],
        max_length=1024,
        truncation=True,
        padding="longest",
        return_tensors="pt",
    )

    # Beam-search settings passed to generate().
    generation_options = {
        "max_length": 200,
        "num_beams": 4,
        "length_penalty": 2.0,
        "early_stopping": True,
    }
    sequences = model_2.generate(
        encoded.input_ids,
        attention_mask=encoded.attention_mask,
        **generation_options,
    )

    # The batch holds exactly one caption, so only the first sequence matters.
    first_sequence = sequences[0]
    return tokenizer_2.decode(first_sequence, skip_special_tokens=True)
# create the Streamlit app
def app():
    """Render the Streamlit page.

    Shows a title and a file uploader. Once a file is uploaded, the image is
    displayed, a caption is produced with ``generate_captions`` and that
    caption is passed to ``generate_paragraph``; both texts are written to
    the page. An upload that cannot be decoded as an image produces an
    error message instead of an unhandled exception.
    """
    st.title('Image from your Side, Detailed description from my site')
    st.write('Upload an image to see what we have in store.')

    uploaded_file = st.file_uploader(
        "Got You Covered, Upload your wish!, magic on the Way! ",
        type=["jpg", "jpeg", "png"],
    )

    # Nothing more to render until the user has picked a file.
    if uploaded_file is None:
        return

    # The extension filter does not guarantee decodable content: a corrupt or
    # mislabelled file makes Pillow raise an OSError subclass, which would
    # otherwise surface as a raw traceback in the page.
    try:
        image = Image.open(uploaded_file).convert("RGB")
    except OSError:
        st.error("The uploaded file could not be read as an image. Please try another file.")
        return

    # Show the picture first so the user has feedback while the models run.
    st.image(image, caption='The Uploaded File')

    with st.spinner("Generating caption..."):
        string = generate_captions(uploaded_file)
    st.write("First is first captions for your Photo : ", string)

    with st.spinner("Generating description..."):
        generated_paragraph = generate_paragraph(string)
    st.write(generated_paragraph)
# Entry point: render the page when this file is executed as the main script.
if __name__ == "__main__":
    app()