# Hugging Face Spaces status banner captured with this file: "Sleeping"
# (page chrome, not part of the program).
import torch | |
import re | |
from PIL import Image | |
import requests | |
import streamlit as st | |
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel | |
# Page chrome: browser-tab title, headline, tagline and sidebar artwork.
# NOTE(review): the leading characters of the title string look like a
# mis-decoded emoji; they are kept exactly as found - confirm the intended glyph.
st.set_page_config(page_title="Captionize")
st.title("π€ Captionize")
st.subheader("Generate Captions for your Image...")
# NOTE(review): a fixed width and use_column_width=True are both passed; per the
# Streamlit docs the latter appears to take precedence - confirm which is wanted.
st.sidebar.image(
    './csv_analysis.png',
    width=300,
    use_column_width=True,
)
# Button styling: blue background with white text by default, green
# background on hover. Injected as raw HTML, hence unsafe_allow_html.
_BUTTON_STYLE = """
<style>
div.stButton > button:first-child {
background-color: #0099ff;
color:#ffffff;
}
div.stButton > button:hover {
background-color: #00ff00;
color:#FFFFFF;
}
</style>"""
st.markdown(_BUTTON_STYLE, unsafe_allow_html=True)
# Inference device and the checkpoints for each component of the captioner.
# All three components are read from the same published checkpoint.
device = 'cpu'
encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
decoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
model_checkpoint = "nlpconnect/vit-gpt2-image-captioning"


def _load_captioning_components():
    """Instantiate the feature extractor, tokenizer and captioning model.

    Returns:
        A ``(feature_extractor, tokenizer, model)`` tuple, with the model
        already moved to ``device``.
    """
    extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
    text_tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
    captioner = VisionEncoderDecoderModel.from_pretrained(model_checkpoint).to(device)
    return extractor, text_tokenizer, captioner


# Streamlit re-executes this script on every widget interaction, so without
# caching the weights would be re-instantiated on each button click.
# st.cache_resource is not present in older Streamlit releases; when it is
# missing we fall back to loading on every run, as the code did before.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_captioning_components = _cache_resource(_load_captioning_components)

feature_extractor, tokenizer, model = _load_captioning_components()
def predict(image, max_length=64, num_beams=4):
    """Generate a caption for a single image.

    Args:
        image: The picture to caption. May be an ``http(s)`` URL string, an
            already-opened ``PIL.Image.Image``, or anything ``PIL.Image.open``
            accepts (a local path or a binary file-like object such as the
            value returned by ``st.file_uploader``).
        max_length: Maximum length passed to ``model.generate``.
        num_beams: Number of beams passed to ``model.generate``.

    Returns:
        The decoded caption with ``<|endoftext|>`` markers removed and
        everything after the first newline dropped.

    Raises:
        requests.HTTPError: If ``image`` is a URL and the server answers with
            an error status.
    """
    if isinstance(image, Image.Image):
        picture = image
    elif isinstance(image, str) and image.lower().startswith(("http://", "https://")):
        # Remote image: stream it, but fail loudly on HTTP errors instead of
        # handing an error page to PIL, and never hang without a timeout.
        response = requests.get(image, stream=True, timeout=30)
        response.raise_for_status()
        picture = Image.open(response.raw)
    else:
        # Local path or file-like object (e.g. an uploaded file).
        picture = Image.open(image)

    # The extractor is fed 3-channel input regardless of the source format.
    picture = picture.convert("RGB")
    pixel_values = feature_extractor(picture, return_tensors="pt").pixel_values.to(device)

    caption_ids = model.generate(
        pixel_values,
        max_length=max_length,
        num_beams=num_beams,
    )[0]

    decoded = tokenizer.decode(caption_ids)
    # Strip end-of-text markers and keep only the first line of the output.
    return decoded.replace('<|endoftext|>', '').split('\n')[0]
# Upload widget and trigger button. file_uploader yields None until the user
# has actually selected a file.
# NOTE(review): the trailing character of the label looks like a mis-decoded
# emoji; it is kept exactly as found - confirm the intended glyph.
pic = st.file_uploader(
    label="Please upload any Image here π",
    type=['png', 'jpeg', 'jpg'],
    help="Only 'png', 'jpeg' or 'jpg' formats allowed",
)
button = st.button("Generate Caption")

if button:
    if pic is None:
        # Pressing the button before uploading would otherwise hand None to
        # the captioner and surface a traceback to the user.
        st.warning("Please upload an image before generating a caption.")
    else:
        # Get Response
        caption = predict(pic)
        st.write(caption)