import streamlit as st
from transformers import ViltProcessor, ViltForQuestionAnswering, BlipForQuestionAnswering, AutoProcessor
from PIL import Image
# Define available models
models = {
    "ViLT": (ViltProcessor, ViltForQuestionAnswering, "dandelin/vilt-b32-finetuned-vqa"),
    "BLIP": (AutoProcessor, BlipForQuestionAnswering, "Salesforce/blip-vqa-base"),
}
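# Note: get_format_response below reloads the processor and model from disk
# on every rerun. Streamlit's st.cache_resource decorator can keep them in
# memory across reruns; a minimal sketch (the helper name is our own, not
# part of the original app):
#
#   @st.cache_resource
#   def load_vqa_model(name):
#       processor_class, model_class, model_id = models[name]
#       return processor_class.from_pretrained(model_id), model_class.from_pretrained(model_id)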
def get_format_response(image, question, selected_model):
    # Load the processor and model for the selected architecture
    processor_class, model_class, model_name = models[selected_model]
    processor = processor_class.from_pretrained(model_name)
    model = model_class.from_pretrained(model_name)

    encoding = processor(image, question, return_tensors="pt")
    if selected_model == "ViLT":
        # ViLT treats VQA as classification over a fixed answer vocabulary
        outputs = model(**encoding)
        logits = outputs.logits
        idx = logits.argmax(-1).item()
        answer = model.config.id2label[idx]
    else:
        # BLIP generates the answer text autoregressively
        outputs = model.generate(**encoding)
        answer = processor.decode(outputs[0], skip_special_tokens=True)
    return answer
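# Example of calling the helper directly, outside the Streamlit UI
# (hypothetical image path and question):
#
#   img = Image.open("example.jpg").convert("RGB")
#   print(get_format_response(img, "What is in the picture?", "BLIP"))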
# Streamlit app
st.title("Simple VQA App 🤖📷")
st.subheader("A demo app showcasing two VQA models: ViLT and BLIP.")
# Sidebar for model selection
selected_model = st.sidebar.selectbox("Select Model", list(models.keys()))
# Image and question input
uploaded_image = st.file_uploader("Upload Image")
question = st.text_input("Ask a Question about the Image")
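# (Optionally, uploads could be restricted to image files via Streamlit's
#  `type` parameter, e.g. st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"]).)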
# Process image and question if provided
if uploaded_image and question:
    # Convert to RGB so the processors don't choke on RGBA/palette images
    image = Image.open(uploaded_image).convert("RGB")
    st.image(image, caption="Uploaded Image")
    answer = get_format_response(image, question, selected_model)
    # Display answer
    st.write(f"🤖 {selected_model} Answer: {answer}")
# Disclaimer
st.sidebar.markdown("This is a demo app showcasing VQA models. Actual performance may vary.")
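# To run locally (assuming this file is saved as app.py; both models use
# PyTorch as the transformers backend):
#
#   pip install streamlit transformers torch pillow
#   streamlit run app.py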