Spaces:

Parimala13Sri
/

Visual_Question_Answering

Runtime error

App Files Files Community

Visual_Question_Answering / app.py

Parimala13Sri

Create app.py

9473b2d verified about 1 year ago

raw

history blame contribute delete

1.81 kB

	import streamlit as st
	from PIL import Image
	import requests
	from io import BytesIO
	from transformers import ViltProcessor, ViltForQuestionAnswering

	# Set page layout to wide
	st.set_page_config(layout="wide")

	# Hugging Face checkpoint used for both the processor and the VQA model.
	VQA_CHECKPOINT = "dandelin/vilt-b32-finetuned-vqa"


	@st.cache_resource
	def load_vqa_components():
	    """Load the ViLT processor and question-answering model once per process.

	    Streamlit re-executes this script from top to bottom on every widget
	    interaction; caching the loader keeps the checkpoint from being
	    re-instantiated on each rerun.

	    Returns:
	        A ``(processor, model)`` tuple built from ``VQA_CHECKPOINT``.
	    """
	    return (
	        ViltProcessor.from_pretrained(VQA_CHECKPOINT),
	        ViltForQuestionAnswering.from_pretrained(VQA_CHECKPOINT),
	    )


	# Module-level names kept so the rest of the script can use them unchanged.
	processor, model = load_vqa_components()

	def get_answer(image, text):
	    """Answer a question about an image using the module-level ViLT model.

	    Args:
	        image: Encoded image file contents as bytes; decoded with PIL and
	            converted to RGB before being handed to the processor.
	        text: The question to ask about the image.

	    Returns:
	        The label from ``model.config.id2label`` with the highest logit.
	        If any step raises, the exception message is returned as a string
	        instead, so the return value is always a ``str``.
	    """
	    try:
	        # Imported locally (inside the try) so the file's import block and
	        # the "always return a string" contract both stay untouched.
	        import torch

	        # Load and process the image
	        img = Image.open(BytesIO(image)).convert("RGB")

	        # Prepare inputs
	        encoding = processor(img, text, return_tensors="pt")

	        # Forward pass; inference only, so skip autograd bookkeeping.
	        with torch.no_grad():
	            outputs = model(**encoding)

	        idx = outputs.logits.argmax(-1).item()
	        return model.config.id2label[idx]

	    except Exception as e:
	        # Best-effort by design: the caller displays whatever string it gets.
	        return str(e)

	# Set up the Streamlit app
	st.title("VisualSense QA")
	st.write("Upload an image and enter a question to get an answer.")

	# Create columns for image upload and input fields
	col1, col2 = st.columns(2)

	# Image upload (left column), with a preview once a file is chosen
	with col1:
	    uploaded_file = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"])
	    if uploaded_file is not None:
	        st.image(uploaded_file, use_column_width=True)

	# Question input (right column)
	with col2:
	    question = st.text_input("Question")

	# Process the image and question when both are provided.
	# An empty text box yields "" rather than None, so test the content itself;
	# a blank or whitespace-only question is treated as "not provided".
	if uploaded_file is not None and question.strip():
	    if st.button("Ask Question"):
	        # Pass the uploaded bytes through untouched. Re-encoding as JPEG
	        # fails for images with an alpha channel or palette (common for
	        # PNG uploads), and get_answer already converts to RGB itself.
	        image_bytes = uploaded_file.getvalue()

	        # Get the answer
	        answer = get_answer(image_bytes, question)

	        # Display the answer
	        st.success("Answer: " + answer)