#!/usr/bin/env python
# coding: utf-8
"""Streamlit app that answers a free-text question about an uploaded image.

The question and image are fed to a BLIP question-answering model loaded
from ``blip_model_v2_epo89``; the tokenizer and image processor come from
the ``Salesforce/blip-vqa-base`` checkpoint.
"""

import streamlit as st
from PIL import Image
import torch
import requests
from transformers import BlipProcessor, BlipForQuestionAnswering, BlipImageProcessor, AutoProcessor
from transformers import BlipConfig
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display

BASE_CHECKPOINT = "Salesforce/blip-vqa-base"
MODEL_PATH = "blip_model_v2_epo89"
# Square target size, so PIL's (width, height) order and the image
# processor's (height, width) order agree.
IMAGE_SIZE = (128, 128)


@st.cache_resource
def _load_artifacts():
    """Load the processors and the model once and reuse them across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the processors and model weights would be reloaded each time.

    Returns:
        A ``(text_processor, image_processor, model)`` tuple, with the model
        switched to evaluation mode.
    """
    text_proc = BlipProcessor.from_pretrained(BASE_CHECKPOINT)
    image_proc = BlipImageProcessor.from_pretrained(BASE_CHECKPOINT)
    vqa_model = BlipForQuestionAnswering.from_pretrained(MODEL_PATH)
    vqa_model.eval()
    return text_proc, image_proc, vqa_model


# Module-level names are kept so existing references to them keep working.
text_processor, image_processor, model = _load_artifacts()


def preprocess_image(image):
    """Turn a PIL image into the pixel-value tensor for one image.

    The image is resized to ``IMAGE_SIZE`` with PIL and then passed through
    the BLIP image processor (which is also asked to resize to the same
    size).

    Args:
        image: A ``PIL.Image.Image``.

    Returns:
        The first entry of the processor's ``pixel_values`` batch, i.e. the
        tensor for this image without a leading batch dimension.
    """
    image = image.resize(IMAGE_SIZE)
    image_encoding = image_processor(
        image, do_resize=True, size=IMAGE_SIZE, return_tensors="pt"
    )
    return image_encoding["pixel_values"][0]


def preprocess_text(text, max_length=32):
    """Tokenize ``text``, padded/truncated to ``max_length`` tokens.

    Args:
        text: The question to encode.
        max_length: Fixed token length to pad or truncate to.

    Returns:
        The processor's encoding with the leading batch dimension removed
        from every tensor.
    """
    encoding = text_processor(
        images=None,
        text=text,
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    # Drop only the batch axis; a bare squeeze() would also collapse the
    # sequence axis when max_length == 1.
    for key, value in list(encoding.items()):
        encoding[key] = value.squeeze(0)
    return encoding


def predict(image, question):
    """Generate the model's answer to ``question`` about ``image``.

    Args:
        image: A ``PIL.Image.Image``.
        question: The question text.

    Returns:
        The decoded answer string with special tokens removed.
    """
    pixel_values = preprocess_image(image).unsqueeze(0)
    encoding = preprocess_text(question)

    generate_kwargs = {
        "pixel_values": pixel_values,
        "input_ids": encoding["input_ids"].unsqueeze(0),
    }
    # preprocess_text pads to a fixed length; forward the mask so the text
    # encoder does not attend to the padding positions.
    attention_mask = encoding.get("attention_mask")
    if attention_mask is not None:
        generate_kwargs["attention_mask"] = attention_mask.unsqueeze(0)

    outputs = model.generate(**generate_kwargs)
    return text_processor.decode(outputs[0], skip_special_tokens=True)


def main():
    """Render the page: image upload, question box, and prediction output."""
    st.title("PathoAgent")

    # Image upload
    st.subheader("Upload Image")
    uploaded_file = st.file_uploader("Choose a file", type=["jpg", "png", "jpeg"])

    # Text input
    st.subheader("Input Question")
    text_input = st.text_area("Enter text here:")

    # Display uploaded image
    image = None
    if uploaded_file is not None:
        try:
            image = Image.open(uploaded_file).convert('RGB')
        except OSError:
            # PIL raises OSError subclasses for unreadable or truncated files.
            st.error("The uploaded file could not be read as an image.")
            return
        st.image(image, caption="Uploaded Image.", use_column_width=True)

    # Predict button
    if not st.button("Predict"):
        return

    question = text_input.strip() if text_input else ""
    if image is None or not question:
        # Tell the user why nothing happened instead of silently ignoring
        # the click.
        st.warning("Please upload an image and enter a question first.")
        return

    prediction_result = predict(image, question)

    # Display input text
    st.subheader("Input Question:")
    st.write(text_input)

    # Display prediction result
    st.subheader("Prediction Result:")
    st.write(prediction_result)


if __name__ == "__main__":
    main()

# streamlit run Streamlit.py