PathoAgent / app.py
Sayed121's picture
Rename Streamlit.py to app.py
800cf29
#!/usr/bin/env python
# coding: utf-8
# In[5]:
import streamlit as st
from PIL import Image
import torch
import requests
from transformers import BlipProcessor, BlipForQuestionAnswering,BlipImageProcessor, AutoProcessor
from transformers import BlipConfig
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
text_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
image_processor = BlipImageProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained(r"blip_model_v2_epo89" )
def preprocess_image(image):
# Your image preprocessing logic here...
# Example: Resize image to 128x128 pixels
image = image.resize((128, 128))
image_encoding = image_processor(image,
do_resize=True,
size=(128, 128),
return_tensors="pt")
return image_encoding["pixel_values"][0]
def preprocess_text(text, max_length=32):
# Your text preprocessing logic here...
encoding = text_processor(
None,
text,
padding="max_length",
truncation=True,
max_length=max_length,
return_tensors="pt"
)
for k, v in encoding.items():
encoding[k] = v.squeeze()
return encoding
def predict(image, question):
# Preprocess image
pixel_values = preprocess_image(image).unsqueeze(0)
# Preprocess text
encoding = preprocess_text(question)
# Print shapes for debugging
#print("Pixel Values Shape:", pixel_values.shape)
#print("Input IDs Shape:", encoding['input_ids'].unsqueeze(0).shape)
# Perform prediction using your model
# Example: Replace this with your actual prediction logic
model.eval()
outputs = model.generate(pixel_values=pixel_values, input_ids=encoding['input_ids'].unsqueeze(0))
prediction_result = text_processor.decode(outputs[0], skip_special_tokens=True)
return prediction_result
def main():
st.title("PathoAgent")
# Image upload
st.subheader("Upload Image")
uploaded_file = st.file_uploader("Choose a file", type=["jpg", "png", "jpeg"])
# Text input
st.subheader("Input Question")
text_input = st.text_area("Enter text here:")
# Display uploaded image
if uploaded_file is not None:
image = Image.open(uploaded_file).convert('RGB')
#resized_img = image.resize((10,10))
st.image(image, caption="Uploaded Image.", use_column_width=True)
# Predict button
if st.button("Predict"):
if uploaded_file is not None and text_input:
# Perform prediction
prediction_result = predict(image, text_input)
# Display input text
st.subheader("Input Question:")
st.write(text_input)
# Display prediction result
st.subheader("Prediction Result:")
st.write(prediction_result)
if __name__ == "__main__":
main()
# streamlit run Streamlit.py