|
import torch |
|
from PIL import Image |
|
from transformers import BlipProcessor, BlipForConditionalGeneration |
|
import gradio as gr |
|
|
|
|
|
# Load the BLIP processor/model once at startup so every request reuses them.
# NOTE(review): this is the image-*captioning* checkpoint, but the UI below is
# titled "Image Question Answering" — for true VQA, "Salesforce/blip-vqa-base"
# with BlipForQuestionAnswering may be the intended pairing; confirm.
try:

    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")

    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

    # Move to GPU when available; inference-only, so switch to eval mode.
    model = model.to(device='cuda' if torch.cuda.is_available() else 'cpu')

    model.eval()

except Exception as e:

    # Abort with a non-zero exit status. The previous bare exit() is the
    # interactive-site helper and terminates with status 0, signalling
    # success to the shell even though startup failed.
    raise SystemExit(f"Error loading model or processor: {e}")
|
|
|
def process_image(image, question):
    """Generate a text answer for an uploaded image and question via BLIP.

    Args:
        image: numpy array (as delivered by the Gradio Image component with
            type='numpy'), or None when the user submits without an upload.
        question: free-text question passed as the text prompt to the model.

    Returns:
        The decoded generation from the model, or a human-readable error
        message string (the Gradio handler must never raise).
    """
    # Gradio passes None when no image was uploaded; the original code
    # crashed inside Image.fromarray in that case.
    if image is None:
        return "No image provided."

    try:
        # Use the device the globally loaded model already lives on instead
        # of re-deriving CUDA availability on every request.
        device = next(model.parameters()).device

        # Convert the numpy frame to an RGB PIL image for the processor.
        pil_image = Image.fromarray(image).convert('RGB')
        inputs = processor(pil_image, question, return_tensors="pt").to(device)

        with torch.no_grad():
            outputs = model.generate(**inputs)

        # Decode the first (and only) generated sequence to plain text.
        return processor.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Preprocessing is now inside the try as well: a bad array or a
        # processor failure previously escaped the handler and crashed it.
        return f"Error during model inference: {e}"
|
|
|
|
|
# Gradio UI: a numpy-image upload plus a free-text question box are fed to
# process_image; the model's generated text is shown in an output textbox.
interface = gr.Interface(

    fn=process_image,

    # type='numpy' hands process_image a raw numpy array (or None if empty).
    inputs=[gr.Image(type='numpy'), gr.Textbox(label="Question")],

    outputs=gr.Textbox(),

    title="Image Question Answering",

    description="Upload an image and ask a question about it. The model will provide an answer."

)




# Start the local Gradio server; this call blocks until the app is stopped.
interface.launch()
|
|