ma4389 committed on
Commit
78f56e0
·
verified ·
1 Parent(s): 5a26831

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +31 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
from transformers import pipeline, AutoProcessor, AutoModelForVision2Seq
from PIL import Image
import gradio as gr

# Automatically use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load processor and model once at import time.
# NOTE: the first run downloads the BLIP checkpoint (~2 GB) from the
# Hugging Face Hub; subsequent runs use the local cache.
processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = AutoModelForVision2Seq.from_pretrained("Salesforce/blip-image-captioning-large").to(device)
13
# Inference function
def generate_caption(image):
    """Generate a text caption for *image* with the BLIP model.

    Args:
        image: PIL image supplied by the Gradio input component.
            Gradio passes ``None`` when the user submits without
            uploading an image.

    Returns:
        The decoded caption string, or a short prompt message when
        no image was provided.
    """
    # Fail gracefully instead of raising AttributeError on None.convert().
    if image is None:
        return "Please upload an image."
    # BLIP expects 3-channel RGB input (uploads may be RGBA/grayscale).
    image = image.convert("RGB")
    inputs = processor(images=image, return_tensors="pt").to(device)
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        output = model.generate(**inputs)
    caption = processor.decode(output[0], skip_special_tokens=True)
    return caption
20
+
21
+ # Gradio interface
22
+ interface = gr.Interface(
23
+ fn=generate_caption,
24
+ inputs=gr.Image(type="pil"),
25
+ outputs="text",
26
+ title="🖼️ Image to Text Captioning",
27
+ description="Upload an image and get a caption using BLIP (Salesforce/blip-image-captioning-large)."
28
+ )
29
+
30
+ if __name__ == "__main__":
31
+ interface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ gradio
4
+ Pillow