Daryl Lim committed on
Commit
0398e5a
·
1 Parent(s): a77f1ae

Add application file

Browse files
Files changed (2) hide show
  1. .gitignore +3 -0
  2. app.py +60 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .ipynb_checkpoints
2
+ *.ipynb
3
+ gradio-env
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module provides an interface for image captioning using the BLIP model.
3
+ The interface allows users to upload an image and receive a caption.
4
+ """
5
+
6
+ import gradio as gr
7
+ import spaces
8
+ from transformers import BlipProcessor, BlipForConditionalGeneration
9
+ from PIL import Image
10
+
11
# Load the BLIP preprocessor and captioning model once, at import time.
# Both are fetched from the same checkpoint, so the identifier is named once.
_CHECKPOINT = "Salesforce/blip-image-captioning-base"

processor = BlipProcessor.from_pretrained(_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_CHECKPOINT)
# Move the weights to the GPU; inputs are sent to the same device at call time.
model = model.to("cuda")
18
+
19
def generate_caption(image: Image.Image) -> str:
    """
    Generates a caption for a given image using the BLIP model.

    Args:
        image (Image.Image): The input image as a PIL Image object.

    Returns:
        str: The generated caption.

    Raises:
        ValueError: If ``image`` is ``None`` (no image was supplied).
    """
    # An empty submission reaches this function as ``None``; fail with a
    # clear message instead of an opaque error from inside the processor.
    if image is None:
        raise ValueError("No image provided. Please upload an image.")

    # Preprocess into PyTorch tensors and move them to the GPU for generation.
    inputs = processor(images=image, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs)
    # Decode the first generated sequence, dropping special tokens.
    return processor.decode(outputs[0], skip_special_tokens=True)
33
+
34
@spaces.GPU
def caption_image(image: Image.Image) -> str:
    """
    Takes a PIL Image input and returns a caption.

    This is the callback wired into the Gradio interface. Any failure while
    captioning is converted into a human-readable message rather than raised.

    Args:
        image (Image.Image): The input image as a PIL Image object.

    Returns:
        str: The generated caption or an error message.
    """
    try:
        return generate_caption(image)
    except Exception as e:
        # Deliberate catch-all at the UI boundary: the text output should
        # show the failure reason instead of the request erroring out.
        return f"An error occurred: {e}"
49
+
50
# Build the Gradio UI: one PIL image input, one text output.
image_input = gr.Image(type="pil")
demo = gr.Interface(
    fn=caption_image,
    inputs=image_input,
    outputs="text",
    title="Image Captioning with BLIP",
    description="Upload an image to generate a caption.",
)

# Start serving the interface.
demo.launch()