ahmedmbutt committed
Commit 76df62a
1 Parent(s): 5e422b6

Update app.py

Files changed (1)
  1. app.py +13 -17
app.py CHANGED
@@ -1,27 +1,23 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
- import requests
  from PIL import Image
- import io

- # Initialize the Hugging Face Inference Client
- model_id = "Salesforce/blip-image-captioning-large"
- client = InferenceClient(model=model_id)

  def caption_image(image):
-     # Convert the PIL image to bytes
-     buffered = io.BytesIO()
-     image.save(buffered, format="JPEG")
-     img_bytes = buffered.getvalue()

-     # Call the Hugging Face inference API
-     response = client.image_to_text(inputs=img_bytes)

-     # Check the response and format it properly
-     if isinstance(response, list) and response:
-         return response[0]["generated_text"]
-     else:
-         return "Error generating caption"

  # Set up the Gradio interface
  interface = gr.Interface(
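
For context, the removed remote-inference path likely failed at runtime: InferenceClient.image_to_text takes the image as its first argument (named image, not inputs), and it returns a caption rather than a list, so the isinstance(response, list) check would always fall through to the error branch. A minimal corrected sketch of that approach, assuming the current huggingface_hub signature:

import io

from huggingface_hub import InferenceClient

client = InferenceClient(model="Salesforce/blip-image-captioning-large")

def caption_image(image):
    # Serialize the PIL image to JPEG bytes, as the removed code did
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    # image_to_text takes the image as its first argument, not inputs=
    result = client.image_to_text(buffered.getvalue())
    # Newer huggingface_hub releases return an ImageToTextOutput dataclass with a
    # generated_text attribute; older ones return the string itself (assumption)
    return getattr(result, "generated_text", result)

The new version of app.py, below, drops the remote call entirely and runs BLIP locally with transformers: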
 
  import gradio as gr
+ from transformers import BlipProcessor, BlipForConditionalGeneration
  from PIL import Image
+ import torch

+ # Load the processor and model
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

  def caption_image(image):
+     # Prepare the image
+     inputs = processor(images=image, return_tensors="pt")

+     # Generate caption
+     out = model.generate(**inputs)

+     # Decode the generated caption
+     caption = processor.decode(out[0], skip_special_tokens=True)
+
+     return caption

  # Set up the Gradio interface
  interface = gr.Interface(
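
The hunk ends at the opening of the gr.Interface( call, so the interface arguments are not part of this diff. A minimal sketch of how the full updated app.py plausibly fits together; the gr.Image input/output configuration, the torch.no_grad() wrapper, and the launch() call are assumptions, not shown in the commit:

import gradio as gr
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the processor and model once at startup
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

def caption_image(image):
    # Preprocess the PIL image into model-ready tensors
    inputs = processor(images=image, return_tensors="pt")
    # Generate caption token ids; no_grad avoids tracking gradients at inference
    with torch.no_grad():
        out = model.generate(**inputs)
    # Decode the generated ids back into text
    return processor.decode(out[0], skip_special_tokens=True)

# Set up the Gradio interface (the argument values below are assumptions)
interface = gr.Interface(
    fn=caption_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
)

if __name__ == "__main__":
    interface.launch()

Loading the model at import time rather than inside caption_image keeps each request fast, since from_pretrained downloads and initializes the weights only once.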