Spaces:
Running
Running
File size: 1,358 Bytes
56f6374 5b568c0 c878823 5b568c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import streamlit as st
from transformers import AutoTokenizer, AutoModelForImageCaptioning
import requests
from PIL import Image
import numpy as np
# Hub checkpoint shared by the tokenizer and the model, spelled once so the
# two cannot drift apart.
# NOTE(review): the name looks like a BEiT vision checkpoint - confirm that it
# actually ships tokenizer files and a text-generation head, and that
# `AutoModelForImageCaptioning` is exported by the installed transformers
# release.
_CHECKPOINT = "microsoft/beit-base-patch16-224-in21k"


@st.cache_resource
def _load_captioner():
    """Load the tokenizer and the model once and reuse them afterwards.

    Streamlit re-executes the whole script on every widget interaction, so
    an uncached module-level ``from_pretrained`` call would reload the
    weights each time the user touches the page.

    Returns:
        A ``(tokenizer, model)`` tuple, loaded in that order.
    """
    return (
        AutoTokenizer.from_pretrained(_CHECKPOINT),
        AutoModelForImageCaptioning.from_pretrained(_CHECKPOINT),
    )


# Same module-level names as before, so the rest of the file is unaffected.
tokenizer, model = _load_captioner()
def generate_caption(image_url, *, timeout=10.0, image_size=(224, 224)):
    """Download the image at ``image_url`` and return a caption for it.

    The image itself is what gets handed to the model. Previously the URL
    *string* was tokenised and used as the generation prompt, while the
    pixel array was computed and then thrown away.

    Args:
        image_url: HTTP(S) address of the image to describe.
        timeout: Seconds to wait for the server before giving up.
        image_size: ``(width, height)`` the image is resized to before it
            is given to the model. The default follows the "224" in the
            checkpoint name this file loads.
            NOTE(review): confirm against the checkpoint's preprocessing
            config.

    Returns:
        The first decoded caption, as a string.

    Raises:
        requests.RequestException: The download failed, timed out, or the
            server answered with an error status.
        OSError: The downloaded bytes are not an image Pillow can decode.
    """
    # Function-scope imports keep this block self-contained; the tokenizer
    # call this replaces already asked for PyTorch ("pt") tensors.
    import io

    import torch

    # Without a timeout a stalled server would hang the app indefinitely,
    # and without the status check an HTML error page would be passed to
    # Pillow as if it were image data.
    response = requests.get(image_url, timeout=timeout)
    response.raise_for_status()

    # `response.content` is fully read and content-decoded, unlike `.raw`.
    # Forcing RGB guarantees a height x width x 3 array, so the transpose
    # below cannot fail on greyscale, palette or RGBA images.
    image = Image.open(io.BytesIO(response.content)).convert("RGB")
    image = image.resize(image_size)

    # Scale to [0, 1] and reorder to channels-first, as the original
    # preprocessing did, then add the batch dimension.
    # NOTE(review): the checkpoint may also expect mean/std normalisation -
    # confirm against its image-processor config.
    pixels = np.asarray(image, dtype=np.float32) / 255.0
    pixels = np.ascontiguousarray(np.transpose(pixels, (2, 0, 1)))
    pixel_values = torch.from_numpy(pixels).unsqueeze(0)

    # Generate token ids conditioned on the image and decode them to text.
    output = model.generate(pixel_values=pixel_values, max_length=20)
    captions = tokenizer.batch_decode(output, skip_special_tokens=True)
    return captions[0]
def main():
    """Render the sidebar form and show a caption for the submitted URL.

    Draws a header, a URL text box and a "Generate Caption" button in the
    sidebar. When the button is clicked, the URL is passed to
    ``generate_caption`` and the result is shown as a success message. A
    blank URL, or one that cannot be downloaded or decoded as an image, is
    reported as an error message instead.
    """
    st.sidebar.header("Image Caption Generator")
    # Strip so that a whitespace-only entry counts as "nothing entered".
    image_url = st.sidebar.text_input("Enter the URL of an image:").strip()

    # Nothing to do until the user actually clicks the button.
    if not st.sidebar.button("Generate Caption"):
        return

    if not image_url:
        st.error("Please enter a valid image URL.")
        return

    try:
        caption = generate_caption(image_url)
    except (requests.RequestException, OSError) as exc:
        # Network failures, malformed URLs and undecodable image data are
        # user-input problems here; show a message rather than a traceback.
        st.error(f"Could not load an image from that URL: {exc}")
        return

    st.success(f"Caption: {caption}")
# Run the app when this file is executed as a script.
if __name__ == "__main__":
    main()