# -*- coding: utf-8 -*-
"""app.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1nybx9b_W5IsJz9G0GHvDx6KQKiTv_gt3

## Image Caption Generator

We are going to use a Transformers model to generate a caption from an image.

### Installation

1. Transformers
2. PyTorch
3. Pillow (PIL, for images)

To install a package, run `pip install package_name`. In Colab, PyTorch comes preinstalled, and the same goes for PIL.
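For example, in a Colab cell (only Transformers is needed there, per the note above):

```
!pip install transformers
```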
@misc {nlp_connect_2022,
    author    = { {NLP Connect} },
    title     = { vit-gpt2-image-captioning (Revision 0e334c7) },
    year      = 2022,
    url       = { https://huggingface.co/nlpconnect/vit-gpt2-image-captioning },
    doi       = { 10.57967/hf/0222 },
    publisher = { Hugging Face }
}
"""
#!pip install transformers
from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer
import torch
from PIL import Image
# Load the pretrained ViT-GPT2 captioning model and its preprocessing utilities.
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
feature_extractor = ViTFeatureExtractor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

# Run on GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Generation settings used below: sample five candidate descriptions per image.
gen_kwargs = {"max_length": 100, "num_return_sequences": 5, "do_sample": True}
def cap_generation(img):
    """Generate several candidate descriptions for a PIL image."""
    images = []
    if img.mode != "RGB":
        img = img.convert(mode="RGB")
    # Resize the image for faster computation.
    width, height = img.size
    new_size = (int(width / 4), int(height / 4))
    img = img.resize(new_size)
    images.append(img)
    pixel_values = feature_extractor(images=images, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)
    output_ids = model.generate(pixel_values, **gen_kwargs)
    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    preds = [pred.strip() for pred in preds]
    return [s.capitalize() + '.' for s in preds]
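# A minimal usage sketch for cap_generation, assuming a hypothetical local
# file "sample.jpg" exists (uncomment to try):
#
# with Image.open("sample.jpg") as sample:
#     print(cap_generation(sample))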
#!pip install openai
import os

import openai

# Read the API key from the environment instead of hardcoding a secret.
openai.api_key = os.getenv("OPENAI_API_KEY")
model_engine = 'text-davinci-003'
def captions(img, num_captions):
    """Turn the model's raw descriptions into Instagram-ready captions."""
    descriptions = cap_generation(img)
    # Both modes share one prompt; only the requested count and token budget differ.
    if num_captions == 'Multiple':
        request = "generate some creative captions"
        max_tokens = 2048
    else:
        request = "generate a single caption"
        max_tokens = 1024
    prompt = f"""
    I want to post an image on Instagram and need a good caption for it. \
    A caption is a witty statement that describes an image in a relatable \
    manner. To generate the captions, I am using the ViT-GPT2 model to \
    extract descriptions of the image. \
    Read the list of descriptions generated by ViT-GPT2 for an image, \
    delimited by triple backticks. Since the descriptions are generated by \
    an AI model, they might not all be accurate. \
    From all these descriptions, understand what the image is. \
    After understanding the image, {request} for it. The caption should \
    also include some relevant emojis, so that I can post it on social \
    media like Instagram. \
    Only return captions.
    ```{descriptions}```
    """
    completion = openai.Completion.create(
        engine=model_engine,
        prompt=prompt,
        max_tokens=max_tokens,
        n=1,
        stop=None,
    )
    return completion.choices[0].text
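# A minimal usage sketch for captions, again assuming a hypothetical
# "sample.jpg" and that OPENAI_API_KEY is set in the environment:
#
# with Image.open("sample.jpg") as sample:
#     print(captions(sample, "Multiple"))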
#!pip install gradio
import gradio as gr
# Gradio 3+ components (the gr.inputs/gr.outputs namespaces are deprecated).
inputs = [
    gr.Image(type="pil", label="Upload your image here"),
    gr.Dropdown(choices=["Single", "Multiple"], value="Multiple", label="Select how many captions you want"),
]
outputs = [gr.Textbox(label="Captions")]

title = "Image Captioning"
description = "Image Captioning with vit-gpt"
article = "<a href='https://huggingface.co/nlpconnect/vit-gpt2-image-captioning'>Model</a>"
interface = gr.Interface(
    fn=captions,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
)
interface.launch()
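# When running locally or in Colab (rather than on a hosted Space), launch()
# can also create a temporary public link:
#
# interface.launch(share=True)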