Spaces:

team-indain-image-caption
/

Hindi-image-captioning

Runtime error

App Files Files Community

Hindi-image-captioning / app.py

seanbenhur

Update app.py

58572c5 over 2 years ago

raw history blame

No virus

1.89 kB

	import torch
	import re
	import gradio as gr
	from pathlib import Path
	from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
	# Two or more consecutive full stops mark the point after which the rest of
	# a generated caption is discarded.
	regex_pattern = r"[.]{2,}"
	# Compiled once at import time so every call reuses the same pattern object.
	_TRUNCATE_RE = re.compile(regex_pattern)


	def post_process(text):
	    """Clean up a decoded caption.

	    Surrounding whitespace is removed, everything from the first run of two
	    or more full stops onwards is dropped, and the remaining text is stripped
	    again so that no blank is left dangling where the cut was made.

	    Args:
	        text: The decoded caption string.

	    Returns:
	        The cleaned caption. A value that is not a ``str`` is returned
	        unchanged (best effort) instead of raising.
	    """
	    if not isinstance(text, str):
	        # Nothing sensible can be cleaned; hand the value back untouched
	        # rather than failing the whole prediction.
	        return text
	    head = _TRUNCATE_RE.split(text.strip(), maxsplit=1)[0]
	    # The text in front of the dots often ends with a space ("caption .. x").
	    return head.strip()
	def predict(image, max_length=64, num_beams=4):
	    """Generate a caption for a single image.

	    The image is converted to pixel values by the module-level
	    ``feature_extractor``, moved to ``device``, and passed to
	    ``model.generate`` with beam search. The first decoded sequence is
	    cleaned with ``post_process`` and returned.

	    Args:
	        image: The image to caption (the app configures Gradio to supply a
	            PIL image). ``None`` is accepted and yields an empty caption.
	        max_length: Forwarded to ``model.generate`` as ``max_length``.
	        num_beams: Forwarded to ``model.generate`` as ``num_beams``.

	    Returns:
	        The post-processed caption, or ``""`` when ``image`` is ``None``.
	    """
	    if image is None:
	        # The UI can invoke the callback without an image (for example when
	        # the input is cleared while the interface is live); there is
	        # nothing to caption in that case.
	        return ""

	    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
	    pixel_values = pixel_values.to(device)

	    # Inference only: no gradients are needed for generation.
	    with torch.no_grad():
	        generated = model.generate(
	            pixel_values,
	            max_length=max_length,
	            num_beams=num_beams,
	            return_dict_in_generate=True,
	        )

	    preds = tokenizer.batch_decode(generated.sequences, skip_special_tokens=True)
	    return post_process(preds[0])

	# Repository identifier from which the model, feature extractor and
	# tokenizer are all loaded.
	model_path = "team-indain-image-caption/hindi-image-captioning"

	# Use the first CUDA device when one is available, otherwise the CPU.
	if torch.cuda.is_available():
	    device = torch.device("cuda:0")
	else:
	    device = torch.device("cpu")

	# Load model.
	model = VisionEncoderDecoderModel.from_pretrained(model_path)
	model.to(device)
	print("Loaded model")

	feature_extractor = AutoFeatureExtractor.from_pretrained(model_path)
	print("Loaded feature_extractor")

	tokenizer = AutoTokenizer.from_pretrained(model_path)
	# When the decoder is the stock "gpt2" checkpoint, the BOS token is reused
	# as the padding token.
	decoder_is_gpt2 = model.decoder.name_or_path == "gpt2"
	if decoder_is_gpt2:
	    tokenizer.pad_token = tokenizer.bos_token
	print("Loaded tokenizer")
	# Text shown at the top of the demo page.
	title = "Hindi Image Captioning"
	description = ""

	# Prefer the top-level ``gr.Image`` component; fall back to the legacy
	# ``gr.inputs.Image`` namespace on Gradio releases that do not expose it.
	# The variable is deliberately not called ``input`` so the builtin of that
	# name is not shadowed.
	_image_component = getattr(gr, "Image", None) or gr.inputs.Image
	image_input = _image_component(type="pil")

	# Every JPEG in the local "examples" directory becomes a clickable example,
	# listed in sorted path order.
	example_images = sorted(f.as_posix() for f in Path("examples").glob("*.jpg"))
	print(f"Loaded {len(example_images)} example images")

	interface = gr.Interface(
	    fn=predict,
	    inputs=image_input,
	    outputs="textbox",
	    title=title,
	    description=description,
	    examples=example_images,
	    live=True,
	)
	interface.launch(share=True)