Spaces:
Runtime error
Runtime error
File size: 2,182 Bytes
797b64f 4045aa3 797b64f 00ca6f9 797b64f 00ca6f9 52ccae8 00ca6f9 797b64f 58572c5 797b64f ecda335 797b64f 52ccae8 797b64f 52ccae8 797b64f 7110ce4 cafa65e 5257cda 86b2682 797b64f dc7d2f7 797b64f 03e6f9c 35906fb 86b2682 7110ce4 86b2682 9c01716 7110ce4 56c2bd0 7110ce4 56c2bd0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import torch
import re
import gradio as gr
from pathlib import Path
from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
def predict(image, max_length=64, num_beams=4):
    """Generate a caption for one image with the module-level captioning model.

    Args:
        image: Image object exposing ``convert('RGB')`` (the Gradio input
            below is declared with ``type='pil'``).
        max_length: Maximum generated sequence length, forwarded to
            ``model.generate``.
        num_beams: Beam-search width, forwarded to ``model.generate``.

    Returns:
        The first line of the decoded text with every ``<|endoftext|>``
        marker removed.
    """
    image = image.convert('RGB')
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
    # Keep the inputs on the same device the model was moved to; forcing them
    # back to CPU afterwards would undo this transfer.
    pixel_values = pixel_values.to(device)
    with torch.no_grad():
        # Forward the caller-supplied decoding settings; previously they were
        # accepted by the signature but silently ignored.
        generated_ids = model.generate(
            pixel_values,
            max_length=max_length,
            num_beams=num_beams,
        )
    text = tokenizer.decode(generated_ids[0])
    # Strip the end-of-text marker and keep only the first line of output.
    text = text.replace('<|endoftext|>', '').split('\n')
    return text[0]
# Checkpoint identifier shared by the captioning model and its tokenizer.
model_path = "team-indain-image-caption/hindi-image-captioning"
# Device the model weights and the input tensors are placed on.
device = "cpu"

# Load the encoder-decoder checkpoint and move it to the target device.
model = VisionEncoderDecoderModel.from_pretrained(model_path).to(device)
print("Loaded model")

# Image preprocessor; loaded from a different checkpoint id than the model.
feature_extractor = AutoFeatureExtractor.from_pretrained(
    "google/vit-base-patch16-224-in21k"
)
print("Loaded feature_extractor")

# Tokenizer used to decode the generated token ids back into text.
tokenizer = AutoTokenizer.from_pretrained(model_path)
print("Loaded tokenizer")
# Text shown on the demo page. `description` is left empty; the interface
# built further down passes `article` as its description instead.
title = "Hindi Image Captioning"
description = ""

# Gradio widgets: an image upload handed to `predict` as a PIL image, and a
# text box for the caption.
# NOTE(review): `input` shadows the builtin of the same name; it is kept
# because the interface construction below refers to it by this name.
input = gr.inputs.Image(type="pil", label="Image to search", optional=False)
output = gr.outputs.Textbox(type="auto", label="Captions")

# Example-image discovery is currently disabled.
#example_images = sorted([f.as_posix() for f in Path("examples").glob("*.jpg")])
#print(f"Loaded {len(example_images)} example images")
article = "This huggingface presents a demo for Image captioning in Hindi built with VIT Encoder and GPT2 Decoder"
# The two triple-quoted strings below are bare string expressions: they are
# evaluated and discarded, so the interface configurations inside them never
# run. They only preserve earlier drafts of the interface setup.
'''interface = gr.Interface(
fn=predict,
inputs=input,
outputs="textbox",
title=title,
description=description,
#examples=example_images,
live=True,
theme="darkpeach"
)'''
# Second inert draft: a vertical-layout variant that also launched the app.
'''#inp=gr.inputs.Textbox(lines=1, placeholder=None, default="", label="search you query here")
output = gr.outputs.Textbox(type="auto",label="Captions")
interface = gr.Interface(fn=predict, inputs=input, outputs=output,article=article,title=title,theme="huggingface",layout='vertical')
interface.launch(share=True)'''
# Paths to two local example images; they are defined here but not passed to
# the interface below.
cat_image = "./example_1.jpg"
dog_image = "./example_2.jpg"

# Wire the caption function to the image input and text output widgets.
# The `article` text is used as the page description.
interface = gr.Interface(
    fn=predict,
    inputs=input,
    theme="grass",
    outputs=output,
    title=title,
    description=article,
)

# Start the web app. `share=True` requests a public share link from Gradio.
interface.launch(share=True)