File size: 2,182 Bytes
797b64f
 
 
 
 
4045aa3
797b64f
00ca6f9
797b64f
 
 
00ca6f9
 
 
52ccae8
 
00ca6f9
797b64f
58572c5
797b64f
 
ecda335
797b64f
52ccae8
797b64f
52ccae8
797b64f
7110ce4
 
 
 
cafa65e
 
5257cda
86b2682
797b64f
 
 
 
 
dc7d2f7
797b64f
03e6f9c
35906fb
86b2682
 
7110ce4
86b2682
 
9c01716
7110ce4
 
 
 
56c2bd0
7110ce4
 
 
 
 
 
 
56c2bd0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import torch
import re
import gradio as gr
from pathlib import Path
from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel

def predict(image, max_length=64, num_beams=4):
    """Generate a caption for an image with the module-level captioning model.

    Relies on the module-level ``feature_extractor``, ``model``,
    ``tokenizer`` and ``device`` objects.

    Args:
        image: PIL image; it is converted to RGB before feature extraction.
        max_length: Maximum generated sequence length forwarded to
            ``model.generate``.
        num_beams: Beam-search width forwarded to ``model.generate``.

    Returns:
        The first line of the decoded text, with every ``<|endoftext|>``
        marker removed.
    """
    image = image.convert('RGB')
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
    # Keep the inputs on the same device as the model; forcing them back to
    # the CPU would break as soon as `device` is anything other than "cpu".
    pixel_values = pixel_values.to(device)
    with torch.no_grad():
        # Forward the generation settings so the function's arguments
        # actually take effect instead of being silently ignored.
        generated_ids = model.generate(
            pixel_values,
            max_length=max_length,
            num_beams=num_beams,
        )
    decoded = tokenizer.decode(generated_ids[0])
    caption_lines = decoded.replace('<|endoftext|>', '').split('\n')
    return caption_lines[0]
    
# Checkpoint identifier and the device everything runs on.
model_path = "team-indain-image-caption/hindi-image-captioning"
device = "cpu"

# Captioning model, loaded from `model_path` and moved onto `device`.
model = VisionEncoderDecoderModel.from_pretrained(model_path).to(device)
print("Loaded model")

# Image preprocessor; loaded from the ViT checkpoint, not from `model_path`.
feature_extractor = AutoFeatureExtractor.from_pretrained(
    "google/vit-base-patch16-224-in21k"
)
print("Loaded feature_extractor")

# Tokenizer loaded from the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(model_path)
print("Loaded tokenizer")
# Text shown on the demo page.
title = "Hindi Image Captioning"
description = ""
article = "This huggingface presents a demo for Image captioning in Hindi built with VIT Encoder and GPT2 Decoder"

# Gradio components: a required PIL image as input, a textbox for the caption.
# NOTE(review): `input` shadows the builtin of the same name; the name is kept
# because the Interface construction further down refers to it.
input = gr.inputs.Image(type="pil", label="Image to search", optional=False)
output = gr.outputs.Textbox(label="Captions", type="auto")
# Relative paths to two example images.
# NOTE(review): neither path is passed to the Interface below — confirm
# whether they were meant to be wired in as `examples`.
cat_image = "./example_1.jpg"
dog_image = "./example_2.jpg"

# Build the demo UI: `predict` turns the uploaded image into caption text.
# The `article` text is shown as the description (`description` is empty).
interface = gr.Interface(
    fn=predict,
    inputs=input,
    outputs=output,
    theme="grass",
    title=title,
    description=article,
)

# share=True requests a public share link in addition to the local server.
interface.launch(share=True)