File size: 2,608 Bytes
fcdf907
 
 
 
 
 
 
 
 
 
 
 
 
 
3e4c6ca
 
 
 
 
 
fcdf907
 
 
 
 
 
 
 
 
 
 
 
5aaa49f
fcdf907
 
 
 
 
 
 
 
 
487c5e5
fcdf907
 
3e4c6ca
fcdf907
 
3e4c6ca
fcdf907
3e4c6ca
fcdf907
3e4c6ca
fcdf907
3e4c6ca
c0cfd10
3e4c6ca
c0cfd10
fcdf907
 
3e4c6ca
fcdf907
 
 
 
 
f645719
3e4c6ca
b7eaa3d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Vision-encoder/text-decoder captioning stack from Hugging Face transformers.
from transformers import GPT2TokenizerFast, ViTImageProcessor, VisionEncoderDecoderModel
import requests
import torch
from PIL import Image
import os
from tqdm import tqdm
import openai
import warnings
# NOTE(review): blanket warning suppression hides deprecation notices
# (e.g. gradio's `gr.inputs` / openai API deprecations used below).
warnings.filterwarnings('ignore')

# Download (or load from cache) the pretrained ViT-GPT2 captioning pipeline.
# All three pieces come from the same checkpoint so the processor/tokenizer
# match the model's expected inputs and vocabulary. These module-level names
# are read directly by Image_to_caption below.
model_raw = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer       = GPT2TokenizerFast.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

def Image_to_caption(image, url, greedy=True, model=model_raw):
    """Caption an image with ViT-GPT2 and return suggested hashtags.

    The image URL is tried first; if it cannot be fetched or decoded (empty
    URL, network error, bad image data) the uploaded PIL image is used as a
    fallback. The generated caption is then sent to the OpenAI chat API and
    every '#'-prefixed token in the reply is collected.

    Args:
        image: PIL image uploaded through the UI (fallback source).
        url: optional image URL string; takes precedence when it loads.
        greedy: if True use greedy decoding, otherwise top-k sampling.
        model: captioning model; defaults to the module-level ``model_raw``.

    Returns:
        Newline-joined hashtag strings (may be empty if the reply has none).

    Raises:
        KeyError: if the ``API_KEY`` environment variable is not set.
    """
    try:
        # stream=True lets PIL read straight from the response body.
        img = Image.open(requests.get(url, stream=True).raw)
        pixel_values = image_processor(img, return_tensors="pt").pixel_values
    except Exception:
        # Was a bare `except:`, which would also swallow KeyboardInterrupt
        # and SystemExit; narrow it while keeping the URL->upload fallback.
        pixel_values = image_processor(image, return_tensors="pt").pixel_values

    if greedy:
        generated_ids = model.generate(pixel_values, max_new_tokens=30)
    else:
        generated_ids = model.generate(
            pixel_values,
            do_sample=True,
            max_new_tokens=30,
            top_k=5,
        )
    generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Fails loudly with KeyError if the key is missing instead of sending an
    # unauthenticated request.
    openai.api_key = os.environ['API_KEY']
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": f"provide me the top trending hashtags based this text {generated_text} in twitter"}],
    )
    # Keep only the '#'-prefixed tokens from the model's free-text reply.
    hashtags = [word for word in completion.choices[0].message.content.split()
                if word.startswith("#")]
    return '\n'.join(hashtags)


  


import gradio as gr

# One input component per Image_to_caption parameter: (image, url).
inputs = [
    gr.inputs.Image(type="pil", label="Original Image"),
    gr.inputs.Textbox(label="Image URL"),
]

outputs = [gr.outputs.Textbox(label='Caption')]

title = "Image to Hashtags"
description = "This AI tool uses cutting-edge technology to generate captions and relevant hashtags for images. By combining a state-of-the-art ViT-GPT2 image captioning model with OpenAI's GPT-3.5-Turbo API this tool can suggest popular and relevant hashtags. "
article = " <a href='https://huggingface.co/nlpconnect/vit-gpt2-image-captioning'>Model Repo on Hugging Face Model Hub</a>"
# Each example row must supply one value per input component. The original
# packed all three image paths into a single row for a two-input interface,
# which Gradio rejects; split into one row per image with an empty URL so the
# function falls back to the uploaded image.
examples = [
    ['Screenshot 2023-02-03 at 3.58.03 PM.png', ''],
    ['Screenshot 2023-02-03 at 3.57.20 PM.png', ''],
    ['Screenshot 2023-02-03 at 3.56.22 PM.png', ''],
]

gr.Interface(
    Image_to_caption,
    inputs,
    outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
    theme="huggingface",
).launch(debug=True, enable_queue=True)