File size: 4,365 Bytes
4a53687
 
 
 
1da073b
4a53687
 
 
 
 
2e6449d
4a53687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169c40b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a53687
c0cfa96
 
3982129
4a53687
c0cfa96
 
3982129
4a53687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169c40b
 
844f195
 
 
4a53687
 
 
 
0d14460
 
 
 
 
4a53687
0d14460
4a53687
7b0889b
 
4a53687
 
 
d8ff2c8
 
 
0d14460
d8ff2c8
0d14460
d8ff2c8
0d14460
 
4a53687
9b754f9
7747217
4a53687
0d14460
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
## App.py

import os
import io
# from IPython.display import Image, display, HTML
from PIL import Image
import base64 

from dotenv import load_dotenv, find_dotenv
# Read the local .env file; HF_TOKEN is then looked up in the process
# environment below.
_dotenv_path = find_dotenv()
_ = load_dotenv(_dotenv_path)

# Token sent as the Bearer credential by the request helpers in this file.
# Indexing os.environ (instead of .get) raises KeyError when HF_TOKEN is unset.
hf_api_key = os.environ['HF_TOKEN']

#### Helper function
import requests, json

#Here we are going to call multiple endpoints!
def get_completion(inputs, parameters=None, ENDPOINT_URL="", timeout=120):
    """POST an inference request and return the reply parsed as JSON.

    Args:
        inputs: Value placed under the ``"inputs"`` key of the JSON body.
        parameters: Optional value placed under the ``"parameters"`` key;
            the key is omitted entirely when this is ``None``.
        ENDPOINT_URL: URL the request is POSTed to.
        timeout: Forwarded to ``requests`` as its ``timeout`` argument so a
            stalled server cannot block the caller forever.

    Returns:
        The response body decoded as UTF-8 and parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status. The
            message includes the status code and the response body.
        requests.RequestException: On connection problems or timeout.
    """
    headers = {
        "Authorization": f"Bearer {hf_api_key}",
        "Content-Type": "application/json",
    }
    payload = {"inputs": inputs}
    if parameters is not None:
        payload["parameters"] = parameters

    response = requests.post(
        ENDPOINT_URL,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    # Fail here, with the server's own explanation, instead of handing an
    # error payload back to callers that expect a successful result.
    if not response.ok:
        raise requests.HTTPError(
            f"Request to {ENDPOINT_URL} failed with status "
            f"{response.status_code}: {response.text}",
            response=response,
        )
    return json.loads(response.content.decode("utf-8"))

def get_img_completion(inputs, parameters=None, ENDPOINT_URL="", timeout=120):
    """POST an inference request and return the raw response bytes.

    Unlike a JSON helper, the body is returned undecoded, so callers can
    treat it as binary data (``generate`` opens it as an image).

    Args:
        inputs: Value placed under the ``"inputs"`` key of the JSON body.
        parameters: Optional value placed under the ``"parameters"`` key;
            the key is omitted entirely when this is ``None``.
        ENDPOINT_URL: URL the request is POSTed to.
        timeout: Forwarded to ``requests`` as its ``timeout`` argument so a
            stalled server cannot block the caller forever.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.HTTPError: If the server answers with an error status. The
            message includes the status code and the response body.
        requests.RequestException: On connection problems or timeout.
    """
    headers = {
        "Authorization": f"Bearer {hf_api_key}",
        "Content-Type": "application/json",
    }
    payload = {"inputs": inputs}
    if parameters is not None:
        payload["parameters"] = parameters

    response = requests.post(
        ENDPOINT_URL,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    # Without this check an error body would be returned as if it were
    # image data and only fail later, far from the actual cause.
    if not response.ok:
        raise requests.HTTPError(
            f"Request to {ENDPOINT_URL} failed with status "
            f"{response.status_code}: {response.text}",
            response=response,
        )
    return response.content
    

# Text-to-image endpoint: the URL generate() sends prompts to.
TTI_ENDPOINT = "https://api-inference.huggingface.co/models/ZB-Tech/Text-to-Image"
# Alternative values, currently disabled:
# "http://jupyter-api-proxy.internal.dlai/rev-proxy/huggingface/itt"
# os.environ['HF_API_TTI_BASE']
# NOTE(review): the proxy URL above ends in "/itt" although this is the
# text-to-image constant - confirm the path before re-enabling it.

# Image-to-text endpoint: the URL captioner() sends images to.
ITT_ENDPOINT = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
# Alternative values, currently disabled:
# "http://jupyter-api-proxy.internal.dlai/rev-proxy/huggingface/tti"
# os.environ['HF_API_ITT_BASE']
# NOTE(review): the proxy URL above ends in "/tti" although this is the
# image-to-text constant - confirm the path before re-enabling it.

## Building your game with `gr.Blocks()`

#Bringing the functions from lessons 3 and 4!
def image_to_base64_str(pil_image):
    """Return the base64 text encoding of an image's PNG serialization.

    The image is written in PNG format to an in-memory buffer through its
    ``save`` method; the buffered bytes are then base64-encoded.

    Args:
        pil_image: Object exposing ``save(fp, format=...)``; this file
            passes PIL images.

    Returns:
        The base64 encoding of the written bytes, as a ``str``.
    """
    with io.BytesIO() as buffer:
        pil_image.save(buffer, format='PNG')
        png_bytes = buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode('utf-8')

def base64_to_pil(img_base64):
    """Open a base64-encoded image as a PIL image.

    Args:
        img_base64: Base64 encoding of the image file's bytes.

    Returns:
        The object returned by ``Image.open`` for the decoded bytes.
    """
    raw_bytes = base64.b64decode(img_base64)
    return Image.open(io.BytesIO(raw_bytes))

def captioner(image):
    """Return a generated caption for ``image``.

    The image is base64-encoded with ``image_to_base64_str`` and sent to
    ``ITT_ENDPOINT`` through ``get_completion``; the caption is the
    ``'generated_text'`` field of the first element of the reply.

    Args:
        image: The image to caption (a PIL image when called from the UI).

    Returns:
        The caption text.

    Raises:
        ValueError: If ``image`` is ``None``, e.g. the caption button was
            pressed before an image was uploaded.
        RuntimeError: If the reply is not a non-empty list, so there is no
            caption to extract.
    """
    if image is None:
        raise ValueError(
            "No image provided: upload an image before requesting a caption."
        )

    base64_image = image_to_base64_str(image)
    result = get_completion(base64_image, None, ITT_ENDPOINT)

    # Anything other than a non-empty list cannot be indexed as result[0];
    # report the payload itself rather than an opaque KeyError/TypeError.
    if not isinstance(result, list) or not result:
        raise RuntimeError(f"Unexpected captioning response: {result!r}")
    return result[0]['generated_text']

def generate(prompt):
    """Generate an image from a text prompt.

    The prompt is sent to ``TTI_ENDPOINT`` through ``get_img_completion``
    and the returned bytes are opened as a PIL image.

    Args:
        prompt: Text describing the image to generate.

    Returns:
        The PIL image opened from the response bytes.

    Raises:
        ValueError: If ``prompt`` is ``None``, empty or only whitespace.
        RuntimeError: If the response bytes cannot be opened as an image;
            the message includes the start of the response for diagnosis.
    """
    if not prompt or not prompt.strip():
        raise ValueError(
            "Empty prompt: generate or type a caption before requesting an image."
        )

    output = get_img_completion(prompt, None, TTI_ENDPOINT)
    try:
        return Image.open(io.BytesIO(output))
    except OSError as err:
        # Pillow signals unreadable image data with an OSError subclass.
        # Show what the server actually sent, since it is probably an error
        # message rather than image bytes.
        detail = output[:200].decode("utf-8", errors="replace")
        raise RuntimeError(
            f"Text-to-image endpoint did not return an image: {detail}"
        ) from err

### First attempt, just captioning

import gradio as gr 
# First iteration of the UI: upload an image and caption it.
# NOTE: `demo` is rebound by the later `with gr.Blocks() as demo:` section,
# and this first layout is never launched.
with gr.Blocks() as demo:
    # Heading emoji restored; it was stored as mis-decoded bytes.
    gr.Markdown("# Describe-and-Generate game 🖍️")
    image_upload = gr.Image(label="Your first image", type="pil")
    btn_caption = gr.Button("Generate caption")
    caption = gr.Textbox(label="Generated caption")

    # Clicking the button captions the uploaded image into the textbox.
    btn_caption.click(fn=captioner, inputs=[image_upload], outputs=[caption])

# gr.close_all()
# demo.launch(share=True)

### Let's add generation

# Full UI: caption an uploaded image, then generate a new image from the
# caption. The caption textbox is both the output of the first step and the
# input of the second.
with gr.Blocks() as demo:
    # Heading emoji restored; it was stored as mis-decoded bytes.
    gr.Markdown("# Describe-and-Generate game 🖍️")
    image_upload = gr.Image(label="Your first image", type="pil")
    btn_caption = gr.Button("Generate caption")
    caption = gr.Textbox(label="Generated caption")
    btn_image = gr.Button("Generate image")
    image_output = gr.Image(label="Generated Image")

    # Step 1: uploaded image -> caption text.
    btn_caption.click(fn=captioner, inputs=[image_upload], outputs=[caption])
    # Step 2: caption text -> generated image.
    btn_image.click(fn=generate, inputs=[caption], outputs=[image_output])

# gr.close_all()
# Start the app; share=True is passed through to Gradio's launch().
demo.launch(share=True)

### Doing it all at once

# def caption_and_generate(image):
#     caption = captioner(image)
#     image = generate(caption)
#     return [caption, image]

# with gr.Blocks() as demo:
#     gr.Markdown("# Describe-and-Generate game 🖍️")
#     image_upload = gr.Image(label="Your first image",type="pil")
#     btn_all = gr.Button("Caption and generate")
#     caption = gr.Textbox(label="Generated caption")
#     image_output = gr.Image(label="Generated Image")
#     btn_all.click(fn=caption_and_generate, inputs=[image_upload], outputs=[caption, image_output])

# gr.close_all()
# demo.launch(share=True)

# gr.close_all()