File size: 5,253 Bytes
32ae0a7
 
 
fc128e9
 
 
7fc0578
fc128e9
28050f6
 
4dd1c8f
28050f6
 
 
832b215
9395b78
28050f6
 
 
32ae0a7
22618f9
 
ccd71e8
832b215
 
 
 
 
 
 
32ae0a7
 
 
 
 
 
403b33f
32ae0a7
 
22618f9
832b215
22618f9
 
32ae0a7
 
 
 
ccd71e8
32ae0a7
a6b60cd
32ae0a7
796a417
 
 
 
 
 
 
 
 
 
 
832b215
 
 
 
 
 
 
dae2bd7
832b215
 
dae2bd7
 
832b215
 
 
 
 
1f986fb
832b215
 
 
 
 
 
1f986fb
832b215
 
 
 
034973a
255953e
034973a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01187ce
22618f9
832b215
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import gradio as gr
import requests
import os
import base64
from PIL import Image
import io
import json

def resize_image(image_path, max_size=(800, 800), quality=85):
    """Downscale the image at *image_path* and return it as JPEG bytes.

    The image is shrunk in place (aspect ratio preserved) so that neither
    dimension exceeds *max_size*, then re-encoded as JPEG at *quality*.

    Args:
        image_path: Path to the source image file (any format Pillow reads).
        max_size: (width, height) bounding box for the thumbnail.
        quality: JPEG quality (1-95) passed to Pillow's encoder.

    Returns:
        bytes: The JPEG-encoded, resized image.
    """
    with Image.open(image_path) as img:
        # Bug fix: JPEG has no alpha channel, so RGBA/LA/palette uploads
        # (e.g. PNGs from the Gradio widget) made img.save(format="JPEG")
        # raise OSError. Normalize to RGB before encoding.
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")
        img.thumbnail(max_size, Image.Resampling.LANCZOS)
        buffer = io.BytesIO()
        img.save(buffer, format="JPEG", quality=quality)
        return buffer.getvalue()
        
def filepath_to_base64(image_path):
    """Encode the image at *image_path* as a base64 ``data:`` URI.

    The file is first resized and re-encoded to JPEG by ``resize_image``,
    then base64-encoded and wrapped in a ``data:image/jpeg;base64,...``
    string suitable for embedding in an ``<img src=...>`` attribute.
    """
    encoded = base64.b64encode(resize_image(image_path)).decode("utf-8")
    return "data:image/jpeg;base64," + encoded

api_key = os.getenv('API_KEY')

def call_fuyu_8b_api(image_path, content, temperature=0.2, top_p=0.7, max_tokens=1024):
    """Send an image + prompt to the NVIDIA Fuyu-8B endpoint and return the reply.

    The image is embedded in the user message as a base64 data-URI ``<img>``
    tag, and the completion is read back from a server-sent-event stream.

    Args:
        image_path: Filesystem path of the uploaded image (Gradio ``filepath``).
        content: User prompt to accompany the image.
        temperature: Sampling temperature (0-1).
        top_p: Nucleus-sampling cutoff (0-1).
        max_tokens: Upper bound on generated tokens.

    Returns:
        str: The concatenated streamed completion, or a short error message
        when the request fails. (Previously the error path fell through and
        returned None, which left the Gradio output pane silently blank.)
    """
    print(f"Caminho da imagem recebida: {image_path}")
    print(f"Conteúdo: {content}")
    print(f"Temperatura: {temperature}")
    print(f"Top P: {top_p}")
    print(f"Max Tokens: {max_tokens}")

    image_base64 = filepath_to_base64(image_path)
    invoke_url = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/9f757064-657f-4c85-abd7-37a7a9b6ee11"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "accept": "text/event-stream",
        "content-type": "application/json",
    }

    payload = {
        "messages": [
            {
                "content": f"{content} <img src=\"{image_base64}\" />",
                "role": "user"
            }
        ],
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": True
    }

    # `with` closes the streamed connection; the timeout keeps a stalled
    # endpoint from hanging the Gradio worker forever (the original had
    # neither).
    with requests.post(invoke_url, headers=headers, json=payload,
                       stream=True, timeout=120) as response:
        if response.status_code != 200:
            print(f"Erro na requisição: {response.status_code}")
            try:
                error_details = response.json()
                print(error_details)
            except ValueError:
                print(response.text)
            # Bug fix: surface the failure to the UI instead of returning None.
            return f"Request failed with status {response.status_code}"

        response_text = ""
        for line in response.iter_lines():
            if not line:
                continue
            try:
                decoded_line = line.decode('utf-8')

                # SSE payload lines are prefixed with "data: ".
                if decoded_line.startswith('data: '):
                    json_str = decoded_line[6:]  # strip the 'data: ' prefix
                    # NOTE(review): SSE streams conventionally end with a
                    # "data: [DONE]" sentinel; skip it rather than logging a
                    # spurious JSON error — TODO confirm against the endpoint.
                    if json_str.strip() == "[DONE]":
                        continue
                    json_line = json.loads(json_str)

                    # Each chunk carries an incremental delta of the answer.
                    content_parts = json_line.get("choices", [{}])[0].get("delta", {}).get("content", "")
                    response_text += content_parts
                else:
                    print(f"Unexpected line format: {decoded_line}")
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON from response line: {e}")
                print(f"Faulty line: {line}")

        return response_text
# --- Gradio UI wiring ----------------------------------------------------
# Input widgets; their order in `inputs=` below must match the positional
# parameters of call_fuyu_8b_api (image_path, content, temperature, top_p,
# max_tokens).
content_input = gr.Textbox(lines=2, placeholder="Enter your content here...", label="Content")
image_input = gr.Image(type="filepath", label="Upload Image")  # type="filepath" passes a path string to fn
temperature_input = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.2, label="Temperature")
top_p_input = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Top P")
max_tokens_input = gr.Slider(minimum=1, maximum=1024, step=1, value=1024, label="Max Tokens")

# Single-function interface; the HTML `description` string is rendered
# verbatim on the page above the inputs.
iface = gr.Interface(fn=call_fuyu_8b_api,
                     inputs=[image_input, content_input, temperature_input, top_p_input, max_tokens_input],
                     outputs="text",
                     title="Fuyu-8B API Explorer",
                     description="""
<div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
<strong>Explore the Synergy of Text and Image with Fuyu-8B Multi-Modal Transformer</strong>
</div>
<p>
    Embark on a journey into the future of AI with Fuyu-8B, a cutting-edge multi-modal transformer developed by Adept. This revolutionary model leverages an 8 billion parameter architecture to seamlessly integrate image understanding and text generation, offering a unique platform for exploring the intersection of visual and linguistic data.
</p>
<p>
    <strong>How to Use:</strong>
</p>
<ol>
    <li>Upload an <strong>image</strong> to provide a visual context for the model.</li>
    <li>Type your <strong>content</strong> into the textbox, with your question.</li>
    <li>Adjust <strong>Temperature</strong> and <strong>Top P</strong> sliders to fine-tune the creativity and variability of the output.</li>
    <li>Set the <strong>Max Tokens</strong> to limit the length of the generated text.</li>
    <li>Click <strong>Submit</strong> to witness the model's ability to generate insightful and relevant content based on the combined input of text and image.</li>
</ol>
<p>
    <strong>Powered by NVIDIA's advanced AI technologies, Fuyu-8B API Explorer is your gateway to discovering new possibilities in multi-modal AI interactions, completely free to use.</strong>
</p>
<p>
    <strong>Created by:</strong> @artificialguybr (<a href="https://twitter.com/artificialguybr">Twitter</a>)
</p>
<p>
    <strong>Discover more:</strong> <a href="https://artificialguy.com">artificialguy.com</a>
</p>
"""
                    )

# Start the Gradio HTTP server (blocking call).
iface.launch()