Spaces:
Sleeping
Sleeping
File size: 5,275 Bytes
32ae0a7 fc128e9 7fc0578 fc128e9 28050f6 4dd1c8f 28050f6 832b215 9395b78 28050f6 32ae0a7 22618f9 ccd71e8 832b215 32ae0a7 403b33f 32ae0a7 22618f9 832b215 22618f9 32ae0a7 ccd71e8 32ae0a7 a6b60cd 32ae0a7 796a417 832b215 dae2bd7 832b215 dae2bd7 832b215 1f986fb 832b215 1f986fb 832b215 034973a 01187ce 22618f9 832b215 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import gradio as gr
import requests
import os
import base64
from PIL import Image
import io
import json
def resize_image(image_path, max_size=(800, 800), quality=85):
    """Shrink an image to fit within ``max_size`` and return it as JPEG bytes.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        max_size: ``(width, height)`` bounding box; the aspect ratio is kept
            and images already smaller than the box are not enlarged.
        quality: JPEG quality setting passed to the encoder.

    Returns:
        The re-encoded image as a ``bytes`` object containing JPEG data.
    """
    # Modes Pillow's JPEG encoder can write directly. Anything else (for
    # example RGBA, LA or P from PNG/GIF uploads) would make ``save`` raise,
    # so those are converted to RGB first.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    with Image.open(image_path) as source:
        img = source if source.mode in jpeg_modes else source.convert("RGB")
        img.thumbnail(max_size, Image.Resampling.LANCZOS)
        buffer = io.BytesIO()
        img.save(buffer, format="JPEG", quality=quality)
    return buffer.getvalue()
def filepath_to_base64(image_path):
    """Return the image at ``image_path`` as a base64-encoded JPEG data URL.

    The image is first passed through ``resize_image`` (default size and
    quality), and the resulting JPEG bytes are embedded in a
    ``data:image/jpeg;base64,...`` string.
    """
    encoded = base64.b64encode(resize_image(image_path)).decode('utf-8')
    return "data:image/jpeg;base64," + encoded
# Bearer token sent in the Authorization header; read once at import time
# from the API_KEY environment variable (None when the variable is unset).
api_key = os.environ.get('API_KEY')
def _collect_stream_text(lines):
    """Join the ``delta.content`` pieces carried by a stream of SSE lines.

    Args:
        lines: Iterable of raw ``bytes`` lines as yielded by
            ``Response.iter_lines()``.

    Returns:
        The concatenated text of every content delta seen before the stream
        ended or a ``[DONE]`` sentinel was reached.
    """
    pieces = []
    for line in lines:
        if not line:
            # Blank keep-alive / event separator lines carry no data.
            continue
        decoded_line = line.decode('utf-8')
        if not decoded_line.startswith('data: '):
            print(f"Unexpected line format: {decoded_line}")
            continue
        json_str = decoded_line[6:]  # Drop the 'data: ' prefix.
        if json_str.strip() == "[DONE]":
            # End-of-stream sentinel; it is not JSON, so stop before parsing.
            break
        try:
            json_line = json.loads(json_str)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON from response line: {e}")
            print(f"Faulty line: {line}")
            continue
        if not isinstance(json_line, dict):
            continue
        # Tolerate a missing/empty "choices" list and a null "delta" or
        # "content" instead of raising IndexError/TypeError mid-stream.
        choices = json_line.get("choices") or [{}]
        delta = choices[0].get("delta") or {}
        pieces.append(delta.get("content") or "")
    return "".join(pieces)


def call_fuyu_8b_api(image_path, content, temperature=0.2, top_p=0.7, max_tokens=1024):
    """Send an image plus a text prompt to the Fuyu-8B endpoint and return the reply.

    Args:
        image_path: Filesystem path of the uploaded image; ``None`` or an
            empty value means no image was provided.
        content: Text prompt that accompanies the image.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.
        max_tokens: Maximum number of tokens requested from the API.

    Returns:
        The generated text assembled from the streamed response, or a
        human-readable error message when no image was supplied, the request
        failed, or the server answered with a non-200 status.
    """
    print(f"Caminho da imagem recebida: {image_path}")
    print(f"Conteúdo: {content}")
    print(f"Temperatura: {temperature}")
    print(f"Top P: {top_p}")
    print(f"Max Tokens: {max_tokens}")

    # Without this guard a missing upload crashes inside the image helper.
    if not image_path:
        return "Please upload an image before submitting."

    image_base64 = filepath_to_base64(image_path)
    invoke_url = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/9f757064-657f-4c85-abd7-37a7a9b6ee11"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "accept": "text/event-stream",
        "content-type": "application/json",
    }
    payload = {
        "messages": [
            {
                "content": f"{content} <img src=\"{image_base64}\" />",
                "role": "user"
            }
        ],
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": True
    }

    try:
        # (connect, read) timeout so a stalled endpoint cannot hang the worker;
        # the context manager releases the streamed connection on every path.
        with requests.post(invoke_url, headers=headers, json=payload,
                           stream=True, timeout=(10, 120)) as response:
            if response.status_code != 200:
                print(f"Erro na requisição: {response.status_code}")
                try:
                    error_details = response.json()
                except ValueError:
                    error_details = response.text
                print(error_details)
                # Surface the failure to the caller instead of falling through
                # to an unassigned result.
                return f"Request failed with status {response.status_code}: {error_details}"
            return _collect_stream_text(response.iter_lines())
    except requests.RequestException as exc:
        print(f"Erro na requisição: {exc}")
        return f"Request failed: {exc}"
# Input widgets. Their order in ``inputs`` below must match the positional
# parameters of call_fuyu_8b_api (image, content, temperature, top_p, max_tokens).
content_input = gr.Textbox(lines=2, placeholder="Enter your content here...", label="Content")
image_input = gr.Image(type="filepath", label="Upload Image")
temperature_input = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.2, label="Temperature")
top_p_input = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Top P")
max_tokens_input = gr.Slider(minimum=1, maximum=1024, step=1, value=1024, label="Max Tokens")

# Single-function interface: the widgets above feed call_fuyu_8b_api and its
# returned string is rendered as plain text.
iface = gr.Interface(
    fn=call_fuyu_8b_api,
    inputs=[image_input, content_input, temperature_input, top_p_input, max_tokens_input],
    outputs="text",
    title="Fuyu-8B API Explorer",
    description="""
<div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
<strong>Explore the Synergy of Text and Image with Fuyu-8B Multi-Modal Transformer</strong>
</div>
<p>
Embark on a journey into the future of AI with Fuyu-8B, a cutting-edge multi-modal transformer developed by Adept. This revolutionary model leverages an 8 billion parameter architecture to seamlessly integrate image understanding and text generation, offering a unique platform for exploring the intersection of visual and linguistic data.
</p>
<p>
<strong>How to Use:</strong>
</p>
<ol>
<li>Upload an <strong>image</strong> to provide a visual context for the model.</li>
<li>Type your <strong>content</strong> into the textbox, with your question.</li>
<li>Adjust <strong>Temperature</strong> and <strong>Top P</strong> sliders to fine-tune the creativity and variability of the output.</li>
<li>Set the <strong>Max Tokens</strong> to limit the length of the generated text.</li>
<li>Click <strong>Submit</strong> to witness the model's ability to generate insightful and relevant content based on the combined input of text and image.</li>
</ol>
<p>
<strong>Powered by NVIDIA's advanced AI technologies, Fuyu-8B API Explorer is your gateway to discovering new possibilities in multi-modal AI interactions, completely free to use.</strong>
</p>
<p>
<strong>Created by:</strong> @artificialguybr (<a href="https://twitter.com/artificialguybr">Twitter</a>)
</p>
<p>
<strong>Discover more:</strong> <a href="https://artificialguy.com">artificialguy.com</a>
</p>
""",
)

# Start the web server (the stray trailing "|" token, a syntax error, is removed).
iface.launch()