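# app.py -- uploaded by artificialguybr; last commit "Update app.py" (01187ce, verified).
# Residue from the hosting page's file viewer: raw / history / blame; no virus detected; size 5.28 kB.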
import gradio as gr
import requests
import os
import base64
from PIL import Image
import io
import json
def resize_image(image_path, max_size=(800, 800), quality=85):
    """Downscale an image file and return it re-encoded as JPEG bytes.

    The image is shrunk with ``Image.thumbnail`` (LANCZOS resampling) so that
    it fits inside ``max_size``, then saved to an in-memory buffer as JPEG.

    Args:
        image_path: Path of the image to open with ``PIL.Image.open``.
        max_size: ``(width, height)`` bounding box passed to ``thumbnail``.
        quality: JPEG quality setting passed to ``Image.save``.

    Returns:
        The JPEG-encoded image as ``bytes``.
    """
    with Image.open(image_path) as source:
        # JPEG cannot store an alpha channel or a colour palette, so saving
        # e.g. an RGBA or P-mode PNG raises OSError.  Normalise anything that
        # is not plain RGB / greyscale to RGB before resizing and encoding.
        if source.mode in ("RGB", "L"):
            img = source
        else:
            img = source.convert("RGB")
        img.thumbnail(max_size, Image.Resampling.LANCZOS)
        buffer = io.BytesIO()
        img.save(buffer, format="JPEG", quality=quality)
        return buffer.getvalue()
def filepath_to_base64(image_path):
    """Return the image at ``image_path`` as a ``data:image/jpeg;base64,...`` URI.

    The file is first passed through ``resize_image``; the resulting bytes
    are base64-encoded and prefixed with the JPEG data-URI header.
    """
    encoded = base64.b64encode(resize_image(image_path)).decode('utf-8')
    return "data:image/jpeg;base64," + encoded
# Bearer token for the inference endpoint, read once at import time from the
# API_KEY environment variable (None when the variable is not set).
api_key = os.environ.get('API_KEY')
def _extract_stream_content(raw_line):
    """Return the text fragment carried by one streamed response line.

    ``raw_line`` is one non-empty ``bytes`` line from the event stream.
    Lines without the ``data: `` prefix, an end-of-stream marker, and
    payloads that are not valid JSON all yield an empty string (the
    unexpected ones are logged with ``print``).
    """
    decoded_line = raw_line.decode('utf-8')
    if not decoded_line.startswith('data: '):
        print(f"Unexpected line format: {decoded_line}")
        return ""

    json_str = decoded_line[len('data: '):]
    # NOTE(review): streams of this style commonly end with a literal
    # "[DONE]" marker that is not JSON; treat it as a normal terminator
    # rather than logging it as a decode error.
    if json_str.strip() == "[DONE]":
        return ""

    try:
        json_line = json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from response line: {e}")
        print(f"Faulty line: {raw_line}")
        return ""

    if not isinstance(json_line, dict):
        return ""
    # Each step falls back to an empty value so that a missing key, an empty
    # "choices" list, or an explicit null never raises.
    choices = json_line.get("choices") or [{}]
    delta = choices[0].get("delta") or {}
    return delta.get("content") or ""


def call_fuyu_8b_api(image_path, content, temperature=0.2, top_p=0.7, max_tokens=1024):
    """Send an image plus a text prompt to the hosted endpoint and return its reply.

    The image is embedded in the user message as a base64 data URI inside an
    ``<img>`` tag, the request is made in streaming mode, and the ``content``
    deltas of the streamed events are concatenated into the returned text.

    Args:
        image_path: Path of the image file to send.
        content: Prompt text placed before the image in the message.
        temperature: Sampling temperature forwarded in the payload.
        top_p: Nucleus-sampling value forwarded in the payload.
        max_tokens: Token limit forwarded in the payload.

    Returns:
        The generated text, or a human-readable ``"Error: ..."`` message when
        no image was supplied, the request could not be completed, or the
        server answered with a non-200 status.
    """
    print(f"Caminho da imagem recebida: {image_path}")
    print(f"Conteúdo: {content}")
    print(f"Temperatura: {temperature}")
    print(f"Top P: {top_p}")
    print(f"Max Tokens: {max_tokens}")

    # Without an image there is nothing to open; fail with a clear message
    # instead of an exception from the image loader.
    if not image_path:
        return "Error: please upload an image before submitting."

    image_base64 = filepath_to_base64(image_path)
    invoke_url = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/9f757064-657f-4c85-abd7-37a7a9b6ee11"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "accept": "text/event-stream",
        "content-type": "application/json",
    }
    payload = {
        "messages": [
            {
                "content": f"{content} <img src=\"{image_base64}\" />",
                "role": "user"
            }
        ],
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": True
    }

    # (connect, read) timeouts in seconds: without them a stalled server
    # would block this call forever.
    request_timeout = (10, 120)

    try:
        # The context manager releases the streamed connection on every path.
        with requests.post(invoke_url, headers=headers, json=payload,
                           stream=True, timeout=request_timeout) as response:
            if response.status_code != 200:
                print(f"Erro na requisição: {response.status_code}")
                try:
                    error_details = response.json()
                    print(error_details)
                except ValueError:
                    print(response.text)
                # Always hand the caller a string; previously this path left
                # the result variable unassigned.
                return f"Error: the API request failed with status code {response.status_code}."

            fragments = [
                _extract_stream_content(line)
                for line in response.iter_lines()
                if line
            ]
    except requests.RequestException as exc:
        print(f"Request error: {exc}")
        return f"Error: the API request could not be completed ({exc})."

    return "".join(fragments)
# Input widgets; they are wired to call_fuyu_8b_api's parameters by position
# through the ``inputs`` list below.
content_input = gr.Textbox(
    label="Content",
    lines=2,
    placeholder="Enter your content here...",
)
image_input = gr.Image(label="Upload Image", type="filepath")
temperature_input = gr.Slider(
    label="Temperature",
    minimum=0,
    maximum=1,
    step=0.01,
    value=0.2,
)
top_p_input = gr.Slider(
    label="Top P",
    minimum=0,
    maximum=1,
    step=0.01,
    value=0.7,
)
max_tokens_input = gr.Slider(
    label="Max Tokens",
    minimum=1,
    maximum=1024,
    step=1,
    value=1024,
)

# Single-function interface: image + prompt + sampling controls in, text out.
iface = gr.Interface(
    fn=call_fuyu_8b_api,
    inputs=[
        image_input,
        content_input,
        temperature_input,
        top_p_input,
        max_tokens_input,
    ],
    outputs="text",
    title="Fuyu-8B API Explorer",
    description="""
<div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
<strong>Explore the Synergy of Text and Image with Fuyu-8B Multi-Modal Transformer</strong>
</div>
<p>
Embark on a journey into the future of AI with Fuyu-8B, a cutting-edge multi-modal transformer developed by Adept. This revolutionary model leverages an 8 billion parameter architecture to seamlessly integrate image understanding and text generation, offering a unique platform for exploring the intersection of visual and linguistic data.
</p>
<p>
<strong>How to Use:</strong>
</p>
<ol>
<li>Upload an <strong>image</strong> to provide a visual context for the model.</li>
<li>Type your <strong>content</strong> into the textbox, with your question.</li>
<li>Adjust <strong>Temperature</strong> and <strong>Top P</strong> sliders to fine-tune the creativity and variability of the output.</li>
<li>Set the <strong>Max Tokens</strong> to limit the length of the generated text.</li>
<li>Click <strong>Submit</strong> to witness the model's ability to generate insightful and relevant content based on the combined input of text and image.</li>
</ol>
<p>
<strong>Powered by NVIDIA's advanced AI technologies, Fuyu-8B API Explorer is your gateway to discovering new possibilities in multi-modal AI interactions, completely free to use.</strong>
</p>
<p>
<strong>Created by:</strong> @artificialguybr (<a href="https://twitter.com/artificialguybr">Twitter</a>)
</p>
<p>
<strong>Discover more:</strong> <a href="https://artificialguy.com">artificialguy.com</a>
</p>
""",
)

# Start the web app as soon as the module is executed.
iface.launch()