Spaces:
Sleeping
Sleeping
File size: 6,177 Bytes
e0559c2 4b0d7e0 e0559c2 41d30e3 e0559c2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import gradio as gr
import requests
import os
import base64
from PIL import Image
import io
import json
def resize_image(image_path, max_size=(512, 512), quality=85):
    """Downscale the image at *image_path* and re-encode it as JPEG bytes.

    Args:
        image_path: Filesystem path to the source image (any format PIL can open).
        max_size: Maximum (width, height); aspect ratio is preserved by thumbnail().
        quality: JPEG quality (1-95) passed to PIL's encoder.

    Returns:
        bytes: The resized image encoded as JPEG.
    """
    with Image.open(image_path) as img:
        # BUG FIX: JPEG cannot store an alpha channel or palette. Uploads such
        # as RGBA/P-mode PNGs made img.save(..., format="JPEG") raise OSError;
        # normalize to RGB first (grayscale "L" is also JPEG-safe, keep it).
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")
        # thumbnail() resizes in place and never enlarges beyond the original.
        img.thumbnail(max_size, Image.Resampling.LANCZOS)
        buffer = io.BytesIO()
        img.save(buffer, format="JPEG", quality=quality)
        return buffer.getvalue()
def filepath_to_base64(image_path):
    """Return the image at *image_path*, resized, as a JPEG data-URI string."""
    # Resize/re-encode first, then base64-encode the JPEG payload.
    encoded = base64.b64encode(resize_image(image_path)).decode('utf-8')
    return "data:image/jpeg;base64," + encoded
# NVIDIA NGC/NVCF bearer token, read once at import time from the environment.
# NOTE(review): if API_KEY is unset this is None and every request will 401.
api_key = os.getenv('API_KEY')
def call_neva_22b_api(image_path, content, temperature=0.2, top_p=0.7, max_tokens=512, quality=6, humor=0, creativity=6, helpfulness=6):
    """Send an image + prompt to the NeVA-22B NVCF endpoint and collect the streamed reply.

    Args:
        image_path: Path of the uploaded image (Gradio passes a temp filepath).
        content: User prompt; the image is appended as an inline <img> data URI.
        temperature: Sampling temperature (0-1).
        top_p: Nucleus-sampling probability mass (0-1).
        max_tokens: Upper bound on generated tokens.
        quality, humor, creativity, helpfulness: 0-9 steering labels sent on the
            assistant turn, as required by the NeVA API.

    Returns:
        str: The concatenated streamed completion, or a short error message if
        the request did not succeed (the original returned None in that case,
        which rendered as an empty Gradio output).
    """
    print(f"Caminho da imagem recebida: {image_path}")
    print(f"Conteúdo: {content}")
    # Imprime os novos parâmetros
    print(f"Quality: {quality}, Humor: {humor}, Creativity: {creativity}, Helpfulness: {helpfulness}")
    image_base64 = filepath_to_base64(image_path)
    invoke_url = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/8bf70738-59b9-4e5f-bc87-7ab4203be7a0"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "accept": "text/event-stream",
        "content-type": "application/json",
    }
    payload = {
        "messages": [
            {
                "content": f"{content} <img src=\"{image_base64}\" />",
                "role": "user"
            },
            {
                # Steering labels ride on an (empty) assistant turn per the API contract.
                "labels": {
                    "creativity": creativity,
                    "helpfulness": helpfulness,
                    "humor": humor,
                    "quality": quality
                },
                "role": "assistant"
            }
        ],
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": True
    }
    # FIX: added a timeout (the original could hang forever) and a `with` block
    # so the streamed connection is always released.
    with requests.post(invoke_url, headers=headers, json=payload, stream=True, timeout=120) as response:
        if response.status_code != 200:
            print(f"Erro na requisição: {response.status_code}")
            try:
                error_details = response.json()
                print(error_details)
            except ValueError:
                print(response.text)
            # BUG FIX: the original fell off the end here and returned None,
            # leaving the Gradio output silently blank on any API error.
            return f"Request failed with status {response.status_code}. See logs for details."
        response_text = ""
        for line in response.iter_lines():
            if line:
                try:
                    # Decode the line from bytes to string
                    decoded_line = line.decode('utf-8')
                    # SSE frames arrive as "data: {...json...}" — strip the prefix.
                    if decoded_line.startswith('data: '):
                        json_str = decoded_line[6:]  # Remove the first 6 characters ('data: ')
                        json_line = json.loads(json_str)
                        # Assuming the structure is consistent with the examples you provided.
                        content_parts = json_line.get("choices", [{}])[0].get("delta", {}).get("content", "")
                        response_text += content_parts
                    else:
                        print(f"Unexpected line format: {decoded_line}")
                except json.JSONDecodeError as e:
                    print(f"Error decoding JSON from response line: {e}")
                    print(f"Faulty line: {line}")
        return response_text
# --- Gradio input widgets ---------------------------------------------------
# Prompt text and image upload.
content_input = gr.Textbox(label="Content", lines=2, placeholder="Enter your content here...")
image_input = gr.Image(label="Upload Image", type="filepath")
# Steering labels (0-9) forwarded to the API's assistant-turn "labels" field.
quality_input = gr.Slider(label="Quality", minimum=0, maximum=9, step=1, value=6)
humor_input = gr.Slider(label="Humor", minimum=0, maximum=9, step=1, value=0)
creativity_input = gr.Slider(label="Creativity", minimum=0, maximum=9, step=1, value=6)
helpfulness_input = gr.Slider(label="Helpfulness", minimum=0, maximum=9, step=1, value=6)
# Standard sampling knobs.
temperature_input = gr.Slider(label="Temperature", minimum=0, maximum=1, step=0.01, value=0.2)
top_p_input = gr.Slider(label="Top P", minimum=0, maximum=1, step=0.01, value=0.7)
max_tokens_input = gr.Slider(label="Max Tokens", minimum=1, maximum=512, step=1, value=512)
# Wire the UI to the API caller. Input order must match call_neva_22b_api's
# positional parameters: (image_path, content, temperature, top_p, max_tokens,
# quality, humor, creativity, helpfulness).
iface = gr.Interface(fn=call_neva_22b_api,
                     inputs=[image_input, content_input, temperature_input, top_p_input, max_tokens_input, quality_input, humor_input, creativity_input, helpfulness_input],
                     outputs="text",
                     title="NEVA 22B DEMO",
                     description="""
<div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
    <strong>Unlock the Power of AI with NeVA-22B Vision-Language Model</strong>
</div>
<p>
    Dive into the next generation of AI with NeVA-22B, an advanced multi-modal vision-language model that redefines the boundaries of technology. Developed with a 22 billion parameter architecture, NeVA-22B excels in understanding and generating responses that incorporate both text and images, offering a groundbreaking platform for multi-modal AI exploration.
</p>
<p>
    <strong>How to Use:</strong>
</p>
<ol>
    <li>Upload an <strong>image</strong> to provide visual context.</li>
    <li>Enter your <strong>content</strong> in the textbox to pose a question or prompt.</li>
    <li>Utilize the <strong>Temperature</strong> and <strong>Top P</strong> sliders to adjust the creativity and diversity of the responses.</li>
    <li>Choose the <strong>Max Tokens</strong> to control the response length.</li>
    <li>Modify <strong>Quality</strong>, <strong>Humor</strong>, <strong>Creativity</strong>, and <strong>Helpfulness</strong> sliders to fine-tune the model's output according to your needs.</li>
    <li>Hit <strong>Submit</strong> to experience the model's capability to generate insightful responses based on your textual and visual inputs.</li>
</ol>
<p>
    <strong>Empowered by NVIDIA's cutting-edge AI technologies, NeVA-22B API Explorer opens up new avenues for engaging with multi-modal AI, accessible to everyone at no cost.</strong>
</p>
<p>
    <strong>HF Created by:</strong> @artificialguybr (<a href="https://twitter.com/artificialguybr">Twitter</a>)
</p>
<p>
    <strong>Explore further:</strong> <a href="https://artificialguy.com">artificialguy.com</a>
</p>
""" 
                     )
# FIX: removed the stray trailing "|" after launch() (scrape artifact) that
# made the file a syntax error.
iface.launch()