import gradio as gr
import requests
import os
import base64
from PIL import Image
import io
import json


def resize_image(image_path, max_size=(512, 512), quality=85):
    """Downscale the image to fit within max_size and re-encode it as JPEG bytes."""
    with Image.open(image_path) as img:
        img.thumbnail(max_size, Image.Resampling.LANCZOS)
        buffer = io.BytesIO()
        img.save(buffer, format="JPEG", quality=quality)
        return buffer.getvalue()


def filepath_to_base64(image_path):
    """Convert a local image file into a base64-encoded data URI."""
    img_bytes = resize_image(image_path)
    img_base64 = base64.b64encode(img_bytes)
    return f"data:image/jpeg;base64,{img_base64.decode('utf-8')}"


api_key = os.getenv("API_KEY")


def call_neva_22b_api(image_path, content, temperature=0.2, top_p=0.7, max_tokens=512,
                      quality=6, humor=0, creativity=6, helpfulness=6):
    print(f"Received image path: {image_path}")
    print(f"Content: {content}")
    # Log the tuning parameters
    print(f"Quality: {quality}, Humor: {humor}, Creativity: {creativity}, Helpfulness: {helpfulness}")

    image_base64 = filepath_to_base64(image_path)

    invoke_url = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/8bf70738-59b9-4e5f-bc87-7ab4203be7a0"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "accept": "text/event-stream",
        "content-type": "application/json",
    }
    payload = {
        "messages": [
            {
                # The resized image is embedded inline in the user prompt as a
                # data-URI <img> tag.
                "content": f'{content} <img src="{image_base64}" />',
                "role": "user"
            },
            {
                # The label sliders steer the style of the assistant's reply.
                "labels": {
                    "creativity": creativity,
                    "helpfulness": helpfulness,
                    "humor": humor,
                    "quality": quality
                },
                "role": "assistant"
            }
        ],
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": True
    }

    response = requests.post(invoke_url, headers=headers, json=payload, stream=True)

    if response.status_code != 200:
        print(f"Request error: {response.status_code}")
        try:
            error_details = response.json()
            print(error_details)
        except ValueError:
            print(response.text)
        return f"Request failed with status code {response.status_code}."

    response_text = ""
    for line in response.iter_lines():
        if not line:
            continue
        try:
            # Decode the line from bytes to string
            decoded_line = line.decode("utf-8")
            # Server-sent events are prefixed with "data: "
            if decoded_line.startswith("data: "):
                json_str = decoded_line[6:]  # Strip the "data: " prefix
                if json_str.strip() == "[DONE]":  # End-of-stream sentinel, if sent
                    break
                json_line = json.loads(json_str)
                # Accumulate the streamed token from the OpenAI-style delta payload
                content_parts = json_line.get("choices", [{}])[0].get("delta", {}).get("content", "")
                response_text += content_parts
            else:
                print(f"Unexpected line format: {decoded_line}")
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON from response line: {e}")
            print(f"Faulty line: {line}")
    return response_text


content_input = gr.Textbox(lines=2, placeholder="Enter your content here...", label="Content")
image_input = gr.Image(type="filepath", label="Upload Image")
temperature_input = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.2, label="Temperature")
top_p_input = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Top P")
max_tokens_input = gr.Slider(minimum=1, maximum=512, step=1, value=512, label="Max Tokens")
quality_input = gr.Slider(minimum=0, maximum=9, step=1, value=6, label="Quality")
humor_input = gr.Slider(minimum=0, maximum=9, step=1, value=0, label="Humor")
creativity_input = gr.Slider(minimum=0, maximum=9, step=1, value=6, label="Creativity")
helpfulness_input = gr.Slider(minimum=0, maximum=9, step=1, value=6, label="Helpfulness")

iface = gr.Interface(
    fn=call_neva_22b_api,
    inputs=[image_input, content_input, temperature_input, top_p_input, max_tokens_input,
            quality_input, humor_input, creativity_input, helpfulness_input],
    outputs="text",
    title="NEVA 22B DEMO",
    description="""
Unlock the Power of AI with NeVA-22B Vision-Language Model

Dive into the next generation of AI with NeVA-22B, an advanced multi-modal vision-language model. Built on a 22-billion-parameter architecture, NeVA-22B understands and generates responses that combine text and images, offering a powerful platform for multi-modal AI exploration.

How to Use:

  1. Upload an image to provide visual context.
  2. Enter your content in the textbox to pose a question or prompt.
  3. Use the Temperature and Top P sliders to adjust the creativity and diversity of the responses.
  4. Set Max Tokens to control the response length.
  5. Adjust the Quality, Humor, Creativity, and Helpfulness sliders to fine-tune the model's output to your needs.
  6. Hit Submit to generate a response based on your text and image inputs.

Powered by NVIDIA's cutting-edge AI technologies, the NeVA-22B API Explorer opens up new ways to engage with multi-modal AI, accessible to everyone at no cost.

Hugging Face Space created by: @artificialguybr (Twitter)

Explore further: artificialguy.com

""" ) iface.launch()