import gradio as gr
import requests
import os
import base64
from PIL import Image
import io
import json


def resize_image(image_path, max_size=(800, 800), quality=85):
    """Downscale the image to fit max_size and re-encode it as JPEG bytes."""
    with Image.open(image_path) as img:
        # JPEG cannot store an alpha channel, so normalize to RGB first.
        img = img.convert("RGB")
        img.thumbnail(max_size, Image.Resampling.LANCZOS)
        buffer = io.BytesIO()
        img.save(buffer, format="JPEG", quality=quality)
        return buffer.getvalue()


def filepath_to_base64(image_path):
    """Return the resized image as a base64 data URI."""
    img_bytes = resize_image(image_path)
    img_base64 = base64.b64encode(img_bytes)
    return f"data:image/jpeg;base64,{img_base64.decode('utf-8')}"


api_key = os.getenv('API_KEY')


def call_fuyu_8b_api(image_path, content, temperature=0.2, top_p=0.7, max_tokens=1024):
    print(f"Received image path: {image_path}")
    print(f"Content: {content}")
    print(f"Temperature: {temperature}")
    print(f"Top P: {top_p}")
    print(f"Max Tokens: {max_tokens}")

    image_base64 = filepath_to_base64(image_path)
    invoke_url = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/9f757064-657f-4c85-abd7-37a7a9b6ee11"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "accept": "text/event-stream",
        "content-type": "application/json",
    }
    payload = {
        "messages": [
            {
                # Embed the image as an inline <img> tag so the multi-modal
                # endpoint receives the visual context alongside the prompt.
                "content": f'{content} <img src="{image_base64}" />',
                "role": "user"
            }
        ],
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": True
    }

    response = requests.post(invoke_url, headers=headers, json=payload, stream=True)

    if response.status_code != 200:
        print(f"Request error: {response.status_code}")
        try:
            error_details = response.json()
            print(error_details)
        except ValueError:
            print(response.text)
    else:
        response_text = ""
        for line in response.iter_lines():
            if line:
                try:
                    # Decode the line from bytes to string
                    decoded_line = line.decode('utf-8')
                    # Server-sent events prefix each payload with "data: "
                    if decoded_line.startswith('data: '):
                        json_str = decoded_line[6:]  # Strip the 'data: ' prefix
                        json_line = json.loads(json_str)
                        # Accumulate the streamed delta content chunks.
                        content_parts = json_line.get("choices", [{}])[0].get("delta", {}).get("content", "")
                        response_text += content_parts
                    else:
                        print(f"Unexpected line format: {decoded_line}")
                except json.JSONDecodeError as e:
                    print(f"Error decoding JSON from response line: {e}")
                    print(f"Faulty line: {line}")
        return response_text


content_input = gr.Textbox(lines=2, placeholder="Enter your content here...", label="Content")
image_input = gr.Image(type="filepath", label="Upload Image")
temperature_input = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.2, label="Temperature")
top_p_input = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Top P")
max_tokens_input = gr.Slider(minimum=1, maximum=1024, step=1, value=1024, label="Max Tokens")

iface = gr.Interface(
    fn=call_fuyu_8b_api,
    inputs=[image_input, content_input, temperature_input, top_p_input, max_tokens_input],
    outputs="text",
    title="Fuyu-8B API Explorer",
    description="""
Explore the Synergy of Text and Image with Fuyu-8B Multi-Modal Transformer

Embark on a journey into the future of AI with Fuyu-8B, a cutting-edge multi-modal transformer developed by Adept. This revolutionary model leverages an 8 billion parameter architecture to seamlessly integrate image understanding and text generation, offering a unique platform for exploring the intersection of visual and linguistic data.

How to Use:

  1. Upload an image to provide a visual context for the model.
  2. Type your question or prompt into the Content textbox.
  3. Adjust Temperature and Top P sliders to fine-tune the creativity and variability of the output.
  4. Set the Max Tokens to limit the length of the generated text.
  5. Click Submit to witness the model's ability to generate insightful and relevant content based on the combined input of text and image.

Powered by NVIDIA's advanced AI technologies, Fuyu-8B API Explorer is your gateway to discovering new possibilities in multi-modal AI interactions, completely free to use.

Created by: @artificialguybr (Twitter)

Discover more: artificialguy.com

""" ) iface.launch()