import base64
import os
import time

import gradio as gr
from together import Together

# The Together client is created lazily, either from a user-supplied key or
# from the TOGETHER_API_KEY environment variable.
client = None


def initialize_client(api_key=None):
    global client
    if api_key:
        client = Together(api_key=api_key)
    elif "TOGETHER_API_KEY" in os.environ:
        client = Together()
    else:
        raise ValueError("Please provide an API key or set the TOGETHER_API_KEY environment variable")


def encode_image(image_path):
    # Read an image from disk and return it as a base64-encoded string.
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def bot_streaming(message, history, max_new_tokens=250, api_key=None):
    if client is None:
        initialize_client(api_key)

    txt = message["text"]
    messages = []

    # Rebuild the conversation from Gradio's tuple-format history. An image turn
    # is stored as ((image_path,), None) and is followed by a (text, response)
    # pair, so both entries are merged into a single user message.
    for i, msg in enumerate(history):
        if isinstance(msg[0], tuple):
            # Image turn: combine the image with the text of the following turn.
            messages.append({
                "role": "user",
                "content": [
                    {"type": "text", "text": history[i + 1][0]},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(msg[0][0])}"}},
                ],
            })
            messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i + 1][1]}]})
        elif i > 0 and isinstance(history[i - 1][0], tuple) and isinstance(msg[0], str):
            # Text turn already merged with the preceding image turn.
            pass
        elif isinstance(history[i - 1][0], str) and isinstance(msg[0], str):
            # Plain text-only turn.
            messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
            messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})

    # Append the current message, inlining an uploaded image (if any) as base64.
    if len(message["files"]) == 1:
        if isinstance(message["files"][0], str):  # examples pass a plain path
            image_path = message["files"][0]
        else:  # regular uploads pass a file dict
            image_path = message["files"][0]["path"]
        messages.append({
            "role": "user",
            "content": [
                {"type": "text", "text": txt},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}},
            ],
        })
    else:
        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})

    # Stream the completion from the Together API, yielding a growing buffer
    # so Gradio renders the response token by token.
    stream = client.chat.completions.create(
        model="meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
        messages=messages,
        max_tokens=max_new_tokens,
        stream=True,
    )

    buffer = ""
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            buffer += chunk.choices[0].delta.content
            time.sleep(0.01)
            yield buffer


demo = gr.ChatInterface(
    fn=bot_streaming,
    title="Meta Llama-3.2-90B-Vision-Instruct-Turbo",
    textbox=gr.MultimodalTextbox(),
    additional_inputs=[
        gr.Slider(
            minimum=10,
            maximum=500,
            value=250,
            step=10,
            label="Maximum number of new tokens to generate",
        ),
        gr.Textbox(
            label="Together API Key (optional)",
            placeholder="Enter your API key here (optional)",
        ),
    ],
    cache_examples=False,
    description="Try multimodal Llama by Meta with the Together API in this demo. Upload an image and start chatting about it. You can provide your own API key or use the default one.",
    stop_btn="Stop Generation",
    fill_height=True,
    multimodal=True,
)

if __name__ == "__main__":
    demo.launch(debug=True)
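
# Usage sketch (assumption: this script is saved as app.py with the `gradio`
# and `together` packages installed):
#
#   export TOGETHER_API_KEY="..."   # or paste a key into the optional textbox
#   python app.py
#
# Gradio prints a local URL; open it, upload an image in the multimodal
# textbox, and ask a question about it to start a streaming chat.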