Spaces:

galib45
/

Llama3-Med42-8B

Runtime error

App Files Files Community

Llama3-Med42-8B / app.py

galib45

Update app.py

a0dc748 verified 4 months ago

raw

history blame contribute delete

2.41 kB

	import transformers
	import torch
	import gradio as gr

	# Model checkpoint to serve; the text-generation pipeline below is built
	# once at import time and reused for every request.
	model_name_or_path = "m42-health/Llama3-Med42-8B"

	pipeline = transformers.pipeline(
	    task="text-generation",
	    model=model_name_or_path,
	    device_map="auto",
	    torch_dtype=torch.bfloat16,
	)

	# System turn that fixes the assistant's persona and safety rules. It is the
	# first message of every conversation sent to the model.
	system_message = dict(
	    role="system",
	    content=" ".join(
	        [
	            "You are a helpful, respectful, and honest medical assistant.",
	            "You are a second version of Med42 developed by the AI team at M42, UAE.",
	            "Always answer as helpfully as possible, while being safe.",
	            "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.",
	            "Please ensure that your responses are socially unbiased and positive in nature.",
	            "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct.",
	            "If you don’t know the answer to a question, please don’t share false information.",
	        ]
	    ),
	)

	# Token ids that terminate generation: the tokenizer's own EOS token plus the
	# end-of-turn marker. The marker must be spelled exactly "<|eot_id|>"; writing
	# the pipes with backslashes yields a different string (and an invalid escape
	# sequence), so the lookup would not resolve to the end-of-turn token.
	stop_tokens = [
	    pipeline.tokenizer.eos_token_id,
	    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
	]

	# Running transcript, seeded with the system turn. It is a module-level list,
	# so every call to chat_with_model reads and extends the same history.
	# NOTE(review): this means all callers of the app share one conversation and
	# the list grows without bound — confirm that is intended.
	conversation_history = [system_message]


	def chat_with_model(user_input):
	    """Generate the assistant's reply to ``user_input`` and record the exchange.

	    The stored history plus the new user turn is rendered with the tokenizer's
	    chat template, passed to the text-generation pipeline, and the text that
	    follows the prompt is taken as the reply.

	    The shared history is only updated after generation succeeds. If the
	    tokenizer or the pipeline raises, the history is left exactly as it was,
	    so a failed request cannot leave a dangling user turn that would corrupt
	    the prompt of every later request.

	    Args:
	        user_input: The text of the user's message.

	    Returns:
	        The generated reply with surrounding whitespace stripped.
	    """
	    user_turn = {"role": "user", "content": user_input}

	    # Work on a new list so the shared history stays untouched until we know
	    # generation succeeded.
	    messages = conversation_history + [user_turn]

	    # Render the transcript as a single prompt string.
	    # NOTE(review): add_generation_prompt=False is kept from the original;
	    # confirm this model's chat template still opens the assistant turn.
	    prompt = pipeline.tokenizer.apply_chat_template(
	        messages, tokenize=False, add_generation_prompt=False
	    )

	    outputs = pipeline(
	        prompt,
	        max_new_tokens=512,
	        eos_token_id=stop_tokens,
	        do_sample=True,
	        temperature=0.4,
	        top_k=150,
	        top_p=0.75,
	    )

	    # The pipeline output starts with the prompt; keep only what follows it.
	    generated_text = outputs[0]["generated_text"][len(prompt):]

	    # Commit both turns together now that a reply exists.
	    conversation_history.extend(
	        [user_turn, {"role": "assistant", "content": generated_text}]
	    )

	    return generated_text.strip()

	# Text-in / text-out web UI wrapped around chat_with_model.
	iface = gr.Interface(
	    chat_with_model,
	    inputs="text",
	    outputs="text",
	    title="Med42 Medical Assistant",
	    description="Ask anything about medicine!",
	)

	# Start serving the UI, requesting a shareable link.
	iface.launch(share=True)