# chat_llm_v3 / app.py
# Gradio chat UI for tiiuae/falcon-7b-instruct via the Hugging Face
# Inference API. Duplicated from daniloedu/chat_llm_v2.
import os
import requests
import gradio as gr
from dotenv import load_dotenv
from transformers import AutoTokenizer
load_dotenv()
model_name = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct"
headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"}
def format_chat_prompt(message, instruction):
prompt = f"System:{instruction}\nUser: {message}\nAssistant:"
return prompt
def query(payload):
    """POST *payload* to the hosted inference endpoint and return the parsed JSON.

    Args:
        payload: JSON-serializable request body, e.g. ``{"inputs": prompt}``.

    Returns:
        The decoded JSON response — a list of generations on success, or an
        ``{"error": ...}`` dict when the Inference API reports a failure
        (e.g. while the model is loading).
    """
    # A timeout keeps the Gradio worker from hanging forever if the API stalls;
    # the original call had none, which blocks indefinitely on a dead endpoint.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    return response.json()
def respond(message, instruction="A conversation between a user and an AI assistant. The assistant gives helpful and honest answers."):
    """Generate an assistant reply for *message* via the hosted inference API.

    Args:
        message: The user's question.
        instruction: System prompt placed at the top of the conversation.

    Returns:
        The assistant's reply text, or a human-readable error string when the
        Inference API reports a failure.
    """
    MAX_TOKENS = 1024  # prompt-length budget for the model
    prompt = format_chat_prompt(message, instruction)
    # Encode once and reuse the ids — the original encoded the prompt twice
    # (once to count, once to truncate).
    token_ids = tokenizer.encode(prompt)
    if len(token_ids) > MAX_TOKENS:
        # Keep the most recent tokens so the trailing "Assistant:" cue survives
        # (NOTE(review): this can drop the "System:" prefix — confirm acceptable).
        prompt = tokenizer.decode(token_ids[-MAX_TOKENS:])
    response = query({"inputs": prompt})
    # On failure the Inference API returns an {"error": ...} dict, not a list;
    # the original code crashed with a TypeError/KeyError in that case.
    if isinstance(response, dict) and "error" in response:
        return f"Error: {response['error']}"
    generated_text = response[0]['generated_text']
    assistant_message = generated_text.split("Assistant:")[-1]
    # Keep only the text before the model starts inventing the next "User:" turn.
    assistant_message = assistant_message.split("User:")[0].strip()
    return assistant_message
# NOTE(review): gr.inputs.Textbox / gr.outputs.Textbox and the `default=` kwarg
# were deprecated in Gradio 3.0 and removed in later releases; the top-level
# gr.Textbox component with `value=` is the supported equivalent.
iface = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Your question"),
        gr.Textbox(
            label="System message",
            lines=2,
            value="A conversation between a user and an AI assistant. The assistant gives helpful and honest answers.",
        ),
    ],
    outputs=[
        gr.Textbox(label="AI's response"),
    ],
)
iface.launch()