from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp
import gradio as gr
# Initialize the callback manager and the LLM
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm = LlamaCpp(
    model_path="llama-2-7b-chat.Q3_K_M.gguf",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,  # verbose is required to pass output to the callback manager
)
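# Note: model_path assumes the quantized GGUF weights are already in the
# working directory (e.g. downloaded from the TheBloke/Llama-2-7B-Chat-GGUF
# repository on the Hugging Face Hub).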
# Define the function that sends a prompt to the LLM
def llama_llm(prompt):
    # Wrap the user prompt in the Llama-2 chat template; the <<SYS>> block
    # carries the system prompt with the facts the model should answer from.
    llama_prompt = f"<s>[INST] <<SYS>>\nEve lives in Hamburg.; Bob lives in Cape Town.; Alice lives in Mumbai.\n<</SYS>>\n\n{prompt} [/INST]"
    response = llm.invoke(llama_prompt)
    return response
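# Quick direct call for testing (a hypothetical example question; the Gradio
# UI below is not needed for this):
#     print(llama_llm("Where does Bob live?"))  # expected answer: Cape Town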
# Create the Gradio interface
iface = gr.Interface(
    fn=llama_llm,
    inputs="text",
    outputs="text",
    title="Llama LLM Chat Interface",
    description="Ask a question based on the system prompt: 'Eve lives in Hamburg.; Bob lives in Cape Town.; Alice lives in Mumbai.'",
)
# Launch the Gradio interface
iface.launch()
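# launch() serves the app on a local URL; when running outside a hosted
# Space, iface.launch(share=True) can be used to get a temporary public link.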