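# app.py — load a Llama-2 chat model with transformers, wrap it in a LangChain
# HuggingFacePipeline, and run a simple question through an LLMChain.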
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from langchain.prompts import PromptTemplate
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain_community.llms import HuggingFacePipeline
#1: "meta-llama/Llama-2-13b-chat-hf",
#2: "BramVanroy/Llama-2-13b-chat-dutch"
my_config = {'model_name': "meta-llama/Llama-2-13b-chat-hf", #"./Bram", #BramVanroy/Llama-2-13b-chat-dutch",
'do_sample': True, 'temperature': 0.1,
'repetition_penalty': 1.1, 'max_new_tokens': 500, }
print(f"Selected model: {my_config['model_name']}")
print(f"Parameters are: {my_config}")
question = "Who won the FIFA World Cup in the year 1994? "
template = """Question: {question}
Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)
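# For reference, prompt.format(question=question) renders the filled template:
#   Question: Who won the FIFA World Cup in the year 1994?
#   Answer: Let's think step by step.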
def generate_with_llama_chat(my_config):
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(my_config["model_name"])
    print("Loading causal LM...")
    # Load the weights once, in 8-bit, spread across the available devices.
    model = AutoModelForCausalLM.from_pretrained(
        my_config["model_name"],
        device_map="auto",
        load_in_8bit=True,
    )
    print("Building pipeline...")
    # Pass the loaded model object (not the name) so the weights are not loaded twice.
    chatbot = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        do_sample=my_config["do_sample"],
        temperature=my_config["temperature"],
        repetition_penalty=my_config["repetition_penalty"],
        max_new_tokens=my_config["max_new_tokens"],
    )
    return chatbot
llama_chat = generate_with_llama_chat(my_config)
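# Optional smoke test (not part of the original flow): call the raw pipeline
# directly before wrapping it in LangChain, to confirm the model generates.
# print(llama_chat("Hello!", max_new_tokens=20)[0]["generated_text"])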
# Attach a stdout streaming callback; note that with HuggingFacePipeline the
# callback only emits tokens if the underlying pipeline itself streams,
# otherwise output arrives in one piece at the end of generation.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
llm = HuggingFacePipeline(pipeline=llama_chat, callback_manager=callback_manager)
llm_chain = LLMChain(prompt=prompt, llm=llm)
print(llm_chain.invoke({"question": question}))
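# A minimal sketch of swapping in the Dutch chat model listed above; only the
# checkpoint name changes (this assumes access to the weights, e.g. after
# `huggingface-cli login` for gated Llama-2 checkpoints):
# my_config["model_name"] = "BramVanroy/Llama-2-13b-chat-dutch"
# llama_chat = generate_with_llama_chat(my_config)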