import pytest
from tests.utils import wrap_test_forked


@wrap_test_forked
def test_langchain_simple_h2ogpt():
    run_langchain_simple(base_model='h2oai/h2ogpt-oasst1-512-12b', prompt_type='human_bot')


@wrap_test_forked
def test_langchain_simple_vicuna():
    run_langchain_simple(base_model='junelee/wizard-vicuna-13b', prompt_type='instruct_vicuna')


def run_langchain_simple(base_model='h2oai/h2ogpt-oasst1-512-12b', prompt_type='human_bot'):
    """
    :param base_model: Hugging Face model name or local path of the base model
    :param prompt_type: prompt_type required for stopping support and correct handling of instruction prompting
    :return:
    """
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from src.h2oai_pipeline import H2OTextGenerationPipeline
    model_name = base_model
    from transformers import AutoConfig
    config = AutoConfig.from_pretrained(base_model, use_auth_token=True,
                                        trust_remote_code=True,
                                        offload_folder="./")
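    # decide whether LLaMA-specific loaders are needed, based on the config or the model name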
    llama_type_from_config = 'llama' in str(config).lower()
    llama_type_from_name = "llama" in base_model.lower()
    llama_type = llama_type_from_config or llama_type_from_name
    if llama_type:
        from transformers import LlamaForCausalLM, LlamaTokenizer
        model_loader = LlamaForCausalLM
        tokenizer_loader = LlamaTokenizer
    else:
        model_loader = AutoModelForCausalLM
        tokenizer_loader = AutoTokenizer
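    # pick device and device_map: put everything on GPU 0 when CUDA is available, else fall back to CPU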
    load_in_8bit = True
    n_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0
    device = 'cpu' if n_gpus == 0 else 'cuda'
    device_map = {"": 0} if device == 'cuda' else "auto"
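    # load tokenizer and model; 8-bit loading requires a CUDA device and bitsandbytes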
    tokenizer = tokenizer_loader.from_pretrained(model_name, padding_side="left")
    model = model_loader.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map=device_map,
                                         load_in_8bit=load_in_8bit)
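    # wrap the model in the h2oGPT text-generation pipeline so prompting and stopping follow prompt_type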
    gen_kwargs = dict(max_new_tokens=512, return_full_text=True, early_stopping=False)
    pipe = H2OTextGenerationPipeline(model=model, tokenizer=tokenizer, prompt_type=prompt_type,
                                     base_model=base_model, **gen_kwargs)
    # force the text2text-generation task so only our prompt removal is used,
    # not the built-in prompt removal, which is less general and not specific to our models
    pipe.task = "text2text-generation"
    # create llm for LangChain
    from langchain.llms import HuggingFacePipeline
    llm = HuggingFacePipeline(pipeline=pipe)
    # Setup QA
    from langchain import PromptTemplate
    from langchain.chains.question_answering import load_qa_chain
    # NOTE: instruct-tuned models don't need many-shot examples that waste context space
    template = """
==
{context}
==
{question}"""
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template,
    )
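    # load_qa_chain defaults to the 'stuff' chain type, which stuffs all input documents into the prompt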
    chain = load_qa_chain(llm, prompt=prompt)
    docs = []  # could be a list of LangChain Documents gathered from arbitrary sources
    query = "Give detailed list of reasons for who is smarter, Einstein or Newton."
    chain_kwargs = dict(input_documents=docs, question=query)
    answer = chain(chain_kwargs)
    print(answer)
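    # sanity-check the generation: accept any of a few known-good answer openings per model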
    output = answer['output_text']
    if 'vicuna' in base_model:
        res1 = 'Both Albert Einstein and Sir Isaac Newton were brilliant scientists' in output and "Newton" in output
        res2 = 'Both Albert Einstein and Sir Isaac Newton are considered two' in output and "Newton" in output
        res3 = False
    else:
        res1 = 'Einstein was a genius who revolutionized physics' in output and "Newton" in output
        res2 = 'Einstein and Newton are two of the most famous scientists in history' in output and "Newton" in output
        res3 = 'Einstein is considered to be the smartest person' in output and "Newton" in output
    assert res1 or res2 or res3