TinyLLamaTest

Sleeping

App Files Files Community

TinyLLamaTest / qabot.py

Pudding48

Update qabot.py

3cb1d04 verified 6 days ago

raw

history blame contribute delete

2.36 kB

	from langchain_community.llms import CTransformers
	from langchain.prompts import PromptTemplate
	from langchain_core.runnables import RunnableSequence
	from langchain.chains import RetrievalQA
	#from langchain_community.embeddings import GPT4AllEmbeddings
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores import FAISS

	from huggingface_hub import hf_hub_download

	# !pip install llama-cpp-python

	# from llama_cpp import Llama

	# model_file = Llama.from_pretrained(
	# repo_id="Pudding48/TinyLLamaTest",
	# filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
	# )
	import os
	cache_path = "/home/user/app/hf_cache"
	os.makedirs(cache_path, exist_ok=True)

	model_file = hf_hub_download(
	repo_id="Pudding48/TinyLlamaTest",
	filename="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
	cache_dir=cache_path
	)

	# Vector store location
	vector_dp_path = "/home/user/app/vectorstores/db_faiss"

	from prepare_vector_dp import create_db_from_text
	create_db_from_text()

	# Load LLM with CTransformers
	def load_llm(model_file):
	return CTransformers(
	model=model_file,
	model_type="llama",
	temperature=0.01,
	config={'gpu_layers': 0},
	max_new_tokens=128,
	context_length=512
	)

	# Create the prompt
	def creat_prompt(template):
	return PromptTemplate(template=template, input_variables=["context", "question"])

	# Create QA pipeline
	def create_qa_chain(prompt, llm, db):
	return RetrievalQA.from_chain_type(
	llm=llm,
	chain_type="stuff",
	retriever=db.as_retriever(search_kwargs={"k": 1}),
	return_source_documents=False,
	chain_type_kwargs={'prompt': prompt}
	)

	# Load vector DB
	def read_vector_db():
	embedding_model = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")
	return FAISS.load_local(vector_dp_path, embedding_model, allow_dangerous_deserialization=True)

	# Build everything
	db = read_vector_db()
	llm = load_llm(model_file)

	template = """<\|im_start\|>system\nSử dụng thông tin sau đây để trả lời câu hỏi. Nếu bạn không biết câu trả lời, hãy nói không biết, đừng cố tạo ra câu trả lời\n
	{context}<\|im_end\|>\n<\|im_start\|>user\n{question}<\|im_end\|>\n<\|im_start\|>assistant"""

	prompt = creat_prompt(template)
	llm_chain = create_qa_chain(prompt, llm, db)