import warnings
import os
import glob
import textwrap
import time

import langchain
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import PromptTemplate, LLMChain
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
import torch
import transformers
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
)
class CFG:
    """Static configuration for the retrieval-QA script.

    Everything is a plain class attribute, read as ``CFG.<name>``; the class
    is never instantiated in the visible part of the file.
    """

    # LLMs
    # Label only: not read anywhere in the visible part of the file.
    # NOTE(review): the label says 13B while the module-level ``model_repo``
    # names a 7B checkpoint -- confirm which one is intended.
    model_name = 'llama2-13b-chat'  # wizardlm, llama2-7b-chat, llama2-13b-chat, mistral-7B
    # The next three values are forwarded to the text-generation pipeline.
    temperature = 0
    top_p = 0.95
    repetition_penalty = 1.15

    # splitting
    # Not read in the visible part of the file; presumably the settings the
    # vector index was built with -- TODO confirm.
    split_chunk_size = 800
    split_overlap = 0

    # embeddings
    embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'

    # similar passages
    # Number of passages the retriever is asked for per question.
    k = 6

    # paths
    # Folder of the saved FAISS index that the script loads.
    Embeddings_path = 'C:/Studies/main project/codes/final/model/cse-vectordb/faiss_index_hp'
    # Output_folder = './cse-vectordb'
# Checkpoint identifier shared by the tokenizer and the model so the two
# always come from the same repository.
model_repo = 'daryl149/llama-2-7b-chat-hf'

tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)

# Quantization settings handed to the model loader: 4-bit weights ("nf4"),
# float16 compute dtype, double quantization enabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# The checkpoint must be passed explicitly: ``from_pretrained`` has no
# default for which model to load.
# NOTE(review): ``trust_remote_code=True`` allows code shipped with the
# checkpoint repository to run at load time -- confirm this is required.
model = AutoModelForCausalLM.from_pretrained(
    model_repo,
    quantization_config=bnb_config,
    device_map='auto',
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)

# Forwarded to the generation pipeline as ``max_length``.
max_len = 2048
### hugging face pipeline
# Text-generation pipeline around the already-loaded model and tokenizer.
# The end-of-sequence token id doubles as the padding id.
# NOTE(review): ``do_sample`` is left commented out, so ``temperature`` and
# ``top_p`` presumably have no effect on decoding -- confirm intent.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    pad_token_id=tokenizer.eos_token_id,
    # do_sample = True,
    max_length=max_len,
    temperature=CFG.temperature,
    top_p=CFG.top_p,
    repetition_penalty=CFG.repetition_penalty,
)

### langchain pipeline
# Wrap the pipeline so it can be used as the LLM of a LangChain chain.
llm = HuggingFacePipeline(pipeline=pipe)
### download embeddings model
# Embedding model used to encode queries against the saved index; it is
# placed on the CUDA device.
embeddings = HuggingFaceInstructEmbeddings(
    model_name=CFG.embeddings_model_repo,
    model_kwargs={"device": "cuda"},
)

### load vector DB embeddings
# ``load_local`` needs the embedding object as well as the folder: the saved
# index stores vectors only, and queries must be embedded at search time.
# NOTE(review): presumably this must be the same embedding model the index
# was built with -- confirm against the indexing script.
vectordb = FAISS.load_local(
    CFG.Embeddings_path,  # from input folder
    # CFG.Output_folder + '/faiss_index_hp', # from output folder
    embeddings,
)
# Prompt given to the LLM for every question. It declares two input
# variables, so both placeholders must appear in the text: {context} is where
# the retrieved passages are inserted and {question} is the user's query.
prompt_template = """
Don't try to make up an answer, if you don't know just say that you don't know.
Answer in the same language the question was asked.
Use only the following pieces of context to answer the question at the end.

{context}

Question: {question}
"""

PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)
# Retriever returning the CFG.k most similar passages from the FAISS index.
# ``search_type`` is an argument of ``as_retriever`` itself; ``search_kwargs``
# only carries options for the underlying search call, such as ``k``.
retriever = vectordb.as_retriever(
    search_type="similarity",
    search_kwargs={"k": CFG.k},
)

# Question-answering chain: retrieved passages are combined with the custom
# prompt and sent to the LLM; the source documents are returned with the
# answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # map_reduce, map_rerank, stuff, refine
    retriever=retriever,
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True,
    verbose=False,
)