import warnings
warnings.filterwarnings("ignore")

import os
import glob
import textwrap
import time

import langchain
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import PromptTemplate, LLMChain
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA

import torch
import transformers
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline
)

class CFG:
    # LLMs
    model_name = 'llama2-13b-chat'  # options: wizardlm, llama2-7b-chat, llama2-13b-chat, mistral-7B
    temperature = 0
    top_p = 0.95
    repetition_penalty = 1.15

    # splitting
    split_chunk_size = 800
    split_overlap = 0

    # embeddings
    embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'

    # similar passages
    k = 6

    # paths
    Embeddings_path = 'C:/Studies/main project/codes/final/model/cse-vectordb/faiss_index_hp'
    # Output_folder = './cse-vectordb'

# NOTE: the repo is hard-coded to the 7B chat checkpoint; CFG.model_name above is
# informational only and is not used to select it.
model_repo = 'daryl149/llama-2-7b-chat-hf'

tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = "nf4",
    bnb_4bit_compute_dtype = torch.float16,
    bnb_4bit_use_double_quant = True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_repo,
    quantization_config = bnb_config,
    device_map = 'auto',
    low_cpu_mem_usage = True,
    trust_remote_code = True
)

max_len = 2048

### hugging face pipeline
pipe = pipeline(
    task = "text-generation",
    model = model,
    tokenizer = tokenizer,
    pad_token_id = tokenizer.eos_token_id,
    # do_sample = True,
    max_length = max_len,
    temperature = CFG.temperature,
    top_p = CFG.top_p,
    repetition_penalty = CFG.repetition_penalty
)

### langchain pipeline
llm = HuggingFacePipeline(pipeline = pipe)
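
### optional smoke test (not in the original flow): calling the wrapped LLM directly
### confirms the quantized model generates text before the retrieval chain is built.
# print(llm("Briefly explain what a vector database is."))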

### download embeddings model
# NOTE: all-MiniLM-L6-v2 is a plain sentence-transformers model; whichever wrapper is
# used here must produce the same embeddings as the ones used to build the FAISS index.
embeddings = HuggingFaceInstructEmbeddings(
    model_name = CFG.embeddings_model_repo,
    model_kwargs = {"device": "cuda"}
)

### load vector DB embeddings
vectordb = FAISS.load_local(
    CFG.Embeddings_path,  # from input folder
    # CFG.Output_folder + '/faiss_index_hp',  # from output folder
    embeddings,
    allow_dangerous_deserialization = True
)
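
### For reference — a minimal sketch (not part of this script) of how the FAISS index at
### CFG.Embeddings_path could have been built, assuming the source PDFs sit in a
### hypothetical './documents' folder and using the loader/splitter imported above:
#
# loader = DirectoryLoader('./documents', glob="*.pdf", loader_cls=PyPDFLoader)
# documents = loader.load()
# splitter = RecursiveCharacterTextSplitter(
#     chunk_size = CFG.split_chunk_size,
#     chunk_overlap = CFG.split_overlap
# )
# chunks = splitter.split_documents(documents)
# index = FAISS.from_documents(chunks, embeddings)
# index.save_local(CFG.Embeddings_path)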

prompt_template = """
Don't try to make up an answer; if you don't know, just say that you don't know.
Answer in the same language in which the question was asked.
Use only the following pieces of context to answer the question at the end.

{context}

Question: {question}
Answer:"""

PROMPT = PromptTemplate(
    template = prompt_template,
    input_variables = ["context", "question"]
)

# search_type is a separate argument to as_retriever, not a search_kwargs entry
retriever = vectordb.as_retriever(search_type = "similarity", search_kwargs = {"k": CFG.k})

qa_chain = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = "stuff",  # other options: map_reduce, map_rerank, refine
    retriever = retriever,
    chain_type_kwargs = {"prompt": PROMPT},
    return_source_documents = True,
    verbose = False
)

print("Hello")
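
### A minimal usage sketch (an assumption, not from the original script): the question
### string below is hypothetical. With return_source_documents=True, calling the chain
### returns a dict with the answer under 'result' and the retrieved passages under
### 'source_documents'.
query = "What topics are covered in the indexed documents?"
response = qa_chain(query)

# wrap the generated answer for readability and list the source documents
print(textwrap.fill(response["result"], width=100))
for doc in response["source_documents"]:
    print("-", doc.metadata.get("source", "unknown source"))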