csalabs committed on
Commit
6d14bdc
1 Parent(s): c50c563

Upload 4 files

Files changed (4)
  1. .env +3 -0
  2. app.py +123 -0
  3. constants.py +142 -0
  4. requirements.txt +16 -0
.env ADDED
@@ -0,0 +1,3 @@
+ HUGGINGFACE_API_TOKEN='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'
+ REPLICATE_API_TOKEN='r8_f0yg1vSn32AAGDnqV6qErGJZeCcFFl30CJ46E'  # --> Org gmail account
+ # REPLICATE_API_TOKEN='r8_L3BQN0zjnB1KwwkPjZD0RSLVrj9umPv0oRjFY'  # --> trial token, not working
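These values are read at runtime through python-dotenv rather than being hard-coded; a minimal sketch of the lookup pattern (the same one app.py below relies on):

    import os
    from dotenv import load_dotenv

    load_dotenv()  # copies KEY=value pairs from .env into os.environ
    token = os.environ.get("REPLICATE_API_TOKEN")  # the Replicate client reads this variable itself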
app.py ADDED
@@ -0,0 +1,123 @@
+ import streamlit as st
+ from streamlit_chat import message
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.llms import CTransformers
+ from langchain.llms import Replicate
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.vectorstores import FAISS
+ from langchain.memory import ConversationBufferMemory
+ from langchain.document_loaders import PyPDFLoader, UnstructuredFileLoader
+ from langchain.document_loaders import TextLoader
+ from langchain.document_loaders import Docx2txtLoader
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+ from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
+ import os
+ from dotenv import load_dotenv
+ import tempfile
+ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
+ from constants import (
+     CHROMA_SETTINGS,
+     DOCUMENT_MAP,
+     EMBEDDING_MODEL_NAME,
+     INGEST_THREADS,
+     PERSIST_DIRECTORY,
+     SOURCE_DIRECTORY,
+ )
+ from langchain.docstore.document import Document
+
+ load_dotenv()
+
+
+ def initialize_session_state():
+     # Seed the Streamlit session with an empty chat history and greeting messages.
+     if 'history' not in st.session_state:
+         st.session_state['history'] = []
+
+     if 'generated' not in st.session_state:
+         st.session_state['generated'] = ["Hello! Ask me anything about 🤗"]
+
+     if 'past' not in st.session_state:
+         st.session_state['past'] = ["Hey! 👋"]
+
+
+ def conversation_chat(query, chain, history):
+     # Run one question through the chain and record the (query, answer) pair.
+     result = chain({"question": query, "chat_history": history})
+     history.append((query, result["answer"]))
+     return result["answer"]
+
+
+ def display_chat_history(chain):
+     reply_container = st.container()
+     container = st.container()
+
+     with container:
+         with st.form(key='my_form', clear_on_submit=True):
+             user_input = st.text_input("Question:", placeholder="Ask about your documents", key='input')
+             submit_button = st.form_submit_button(label='Send')
+
+         if submit_button and user_input:
+             with st.spinner('Generating response...'):
+                 output = conversation_chat(user_input, chain, st.session_state['history'])
+
+             st.session_state['past'].append(user_input)
+             st.session_state['generated'].append(output)
+
+     if st.session_state['generated']:
+         with reply_container:
+             for i in range(len(st.session_state['generated'])):
+                 message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="thumbs")
+                 message(st.session_state["generated"][i], key=str(i), avatar_style="fun-emoji")
+
+
+ def create_conversational_chain(vector_store):
+     load_dotenv()
+     # Llama-2-7B-Chat hosted on Replicate, streaming tokens to stdout as they arrive.
+     llm = Replicate(
+         streaming=True,
+         # model = "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781",
+         model="meta/llama-2-7b-chat:8e6975e5ed6174911a6ff3d60540dfd4844201974602551e10e9e87ab143d81e",
+         callbacks=[StreamingStdOutCallbackHandler()],
+         input={"temperature": 0.01, "max_length": 500, "top_p": 1})
+     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+     chain = ConversationalRetrievalChain.from_llm(llm=llm, chain_type='stuff',
+                                                   retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
+                                                   memory=memory)
+     return chain
+
+
+ file_paths = [
+     './SOURCE_DOCUMENTS/Freedom of Information and Protection of Privacy Act, R.S.O. 1990, c. F.31[462] - Copy.pdf',
+     './SOURCE_DOCUMENTS/Highway Traffic Act, R.S.O. 1990, c. H.8[465] - Copy.pdf',
+     './SOURCE_DOCUMENTS/Narcotics Safety and Awareness Act, 2010, S.O. 2010, c. 22[463].pdf',
+     './SOURCE_DOCUMENTS/Nutrient Management Act, 2002, S.O. 2002, c. 4[464].pdf',
+     # Add more file paths as needed
+ ]
+
+
+ def main():
+     # REPLICATE_API_TOKEN comes from the environment, populated from .env by
+     # the module-level load_dotenv() call above.
+     os.environ.get("REPLICATE_API_TOKEN")
+     # Initialize session state
+     initialize_session_state()
+     st.title("Multi-Docs ChatBot using llama-2-7b :books:")
+
+     # Load every source PDF and collect the resulting pages.
+     documents = []
+     for file_path in file_paths:
+         loader = UnstructuredFileLoader(file_path)
+         loaded_doc = loader.load()  # returns a list of page-level Documents
+         documents.extend(loaded_doc)
+
+     # Split the pages into overlapping chunks for retrieval.
+     text_splitter = CharacterTextSplitter(separator='\n',
+                                           chunk_size=1500,
+                                           chunk_overlap=300)
+     text_chunks = text_splitter.split_documents(documents)
+
+     # Embed the chunks on CPU and index them in an in-memory FAISS store.
+     embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
+                                        model_kwargs={'device': 'cpu'})
+
+     vector_store = FAISS.from_documents(text_chunks, embeddings)
+
+     # Create the chain object
+     chain = create_conversational_chain(vector_store)
+
+     # Display chat history
+     display_chat_history(chain)
+
+
+ if __name__ == "__main__":
+     main()
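The chain can also be exercised without the Streamlit UI; a hypothetical smoke test (not part of the commit) reusing the helpers above:

    # Assumes `vector_store` has been built exactly as in main().
    chain = create_conversational_chain(vector_store)
    history = []
    answer = conversation_chat("What does the Highway Traffic Act regulate?", chain, history)
    print(answer)   # the model's reply
    print(history)  # [(question, answer)] - one tuple appended per turn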
constants.py ADDED
@@ -0,0 +1,142 @@
+ import os
+
+ # from dotenv import load_dotenv
+ from chromadb.config import Settings
+
+ # https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/excel.html?highlight=xlsx#microsoft-excel
+ from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader
+
+ # load_dotenv()
+ ROOT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
+
+ # Define the folder holding the source documents
+ SOURCE_DIRECTORY = f"{ROOT_DIRECTORY}/SOURCE_DOCUMENTS"
+
+ # Define the folder for storing the database
+ PERSIST_DIRECTORY = f"{ROOT_DIRECTORY}/DB"
+
+ # Can be changed to a specific number
+ INGEST_THREADS = os.cpu_count() or 8
+
+ # Define the Chroma settings
+ CHROMA_SETTINGS = Settings(
+     anonymized_telemetry=False,
+     is_persistent=True,
+ )
+
+
+ # Map file extensions to the loader class used to ingest them.
+ # https://python.langchain.com/en/latest/_modules/langchain/document_loaders/excel.html#UnstructuredExcelLoader
+ DOCUMENT_MAP = {
+     ".txt": TextLoader,
+     ".md": TextLoader,
+     ".py": TextLoader,
+     ".pdf": PDFMinerLoader,
+     ".csv": CSVLoader,
+     ".xls": UnstructuredExcelLoader,
+     ".xlsx": UnstructuredExcelLoader,
+     ".docx": Docx2txtLoader,
+     ".doc": Docx2txtLoader,
+ }
+
+ # Default Instructor model
+ EMBEDDING_MODEL_NAME = "hkunlp/instructor-large"  # Uses 1.5 GB of VRAM (high accuracy with lower VRAM usage)
+
+ ####
+ #### OTHER EMBEDDING MODEL OPTIONS
+ ####
+
+ # EMBEDDING_MODEL_NAME = "hkunlp/instructor-xl"  # Uses 5 GB of VRAM (most accurate of all models)
+ # EMBEDDING_MODEL_NAME = "intfloat/e5-large-v2"  # Uses 1.5 GB of VRAM (a little less accurate than instructor-large)
+ # EMBEDDING_MODEL_NAME = "intfloat/e5-base-v2"   # Uses 0.5 GB of VRAM (a good model for lower-VRAM GPUs)
+ # EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"      # Uses 0.2 GB of VRAM (less accurate but fastest; needs only ~150 MB of VRAM)
+
+ ####
+ #### MULTILINGUAL EMBEDDING MODELS
+ ####
+
+ # EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-large"  # Uses 2.5 GB of VRAM
+ # EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-base"   # Uses 1.2 GB of VRAM
+
+
+ #### SELECT AN OPEN SOURCE LLM (LARGE LANGUAGE MODEL)
+ #### Select the MODEL_ID and MODEL_BASENAME below; this is the LLM loaded
+ #### to generate the natural-language responses.
+
+ #### GPU VRAM required by the LLM alone, per parameter count (B = billion).
+ #### Does not include VRAM used by the embedding models, which need an additional 2-7 GB depending on the model.
+ ####
+ #### (B Model)   (float32)   (float16)   (GPTQ 8bit)       (GPTQ 4bit)
+ ####   7b         28 GB       14 GB       7 GB - 9 GB       3.5 GB - 5 GB
+ ####   13b        52 GB       26 GB       13 GB - 15 GB     6.5 GB - 8 GB
+ ####   32b        130 GB      65 GB       32.5 GB - 35 GB   16.25 GB - 19 GB
+ ####   65b        260.8 GB    130.4 GB    65.2 GB - 67 GB   32.6 GB - 35 GB
+
+ MODEL_ID = "TheBloke/Llama-2-7B-Chat-GGML"
+ MODEL_BASENAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+
+ ####
+ #### (FOR HF MODELS)
+ ####
+
+ # MODEL_ID = "TheBloke/vicuna-7B-1.1-HF"
+ # MODEL_BASENAME = None
+ # MODEL_ID = "TheBloke/Wizard-Vicuna-7B-Uncensored-HF"
+ # MODEL_ID = "TheBloke/guanaco-7B-HF"
+ # MODEL_ID = 'NousResearch/Nous-Hermes-13b'  # Requires ~23 GB VRAM; using SentenceTransformers
+ #                                            # alongside will reliably OOM a 24 GB card.
+ # llm = load_model(device_type, model_id=model_id)
+
+ ####
+ #### (FOR GPTQ QUANTIZED) Select an LLM based on your GPU and its VRAM (GB). Does not include embedding-model VRAM usage.
+ ####
+
+ ##### 48GB VRAM Graphics Cards (RTX 6000, RTX A6000 and other 48GB VRAM GPUs) #####
+
+ ### 65b GPTQ LLM models for 48GB GPUs (*** with the best embedding model: hkunlp/instructor-xl ***)
+ # model_id = "TheBloke/guanaco-65B-GPTQ"
+ # model_basename = "model.safetensors"
+ # model_id = "TheBloke/Airoboros-65B-GPT4-2.0-GPTQ"
+ # model_basename = "model.safetensors"
+ # model_id = "TheBloke/gpt4-alpaca-lora_mlp-65B-GPTQ"
+ # model_basename = "model.safetensors"
+ # model_id = "TheBloke/Upstage-Llama1-65B-Instruct-GPTQ"
+ # model_basename = "model.safetensors"
+
+ ##### 24GB VRAM Graphics Cards (RTX 3090 - RTX 4090 (35% faster) - RTX A5000 - RTX A5500) #####
+
+ ### 13b GPTQ models for 24GB GPUs (*** with the best embedding model: hkunlp/instructor-xl ***)
+ # model_id = "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ"
+ # model_basename = "Wizard-Vicuna-13B-Uncensored-GPTQ-4bit-128g.compat.no-act-order.safetensors"
+ # model_id = "TheBloke/vicuna-13B-v1.5-GPTQ"
+ # model_basename = "model.safetensors"
+ # model_id = "TheBloke/Nous-Hermes-13B-GPTQ"
+ # model_basename = "nous-hermes-13b-GPTQ-4bit-128g.no-act.order"
+ # model_id = "TheBloke/WizardLM-13B-V1.2-GPTQ"
+ # model_basename = "gptq_model-4bit-128g.safetensors"
+
+ ### 30b GPTQ models for 24GB GPUs (*** requires using intfloat/e5-base-v2 instead of hkunlp/instructor-large as the embedding model ***)
+ # model_id = "TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ"
+ # model_basename = "Wizard-Vicuna-30B-Uncensored-GPTQ-4bit--1g.act.order.safetensors"
+ # model_id = "TheBloke/WizardLM-30B-Uncensored-GPTQ"
+ # model_basename = "WizardLM-30B-Uncensored-GPTQ-4bit.act-order.safetensors"
+
+ ##### 8-10GB VRAM Graphics Cards (RTX 3080 - RTX 3080 Ti - RTX 3070 Ti - 3060 Ti - RTX 2000 series, Quadro RTX 4000, 5000, 6000) #####
+ ### (*** requires using intfloat/e5-small-v2 instead of hkunlp/instructor-large as the embedding model ***)
+
+ ### 7b GPTQ models for 8GB GPUs
+ # model_id = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
+ # model_basename = "Wizard-Vicuna-7B-Uncensored-GPTQ-4bit-128g.no-act.order.safetensors"
+ # model_id = "TheBloke/WizardLM-7B-uncensored-GPTQ"
+ # model_basename = "WizardLM-7B-uncensored-GPTQ-4bit-128g.compat.no-act-order.safetensors"
+ # model_id = "TheBloke/wizardLM-7B-GPTQ"
+ # model_basename = "wizardLM-7B-GPTQ-4bit.compat.no-act-order.safetensors"
+
+ ####
+ #### (FOR GGML) (quantized cpu+gpu+mps) models - check whether they support llama.cpp
+ ####
+
+ # MODEL_ID = "TheBloke/wizard-vicuna-13B-GGML"
+ # MODEL_BASENAME = "wizard-vicuna-13B.ggmlv3.q4_0.bin"
+ # MODEL_BASENAME = "wizard-vicuna-13B.ggmlv3.q6_K.bin"
+ # MODEL_BASENAME = "wizard-vicuna-13B.ggmlv3.q2_K.bin"
+ # MODEL_ID = "TheBloke/orca_mini_3B-GGML"
+ # MODEL_BASENAME = "orca-mini-3b.ggmlv3.q4_0.bin"
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ langchain
+ torch
+ accelerate
+ sentence_transformers
+ streamlit_chat
+ streamlit
+ faiss-cpu
+ tiktoken
+ ctransformers
+ huggingface-hub
+ pypdf
+ pypdf2
+ python-dotenv
+ replicate
+ docx2txt
+ # streamlit_chat  (duplicate of the entry above)
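Assuming a standard setup, the app is installed and launched with pip install -r requirements.txt followed by streamlit run app.py, with both API tokens present in .env beforehand.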