isayahc committed
Commit
77a48be
1 Parent(s): a2318db

Removed commented-out code for readability

Files changed (1): app.py +21 -47
app.py CHANGED
@@ -1,48 +1,44 @@
- # gradio
- import gradio as gr
- #import random
+
+ # logging
+ import logging
+
+ # access .env file
+ import os
+ from dotenv import load_dotenv
+
  import time
+
  #boto3 for S3 access
  import boto3
  from botocore import UNSIGNED
  from botocore.client import Config
- # access .env file
- import os
- from dotenv import load_dotenv
- #from bs4 import BeautifulSoup
+
  # HF libraries
  from langchain.llms import HuggingFaceHub
  from langchain.embeddings import HuggingFaceHubEmbeddings
  # vectorstore
  from langchain.vectorstores import Chroma
- #from langchain.vectorstores import FAISS
+
  # retrieval chain
- #from langchain.chains import RetrievalQA
  from langchain.chains import RetrievalQAWithSourcesChain
  # prompt template
  from langchain.prompts import PromptTemplate
  from langchain.memory import ConversationBufferMemory
- # logging
- import logging
- #import zipfile
- # improve results with retriever
- # from langchain.retrievers import ContextualCompressionRetriever
- # from langchain.retrievers.document_compressors import LLMChainExtractor
- # from langchain.retrievers.document_compressors import EmbeddingsFilter
- # from langchain.retrievers.multi_query import MultiQueryRetriever
  from langchain.retrievers import BM25Retriever, EnsembleRetriever
  # reorder retrieved documents
- #from langchain.document_transformers import LongContextReorder
  # github issues
  from langchain.document_loaders import GitHubIssuesLoader
  # debugging
  from langchain.globals import set_verbose
  # caching
  from langchain.globals import set_llm_cache
- #from langchain.cache import InMemoryCache
  # We can do the same thing with a SQLite cache
  from langchain.cache import SQLiteCache
- #set_llm_cache(InMemoryCache())
+
+ # gradio
+ import gradio as gr
+
+
 
  set_verbose(True)
 
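Note on the caching imports: the commented-out InMemoryCache alternative is gone, but set_llm_cache and SQLiteCache are still imported. If the SQLite cache is switched on further down in app.py, the call would be a one-liner along these lines (the database path here is an assumption, not taken from the repo):

from langchain.globals import set_llm_cache
from langchain.cache import SQLiteCache

# Persist LLM responses in a local SQLite file so repeated,
# identical prompts are served from disk instead of re-querying
# the Hugging Face Hub endpoint. The path is a placeholder.
set_llm_cache(SQLiteCache(database_path=".langchain.db"))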
 
@@ -65,7 +61,7 @@ llm = HuggingFaceHub(repo_id=llm_model_name, model_kwargs={
  # "return_full_text":True
  })
 
- #embedding_model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
+ # initialize Embedding config
  embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
  embeddings = HuggingFaceHubEmbeddings(repo_id=embedding_model_name)
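The new comment only labels the embedding setup; behaviour is unchanged. HuggingFaceHubEmbeddings calls the Hub's feature-extraction endpoint for the given repo_id, and sentence-transformers/all-mpnet-base-v2 produces 768-dimensional vectors. A quick sanity check, assuming HUGGINGFACEHUB_API_TOKEN is set in the environment:

from langchain.embeddings import HuggingFaceHubEmbeddings

embeddings = HuggingFaceHubEmbeddings(repo_id="sentence-transformers/all-mpnet-base-v2")

# embed_query embeds a single string; embed_documents takes a list
vector = embeddings.embed_query("How do I open a GitHub issue?")
print(len(vector))  # 768 for all-mpnet-base-v2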
 
@@ -88,26 +84,14 @@ s3.download_file(AWS_S3_LOCATION, AWS_S3_FILE, VS_DESTINATION)
  db = Chroma(persist_directory="./vectorstore", embedding_function=embeddings)
  db.get()
 
- ## FAISS DB
- # s3.download_file('rad-rag-demos', 'vectorstores/faiss_db_ray.zip', './chroma_db/faiss_db_ray.zip')
- # with zipfile.ZipFile('./chroma_db/faiss_db_ray.zip', 'r') as zip_ref:
- #     zip_ref.extractall('./chroma_db/')
-
- # FAISS_INDEX_PATH='./chroma_db/faiss_db_ray'
- # db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
-
- # initialize the bm25 retriever and chroma/faiss retriever
- # bm25_retriever = BM25Retriever.
- # bm25_retriever.k = 2
 
  retriever = db.as_retriever(search_type="mmr")#, search_kwargs={'k': 3, 'lambda_mult': 0.25})
 
  # asks LLM to create 3 alternatives based on user query
- # multi_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=llm)
+
 
  # asks LLM to extract relevant parts from retrieved documents
- # compressor = LLMChainExtractor.from_llm(llm)
- # compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=multi_retriever)
+
 
  global qa
  template = """
@@ -138,12 +122,7 @@ logging.getLogger("langchain.chains.qa_with_sources").setLevel(logging.INFO)
 
 
 
- # qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True, verbose=True, chain_type_kwargs={
- #     "verbose": True,
- #     "memory": memory,
- #     "prompt": prompt
- #     }
- # )
+
  qa = RetrievalQAWithSourcesChain.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True, verbose=True, chain_type_kwargs={
      "verbose": True,
      "memory": memory,
@@ -168,12 +147,7 @@ def bot(history):
      src_list = '\n'.join(sources)
      print_this = response['answer'] + "\n\n\n Sources: \n\n\n" + src_list
 
-     # history[-1][1] = ""
-     # for character in response['answer']:
-     #     #print_this:
-     #     history[-1][1] += character
-     #     time.sleep(0.01)
-     #     yield history
+
      history[-1][1] = print_this #response['answer']
      return history
 
 
153