DeepVen committed
Commit 8c3e214
1 Parent(s): 63de6b6

Upload 8 files

with index code

Files changed (6)
  1. .gitattributes +0 -1
  2. Dockerfile +1 -1
  3. Index.py +237 -0
  4. extractor.py +94 -0
  5. main.py +72 -47
  6. requirements.txt +7 -1
.gitattributes CHANGED
@@ -25,7 +25,6 @@
  *.safetensors filter=lfs diff=lfs merge=lfs -text
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
  *.tflite filter=lfs diff=lfs merge=lfs -text
  *.tgz filter=lfs diff=lfs merge=lfs -text
  *.wasm filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -24,4 +24,4 @@ WORKDIR $HOME/app
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
  COPY --chown=user . $HOME/app

- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+ CMD ["uvicorn", "Index:app", "--host", "0.0.0.0", "--port", "7860"]
Index.py ADDED
@@ -0,0 +1,237 @@
+ from fastapi import FastAPI
+
+ # from transformers import pipeline
+ from txtai.embeddings import Embeddings
+ from txtai.pipeline import Extractor
+ from langchain.document_loaders import WebBaseLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ from langchain import HuggingFaceHub
+ from langchain.prompts import PromptTemplate
+ from langchain.chains import LLMChain
+ from txtai.embeddings import Embeddings
+ from txtai.pipeline import Extractor
+
+ import pandas as pd
+ import sqlite3
+ import os
+
+ # NOTE - we configure docs_url to serve the interactive Docs at the root path
+ # of the app. This way, we can use the docs as a landing page for the app on Spaces.
+ app = FastAPI(docs_url="/")
+ # app = FastAPI()
+
+ # pipe = pipeline("text2text-generation", model="google/flan-t5-small")
+
+
+ # @app.get("/generate")
+ # def generate(text: str):
+ #     """
+ #     Using the text2text-generation pipeline from `transformers`, generate text
+ #     from the given input text. The model used is `google/flan-t5-small`, which
+ #     can be found [here](https://huggingface.co/google/flan-t5-small).
+ #     """
+ #     output = pipe(text)
+ #     return {"output": output[0]["generated_text"]}
+
+
+ def load_embeddings(
+     domain: str = "",
+     db_present: bool = True,
+     path: str = "sentence-transformers/all-MiniLM-L6-v2",
+     index_name: str = "index",
+ ):
+     # Create embeddings model with content support
+     embeddings = Embeddings({"path": path, "content": True})
+
+     # if Vector DB is not present
+     if not db_present:
+         return embeddings
+     else:
+         if domain == "":
+             embeddings.load(index_name)  # change this later
+         else:
+             print(3)
+             embeddings.load(f"{index_name}/{domain}")
+         return embeddings
+
+
+ def _check_if_db_exists(db_path: str) -> bool:
+     return os.path.exists(db_path)
+
+
+ def _text_splitter(doc):
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=500,
+         chunk_overlap=50,
+         length_function=len,
+     )
+     return text_splitter.transform_documents(doc)
+
+
+ def _load_docs(path: str):
+     load_doc = WebBaseLoader(path).load()
+     doc = _text_splitter(load_doc)
+     return doc
+
+
+ def _stream(dataset, limit, index: int = 0):
+     for row in dataset:
+         yield (index, row.page_content, None)
+         index += 1
+
+         if index >= limit:
+             break
+
+
+ def _max_index_id(path):
+     db = sqlite3.connect(path)
+
+     table = "sections"
+     df = pd.read_sql_query(f"select * from {table}", db)
+     return {"max_index": df["indexid"].max()}
+
+
+ def _upsert_docs(doc, embeddings, vector_doc_path: str, db_present: bool):
+     print(vector_doc_path)
+     if db_present:
+         print(1)
+         max_index = _max_index_id(f"{vector_doc_path}/documents")
+         print(max_index)
+         embeddings.upsert(_stream(doc, 500, max_index["max_index"]))
+         print("Embeddings done!!")
+         embeddings.save(vector_doc_path)
+         print("Embeddings done - 1!!")
+     else:
+         print(2)
+         embeddings.index(_stream(doc, 500, 0))
+         embeddings.save(vector_doc_path)
+         max_index = _max_index_id(f"{vector_doc_path}/documents")
+         print(max_index)
+     # check
+     # max_index = _max_index_id(f"{vector_doc_path}/documents")
+     # print(max_index)
+     return max_index
+
+
+ # def prompt(question):
+ #     return f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
+ #     Question: {question}
+ #     Context: """
+
+
+ # def search(query, question=None):
+ #     # Default question to query if empty
+ #     if not question:
+ #         question = query
+
+ #     return extractor([("answer", query, prompt(question), False)])[0][1]
+
+
+ # @app.get("/rag")
+ # def rag(question: str):
+ #     # question = "what is the document about?"
+ #     answer = search(question)
+ #     # print(question, answer)
+ #     return {answer}
+
+
+ # @app.get("/index")
+ # def get_url_file_path(url_path: str):
+ #     embeddings = load_embeddings()
+ #     doc = _load_docs(url_path)
+ #     embeddings, max_index = _upsert_docs(doc, embeddings)
+ #     return max_index
+
+
+ @app.get("/index/{domain}/")
+ def get_domain_file_path(domain: str, file_path: str):
+     print(domain, file_path)
+     print(os.getcwd())
+     bool_value = _check_if_db_exists(db_path=f"{os.getcwd()}/index/{domain}/documents")
+     print(bool_value)
+     if bool_value:
+         embeddings = load_embeddings(domain=domain, db_present=bool_value)
+         print(embeddings)
+         doc = _load_docs(file_path)
+         max_index = _upsert_docs(
+             doc=doc,
+             embeddings=embeddings,
+             vector_doc_path=f"{os.getcwd()}/index/{domain}",
+             db_present=bool_value,
+         )
+         # print("-------")
+     else:
+         embeddings = load_embeddings(domain=domain, db_present=bool_value)
+         doc = _load_docs(file_path)
+         max_index = _upsert_docs(
+             doc=doc,
+             embeddings=embeddings,
+             vector_doc_path=f"{os.getcwd()}/index/{domain}",
+             db_present=bool_value,
+         )
+     # print("Final - output : ", max_index)
+     return "Executed Successfully!!"
+
+
+ def _check_if_db_exists(db_path: str) -> bool:
+     return os.path.exists(db_path)
+
+
+ def _load_embeddings_from_db(
+     db_present: bool,
+     domain: str,
+     path: str = "sentence-transformers/all-MiniLM-L6-v2",
+ ):
+     # Create embeddings model with content support
+     embeddings = Embeddings({"path": path, "content": True})
+     # if Vector DB is not present
+     if not db_present:
+         return embeddings
+     else:
+         if domain == "":
+             embeddings.load("index")  # change this later
+         else:
+             print(3)
+             embeddings.load(f"{os.getcwd()}/index/{domain}")
+         return embeddings
+
+
+ def _prompt(question):
+     return f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
+     Question: {question}
+     Context: """
+
+
+ def _search(query, extractor, question=None):
+     # Default question to query if empty
+     if not question:
+         question = query
+
+     # template = f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
+     # Question: {question}
+     # Context: """
+
+     # prompt = PromptTemplate(template=template, input_variables=["question"])
+     # llm_chain = LLMChain(prompt=prompt, llm=extractor)
+
+     # return {"question": question, "answer": llm_chain.run(question)}
+     return extractor([("answer", query, _prompt(question), False)])[0][1]
+
+
+ @app.get("/rag")
+ def rag(domain: str, question: str):
+     db_exists = _check_if_db_exists(db_path=f"{os.getcwd()}/index/{domain}/documents")
+     print(db_exists)
+     # if db_exists:
+     embeddings = _load_embeddings_from_db(db_exists, domain)
+     # Create extractor instance
+     # extractor = Extractor(embeddings, "google/flan-t5-base")
+     extractor = Extractor(embeddings, "TheBloke/Llama-2-7B-GGUF/llama-2-7b.Q4_0.gguf")
+     # llm = HuggingFaceHub(
+     #     repo_id="google/flan-t5-xxl",
+     #     model_kwargs={"temperature": 1, "max_length": 1000000},
+     # )
+     # else:
+     answer = _search(question, extractor)
+     return {"question": question, "answer": answer}
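
For reference, a minimal client-side sketch of how the two endpoints added in Index.py could be exercised once the container is running on port 7860 (per the Dockerfile CMD). The base URL, domain name and document URL below are illustrative assumptions, not part of the commit; `requests` is already pinned in requirements.txt.

import requests

BASE_URL = "http://localhost:7860"  # assumed local port, matching the Dockerfile CMD

# 1) Index a web page under a domain: get_domain_file_path() loads the URL,
#    splits it into ~500-character chunks and upserts them into the txtai index.
requests.get(
    f"{BASE_URL}/index/example-domain/",
    params={"file_path": "https://example.com/some-page.html"},  # hypothetical document URL
)

# 2) Ask a question against the indexed content via the /rag endpoint.
resp = requests.get(
    f"{BASE_URL}/rag",
    params={"domain": "example-domain", "question": "What is the document about?"},
)
print(resp.json())  # {"question": ..., "answer": ...}
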
extractor.py ADDED
@@ -0,0 +1,94 @@
+ from fastapi import FastAPI
+
+ # from transformers import pipeline
+ from txtai.embeddings import Embeddings
+ from txtai.pipeline import Extractor
+ from langchain.document_loaders import WebBaseLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ import sqlite3  # used by _max_index_id() below
+ import pandas as pd  # used by _max_index_id() below
+
+ # NOTE - we configure docs_url to serve the interactive Docs at the root path
+ # of the app. This way, we can use the docs as a landing page for the app on Spaces.
+ app = FastAPI(docs_url="/")
+
+ # Create embeddings model with content support
+ embeddings = Embeddings(
+     {"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True}
+ )
+
+
+ # Create extractor instance
+ # extractor = Extractor(embeddings, "google/flan-t5-base")
+
+
+ def _stream(dataset, limit, index: int = 0):
+     for row in dataset:
+         yield (index, row.page_content, None)
+         index += 1
+
+         if index >= limit:
+             break
+
+
+ def _max_index_id(path):
+     db = sqlite3.connect(path)
+
+     table = "sections"
+     df = pd.read_sql_query(f"select * from {table}", db)
+     return {"max_index": df["indexid"].max()}
+
+
+ def _prompt(question):
+     return f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
+     Question: {question}
+     Context: """
+
+
+ async def _search(query, extractor, question=None):
+     # Default question to query if empty
+     if not question:
+         question = query
+
+     return extractor([("answer", query, _prompt(question), False)])[0][1]
+
+
+ def _text_splitter(doc):
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=500,
+         chunk_overlap=50,
+         length_function=len,
+     )
+     return text_splitter.transform_documents(doc)
+
+
+ def _load_docs(path: str):
+     load_doc = WebBaseLoader(path).load()
+     doc = _text_splitter(load_doc)
+     return doc
+
+
+ async def _upsert_docs(doc):
+     max_index = _max_index_id("index/documents")
+     embeddings.upsert(_stream(doc, 500, max_index["max_index"]))
+     embeddings.save("index")
+
+     return embeddings
+
+
+ @app.put("/rag/{path}")
+ async def get_doc_path(path: str):
+     return path
+
+
+ @app.get("/rag")
+ async def rag(question: str):
+     # question = "what is the document about?"
+     embeddings.load("index")
+     path = await get_doc_path(path)
+     doc = _load_docs(path)
+     embeddings = _upsert_docs(doc)
+
+     # Create extractor instance
+     extractor = Extractor(embeddings, "google/flan-t5-base")
+     answer = await _search(question, extractor)
+     # print(question, answer)
+     return {answer}
main.py CHANGED
@@ -1,60 +1,85 @@
  from fastapi import FastAPI
- from transformers import pipeline
  from txtai.embeddings import Embeddings
  from txtai.pipeline import Extractor
- from llama_cpp import Llama
+ import os
+ from langchain import HuggingFaceHub
+ from langchain.prompts import PromptTemplate
+ from langchain.chains import LLMChain

- from huggingface_hub import hf_hub_download
+ # from transformers import pipeline

  # NOTE - we configure docs_url to serve the interactive Docs at the root path
  # of the app. This way, we can use the docs as a landing page for the app on Spaces.
  app = FastAPI(docs_url="/")

- # Create embeddings model with content support
- # embeddings = Embeddings({"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True})
- # embeddings.load('index')
-
- # Create extractor instance
- #extractor = Extractor(embeddings, "google/flan-t5-base")
-
- # pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
-
- # model_name_or_path = "TheBloke/Llama-2-7B-GGUF"
- # model_basename = "llama-2-7b.Q4_0.gguf"
-
- model_name_or_path = "TheBloke/Llama-2-13B-GGUF"
- model_basename = "llama-2-13b.Q3_K_S.gguf"
-
- model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
-
- llm = Llama(model_path=model_path)
-
- @app.get("/generate")
- def generate(text: str):
-     """
-     llama2 q4 backend
-     """
-     output = llm(text)
-     return {"output": output}
-
-
- def prompt(question):
-     return f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
+ # @app.get("/generate")
+ # def generate(text: str):
+ #     """
+ #     Using the text2text-generation pipeline from `transformers`, generate text
+ #     from the given input text. The model used is `google/flan-t5-small`, which
+ #     can be found [here](https://huggingface.co/google/flan-t5-small).
+ #     """
+ #     output = pipe(text)
+ #     return {"output": output[0]["generated_text"]}
+
+
+ def _check_if_db_exists(db_path: str) -> bool:
+     return os.path.exists(db_path)
+
+
+ def _load_embeddings_from_db(
+     db_present: bool,
+     domain: str,
+     path: str = "sentence-transformers/all-MiniLM-L6-v2",
+ ):
+     # Create embeddings model with content support
+     embeddings = Embeddings({"path": path, "content": True})
+     # if Vector DB is not present
+     if not db_present:
+         return embeddings
+     else:
+         if domain == "":
+             embeddings.load("index")  # change this later
+         else:
+             print(3)
+             embeddings.load(f"index/{domain}")
+         return embeddings
+
+
+ def _prompt(question):
+     return f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
      Question: {question}
      Context: """


- def search(query, question=None):
-     # Default question to query if empty
-     if not question:
-         question = query
-
-     return extractor([("answer", query, prompt(question), False)])[0][1]
-
-
- # @app.get("/rag")
- # def rag(question: str):
- #     # question = "what is the document about?"
- #     answer = search(question)
- #     # print(question, answer)
- #     return {answer}
+ def _search(query, extractor, question=None):
+     # Default question to query if empty
+     if not question:
+         question = query
+
+     # template = f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
+     # Question: {question}
+     # Context: """
+
+     # prompt = PromptTemplate(template=template, input_variables=["question"])
+     # llm_chain = LLMChain(prompt=prompt, llm=extractor)
+
+     # return {"question": question, "answer": llm_chain.run(question)}
+     return extractor([("answer", query, _prompt(question), False)])[0][1]
+
+
+ @app.get("/rag")
+ def rag(domain: str, question: str):
+     db_exists = _check_if_db_exists(db_path=f"{os.getcwd()}\index\{domain}\documents")
+     print(db_exists)
+     # if db_exists:
+     embeddings = _load_embeddings_from_db(db_exists, domain)
+     # Create extractor instance
+     extractor = Extractor(embeddings, "google/flan-t5-base")
+     # llm = HuggingFaceHub(
+     #     repo_id="google/flan-t5-xxl",
+     #     model_kwargs={"temperature": 1, "max_length": 1000000},
+     # )
+     # else:
+     answer = _search(question, extractor)
+     return {"question": question, "answer": answer}
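
For context, the `extractor([...])` call used by `_search` in both main.py and Index.py follows txtai's extractor pipeline: each work item is a (name, query, prompt, snippet) tuple and the result is a list of (name, answer) pairs. A standalone sketch of that pattern with an illustrative one-row index and the same flan-t5-base model referenced above (the example text and question are assumptions, not from the commit):

from txtai.embeddings import Embeddings
from txtai.pipeline import Extractor

# Small in-memory index with content storage, mirroring _load_embeddings_from_db()
embeddings = Embeddings({"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True})
embeddings.index([(0, "txtai builds an embeddings index over text and supports extractive QA.", None)])

extractor = Extractor(embeddings, "google/flan-t5-base")

question = "What does txtai do?"
prompt = f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
Question: {question}
Context: """

# Same (name, query, prompt, snippet) tuple shape as in _search(); [0][1] is the answer text
print(extractor([("answer", question, prompt, False)])[0][1])
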
requirements.txt CHANGED
@@ -2,5 +2,11 @@ fastapi==0.74.*
  requests==2.27.*
  uvicorn[standard]==0.17.*
  sentencepiece==0.1.*
+ torch==1.12.*
+ transformers==4.*
  txtai==6.0.*
- llama-cpp-python
+ langchain==0.0.301
+ langsmith==0.0.40
+ bs4==0.0.1
+ pandas==2.1.1
+ SQLAlchemy==2.0.21