DeepVen committed
Commit 8c3e214
1 Parent(s): 63de6b6

Upload 8 files

with index code

Files changed (6)
  1. .gitattributes +0 -1
  2. Dockerfile +1 -1
  3. Index.py +237 -0
  4. extractor.py +94 -0
  5. main.py +72 -47
  6. requirements.txt +7 -1
.gitattributes CHANGED
@@ -25,7 +25,6 @@
  *.safetensors filter=lfs diff=lfs merge=lfs -text
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
  *.tflite filter=lfs diff=lfs merge=lfs -text
  *.tgz filter=lfs diff=lfs merge=lfs -text
  *.wasm filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -24,4 +24,4 @@ WORKDIR $HOME/app
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
  COPY --chown=user . $HOME/app

- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+ CMD ["uvicorn", "Index:app", "--host", "0.0.0.0", "--port", "7860"]
Index.py ADDED
@@ -0,0 +1,237 @@
+ from fastapi import FastAPI
+
+ # from transformers import pipeline
+ from txtai.embeddings import Embeddings
+ from txtai.pipeline import Extractor
+ from langchain.document_loaders import WebBaseLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ from langchain import HuggingFaceHub
+ from langchain.prompts import PromptTemplate
+ from langchain.chains import LLMChain
+ from txtai.embeddings import Embeddings
+ from txtai.pipeline import Extractor
+
+ import pandas as pd
+ import sqlite3
+ import os
+
+ # NOTE - we configure docs_url to serve the interactive Docs at the root path
+ # of the app. This way, we can use the docs as a landing page for the app on Spaces.
+ app = FastAPI(docs_url="/")
+ # app = FastAPI()
+
+ # pipe = pipeline("text2text-generation", model="google/flan-t5-small")
+
+
+ # @app.get("/generate")
+ # def generate(text: str):
+ #     """
+ #     Using the text2text-generation pipeline from `transformers`, generate text
+ #     from the given input text. The model used is `google/flan-t5-small`, which
+ #     can be found [here](https://huggingface.co/google/flan-t5-small).
+ #     """
+ #     output = pipe(text)
+ #     return {"output": output[0]["generated_text"]}
+
+
+ def load_embeddings(
+     domain: str = "",
+     db_present: bool = True,
+     path: str = "sentence-transformers/all-MiniLM-L6-v2",
+     index_name: str = "index",
+ ):
+     # Create embeddings model with content support
+     embeddings = Embeddings({"path": path, "content": True})
+
+     # if Vector DB is not present
+     if not db_present:
+         return embeddings
+     else:
+         if domain == "":
+             embeddings.load(index_name)  # change this later
+         else:
+             print(3)
+             embeddings.load(f"{index_name}/{domain}")
+         return embeddings
+
+
+ def _check_if_db_exists(db_path: str) -> bool:
+     return os.path.exists(db_path)
+
+
+ def _text_splitter(doc):
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=500,
+         chunk_overlap=50,
+         length_function=len,
+     )
+     return text_splitter.transform_documents(doc)
+
+
+ def _load_docs(path: str):
+     load_doc = WebBaseLoader(path).load()
+     doc = _text_splitter(load_doc)
+     return doc
+
+
+ def _stream(dataset, limit, index: int = 0):
+     for row in dataset:
+         yield (index, row.page_content, None)
+         index += 1
+
+         if index >= limit:
+             break
+
+
+ def _max_index_id(path):
+     db = sqlite3.connect(path)
+
+     table = "sections"
+     df = pd.read_sql_query(f"select * from {table}", db)
+     return {"max_index": df["indexid"].max()}
+
+
+ def _upsert_docs(doc, embeddings, vector_doc_path: str, db_present: bool):
+     print(vector_doc_path)
+     if db_present:
+         print(1)
+         max_index = _max_index_id(f"{vector_doc_path}/documents")
+         print(max_index)
+         embeddings.upsert(_stream(doc, 500, max_index["max_index"]))
+         print("Embeddings done!!")
+         embeddings.save(vector_doc_path)
+         print("Embeddings done - 1!!")
+     else:
+         print(2)
+         embeddings.index(_stream(doc, 500, 0))
+         embeddings.save(vector_doc_path)
+         max_index = _max_index_id(f"{vector_doc_path}/documents")
+         print(max_index)
+     # check
+     # max_index = _max_index_id(f"{vector_doc_path}/documents")
+     # print(max_index)
+     return max_index
+
+
+ # def prompt(question):
+ #     return f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
+ #     Question: {question}
+ #     Context: """
+
+
+ # def search(query, question=None):
+ #     # Default question to query if empty
+ #     if not question:
+ #         question = query
+
+ #     return extractor([("answer", query, prompt(question), False)])[0][1]
+
+
+ # @app.get("/rag")
+ # def rag(question: str):
+ #     # question = "what is the document about?"
+ #     answer = search(question)
+ #     # print(question, answer)
+ #     return {answer}
+
+
+ # @app.get("/index")
+ # def get_url_file_path(url_path: str):
+ #     embeddings = load_embeddings()
+ #     doc = _load_docs(url_path)
+ #     embeddings, max_index = _upsert_docs(doc, embeddings)
+ #     return max_index
+
+
+ @app.get("/index/{domain}/")
+ def get_domain_file_path(domain: str, file_path: str):
+     print(domain, file_path)
+     print(os.getcwd())
+     bool_value = _check_if_db_exists(db_path=f"{os.getcwd()}/index/{domain}/documents")
+     print(bool_value)
+     if bool_value:
+         embeddings = load_embeddings(domain=domain, db_present=bool_value)
+         print(embeddings)
+         doc = _load_docs(file_path)
+         max_index = _upsert_docs(
+             doc=doc,
+             embeddings=embeddings,
+             vector_doc_path=f"{os.getcwd()}/index/{domain}",
+             db_present=bool_value,
+         )
+         # print("-------")
+     else:
+         embeddings = load_embeddings(domain=domain, db_present=bool_value)
+         doc = _load_docs(file_path)
+         max_index = _upsert_docs(
+             doc=doc,
+             embeddings=embeddings,
+             vector_doc_path=f"{os.getcwd()}/index/{domain}",
+             db_present=bool_value,
+         )
+     # print("Final - output : ", max_index)
+     return "Executed Successfully!!"
+
+
+ def _check_if_db_exists(db_path: str) -> bool:
+     return os.path.exists(db_path)
+
+
+ def _load_embeddings_from_db(
+     db_present: bool,
+     domain: str,
+     path: str = "sentence-transformers/all-MiniLM-L6-v2",
+ ):
+     # Create embeddings model with content support
+     embeddings = Embeddings({"path": path, "content": True})
+     # if Vector DB is not present
+     if not db_present:
+         return embeddings
+     else:
+         if domain == "":
+             embeddings.load("index")  # change this later
+         else:
+             print(3)
+             embeddings.load(f"{os.getcwd()}/index/{domain}")
+         return embeddings
+
+
+ def _prompt(question):
+     return f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
+     Question: {question}
+     Context: """
+
+
+ def _search(query, extractor, question=None):
+     # Default question to query if empty
+     if not question:
+         question = query
+
+     # template = f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
+     # Question: {question}
+     # Context: """
+
+     # prompt = PromptTemplate(template=template, input_variables=["question"])
+     # llm_chain = LLMChain(prompt=prompt, llm=extractor)
+
+     # return {"question": question, "answer": llm_chain.run(question)}
+     return extractor([("answer", query, _prompt(question), False)])[0][1]
+
+
+ @app.get("/rag")
+ def rag(domain: str, question: str):
+     db_exists = _check_if_db_exists(db_path=f"{os.getcwd()}/index/{domain}/documents")
+     print(db_exists)
+     # if db_exists:
+     embeddings = _load_embeddings_from_db(db_exists, domain)
+     # Create extractor instance
+     # extractor = Extractor(embeddings, "google/flan-t5-base")
+     extractor = Extractor(embeddings, "TheBloke/Llama-2-7B-GGUF/llama-2-7b.Q4_0.gguf")
+     # llm = HuggingFaceHub(
+     #     repo_id="google/flan-t5-xxl",
+     #     model_kwargs={"temperature": 1, "max_length": 1000000},
+     # )
+     # else:
+     answer = _search(question, extractor)
+     return {"question": question, "answer": answer}
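
For reference, a minimal client-side sketch of how the two endpoints added in Index.py could be exercised once the container is running on port 7860 (per the Dockerfile CMD). The base URL, domain name and document URL below are illustrative assumptions, not part of the commit; `requests` is already pinned in requirements.txt.

import requests

BASE_URL = "http://localhost:7860"  # assumed local port, matching the Dockerfile CMD

# 1) Index a web page under a domain: get_domain_file_path() loads the URL,
#    splits it into ~500-character chunks and upserts them into the txtai index.
requests.get(
    f"{BASE_URL}/index/example-domain/",
    params={"file_path": "https://example.com/some-page.html"},  # hypothetical document URL
)

# 2) Ask a question against the indexed content via the /rag endpoint.
resp = requests.get(
    f"{BASE_URL}/rag",
    params={"domain": "example-domain", "question": "What is the document about?"},
)
print(resp.json())  # {"question": ..., "answer": ...}
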
extractor.py ADDED
@@ -0,0 +1,94 @@
+ from fastapi import FastAPI
+
+ # from transformers import pipeline
+ from txtai.embeddings import Embeddings
+ from txtai.pipeline import Extractor
+ from langchain.document_loaders import WebBaseLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ import sqlite3  # used by _max_index_id() below
+ import pandas as pd  # used by _max_index_id() below
+
+ # NOTE - we configure docs_url to serve the interactive Docs at the root path
+ # of the app. This way, we can use the docs as a landing page for the app on Spaces.
+ app = FastAPI(docs_url="/")
+
+ # Create embeddings model with content support
+ embeddings = Embeddings(
+     {"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True}
+ )
+
+
+ # Create extractor instance
+ # extractor = Extractor(embeddings, "google/flan-t5-base")
+
+
+ def _stream(dataset, limit, index: int = 0):
+     for row in dataset:
+         yield (index, row.page_content, None)
+         index += 1
+
+         if index >= limit:
+             break
+
+
+ def _max_index_id(path):
+     db = sqlite3.connect(path)
+
+     table = "sections"
+     df = pd.read_sql_query(f"select * from {table}", db)
+     return {"max_index": df["indexid"].max()}
+
+
+ def _prompt(question):
+     return f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
+     Question: {question}
+     Context: """
+
+
+ async def _search(query, extractor, question=None):
+     # Default question to query if empty
+     if not question:
+         question = query
+
+     return extractor([("answer", query, _prompt(question), False)])[0][1]
+
+
+ def _text_splitter(doc):
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=500,
+         chunk_overlap=50,
+         length_function=len,
+     )
+     return text_splitter.transform_documents(doc)
+
+
+ def _load_docs(path: str):
+     load_doc = WebBaseLoader(path).load()
+     doc = _text_splitter(load_doc)
+     return doc
+
+
+ async def _upsert_docs(doc):
+     max_index = _max_index_id("index/documents")
+     embeddings.upsert(_stream(doc, 500, max_index["max_index"]))
+     embeddings.save("index")
+
+     return embeddings
+
+
+ @app.put("/rag/{path}")
+ async def get_doc_path(path: str):
+     return path
+
+
+ @app.get("/rag")
+ async def rag(question: str):
+     # question = "what is the document about?"
+     embeddings.load("index")
+     path = await get_doc_path(path)
+     doc = _load_docs(path)
+     embeddings = _upsert_docs(doc)
+
+     # Create extractor instance
+     extractor = Extractor(embeddings, "google/flan-t5-base")
+     answer = await _search(question, extractor)
+     # print(question, answer)
+     return {answer}
main.py CHANGED
@@ -1,60 +1,85 @@
  from fastapi import FastAPI
- from transformers import pipeline
  from txtai.embeddings import Embeddings
  from txtai.pipeline import Extractor
- from llama_cpp import Llama
+ import os
+ from langchain import HuggingFaceHub
+ from langchain.prompts import PromptTemplate
+ from langchain.chains import LLMChain

- from huggingface_hub import hf_hub_download
+ # from transformers import pipeline

  # NOTE - we configure docs_url to serve the interactive Docs at the root path
  # of the app. This way, we can use the docs as a landing page for the app on Spaces.
  app = FastAPI(docs_url="/")

- # Create embeddings model with content support
- # embeddings = Embeddings({"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True})
- # embeddings.load('index')
-
- # Create extractor instance
- #extractor = Extractor(embeddings, "google/flan-t5-base")
-
- # pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
-
- # model_name_or_path = "TheBloke/Llama-2-7B-GGUF"
- # model_basename = "llama-2-7b.Q4_0.gguf"
-
- model_name_or_path = "TheBloke/Llama-2-13B-GGUF"
- model_basename = "llama-2-13b.Q3_K_S.gguf"
-
- model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
-
- llm = Llama(model_path=model_path)
-
- @app.get("/generate")
- def generate(text: str):
-     """
-     llama2 q4 backend
-     """
-     output = llm(text)
-     return {"output": output}
-
-
- def prompt(question):
-     return f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
+ # @app.get("/generate")
+ # def generate(text: str):
+ #     """
+ #     Using the text2text-generation pipeline from `transformers`, generate text
+ #     from the given input text. The model used is `google/flan-t5-small`, which
+ #     can be found [here](https://huggingface.co/google/flan-t5-small).
+ #     """
+ #     output = pipe(text)
+ #     return {"output": output[0]["generated_text"]}
+
+
+ def _check_if_db_exists(db_path: str) -> bool:
+     return os.path.exists(db_path)
+
+
+ def _load_embeddings_from_db(
+     db_present: bool,
+     domain: str,
+     path: str = "sentence-transformers/all-MiniLM-L6-v2",
+ ):
+     # Create embeddings model with content support
+     embeddings = Embeddings({"path": path, "content": True})
+     # if Vector DB is not present
+     if not db_present:
+         return embeddings
+     else:
+         if domain == "":
+             embeddings.load("index")  # change this later
+         else:
+             print(3)
+             embeddings.load(f"index/{domain}")
+         return embeddings
+
+
+ def _prompt(question):
+     return f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
      Question: {question}
      Context: """


- def search(query, question=None):
-     # Default question to query if empty
-     if not question:
-         question = query
-
-     return extractor([("answer", query, prompt(question), False)])[0][1]
-
-
- # @app.get("/rag")
- # def rag(question: str):
- #     # question = "what is the document about?"
- #     answer = search(question)
- #     # print(question, answer)
- #     return {answer}
+ def _search(query, extractor, question=None):
+     # Default question to query if empty
+     if not question:
+         question = query
+
+     # template = f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
+     # Question: {question}
+     # Context: """
+
+     # prompt = PromptTemplate(template=template, input_variables=["question"])
+     # llm_chain = LLMChain(prompt=prompt, llm=extractor)
+
+     # return {"question": question, "answer": llm_chain.run(question)}
+     return extractor([("answer", query, _prompt(question), False)])[0][1]
+
+
+ @app.get("/rag")
+ def rag(domain: str, question: str):
+     db_exists = _check_if_db_exists(db_path=f"{os.getcwd()}\index\{domain}\documents")
+     print(db_exists)
+     # if db_exists:
+     embeddings = _load_embeddings_from_db(db_exists, domain)
+     # Create extractor instance
+     extractor = Extractor(embeddings, "google/flan-t5-base")
+     # llm = HuggingFaceHub(
+     #     repo_id="google/flan-t5-xxl",
+     #     model_kwargs={"temperature": 1, "max_length": 1000000},
+     # )
+     # else:
+     answer = _search(question, extractor)
+     return {"question": question, "answer": answer}
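
For context, the `extractor([...])` call used by `_search` in both main.py and Index.py follows txtai's extractor pipeline: each work item is a (name, query, prompt, snippet) tuple and the result is a list of (name, answer) pairs. A standalone sketch of that pattern with an illustrative one-row index and the same flan-t5-base model referenced above (the example text and question are assumptions, not from the commit):

from txtai.embeddings import Embeddings
from txtai.pipeline import Extractor

# Small in-memory index with content storage, mirroring _load_embeddings_from_db()
embeddings = Embeddings({"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True})
embeddings.index([(0, "txtai builds an embeddings index over text and supports extractive QA.", None)])

extractor = Extractor(embeddings, "google/flan-t5-base")

question = "What does txtai do?"
prompt = f"""Answer the following question using only the context below. Say 'no answer' when the question can't be answered.
Question: {question}
Context: """

# Same (name, query, prompt, snippet) tuple shape as in _search(); [0][1] is the answer text
print(extractor([("answer", question, prompt, False)])[0][1])
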
requirements.txt CHANGED
@@ -2,5 +2,11 @@ fastapi==0.74.*
  requests==2.27.*
  uvicorn[standard]==0.17.*
  sentencepiece==0.1.*
+ torch==1.12.*
+ transformers==4.*
  txtai==6.0.*
- llama-cpp-python
+ langchain==0.0.301
+ langsmith==0.0.40
+ bs4==0.0.1
+ pandas==2.1.1
+ SQLAlchemy==2.0.21