Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -72,12 +72,13 @@ from langchain.chains import RetrievalQA
|
|
| 72 |
|
| 73 |
from langchain.memory import ConversationBufferMemory
|
| 74 |
|
|
|
|
| 75 |
|
| 76 |
|
| 77 |
# Function to index URLs in RAG
|
| 78 |
def index_urls_in_rag(urls=[]):
|
| 79 |
# Load the RAG model
|
| 80 |
-
rag_model = "
|
| 81 |
encode_kwargs = {
|
| 82 |
"normalize_embeddings": True
|
| 83 |
} # set True to compute cosine similarity
|
|
@@ -96,9 +97,15 @@ def index_urls_in_rag(urls=[]):
|
|
| 96 |
loader = WebBaseLoader(url)
|
| 97 |
document = loader.load()
|
| 98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
# Split the document into chunks
|
| 100 |
text_splitter = RecursiveCharacterTextSplitter()
|
| 101 |
-
document_chunks = text_splitter.split_documents(document)
|
| 102 |
print(document_chunks)
|
| 103 |
# Index document chunks into the vector store
|
| 104 |
vector_store.add_documents(document_chunks)
|
|
|
|
| 72 |
|
| 73 |
from langchain.memory import ConversationBufferMemory
|
| 74 |
|
| 75 |
+
from langchain_community.document_transformers import BeautifulSoupTransformer
|
| 76 |
|
| 77 |
|
| 78 |
# Function to index URLs in RAG
|
| 79 |
def index_urls_in_rag(urls=[]):
|
| 80 |
# Load the RAG model
|
| 81 |
+
rag_model = "jinaai/jina-embeddings-v2-base-de"
|
| 82 |
encode_kwargs = {
|
| 83 |
"normalize_embeddings": True
|
| 84 |
} # set True to compute cosine similarity
|
|
|
|
| 97 |
loader = WebBaseLoader(url)
|
| 98 |
document = loader.load()
|
| 99 |
|
| 100 |
+
# Transform
|
| 101 |
+
bs_transformer = BeautifulSoupTransformer()
|
| 102 |
+
docs_transformed = bs_transformer.transform_documents(
|
| 103 |
+
document, class_to_extract=["p", "li", "div", "a"]
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
# Split the document into chunks
|
| 107 |
text_splitter = RecursiveCharacterTextSplitter()
|
| 108 |
+
document_chunks = text_splitter.split_documents(docs_transformed)
|
| 109 |
print(document_chunks)
|
| 110 |
# Index document chunks into the vector store
|
| 111 |
vector_store.add_documents(document_chunks)
|