Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
from langchain_community.document_loaders import PyMuPDFLoader
|
3 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
from langchain_qdrant import QdrantVectorStore
|
5 |
from langchain_community.vectorstores import Qdrant
|
6 |
from langchain.prompts import ChatPromptTemplate
|
@@ -35,6 +35,13 @@ text_splitter = RecursiveCharacterTextSplitter(
|
|
35 |
)
|
36 |
rag_documents = text_splitter.split_documents(documents)
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
embedding = OpenAIEmbeddings(model="text-embedding-3-small")
|
39 |
|
40 |
# Create the vector store
|
|
|
1 |
import os
|
2 |
from langchain_community.document_loaders import PyMuPDFLoader
|
3 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
|
4 |
from langchain_qdrant import QdrantVectorStore
|
5 |
from langchain_community.vectorstores import Qdrant
|
6 |
from langchain.prompts import ChatPromptTemplate
|
|
|
35 |
)
|
36 |
rag_documents = text_splitter.split_documents(documents)
|
37 |
|
38 |
+
# Alternative chunking: size chunks by token count (more accurate for OpenAI models)
|
39 |
+
token_text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
|
40 |
+
encoding="cl100k_base", chunk_size=100, chunk_overlap=0
|
41 |
+
)
|
42 |
+
token_rag_documents = token_text_splitter.split_documents(documents)
|
43 |
+
# TODO: test the token-based splitter above
|
44 |
+
|
45 |
embedding = OpenAIEmbeddings(model="text-embedding-3-small")
|
46 |
|
47 |
# Create the vector store
|