Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -46,14 +46,28 @@ from langchain_community.document_loaders import TextLoader
|
|
46 |
def load_txt(path="./a.cv.ckaller.2024.txt"):
|
47 |
loader = TextLoader(path)
|
48 |
document = loader.load()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
# split the document into chunks
|
50 |
-
|
51 |
chunk_size=1500,
|
52 |
chunk_overlap=250,
|
53 |
length_function=len,
|
54 |
is_separator_regex=False,
|
55 |
)
|
56 |
-
|
|
|
|
|
57 |
#######
|
58 |
'''
|
59 |
FAISS
|
@@ -66,6 +80,16 @@ def load_txt(path="./a.cv.ckaller.2024.txt"):
|
|
66 |
embeddings = HuggingFaceBgeEmbeddings(
|
67 |
model_name=model, encode_kwargs=encode_kwargs, model_kwargs={"device": "cpu"}
|
68 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
# load from disk
|
70 |
vector_store = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
|
71 |
|
|
|
46 |
def load_txt(path="./a.cv.ckaller.2024.txt"):
|
47 |
loader = TextLoader(path)
|
48 |
document = loader.load()
|
49 |
+
|
50 |
+
|
51 |
+
|
52 |
+
####
|
53 |
+
|
54 |
+
from langchain_experimental.text_splitter import SemanticChunker
|
55 |
+
with open(path) as f:
|
56 |
+
state_of_the_union = f.read()
|
57 |
+
|
58 |
+
|
59 |
+
|
60 |
+
######
|
61 |
# split the document into chunks
|
62 |
+
a_text_splitter = RecursiveCharacterTextSplitter(
|
63 |
chunk_size=1500,
|
64 |
chunk_overlap=250,
|
65 |
length_function=len,
|
66 |
is_separator_regex=False,
|
67 |
)
|
68 |
+
# Use the recursive splitter built just above; `text_splitter` is only bound further down (SemanticChunker), so calling it here would fail.
a_document_chunks = a_text_splitter.split_documents(document)
|
69 |
+
|
70 |
+
|
71 |
#######
|
72 |
'''
|
73 |
FAISS
|
|
|
80 |
embeddings = HuggingFaceBgeEmbeddings(
|
81 |
model_name=model, encode_kwargs=encode_kwargs, model_kwargs={"device": "cpu"}
|
82 |
)
|
83 |
+
|
84 |
+
|
85 |
+
#####
|
86 |
+
|
87 |
+
|
88 |
+
text_splitter = SemanticChunker(embeddings)
|
89 |
+
|
90 |
+
document_chunks = text_splitter.create_documents([state_of_the_union])
|
91 |
+
print(document_chunks[0].page_content)
|
92 |
+
|
93 |
# load from disk
|
94 |
vector_store = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
|
95 |
|