captain-awesome
committed on
Commit
•
59a15ba
1
Parent(s):
c249782
Update app.py
Browse files
app.py
CHANGED
@@ -108,9 +108,14 @@ def create_vector_database(loaded_documents):
|
|
108 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=30, length_function = len)
|
109 |
chunked_documents = text_splitter.split_documents(loaded_documents)
|
110 |
|
111 |
-
embeddings =
|
112 |
-
model_name
|
|
|
113 |
)
|
|
|
|
|
|
|
|
|
114 |
|
115 |
# model_name = "BAAI/bge-large-en"
|
116 |
# model_kwargs = {'device': 'cpu'}
|
@@ -121,18 +126,19 @@ def create_vector_database(loaded_documents):
|
|
121 |
# encode_kwargs=encode_kwargs
|
122 |
# )
|
123 |
|
124 |
-
|
125 |
# Create and persist a Chroma vector database from the chunked documents
|
126 |
db = Chroma.from_documents(
|
127 |
documents=chunked_documents,
|
128 |
-
|
129 |
-
|
130 |
# persist_directory=DB_DIR,
|
131 |
)
|
132 |
db.persist()
|
133 |
# db = Chroma(persist_directory=persist_directory,
|
134 |
# embedding_function=embedding)
|
135 |
return db
|
|
|
136 |
|
137 |
def set_custom_prompt():
|
138 |
"""
|
|
|
108 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=30, length_function = len)
|
109 |
chunked_documents = text_splitter.split_documents(loaded_documents)
|
110 |
|
111 |
+
embeddings = HuggingFaceEmbeddings(
|
112 |
+
model_name="sentence-transformers/all-MiniLM-L6-v2"
|
113 |
+
# model_name = "sentence-transformers/all-mpnet-base-v2"
|
114 |
)
|
115 |
+
|
116 |
+
# embeddings = HuggingFaceBgeEmbeddings(
|
117 |
+
# model_name = "BAAI/bge-large-en"
|
118 |
+
# )
|
119 |
|
120 |
# model_name = "BAAI/bge-large-en"
|
121 |
# model_kwargs = {'device': 'cpu'}
|
|
|
126 |
# encode_kwargs=encode_kwargs
|
127 |
# )
|
128 |
|
129 |
+
persist_directory = 'db'
|
130 |
# Create and persist a Chroma vector database from the chunked documents
|
131 |
db = Chroma.from_documents(
|
132 |
documents=chunked_documents,
|
133 |
+
embeddings=embeddings,
|
134 |
+
persist_directory=persist_directory
|
135 |
# persist_directory=DB_DIR,
|
136 |
)
|
137 |
db.persist()
|
138 |
# db = Chroma(persist_directory=persist_directory,
|
139 |
# embedding_function=embedding)
|
140 |
return db
|
141 |
+
|
142 |
|
143 |
def set_custom_prompt():
|
144 |
"""
|