Spaces:
Running
Running
AFischer1985
committed on
Commit
•
5c691a5
1
Parent(s):
d8b9fc9
Update run.py
Browse files
run.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
# Title: Gradio Interface to LLM-chatbot with dynamic RAG-functionality and ChromaDB
|
3 |
# Author: Andreas Fischer
|
4 |
# Date: October 10th, 2024
|
5 |
-
# Last update: October
|
6 |
##########################################################################################
|
7 |
|
8 |
import os
|
@@ -203,18 +203,18 @@ def add_doc(path, session):
|
|
203 |
print(len(x))
|
204 |
if(len(x)==0):
|
205 |
chunkSize=40000
|
206 |
-
for i in range(round(len(corpus)/chunkSize+0.5)):
|
207 |
print("embed batch "+str(i)+" of "+str(round(len(corpus)/chunkSize+0.5)))
|
208 |
ids=list(range(i*chunkSize,(i*chunkSize+chunkSize)))
|
209 |
batch=corpus[i*chunkSize:(i*chunkSize+chunkSize)]
|
210 |
textIDs=[str(id) for id in ids[0:len(batch)]]
|
211 |
ids=[str(id+len(x)+1) for id in ids[0:len(batch)]] # id refers to chromadb-unique ID
|
212 |
collection.add(documents=batch, ids=ids,
|
213 |
-
metadatas=[{"date": str("2024-10-10")} for b in batch])
|
214 |
print("finished batch "+str(i)+" of "+str(round(len(corpus)/40000+0.5)))
|
215 |
now = datetime.now()
|
216 |
gr.Info(f"Indexing complete!")
|
217 |
-
print(now-then)
|
218 |
return(collection)
|
219 |
|
220 |
#split_with_overlap("test me if you can",2,1)
|
|
|
2 |
# Title: Gradio Interface to LLM-chatbot with dynamic RAG-functionality and ChromaDB
|
3 |
# Author: Andreas Fischer
|
4 |
# Date: October 10th, 2024
|
5 |
+
# Last update: October 15th, 2024
|
6 |
##########################################################################################
|
7 |
|
8 |
import os
|
|
|
203 |
print(len(x))
|
204 |
if(len(x)==0):
|
205 |
chunkSize=40000
|
206 |
+
for i in range(round(len(corpus)/chunkSize+0.5)):
|
207 |
print("embed batch "+str(i)+" of "+str(round(len(corpus)/chunkSize+0.5)))
|
208 |
ids=list(range(i*chunkSize,(i*chunkSize+chunkSize)))
|
209 |
batch=corpus[i*chunkSize:(i*chunkSize+chunkSize)]
|
210 |
textIDs=[str(id) for id in ids[0:len(batch)]]
|
211 |
ids=[str(id+len(x)+1) for id in ids[0:len(batch)]] # id refers to chromadb-unique ID
|
212 |
collection.add(documents=batch, ids=ids,
|
213 |
+
metadatas=[{"date": str("2024-10-10")} for b in batch])
|
214 |
print("finished batch "+str(i)+" of "+str(round(len(corpus)/40000+0.5)))
|
215 |
now = datetime.now()
|
216 |
gr.Info(f"Indexing complete!")
|
217 |
+
print(now-then)
|
218 |
return(collection)
|
219 |
|
220 |
#split_with_overlap("test me if you can",2,1)
|