Spaces: Running on CPU Upgrade
Update auditqa/doc_process.py
Browse files
- auditqa/doc_process.py +4 -1
auditqa/doc_process.py
CHANGED
@@ -41,13 +41,16 @@ def process_pdf():
|
|
41 |
doc_processed = text_splitter.split_documents(value)
|
42 |
for doc in doc_processed:
|
43 |
doc.metadata["source"] = file
|
|
|
44 |
all_documents[file] = doc_processed
|
45 |
|
46 |
print(all_documents.keys())
|
47 |
|
48 |
|
49 |
embeddings = HuggingFaceEmbeddings(
|
50 |
-
|
|
|
|
|
51 |
)
|
52 |
|
53 |
qdrant_collections = {}
|
|
|
41 |
doc_processed = text_splitter.split_documents(value)
|
42 |
for doc in doc_processed:
|
43 |
doc.metadata["source"] = file
|
44 |
+
doc.metadata["year"] = file[-4:]
|
45 |
all_documents[file] = doc_processed
|
46 |
|
47 |
print(all_documents.keys())
|
48 |
|
49 |
|
50 |
embeddings = HuggingFaceEmbeddings(
|
51 |
+
model_kwargs = {'device': 'cpu'},
|
52 |
+
encode_kwargs = {'normalize_embeddings': True},
|
53 |
+
model_name="BAAI/bge-small-en-v1.5"
|
54 |
)
|
55 |
|
56 |
qdrant_collections = {}
|