Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update auditqa/doc_process.py
Browse files- auditqa/doc_process.py +3 -3
auditqa/doc_process.py
CHANGED
@@ -31,7 +31,7 @@ def process_pdf():
|
|
31 |
# langchain text splitters: https://python.langchain.com/docs/modules/data_connection/document_transformers/
|
32 |
chunk_size = 256
|
33 |
text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
|
34 |
-
AutoTokenizer.from_pretrained("BAAI/bge-
|
35 |
chunk_size=chunk_size,
|
36 |
chunk_overlap=10,
|
37 |
add_start_index=True,
|
@@ -78,7 +78,7 @@ def process_pdf():
|
|
78 |
embeddings = HuggingFaceEmbeddings(
|
79 |
model_kwargs = {'device': device},
|
80 |
encode_kwargs = {'normalize_embeddings': True},
|
81 |
-
model_name="BAAI/bge-
|
82 |
)
|
83 |
# placeholder for collection
|
84 |
qdrant_collections = {}
|
@@ -102,7 +102,7 @@ def get_local_qdrant():
|
|
102 |
embeddings = HuggingFaceEmbeddings(
|
103 |
model_kwargs = {'device': device},
|
104 |
encode_kwargs = {'normalize_embeddings': True},
|
105 |
-
model_name="BAAI/bge-
|
106 |
list_ = ['Consolidated','District','Ministry','allreports']
|
107 |
for val in list_:
|
108 |
client = QdrantClient(path=f"./data/{val}")
|
|
|
31 |
# langchain text splitters: https://python.langchain.com/docs/modules/data_connection/document_transformers/
|
32 |
chunk_size = 256
|
33 |
text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
|
34 |
+
AutoTokenizer.from_pretrained("BAAI/bge-en-icl"),
|
35 |
chunk_size=chunk_size,
|
36 |
chunk_overlap=10,
|
37 |
add_start_index=True,
|
|
|
78 |
embeddings = HuggingFaceEmbeddings(
|
79 |
model_kwargs = {'device': device},
|
80 |
encode_kwargs = {'normalize_embeddings': True},
|
81 |
+
model_name="BAAI/bge-en-icl"
|
82 |
)
|
83 |
# placeholder for collection
|
84 |
qdrant_collections = {}
|
|
|
102 |
embeddings = HuggingFaceEmbeddings(
|
103 |
model_kwargs = {'device': device},
|
104 |
encode_kwargs = {'normalize_embeddings': True},
|
105 |
+
model_name="BAAI/bge-en-icl")
|
106 |
list_ = ['Consolidated','District','Ministry','allreports']
|
107 |
for val in list_:
|
108 |
client = QdrantClient(path=f"./data/{val}")
|