Chenxi Whitehouse
committed on
Commit
•
26238ae
1
Parent(s):
0f60703
more files
Browse files
data_store/knowledge_store/dev_knowledge_store.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:021e258cd6fb5fe6d627a4667d663e95c184c966939c15124df9206142fc2212
|
3 |
+
size 11537899362
|
data_store/urls/dev_urls.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:005d894194c69c689d0122d1332b059226d16c7ca2df25cd2a72b49716020dd3
|
3 |
+
size 17657479
|
data_store/urls/train_urls.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:425705c491cbc721c97d31aad5ef04530aec8a1015626671a59634bfe9b7f134
|
3 |
+
size 103470086
|
requirements.txt
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
spacy
|
2 |
+
scikit-learn
|
3 |
+
PyMuPDF
|
4 |
+
pandas
|
5 |
+
trafilatura
|
6 |
+
frontend
|
7 |
+
transformers==4.29.2
|
8 |
+
rank-bm25
|
9 |
+
accelerate
|
10 |
+
nltk
|
11 |
+
pytorch_lightning
|
12 |
+
torchmetrics
|
13 |
+
levenshtein
|
14 |
+
datasets
|