reshinthadith committed on
Commit
212fefd
1 Parent(s): 1b6aa17

Reformatted Arxiv & PubMed (#1)

Browse files

- Add reformatted arxiv (10e6e04c65623d1e83c3dfb3c96f831393bde62b)
- Fix name (5dd43d39a92df7d551bf753cceadc374fcc3c2f6)
- Add PubMed reformatted (4a54fd68a496ca4051b33a857d86178bcc8befe8)
- Fix wrong folder name (da07004d4b8aec004f9b7ede66da4ca0b9fa6f83)

app.py CHANGED
@@ -6,11 +6,13 @@ from transformers import AutoTokenizer
6
  import ast
7
  import re
8
 
9
- version = st.sidebar.selectbox("Choose a version", ["init","local_dedup"])
10
  if version == "init":
11
  CACHE_DIR = "cache_ds/" #Use this to build the dataset
12
- else:
13
  CACHE_DIR = "local_dedup/"
 
 
14
  contribution_json = "contributors.json"
15
 
16
  contribution_dict = json.load(open(contribution_json,"r"))
6
  import ast
7
  import re
8
 
9
+ version = st.sidebar.selectbox("Choose a version", ["init","local_dedup", "reformatted"])
10
  if version == "init":
11
  CACHE_DIR = "cache_ds/" #Use this to build the dataset
12
+ elif version == "local_dedup":
13
  CACHE_DIR = "local_dedup/"
14
+ elif version == "reformatted":
15
+ CACHE_DIR = "reformatted/"
16
  contribution_json = "contributors.json"
17
 
18
  contribution_dict = json.load(open(contribution_json,"r"))
reformatted/PubMed_ver2/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1f1a2954bba37232ce7d9ea23dfef87abf0fa66c3ee79e8c9ab68b552077536
3
+ size 32596448
reformatted/PubMed_ver2/dataset_info.json ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:521f6c83673dbf0f0a3d10092c5c8cc05e42e566960d46ee791252633697f6bf
3
+ size 1247
reformatted/PubMed_ver2/state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3089461938e84e882252be0dcccf624f5e7924999d487f7baf1681b0d786da7d
3
+ size 250
reformatted/arXiv_ver2/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f01546d7da93f954d5a22cb01415a140528aee065191d1c5f4e4cf9607430c82
3
+ size 49745024
reformatted/arXiv_ver2/dataset_info.json ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c15da1446b29b726e1adf07e1f0a30a869d2be04336b7e31ad0f261d29697485
3
+ size 1247
reformatted/arXiv_ver2/state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c239a6b3c4ddf13e7d2810a3a5b69f4edfed495a7ed1d59073071e86193fa43
3
+ size 250