Upload 7 files
Browse files- instruct_med_dataset.hf/data-00000-of-00005.arrow +3 -0
- instruct_med_dataset.hf/data-00001-of-00005.arrow +3 -0
- instruct_med_dataset.hf/data-00002-of-00005.arrow +3 -0
- instruct_med_dataset.hf/data-00003-of-00005.arrow +3 -0
- instruct_med_dataset.hf/data-00004-of-00005.arrow +3 -0
- instruct_med_dataset.hf/dataset_info.json +12 -0
- instruct_med_dataset.hf/state.json +27 -0
instruct_med_dataset.hf/data-00000-of-00005.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:385ac79f7119adc73a706f2b55288e89650a1acb18df7271fb477205caa995a7
|
3 |
+
size 466470304
|
instruct_med_dataset.hf/data-00001-of-00005.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9744e85decc4a2f617223cd42c248b2f3437b4aff6ba94a0e554a0591446a186
|
3 |
+
size 470409624
|
instruct_med_dataset.hf/data-00002-of-00005.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8f8ac4f1b10bfecbcffe6a4461138c7d4c987dfdfd74b4815db64661230d9b9
|
3 |
+
size 471381784
|
instruct_med_dataset.hf/data-00003-of-00005.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:438edcc8f61539ffaac7f9acf2c109e6916a64c5433d73dec90fa7cdb68af89d
|
3 |
+
size 464955888
|
instruct_med_dataset.hf/data-00004-of-00005.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b78604ac776c9d2448bf0736c14ff9dab23b22653596424afcf150cde0638cd3
|
3 |
+
size 469655192
|
instruct_med_dataset.hf/dataset_info.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "@article{Cohan_2018,\n title={A Discourse-Aware Attention Model for Abstractive Summarization of\n Long Documents},\n url={http://dx.doi.org/10.18653/v1/n18-2097},\n DOI={10.18653/v1/n18-2097},\n journal={Proceedings of the 2018 Conference of the North American Chapter of\n the Association for Computational Linguistics: Human Language\n Technologies, Volume 2 (Short Papers)},\n publisher={Association for Computational Linguistics},\n author={Cohan, Arman and Dernoncourt, Franck and Kim, Doo Soon and Bui, Trung and Kim, Seokhwan and Chang, Walter and Goharian, Nazli},\n year={2018}\n}",
|
3 |
+
"description": "Scientific papers datasets contain two sets of long and structured documents.\nThe datasets are obtained from ArXiv and PubMed OpenAccess repositories.\n\nBoth \"arxiv\" and \"pubmed\" have three features:\n - article: the body of the document, paragraphs separated by \"/n\".\n - abstract: the abstract of the document, paragraphs separated by \"/n\".\n - section_names: titles of sections, separated by \"/n\".",
|
4 |
+
"features": {
|
5 |
+
"text": {
|
6 |
+
"dtype": "string",
|
7 |
+
"_type": "Value"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"homepage": "https://github.com/armancohan/long-summarization",
|
11 |
+
"license": ""
|
12 |
+
}
|
instruct_med_dataset.hf/state.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00005.arrow"
|
5 |
+
},
|
6 |
+
{
|
7 |
+
"filename": "data-00001-of-00005.arrow"
|
8 |
+
},
|
9 |
+
{
|
10 |
+
"filename": "data-00002-of-00005.arrow"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"filename": "data-00003-of-00005.arrow"
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"filename": "data-00004-of-00005.arrow"
|
17 |
+
}
|
18 |
+
],
|
19 |
+
"_fingerprint": "8c606ef2269c391c",
|
20 |
+
"_format_columns": [
|
21 |
+
"text"
|
22 |
+
],
|
23 |
+
"_format_kwargs": {},
|
24 |
+
"_format_type": null,
|
25 |
+
"_output_all_columns": false,
|
26 |
+
"_split": null
|
27 |
+
}
|