Sentence Similarity
Safetensors
Japanese
distilbert
feature-extraction
hpprc committed on
Commit
349feda
1 Parent(s): 9b738d8

Upload 17 files

Browse files
jmteb_config/jmteb.jsonnet ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Classification
2
+ (import './tasks/amazon_review_classification.jsonnet') +
3
+ (import './tasks/amazon_counterfactual_classification.jsonnet') +
4
+ (import './tasks/massive_intent_classification.jsonnet') +
5
+ (import './tasks/massive_scenario_classification.jsonnet') +
6
+ // Clustering
7
+ (import './tasks/livedoor_news.jsonnet') +
8
+ (import './tasks/mewsc16.jsonnet') +
9
+ // STS
10
+ (import './tasks/jsts.jsonnet') +
11
+ (import './tasks/jsick.jsonnet') +
12
+ // Pair Classification
13
+ (import './tasks/paws_x_ja.jsonnet') +
14
+ // Retrieval
15
+ (import './tasks/jagovfaqs_22k.jsonnet') +
16
+ (import './tasks/mrtydi.jsonnet') +
17
+ (import './tasks/jaqket.jsonnet') +
18
+ (import './tasks/nlp_journal_title_abs.jsonnet') +
19
+ (import './tasks/nlp_journal_title_intro.jsonnet') +
20
+ (import './tasks/nlp_journal_abs_intro.jsonnet') +
21
+ // Reranking
22
+ (import './tasks/esci.jsonnet')
jmteb_config/tasks/amazon_counterfactual_classification.jsonnet ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ amazon_counterfactual_classification: {
3
+ class_path: 'ClassificationEvaluator',
4
+ init_args: {
5
+ prefix: 'クエリ: ',
6
+ train_dataset: {
7
+ class_path: 'HfClassificationDataset',
8
+ init_args: {
9
+ path: 'sbintuitions/JMTEB',
10
+ split: 'train',
11
+ name: 'amazon_counterfactual_classification',
12
+ },
13
+ },
14
+ val_dataset: {
15
+ class_path: 'HfClassificationDataset',
16
+ init_args: {
17
+ path: 'sbintuitions/JMTEB',
18
+ split: 'validation',
19
+ name: 'amazon_counterfactual_classification',
20
+ },
21
+ },
22
+ test_dataset: {
23
+ class_path: 'HfClassificationDataset',
24
+ init_args: {
25
+ path: 'sbintuitions/JMTEB',
26
+ split: 'test',
27
+ name: 'amazon_counterfactual_classification',
28
+ },
29
+ },
30
+ },
31
+ },
32
+ }
jmteb_config/tasks/amazon_review_classification.jsonnet ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ amazon_review_classification: {
3
+ class_path: 'ClassificationEvaluator',
4
+ init_args: {
5
+ prefix: 'クエリ: ',
6
+ train_dataset: {
7
+ class_path: 'HfClassificationDataset',
8
+ init_args: {
9
+ path: 'sbintuitions/JMTEB',
10
+ split: 'train',
11
+ name: 'amazon_review_classification',
12
+ },
13
+ },
14
+ val_dataset: {
15
+ class_path: 'HfClassificationDataset',
16
+ init_args: {
17
+ path: 'sbintuitions/JMTEB',
18
+ split: 'validation',
19
+ name: 'amazon_review_classification',
20
+ },
21
+ },
22
+ test_dataset: {
23
+ class_path: 'HfClassificationDataset',
24
+ init_args: {
25
+ path: 'sbintuitions/JMTEB',
26
+ split: 'test',
27
+ name: 'amazon_review_classification',
28
+ },
29
+ },
30
+ },
31
+ },
32
+ }
jmteb_config/tasks/esci.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ esci: {
3
+ class_path: 'RerankingEvaluator',
4
+ init_args: {
5
+ doc_prefix: '文章: ',
6
+ query_prefix: 'クエリ: ',
7
+ val_query_dataset: {
8
+ class_path: 'HfRerankingQueryDataset',
9
+ init_args: {
10
+ path: 'sbintuitions/JMTEB',
11
+ split: 'validation',
12
+ name: 'esci-query',
13
+ },
14
+ },
15
+ test_query_dataset: {
16
+ class_path: 'HfRerankingQueryDataset',
17
+ init_args: {
18
+ path: 'sbintuitions/JMTEB',
19
+ split: 'test',
20
+ name: 'esci-query',
21
+ },
22
+ },
23
+ doc_dataset: {
24
+ class_path: 'HfRerankingDocDataset',
25
+ init_args: {
26
+ path: 'sbintuitions/JMTEB',
27
+ split: 'corpus',
28
+ name: 'esci-corpus',
29
+ },
30
+ },
31
+ },
32
+ },
33
+ }
jmteb_config/tasks/jagovfaqs_22k.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ jagovfaqs_22k: {
3
+ class_path: 'RetrievalEvaluator',
4
+ init_args: {
5
+ doc_prefix: '文章: ',
6
+ query_prefix: 'クエリ: ',
7
+ val_query_dataset: {
8
+ class_path: 'HfRetrievalQueryDataset',
9
+ init_args: {
10
+ path: 'sbintuitions/JMTEB',
11
+ split: 'validation',
12
+ name: 'jagovfaqs_22k-query',
13
+ },
14
+ },
15
+ test_query_dataset: {
16
+ class_path: 'HfRetrievalQueryDataset',
17
+ init_args: {
18
+ path: 'sbintuitions/JMTEB',
19
+ split: 'test',
20
+ name: 'jagovfaqs_22k-query',
21
+ },
22
+ },
23
+ doc_dataset: {
24
+ class_path: 'HfRetrievalDocDataset',
25
+ init_args: {
26
+ path: 'sbintuitions/JMTEB',
27
+ split: 'corpus',
28
+ name: 'jagovfaqs_22k-corpus',
29
+ },
30
+ },
31
+ },
32
+ },
33
+ }
jmteb_config/tasks/jaqket.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ jaqket: {
3
+ class_path: 'RetrievalEvaluator',
4
+ init_args: {
5
+ doc_prefix: '文章: ',
6
+ query_prefix: 'クエリ: ',
7
+ val_query_dataset: {
8
+ class_path: 'HfRetrievalQueryDataset',
9
+ init_args: {
10
+ path: 'sbintuitions/JMTEB',
11
+ split: 'validation',
12
+ name: 'jaqket-query',
13
+ },
14
+ },
15
+ test_query_dataset: {
16
+ class_path: 'HfRetrievalQueryDataset',
17
+ init_args: {
18
+ path: 'sbintuitions/JMTEB',
19
+ split: 'test',
20
+ name: 'jaqket-query',
21
+ },
22
+ },
23
+ doc_dataset: {
24
+ class_path: 'HfRetrievalDocDataset',
25
+ init_args: {
26
+ path: 'sbintuitions/JMTEB',
27
+ split: 'corpus',
28
+ name: 'jaqket-corpus',
29
+ },
30
+ },
31
+ },
32
+ },
33
+ }
jmteb_config/tasks/jsick.jsonnet ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ jsick: {
3
+ class_path: 'STSEvaluator',
4
+ init_args: {
5
+ sentence1_prefix: 'クエリ: ',
6
+ sentence2_prefix: 'クエリ: ',
7
+ val_dataset: {
8
+ class_path: 'HfSTSDataset',
9
+ init_args: {
10
+ path: 'sbintuitions/JMTEB',
11
+ split: 'validation',
12
+ name: 'jsick',
13
+ },
14
+ },
15
+ test_dataset: {
16
+ class_path: 'HfSTSDataset',
17
+ init_args: {
18
+ path: 'sbintuitions/JMTEB',
19
+ split: 'test',
20
+ name: 'jsick',
21
+ },
22
+ },
23
+ },
24
+ },
25
+ }
jmteb_config/tasks/jsts.jsonnet ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ jsts: {
3
+ class_path: 'STSEvaluator',
4
+ init_args: {
5
+ sentence1_prefix: 'クエリ: ',
6
+ sentence2_prefix: 'クエリ: ',
7
+ val_dataset: {
8
+ class_path: 'HfSTSDataset',
9
+ init_args: {
10
+ path: 'sbintuitions/JMTEB',
11
+ split: 'train',
12
+ name: 'jsts',
13
+ },
14
+ },
15
+ test_dataset: {
16
+ class_path: 'HfSTSDataset',
17
+ init_args: {
18
+ path: 'sbintuitions/JMTEB',
19
+ split: 'test',
20
+ name: 'jsts',
21
+ },
22
+ },
23
+ },
24
+ },
25
+ }
jmteb_config/tasks/livedoor_news.jsonnet ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ livedoor_news: {
3
+ class_path: 'ClusteringEvaluator',
4
+ init_args: {
5
+ prefix: 'クエリ: ',
6
+ val_dataset: {
7
+ class_path: 'HfClusteringDataset',
8
+ init_args: {
9
+ path: 'sbintuitions/JMTEB',
10
+ split: 'validation',
11
+ name: 'livedoor_news',
12
+ },
13
+ },
14
+ test_dataset: {
15
+ class_path: 'HfClusteringDataset',
16
+ init_args: {
17
+ path: 'sbintuitions/JMTEB',
18
+ split: 'test',
19
+ name: 'livedoor_news',
20
+ },
21
+ },
22
+ },
23
+ },
24
+ }
jmteb_config/tasks/massive_intent_classification.jsonnet ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ massive_intent_classification: {
3
+ class_path: 'ClassificationEvaluator',
4
+ init_args: {
5
+ prefix: 'クエリ: ',
6
+ train_dataset: {
7
+ class_path: 'HfClassificationDataset',
8
+ init_args: {
9
+ path: 'sbintuitions/JMTEB',
10
+ split: 'train',
11
+ name: 'massive_intent_classification',
12
+ },
13
+ },
14
+ val_dataset: {
15
+ class_path: 'HfClassificationDataset',
16
+ init_args: {
17
+ path: 'sbintuitions/JMTEB',
18
+ split: 'validation',
19
+ name: 'massive_intent_classification',
20
+ },
21
+ },
22
+ test_dataset: {
23
+ class_path: 'HfClassificationDataset',
24
+ init_args: {
25
+ path: 'sbintuitions/JMTEB',
26
+ split: 'test',
27
+ name: 'massive_intent_classification',
28
+ },
29
+ },
30
+ },
31
+ },
32
+ }
jmteb_config/tasks/massive_scenario_classification.jsonnet ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ massive_scenario_classification: {
3
+ class_path: 'ClassificationEvaluator',
4
+ init_args: {
5
+ prefix: 'クエリ: ',
6
+ train_dataset: {
7
+ class_path: 'HfClassificationDataset',
8
+ init_args: {
9
+ path: 'sbintuitions/JMTEB',
10
+ split: 'train',
11
+ name: 'massive_scenario_classification',
12
+ },
13
+ },
14
+ val_dataset: {
15
+ class_path: 'HfClassificationDataset',
16
+ init_args: {
17
+ path: 'sbintuitions/JMTEB',
18
+ split: 'validation',
19
+ name: 'massive_scenario_classification',
20
+ },
21
+ },
22
+ test_dataset: {
23
+ class_path: 'HfClassificationDataset',
24
+ init_args: {
25
+ path: 'sbintuitions/JMTEB',
26
+ split: 'test',
27
+ name: 'massive_scenario_classification',
28
+ },
29
+ },
30
+ },
31
+ },
32
+ }
jmteb_config/tasks/mewsc16.jsonnet ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ mewsc16: {
3
+ class_path: 'ClusteringEvaluator',
4
+ init_args: {
5
+ prefix: 'クエリ: ',
6
+ val_dataset: {
7
+ class_path: 'HfClusteringDataset',
8
+ init_args: {
9
+ path: 'sbintuitions/JMTEB',
10
+ split: 'validation',
11
+ name: 'mewsc16_ja',
12
+ },
13
+ },
14
+ test_dataset: {
15
+ class_path: 'HfClusteringDataset',
16
+ init_args: {
17
+ path: 'sbintuitions/JMTEB',
18
+ split: 'test',
19
+ name: 'mewsc16_ja',
20
+ },
21
+ },
22
+ },
23
+ },
24
+ }
jmteb_config/tasks/mrtydi.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ mrtydi: {
3
+ class_path: 'RetrievalEvaluator',
4
+ init_args: {
5
+ doc_prefix: '文章: ',
6
+ query_prefix: 'クエリ: ',
7
+ val_query_dataset: {
8
+ class_path: 'HfRetrievalQueryDataset',
9
+ init_args: {
10
+ path: 'sbintuitions/JMTEB',
11
+ split: 'validation',
12
+ name: 'mrtydi-query',
13
+ },
14
+ },
15
+ test_query_dataset: {
16
+ class_path: 'HfRetrievalQueryDataset',
17
+ init_args: {
18
+ path: 'sbintuitions/JMTEB',
19
+ split: 'test',
20
+ name: 'mrtydi-query',
21
+ },
22
+ },
23
+ doc_dataset: {
24
+ class_path: 'HfRetrievalDocDataset',
25
+ init_args: {
26
+ path: 'sbintuitions/JMTEB',
27
+ split: 'corpus',
28
+ name: 'mrtydi-corpus',
29
+ },
30
+ },
31
+ },
32
+ },
33
+ }
jmteb_config/tasks/nlp_journal_abs_intro.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ nlp_journal_abs_intro: {
3
+ class_path: 'RetrievalEvaluator',
4
+ init_args: {
5
+ doc_prefix: '文章: ',
6
+ query_prefix: '文章: ',
7
+ val_query_dataset: {
8
+ class_path: 'HfRetrievalQueryDataset',
9
+ init_args: {
10
+ path: 'sbintuitions/JMTEB',
11
+ split: 'validation',
12
+ name: 'nlp_journal_abs_intro-query',
13
+ },
14
+ },
15
+ test_query_dataset: {
16
+ class_path: 'HfRetrievalQueryDataset',
17
+ init_args: {
18
+ path: 'sbintuitions/JMTEB',
19
+ split: 'test',
20
+ name: 'nlp_journal_abs_intro-query',
21
+ },
22
+ },
23
+ doc_dataset: {
24
+ class_path: 'HfRetrievalDocDataset',
25
+ init_args: {
26
+ path: 'sbintuitions/JMTEB',
27
+ split: 'corpus',
28
+ name: 'nlp_journal_abs_intro-corpus',
29
+ },
30
+ },
31
+ },
32
+ },
33
+ }
jmteb_config/tasks/nlp_journal_title_abs.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ nlp_journal_title_abs: {
3
+ class_path: 'RetrievalEvaluator',
4
+ init_args: {
5
+ doc_prefix: '文章: ',
6
+ query_prefix: 'クエリ: ',
7
+ val_query_dataset: {
8
+ class_path: 'HfRetrievalQueryDataset',
9
+ init_args: {
10
+ path: 'sbintuitions/JMTEB',
11
+ split: 'validation',
12
+ name: 'nlp_journal_title_abs-query',
13
+ },
14
+ },
15
+ test_query_dataset: {
16
+ class_path: 'HfRetrievalQueryDataset',
17
+ init_args: {
18
+ path: 'sbintuitions/JMTEB',
19
+ split: 'test',
20
+ name: 'nlp_journal_title_abs-query',
21
+ },
22
+ },
23
+ doc_dataset: {
24
+ class_path: 'HfRetrievalDocDataset',
25
+ init_args: {
26
+ path: 'sbintuitions/JMTEB',
27
+ split: 'corpus',
28
+ name: 'nlp_journal_title_abs-corpus',
29
+ },
30
+ },
31
+ },
32
+ },
33
+ }
jmteb_config/tasks/nlp_journal_title_intro.jsonnet ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ nlp_journal_title_intro: {
3
+ class_path: 'RetrievalEvaluator',
4
+ init_args: {
5
+ doc_prefix: '文章: ',
6
+ query_prefix: 'クエリ: ',
7
+ val_query_dataset: {
8
+ class_path: 'HfRetrievalQueryDataset',
9
+ init_args: {
10
+ path: 'sbintuitions/JMTEB',
11
+ split: 'validation',
12
+ name: 'nlp_journal_title_intro-query',
13
+ },
14
+ },
15
+ test_query_dataset: {
16
+ class_path: 'HfRetrievalQueryDataset',
17
+ init_args: {
18
+ path: 'sbintuitions/JMTEB',
19
+ split: 'test',
20
+ name: 'nlp_journal_title_intro-query',
21
+ },
22
+ },
23
+ doc_dataset: {
24
+ class_path: 'HfRetrievalDocDataset',
25
+ init_args: {
26
+ path: 'sbintuitions/JMTEB',
27
+ split: 'corpus',
28
+ name: 'nlp_journal_title_intro-corpus',
29
+ },
30
+ },
31
+ },
32
+ },
33
+ }
jmteb_config/tasks/paws_x_ja.jsonnet ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ paws_x_ja: {
3
+ class_path: 'PairClassificationEvaluator',
4
+ init_args: {
5
+ sentence1_prefix: 'クエリ: ',
6
+ sentence2_prefix: 'クエリ: ',
7
+ val_dataset: {
8
+ class_path: 'HfPairClassificationDataset',
9
+ init_args: {
10
+ path: 'sbintuitions/JMTEB',
11
+ split: 'validation',
12
+ name: 'paws_x_ja',
13
+ },
14
+ },
15
+ test_dataset: {
16
+ class_path: 'HfPairClassificationDataset',
17
+ init_args: {
18
+ path: 'sbintuitions/JMTEB',
19
+ split: 'test',
20
+ name: 'paws_x_ja',
21
+ },
22
+ },
23
+ },
24
+ },
25
+ }