zake7749 committed on
Commit c4528be
1 parent: 369f24e

[I] Batman.

.ipynb_checkpoints/README-checkpoint.md ADDED (content identical to README.md below)
.ipynb_checkpoints/config-checkpoint.json ADDED (content identical to config.json below)
.ipynb_checkpoints/special_tokens_map-checkpoint.json ADDED (content identical to special_tokens_map.json below)
.ipynb_checkpoints/tokenizer_config-checkpoint.json ADDED (content identical to tokenizer_config.json below)
README.md CHANGED
@@ -1,3 +1,63 @@
+# Lyrics Generation with Masked Sequence-to-Sequence Pretraining
+
+This repository demonstrates a format-controllable Chinese lyric generator, fine-tuned on the [Chinese-Lyric-Corpus](https://github.com/gaussic/Chinese-Lyric-Corpus) with a [MASS](https://arxiv.org/abs/1905.02450)-like strategy.
+
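+The exact fine-tuning recipe is not spelled out in this card. As a rough illustration only, a MASS-style preprocessing step could turn each reference lyric into a (masked template, original text) pair, with `X` as the mask placeholder; `make_training_pair` and `mask_ratio` below are hypothetical names, not part of this repository:
+
+```python
+import random
+
+def make_training_pair(lyric: str, mask_ratio: float = 0.7):
+    """Mask most characters of each sentence; the model learns to restore them."""
+    masked = []
+    for sentence in lyric.strip("。").split("。"):
+        masked.append("".join(
+            "X" if random.random() < mask_ratio else char
+            for char in sentence
+        ))
+    return "。".join(masked) + "。", lyric  # (model input, reconstruction target)
+
+source, target = make_training_pair("風花雪月。日升月落。")
+print(source)  # e.g. 風XXX。XXXX。
+print(target)  # 風花雪月。日升月落。
+```
+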
+## Usage
+
+### Initialization
+
+```python
+from transformers import MT5ForConditionalGeneration, MT5Tokenizer, Text2TextGenerationPipeline
+
+model_path = "zake7749/chinese-lyrics-generation-mass"
+model = MT5ForConditionalGeneration.from_pretrained(model_path)
+tokenizer = MT5Tokenizer.from_pretrained(model_path)
+pipe = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)
+```
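+
+Equivalently, the standard `transformers` pipeline factory can load everything in one call (pass `device=0` to run on the first GPU):
+
+```python
+from transformers import pipeline
+
+pipe = pipeline("text2text-generation", model="zake7749/chinese-lyrics-generation-mass")
+```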
+
+### Generate lyrics with a template
+
+In a template, each `X` marks one character for the model to fill in, while all other characters are kept fixed.
+
+```python
+template = "風花雪月。像XXXXXXXXXX。日升月落。仿若XXXXXXXXXX。"
+lyric = pipe(template, max_length=128, top_p=0.8, do_sample=True, repetition_penalty=1.2)[0]['generated_text']
+print(lyric)  # 風花雪月。像你在我的夢裡慢慢散落。日升月落。仿若我宿命無法陪隨你走過
+```
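+
+Since decoding is sampled, it can pay to generate several candidates and keep the best one; `num_return_sequences` is forwarded to `generate`:
+
+```python
+candidates = pipe(template, max_length=128, top_p=0.8, do_sample=True,
+                  repetition_penalty=1.2, num_return_sequences=3)
+for cand in candidates:
+    print(cand['generated_text'])
+```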
+
+### Acrostic
+
+Fixing the first character of each sentence produces an acrostic; here the line-initial characters spell 分手之後 ("after the breakup").
+
+```python
+template = "分XXXXXX。手XXXXXXXXX。之XXXXXXX。後XXXXXXXXX。"
+lyric = pipe(template, max_length=128, top_p=0.8, do_sample=True, repetition_penalty=1.2)[0]['generated_text']
+print(lyric)  # 分開後激情浮現。手牽著手走過的那一天。之間有太多的危險。後悔一點點,傷心一片。
+```
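+
+A small helper (hypothetical, not part of this repository) makes such templates easy to build from any phrase:
+
+```python
+def acrostic_template(phrase: str, body_len: int = 8) -> str:
+    """One sentence per character: the character itself plus `body_len` placeholders."""
+    return "".join(ch + "X" * body_len + "。" for ch in phrase)
+
+print(acrostic_template("分手之後"))  # 分XXXXXXXX。手XXXXXXXX。之XXXXXXXX。後XXXXXXXX。
+```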
+
+### Free-Style
+
+A randomly drawn template yields free-style lyrics with a random number of sentences and characters per sentence:
+
+```python
+import random
+
+min_sentence_num, max_sentence_num = 2, 5
+min_character_num, max_character_num = 4, 10
+
+# Draw a sentence count, then an all-placeholder body for each sentence.
+num_sentences = random.randint(min_sentence_num, max_sentence_num)
+sentences = ["X" * random.randint(min_character_num, max_character_num)
+             for _ in range(num_sentences)]
+
+template = "。".join(sentences) + "。"
+lyric = pipe(template, max_length=128, top_p=0.8, do_sample=True, repetition_penalty=1.2)[0]['generated_text']
+print(f"{template}\n{lyric}")
+
+# XXXXXXXXXX。XXXXXXXXXX。XXXXXX。XXXXXXX。XXXXXXXX。
+# 讓我為你再勇敢下一次。也許我唱的歌還可以。算是你的天使。如果我們不在一起。是否就不會有距離。
+```
+
+## Note
+
+The model is still being trained, so it sometimes does not follow the template exactly, especially when generating long sequences.
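+
+When exact conformance matters, one simple mitigation (a hypothetical helper, not shipped with the model) is to check the output against the template and resample on mismatch:
+
+```python
+def follows_template(template: str, lyric: str) -> bool:
+    """True if sentence count, sentence lengths, and fixed characters all match."""
+    t_parts = template.strip("。").split("。")
+    l_parts = lyric.strip("。").split("。")
+    if len(t_parts) != len(l_parts):
+        return False
+    return all(
+        len(t) == len(l) and all(tc == "X" or tc == lc for tc, lc in zip(t, l))
+        for t, l in zip(t_parts, l_parts)
+    )
+
+for _ in range(5):  # resample a few times, keep the first conforming lyric
+    lyric = pipe(template, max_length=128, top_p=0.8, do_sample=True,
+                 repetition_penalty=1.2)[0]['generated_text']
+    if follows_template(template, lyric):
+        break
+```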
+
+## Disclaimer
+
+This lyric generator is for academic purposes only. Users of this model should exercise caution and carefully evaluate the results before using them for any commercial or non-academic purpose. We are not liable for any damages or losses resulting from the use or misuse of the model.
+
 ---
 license: mit
 ---
config.json ADDED
@@ -0,0 +1,31 @@
+{
+  "architectures": [
+    "MT5ForConditionalGeneration"
+  ],
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "mt5",
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "tie_word_embeddings": false,
+  "tokenizer_class": "T5Tokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.21.0",
+  "use_cache": true,
+  "vocab_size": 250112
+}
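
The hyperparameters above (d_model 768, 12 encoder/decoder layers, 2048-wide feed-forward, 250k vocabulary) match the mT5-base architecture. A quick sanity check with the standard `transformers` API:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("zake7749/chinese-lyrics-generation-mass")
print(config.model_type, config.d_model, config.num_layers)  # mt5 768 12
```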
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4853a0e8df9f4aff8392fdcf9534e924760a35e908e363b625f2ca132e3b32a4
+size 2329696205
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
spiece.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
+size 4309802
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
+{
+  "additional_special_tokens": null,
+  "eos_token": "</s>",
+  "extra_ids": 0,
+  "name_or_path": "bigscience/mt0-base",
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "special_tokens_map_file": "/home/patrick/.cache/torch/transformers/685ac0ca8568ec593a48b61b0a3c272beee9bc194a3c7241d15dcadb5f875e53.f76030f3ec1b96a8199b2593390c610e76ca8028ef3d24680000619ffb646276",
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}