ksuzuki01 committed
Commit 9dbbe33
1 Parent(s): e46c22a

Update README.md

Files changed (1)
  1. README.md +35 -0
README.md CHANGED
@@ -2,6 +2,7 @@
  license: mit
  language:
  - ja
+ pipeline_tag: text-generation
  ---

  # japanese-gpt-1b-PII-masking
@@ -13,6 +14,40 @@ japanese-gpt-1b-PII-masking は、 [日本語事前学習済み1B GPTモデル](

  # Usage
  ```
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ input_text = ""
+
+ model_name = ""
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ if torch.cuda.is_available():
+     model = model.to("cuda")
+
+ def preprocess(text):
+     return text.replace("\n", "<LB>")
+
+ def postprocess(text):
+     return text.replace("<LB>", "\n")
+
+ input_text += tokenizer.eos_token
+ input_text = preprocess(input_text)
+
+ with torch.no_grad():
+     token_ids = tokenizer.encode(input_text, add_special_tokens=False, return_tensors="pt")
+
+     output_ids = model.generate(
+         token_ids.to(model.device),
+         max_new_tokens=256,
+         pad_token_id=tokenizer.pad_token_id,
+         eos_token_id=tokenizer.eos_token_id,
+     )
+ output = tokenizer.decode(output_ids.tolist()[0][token_ids.size(1):], skip_special_tokens=True)
+ output = postprocess(output)
+
+ print(output)
  ```

  # License
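
For context, here is a minimal sketch of how the placeholders in the added usage snippet could be filled in. The repository id and the sample input below are illustrative assumptions, not values from this commit; everything else mirrors the code added to the README (append the EOS token, map newlines to `<LB>` before generation, and map them back afterwards).

```
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical Hub id -- replace with the actual repository id of this model.
model_name = "<namespace>/japanese-gpt-1b-PII-masking"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
if torch.cuda.is_available():
    model = model.to("cuda")

# Made-up multi-line input containing personal information; newlines become <LB>,
# matching the preprocess helper in the README snippet.
input_text = "山田太郎と申します。\n電話番号は080-1234-5678です。"
input_text = (input_text + tokenizer.eos_token).replace("\n", "<LB>")

with torch.no_grad():
    token_ids = tokenizer.encode(input_text, add_special_tokens=False, return_tensors="pt")
    output_ids = model.generate(
        token_ids.to(model.device),
        max_new_tokens=256,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

# Decode only the newly generated tokens and restore newlines (postprocess).
output = tokenizer.decode(output_ids.tolist()[0][token_ids.size(1):], skip_special_tokens=True)
print(output.replace("<LB>", "\n"))  # expected: the input with PII spans masked (exact tag format depends on the model)
```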