aino-capm committed
Commit f3f6a96
1 Parent(s): a3493e5

first

- app.py +29 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,29 @@
+import gradio as gr
+from transformers import T5Tokenizer, AutoModelForCausalLM
+
+tokenizer = T5Tokenizer.from_pretrained("rinna/japanese-gpt2-small")
+model3 = AutoModelForCausalLM.from_pretrained("./models")
+model3.to("cpu")
+
+def getarate_sentences3(seed_sentence):
+    x = tokenizer.encode(seed_sentence, return_tensors="pt", add_special_tokens=False)  # encode the input
+    x = x.cpu()  # keep the input on the CPU
+    y = model3.generate(x,  # input token IDs
+                        min_length=50,  # minimum length of the generated text
+                        max_length=100,  # maximum length of the generated text
+                        do_sample=True,  # sample the next token from a probability distribution
+                        top_k=50,  # top-k sampling
+                        top_p=0.95,  # top-p (nucleus) sampling
+                        temperature=1.2,  # adjust the probability distribution
+                        num_return_sequences=3,  # number of sequences to generate
+                        pad_token_id=tokenizer.pad_token_id,  # padding token ID
+                        bos_token_id=tokenizer.bos_token_id,  # beginning-of-text token ID
+                        eos_token_id=tokenizer.eos_token_id,  # end-of-text token ID
+                        bad_words_ids=[[tokenizer.unk_token_id]]  # token IDs that must not be generated
+                        )
+    generated_sentences = tokenizer.batch_decode(y, skip_special_tokens=True)  # decode, skipping special tokens
+    return generated_sentences
+
+demo = gr.Interface(fn=getarate_sentences3, inputs="text", outputs="text")
+
+demo.launch()
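The checkpoint loaded from ./models is not included in this commit; the app assumes it is a causal LM fine-tuned from rinna/japanese-gpt2-small and therefore compatible with that tokenizer. A minimal smoke test under that assumption, bypassing the Gradio UI (the prompt string is an arbitrary example):

from transformers import T5Tokenizer, AutoModelForCausalLM

tok = T5Tokenizer.from_pretrained("rinna/japanese-gpt2-small")
lm = AutoModelForCausalLM.from_pretrained("./models")  # assumed fine-tuned checkpoint directory
ids = tok.encode("こんにちは", add_special_tokens=False, return_tensors="pt")
out = lm.generate(ids, max_length=60, do_sample=True, top_p=0.95,
                  pad_token_id=tok.pad_token_id)
print(tok.batch_decode(out, skip_special_tokens=True)[0])  # one sampled continuation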
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+transformers==4.20.1
+torch
+sentencepiece
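transformers is pinned to 4.20.1, the version the Space targets; sentencepiece is required by T5Tokenizer for the rinna model. A typical local setup, assuming the ./models checkpoint is present:

pip install -r requirements.txt
python app.py  # serves the Gradio UI on http://127.0.0.1:7860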