Commit 2ddb46f
Parent(s): b042daf
Update app.py

app.py CHANGED
@@ -52,44 +52,54 @@ def n_line_poem(input_letter):
         # Apply the dueum (initial sound) rule
         if val in dooeum.keys():
             val = dooeum[val]
-
-
-        while
+
+
+        while True:
             # If idx is 0 == the first letter
             if idx == 0:
                 # Encode the first letter
                 input_ids = tokenizer.encode(
                     val, add_special_tokens=False, return_tensors="pt")
+                # print(f"Encoding #{idx}: {input_ids}\n")  # 2-D tensor

                 # Generate a sentence from the first letter's encoding
                 output_sequence = model.generate(
                     input_ids=input_ids,
-                    do_sample=True, max_length=42,
-                    min_length=5, temperature=0.9, repetition_penalty=1.5
+                    do_sample=True, max_length=42,
+                    min_length=5, temperature=0.9, repetition_penalty=1.5,
+                    no_repeat_ngram_size=2)[0]
+                # print("generate result after encoding the first letter:", output_sequence, "\n")  # tensor

             # If it is not the first letter
             else:
-                #
-                # Remove the previous sentence's data from the sentence generated afterwards
-                link_with_pre_sentence = " ".join(res_l) + " " + val
-                # print(link_with_pre_sentence)
-
-                # Encode the linked sentence
+                # Encode the current letter
                 input_ids = tokenizer.encode(
-                    link_with_pre_sentence, add_special_tokens=False, return_tensors="pt")
+                    val, add_special_tokens=False, return_tensors="pt")
+                # print(f"Encoding of letter #{idx}: {input_ids} \n")
+
+                # For a smoother acrostic, link the previous encoding with the current one
+                link_with_pre_sentence = torch.cat((generated_sequence, input_ids[0]), 0)
+                link_with_pre_sentence = torch.reshape(link_with_pre_sentence, (1, len(link_with_pre_sentence)))
+                # print(f"Tensor linked with the previous one: {link_with_pre_sentence} \n")

                 # Generate a sentence from the encoding
                 output_sequence = model.generate(
-                    input_ids=
-                    do_sample=True, max_length=42,
-                    min_length=
-
+                    input_ids=link_with_pre_sentence,
+                    do_sample=True, max_length=42,
+                    min_length=5, temperature=0.9, repetition_penalty=1.5,
+                    no_repeat_ngram_size=2)[0]
+                # print(f"generate after encoding #{idx}: {output_sequence}")
+
             # Convert the generated sentence to a list (still encoded, with padding after the generated sentence)
-            generated_sequence = output_sequence.tolist()
+            generated_sequence = output_sequence.tolist()
+            # print(f"Encoded list #{idx}: {generated_sequence} \n")

             # Remove the padding by slicing up to the padding index; padding may be absent, so check with a conditional first
             if tokenizer.pad_token_id in generated_sequence:
                 generated_sequence = generated_sequence[:generated_sequence.index(tokenizer.pad_token_id)]
+
+            generated_sequence = torch.tensor(generated_sequence)
+            # print(f"Encoded list #{idx} as a tensor again after padding removal: {generated_sequence} \n")

             # If it is not the first letter, remove the previous sentence's encoding so that only the newly generated part enters the result list
             # print(generated_sequence)
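Note: the linking step added in this hunk can be exercised on its own. Below is a minimal sketch with dummy token ids standing in for the real tokenizer output; only the two torch operations are taken from the diff.

import torch

# Dummy stand-ins: `generated_sequence` is the 1-D tensor kept from the
# previous line, `input_ids` is the (1, n) encoding of the next letter.
generated_sequence = torch.tensor([101, 7432, 2057, 318])
input_ids = torch.tensor([[524]])

# Same two operations as the new code: concatenate along dim 0, then
# reshape to (1, length) so model.generate() receives a batch of one.
link_with_pre_sentence = torch.cat((generated_sequence, input_ids[0]), 0)
link_with_pre_sentence = torch.reshape(link_with_pre_sentence, (1, len(link_with_pre_sentence)))

print(link_with_pre_sentence.shape)  # torch.Size([1, 5])

The reshape could equally be written as link_with_pre_sentence.unsqueeze(0); the sketch keeps the diff's reshape form.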
@@ -97,32 +107,21 @@ def n_line_poem(input_letter):
                 # Slice past the previous sentence's length to remove the preceding sentence
                 generated_sequence = generated_sequence[len_sequence:]

-
-
-
-            # If it is the first letter
-            else:
-                # Store the sequence length
-                len_sequence = len(generated_sequence)
+            len_sequence = len(generated_sequence)
+            # print("len_seq", len_sequence)

-            #
-
-            # Decode the result
-            decoded_sequence = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)
-
-            if len(decoded_sequence) > 1:
+            # If the model just echoed the input back, retry; otherwise exit the while loop
+            if len_sequence > 1:
                 break
-
-            times += 1
-            continue
-
+
         # Put the result into the result list
-        res_l.append(
+        res_l.append(generated_sequence)

     poem_dict = {}

     for letter, res in zip(input_letter, res_l):
-
+        decode_res = tokenizer.decode(res, clean_up_tokenization_spaces=True)
+        poem_dict[poem(letter)] = decode_res

     return poem_dict

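Note: the retry logic introduced here replaces the old times counter and decode-based check with a plain length test. Below is a sketch of the control flow with a stubbed generation step; the stub and its echo probability are invented for illustration, and only the slice-and-break pattern comes from the diff.

import random

def generate_stub(prompt_len):
    # Hypothetical stand-in for model.generate(): sometimes returns the
    # prompt with nothing new appended, the case the loop guards against.
    extra = 0 if random.random() < 0.3 else random.randint(3, 8)
    return list(range(prompt_len + extra))

prompt_len = 4
while True:
    output = generate_stub(prompt_len)
    # Drop the prompt tokens, as generated_sequence[len_sequence:] does.
    generated_sequence = output[prompt_len:]
    len_sequence = len(generated_sequence)
    # Fewer than two new tokens means the model only echoed the prompt:
    # sample again. Otherwise keep the continuation and leave the loop.
    if len_sequence > 1:
        break

print(len_sequence, generated_sequence)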
@@ -189,5 +188,3 @@ with row2_1:
     for r in result:
         st.write(f'{r} : {result[r]}')

-
-
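Note: the padding handling in the first hunk (slice at pad_token_id, then re-tensorize) factors out cleanly. Below is a sketch with an assumed pad id of 3; with the real tokenizer this value would come from tokenizer.pad_token_id.

import torch

PAD_TOKEN_ID = 3  # assumption for this sketch; the real value is tokenizer.pad_token_id

def strip_padding(output_sequence):
    # Convert to a list, cut at the first pad token if one is present
    # (padding may be absent, hence the membership check), and return a
    # tensor so it can be concatenated with the next letter's encoding.
    generated_sequence = output_sequence.tolist()
    if PAD_TOKEN_ID in generated_sequence:
        generated_sequence = generated_sequence[:generated_sequence.index(PAD_TOKEN_ID)]
    return torch.tensor(generated_sequence)

print(strip_padding(torch.tensor([5, 9, 12, 3, 3, 3])))  # tensor([ 5,  9, 12])
print(strip_padding(torch.tensor([5, 9, 12])))           # unchanged: tensor([ 5,  9, 12])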