dk-davidekim committed
Commit
2ddb46f
•
1 Parent(s): b042daf

Update app.py

Files changed (1)
  1. app.py +35 -38
app.py CHANGED
@@ -52,44 +52,54 @@ def n_line_poem(input_letter):
        # Apply the dooeum (initial-sound) rule
        if val in dooeum.keys():
            val = dooeum[val]
-
-        times = 0
-        while times < 3:
+
+
+        while True:
            # If idx is 0 == the first letter
            if idx == 0:
                # Encode the first letter
                input_ids = tokenizer.encode(
                    val, add_special_tokens=False, return_tensors="pt")
+                # print(f"encoding {idx}: {input_ids}\n")  # 2-D tensor

                # Generate a sentence from the first letter's encoding
                output_sequence = model.generate(
                    input_ids=input_ids,
-                    do_sample=True, max_length=42, no_repeat_ngram_size=2,
-                    min_length=5, temperature=0.9, repetition_penalty=1.5)
+                    do_sample=True, max_length=42,
+                    min_length=5, temperature=0.9, repetition_penalty=1.5,
+                    no_repeat_ngram_size=2)[0]
+                # print("generate result after encoding the first letter:", output_sequence, "\n")  # tensor

            # If it is not the first letter
            else:
-                # For a smoother acrostic, join the previous sentences with the current syllable
-                # Then remove the previous sentences' data from the generated sentence
-                link_with_pre_sentence = " ".join(res_l) + " " + val
-                # print(link_with_pre_sentence)
-
-                # Encode the joined sentence
+                # One syllable
                input_ids = tokenizer.encode(
-                    link_with_pre_sentence, add_special_tokens=False, return_tensors="pt")
+                    val, add_special_tokens=False, return_tensors="pt")
+                # print(f"encoding of letter {idx}: {input_ids} \n")
+
+                # For a smoother acrostic, concatenate the previous encoding with the current one
+                link_with_pre_sentence = torch.cat((generated_sequence, input_ids[0]), 0)
+                link_with_pre_sentence = torch.reshape(link_with_pre_sentence, (1, len(link_with_pre_sentence)))
+                # print(f"tensor linked with the previous tensor: {link_with_pre_sentence} \n")

                # Generate a sentence from the encoding
                output_sequence = model.generate(
-                    input_ids=input_ids,
-                    do_sample=True, max_length=42, no_repeat_ngram_size=2,
-                    min_length=len_sequence, temperature=0.9, repetition_penalty=1.5)
-
+                    input_ids=link_with_pre_sentence,
+                    do_sample=True, max_length=42,
+                    min_length=5, temperature=0.9, repetition_penalty=1.5,
+                    no_repeat_ngram_size=2)[0]
+                # print(f"generate after encoding {idx}: {output_sequence}")
+
            # Convert the generated sentence to a list (still encoded, with padding after the generated text)
-            generated_sequence = output_sequence.tolist()[0]
+            generated_sequence = output_sequence.tolist()
+            # print(f"encoded list {idx}: {generated_sequence} \n")

            # Remove padding by slicing up to the padding index; there may be no padding, so check before removing
            if tokenizer.pad_token_id in generated_sequence:
                generated_sequence = generated_sequence[:generated_sequence.index(tokenizer.pad_token_id)]
+
+            generated_sequence = torch.tensor(generated_sequence)
+            # print(f"encoded list {idx} back to a tensor after removing padding: {generated_sequence} \n")

            # If it is not the first letter, drop the previous sentence's encoding so only the generated syllables go into the result list
            # print(generated_sequence)
@@ -97,32 +107,21 @@ def n_line_poem(input_letter):
                # Slice from the length of the previous sentence onward to remove that prefix
                generated_sequence = generated_sequence[len_sequence:]

-                # Update the length for the next syllable
-                len_sequence += len(generated_sequence)
-
-            # If it is the first letter
-            else:
-                # Store the sequence length
-                len_sequence = len(generated_sequence)
+            len_sequence = len(generated_sequence)
+            # print("len_seq", len_sequence)

-            # print(last_sequence)
-
-            # Decode the result
-            decoded_sequence = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)
-
-            if len(decoded_sequence) > 1:
+            # If the model only echoed the syllable, try again; otherwise leave the while loop
+            if len_sequence > 1:
                break
-            else:
-                times += 1
-                continue
-
+
        # Add the result to the result list
-        res_l.append(decoded_sequence)
+        res_l.append(generated_sequence)

    poem_dict = {}

    for letter, res in zip(input_letter, res_l):
-        poem_dict[poem(letter)] = res
+        decode_res = tokenizer.decode(res, clean_up_tokenization_spaces=True)
+        poem_dict[poem(letter)] = decode_res

    return poem_dict

@@ -189,5 +188,3 @@ with row2_1:
    for r in result:
        st.write(f'{r} : {result[r]}')

-
-
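Taken together, the commit replaces the old string-level prompt (joining every previous line with the next syllable and re-encoding the whole thing) with token-level prompting: the previous line's token tensor is concatenated with the next syllable's encoding, the carried-over prefix is sliced off after generation, and generation is retried whenever only the syllable itself comes back. The sketch below is a minimal, self-contained illustration of that loop, not the app's exact code: the checkpoint name skt/kogpt2-base-v2, the function name acrostic, and the omission of the app's dooeum substitution and poem() key helper are assumptions made here for brevity.

# Minimal sketch of the revised generation loop (assumed names; the public
# skt/kogpt2-base-v2 checkpoint stands in for whatever model the app loads).
import torch
from transformers import GPT2LMHeadModel, PreTrainedTokenizerFast

tokenizer = PreTrainedTokenizerFast.from_pretrained(
    "skt/kogpt2-base-v2",
    bos_token="</s>", eos_token="</s>", unk_token="<unk>",
    pad_token="<pad>", mask_token="<mask>")
model = GPT2LMHeadModel.from_pretrained("skt/kogpt2-base-v2")

def acrostic(letters):
    lines = []      # token tensor of each finished line
    prev = None     # token tensor of the previous line, reused as the next prompt
    for idx, letter in enumerate(letters):
        while True:
            ids = tokenizer.encode(letter, add_special_tokens=False,
                                   return_tensors="pt")
            if idx == 0:
                prompt = ids
            else:
                # Token-level linking: previous line's tokens + this syllable's tokens
                prompt = torch.cat((prev, ids[0]), 0).unsqueeze(0)
            out = model.generate(input_ids=prompt, do_sample=True, max_length=42,
                                 min_length=5, temperature=0.9,
                                 repetition_penalty=1.5, no_repeat_ngram_size=2)[0]
            seq = out.tolist()
            # Strip trailing padding, if any, before slicing
            if tokenizer.pad_token_id in seq:
                seq = seq[:seq.index(tokenizer.pad_token_id)]
            # Drop the carried-over prefix so only this line's tokens remain
            new_line = torch.tensor(seq if idx == 0 else seq[len(prev):])
            if len(new_line) > 1:   # retry when the model only echoed the syllable
                break
        prev = new_line
        lines.append(new_line)
    # Decode only at the end, as the updated app.py now does
    return {letter: tokenizer.decode(toks, clean_up_tokenization_spaces=True)
            for letter, toks in zip(letters, lines)}

print(acrostic("삼행시"))

One visible consequence of the design change: each line is now conditioned only on the line directly above it (the previous token tensor) rather than on the decoded text of all earlier lines, which keeps the prompt short at the cost of longer-range context.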