dk-davidekim committed on
Commit
7c4c56e
โ€ข
1 Parent(s): 1b167ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -32
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import requests
2
  import streamlit as st
3
  from streamlit_lottie import st_lottie
@@ -7,10 +8,11 @@ import re
7
 
8
  # Page Config
9
  st.set_page_config(
10
- page_title="๋…ธ๋ž˜ ๊ฐ€์‚ฌ nํ–‰์‹œ",
11
  page_icon="๐Ÿ’Œ",
12
  layout="wide"
13
  )
 
14
 
15
  ### Model
16
  tokenizer = AutoTokenizer.from_pretrained("wumusill/final_project_kogpt2")
@@ -22,6 +24,20 @@ def load_model():
22
 
23
  model = load_model()
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # Class : Dict ์ค‘๋ณต ํ‚ค ์ถœ๋ ฅ
26
  class poem(object):
27
  def __init__(self,letter):
@@ -34,7 +50,85 @@ class poem(object):
34
  return "'"+self.letter+"'"
35
 
36
 
37
- def n_line_poem(input_letter):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  # ๋‘์Œ ๋ฒ•์น™ ์‚ฌ์ „
40
  dooeum = {"๋ผ":"๋‚˜", "๋ฝ":"๋‚™", "๋ž€":"๋‚œ", "๋ž„":"๋‚ ", "๋žŒ":"๋‚จ", "๋ž":"๋‚ฉ", "๋ž‘":"๋‚ญ",
@@ -66,8 +160,11 @@ def n_line_poem(input_letter):
66
  # ์ฒซ ๊ธ€์ž ์ธ์ฝ”๋”ฉ ๊ฐ’์œผ๋กœ ๋ฌธ์žฅ ์ƒ์„ฑ
67
  output_sequence = model.generate(
68
  input_ids=input_ids,
69
- do_sample=True, max_length=42,
70
- min_length=5, temperature=0.9, repetition_penalty=1.5,
 
 
 
71
  no_repeat_ngram_size=2)[0]
72
  # print("์ฒซ ๊ธ€์ž ์ธ์ฝ”๋”ฉ ํ›„ generate ๊ฒฐ๊ณผ:", output_sequence, "\n") # tensor
73
 
@@ -86,8 +183,11 @@ def n_line_poem(input_letter):
86
  # ์ธ์ฝ”๋”ฉ ๊ฐ’์œผ๋กœ ๋ฌธ์žฅ ์ƒ์„ฑ
87
  output_sequence = model.generate(
88
  input_ids=link_with_pre_sentence,
89
- do_sample=True, max_length=42,
90
- min_length=5, temperature=0.9, repetition_penalty=1.5,
 
 
 
91
  no_repeat_ngram_size=2)[0]
92
  # print(f"{idx}๋ฒˆ ์ธ์ฝ”๋”ฉ ํ›„ generate : {output_sequence}")
93
 
@@ -118,7 +218,7 @@ def n_line_poem(input_letter):
118
  # ๊ฒฐ๊ณผ๋ฌผ ๋ฆฌ์ŠคํŠธ์— ๋‹ด๊ธฐ
119
  res_l.append(generated_sequence)
120
 
121
- poem_dict = {}
122
 
123
  for letter, res in zip(input_letter, res_l):
124
  decode_res = tokenizer.decode(res, clean_up_tokenization_spaces=True, skip_special_tokens=True)
@@ -126,8 +226,6 @@ def n_line_poem(input_letter):
126
 
127
  return poem_dict
128
 
129
- ###
130
-
131
  # Image(.gif)
132
  @st.cache(show_spinner=False)
133
  def load_lottieurl(url: str):
@@ -152,11 +250,10 @@ with row0_1:
152
  st.markdown("### ๐Ÿฆ๋ฉ‹์Ÿ์ด์‚ฌ์ž์ฒ˜๋Ÿผ AIS7๐Ÿฆ - ํŒŒ์ด๋„ ํ”„๋กœ์ ํŠธ")
153
 
154
  with row0_2:
155
- st.write("")
156
- st.write("")
157
  st.write("")
158
  st.subheader("1์กฐ - ํ•ดํŒŒ๋ฆฌ")
159
- st.write("์ด์ง€ํ˜œ, ์ตœ์ง€์˜, ๊ถŒ์†Œํฌ, ๋ฌธ์ข…ํ˜„, ๊ตฌ์žํ˜„, ๊น€์˜์ค€")
 
160
 
161
  st.write('---')
162
 
@@ -165,34 +262,49 @@ row1_spacer1, row1_1, row1_spacer2 = st.columns((0.01, 0.01, 0.01))
165
 
166
  with row1_1:
167
  st.markdown("### nํ–‰์‹œ ๊ฐ€์ด๋“œ๋ผ์ธ")
168
- st.markdown("1. ํ•˜๋‹จ์— ์žˆ๋Š” ํ…์ŠคํŠธ๋ฐ”์— 5์ž ์ดํ•˜ ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ๋„ฃ์–ด์ฃผ์„ธ์š”")
169
  st.markdown("2. 'nํ–‰์‹œ ์ œ์ž‘ํ•˜๊ธฐ' ๋ฒ„ํŠผ์„ ํด๋ฆญํ•ด์ฃผ์„ธ์š”")
 
 
 
170
 
171
  st.write('---')
172
 
173
  # Model & Input
174
  row2_spacer1, row2_1, row2_spacer2= st.columns((0.01, 0.01, 0.01))
175
 
 
 
176
  # Word Input
177
  with row2_1:
178
- word_input = st.text_input(
179
- "nํ–‰์‹œ์— ์‚ฌ์šฉํ•  ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ์ ๊ณ  ๋ฒ„ํŠผ์„ ๋ˆŒ๋Ÿฌ์ฃผ์„ธ์š”.(์ตœ๋Œ€ 5์ž) ๐Ÿ‘‡",
180
- placeholder='ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”',
181
- max_chars=5
182
- )
183
- word_input = re.sub("[^๊ฐ€-ํžฃ]", "", word_input)
184
-
185
- if st.button('nํ–‰์‹œ ์ œ์ž‘ํ•˜๊ธฐ'):
186
- if word_input == "":
187
- st.error("์˜จ์ „ํ•œ ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ์‚ฌ์šฉํ•ด์ฃผ์„ธ์š”!")
188
-
189
- else:
190
- st.write("nํ–‰์‹œ ๋‹จ์–ด : ", word_input)
191
- with st.spinner('์ž ์‹œ ๊ธฐ๋‹ค๋ ค์ฃผ์„ธ์š”...'):
192
- result = n_line_poem(word_input)
193
- st.success('์™„๋ฃŒ๋์Šต๋‹ˆ๋‹ค!')
194
- for r in result:
195
- st.write(f'{r} : {result[r]}')
196
-
197
 
 
 
 
 
198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
  import requests
3
  import streamlit as st
4
  from streamlit_lottie import st_lottie
 
8
 
9
  # Page Config
10
  st.set_page_config(
11
+ page_title="๋…ธ๋ž˜ ๊ฐ€์‚ฌ nํ–‰์‹œ Beta",
12
  page_icon="๐Ÿ’Œ",
13
  layout="wide"
14
  )
15
+ # st.text(os.listdir(os.curdir))
16
 
17
  ### Model
18
  tokenizer = AutoTokenizer.from_pretrained("wumusill/final_project_kogpt2")
 
24
 
25
  model = load_model()
26
 
27
# Word dataset: ballad-lyric words used to seed generated lines.
@st.cache(show_spinner=False)
def get_word():
    """Load the ballad word list (cp949-encoded CSV) once and cache it."""
    return pd.read_csv("ballad_word.csv", encoding="cp949")


word = get_word()


# NOTE(review): debug leftover — draws one random word starting with "ํ•œ";
# the value is only used by the commented-out st.header checks below, but the
# .sample(1) call does consume RNG state, so it is kept for identical behavior.
one = word[word["0"].str.startswith("ํ•œ")].sample(1).values[0][0]
# st.header(type(one))
# st.header(one)
41
  # Class : Dict ์ค‘๋ณต ํ‚ค ์ถœ๋ ฅ
42
  class poem(object):
43
  def __init__(self,letter):
 
50
  return "'"+self.letter+"'"
51
 
52
 
53
def beta_poem(input_letter):
    """Generate an acrostic poem ("beta" mode) for the given word.

    For each syllable of *input_letter*, a seed word starting with that
    syllable is looked up in the ballad-word dataset (global ``word``), then
    the line is completed with the KoGPT2 model, conditioned on all
    previously generated lines for smoother flow.

    Parameters
    ----------
    input_letter : str
        Korean word; one poem line is generated per syllable.

    Returns
    -------
    dict
        ``{"Type": "beta"}`` plus one ``poem(letter) -> line`` entry per
        syllable (``poem`` wrapper allows duplicate letters as dict keys).
    """
    # Initial-sound (dooeum) rule: map syllables that cannot start a Korean
    # word to their word-initial equivalents.
    dooeum = {"๋ผ":"๋‚˜", "๋ฝ":"๋‚™", "๋ž€":"๋‚œ", "๋ž„":"๋‚ ", "๋žŒ":"๋‚จ", "๋ž":"๋‚ฉ", "๋ž‘":"๋‚ญ",
              "๋ž˜":"๋‚ด", "๋žญ":"๋ƒ‰", "๋ƒ‘":"์•ฝ", "๋žต":"์•ฝ", "๋ƒฅ":"์–‘", "๋Ÿ‰":"์–‘", "๋…€":"์—ฌ",
              "๋ ค":"์—ฌ", "๋…":"์—ญ", "๋ ฅ":"์—ญ", "๋…„":"์—ฐ", "๋ จ":"์—ฐ", "๋…ˆ":"์—ด", "๋ ฌ":"์—ด",
              "๋…":"์—ผ", "๋ ด":"์—ผ", "๋ ต":"์—ฝ", "๋…•":"์˜", "๋ น":"์˜", "๋…œ":"์˜ˆ", "๋ก€":"์˜ˆ",
              "๋กœ":"๋…ธ", "๋ก":"๋…น", "๋ก ":"๋…ผ", "๋กฑ":"๋†", "๋ขฐ":"๋‡Œ", "๋‡จ":"์š”", "๋ฃŒ":"์š”",
              "๋ฃก":"์šฉ", "๋ฃจ":"๋ˆ„", "๋‰ด":"์œ ", "๋ฅ˜":"์œ ", "๋‰ต":"์œก", "๋ฅ™":"์œก", "๋ฅœ":"์œค",
              "๋ฅ ":"์œจ", "๋ฅญ":"์œต", "๋ฅต":"๋Š‘", "๋ฆ„":"๋Š ", "๋ฆ‰":"๋Šฅ", "๋‹ˆ":"์ด", "๋ฆฌ":"์ด",
              "๋ฆฐ":'์ธ', '๋ฆผ':'์ž„', '๋ฆฝ':'์ž…'}

    # Generated lines, one per input syllable.
    res_l = []
    # Token count of everything generated so far (used as min_length floor).
    len_sequence = 0

    # Walk the input one syllable at a time, with its index.
    for idx, val in enumerate(input_letter):
        # Apply the dooeum rule.
        if val in dooeum.keys():
            val = dooeum[val]

        # Seed the line with a ballad word that starts with this syllable.
        # FIX: was a bare `except:` that swallowed every error (including
        # KeyboardInterrupt). `.sample(1)` raises ValueError when no word
        # matches; KeyError covers a missing "0" column.
        try:
            one = word[word["0"].str.startswith(val)].sample(1).values[0][0]
            # st.text(one)
        except (ValueError, KeyError):
            # No dataset word starts with this syllable — use the syllable.
            one = val

        # For a smoother poem, prepend the previously generated lines to the
        # current seed; the prefix is stripped back out after generation.
        link_with_pre_sentence = (" ".join(res_l) + " " + one + " " if idx != 0 else one).strip()
        # print(link_with_pre_sentence)

        # Encode the linked sentence.
        input_ids = tokenizer.encode(link_with_pre_sentence, add_special_tokens=False, return_tensors="pt")

        # Generate a sentence from the encoded input.
        output_sequence = model.generate(
            input_ids=input_ids,
            do_sample=True,
            max_length=42,
            min_length=len_sequence + 2,
            temperature=0.9,
            repetition_penalty=1.5,
            no_repeat_ngram_size=2)

        # Token-id list (still encoded; may carry trailing padding).
        generated_sequence = output_sequence.tolist()[0]

        # Slice off padding if present. If the freshly generated part is
        # shorter than ~4 tokens, force a length of 8 past the prefix.
        if tokenizer.pad_token_id in generated_sequence:
            check_index = generated_sequence.index(tokenizer.pad_token_id)
            check_index = check_index if check_index - len_sequence > 3 else len_sequence + 8
            generated_sequence = generated_sequence[:check_index]

        # Locate the last occurrence of the seed word's first token so only
        # the newly generated line (not the prefix) is kept.
        word_encode = tokenizer.encode(one, add_special_tokens=False, return_tensors="pt").tolist()[0][0]
        split_index = len(generated_sequence) - 1 - generated_sequence[::-1].index(word_encode)

        # Drop the encoded prefix of the previous lines.
        generated_sequence = generated_sequence[split_index:]

        # print(tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True, skip_special_tokens=True))
        # Update the running length for the next syllable.
        len_sequence += len([elem for elem in generated_sequence if elem not in (tokenizer.all_special_ids)])
        # Decode the result.
        decoded_sequence = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True, skip_special_tokens=True)

        # Collect the finished line.
        res_l.append(decoded_sequence)

    poem_dict = {"Type": "beta"}

    for letter, res in zip(input_letter, res_l):
        # decode_res = tokenizer.decode(res, clean_up_tokenization_spaces=True, skip_special_tokens=True)
        poem_dict[poem(letter)] = res

    return poem_dict
130
+
131
+ def alpha_poem(input_letter):
132
 
133
  # ๋‘์Œ ๋ฒ•์น™ ์‚ฌ์ „
134
  dooeum = {"๋ผ":"๋‚˜", "๋ฝ":"๋‚™", "๋ž€":"๋‚œ", "๋ž„":"๋‚ ", "๋žŒ":"๋‚จ", "๋ž":"๋‚ฉ", "๋ž‘":"๋‚ญ",
 
160
  # ์ฒซ ๊ธ€์ž ์ธ์ฝ”๋”ฉ ๊ฐ’์œผ๋กœ ๋ฌธ์žฅ ์ƒ์„ฑ
161
  output_sequence = model.generate(
162
  input_ids=input_ids,
163
+ do_sample=True,
164
+ max_length=42,
165
+ min_length=5,
166
+ temperature=0.9,
167
+ repetition_penalty=1.7,
168
  no_repeat_ngram_size=2)[0]
169
  # print("์ฒซ ๊ธ€์ž ์ธ์ฝ”๋”ฉ ํ›„ generate ๊ฒฐ๊ณผ:", output_sequence, "\n") # tensor
170
 
 
183
  # ์ธ์ฝ”๋”ฉ ๊ฐ’์œผ๋กœ ๋ฌธ์žฅ ์ƒ์„ฑ
184
  output_sequence = model.generate(
185
  input_ids=link_with_pre_sentence,
186
+ do_sample=True,
187
+ max_length=42,
188
+ min_length=5,
189
+ temperature=0.9,
190
+ repetition_penalty=1.7,
191
  no_repeat_ngram_size=2)[0]
192
  # print(f"{idx}๋ฒˆ ์ธ์ฝ”๋”ฉ ํ›„ generate : {output_sequence}")
193
 
 
218
  # ๊ฒฐ๊ณผ๋ฌผ ๋ฆฌ์ŠคํŠธ์— ๋‹ด๊ธฐ
219
  res_l.append(generated_sequence)
220
 
221
+ poem_dict = {"Type":"alpha"}
222
 
223
  for letter, res in zip(input_letter, res_l):
224
  decode_res = tokenizer.decode(res, clean_up_tokenization_spaces=True, skip_special_tokens=True)
 
226
 
227
  return poem_dict
228
 
 
 
229
  # Image(.gif)
230
  @st.cache(show_spinner=False)
231
  def load_lottieurl(url: str):
 
250
  st.markdown("### ๐Ÿฆ๋ฉ‹์Ÿ์ด์‚ฌ์ž์ฒ˜๋Ÿผ AIS7๐Ÿฆ - ํŒŒ์ด๋„ ํ”„๋กœ์ ํŠธ")
251
 
252
with row0_2:
    # Team credit column of the page header.
    st.write("")
    st.subheader("1์กฐ - ํ•ดํŒŒ๋ฆฌ")
    for members in ("์ด์ง€ํ˜œ, ์ตœ์ง€์˜, ๊ถŒ์†Œํฌ", "๋ฌธ์ข…ํ˜„, ๊ตฌ์žํ˜„, ๊น€์˜์ค€"):
        st.write(members)
257
 
258
  st.write('---')
259
 
 
262
 
263
  with row1_1:
264
  st.markdown("### nํ–‰์‹œ ๊ฐ€์ด๋“œ๋ผ์ธ")
265
+ st.markdown("1. ํ•˜๋‹จ์— ์žˆ๋Š” ํ…์ŠคํŠธ๋ฐ”์— 5์ž ์ดํ•˜๋กœ ๋œ, ์™„์„ฑ๋œ ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ๋„ฃ์–ด์ฃผ์„ธ์š”")
266
  st.markdown("2. 'nํ–‰์‹œ ์ œ์ž‘ํ•˜๊ธฐ' ๋ฒ„ํŠผ์„ ํด๋ฆญํ•ด์ฃผ์„ธ์š”")
267
+ st.markdown("* nํ–‰์‹œ ํƒ€์ž… ์„ค์ •\n"
268
+ " * Alpha ver. : ๋ชจ๋ธ์ด ์ฒซ ์Œ์ ˆ๋ถ€ํ„ฐ ์ƒ์„ฑ\n"
269
+ " * Beta ver. : ์ฒซ ์Œ์ ˆ์„ ๋ฐ์ดํ„ฐ์…‹์—์„œ ์ฐพ๊ณ , ๋‹ค์Œ ๋ถ€๋ถ„์„ ์ƒ์„ฑ")
270
 
271
  st.write('---')
272
 
273
# Model & Input
row2_spacer1, row2_1, row2_spacer2 = st.columns((0.01, 0.01, 0.01))

col1, col2 = st.columns(2)

# Word Input
with row2_1:

    with col1:
        # Choose which generator variant backs the "make poem" button.
        genre = st.radio(
            "nํ–‰์‹œ ํƒ€์ž… ์„ ํƒ",
            ('Alpha', 'Beta(test์ค‘)'))

        # Alpha generates from scratch; anything else uses the beta flow.
        n_line_poem = alpha_poem if genre == 'Alpha' else beta_poem

    with col2:
        word_input = st.text_input(
            "nํ–‰์‹œ์— ์‚ฌ์šฉํ•  ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ์ ๊ณ  ๋ฒ„ํŠผ์„ ๋ˆŒ๋Ÿฌ์ฃผ์„ธ์š”.(์ตœ๋Œ€ 5์ž) ๐Ÿ‘‡",
            placeholder='ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”',
            max_chars=5
        )
        # Keep complete Hangul syllables only; everything else is stripped.
        word_input = re.sub("[^๊ฐ€-ํžฃ]", "", word_input)

        if st.button('nํ–‰์‹œ ์ œ์ž‘ํ•˜๊ธฐ'):
            if word_input == "":
                st.error("์˜จ์ „ํ•œ ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ์‚ฌ์šฉํ•ด์ฃผ์„ธ์š”!")
            else:
                st.write("nํ–‰์‹œ ๋‹จ์–ด : ", word_input)
                with st.spinner('์ž ์‹œ ๊ธฐ๋‹ค๋ ค์ฃผ์„ธ์š”...'):
                    result = n_line_poem(word_input)
                    st.success('์™„๋ฃŒ๋์Šต๋‹ˆ๋‹ค!')
                    for key in result:
                        st.write(f'{key} : {result[key]}')