dk-davidekim committed on
Commit
2220c11
โ€ข
1 Parent(s): 4c192ae
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. LICENSE +21 -0
  3. README.md +2 -11
  4. ballad_word.csv +3 -0
  5. main.py +198 -0
  6. pages/beta.py +312 -0
  7. requirements.txt +0 -6
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ ballad_word.csv filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 happyFinal
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,3 @@
1
- ---
2
- title: HappyFinal Streamlit
3
- emoji: ๐Ÿจ
4
- colorFrom: red
5
- colorTo: yellow
6
- sdk: streamlit
7
- sdk_version: 1.15.2
8
- app_file: app.py
9
- pinned: false
10
- ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # streamlit
 
 
 
 
 
 
 
 
 
2
 
3
+ https://happyfinal-streamlit-main-cjyf11.streamlit.app/
ballad_word.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccbc1980b49a3efe1bc291f04987d6d523fe0366621ae473e9a5162d103aa738
3
+ size 1852955
main.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import streamlit as st
3
+ from streamlit_lottie import st_lottie
4
+ import torch
5
+ from transformers import AutoTokenizer, AutoModelForCausalLM
6
+ import re
7
+
8
+ # Page Config
9
+ st.set_page_config(
10
+ page_title="๋…ธ๋ž˜ ๊ฐ€์‚ฌ nํ–‰์‹œ",
11
+ page_icon="๐Ÿ’Œ",
12
+ layout="wide"
13
+ )
14
+
15
+ ### Model
16
+ tokenizer = AutoTokenizer.from_pretrained("wumusill/final_project_kogpt2")
17
+
18
+ @st.cache(show_spinner=False)
19
+ def load_model():
20
+ model = AutoModelForCausalLM.from_pretrained("wumusill/final_project_kogpt2")
21
+ return model
22
+
23
+ model = load_model()
24
+
25
+ # Class : Dict ์ค‘๋ณต ํ‚ค ์ถœ๋ ฅ
26
+ class poem(object):
27
+ def __init__(self,letter):
28
+ self.letter = letter
29
+
30
+ def __str__(self):
31
+ return self.letter
32
+
33
+ def __repr__(self):
34
+ return "'"+self.letter+"'"
35
+
36
+
37
+ def n_line_poem(input_letter):
38
+
39
+ # ๋‘์Œ ๋ฒ•์น™ ์‚ฌ์ „
40
+ dooeum = {"๋ผ":"๋‚˜", "๋ฝ":"๋‚™", "๋ž€":"๋‚œ", "๋ž„":"๋‚ ", "๋žŒ":"๋‚จ", "๋ž":"๋‚ฉ", "๋ž‘":"๋‚ญ",
41
+ "๋ž˜":"๋‚ด", "๋žญ":"๋ƒ‰", "๋ƒ‘":"์•ฝ", "๋žต":"์•ฝ", "๋ƒฅ":"์–‘", "๋Ÿ‰":"์–‘", "๋…€":"์—ฌ",
42
+ "๋ ค":"์—ฌ", "๋…":"์—ญ", "๋ ฅ":"์—ญ", "๋…„":"์—ฐ", "๋ จ":"์—ฐ", "๋…ˆ":"์—ด", "๋ ฌ":"์—ด",
43
+ "๋…":"์—ผ", "๋ ด":"์—ผ", "๋ ต":"์—ฝ", "๋…•":"์˜", "๋ น":"์˜", "๋…œ":"์˜ˆ", "๋ก€":"์˜ˆ",
44
+ "๋กœ":"๋…ธ", "๋ก":"๋…น", "๋ก ":"๋…ผ", "๋กฑ":"๋†", "๋ขฐ":"๋‡Œ", "๋‡จ":"์š”", "๋ฃŒ":"์š”",
45
+ "๋ฃก":"์šฉ", "๋ฃจ":"๋ˆ„", "๋‰ด":"์œ ", "๋ฅ˜":"์œ ", "๋‰ต":"์œก", "๋ฅ™":"์œก", "๋ฅœ":"์œค",
46
+ "๋ฅ ":"์œจ", "๋ฅญ":"์œต", "๋ฅต":"๋Š‘", "๋ฆ„":"๋Š ", "๋ฆ‰":"๋Šฅ", "๋‹ˆ":"์ด", "๋ฆฌ":"์ด",
47
+ "๋ฆฐ":'์ธ', '๋ฆผ':'์ž„', '๋ฆฝ':'์ž…'}
48
+ # ๊ฒฐ๊ณผ๋ฌผ์„ ๋‹ด์„ list
49
+ res_l = []
50
+
51
+ # ํ•œ ๊ธ€์ž์”ฉ ์ธ๋ฑ์Šค์™€ ํ•จ๊ป˜ ๊ฐ€์ ธ์˜ด
52
+ for idx, val in enumerate(input_letter):
53
+ # ๋‘์Œ ๋ฒ•์น™ ์ ์šฉ
54
+ if val in dooeum.keys():
55
+ val = dooeum[val]
56
+
57
+
58
+ while True:
59
+ # ๋งŒ์•ฝ idx ๊ฐ€ 0 ์ด๋ผ๋ฉด == ์ฒซ ๊ธ€์ž
60
+ if idx == 0:
61
+ # ์ฒซ ๊ธ€์ž ์ธ์ฝ”๋”ฉ
62
+ input_ids = tokenizer.encode(
63
+ val, add_special_tokens=False, return_tensors="pt")
64
+ # print(f"{idx}๋ฒˆ ์ธ์ฝ”๋”ฉ : {input_ids}\n") # 2์ฐจ์› ํ…์„œ
65
+
66
+ # ์ฒซ ๊ธ€์ž ์ธ์ฝ”๋”ฉ ๊ฐ’์œผ๋กœ ๋ฌธ์žฅ ์ƒ์„ฑ
67
+ output_sequence = model.generate(
68
+ input_ids=input_ids,
69
+ do_sample=True, max_length=42,
70
+ min_length=5, temperature=0.9, repetition_penalty=1.5,
71
+ no_repeat_ngram_size=2)[0]
72
+ # print("์ฒซ ๊ธ€์ž ์ธ์ฝ”๋”ฉ ํ›„ generate ๊ฒฐ๊ณผ:", output_sequence, "\n") # tensor
73
+
74
+ # ์ฒซ ๊ธ€์ž๊ฐ€ ์•„๋‹ˆ๋ผ๋ฉด
75
+ else:
76
+ # ํ•œ ์Œ์ ˆ
77
+ input_ids = tokenizer.encode(
78
+ val, add_special_tokens=False, return_tensors="pt")
79
+ # print(f"{idx}๋ฒˆ ์งธ ๊ธ€์ž ์ธ์ฝ”๋”ฉ : {input_ids} \n")
80
+
81
+ # ์ข€๋” ๋งค๋„๋Ÿฌ์šด ์‚ผํ–‰์‹œ๋ฅผ ์œ„ํ•ด ์ด์ „ ์ธ์ฝ”๋”ฉ๊ณผ ์ง€๊ธˆ ์ธ์ฝ”๋”ฉ ์—ฐ๊ฒฐ
82
+ link_with_pre_sentence = torch.cat((generated_sequence, input_ids[0]), 0)
83
+ link_with_pre_sentence = torch.reshape(link_with_pre_sentence, (1, len(link_with_pre_sentence)))
84
+ # print(f"์ด์ „ ํ…์„œ์™€ ์—ฐ๊ฒฐ๋œ ํ…์„œ {link_with_pre_sentence} \n")
85
+
86
+ # ์ธ์ฝ”๋”ฉ ๊ฐ’์œผ๋กœ ๋ฌธ์žฅ ์ƒ์„ฑ
87
+ output_sequence = model.generate(
88
+ input_ids=link_with_pre_sentence,
89
+ do_sample=True, max_length=42,
90
+ min_length=5, temperature=0.9, repetition_penalty=1.5,
91
+ no_repeat_ngram_size=2)[0]
92
+ # print(f"{idx}๋ฒˆ ์ธ์ฝ”๋”ฉ ํ›„ generate : {output_sequence}")
93
+
94
+ # ์ƒ์„ฑ๋œ ๋ฌธ์žฅ ๋ฆฌ์ŠคํŠธ๋กœ ๋ณ€ํ™˜ (์ธ์ฝ”๋”ฉ ๋˜์–ด์žˆ๊ณ , ์ƒ์„ฑ๋œ ๋ฌธ์žฅ ๋’ค๋กœ padding ์ด ์žˆ๋Š” ์ƒํƒœ)
95
+ generated_sequence = output_sequence.tolist()
96
+ # print(f"{idx}๋ฒˆ ์ธ์ฝ”๋”ฉ ๋ฆฌ์ŠคํŠธ : {generated_sequence} \n")
97
+
98
+ # padding index ์•ž๊นŒ์ง€ slicing ํ•จ์œผ๋กœ์จ padding ์ œ๊ฑฐ, padding์ด ์—†์„ ์ˆ˜๋„ ์žˆ๊ธฐ ๋•Œ๋ฌธ์— ์กฐ๊ฑด๋ฌธ ํ™•์ธ ํ›„ ์ œ๊ฑฐ
99
+ if tokenizer.pad_token_id in generated_sequence:
100
+ generated_sequence = generated_sequence[:generated_sequence.index(tokenizer.pad_token_id)]
101
+
102
+ generated_sequence = torch.tensor(generated_sequence)
103
+ # print(f"{idx}๋ฒˆ ์ธ์ฝ”๋”ฉ ๋ฆฌ์ŠคํŠธ ํŒจ๋”ฉ ์ œ๊ฑฐ ํ›„ ๋‹ค์‹œ ํ…์„œ : {generated_sequence} \n")
104
+
105
+ # ์ฒซ ๊ธ€์ž๊ฐ€ ์•„๋‹ˆ๋ผ๋ฉด, generate ๋œ ์Œ์ ˆ๋งŒ ๊ฒฐ๊ณผ๋ฌผ list์— ๋“ค์–ด๊ฐˆ ์ˆ˜ ์žˆ๊ฒŒ ์•ž ๋ฌธ์žฅ์— ๋Œ€ํ•œ ์ธ์ฝ”๋”ฉ ๊ฐ’ ์ œ๊ฑฐ
106
+ # print(generated_sequence)
107
+ if idx != 0:
108
+ # ์ด์ „ ๋ฌธ์žฅ์˜ ๊ธธ์ด ์ดํ›„๋กœ ์Šฌ๋ผ์ด์‹ฑํ•ด์„œ ์•ž ๋ฌธ์žฅ ์ œ๊ฑฐ
109
+ generated_sequence = generated_sequence[len_sequence:]
110
+
111
+ len_sequence = len(generated_sequence)
112
+ # print("len_seq", len_sequence)
113
+
114
+ # ์Œ์ ˆ ๊ทธ๋Œ€๋กœ ๋ฑ‰์œผ๋ฉด ๋‹ค์‹œ ํ•ด์™€, ์•„๋‹ˆ๋ฉด while๋ฌธ ํƒˆ์ถœ
115
+ if len_sequence > 1:
116
+ break
117
+
118
+ # ๊ฒฐ๊ณผ๋ฌผ ๋ฆฌ์ŠคํŠธ์— ๋‹ด๊ธฐ
119
+ res_l.append(generated_sequence)
120
+
121
+ poem_dict = {}
122
+
123
+ for letter, res in zip(input_letter, res_l):
124
+ decode_res = tokenizer.decode(res, clean_up_tokenization_spaces=True, skip_special_tokens=True)
125
+ poem_dict[poem(letter)] = decode_res
126
+
127
+ return poem_dict
128
+
129
+ ###
130
+
131
+ # Image(.gif)
132
+ @st.cache(show_spinner=False)
133
+ def load_lottieurl(url: str):
134
+ r = requests.get(url)
135
+ if r.status_code != 200:
136
+ return None
137
+ return r.json()
138
+
139
+ lottie_url = "https://assets7.lottiefiles.com/private_files/lf30_fjln45y5.json"
140
+
141
+ lottie_json = load_lottieurl(lottie_url)
142
+ st_lottie(lottie_json, speed=1, height=200, key="initial")
143
+
144
+
145
+ # Title
146
+ row0_spacer1, row0_1, row0_spacer2, row0_2, row0_spacer3 = st.columns(
147
+ (0.01, 2, 0.05, 0.5, 0.01)
148
+ )
149
+
150
+ with row0_1:
151
+ st.markdown("# ํ•œ๊ธ€ ๋…ธ๋ž˜ ๊ฐ€์‚ฌ nํ–‰์‹œโœ")
152
+ st.markdown("### ๐Ÿฆ๋ฉ‹์Ÿ์ด์‚ฌ์ž์ฒ˜๋Ÿผ AIS7๐Ÿฆ - ํŒŒ์ด๋„ ํ”„๋กœ์ ํŠธ")
153
+
154
+ with row0_2:
155
+ st.write("")
156
+ st.write("")
157
+ st.write("")
158
+ st.subheader("1์กฐ - ํ•ดํŒŒ๋ฆฌ")
159
+ st.write("์ด์ง€ํ˜œ, ์ตœ์ง€์˜, ๊ถŒ์†Œํฌ, ๋ฌธ์ข…ํ˜„, ๊ตฌ์žํ˜„, ๊น€์˜์ค€")
160
+
161
+ st.write('---')
162
+
163
+ # Explanation
164
+ row1_spacer1, row1_1, row1_spacer2 = st.columns((0.01, 0.01, 0.01))
165
+
166
+ with row1_1:
167
+ st.markdown("### nํ–‰์‹œ ๊ฐ€์ด๋“œ๋ผ์ธ")
168
+ st.markdown("1. ํ•˜๋‹จ์— ์žˆ๋Š” ํ…์ŠคํŠธ๋ฐ”์— 5์ž ์ดํ•˜ ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ๋„ฃ์–ด์ฃผ์„ธ์š”")
169
+ st.markdown("2. 'nํ–‰์‹œ ์ œ์ž‘ํ•˜๊ธฐ' ๋ฒ„ํŠผ์„ ํด๋ฆญํ•ด์ฃผ์„ธ์š”")
170
+
171
+ st.write('---')
172
+
173
+ # Model & Input
174
+ row2_spacer1, row2_1, row2_spacer2= st.columns((0.01, 0.01, 0.01))
175
+
176
+ # Word Input
177
+ with row2_1:
178
+ word_input = st.text_input(
179
+ "nํ–‰์‹œ์— ์‚ฌ์šฉํ•  ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ์ ๊ณ  ๋ฒ„ํŠผ์„ ๋ˆŒ๋Ÿฌ์ฃผ์„ธ์š”.(์ตœ๋Œ€ 5์ž) ๐Ÿ‘‡",
180
+ placeholder='ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”',
181
+ max_chars=5
182
+ )
183
+ word_input = re.sub("[^๊ฐ€-ํžฃ]", "", word_input)
184
+
185
+ if st.button('nํ–‰์‹œ ์ œ์ž‘ํ•˜๊ธฐ'):
186
+ if word_input == "":
187
+ st.error("์˜จ์ „ํ•œ ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ์‚ฌ์šฉํ•ด์ฃผ์„ธ์š”!")
188
+
189
+ else:
190
+ st.write("nํ–‰์‹œ ๋‹จ์–ด : ", word_input)
191
+ with st.spinner('์ž ์‹œ ๊ธฐ๋‹ค๋ ค์ฃผ์„ธ์š”...'):
192
+ result = n_line_poem(word_input)
193
+ st.success('์™„๋ฃŒ๋์Šต๋‹ˆ๋‹ค!')
194
+ for r in result:
195
+ st.write(f'{r} : {result[r]}')
196
+
197
+
198
+
pages/beta.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import requests
3
+ import streamlit as st
4
+ from streamlit_lottie import st_lottie
5
+ import torch
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM
7
+ import re
8
+
9
+ # Page Config
10
+ st.set_page_config(
11
+ page_title="๋…ธ๋ž˜ ๊ฐ€์‚ฌ nํ–‰์‹œ Beta",
12
+ page_icon="๐Ÿ’Œ",
13
+ layout="wide"
14
+ )
15
+ # st.text(os.listdir(os.curdir))
16
+
17
+ ### Model
18
+ tokenizer = AutoTokenizer.from_pretrained("wumusill/final_project_kogpt2")
19
+
20
+ @st.cache(show_spinner=False)
21
+ def load_model():
22
+ model = AutoModelForCausalLM.from_pretrained("wumusill/final_project_kogpt2")
23
+ return model
24
+
25
+ model = load_model()
26
+
27
+ @st.cache(show_spinner=False)
28
+ def get_word():
29
+ word = pd.read_csv("ballad_word.csv", encoding="cp949")
30
+ return word
31
+
32
+
33
+ word = get_word()
34
+
35
+
36
+ one = word[word["0"].str.startswith("ํ•œ")].sample(1).values[0][0]
37
+ # st.header(type(one))
38
+ # st.header(one)
39
+
40
+
41
+ # Class : Dict ์ค‘๋ณต ํ‚ค ์ถœ๋ ฅ
42
+ class poem(object):
43
+ def __init__(self,letter):
44
+ self.letter = letter
45
+
46
+ def __str__(self):
47
+ return self.letter
48
+
49
+ def __repr__(self):
50
+ return "'"+self.letter+"'"
51
+
52
+
53
+ def beta_poem(input_letter):
54
+ # ๋‘์Œ ๋ฒ•์น™ ์‚ฌ์ „
55
+ dooeum = {"๋ผ":"๋‚˜", "๋ฝ":"๋‚™", "๋ž€":"๋‚œ", "๋ž„":"๋‚ ", "๋žŒ":"๋‚จ", "๋ž":"๋‚ฉ", "๋ž‘":"๋‚ญ",
56
+ "๋ž˜":"๋‚ด", "๋žญ":"๋ƒ‰", "๋ƒ‘":"์•ฝ", "๋žต":"์•ฝ", "๋ƒฅ":"์–‘", "๋Ÿ‰":"์–‘", "๋…€":"์—ฌ",
57
+ "๋ ค":"์—ฌ", "๋…":"์—ญ", "๋ ฅ":"์—ญ", "๋…„":"์—ฐ", "๋ จ":"์—ฐ", "๋…ˆ":"์—ด", "๋ ฌ":"์—ด",
58
+ "๋…":"์—ผ", "๋ ด":"์—ผ", "๋ ต":"์—ฝ", "๋…•":"์˜", "๋ น":"์˜", "๋…œ":"์˜ˆ", "๋ก€":"์˜ˆ",
59
+ "๋กœ":"๋…ธ", "๋ก":"๋…น", "๋ก ":"๋…ผ", "๋กฑ":"๋†", "๋ขฐ":"๋‡Œ", "๋‡จ":"์š”", "๋ฃŒ":"์š”",
60
+ "๋ฃก":"์šฉ", "๋ฃจ":"๋ˆ„", "๋‰ด":"์œ ", "๋ฅ˜":"์œ ", "๋‰ต":"์œก", "๋ฅ™":"์œก", "๋ฅœ":"์œค",
61
+ "๋ฅ ":"์œจ", "๋ฅญ":"์œต", "๋ฅต":"๋Š‘", "๋ฆ„":"๋Š ", "๋ฆ‰":"๋Šฅ", "๋‹ˆ":"์ด", "๋ฆฌ":"์ด",
62
+ "๋ฆฐ":'์ธ', '๋ฆผ':'์ž„', '๋ฆฝ':'์ž…'}
63
+ # ๊ฒฐ๊ณผ๋ฌผ์„ ๋‹ด์„ list
64
+ res_l = []
65
+ len_sequence = 0
66
+
67
+ # ํ•œ ๊ธ€์ž์”ฉ ์ธ๋ฑ์Šค์™€ ํ•จ๊ป˜ ๊ฐ€์ ธ์˜ด
68
+ for idx, val in enumerate(input_letter):
69
+ # ๋‘์Œ ๋ฒ•์น™ ์ ์šฉ
70
+ if val in dooeum.keys():
71
+ val = dooeum[val]
72
+
73
+ # ๋ฐœ๋ผ๋“œ์— ์žˆ๋Š” ๋‹จ์–ด ์ ์šฉ
74
+ try:
75
+ one = word[word["0"].str.startswith(val)].sample(1).values[0][0]
76
+ # st.text(one)
77
+ except:
78
+ one = val
79
+
80
+ # ์ข€๋” ๋งค๋„๋Ÿฌ์šด ์‚ผํ–‰์‹œ๋ฅผ ์œ„ํ•ด ์ด์ „ ๋ฌธ์žฅ์ด๋ž‘ ํ˜„์žฌ ์Œ์ ˆ ์—ฐ๊ฒฐ
81
+ # ์ดํ›„ generate ๋œ ๋ฌธ์žฅ์—์„œ ์ด์ „ ๋ฌธ์žฅ์— ๋Œ€ํ•œ ๋ฐ์ดํ„ฐ ์ œ๊ฑฐ
82
+ link_with_pre_sentence = (" ".join(res_l)+ " " + one + " " if idx != 0 else one).strip()
83
+ # print(link_with_pre_sentence)
84
+
85
+ # ์—ฐ๊ฒฐ๋œ ๋ฌธ์žฅ์„ ์ธ์ฝ”๋”ฉ
86
+ input_ids = tokenizer.encode(link_with_pre_sentence, add_special_tokens=False, return_tensors="pt")
87
+
88
+ # ์ธ์ฝ”๋”ฉ ๊ฐ’์œผ๋กœ ๋ฌธ์žฅ ์ƒ์„ฑ
89
+ output_sequence = model.generate(
90
+ input_ids=input_ids,
91
+ do_sample=True,
92
+ max_length=42,
93
+ min_length=len_sequence + 2,
94
+ temperature=0.9,
95
+ repetition_penalty=1.5,
96
+ no_repeat_ngram_size=2)
97
+
98
+ # ์ƒ์„ฑ๋œ ๋ฌธ์žฅ ๋ฆฌ์ŠคํŠธ๋กœ ๋ณ€ํ™˜ (์ธ์ฝ”๋”ฉ ๋˜์–ด์žˆ๊ณ , ์ƒ์„ฑ๋œ ๋ฌธ์žฅ ๋’ค๋กœ padding ์ด ์žˆ๋Š” ์ƒํƒœ)
99
+ generated_sequence = output_sequence.tolist()[0]
100
+
101
+ # padding index ์•ž๊นŒ์ง€ slicing ํ•จ์œผ๋กœ์จ padding ์ œ๊ฑฐ, padding์ด ์—†์„ ์ˆ˜๋„ ์žˆ๊ธฐ ๋•Œ๋ฌธ์— ์กฐ๊ฑด๋ฌธ ํ™•์ธ ํ›„ ์ œ๊ฑฐ
102
+ # ์‚ฌ์šฉํ•  generated_sequence ๊ฐ€ 5๋ณด๋‹ค ์งง์œผ๋ฉด ๊ฐ•์ œ์ ์œผ๋กœ ๊ธธ์ด๋ฅผ 8๋กœ ํ•ด์ค€๋‹ค...
103
+ if tokenizer.pad_token_id in generated_sequence:
104
+ check_index = generated_sequence.index(tokenizer.pad_token_id)
105
+ check_index = check_index if check_index-len_sequence > 3 else len_sequence + 8
106
+ generated_sequence = generated_sequence[:check_index]
107
+
108
+ word_encode = tokenizer.encode(one, add_special_tokens=False, return_tensors="pt").tolist()[0][0]
109
+ split_index = len(generated_sequence) - 1 - generated_sequence[::-1].index(word_encode)
110
+
111
+ # ์ฒซ ๊ธ€์ž๊ฐ€ ์•„๋‹ˆ๋ผ๋ฉด, generate ๋œ ์Œ์ ˆ๋งŒ ๊ฒฐ๊ณผ๋ฌผ list์— ๋“ค์–ด๊ฐˆ ์ˆ˜ ์žˆ๊ฒŒ ์•ž ๋ฌธ์žฅ์— ๋Œ€ํ•œ ์ธ์ฝ”๋”ฉ ๊ฐ’ ์ œ๊ฑฐ
112
+ generated_sequence = generated_sequence[split_index:]
113
+
114
+ # print(tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True, skip_special_tokens=True))
115
+ # ๋‹ค์Œ ์Œ์ ˆ์„ ์œ„ํ•ด ๊ธธ์ด ๊ฐฑ์‹ 
116
+ len_sequence += len([elem for elem in generated_sequence if elem not in(tokenizer.all_special_ids)])
117
+ # ๊ฒฐ๊ณผ๋ฌผ ๋””์ฝ”๋”ฉ
118
+ decoded_sequence = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True, skip_special_tokens=True)
119
+
120
+ # ๊ฒฐ๊ณผ๋ฌผ ๋ฆฌ์ŠคํŠธ์— ๋‹ด๊ธฐ
121
+ res_l.append(decoded_sequence)
122
+
123
+ poem_dict = {"Type":"beta"}
124
+
125
+ for letter, res in zip(input_letter, res_l):
126
+ # decode_res = tokenizer.decode(res, clean_up_tokenization_spaces=True, skip_special_tokens=True)
127
+ poem_dict[poem(letter)] = res
128
+
129
+ return poem_dict
130
+
131
+ def alpha_poem(input_letter):
132
+
133
+ # ๋‘์Œ ๋ฒ•์น™ ์‚ฌ์ „
134
+ dooeum = {"๋ผ":"๋‚˜", "๋ฝ":"๋‚™", "๋ž€":"๋‚œ", "๋ž„":"๋‚ ", "๋žŒ":"๋‚จ", "๋ž":"๋‚ฉ", "๋ž‘":"๋‚ญ",
135
+ "๋ž˜":"๋‚ด", "๋žญ":"๋ƒ‰", "๋ƒ‘":"์•ฝ", "๋žต":"์•ฝ", "๋ƒฅ":"์–‘", "๋Ÿ‰":"์–‘", "๋…€":"์—ฌ",
136
+ "๋ ค":"์—ฌ", "๋…":"์—ญ", "๋ ฅ":"์—ญ", "๋…„":"์—ฐ", "๋ จ":"์—ฐ", "๋…ˆ":"์—ด", "๋ ฌ":"์—ด",
137
+ "๋…":"์—ผ", "๋ ด":"์—ผ", "๋ ต":"์—ฝ", "๋…•":"์˜", "๋ น":"์˜", "๋…œ":"์˜ˆ", "๋ก€":"์˜ˆ",
138
+ "๋กœ":"๋…ธ", "๋ก":"๋…น", "๋ก ":"๋…ผ", "๋กฑ":"๋†", "๋ขฐ":"๋‡Œ", "๋‡จ":"์š”", "๋ฃŒ":"์š”",
139
+ "๋ฃก":"์šฉ", "๋ฃจ":"๋ˆ„", "๋‰ด":"์œ ", "๋ฅ˜":"์œ ", "๋‰ต":"์œก", "๋ฅ™":"์œก", "๋ฅœ":"์œค",
140
+ "๋ฅ ":"์œจ", "๋ฅญ":"์œต", "๋ฅต":"๋Š‘", "๋ฆ„":"๋Š ", "๋ฆ‰":"๋Šฅ", "๋‹ˆ":"์ด", "๋ฆฌ":"์ด",
141
+ "๋ฆฐ":'์ธ', '๋ฆผ':'์ž„', '๋ฆฝ':'์ž…'}
142
+ # ๊ฒฐ๊ณผ๋ฌผ์„ ๋‹ด์„ list
143
+ res_l = []
144
+
145
+ # ํ•œ ๊ธ€์ž์”ฉ ์ธ๋ฑ์Šค์™€ ํ•จ๊ป˜ ๊ฐ€์ ธ์˜ด
146
+ for idx, val in enumerate(input_letter):
147
+ # ๋‘์Œ ๋ฒ•์น™ ์ ์šฉ
148
+ if val in dooeum.keys():
149
+ val = dooeum[val]
150
+
151
+
152
+ while True:
153
+ # ๋งŒ์•ฝ idx ๊ฐ€ 0 ์ด๋ผ๋ฉด == ์ฒซ ๊ธ€์ž
154
+ if idx == 0:
155
+ # ์ฒซ ๊ธ€์ž ์ธ์ฝ”๋”ฉ
156
+ input_ids = tokenizer.encode(
157
+ val, add_special_tokens=False, return_tensors="pt")
158
+ # print(f"{idx}๋ฒˆ ์ธ์ฝ”๋”ฉ : {input_ids}\n") # 2์ฐจ์› ํ…์„œ
159
+
160
+ # ์ฒซ ๊ธ€์ž ์ธ์ฝ”๋”ฉ ๊ฐ’์œผ๋กœ ๋ฌธ์žฅ ์ƒ์„ฑ
161
+ output_sequence = model.generate(
162
+ input_ids=input_ids,
163
+ do_sample=True,
164
+ max_length=42,
165
+ min_length=5,
166
+ temperature=0.9,
167
+ repetition_penalty=1.7,
168
+ no_repeat_ngram_size=2)[0]
169
+ # print("์ฒซ ๊ธ€์ž ์ธ์ฝ”๋”ฉ ํ›„ generate ๊ฒฐ๊ณผ:", output_sequence, "\n") # tensor
170
+
171
+ # ์ฒซ ๊ธ€์ž๊ฐ€ ์•„๋‹ˆ๋ผ๋ฉด
172
+ else:
173
+ # ํ•œ ์Œ์ ˆ
174
+ input_ids = tokenizer.encode(
175
+ val, add_special_tokens=False, return_tensors="pt")
176
+ # print(f"{idx}๋ฒˆ ์งธ ๊ธ€์ž ์ธ์ฝ”๋”ฉ : {input_ids} \n")
177
+
178
+ # ์ข€๋” ๋งค๋„๋Ÿฌ์šด ์‚ผํ–‰์‹œ๋ฅผ ์œ„ํ•ด ์ด์ „ ์ธ์ฝ”๋”ฉ๊ณผ ์ง€๊ธˆ ์ธ์ฝ”๋”ฉ ์—ฐ๊ฒฐ
179
+ link_with_pre_sentence = torch.cat((generated_sequence, input_ids[0]), 0)
180
+ link_with_pre_sentence = torch.reshape(link_with_pre_sentence, (1, len(link_with_pre_sentence)))
181
+ # print(f"์ด์ „ ํ…์„œ์™€ ์—ฐ๊ฒฐ๋œ ํ…์„œ {link_with_pre_sentence} \n")
182
+
183
+ # ์ธ์ฝ”๋”ฉ ๊ฐ’์œผ๋กœ ๋ฌธ์žฅ ์ƒ์„ฑ
184
+ output_sequence = model.generate(
185
+ input_ids=link_with_pre_sentence,
186
+ do_sample=True,
187
+ max_length=42,
188
+ min_length=5,
189
+ temperature=0.9,
190
+ repetition_penalty=1.7,
191
+ no_repeat_ngram_size=2)[0]
192
+ # print(f"{idx}๋ฒˆ ์ธ์ฝ”๋”ฉ ํ›„ generate : {output_sequence}")
193
+
194
+ # ์ƒ์„ฑ๋œ ๋ฌธ์žฅ ๋ฆฌ์ŠคํŠธ๋กœ ๋ณ€ํ™˜ (์ธ์ฝ”๋”ฉ ๋˜์–ด์žˆ๊ณ , ์ƒ์„ฑ๋œ ๋ฌธ์žฅ ๋’ค๋กœ padding ์ด ์žˆ๋Š” ์ƒํƒœ)
195
+ generated_sequence = output_sequence.tolist()
196
+ # print(f"{idx}๋ฒˆ ์ธ์ฝ”๋”ฉ ๋ฆฌ์ŠคํŠธ : {generated_sequence} \n")
197
+
198
+ # padding index ์•ž๊นŒ์ง€ slicing ํ•จ์œผ๋กœ์จ padding ์ œ๊ฑฐ, padding์ด ์—†์„ ์ˆ˜๋„ ์žˆ๊ธฐ ๋•Œ๋ฌธ์— ์กฐ๊ฑด๋ฌธ ํ™•์ธ ํ›„ ์ œ๊ฑฐ
199
+ if tokenizer.pad_token_id in generated_sequence:
200
+ generated_sequence = generated_sequence[:generated_sequence.index(tokenizer.pad_token_id)]
201
+
202
+ generated_sequence = torch.tensor(generated_sequence)
203
+ # print(f"{idx}๋ฒˆ ์ธ์ฝ”๋”ฉ ๋ฆฌ์ŠคํŠธ ํŒจ๋”ฉ ์ œ๊ฑฐ ํ›„ ๋‹ค์‹œ ํ…์„œ : {generated_sequence} \n")
204
+
205
+ # ์ฒซ ๊ธ€์ž๊ฐ€ ์•„๋‹ˆ๋ผ๋ฉด, generate ๋œ ์Œ์ ˆ๋งŒ ๊ฒฐ๊ณผ๋ฌผ list์— ๋“ค์–ด๊ฐˆ ์ˆ˜ ์žˆ๊ฒŒ ์•ž ๋ฌธ์žฅ์— ๋Œ€ํ•œ ์ธ์ฝ”๋”ฉ ๊ฐ’ ์ œ๊ฑฐ
206
+ # print(generated_sequence)
207
+ if idx != 0:
208
+ # ์ด์ „ ๋ฌธ์žฅ์˜ ๊ธธ์ด ์ดํ›„๋กœ ์Šฌ๋ผ์ด์‹ฑํ•ด์„œ ์•ž ๋ฌธ์žฅ ์ œ๊ฑฐ
209
+ generated_sequence = generated_sequence[len_sequence:]
210
+
211
+ len_sequence = len(generated_sequence)
212
+ # print("len_seq", len_sequence)
213
+
214
+ # ์Œ์ ˆ ๊ทธ๋Œ€๋กœ ๋ฑ‰์œผ๋ฉด ๋‹ค์‹œ ํ•ด์™€, ์•„๋‹ˆ๋ฉด while๋ฌธ ํƒˆ์ถœ
215
+ if len_sequence > 1:
216
+ break
217
+
218
+ # ๊ฒฐ๊ณผ๋ฌผ ๋ฆฌ์ŠคํŠธ์— ๋‹ด๊ธฐ
219
+ res_l.append(generated_sequence)
220
+
221
+ poem_dict = {"Type":"alpha"}
222
+
223
+ for letter, res in zip(input_letter, res_l):
224
+ decode_res = tokenizer.decode(res, clean_up_tokenization_spaces=True, skip_special_tokens=True)
225
+ poem_dict[poem(letter)] = decode_res
226
+
227
+ return poem_dict
228
+
229
+ # Image(.gif)
230
+ @st.cache(show_spinner=False)
231
+ def load_lottieurl(url: str):
232
+ r = requests.get(url)
233
+ if r.status_code != 200:
234
+ return None
235
+ return r.json()
236
+
237
+ lottie_url = "https://assets7.lottiefiles.com/private_files/lf30_fjln45y5.json"
238
+
239
+ lottie_json = load_lottieurl(lottie_url)
240
+ st_lottie(lottie_json, speed=1, height=200, key="initial")
241
+
242
+
243
+ # Title
244
+ row0_spacer1, row0_1, row0_spacer2, row0_2, row0_spacer3 = st.columns(
245
+ (0.01, 2, 0.05, 0.5, 0.01)
246
+ )
247
+
248
+ with row0_1:
249
+ st.markdown("# ํ•œ๊ธ€ ๋…ธ๋ž˜ ๊ฐ€์‚ฌ nํ–‰์‹œโœ")
250
+ st.markdown("### ๐Ÿฆ๋ฉ‹์Ÿ์ด์‚ฌ์ž์ฒ˜๋Ÿผ AIS7๐Ÿฆ - ํŒŒ์ด๋„ ํ”„๋กœ์ ํŠธ")
251
+
252
+ with row0_2:
253
+ st.write("")
254
+ st.write("")
255
+ st.write("")
256
+ st.subheader("1์กฐ - ํ•ดํŒŒ๋ฆฌ")
257
+ st.write("์ด์ง€ํ˜œ, ์ตœ์ง€์˜, ๊ถŒ์†Œํฌ, ๋ฌธ์ข…ํ˜„, ๊ตฌ์žํ˜„, ๊น€์˜์ค€")
258
+
259
+ st.write('---')
260
+
261
+ # Explanation
262
+ row1_spacer1, row1_1, row1_spacer2 = st.columns((0.01, 0.01, 0.01))
263
+
264
+ with row1_1:
265
+ st.markdown("### nํ–‰์‹œ ๊ฐ€์ด๋“œ๋ผ์ธ")
266
+ st.markdown("1. ํ•˜๋‹จ์— ์žˆ๋Š” ํ…์ŠคํŠธ๋ฐ”์— 5์ž ์ดํ•˜ ๋‹จ์–ด๋ฅผ ๋„ฃ์–ด์ฃผ์„ธ์š”")
267
+ st.markdown("2. 'nํ–‰์‹œ ์ œ์ž‘ํ•˜๊ธฐ' ๋ฒ„ํŠผ์„ ํด๋ฆญํ•ด์ฃผ์„ธ์š”")
268
+ st.markdown("* nํ–‰์‹œ ํƒ€์ž… ์„ค์ •\n"
269
+ " * Alpha ver. : ๋ชจ๋ธ์ด ์ฒซ ์Œ์ ˆ๋ถ€ํ„ฐ ์ƒ์„ฑ\n"
270
+ " * Beta ver. : ์ฒซ ์Œ์ ˆ์„ ๋ฐ์ดํ„ฐ์…‹์—์„œ ์ฐพ๊ณ , ๋‹ค์Œ ๋ถ€๋ถ„์„ ์ƒ์„ฑ")
271
+
272
+ st.write('---')
273
+
274
+ # Model & Input
275
+ row2_spacer1, row2_1, row2_spacer2= st.columns((0.01, 0.01, 0.01))
276
+
277
+ col1, col2 = st.columns(2)
278
+
279
+ # Word Input
280
+ with row2_1:
281
+
282
+ with col1:
283
+ genre = st.radio(
284
+ "nํ–‰์‹œ ํƒ€์ž… ์„ ํƒ",
285
+ ('Alpha', 'Beta(test์ค‘)'))
286
+
287
+ if genre == 'Alpha':
288
+ n_line_poem = alpha_poem
289
+
290
+ else:
291
+ n_line_poem = beta_poem
292
+
293
+ with col2:
294
+ word_input = st.text_input(
295
+ "nํ–‰์‹œ์— ์‚ฌ์šฉํ•  ๋‹จ์–ด๋ฅผ ์ ๊ณ  ๋ฒ„ํŠผ์„ ๋ˆŒ๋Ÿฌ์ฃผ์„ธ์š”.(์ตœ๋Œ€ 5์ž) ๐Ÿ‘‡",
296
+ placeholder='ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”',
297
+ max_chars=5
298
+ )
299
+ word_input = re.sub("[^๊ฐ€-ํžฃ]", "", word_input)
300
+
301
+ if st.button('nํ–‰์‹œ ์ œ์ž‘ํ•˜๊ธฐ'):
302
+ if word_input == "":
303
+ st.error("์˜จ์ „ํ•œ ํ•œ๊ธ€ ๋‹จ์–ด๋ฅผ ์‚ฌ์šฉํ•ด์ฃผ์„ธ์š”!")
304
+
305
+ else:
306
+ st.write("nํ–‰์‹œ ๋‹จ์–ด : ", word_input)
307
+ with st.spinner('์ž ์‹œ ๊ธฐ๋‹ค๋ ค์ฃผ์„ธ์š”...'):
308
+ result = n_line_poem(word_input)
309
+ st.success('์™„๋ฃŒ๋์Šต๋‹ˆ๋‹ค!')
310
+ for r in result:
311
+ st.write(f'{r} : {result[r]}')
312
+
requirements.txt CHANGED
@@ -1,13 +1,7 @@
1
  pandas
2
  # numpy
3
  streamlit
4
- # plotly
5
- # seaborn
6
- # matplotlib
7
  requests
8
  streamlit_lottie
9
- # streamlit_folium
10
- # koreanize_matplotlib
11
- # tensorflow
12
  torch
13
  transformers
 
1
  pandas
2
  # numpy
3
  streamlit
 
 
 
4
  requests
5
  streamlit_lottie
 
 
 
6
  torch
7
  transformers