snoop2head committed
Commit 85c2795 • 1 Parent(s): 40607dc

change to conditional kogpt trinity project

Files changed (1)
  1. app.py +11 -52
app.py CHANGED
@@ -47,12 +47,13 @@ def infer(input_ids, max_length, temperature, top_k, top_p):
 
 
 # prompts
-st.title("삼행시의 달인 KoGPT입니다 🦄")
-st.write("텍스트를 입력하고 CTRL+Enter(CMD+Enter)을 누르세요 🤗")
+st.title("주어진 감정에 맞게 문장을 만드는 KoGPT입니다 🦄")
+st.write("좌측에 감정상태의 변화를 줘보세요.")
+st.write("입력하고 나서 CTRL+Enter(CMD+Enter)를 누르세요 🤗")
 
 # text and sidebars
-default_value = "페르소나"
-sent = st.text_area("Text", default_value, max_chars=4, height=275)
+default_value = "수상한 밤들이 계속되던 날 언젠가부터 나는"
+sent = st.text_area("Text", default_value, max_chars=30, height=275)
 max_length = st.sidebar.slider("생성 문장 길이를 선택해주세요!", min_value=42, max_value=64)
 temperature = st.sidebar.slider(
     "Temperature", value=1.0, min_value=0.0, max_value=1.0, step=0.05
@@ -93,67 +94,25 @@ def infer_sentence(
     generated_sequence = output_sequences[0]
     print(generated_sequence)
 
-    # print(f"=== GENERATED SEQUENCE {generated_sequence_idx + 1} ===")
-    # generated_sequences = generated_sequence.tolist()
     # Decode text
     text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)
     print(text)
-    # Remove all text after the stop token
+
+    # Remove all text after the pad token
     stop_token = tokenizer.pad_token
     print(stop_token)
    text = text[: text.find(stop_token) if stop_token else None]
     print(text)
-
+
+    # Remove condition sentence
     condition_index = find_nth(text, "문장이다", 2)
     text = text[condition_index + 5 :]
     text = text.strip()
     return text
 
 
-def make_residual_conditional_samhaengshi(input_letter, condition_sentence):
-    # make letter string into
-    list_samhaengshi = []
-
-    # initializing text and index for iteration purpose
-    index = 0
-
-    # iterating over the input letter string
-    for index, letter_item in enumerate(input_letter):
-        # initializing the input_letter
-        if index == 0:
-            residual_text = letter_item
-            # print('residual_text:', residual_text)
-
-        # infer and add to the output
-        conditional_input = f"{condition_sentence} {residual_text}"
-        inferred_sentence = infer_sentence(conditional_input, tokenizer)
-        if index != 0:
-            # remove previous sentence from the output
-            print("inferred_sentence:", inferred_sentence)
-            inferred_sentence = inferred_sentence.replace(
-                list_samhaengshi[index - 1], ""
-            ).strip()
-        else:
-            pass
-        list_samhaengshi.append(inferred_sentence)
-
-        # until the end of the input_letter, give the previous residual_text to the next iteration
-        if index < len(input_letter) - 1:
-            residual_sentence = list_samhaengshi[index]
-            next_letter = input_letter[index + 1]
-            residual_text = (
-                f"{residual_sentence} {next_letter}"  # previous sentence + next letter
-            )
-            print("residual_text", residual_text)
-
-        elif index == len(input_letter) - 1:  # end of the input_letter
-            # Concatenate strings in the list without intersection
-
-            return list_samhaengshi
-
-
-return_text = make_residual_conditional_samhaengshi(
-    input_letter=sent, condition_sentence=condition_sentence
+return_text = infer_sentence(
+    condition_plus_input=condition_plus_input, tokenizer=tokenizer
 )
 
 print(return_text)
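
Both versions of infer_sentence call a find_nth helper that is defined elsewhere in app.py, so it does not appear in this diff. A minimal sketch of the conventional implementation such a call assumes (the commit's actual helper is not shown here):

    def find_nth(haystack: str, needle: str, n: int) -> int:
        # Return the start index of the n-th occurrence of needle, or -1 if absent.
        start = haystack.find(needle)
        while start >= 0 and n > 1:
            start = haystack.find(needle, start + len(needle))
            n -= 1
        return start

Under that reading, find_nth(text, "문장이다", 2) locates the second occurrence of the marker "문장이다" ("is a ... sentence"), and text[condition_index + 5 :] skips past the 4-character marker plus one trailing character (presumably a period or space), so only the generated continuation survives.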
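The new top-level call passes condition_plus_input, which is assembled outside the changed hunks and is therefore not visible in this diff. Judging from the UI copy ("give a change of emotional state on the left") and the search for the second "문장이다" marker, the prompt plausibly chains two emotion-condition clauses in front of the user text. A purely hypothetical sketch; the names emotion_1/emotion_2 and the template are assumptions, not the commit's code:

    # Hypothetical assembly of condition_plus_input; the real code lives
    # outside the hunks above. emotion_1/emotion_2 stand in for whatever the
    # sidebar collects as the "change of emotional state".
    emotion_1, emotion_2 = "슬픈", "기쁜"  # e.g. sad -> happy
    condition_sentence = f"{emotion_1} 문장이다. {emotion_2} 문장이다."
    condition_plus_input = f"{condition_sentence} {sent}"

On this reading, everything up to and including the second "문장이다" is conditioning text, which is exactly the prefix that infer_sentence slices away before returning.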