cn91 committed on
Commit
15553b2
1 Parent(s): 0cbaa9b

Update app.py

Browse files

change to Chinese only

Files changed (1) hide show
  1. app.py +10 -72
app.py CHANGED
@@ -11,41 +11,19 @@ if USE_GPU and torch.cuda.is_available():
11
  else:
12
  device = torch.device('cpu')
13
 
14
- MODEL_NAME_ENGLISH = "facebook/xlm-v-base"
15
- #SENTENCE_MODEL_NAME_ENGLISH = 'sentence-transformers/all-MiniLM-L6-v2'
16
- #WORD_MODEL_NAME_ENGLISH = 'vocab-transformers/distilbert-word2vec_256k-MLM_best'
17
-
18
- # chinese models
19
  MODEL_NAME_CHINESE = "IDEA-CCNL/Erlangshen-DeBERTa-v2-186M-Chinese-SentencePiece"
20
 
21
  WORD_PROBABILITY_THRESHOLD = 0.02
22
- #WORD_PROBABILITY_THRESHOLD_ENGLISH = 0.02
23
- #WORD_PROBABILITY_THRESHOLD_CHINESE = 0.02
24
  TOP_K_WORDS = 10
25
 
26
- ENGLISH_LANG = "English"
27
- CHINESE_LANG = "Chinese"
28
-
29
  CHINESE_WORDLIST = ['一定','一样','不得了','主观','从此','便于','俗话','倒霉','候选','充沛','分别','反倒','只好','同情','吹捧','咳嗽','围绕','如意','实行','将近','就职','应该','归还','当面','忘记','急忙','恢复','悲哀','感冒','成长','截至','打架','把握','报告','抱怨','担保','拒绝','拜访','拥护','拳头','拼搏','损坏','接待','握手','揭发','攀登','显示','普遍','未免','欣赏','正式','比如','流浪','涂抹','深刻','演绎','留念','瞻仰','确保','稍微','立刻','精心','结算','罕见','访问','请示','责怪','起初','转达','辅导','过瘾','运动','连忙','适合','遭受','重叠','镇静']
30
 
31
  @st.cache_resource
32
  def get_model_chinese():
33
  return pipeline("fill-mask", MODEL_NAME_CHINESE, device = device)
34
 
35
- @st.cache_resource
36
- def get_model_english():
37
- return pipeline("fill-mask", MODEL_NAME_ENGLISH, device = device)
38
-
39
- @st.cache_data
40
- def get_wordlist_chinese():
41
- return pd.read_csv('wordlist_chinese.csv')
42
-
43
- @st.cache_data
44
- def get_wordlist_english():
45
- return pd.read_csv('wordlist_english.csv')
46
-
47
  def assess_chinese(word, sentence):
48
- print("Assessing English")
49
  if sentence.lower().find(word.lower()) == -1:
50
  print('Sentence does not contain the word!')
51
  return
@@ -65,57 +43,19 @@ def assess_chinese(word, sentence):
65
 
66
  return top_k_prediction, score
67
 
68
- def assess_english(word, sentence):
69
- if sentence.lower().find(word.lower()) == -1:
70
- raise Exception("Sentence does not contain the target word")
71
-
72
- text = sentence.replace(word.lower(), "<mask>")
73
-
74
- top_k_prediction = mask_filler_english(text, top_k=TOP_K_WORDS)
75
- target_word_prediction = mask_filler_english(text, targets = chr(9601)+word)
76
-
77
- score = target_word_prediction[0]['score']
78
-
79
- # append the original word if its not found in the results
80
- top_k_prediction_filtered = [output for output in top_k_prediction if \
81
- output['token_str'] == word]
82
- if len(top_k_prediction_filtered) == 0:
83
- top_k_prediction.extend(target_word_prediction)
84
-
85
- return top_k_prediction, score
86
-
87
- def assess_sentence(language, word, sentence):
88
- if (language == ENGLISH_LANG):
89
- return assess_english(word, sentence)
90
- elif (language == CHINESE_LANG):
91
- return assess_chinese(word, sentence)
92
 
93
  def get_chinese_word():
94
- include = (wordlist_chinese.assess == True) & (wordlist_chinese.Chinese.apply(len) == 2)
95
- possible_words = wordlist_chinese[include]
96
- word = possible_words.sample(1).iloc[0].Chinese
97
- test_words = CHINESE_WORDLIST
98
- word = np.random.choice(test_words)
99
- return word
100
-
101
- def get_english_word():
102
- include = (wordlist_english.assess == True)
103
- possible_words = wordlist_english[include]
104
- word = possible_words.sample(1).iloc[0].word
105
- test_words = ["independent","satisfied","excited"]
106
- word = np.random.choice(test_words)
107
  return word
108
 
109
- def get_word(language):
110
- if (language == ENGLISH_LANG):
111
- return get_english_word()
112
- elif (language == CHINESE_LANG):
113
- return get_chinese_word()
114
 
115
  mask_filler_chinese = get_model_chinese()
116
- mask_filler_english = get_model_english()
117
  wordlist_chinese = get_wordlist_chinese()
118
- wordlist_english = get_wordlist_english()
119
 
120
  def highlight_given_word(row):
121
  color = '#ACE5EE' if row.Words == target_word else 'white'
@@ -141,23 +81,21 @@ def get_top_5_results(top_k_prediction):
141
 
142
  #### Streamlit Page
143
  st.title("造句 Auto-marking Demo")
144
- language = st.radio("Select your language", (ENGLISH_LANG, CHINESE_LANG))
145
- #st.info("You are practising on " + language)
146
 
147
  if 'target_word' not in st.session_state:
148
- st.session_state['target_word'] = get_word(language)
149
  target_word = st.session_state['target_word']
150
 
151
  st.write("Target word: ", target_word)
152
  if st.button("Get new word"):
153
- st.session_state['target_word'] = get_word(language)
154
  st.experimental_rerun()
155
 
156
  st.subheader("Form your sentence and input below!")
157
  sentence = st.text_input('Enter your sentence here', placeholder="Enter your sentence here!")
158
 
159
  if st.button("Grade"):
160
- top_k_prediction, score = assess_sentence(language, target_word, sentence)
161
  with open('./result01.json', 'w') as outfile:
162
  outfile.write(str(top_k_prediction))
163
 
 
11
  else:
12
  device = torch.device('cpu')
13
 
 
 
 
 
 
14
  MODEL_NAME_CHINESE = "IDEA-CCNL/Erlangshen-DeBERTa-v2-186M-Chinese-SentencePiece"
15
 
16
  WORD_PROBABILITY_THRESHOLD = 0.02
 
 
17
  TOP_K_WORDS = 10
18
 
 
 
 
19
  CHINESE_WORDLIST = ['一定','一样','不得了','主观','从此','便于','俗话','倒霉','候选','充沛','分别','反倒','只好','同情','吹捧','咳嗽','围绕','如意','实行','将近','就职','应该','归还','当面','忘记','急忙','恢复','悲哀','感冒','成长','截至','打架','把握','报告','抱怨','担保','拒绝','拜访','拥护','拳头','拼搏','损坏','接待','握手','揭发','攀登','显示','普遍','未免','欣赏','正式','比如','流浪','涂抹','深刻','演绎','留念','瞻仰','确保','稍微','立刻','精心','结算','罕见','访问','请示','责怪','起初','转达','辅导','过瘾','运动','连忙','适合','遭受','重叠','镇静']
20
 
21
  @st.cache_resource
22
  def get_model_chinese():
23
  return pipeline("fill-mask", MODEL_NAME_CHINESE, device = device)
24
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def assess_chinese(word, sentence):
26
+ print("Assessing Chinese")
27
  if sentence.lower().find(word.lower()) == -1:
28
  print('Sentence does not contain the word!')
29
  return
 
43
 
44
  return top_k_prediction, score
45
 
46
+ def assess_sentence(word, sentence):
47
+ return assess_chinese(word, sentence)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  def get_chinese_word():
50
+ possible_words = CHINESE_WORDLIST
51
+ word = np.random.choice(possible_words)
 
 
 
 
 
 
 
 
 
 
 
52
  return word
53
 
54
+ def get_word():
55
+ return get_chinese_word()
 
 
 
56
 
57
  mask_filler_chinese = get_model_chinese()
 
58
  wordlist_chinese = get_wordlist_chinese()
 
59
 
60
  def highlight_given_word(row):
61
  color = '#ACE5EE' if row.Words == target_word else 'white'
 
81
 
82
  #### Streamlit Page
83
  st.title("造句 Auto-marking Demo")
 
 
84
 
85
  if 'target_word' not in st.session_state:
86
+ st.session_state['target_word'] = get_word()
87
  target_word = st.session_state['target_word']
88
 
89
  st.write("Target word: ", target_word)
90
  if st.button("Get new word"):
91
+ st.session_state['target_word'] = get_word()
92
  st.experimental_rerun()
93
 
94
  st.subheader("Form your sentence and input below!")
95
  sentence = st.text_input('Enter your sentence here', placeholder="Enter your sentence here!")
96
 
97
  if st.button("Grade"):
98
+ top_k_prediction, score = assess_sentence(target_word, sentence)
99
  with open('./result01.json', 'w') as outfile:
100
  outfile.write(str(top_k_prediction))
101