Spaces:
Running
Running
taka-yamakoshi
committed on
Commit
•
228552f
1
Parent(s):
9d4d0bb
fix bugs
Browse files
app.py
CHANGED
@@ -48,20 +48,25 @@ if __name__=='__main__':
|
|
48 |
tokenizer = load_model('albert-xxlarge-v2')
|
49 |
sent_cols = st.columns(2)
|
50 |
num_tokens = {}
|
|
|
51 |
for sent_id, sent_col in enumerate(sent_cols):
|
52 |
with sent_col:
|
53 |
sentence = st.text_input(f'Sentence {sent_id+1}')
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
57 |
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
|
|
|
|
|
# Streamlit UI: tokenize two user-entered sentences side by side with the
# ALBERT tokenizer and report whether they produce the same number of tokens.
tokenizer = load_model('albert-xxlarge-v2')
sent_cols = st.columns(2)
num_tokens = {}
sents = {}
for sent_id, sent_col in enumerate(sent_cols):
    with sent_col:
        sentence = st.text_input(f'Sentence {sent_id+1}')
        sents[f'sent_{sent_id}'] = sentence
        if len(sentence) > 0:
            input_sent = tokenizer(sentence)['input_ids']
            # Drop the special tokens at both ends ([CLS]/[SEP]) before
            # decoding each remaining id back to its surface form.
            decoded_sent = [tokenizer.decode([token]) for token in input_sent[1:-1]]
            num_tokens[f'sent_{sent_id}'] = len(decoded_sent)

            # One narrow column per token; +2 chars of padding per column
            # so adjacent tokens don't visually run together.
            char_nums = [len(word) + 2 for word in decoded_sent]
            word_cols = st.columns(char_nums)
            for word_col, word in zip(word_cols, decoded_sent):
                with word_col:
                    st.write(word)
            st.write(f'{len(decoded_sent)} tokens')

# BUG FIX: enumerate() above yields sent_id 0 and 1, so the stored keys are
# 'sent_0' and 'sent_1'. The original lookup used 'sent_1'/'sent_2' and
# raised KeyError as soon as both sentences were entered. Both num_tokens
# keys are guaranteed present here because each sentence is non-empty.
if len(sents['sent_0']) > 0 and len(sents['sent_1']) > 0:
    if num_tokens['sent_0'] == num_tokens['sent_1']:
        st.subheader('Matched!')
    else:
        st.subheader('Not Matched...')