rbiswasfc committed on
Commit
7661b57
1 Parent(s): 3ffe751
Files changed (2) hide show
  1. app.py +50 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import gradio as gr
4
+ from spacy.lang.en import English
5
+ from transformers import AutoTokenizer
6
+
7
# Fetch the small English spaCy pipeline at startup.
# NOTE(review): the tokenizer created below comes from a blank `English()`
# pipeline, which does not appear to use this package -- confirm whether the
# download is still required.
import subprocess
import sys

# Run the download with the interpreter that is executing this app rather
# than whatever `python` happens to be first on PATH, and pass an argument
# list so no shell is involved. `check=False` keeps the original best-effort
# behaviour: a failed download does not abort startup.
subprocess.run(
    [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
    check=False,
)


# Hugging Face tokenizers compared in the demo.
deberta_v3_tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

# Tokenizer taken from a blank spaCy English pipeline.
en_tokenizer = English().tokenizer
15
+
16
+
17
def tokenize_with_spacy(text, tokenizer=en_tokenizer):
    """Return the text of each token produced by a spaCy tokenizer.

    Args:
        text: The string to tokenize.
        tokenizer: Callable that takes the text and yields objects exposing
            a ``.text`` attribute. Defaults to the module-level English
            tokenizer.

    Returns:
        A list with one string per token, in order.
    """
    return [piece.text for piece in tokenizer(text)]
21
+
22
+
23
def tokenize_with_hf(text, tokenizer=deberta_v3_tokenizer):
    """Tokenize ``text`` with a Hugging Face tokenizer.

    Args:
        text: The string to tokenize.
        tokenizer: Object exposing a ``tokenize(text)`` method. Defaults to
            the module-level DeBERTa-V3 tokenizer.

    Returns:
        Whatever ``tokenizer.tokenize(text)`` returns.
    """
    return tokenizer.tokenize(text)
26
+
27
+
28
def tokenize(text):
    """Tokenize ``text`` with all three tokenizers used by the demo.

    Args:
        text: The string to tokenize.

    Returns:
        A 3-tuple ``(spacy_tokens, deberta_tokens, mistral_tokens)``.
    """
    spacy_tokens = tokenize_with_spacy(text)
    deberta_tokens = tokenize_with_hf(text)
    mistral_tokens = tokenize_with_hf(text, tokenizer=mistral_tokenizer)
    return spacy_tokens, deberta_tokens, mistral_tokens
33
+
34
+
35
# Build the UI: one text input, a submit button, and one JSON panel per
# tokenizer showing the tokens it produced.
with gr.Blocks() as demo:
    input_text = gr.Textbox(lines=2, placeholder="Input text...")
    # The caption shown on a button is its `value`, not a `label`.
    submit_btn = gr.Button(value="Submit")

    spacy_display = gr.JSON(label="Spacy")
    deb_display = gr.JSON(label="DeBERTa-V3")
    mistral_display = gr.JSON(label="Mistral")

    # callback ---
    # `tokenize` takes the text as its only argument, so the textbox must be
    # wired in as an input; without it the callback is invoked with no
    # arguments and fails. The three return values map onto the three
    # outputs in order.
    submit_btn.click(
        fn=tokenize,
        inputs=[input_text],
        outputs=[spacy_display, deb_display, mistral_display],
    )

# launch app --------
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers
2
+ spacy