qanastek committed on
Commit
091b77e
1 Parent(s): 2da9ad9

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +4 -4
  2. app.py +54 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
  title: CAS Biomedical POS Tagging
3
- emoji: 📈
4
  colorFrom: indigo
5
- colorTo: yellow
6
  sdk: streamlit
7
- sdk_version: 1.17.0
8
  app_file: app.py
9
- pinned: false
10
  license: apache-2.0
11
  ---
12
 
 
1
  ---
2
  title: CAS Biomedical POS Tagging
3
+ emoji: ⚕️
4
  colorFrom: indigo
5
+ colorTo: indigo
6
  sdk: streamlit
7
+ sdk_version: 1.2.0
8
  app_file: app.py
9
+ pinned: true
10
  license: apache-2.0
11
  ---
12
 
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
2
+ import sentencepiece
3
+ import streamlit as st
4
+ import pandas as pd
5
+
6
# Sample passages pre-filled in the first two text boxes
# (both are currently the placeholder-looking string "ddd").
text_1 = "ddd"
text_2 = """ddd"""

# Page title plus a short description of the model in the sidebar.
st.title("Demo for Biomedical POS Tagging in French with DrBERT")
st.sidebar.write("Model : DrBERT-7GB base CAS corpus POS tagging")
st.sidebar.write("For details of model: 'https://huggingface.co/Dr-BERT/DrBERT-7GB'")

# Checkpoint identifier and aggregation strategy used when building the pipeline.
model_checkpoint = "Dr-BERT/DrBERT-7GB"
aggregation = "simple"

# Three editable text areas: two pre-filled samples and one empty box.
st.subheader("Select Text")
context_1 = st.text_area("Text #1", text_1, height=128)
context_2 = st.text_area("Text #2", text_2, height=128)
context_3 = st.text_area("New Text", value="", height=128)

# Radio selector deciding which of the three boxes is tagged.
context = st.radio("Select Text", ("Text #1", "Text #2", "New Text"))

# Map each radio label to the content of its text area.
texts_by_choice = {
    "Text #1": context_1,
    "Text #2": context_2,
    "New Text": context_3,
}
if context in texts_by_choice:
    input_text = texts_by_choice[context]
30
+
31
@st.cache(allow_output_mutation=True)
def setModel(model_checkpoint, aggregation):
    """Build a token-classification pipeline for the given checkpoint.

    The result is memoized by ``st.cache`` so the model and tokenizer are
    not reloaded for repeated calls with the same arguments.

    Args:
        model_checkpoint: Identifier passed to ``from_pretrained`` for both
            the model and the tokenizer.
        aggregation: Value forwarded as the pipeline's
            ``aggregation_strategy``.

    Returns:
        The ``transformers`` pipeline object for 'token-classification'.
    """
    return pipeline(
        'token-classification',
        model=AutoModelForTokenClassification.from_pretrained(model_checkpoint),
        tokenizer=AutoTokenizer.from_pretrained(model_checkpoint),
        aggregation_strategy=aggregation,
    )
36
+
37
# Run the tagger on the selected text when the user presses "Run" and show
# the predicted tags as a table.
Run_Button = st.button("Run", key=None)
if Run_Button:
    if not input_text.strip():
        # An empty text would yield no predictions and an empty frame, on
        # which the column selection below raises KeyError.
        st.warning("Please enter some text to tag.")
    else:
        ner_pipeline = setModel(model_checkpoint, aggregation)
        output = ner_pipeline(input_text)

        if not output:
            # Same empty-frame failure mode as above, for text that produced
            # no tagged tokens.
            st.warning("No tags were predicted for this text.")
        else:
            df = pd.DataFrame.from_dict(output)

            # Without aggregation the pipeline reports the label under
            # 'entity'; with any aggregation strategy it is 'entity_group'.
            tag_column = 'entity' if aggregation == "none" else 'entity_group'
            df = df.rename(index=str, columns={tag_column: 'POS Tag'})

            cols_to_keep = ['word', 'POS Tag', 'score', 'start', 'end']
            df_final = df[cols_to_keep]

            st.subheader("POS Tags")
            st.dataframe(df_final)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ torch
3
+ transformers
4
+ pandas
5
+ sentencepiece