Stefan Dumitrescu committed on
Commit
ba99ac9
1 Parent(s): 4d544f0
Files changed (4) hide show
  1. README.md +6 -6
  2. app.py +88 -5
  3. .gitattributes +27 -0
  4. requirements.txt +7 -0
README.md CHANGED
@@ -1,9 +1,9 @@
1
  ---
2
- title: NamedEntityRecognition Romanian
3
- emoji: 📚
4
- colorFrom: blue
5
- colorTo: gray
6
- sdk: gradio
7
  app_file: app.py
8
  pinned: false
9
  ---
@@ -34,4 +34,4 @@ Path to your main application file (which contains either `gradio` or `streamlit
34
  Path is relative to the root of the repository.
35
 
36
  `pinned`: _boolean_
37
- Whether the Space stays on top of your list.
 
1
  ---
2
+ title: Turkish Named Entity Recognition
3
+ emoji: 🏃
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
+ sdk: streamlit
7
  app_file: app.py
8
  pinned: false
9
  ---
 
34
  Path is relative to the root of the repository.
35
 
36
  `pinned`: _boolean_
37
+ Whether the Space stays on top of your list.
app.py CHANGED
@@ -1,7 +1,90 @@
1
- import gradio as gr
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sentencepiece
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import spacy
5
+ import roner
6
 
7
+ example_list = [
8
+ "Ana merge în București.",
9
+ """Ana merge în București. Ana merge în București. Ana merge în București. Ana merge în București. Ana merge în București. Ana merge în București."""
10
+ ]
11
 
12
+ st.set_page_config(layout="wide")
13
+
14
+ st.title("Demo for Romanian NER")
15
+
16
+ model_list = ['dumitrescustefan/bert-base-romanian-ner']
17
+
18
+ st.sidebar.header("Select NER Model")
19
+ model_checkpoint = st.sidebar.radio("", model_list)
20
+
21
+ st.sidebar.write("For details of models: 'https://huggingface.co/dumitrescustefan/")
22
+ st.sidebar.write("")
23
+
24
+ xlm_agg_strategy_info = "'aggregation_strategy' can be selected as 'simple' or 'none' for 'xlm-roberta' because of the RoBERTa model's tokenization approach."
25
+
26
+ st.sidebar.header("Select Aggregation Strategy Type")
27
+ if model_checkpoint == "akdeniz27/xlm-roberta-base-turkish-ner":
28
+ aggregation = st.sidebar.radio("", ('simple', 'none'))
29
+ st.sidebar.write(xlm_agg_strategy_info)
30
+ elif model_checkpoint == "xlm-roberta-large-finetuned-conll03-english":
31
+ aggregation = st.sidebar.radio("", ('simple', 'none'))
32
+ st.sidebar.write(xlm_agg_strategy_info)
33
+ st.sidebar.write("")
34
+ st.sidebar.write("This English NER model is included just to show the zero-shot transfer learning capability of XLM-Roberta.")
35
+ else:
36
+ aggregation = st.sidebar.radio("", ('first', 'simple', 'average', 'max', 'none'))
37
+
38
+ st.sidebar.write("Please refer 'https://huggingface.co/transformers/_modules/transformers/pipelines/token_classification.html' for entity grouping with aggregation_strategy parameter.")
39
+
40
+ st.subheader("Select Text Input Method")
41
+ input_method = st.radio("", ('Select from Examples', 'Write or Paste New Text'))
42
+ if input_method == 'Select from Examples':
43
+ selected_text = st.selectbox('Select Text from List', example_list, index=0, key=1)
44
+ st.subheader("Text to Run")
45
+ input_text = st.text_area("Selected Text", selected_text, height=128, max_chars=None, key=2)
46
+ elif input_method == "Write or Paste New Text":
47
+ st.subheader("Text to Run")
48
+ input_text = st.text_area('Write or Paste Text Below', value="", height=128, max_chars=None, key=2)
49
+
50
+ @st.cache(allow_output_mutation=True)
51
+ def setModel(named_persons_only):
52
+ ner = roner.NER(named_persons_only=named_persons_only)
53
+ return ner
54
+
55
+ @st.cache(allow_output_mutation=True)
56
+ def get_html(html: str):
57
+ WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
58
+ html = html.replace("\n", " ")
59
+ return WRAPPER.format(html)
60
+
61
+ Run_Button = st.button("Run", key=None)
62
+ if Run_Button == True:
63
+
64
+ ner = setModel(named_persons_only = False)
65
+ output = ner(input_text)[0]
66
+
67
+ df = pd.DataFrame.from_dict(output)
68
+
69
+ st.subheader("Recognized Entities")
70
+ st.dataframe(df)
71
+
72
+ """
73
+ st.subheader("Spacy Style Display")
74
+ spacy_display = {}
75
+ spacy_display["ents"] = []
76
+ spacy_display["text"] = input_text
77
+ spacy_display["title"] = None
78
+
79
+ for entity in output:
80
+ if aggregation != "none":
81
+ spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
82
+ else:
83
+ spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity"]})
84
+
85
+ entity_list = ["PER", "LOC", "ORG", "MISC"]
86
+ colors = {'PER': '#85DCDF', 'LOC': '#DF85DC', 'ORG': '#DCDF85', 'MISC': '#85ABDF',}
87
+ html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True, options={"ents": entity_list, "colors": colors})
88
+ style = "<style>mark.entity { display: inline-block }</style>"
89
+ st.write(f"{style}{get_html(html)}", unsafe_allow_html=True)
90
+ """
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ torch
3
+ transformers
4
+ sentencepiece
5
+ pandas
6
+ spacy
7
+ roner