abdullahmubeen10 committed
Commit: a1587c9
Parent(s): cee413d
Update Demo.py
Demo.py
CHANGED
@@ -1,140 +1,140 @@
 import streamlit as st
 import sparknlp
 
 from sparknlp.base import *
 from sparknlp.annotator import *
 from pyspark.ml import Pipeline
 from annotated_text import annotated_text
 
 # Page configuration
 st.set_page_config(
     layout="wide",
     initial_sidebar_state="auto"
 )
 
 # CSS for styling
 st.markdown("""
     <style>
         .main-title {
             font-size: 36px;
             color: #4A90E2;
             font-weight: bold;
             text-align: center;
         }
         .section {
             background-color: #f9f9f9;
             padding: 10px;
             border-radius: 10px;
             margin-top: 10px;
         }
         .section p, .section ul {
             color: #666666;
         }
     </style>
 """, unsafe_allow_html=True)
 
 @st.cache_resource
 def init_spark():
     return sparknlp.start()
 
 @st.cache_resource
 def create_pipeline():
     document_assembler = DocumentAssembler() \
         .setInputCol('text') \
         .setOutputCol('document')
 
     tokenizer = Tokenizer() \
         .setInputCols(['document']) \
         .setOutputCol('token')
 
-    sequence_classifier = XlmRoBertaForSequenceClassification.pretrained(
+    sequence_classifier = XlmRoBertaForSequenceClassification.pretrained() \
         .setInputCols(["document", "token"]) \
         .setOutputCol("class")
 
     pipeline = Pipeline(stages=[document_assembler, tokenizer, sequence_classifier])
     return pipeline
 
 def fit_data(pipeline, data):
     empty_df = spark.createDataFrame([['']]).toDF('text')
     pipeline_model = pipeline.fit(empty_df)
     model = LightPipeline(pipeline_model)
     result = model.fullAnnotate(data)
     return result
 
 def annotate(data):
     document, chunks, labels = data["Document"], data["NER Chunk"], data["NER Label"]
     annotated_words = []
     for chunk, label in zip(chunks, labels):
         parts = document.split(chunk, 1)
         if parts[0]:
             annotated_words.append(parts[0])
         annotated_words.append((chunk, label))
         document = parts[1]
     if document:
         annotated_words.append(document)
     annotated_text(*annotated_words)
 
 tasks_models_descriptions = {
     "Sequence Classification": {
         "models": ["xlmroberta_classifier_base_mrpc"],
         "description": "The 'xlmroberta_classifier_base_mrpc' model is proficient in sequence classification tasks, such as sentiment analysis and document categorization. It effectively determines the sentiment of reviews, classifies text, and sorts documents based on their content and context."
     }
 }
 
 # Sidebar content
 task = st.sidebar.selectbox("Choose the task", list(tasks_models_descriptions.keys()))
 model = st.sidebar.selectbox("Choose the pretrained model", tasks_models_descriptions[task]["models"], help="For more info about the models visit: https://sparknlp.org/models")
 
 # Reference notebook link in sidebar
 link = """
 <a href="https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/357691d18373d6e8f13b5b1015137a398fd0a45f/Spark_NLP_Udemy_MOOC/Open_Source/17.01.Transformers-based_Embeddings.ipynb#L103">
     <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
 </a>
 """
 st.sidebar.markdown('Reference notebook:')
 st.sidebar.markdown(link, unsafe_allow_html=True)
 
 # Page content
 title, sub_title = (f'DeBERTa for {task}', tasks_models_descriptions[task]["description"])
 st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
 container = st.container(border=True)
 container.write(sub_title)
 
 # Load examples
 examples_mapping = {
     "Sequence Classification": [
         "This movie was absolutely fantastic! The storyline was gripping, the characters were well-developed, and the cinematography was stunning. I was on the edge of my seat the entire time.",
         "A heartwarming and beautiful film. The performances were top-notch, and the direction was flawless. This is easily one of the best movies I've seen this year.",
         "What a delightful surprise! The humor was spot on, and the plot was refreshingly original. The cast did an amazing job bringing the characters to life. Highly recommended!",
         "This was one of the worst movies I’ve ever seen. The plot was predictable, the acting was wooden, and the pacing was painfully slow. I couldn’t wait for it to end.",
         "A complete waste of time. The movie lacked any real substance or direction, and the dialogue was cringe-worthy. I wouldn’t recommend this to anyone.",
         "I had high hopes for this film, but it turned out to be a huge disappointment. The story was disjointed, and the special effects were laughably bad. Don’t bother watching this one.",
         "The movie was okay, but nothing special. It had a few good moments, but overall, it felt pretty average. Not something I would watch again, but it wasn’t terrible either.",
         "An average film with a decent plot. The acting was passable, but it didn't leave much of an impression on me. It's a movie you might watch once and forget about.",
         "This movie was neither good nor bad, just kind of there. It had some interesting ideas, but they weren’t executed very well. It’s a film you could take or leave."
     ]
 }
 
 examples = examples_mapping[task]
 selected_text = st.selectbox("Select an example", examples)
 custom_input = st.text_input("Try it with your own Sentence!")
 
 try:
     text_to_analyze = custom_input if custom_input else selected_text
     st.subheader('Full example text')
     HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
     st.markdown(HTML_WRAPPER.format(text_to_analyze), unsafe_allow_html=True)
 except:
     text_to_analyze = selected_text
 
 # Initialize Spark and create pipeline
 spark = init_spark()
 pipeline = create_pipeline()
 output = fit_data(pipeline, text_to_analyze)
 
 # Display matched sentence
 st.subheader("Prediction:")
 st.markdown(f"Classified as : **{output[0]['class'][0].result}**")
 
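Note that the fixed line calls pretrained() with no arguments, so it loads the library's default XLM-RoBERTa sequence-classification checkpoint; the model name selected in the sidebar ("xlmroberta_classifier_base_mrpc") is never passed into the pipeline. A minimal sketch of how the selected name could be wired through, under the assumptions that a create_pipeline(model_name) variant is acceptable and that 'en' is the model's language tag on the Spark NLP Models Hub:

import streamlit as st
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
from pyspark.ml import Pipeline

@st.cache_resource
def create_pipeline(model_name):
    # Same stages as Demo.py, but the classifier is loaded by explicit name
    # rather than the default checkpoint. The 'en' language tag is an assumption.
    document_assembler = DocumentAssembler() \
        .setInputCol('text') \
        .setOutputCol('document')

    tokenizer = Tokenizer() \
        .setInputCols(['document']) \
        .setOutputCol('token')

    sequence_classifier = XlmRoBertaForSequenceClassification.pretrained(model_name, 'en') \
        .setInputCols(["document", "token"]) \
        .setOutputCol("class")

    return Pipeline(stages=[document_assembler, tokenizer, sequence_classifier])

# Example usage: pipeline = create_pipeline(model)  # "model" is the sidebar selection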