abdullahmubeen10 committed on
Commit
83c92cc
1 Parent(s): f7b8b91

Update Demo.py

Browse files
Files changed (1) hide show
  1. Demo.py +121 -121
Demo.py CHANGED
@@ -1,121 +1,121 @@
1
- import streamlit as st
2
- import sparknlp
3
- import os
4
- import pandas as pd
5
-
6
- from sparknlp.base import *
7
- from sparknlp.annotator import *
8
- from pyspark.ml import Pipeline
9
- from sparknlp.pretrained import PretrainedPipeline
10
-
11
- # Page configuration
12
- st.set_page_config(
13
- layout="wide",
14
- page_title="Spark NLP Demos App",
15
- initial_sidebar_state="auto"
16
- )
17
-
18
- # CSS for styling
19
- st.markdown("""
20
- <style>
21
- .main-title {
22
- font-size: 36px;
23
- color: #4A90E2;
24
- font-weight: bold;
25
- text-align: center;
26
- }
27
- .section p, .section ul {
28
- color: #666666;
29
- }
30
- </style>
31
- """, unsafe_allow_html=True)
32
-
33
- @st.cache_resource
34
- def init_spark():
35
- return sparknlp.start()
36
-
37
- @st.cache_resource
38
- def create_pipeline():
39
- document_assembler = DocumentAssembler()\
40
- .setInputCol("text")\
41
- .setOutputCol("document")
42
-
43
- tokenizer = RecursiveTokenizer()\
44
- .setInputCols(["document"])\
45
- .setOutputCol("token")\
46
- .setPrefixes(["\"", "(", "[", "\n"])\
47
- .setSuffixes([".", ",", "?", ")","!", "‘s"])
48
-
49
- spell_model = ContextSpellCheckerModel\
50
- .pretrained('spellcheck_dl')\
51
- .setInputCols("token")\
52
- .setOutputCol("corrected")
53
-
54
- light_pipeline = Pipeline(stages = [document_assembler, tokenizer, spell_model])
55
-
56
- return light_pipeline
57
-
58
- def fit_data(pipeline, data):
59
- empty_df = spark.createDataFrame([['']]).toDF('text')
60
- pipeline_model = pipeline.fit(empty_df)
61
- model = LightPipeline(pipeline_model)
62
- result = model.annotate(data)
63
-
64
- return result
65
-
66
- # Set up the page layout
67
- st.markdown('<div class="main-title">Spell Check Your Text Documents With Spark NLP</div>', unsafe_allow_html=True)
68
-
69
- # Sidebar content
70
- model = st.sidebar.selectbox(
71
- "Choose the pretrained model",
72
- ["spellcheck_dl"],
73
- help="For more info about the models visit: https://sparknlp.org/models"
74
- )
75
-
76
- # Reference notebook link in sidebar
77
- link = """
78
- <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/SPELL_CHECKER_EN.ipynb">
79
- <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
80
- </a>
81
- """
82
- st.sidebar.markdown('Reference notebook:')
83
- st.sidebar.markdown(link, unsafe_allow_html=True)
84
-
85
- # Load examples
86
- examples = [
87
- "Apollo 11 was the space fleght that landed the frrst humans, Americans Neil Armstrong and Buzz Aldrin, on the Mon on july 20, 1969, at 20:18 UTC. Armstrong beceme the first to stp onto the luner surface 6 hours later on July 21 at 02:56 UTC. Armstrong spent abut three and a half two and a hakf hours outside the spacecraft, Aldrin slghtely less; and tgether they colected 47.5 pounds (21.5 kg) of lunar material for returne to Earth. A third member of the mission, Michael Collins, pilloted the comand spacecraft alone in lunar orbit untl Armstrong and Aldrin returned to it for the trep back to Earth.",
88
- "Set theory is a branch of mathematical logic that studyes sets, which informally are colections of objects. Although any type of object can be collected into a set, set theory is applyed most often to objects that are relevant to mathematics. The language of set theory can be used to define nearly all mathematical objects. Set theory is commonly employed as a foundational system for mathematics. Beyond its foundational role, set theory is a branch of mathematics in its own right, with an active resurch community. Contemporary resurch into set theory includes a divers colection of topics, ranging from the structer of the real number line to the study of the consistency of large cardinals.",
89
- "In mathematics and transporation enginering, traffic flow is the study of interctions between travellers (including podestrians, ciclists, drivers, and their vehicles) and inferstructure (including highways, signage, and traffic control devices), with the aim of understanding and developing an optimal transport network with eficiant movement of traffic and minimal traffic congestion problems. Current traffic models use a mixture of emperical and theoretical techniques. These models are then developed into traffic forecasts, and take account of proposed local or major changes, such as incrased vehicle use, changes in land use or changes in mode of transport (with people moving from bus to train or car, for example), and to identify areas of congestion where the network needs to be ajusted.",
90
- "Critical theory is a social pholosiphy pertaning to the reflective asessment and critique of society and culture in order to reveal and challange power structures. With origins in socology, as well as in literary criticism, it argues that social problems are influenced and created more by societal structures and cultural assumptions than by individual and psychological factors. Mantaining that ideology is the principal obsticle to human liberation, critical theory was establiched as a school of thought primarily by the Frankfurt School theoreticians. One sociologist described a theory as critical insofar as it seeks 'to liberate human beings from the circomstances that enslave them.",
91
- "In computitional languistics, lemmatisation is the algorhythmic process of determining the lemma of a word based on its intended meaning. Unlike stemming, lemmatisation depends on corectly identifing the intended part of speech and meaning of a word in a sentence, as well as within the larger context surounding that sentence, such as neigboring sentences or even an entire document. As a result, devleoping efficient lemmatisation algorithums is an open area of research. In many langauges, words appear in several inflected forms. For example, in English, the verb 'to walk' may appear as 'walk', 'walked', 'walks' or 'walking'. The base form, 'walk', that one might look up in a dictionery, is called the lemma for the word. The asociation of the base form with a part of speech is often called a lexeme of the word."
92
- ]
93
-
94
- selected_text = st.selectbox("Select an example", examples)
95
- custom_input = st.text_input("Try it for yourself!")
96
-
97
- if custom_input:
98
- selected_text = custom_input
99
- elif selected_text:
100
- selected_text = selected_text
101
-
102
- st.subheader("Selected Text:")
103
- st.write(selected_text)
104
-
105
- # Initialize Spark and create pipeline
106
- spark = init_spark()
107
- pipeline = create_pipeline(model)
108
- output = fit_data(pipeline, selected_text)
109
-
110
- # Display output sentence
111
- correction_dict_filtered = {token: corrected for token, corrected in zip(output['token'], output['corrected']) if token != corrected}
112
-
113
- def generate_html_with_corrections(sentence, corrections):
114
- corrected_html = sentence
115
- for incorrect, correct in corrections.items():
116
- corrected_html = corrected_html.replace(incorrect, f'<span style="text-decoration: line-through; color: red;">{incorrect}</span> <span style="color: green;">{correct}</span>')
117
- return f'<div style="font-family: Arial, sans-serif; font-size: 16px;">{corrected_html}</div>'
118
-
119
- corrected_html_snippet = generate_html_with_corrections(selected_text, correction_dict_filtered)
120
- st.subheader(f"Misspells Detected:")
121
- st.markdown(corrected_html_snippet, unsafe_allow_html=True)
 
1
+ import streamlit as st
2
+ import sparknlp
3
+ import os
4
+ import pandas as pd
5
+
6
+ from sparknlp.base import *
7
+ from sparknlp.annotator import *
8
+ from pyspark.ml import Pipeline
9
+ from sparknlp.pretrained import PretrainedPipeline
10
+
11
+ # Page configuration
12
+ st.set_page_config(
13
+ layout="wide",
14
+ page_title="Spark NLP Demos App",
15
+ initial_sidebar_state="auto"
16
+ )
17
+
18
+ # CSS for styling
19
+ st.markdown("""
20
+ <style>
21
+ .main-title {
22
+ font-size: 36px;
23
+ color: #4A90E2;
24
+ font-weight: bold;
25
+ text-align: center;
26
+ }
27
+ .section p, .section ul {
28
+ color: #666666;
29
+ }
30
+ </style>
31
+ """, unsafe_allow_html=True)
32
+
33
@st.cache_resource
def init_spark():
    """Start Spark NLP and return whatever ``sparknlp.start()`` yields.

    Wrapped in ``st.cache_resource`` so the call result is cached by
    Streamlit instead of being recomputed by every invocation.
    """
    session = sparknlp.start()
    return session
36
+
37
@st.cache_resource
def create_pipeline(model_name="spellcheck_dl"):
    """Assemble the (not yet fitted) spell-checking pipeline.

    Stages, in order:
      1. ``DocumentAssembler``: column "text" -> "document"
      2. ``RecursiveTokenizer``: "document" -> "token"
      3. ``ContextSpellCheckerModel``: "token" -> "corrected"

    Args:
        model_name: Name of the pretrained ``ContextSpellCheckerModel`` to
            load. Defaults to ``"spellcheck_dl"``, the value that used to be
            hard-coded here, so calling the function without arguments
            behaves exactly as before.

    Returns:
        A ``pyspark.ml.Pipeline`` containing the three stages. It is a plain
        ``Pipeline`` (not a ``LightPipeline``) and must be fitted before use.
    """
    document_assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    # NOTE(review): the last suffix is spelled with a LEFT single quotation
    # mark (U+2018) rather than an apostrophe; kept byte-for-byte because
    # changing it would alter tokenization -- confirm whether "'s" was meant.
    tokenizer = (
        RecursiveTokenizer()
        .setInputCols(["document"])
        .setOutputCol("token")
        .setPrefixes(["\"", "(", "[", "\n"])
        .setSuffixes([".", ",", "?", ")","!", "‘s"])
    )

    spell_model = (
        ContextSpellCheckerModel
        .pretrained(model_name)
        .setInputCols("token")
        .setOutputCol("corrected")
    )

    return Pipeline(stages=[document_assembler, tokenizer, spell_model])
57
+
58
def fit_data(pipeline, data):
    """Fit ``pipeline`` and annotate ``data`` with it.

    The pipeline is fitted on a placeholder DataFrame that holds a single
    empty string in a column named "text"; the fitted model is then wrapped
    in a ``LightPipeline`` and applied to ``data``.

    Relies on the module-level ``spark`` session, which must be assigned
    before this function is called.

    Args:
        pipeline: Unfitted pipeline exposing ``fit``.
        data: Input handed unchanged to ``LightPipeline.annotate``.

    Returns:
        The value returned by ``LightPipeline.annotate(data)``.
    """
    placeholder_df = spark.createDataFrame([['']]).toDF('text')
    annotator = LightPipeline(pipeline.fit(placeholder_df))
    return annotator.annotate(data)
65
+
66
+ # Set up the page layout
67
+ st.markdown('<div class="main-title">Spell Check Your Text Documents With Spark NLP</div>', unsafe_allow_html=True)
68
+
69
+ # Sidebar content
70
+ model = st.sidebar.selectbox(
71
+ "Choose the pretrained model",
72
+ ["spellcheck_dl"],
73
+ help="For more info about the models visit: https://sparknlp.org/models"
74
+ )
75
+
76
+ # Reference notebook link in sidebar
77
+ link = """
78
+ <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/SPELL_CHECKER_EN.ipynb">
79
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
80
+ </a>
81
+ """
82
+ st.sidebar.markdown('Reference notebook:')
83
+ st.sidebar.markdown(link, unsafe_allow_html=True)
84
+
85
+ # Load examples
86
+ examples = [
87
+ "Apollo 11 was the space fleght that landed the frrst humans, Americans Neil Armstrong and Buzz Aldrin, on the Mon on july 20, 1969, at 20:18 UTC. Armstrong beceme the first to stp onto the luner surface 6 hours later on July 21 at 02:56 UTC. Armstrong spent abut three and a half two and a hakf hours outside the spacecraft, Aldrin slghtely less; and tgether they colected 47.5 pounds (21.5 kg) of lunar material for returne to Earth. A third member of the mission, Michael Collins, pilloted the comand spacecraft alone in lunar orbit untl Armstrong and Aldrin returned to it for the trep back to Earth.",
88
+ "Set theory is a branch of mathematical logic that studyes sets, which informally are colections of objects. Although any type of object can be collected into a set, set theory is applyed most often to objects that are relevant to mathematics. The language of set theory can be used to define nearly all mathematical objects. Set theory is commonly employed as a foundational system for mathematics. Beyond its foundational role, set theory is a branch of mathematics in its own right, with an active resurch community. Contemporary resurch into set theory includes a divers colection of topics, ranging from the structer of the real number line to the study of the consistency of large cardinals.",
89
+ "In mathematics and transporation enginering, traffic flow is the study of interctions between travellers (including podestrians, ciclists, drivers, and their vehicles) and inferstructure (including highways, signage, and traffic control devices), with the aim of understanding and developing an optimal transport network with eficiant movement of traffic and minimal traffic congestion problems. Current traffic models use a mixture of emperical and theoretical techniques. These models are then developed into traffic forecasts, and take account of proposed local or major changes, such as incrased vehicle use, changes in land use or changes in mode of transport (with people moving from bus to train or car, for example), and to identify areas of congestion where the network needs to be ajusted.",
90
+ "Critical theory is a social pholosiphy pertaning to the reflective asessment and critique of society and culture in order to reveal and challange power structures. With origins in socology, as well as in literary criticism, it argues that social problems are influenced and created more by societal structures and cultural assumptions than by individual and psychological factors. Mantaining that ideology is the principal obsticle to human liberation, critical theory was establiched as a school of thought primarily by the Frankfurt School theoreticians. One sociologist described a theory as critical insofar as it seeks 'to liberate human beings from the circomstances that enslave them.",
91
+ "In computitional languistics, lemmatisation is the algorhythmic process of determining the lemma of a word based on its intended meaning. Unlike stemming, lemmatisation depends on corectly identifing the intended part of speech and meaning of a word in a sentence, as well as within the larger context surounding that sentence, such as neigboring sentences or even an entire document. As a result, devleoping efficient lemmatisation algorithums is an open area of research. In many langauges, words appear in several inflected forms. For example, in English, the verb 'to walk' may appear as 'walk', 'walked', 'walks' or 'walking'. The base form, 'walk', that one might look up in a dictionery, is called the lemma for the word. The asociation of the base form with a part of speech is often called a lexeme of the word."
92
+ ]
93
+
94
selected_text = st.selectbox("Select an example", examples)
custom_input = st.text_input("Try it for yourself!")

# Free-form input, when provided, takes precedence over the chosen example;
# otherwise the selectbox value is used as-is (the former self-assignment
# branch was a no-op and has been dropped).
if custom_input:
    selected_text = custom_input

st.subheader("Selected Text:")
st.write(selected_text)
104
+
105
+ # Initialize Spark and create pipeline
106
# `spark` must exist at module level before fit_data() runs, because
# fit_data() reads it as a global.
spark = init_spark()
pipeline = create_pipeline()
output = fit_data(pipeline, selected_text)

# Pair every token with its entry in the "corrected" output and keep only the
# pairs where the two differ. A token occurring more than once keeps the
# correction from its last occurrence (ordinary dict semantics).
correction_dict_filtered = {
    original: fixed
    for original, fixed in zip(output['token'], output['corrected'])
    if original != fixed
}
112
+
113
def generate_html_with_corrections(sentence, corrections):
    """Render ``sentence`` as HTML with each misspelled token struck through.

    Every occurrence of a key of ``corrections`` that stands as a whole token
    in ``sentence`` is replaced by the token in red strike-through followed by
    its correction in green. The result is wrapped in a styled ``<div>``.

    Differences from a naive chain of ``str.replace`` calls:
      * The sentence is scanned once, so markup or corrections inserted for
        one token can never be rewritten by a later token.
      * A token is not matched inside a longer word ("Mon" in "Monday").
      * Sentence text, tokens and corrections are HTML-escaped, because the
        sentence may be arbitrary user input and callers render the result
        as raw HTML.

    Args:
        sentence: The original text.
        corrections: Mapping of misspelled token -> corrected token. Empty
            keys are ignored.

    Returns:
        An HTML snippet (``str``).
    """
    # Function-scope imports keep this block self-contained.
    import html
    import re

    def _escape(text):
        # Text-node escaping only (&, <, >); quotes are harmless outside
        # attribute values and are left untouched.
        return html.escape(text, quote=False)

    def _token_pattern(token):
        # Require a word boundary only on sides where the token itself ends
        # in a word character, so punctuation-like tokens still match.
        head = r"(?<!\w)" if re.fullmatch(r"\w", token[0]) else ""
        tail = r"(?!\w)" if re.fullmatch(r"\w", token[-1]) else ""
        return f"{head}{re.escape(token)}{tail}"

    # Longest tokens first so the alternation prefers the most specific match.
    tokens = sorted((tok for tok in corrections if tok), key=len, reverse=True)

    if tokens:
        matcher = re.compile("|".join(_token_pattern(tok) for tok in tokens))
        parts = []
        cursor = 0
        for hit in matcher.finditer(sentence):
            wrong = hit.group(0)
            parts.append(_escape(sentence[cursor:hit.start()]))
            parts.append(
                f'<span style="text-decoration: line-through; color: red;">{_escape(wrong)}</span>'
                f' <span style="color: green;">{_escape(corrections[wrong])}</span>'
            )
            cursor = hit.end()
        parts.append(_escape(sentence[cursor:]))
        corrected_html = "".join(parts)
    else:
        corrected_html = _escape(sentence)

    return f'<div style="font-family: Arial, sans-serif; font-size: 16px;">{corrected_html}</div>'
118
+
119
corrected_html_snippet = generate_html_with_corrections(selected_text, correction_dict_filtered)
# Plain literal: the former f-string had no placeholders.
st.subheader("Misspells Detected:")
# NOTE: unsafe_allow_html=True makes Streamlit render the snippet as raw HTML,
# so everything placed into it (including user-typed text) must be safe.
st.markdown(corrected_html_snippet, unsafe_allow_html=True)