abdullahmubeen10 committed on
Commit
d7e89a9
·
verified ·
1 Parent(s): cd2173f

Upload 5 files

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base="light"
3
+ primaryColor="#29B4E8"
Demo.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import sparknlp
3
+
4
+ from sparknlp.base import *
5
+ from sparknlp.annotator import *
6
+ from pyspark.ml import Pipeline
7
+
8
+ # Page configuration
9
+ st.set_page_config(
10
+ layout="wide",
11
+ initial_sidebar_state="auto"
12
+ )
13
+
14
+ # CSS for styling
15
+ st.markdown("""
16
+ <style>
17
+ .main-title {
18
+ font-size: 36px;
19
+ color: #4A90E2;
20
+ font-weight: bold;
21
+ text-align: center;
22
+ }
23
+ .section {
24
+ background-color: #f9f9f9;
25
+ padding: 10px;
26
+ border-radius: 10px;
27
+ margin-top: 10px;
28
+ }
29
+ .section p, .section ul {
30
+ color: #666666;
31
+ }
32
+ .scroll {
33
+ overflow-x: auto;
34
+ border: 1px solid #e6e9ef;
35
+ border-radius: 0.25rem;
36
+ padding: 1rem;
37
+ margin-bottom: 2.5rem;
38
+ white-space: pre-wrap;
39
+ }
40
+ </style>
41
+ """, unsafe_allow_html=True)
42
+
43
@st.cache_resource
def init_spark():
    """Start the Spark NLP session used by the demo.

    The function is wrapped in ``st.cache_resource``; it simply returns
    whatever ``sparknlp.start()`` produces.
    """
    session = sparknlp.start()
    return session
46
+
47
@st.cache_resource
def create_pipeline(model, task):
    """Build the two-stage Spark ML pipeline for a pretrained T5 model.

    Args:
        model: Name handed to ``T5Transformer.pretrained``.
        task: Task string handed to ``setTask``.

    Returns:
        An unfitted ``Pipeline`` whose stages are a ``DocumentAssembler``
        ("text" -> "documents") followed by the T5 transformer
        ("documents" -> "transfers", max output length 200).
    """
    assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("documents")
    )

    transformer = (
        T5Transformer.pretrained(model)
        .setTask(task)
        .setInputCols(["documents"])
        .setMaxOutputLength(200)
        .setOutputCol("transfers")
    )

    stages = [assembler, transformer]
    return Pipeline().setStages(stages)
61
+
62
def fit_data(pipeline, data):
    """Apply ``pipeline`` to a single value and collect the T5 results.

    Relies on the module-level ``spark`` session being defined before the
    function is called.

    Args:
        pipeline: Pipeline to fit and then apply.
        data: Value placed in a one-row, one-column ("text") DataFrame.

    Returns:
        The collected rows of the ``transfers.result`` column.
    """
    frame = spark.createDataFrame([[data]]).toDF("text")
    fitted = pipeline.fit(frame)
    transformed = fitted.transform(frame)
    rows = transformed.select('transfers.result').collect()
    return rows
66
+
67
+ # Sidebar setup
68
+ model = st.sidebar.selectbox(
69
+ "Choose the Pretrained Model",
70
+ ['t5_active_to_passive_styletransfer', 't5_passive_to_active_styletransfer'],
71
+ help="Select the model you want to use for style transfer."
72
+ )
73
+
74
+ # Reference notebook link in sidebar
75
+ st.sidebar.markdown('Reference notebook:')
76
+ st.sidebar.markdown(
77
+ """
78
+ <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/T5_LINGUISTIC.ipynb">
79
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
80
+ </a>
81
+ """,
82
+ unsafe_allow_html=True
83
+ )
84
+
85
+ examples = {
86
+ "t5_active_to_passive_styletransfer": [
87
+ "I am writing you a letter.",
88
+ "Reporters write news reports.",
89
+ "The company will hire new workers.",
90
+ "Emma writes a letter.",
91
+ "We did not grow rice.",
92
+ "People will admire him.",
93
+ "Someone has stolen my purse."
94
+ ],
95
+ "t5_passive_to_active_styletransfer": [
96
+ "At dinner, six shrimp were eaten by Harry.",
97
+ "The savannah is roamed by beautiful giraffes.",
98
+ "The flat tire was changed by Sue.",
99
+ "The students' questions are always answered by the teacher."
100
+ ]
101
+ }
102
+
103
+ task_descriptions = {
104
+ "t5_active_to_passive_styletransfer": "Transfer Active to Passive:",
105
+ "t5_passive_to_active_styletransfer": "Transfer Passive to Active:"
106
+ }
107
+
108
+ # Set up the page layout
109
+ title = "Switch Between Active and Passive Voice"
110
+ sub_title = "Effortlessly Transform Sentences and Explore Different Writing Styles"
111
+
112
+ st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
113
+ st.markdown(f'<div style="text-align: center; color: #666666;">{sub_title}</div>', unsafe_allow_html=True)
114
+
115
# Text selection and analysis
# Local import: this block renders user-controlled text inside raw HTML
# (unsafe_allow_html=True), so the text must be HTML-escaped first.
import html

selected_text = st.selectbox("Select an example", examples[model])
custom_input = st.text_input("Try it with your own sentence!")

# A non-empty custom sentence takes precedence over the selected example.
text_to_analyze = custom_input if custom_input else selected_text

st.write('Text to analyze:')
# Escape before interpolating: custom_input is free text typed by the user
# and would otherwise be injected verbatim into the page markup.
st.markdown(
    f'<div class="scroll">{html.escape(text_to_analyze)}</div>',
    unsafe_allow_html=True,
)

# Initialize Spark and create pipeline
spark = init_spark()
pipeline = create_pipeline(model, task_descriptions[model])
output = fit_data(pipeline, text_to_analyze)

# Display transformed sentence
st.write("Predicted Sentence:")
# output[0][0] is the list of result strings for the single input row.
output_text = "".join(output[0][0])
# The model output is derived from the user's input, so escape it as well.
st.markdown(
    f'<div class="scroll">{html.escape(output_text.title())}</div>',
    unsafe_allow_html=True,
)
Dockerfile ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Download base image ubuntu 18.04
2
+ FROM ubuntu:18.04
3
+
4
+ # Set environment variables
5
+ ENV NB_USER jovyan
6
+ ENV NB_UID 1000
7
+ ENV HOME /home/${NB_USER}
8
+ ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
9
+
10
+ # Install required packages
11
+ RUN apt-get update && apt-get install -y \
12
+ tar \
13
+ wget \
14
+ bash \
15
+ rsync \
16
+ gcc \
17
+ libfreetype6-dev \
18
+ libhdf5-serial-dev \
19
+ libpng-dev \
20
+ libzmq3-dev \
21
+ python3 \
22
+ python3-dev \
23
+ python3-pip \
24
+ unzip \
25
+ pkg-config \
26
+ software-properties-common \
27
+ graphviz \
28
+ openjdk-8-jdk \
29
+ ant \
30
+ ca-certificates-java \
31
+ && apt-get clean \
32
+ && update-ca-certificates -f
33
+
34
+ # Install Python 3.8 and pip
35
+ RUN add-apt-repository ppa:deadsnakes/ppa \
36
+ && apt-get update \
37
+ && apt-get install -y python3.8 python3-pip \
38
+ && apt-get clean
39
+
40
+ # Set up JAVA_HOME
41
+ RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> /etc/profile \
42
+ && echo "export PATH=\$JAVA_HOME/bin:\$PATH" >> /etc/profile
43
+ # Create a new user named "jovyan" with user ID 1000
44
+ RUN useradd -m -u ${NB_UID} ${NB_USER}
45
+
46
+ # Switch to the "jovyan" user
47
+ USER ${NB_USER}
48
+
49
+ # Set home and path variables for the user
50
+ ENV HOME=/home/${NB_USER} \
51
+ PATH=/home/${NB_USER}/.local/bin:$PATH
52
+
53
+ # Set up PySpark to use Python 3.8 for both driver and workers
54
+ ENV PYSPARK_PYTHON=/usr/bin/python3.8
55
+ ENV PYSPARK_DRIVER_PYTHON=/usr/bin/python3.8
56
+
57
+ # Set the working directory to the user's home directory
58
+ WORKDIR ${HOME}
59
+
60
+ # Upgrade pip and install Python dependencies
61
+ RUN python3.8 -m pip install --upgrade pip
62
+ COPY requirements.txt /tmp/requirements.txt
63
+ RUN python3.8 -m pip install -r /tmp/requirements.txt
64
+
65
+ # Copy the application code into the container at /home/jovyan
66
+ COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
67
+
68
+ # Expose port for Streamlit
69
+ EXPOSE 7860
70
+
71
+ # Define the entry point for the container
72
+ ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
pages/Workflow & Model Overview.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ # Page configuration
4
+ st.set_page_config(
5
+ layout="wide",
6
+ initial_sidebar_state="auto"
7
+ )
8
+
9
+ # Custom CSS for better styling
10
+ st.markdown("""
11
+ <style>
12
+ .main-title {
13
+ font-size: 36px;
14
+ color: #4A90E2;
15
+ font-weight: bold;
16
+ text-align: center;
17
+ }
18
+ .sub-title {
19
+ font-size: 24px;
20
+ color: #4A90E2;
21
+ margin-top: 20px;
22
+ }
23
+ .section {
24
+ background-color: #f9f9f9;
25
+ padding: 15px;
26
+ border-radius: 10px;
27
+ margin-top: 20px;
28
+ }
29
+ .section h2 {
30
+ font-size: 22px;
31
+ color: #4A90E2;
32
+ }
33
+ .section p, .section ul {
34
+ color: #666666;
35
+ }
36
+ .link {
37
+ color: #4A90E2;
38
+ text-decoration: none;
39
+ }
40
+ </style>
41
+ """, unsafe_allow_html=True)
42
+
43
+ # Title
44
+ st.markdown('<div class="main-title">Switch Between Active and Passive Voice</div>', unsafe_allow_html=True)
45
+
46
+ # Introduction Section
47
+ st.markdown("""
48
+ <div class="section">
49
+ <p>Switching between active and passive voice is an essential skill in writing, allowing for more versatile sentence structures and varied expression. Active voice is direct and vigorous, while passive voice can be used to emphasize the action or the recipient of the action, making it a useful tool for nuanced communication.</p>
50
+ <p>In this page, we explore how to implement a pipeline that can automatically switch between active and passive voice, and vice versa, using advanced NLP models. We use a T5 Transformer model fine-tuned for style transfer, enabling seamless conversion of sentences between these two voices.</p>
51
+ </div>
52
+ """, unsafe_allow_html=True)
53
+
54
+ # T5 Transformer Overview
55
+ st.markdown('<div class="sub-title">Understanding the T5 Transformer for Style Transfer</div>', unsafe_allow_html=True)
56
+
57
+ st.markdown("""
58
+ <div class="section">
59
+ <p>The T5 (Text-To-Text Transfer Transformer) model, developed by Google, is a powerful tool capable of handling a variety of text-based tasks in a unified framework. When fine-tuned for style transfer, T5 can effectively convert sentences from active to passive voice and vice versa.</p>
60
+ <p>The model processes input sentences and, based on its training, generates a text output that switches the voice while preserving the original meaning. This is particularly useful for applications in writing assistance, automated editing, and language learning tools.</p>
61
+ </div>
62
+ """, unsafe_allow_html=True)
63
+
64
+ # Performance Section
65
+ st.markdown('<div class="sub-title">Performance and Use Cases</div>', unsafe_allow_html=True)
66
+
67
+ st.markdown("""
68
+ <div class="section">
69
+ <p>The T5 model has been extensively tested on various text transformation tasks, including style transfer between active and passive voice. The model consistently produces accurate and contextually appropriate results, making it a valuable asset in both professional and educational settings.</p>
70
+ <p>This capability is especially useful for writers, editors, and educators who need to adjust sentence structures for clarity, emphasis, or stylistic variation. The T5 model's ability to perform these transformations without requiring external data sources makes it a powerful tool for on-the-fly text editing.</p>
71
+ </div>
72
+ """, unsafe_allow_html=True)
73
+
74
+ # Implementation Section
75
+ st.markdown('<div class="sub-title">Implementing Active-Passive Voice Switching</div>', unsafe_allow_html=True)
76
+
77
+ st.markdown("""
78
+ <div class="section">
79
+ <p>The following example demonstrates how to implement a style transfer pipeline using Spark NLP to switch between active and passive voice and vice versa. The pipeline includes a document assembler and the T5 model to perform the transformation in both directions.</p>
80
+ </div>
81
+ """, unsafe_allow_html=True)
82
+
83
# Display a copy-pasteable example of the style-transfer pipeline.
# The snippet calls sparknlp.start(), so it has to import sparknlp itself;
# without that import the example fails with a NameError when run as shown.
st.code('''
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline

# Initialize Spark NLP
spark = sparknlp.start()

# Define the pipeline stages
document_assembler = DocumentAssembler()\\
    .setInputCol("text")\\
    .setOutputCol("documents")

# Active to Passive transformation
t5_active_to_passive = T5Transformer()\\
    .pretrained("t5_active_to_passive_styletransfer")\\
    .setTask("Transfer Active to Passive:")\\
    .setInputCols(["documents"])\\
    .setOutputCol("passive")

# Passive to Active transformation
t5_passive_to_active = T5Transformer()\\
    .pretrained("t5_passive_to_active_styletransfer")\\
    .setTask("Transfer Passive to Active:")\\
    .setInputCols(["documents"])\\
    .setOutputCol("active")

pipeline_active_to_passive = Pipeline().setStages([document_assembler, t5_active_to_passive])
pipeline_passive_to_active = Pipeline().setStages([document_assembler, t5_passive_to_active])

# Input data example
data_active = spark.createDataFrame([["The dog chased the cat."]]).toDF("text")
data_passive = spark.createDataFrame([["The cat was chased by the dog."]]).toDF("text")

# Apply the pipeline for active to passive
result_active_to_passive = pipeline_active_to_passive.fit(data_active).transform(data_active)
result_active_to_passive.select("passive.result").show(truncate=False)

# Apply the pipeline for passive to active
result_passive_to_active = pipeline_passive_to_active.fit(data_passive).transform(data_passive)
result_passive_to_active.select("active.result").show(truncate=False)
''', language='python')
125
+
126
+ # Example Output
127
+ st.text("""
128
+ +--------------------------------+
129
+ |passive.result |
130
+ +--------------------------------+
131
+ |[The cat was chased by the dog.]|
132
+ +--------------------------------+
133
+
134
+ +---------------------------+
135
+ |active.result |
136
+ +---------------------------+
137
+ |[The dog chased the cat.] |
138
+ +---------------------------+
139
+ """)
140
+
141
+ # Model Info Section
142
+ st.markdown('<div class="sub-title">Choosing the Right T5 Model for Style Transfer</div>', unsafe_allow_html=True)
143
+
144
+ st.markdown("""
145
+ <div class="section">
146
+ <p>Several T5 models are available, each fine-tuned for different tasks. For switching between active and passive voice, two models are used: "t5_active_to_passive_styletransfer" for active-to-passive conversion and "t5_passive_to_active_styletransfer" for passive-to-active conversion.</p>
147
+ <p>Depending on your requirements, you can explore other T5 models optimized for different style transfer tasks. Check the <a class="link" href="https://sparknlp.org/models?annotator=T5Transformer" target="_blank">Spark NLP Models Hub</a> to find the most suitable model for your needs.</p>
148
+ </div>
149
+ """, unsafe_allow_html=True)
150
+
151
+ # References Section
152
+ st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)
153
+
154
+ st.markdown("""
155
+ <div class="section">
156
+ <ul>
157
+ <li><a class="link" href="https://ai.googleblog.com/2020/02/exploring-transfer-learning-with-t5.html" target="_blank">Google AI Blog</a>: Exploring Transfer Learning with T5</li>
158
+ <li><a class="link" href="https://sparknlp.org/models?annotator=T5Transformer" target="_blank">Spark NLP Model Hub</a>: Explore T5 models</li>
159
+ <li>Model used for Active to Passive: <a class="link" href="https://sparknlp.org/2022/05/31/t5_active_to_passive_styletransfer_en_3_0.html" target="_blank">t5_active_to_passive_styletransfer</a></li>
160
+ <li>Model used for Passive to Active: <a class="link" href="https://sparknlp.org/2022/06/01/t5_passive_to_active_styletransfer.html" target="_blank">t5_passive_to_active_styletransfer</a></li>
161
+ <li><a class="link" href="https://github.com/google-research/text-to-text-transfer-transformer" target="_blank">GitHub</a>: T5 Transformer repository</li>
162
+ <li><a class="link" href="https://arxiv.org/abs/1910.10683" target="_blank">T5 Paper</a>: Detailed insights from the developers</li>
163
+ </ul>
164
+ </div>
165
+ """, unsafe_allow_html=True)
166
+
167
+ # Community & Support Section
168
+ st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
169
+
170
+ st.markdown("""
171
+ <div class="section">
172
+ <ul>
173
+ <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
174
+ <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
175
+ <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
176
+ <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
177
+ <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
178
+ </ul>
179
+ </div>
180
+ """, unsafe_allow_html=True)
181
+
182
+ # Quick Links Section
183
+ st.markdown('<div class="sub-title">Quick Links</div>', unsafe_allow_html=True)
184
+
185
+ st.markdown("""
186
+ <div class="section">
187
+ <ul>
188
+ <li><a class="link" href="https://sparknlp.org/docs/en/quickstart" target="_blank">Getting Started</a></li>
189
+ <li><a class="link" href="https://nlp.johnsnowlabs.com/models" target="_blank">Pretrained Models</a></li>
190
+ <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples/python/annotation/text/english" target="_blank">Example Notebooks</a></li>
191
+ <li><a class="link" href="https://sparknlp.org/docs/en/install" target="_blank">Installation Guide</a></li>
192
+ </ul>
193
+ </div>
194
+ """, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ st-annotated-text
3
+ streamlit-tags
4
+ pandas
5
+ numpy
6
+ spark-nlp
7
+ pyspark