Spaces:

spark-nlp
/

sparknlp-switch-between-active-and-passive-voice

Sleeping

App Files Files Community

abdullahmubeen10 committed on Sep 1, 2024

Commit

d7e89a9

verified ·

1 Parent(s): cd2173f

Upload 5 files

Browse files

Files changed (5) hide show

.streamlit/config.toml +3 -0
Demo.py +132 -0
Dockerfile +72 -0
pages/Workflow & Model Overview.py +194 -0
requirements.txt +7 -0

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,3 @@

+[theme]
+base="light"
+primaryColor="#29B4E8"

Demo.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import html
+import streamlit as st
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+# Page configuration: wide layout; the sidebar's initial state is left on "auto".
+st.set_page_config(
+    layout="wide",
+    initial_sidebar_state="auto"
+)
+# CSS for styling, injected as a raw <style> block (hence unsafe_allow_html=True).
+# Demo.py's own markup uses the "main-title" and "scroll" classes; "section" is
+# declared here but is not referenced by any markup emitted from Demo.py.
+st.markdown("""
+    <style>
+        .main-title {
+            font-size: 36px;
+            color: #4A90E2;
+            font-weight: bold;
+            text-align: center;
+        }
+        .section {
+            background-color: #f9f9f9;
+            padding: 10px;
+            border-radius: 10px;
+            margin-top: 10px;
+        }
+        .section p, .section ul {
+            color: #666666;
+        }
+        .scroll {
+            overflow-x: auto;
+            border: 1px solid #e6e9ef;
+            border-radius: 0.25rem;
+            padding: 1rem;
+            margin-bottom: 2.5rem;
+            white-space: pre-wrap;
+        }
+    </style>
+""", unsafe_allow_html=True)
+@st.cache_resource
+def init_spark():
+    return sparknlp.start()
+@st.cache_resource
+def create_pipeline(model, task):
+    documentAssembler = DocumentAssembler() \
+        .setInputCol("text") \
+        .setOutputCol("documents")
+    t5 = T5Transformer.pretrained(model) \
+        .setTask(task) \
+        .setInputCols(["documents"]) \
+        .setMaxOutputLength(200) \
+        .setOutputCol("transfers")
+    pipeline = Pipeline().setStages([documentAssembler, t5])
+    return pipeline
+def fit_data(pipeline, data):
+    df = spark.createDataFrame([[data]]).toDF("text")
+    result = pipeline.fit(df).transform(df)
+    return result.select('transfers.result').collect()
+# Sidebar setup: the chosen model name is also the key used to index
+# `examples` and `task_descriptions` below, so the option strings and the
+# dictionary keys must stay identical.
+model = st.sidebar.selectbox(
+    "Choose the Pretrained Model",
+    ['t5_active_to_passive_styletransfer', 't5_passive_to_active_styletransfer'],
+    help="Select the model you want to use for style transfer."
+)
+# Reference notebook link in sidebar (rendered as a Colab badge via raw HTML)
+st.sidebar.markdown('Reference notebook:')
+st.sidebar.markdown(
+    """
+    <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/T5_LINGUISTIC.ipynb">
+        <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
+    </a>
+    """,
+    unsafe_allow_html=True
+)
+# Example sentences offered in the "Select an example" dropdown, keyed by model name.
+examples = {
+    "t5_active_to_passive_styletransfer": [
+        "I am writing you a letter.",
+        "Reporters write news reports.",
+        "The company will hire new workers.",
+        "Emma writes a letter.",
+        "We did not grow rice.",
+        "People will admire him.",
+        "Someone has stolen my purse."
+    ],
+    "t5_passive_to_active_styletransfer": [
+        "At dinner, six shrimp were eaten by Harry.",
+        "The savannah is roamed by beautiful giraffes.",
+        "The flat tire was changed by Sue.",
+        "The students' questions are always answered by the teacher."
+    ]
+}
+# Task prefix passed to create_pipeline() (and from there to setTask), keyed by model name.
+task_descriptions = {
+    "t5_active_to_passive_styletransfer": "Transfer Active to Passive:",
+    "t5_passive_to_active_styletransfer": "Transfer Passive to Active:"
+}
+# Set up the page layout: centered title and subtitle rendered as raw HTML
+title = "Switch Between Active and Passive Voice"
+sub_title = "Effortlessly Transform Sentences and Explore Different Writing Styles"
+st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
+st.markdown(f'<div style="text-align: center; color: #666666;">{sub_title}</div>', unsafe_allow_html=True)
+# Text selection and analysis
+selected_text = st.selectbox("Select an example", examples[model])
+custom_input = st.text_input("Try it with your own sentence!")
+text_to_analyze = custom_input if custom_input else selected_text
+st.write('Text to analyze:')
+st.markdown(f'<div class="scroll">{text_to_analyze}</div>', unsafe_allow_html=True)
+# Initialize Spark and create pipeline
+spark = init_spark()
+pipeline = create_pipeline(model, task_descriptions[model])
+output = fit_data(pipeline, text_to_analyze)
+# Display transformed sentence
+st.write("Predicted Sentence:")
+output_text = "".join(output[0][0])
+st.markdown(f'<div class="scroll">{output_text.title()}</div>', unsafe_allow_html=True)

Dockerfile ADDED Viewed

	@@ -0,0 +1,72 @@

+# Download base image ubuntu 18.04
+FROM ubuntu:18.04
+# Set environment variables
+ENV NB_USER jovyan
+ENV NB_UID 1000
+ENV HOME /home/${NB_USER}
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
+# Install required packages
+RUN apt-get update && apt-get install -y \
+    tar \
+    wget \
+    bash \
+    rsync \
+    gcc \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libpng-dev \
+    libzmq3-dev \
+    python3 \
+    python3-dev \
+    python3-pip \
+    unzip \
+    pkg-config \
+    software-properties-common \
+    graphviz \
+    openjdk-8-jdk \
+    ant \
+    ca-certificates-java \
+    && apt-get clean \
+    && update-ca-certificates -f
+# Install Python 3.8 and pip
+RUN add-apt-repository ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y python3.8 python3-pip \
+    && apt-get clean
+# Set up JAVA_HOME
+RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> /etc/profile \
+    && echo "export PATH=\$JAVA_HOME/bin:\$PATH" >> /etc/profile
+# Create a new user named "jovyan" with user ID 1000
+RUN useradd -m -u ${NB_UID} ${NB_USER}
+# Switch to the "jovyan" user
+USER ${NB_USER}
+# Set home and path variables for the user
+ENV HOME=/home/${NB_USER} \
+    PATH=/home/${NB_USER}/.local/bin:$PATH
+# Set up PySpark to use Python 3.8 for both driver and workers
+ENV PYSPARK_PYTHON=/usr/bin/python3.8
+ENV PYSPARK_DRIVER_PYTHON=/usr/bin/python3.8
+# Set the working directory to the user's home directory
+WORKDIR ${HOME}
+# Upgrade pip and install Python dependencies
+RUN python3.8 -m pip install --upgrade pip
+COPY requirements.txt /tmp/requirements.txt
+RUN python3.8 -m pip install -r /tmp/requirements.txt
+# Copy the application code into the container at /home/jovyan
+COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
+# Expose port for Streamlit
+EXPOSE 7860
+# Define the entry point for the container
+ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]

pages/Workflow & Model Overview.py ADDED Viewed

	@@ -0,0 +1,194 @@

+import streamlit as st
+# Page configuration
+st.set_page_config(
+    layout="wide",
+    initial_sidebar_state="auto"
+)
+# Custom CSS for better styling
+st.markdown("""
+    <style>
+        .main-title {
+            font-size: 36px;
+            color: #4A90E2;
+            font-weight: bold;
+            text-align: center;
+        }
+        .sub-title {
+            font-size: 24px;
+            color: #4A90E2;
+            margin-top: 20px;
+        }
+        .section {
+            background-color: #f9f9f9;
+            padding: 15px;
+            border-radius: 10px;
+            margin-top: 20px;
+        }
+        .section h2 {
+            font-size: 22px;
+            color: #4A90E2;
+        }
+        .section p, .section ul {
+            color: #666666;
+        }
+        .link {
+            color: #4A90E2;
+            text-decoration: none;
+        }
+    </style>
+""", unsafe_allow_html=True)
+# Title
+st.markdown('<div class="main-title">Switch Between Active and Passive Voice</div>', unsafe_allow_html=True)
+# Introduction Section
+st.markdown("""
+<div class="section">
+    <p>Switching between active and passive voice is an essential skill in writing, allowing for more versatile sentence structures and varied expression. Active voice is direct and vigorous, while passive voice can be used to emphasize the action or the recipient of the action, making it a useful tool for nuanced communication.</p>
+    <p>In this page, we explore how to implement a pipeline that can automatically switch between active and passive voice, and vice versa, using advanced NLP models. We use a T5 Transformer model fine-tuned for style transfer, enabling seamless conversion of sentences between these two voices.</p>
+</div>
+""", unsafe_allow_html=True)
+# T5 Transformer Overview
+st.markdown('<div class="sub-title">Understanding the T5 Transformer for Style Transfer</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <p>The T5 (Text-To-Text Transfer Transformer) model, developed by Google, is a powerful tool capable of handling a variety of text-based tasks in a unified framework. When fine-tuned for style transfer, T5 can effectively convert sentences from active to passive voice and vice versa.</p>
+    <p>The model processes input sentences and, based on its training, generates a text output that switches the voice while preserving the original meaning. This is particularly useful for applications in writing assistance, automated editing, and language learning tools.</p>
+</div>
+""", unsafe_allow_html=True)
+# Performance Section
+st.markdown('<div class="sub-title">Performance and Use Cases</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <p>The T5 model has been extensively tested on various text transformation tasks, including style transfer between active and passive voice. The model consistently produces accurate and contextually appropriate results, making it a valuable asset in both professional and educational settings.</p>
+    <p>This capability is especially useful for writers, editors, and educators who need to adjust sentence structures for clarity, emphasis, or stylistic variation. The T5 model's ability to perform these transformations without requiring external data sources makes it a powerful tool for on-the-fly text editing.</p>
+</div>
+""", unsafe_allow_html=True)
+# Implementation Section
+st.markdown('<div class="sub-title">Implementing Active-Passive Voice Switching</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <p>The following example demonstrates how to implement a style transfer pipeline using Spark NLP to switch between active and passive voice and vice versa. The pipeline includes a document assembler and the T5 model to perform the transformation in both directions.</p>
+</div>
+""", unsafe_allow_html=True)
+st.code('''
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+# Initialize Spark NLP
+spark = sparknlp.start()
+# Define the pipeline stages
+document_assembler = DocumentAssembler()\\
+    .setInputCol("text")\\
+    .setOutputCol("documents")
+# Active to Passive transformation
+t5_active_to_passive = T5Transformer()\\
+    .pretrained("t5_active_to_passive_styletransfer")\\
+    .setTask("Transfer Active to Passive:")\\
+    .setInputCols(["documents"])\\
+    .setOutputCol("passive")
+# Passive to Active transformation
+t5_passive_to_active = T5Transformer()\\
+    .pretrained("t5_passive_to_active_styletransfer")\\
+    .setTask("Transfer Passive to Active:")\\
+    .setInputCols(["documents"])\\
+    .setOutputCol("active")
+pipeline_active_to_passive = Pipeline().setStages([document_assembler, t5_active_to_passive])
+pipeline_passive_to_active = Pipeline().setStages([document_assembler, t5_passive_to_active])
+# Input data example
+data_active = spark.createDataFrame([["The dog chased the cat."]]).toDF("text")
+data_passive = spark.createDataFrame([["The cat was chased by the dog."]]).toDF("text")
+# Apply the pipeline for active to passive
+result_active_to_passive = pipeline_active_to_passive.fit(data_active).transform(data_active)
+result_active_to_passive.select("passive.result").show(truncate=False)
+# Apply the pipeline for passive to active
+result_passive_to_active = pipeline_passive_to_active.fit(data_passive).transform(data_passive)
+result_passive_to_active.select("active.result").show(truncate=False)
+''', language='python')
+# Example Output
+st.text("""
++--------------------------------+
+|passive.result                  |
++--------------------------------+
+|[The cat was chased by the dog.]|
++--------------------------------+
++---------------------------+
+|active.result              |
++---------------------------+
+|[The dog chased the cat.]  |
++---------------------------+
+""")
+# Model Info Section
+st.markdown('<div class="sub-title">Choosing the Right T5 Model for Style Transfer</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <p>Several T5 models are available, each fine-tuned for different tasks. For switching between active and passive voice, two models are used: "t5_active_to_passive_styletransfer" for active-to-passive conversion and "t5_passive_to_active_styletransfer" for passive-to-active conversion.</p>
+    <p>Depending on your requirements, you can explore other T5 models optimized for different style transfer tasks. Check the <a class="link" href="https://sparknlp.org/models?annotator=T5Transformer" target="_blank">Spark NLP Models Hub</a> to find the most suitable model for your needs.</p>
+</div>
+""", unsafe_allow_html=True)
+# References Section
+st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <ul>
+        <li><a class="link" href="https://ai.googleblog.com/2020/02/exploring-transfer-learning-with-t5.html" target="_blank">Google AI Blog</a>: Exploring Transfer Learning with T5</li>
+        <li><a class="link" href="https://sparknlp.org/models?annotator=T5Transformer" target="_blank">Spark NLP Model Hub</a>: Explore T5 models</li>
+        <li>Model used for Active to Passive: <a class="link" href="https://sparknlp.org/2022/05/31/t5_active_to_passive_styletransfer_en_3_0.html" target="_blank">t5_active_to_passive_styletransfer</a></li>
+        <li>Model used for Passive to Active: <a class="link" href="https://sparknlp.org/2022/06/01/t5_passive_to_active_styletransfer.html" target="_blank">t5_passive_to_active_styletransfer</a></li>
+        <li><a class="link" href="https://github.com/google-research/text-to-text-transfer-transformer" target="_blank">GitHub</a>: T5 Transformer repository</li>
+        <li><a class="link" href="https://arxiv.org/abs/1910.10683" target="_blank">T5 Paper</a>: Detailed insights from the developers</li>
+    </ul>
+</div>
+""", unsafe_allow_html=True)
+# Community & Support Section
+st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <ul>
+        <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
+        <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
+        <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
+        <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
+        <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
+    </ul>
+</div>
+""", unsafe_allow_html=True)
+# Quick Links Section
+st.markdown('<div class="sub-title">Quick Links</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <ul>
+        <li><a class="link" href="https://sparknlp.org/docs/en/quickstart" target="_blank">Getting Started</a></li>
+        <li><a class="link" href="https://nlp.johnsnowlabs.com/models" target="_blank">Pretrained Models</a></li>
+        <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples/python/annotation/text/english" target="_blank">Example Notebooks</a></li>
+        <li><a class="link" href="https://sparknlp.org/docs/en/install" target="_blank">Installation Guide</a></li>
+    </ul>
+</div>
+""", unsafe_allow_html=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+streamlit
+st-annotated-text
+streamlit-tags
+pandas
+numpy
+spark-nlp
+pyspark