abdullahmubeen10 committed on
Commit
b364455
·
verified ·
1 Parent(s): 65096a3

Upload 5 files

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base="light"
3
+ primaryColor="#29B4E8"
Demo.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import sparknlp
3
+
4
+ from sparknlp.base import *
5
+ from sparknlp.annotator import *
6
+ from pyspark.ml import Pipeline
7
+
8
# ---- Page configuration -------------------------------------------------
# Wide layout so the generated-SQL panel spans the full browser width.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto"
)

# ---- Global CSS ---------------------------------------------------------
# Styles for the blue centered title banner and the grey "section" cards.
# unsafe_allow_html is required to inject a raw <style> block.
st.markdown("""
<style>
.main-title {
    font-size: 36px;
    color: #4A90E2;
    font-weight: bold;
    text-align: center;
}
.section {
    background-color: #f9f9f9;
    padding: 10px;
    border-radius: 10px;
    margin-top: 10px;
}
.section p, .section ul {
    color: #666666;
}
</style>
""", unsafe_allow_html=True)
34
+
35
@st.cache_resource
def init_spark():
    """Start (or reuse) the Spark NLP session.

    Cached with ``st.cache_resource`` so the JVM/Spark session is created
    once per Streamlit server process rather than on every script rerun.
    """
    return sparknlp.start()
38
+
39
@st.cache_resource
def create_pipeline(model):
    """Build the two-stage Spark NLP pipeline: DocumentAssembler -> T5.

    Parameters
    ----------
    model : str
        Name of the pretrained T5 model to load (e.g. ``t5_small_wikiSQL``).

    Returns
    -------
    pyspark.ml.Pipeline
        Unfitted pipeline that reads column ``text`` and writes the
        generated SQL into annotation column ``sql``.

    Cached so the (slow) pretrained-model download happens once per model.
    """
    # Stage 1: wrap the raw input string into Spark NLP document annotations.
    assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("documents")
    )

    # Stage 2: T5 conditioned on the English->SQL task prefix; output is
    # capped at 200 tokens.
    transformer = (
        T5Transformer.pretrained(model)
        .setTask("translate English to SQL:")
        .setInputCols(["documents"])
        .setMaxOutputLength(200)
        .setOutputCol("sql")
    )

    return Pipeline().setStages([assembler, transformer])
53
+
54
def fit_data(pipeline, data):
    """Run a single text string through the pipeline and collect the SQL.

    Parameters
    ----------
    pipeline : pyspark.ml.Pipeline
        Unfitted pipeline from ``create_pipeline``.
    data : str
        The natural-language question to translate.

    Returns
    -------
    list
        Collected rows of the ``sql.result`` column (each row wraps the
        list of generated strings).

    NOTE(review): relies on the module-level ``spark`` session created
    further down in this script — safe only because the script calls this
    after ``init_spark()``; consider passing the session explicitly.
    """
    df = spark.createDataFrame([[data]]).toDF("text")
    result = pipeline.fit(df).transform(df)
    return result.select('sql.result').collect()
58
+
59
# ---- Sidebar: model picker ----------------------------------------------
# Only one model is offered today; the list form leaves room for more.
model = st.sidebar.selectbox(
    "Choose the pretrained model",
    ["t5_small_wikiSQL"],
    help="For more info about the models visit: https://sparknlp.org/models"
)

# ---- Page header ---------------------------------------------------------
title, sub_title = (
    'SQL Query Generation',
    'This demo shows how to generate SQL code from natural language text.'
)

st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
st.write(sub_title)

# Reference notebook link in sidebar (Colab badge).
link = """
<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/T5_SQL.ipynb">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
</a>
"""
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(link, unsafe_allow_html=True)

# ---- Input selection ------------------------------------------------------
# Canned example questions (WikiSQL-style) the user can pick from.
examples = [
    "How many customers have ordered more than 2 items?",
    "How many players were with the school or club team La Salle?",
    "When the scoring rank was 117, what was the best finish?",
    "When the best finish was T69, how many people came in 2nd?",
    "How many wins were there when the money list rank was 183?",
    "When did the Metrostars have their first Rookie of the Year winner?",
    "What college did the Rookie of the Year from the Columbus Crew attend?"
]

selected_text = st.selectbox("Select an example", examples)
custom_input = st.text_input("Try it with your own Sentence!")

# A non-empty custom sentence takes precedence over the dropdown example.
text_to_analyze = custom_input if custom_input else selected_text

st.write('Text to be converted to SQL query:')
HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
st.markdown(HTML_WRAPPER.format(text_to_analyze), unsafe_allow_html=True)

# ---- Initialize Spark, build pipeline, run ---------------------------------
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, text_to_analyze)

# ---- Display the generated SQL ---------------------------------------------
st.write("Generated Output:")

# output is a list of Rows; output[0][0] is the list of generated strings
# for the single input row, joined into one display string.
output_text = "".join(output[0][0])
st.markdown(f'<div class="section-content">{output_text}</div>', unsafe_allow_html=True)
Dockerfile ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Download base image ubuntu 18.04
FROM ubuntu:18.04

# Set environment variables
# NB_USER/NB_UID follow the Jupyter "jovyan" convention expected by
# Hugging Face Spaces; JAVA_HOME points at the JDK 8 install below.
ENV NB_USER jovyan
ENV NB_UID 1000
ENV HOME /home/${NB_USER}
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/

# Install required packages
# (build tools, native libs for the Python scientific stack, and
# OpenJDK 8, which Spark requires)
RUN apt-get update && apt-get install -y \
    tar \
    wget \
    bash \
    rsync \
    gcc \
    libfreetype6-dev \
    libhdf5-serial-dev \
    libpng-dev \
    libzmq3-dev \
    python3 \
    python3-dev \
    python3-pip \
    unzip \
    pkg-config \
    software-properties-common \
    graphviz \
    openjdk-8-jdk \
    ant \
    ca-certificates-java \
    && apt-get clean \
    && update-ca-certificates -f

# Install Python 3.8 and pip
# NOTE(review): python3-pip was already installed above — the repeat here
# is a harmless no-op, but one of the two could be dropped.
RUN add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y python3.8 python3-pip \
    && apt-get clean

# Set up JAVA_HOME
# NOTE(review): /etc/profile only affects login shells; the ENV JAVA_HOME
# above is what non-interactive processes (e.g. Spark) actually see.
RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> /etc/profile \
    && echo "export PATH=\$JAVA_HOME/bin:\$PATH" >> /etc/profile
# Create a new user named "jovyan" with user ID 1000
RUN useradd -m -u ${NB_UID} ${NB_USER}

# Switch to the "jovyan" user (all later steps run unprivileged)
USER ${NB_USER}

# Set home and path variables for the user
ENV HOME=/home/${NB_USER} \
    PATH=/home/${NB_USER}/.local/bin:$PATH

# Set up PySpark to use Python 3.8 for both driver and workers
ENV PYSPARK_PYTHON=/usr/bin/python3.8
ENV PYSPARK_DRIVER_PYTHON=/usr/bin/python3.8

# Set the working directory to the user's home directory
WORKDIR ${HOME}

# Upgrade pip and install Python dependencies
RUN python3.8 -m pip install --upgrade pip
COPY requirements.txt /tmp/requirements.txt
RUN python3.8 -m pip install -r /tmp/requirements.txt

# Copy the application code into the container at /home/jovyan
COPY --chown=${NB_USER}:${NB_USER} . ${HOME}

# Expose port for Streamlit
EXPOSE 7860

# Define the entry point for the container
ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
pages/Workflow & Model Overview.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Static "Workflow & Model Overview" documentation page rendered with
# Streamlit markdown blocks.
#
# NOTE(review): the content below describes Facebook Llama-2 chat models,
# but the accompanying Demo.py in this repo is a T5 SQL-generation demo
# (t5_small_wikiSQL).  This overview looks copied from another Space —
# confirm and replace with T5/WikiSQL content.
import streamlit as st

# Page configuration
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto"
)

# Custom CSS for better styling (title banner, sub-headers, grey cards, links)
st.markdown("""
<style>
.main-title {
    font-size: 36px;
    color: #4A90E2;
    font-weight: bold;
    text-align: center;
}
.sub-title {
    font-size: 24px;
    color: #4A90E2;
    margin-top: 20px;
}
.section {
    background-color: #f9f9f9;
    padding: 15px;
    border-radius: 10px;
    margin-top: 20px;
}
.section h2 {
    font-size: 22px;
    color: #4A90E2;
}
.section p, .section ul {
    color: #666666;
}
.link {
    color: #4A90E2;
    text-decoration: none;
}
</style>
""", unsafe_allow_html=True)

# Title
st.markdown('<div class="main-title">Chat and Conversational LLMs (Facebook Llama-2)</div>', unsafe_allow_html=True)

# Introduction Section
st.markdown("""
<div class="section">
<p>Facebook's Llama-2 is a cutting-edge family of large language models (LLMs) designed to excel in a variety of conversational tasks. With models ranging from 7 billion to 70 billion parameters, Llama-2 has been fine-tuned specifically for dialogue use cases, making it one of the most powerful and versatile models available for chat and conversational AI.</p>
<p>Llama-2 models have demonstrated superior performance across multiple benchmarks, often outperforming other open-source models and rivaling some of the best closed-source models like ChatGPT and PaLM. These models are capable of handling complex, context-rich conversations with a high degree of accuracy and coherence.</p>
</div>
""", unsafe_allow_html=True)

# Llama-2 Transformer Overview
st.markdown('<div class="sub-title">Understanding the Llama-2 Transformer</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<h2>Llama-2: The Transformer Architecture</h2>
<p>Llama-2 is based on the transformer architecture, a deep learning model that has revolutionized the field of natural language processing. The transformer model employs a mechanism called self-attention, which allows it to weigh the importance of different words in a sentence relative to each other. This enables the model to capture long-range dependencies in text, making it highly effective for understanding and generating human-like text.</p>
<p>The Llama-2 model family builds on this architecture, incorporating enhancements that improve its ability to handle longer contexts and generate more accurate and coherent responses. The model is particularly well-suited for dialogue and conversational applications, where understanding context and maintaining coherence over multiple turns of conversation is crucial.</p>
</div>
""", unsafe_allow_html=True)

# Performance Section
st.markdown('<div class="sub-title">Performance and Benchmarks</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<p>Llama-2-Chat models have been rigorously tested against a variety of benchmarks to assess their performance in dialogue and conversational tasks. The results have shown that Llama-2 outperforms other open-source chat models on most benchmarks, demonstrating its effectiveness in generating accurate, relevant, and contextually appropriate responses.</p>
<p>In human evaluations, Llama-2-Chat has been found to be on par with some of the leading closed-source models in terms of helpfulness and safety. This makes it a highly reliable option for developers looking to implement conversational AI in their applications.</p>
</div>
""", unsafe_allow_html=True)

# Implementation Section
st.markdown('<div class="sub-title">Implementing Llama-2 for Conversational AI</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<p>The following is an example of how to implement a Llama-2 model for generating responses in a conversational AI application. We use the Llama-2 model with a simple Spark NLP pipeline to generate responses to user input.</p>
</div>
""", unsafe_allow_html=True)

# Illustrative (non-executed) Spark NLP pipeline snippet shown to the reader.
st.code('''
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from pyspark.sql.functions import col, expr

documentAssembler = DocumentAssembler() \\
    .setInputCol("text") \\
    .setOutputCol("documents")

llama2 = LLAMA2Transformer \\
    .pretrained("llama_2_7b_chat_hf_int4") \\
    .setMaxOutputLength(50) \\
    .setDoSample(False) \\
    .setInputCols(["documents"]) \\
    .setOutputCol("generation")

pipeline = Pipeline().setStages([documentAssembler, llama2])

data = spark.createDataFrame([["what are your thoughts about the new monkeypox virus"]]).toDF("text")
result = pipeline.fit(data).transform(data)
result.select("generation.result").show(truncate=False)
''', language='python')

# Example Output (static illustration of the snippet's result)
st.text("""
+------------------------------------------------+
|generation.result                               |
+------------------------------------------------+
|Monkeypox is a rare disease that has been ...   |
+------------------------------------------------+
""")

# Model Info Section
st.markdown('<div class="sub-title">Choosing the Right Llama-2 Model</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<p>Llama-2 models are available in various sizes and configurations, depending on the specific needs of your application. For conversational AI, it is important to select a model that balances performance with resource efficiency. The model used in the example, "llama_2_7b_chat_hf_int4," is optimized for chat applications and is a good starting point for many use cases.</p>
<p>For more complex tasks or larger-scale deployments, you may consider using one of the larger Llama-2 models, such as the 13B or 70B parameter variants, which offer greater accuracy and contextual understanding.</p>
<p>Explore the available models on the <a class="link" href="https://sparknlp.org/models?annotator=LLAMA2Transformer" target="_blank">Spark NLP Models Hub</a> to find the one that fits your needs.</p>
</div>
""", unsafe_allow_html=True)

# Footer
# References Section
st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<ul>
    <li><a class="link" href="https://ai.facebook.com/" target="_blank">Facebook AI Research</a>: Learn more about Facebook's AI initiatives</li>
    <li><a class="link" href="https://sparknlp.org/models?annotator=LLAMA2Transformer" target="_blank">Spark NLP Model Hub</a>: Explore Llama-2 models</li>
    <li><a class="link" href="https://huggingface.co/facebook/llama" target="_blank">Hugging Face Model Hub</a>: Explore Llama-2 models</li>
    <li><a class="link" href="https://github.com/facebookresearch/llama" target="_blank">GitHub</a>: Access the Llama-2 repository and contribute</li>
    <li><a class="link" href="https://ai.facebook.com/blog/introducing-llama-2" target="_blank">Llama-2 Blog Post</a>: Detailed insights from the developers</li>
</ul>
</div>
""", unsafe_allow_html=True)

st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<ul>
    <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
    <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
    <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
    <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
    <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
</ul>
</div>
""", unsafe_allow_html=True)

st.markdown('<div class="sub-title">Quick Links</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<ul>
    <li><a class="link" href="https://sparknlp.org/docs/en/quickstart" target="_blank">Getting Started</a></li>
    <li><a class="link" href="https://nlp.johnsnowlabs.com/models" target="_blank">Pretrained Models</a></li>
    <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples/python/annotation/text/english" target="_blank">Example Notebooks</a></li>
    <li><a class="link" href="https://sparknlp.org/docs/en/install" target="_blank">Installation Guide</a></li>
</ul>
</div>
""", unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ st-annotated-text
3
+ streamlit-tags
4
+ pandas
5
+ numpy
6
+ spark-nlp
7
+ pyspark