abdullahmubeen10 committed on
Commit
b364455
·
verified ·
1 Parent(s): 65096a3

Upload 5 files

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base="light"
3
+ primaryColor="#29B4E8"
Demo.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import sparknlp
3
+
4
+ from sparknlp.base import *
5
+ from sparknlp.annotator import *
6
+ from pyspark.ml import Pipeline
7
+
8
# ---- Page configuration -------------------------------------------------
# Wide layout so the generated-SQL panel spans the full browser width.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto"
)

# ---- Global CSS ---------------------------------------------------------
# Styles for the blue centered title banner and the grey "section" cards.
# unsafe_allow_html is required to inject a raw <style> block.
st.markdown("""
<style>
.main-title {
    font-size: 36px;
    color: #4A90E2;
    font-weight: bold;
    text-align: center;
}
.section {
    background-color: #f9f9f9;
    padding: 10px;
    border-radius: 10px;
    margin-top: 10px;
}
.section p, .section ul {
    color: #666666;
}
</style>
""", unsafe_allow_html=True)
34
+
35
@st.cache_resource
def init_spark():
    """Start (or reuse) the Spark NLP session.

    Cached with ``st.cache_resource`` so the JVM/Spark session is created
    once per Streamlit server process rather than on every script rerun.
    """
    return sparknlp.start()
38
+
39
@st.cache_resource
def create_pipeline(model):
    """Build the two-stage Spark NLP pipeline: DocumentAssembler -> T5.

    Parameters
    ----------
    model : str
        Name of the pretrained T5 model to load (e.g. ``t5_small_wikiSQL``).

    Returns
    -------
    pyspark.ml.Pipeline
        Unfitted pipeline that reads column ``text`` and writes the
        generated SQL into annotation column ``sql``.

    Cached so the (slow) pretrained-model download happens once per model.
    """
    # Stage 1: wrap the raw input string into Spark NLP document annotations.
    assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("documents")
    )

    # Stage 2: T5 conditioned on the English->SQL task prefix; output is
    # capped at 200 tokens.
    transformer = (
        T5Transformer.pretrained(model)
        .setTask("translate English to SQL:")
        .setInputCols(["documents"])
        .setMaxOutputLength(200)
        .setOutputCol("sql")
    )

    return Pipeline().setStages([assembler, transformer])
53
+
54
def fit_data(pipeline, data):
    """Run a single text string through the pipeline and collect the SQL.

    Parameters
    ----------
    pipeline : pyspark.ml.Pipeline
        Unfitted pipeline from ``create_pipeline``.
    data : str
        The natural-language question to translate.

    Returns
    -------
    list
        Collected rows of the ``sql.result`` column (each row wraps the
        list of generated strings).

    NOTE(review): relies on the module-level ``spark`` session created
    further down in this script — safe only because the script calls this
    after ``init_spark()``; consider passing the session explicitly.
    """
    df = spark.createDataFrame([[data]]).toDF("text")
    result = pipeline.fit(df).transform(df)
    return result.select('sql.result').collect()
58
+
59
# ---- Sidebar: model picker ----------------------------------------------
# Only one model is offered today; the list form leaves room for more.
model = st.sidebar.selectbox(
    "Choose the pretrained model",
    ["t5_small_wikiSQL"],
    help="For more info about the models visit: https://sparknlp.org/models"
)

# ---- Page header ---------------------------------------------------------
title, sub_title = (
    'SQL Query Generation',
    'This demo shows how to generate SQL code from natural language text.'
)

st.markdown(f'<div class="main-title">{title}</div>', unsafe_allow_html=True)
st.write(sub_title)

# Reference notebook link in sidebar (Colab badge).
link = """
<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/T5_SQL.ipynb">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
</a>
"""
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(link, unsafe_allow_html=True)

# ---- Input selection ------------------------------------------------------
# Canned example questions (WikiSQL-style) the user can pick from.
examples = [
    "How many customers have ordered more than 2 items?",
    "How many players were with the school or club team La Salle?",
    "When the scoring rank was 117, what was the best finish?",
    "When the best finish was T69, how many people came in 2nd?",
    "How many wins were there when the money list rank was 183?",
    "When did the Metrostars have their first Rookie of the Year winner?",
    "What college did the Rookie of the Year from the Columbus Crew attend?"
]

selected_text = st.selectbox("Select an example", examples)
custom_input = st.text_input("Try it with your own Sentence!")

# A non-empty custom sentence takes precedence over the dropdown example.
text_to_analyze = custom_input if custom_input else selected_text

st.write('Text to be converted to SQL query:')
HTML_WRAPPER = """<div class="scroll entities" style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem; white-space:pre-wrap">{}</div>"""
st.markdown(HTML_WRAPPER.format(text_to_analyze), unsafe_allow_html=True)

# ---- Initialize Spark, build pipeline, run ---------------------------------
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, text_to_analyze)

# ---- Display the generated SQL ---------------------------------------------
st.write("Generated Output:")

# output is a list of Rows; output[0][0] is the list of generated strings
# for the single input row, joined into one display string.
output_text = "".join(output[0][0])
st.markdown(f'<div class="section-content">{output_text}</div>', unsafe_allow_html=True)
Dockerfile ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Download base image ubuntu 18.04
FROM ubuntu:18.04

# Set environment variables
# NB_USER/NB_UID follow the Jupyter "jovyan" convention expected by
# Hugging Face Spaces; JAVA_HOME points at the JDK 8 install below.
ENV NB_USER jovyan
ENV NB_UID 1000
ENV HOME /home/${NB_USER}
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/

# Install required packages
# (build tools, native libs for the Python scientific stack, and
# OpenJDK 8, which Spark requires)
RUN apt-get update && apt-get install -y \
    tar \
    wget \
    bash \
    rsync \
    gcc \
    libfreetype6-dev \
    libhdf5-serial-dev \
    libpng-dev \
    libzmq3-dev \
    python3 \
    python3-dev \
    python3-pip \
    unzip \
    pkg-config \
    software-properties-common \
    graphviz \
    openjdk-8-jdk \
    ant \
    ca-certificates-java \
    && apt-get clean \
    && update-ca-certificates -f

# Install Python 3.8 and pip
# NOTE(review): python3-pip was already installed above — the repeat here
# is a harmless no-op, but one of the two could be dropped.
RUN add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y python3.8 python3-pip \
    && apt-get clean

# Set up JAVA_HOME
# NOTE(review): /etc/profile only affects login shells; the ENV JAVA_HOME
# above is what non-interactive processes (e.g. Spark) actually see.
RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> /etc/profile \
    && echo "export PATH=\$JAVA_HOME/bin:\$PATH" >> /etc/profile
# Create a new user named "jovyan" with user ID 1000
RUN useradd -m -u ${NB_UID} ${NB_USER}

# Switch to the "jovyan" user (all later steps run unprivileged)
USER ${NB_USER}

# Set home and path variables for the user
ENV HOME=/home/${NB_USER} \
    PATH=/home/${NB_USER}/.local/bin:$PATH

# Set up PySpark to use Python 3.8 for both driver and workers
ENV PYSPARK_PYTHON=/usr/bin/python3.8
ENV PYSPARK_DRIVER_PYTHON=/usr/bin/python3.8

# Set the working directory to the user's home directory
WORKDIR ${HOME}

# Upgrade pip and install Python dependencies
RUN python3.8 -m pip install --upgrade pip
COPY requirements.txt /tmp/requirements.txt
RUN python3.8 -m pip install -r /tmp/requirements.txt

# Copy the application code into the container at /home/jovyan
COPY --chown=${NB_USER}:${NB_USER} . ${HOME}

# Expose port for Streamlit
EXPOSE 7860

# Define the entry point for the container
ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
pages/Workflow & Model Overview.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Static "Workflow & Model Overview" documentation page rendered with
# Streamlit markdown blocks.
#
# NOTE(review): the content below describes Facebook Llama-2 chat models,
# but the accompanying Demo.py in this repo is a T5 SQL-generation demo
# (t5_small_wikiSQL).  This overview looks copied from another Space —
# confirm and replace with T5/WikiSQL content.
import streamlit as st

# Page configuration
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto"
)

# Custom CSS for better styling (title banner, sub-headers, grey cards, links)
st.markdown("""
<style>
.main-title {
    font-size: 36px;
    color: #4A90E2;
    font-weight: bold;
    text-align: center;
}
.sub-title {
    font-size: 24px;
    color: #4A90E2;
    margin-top: 20px;
}
.section {
    background-color: #f9f9f9;
    padding: 15px;
    border-radius: 10px;
    margin-top: 20px;
}
.section h2 {
    font-size: 22px;
    color: #4A90E2;
}
.section p, .section ul {
    color: #666666;
}
.link {
    color: #4A90E2;
    text-decoration: none;
}
</style>
""", unsafe_allow_html=True)

# Title
st.markdown('<div class="main-title">Chat and Conversational LLMs (Facebook Llama-2)</div>', unsafe_allow_html=True)

# Introduction Section
st.markdown("""
<div class="section">
<p>Facebook's Llama-2 is a cutting-edge family of large language models (LLMs) designed to excel in a variety of conversational tasks. With models ranging from 7 billion to 70 billion parameters, Llama-2 has been fine-tuned specifically for dialogue use cases, making it one of the most powerful and versatile models available for chat and conversational AI.</p>
<p>Llama-2 models have demonstrated superior performance across multiple benchmarks, often outperforming other open-source models and rivaling some of the best closed-source models like ChatGPT and PaLM. These models are capable of handling complex, context-rich conversations with a high degree of accuracy and coherence.</p>
</div>
""", unsafe_allow_html=True)

# Llama-2 Transformer Overview
st.markdown('<div class="sub-title">Understanding the Llama-2 Transformer</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<h2>Llama-2: The Transformer Architecture</h2>
<p>Llama-2 is based on the transformer architecture, a deep learning model that has revolutionized the field of natural language processing. The transformer model employs a mechanism called self-attention, which allows it to weigh the importance of different words in a sentence relative to each other. This enables the model to capture long-range dependencies in text, making it highly effective for understanding and generating human-like text.</p>
<p>The Llama-2 model family builds on this architecture, incorporating enhancements that improve its ability to handle longer contexts and generate more accurate and coherent responses. The model is particularly well-suited for dialogue and conversational applications, where understanding context and maintaining coherence over multiple turns of conversation is crucial.</p>
</div>
""", unsafe_allow_html=True)

# Performance Section
st.markdown('<div class="sub-title">Performance and Benchmarks</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<p>Llama-2-Chat models have been rigorously tested against a variety of benchmarks to assess their performance in dialogue and conversational tasks. The results have shown that Llama-2 outperforms other open-source chat models on most benchmarks, demonstrating its effectiveness in generating accurate, relevant, and contextually appropriate responses.</p>
<p>In human evaluations, Llama-2-Chat has been found to be on par with some of the leading closed-source models in terms of helpfulness and safety. This makes it a highly reliable option for developers looking to implement conversational AI in their applications.</p>
</div>
""", unsafe_allow_html=True)

# Implementation Section
st.markdown('<div class="sub-title">Implementing Llama-2 for Conversational AI</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<p>The following is an example of how to implement a Llama-2 model for generating responses in a conversational AI application. We use the Llama-2 model with a simple Spark NLP pipeline to generate responses to user input.</p>
</div>
""", unsafe_allow_html=True)

# Illustrative (non-executed) Spark NLP pipeline snippet shown to the reader.
st.code('''
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from pyspark.sql.functions import col, expr

documentAssembler = DocumentAssembler() \\
    .setInputCol("text") \\
    .setOutputCol("documents")

llama2 = LLAMA2Transformer \\
    .pretrained("llama_2_7b_chat_hf_int4") \\
    .setMaxOutputLength(50) \\
    .setDoSample(False) \\
    .setInputCols(["documents"]) \\
    .setOutputCol("generation")

pipeline = Pipeline().setStages([documentAssembler, llama2])

data = spark.createDataFrame([["what are your thoughts about the new monkeypox virus"]]).toDF("text")
result = pipeline.fit(data).transform(data)
result.select("generation.result").show(truncate=False)
''', language='python')

# Example Output (static illustration of the snippet's result)
st.text("""
+------------------------------------------------+
|generation.result                               |
+------------------------------------------------+
|Monkeypox is a rare disease that has been ...   |
+------------------------------------------------+
""")

# Model Info Section
st.markdown('<div class="sub-title">Choosing the Right Llama-2 Model</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<p>Llama-2 models are available in various sizes and configurations, depending on the specific needs of your application. For conversational AI, it is important to select a model that balances performance with resource efficiency. The model used in the example, "llama_2_7b_chat_hf_int4," is optimized for chat applications and is a good starting point for many use cases.</p>
<p>For more complex tasks or larger-scale deployments, you may consider using one of the larger Llama-2 models, such as the 13B or 70B parameter variants, which offer greater accuracy and contextual understanding.</p>
<p>Explore the available models on the <a class="link" href="https://sparknlp.org/models?annotator=LLAMA2Transformer" target="_blank">Spark NLP Models Hub</a> to find the one that fits your needs.</p>
</div>
""", unsafe_allow_html=True)

# Footer
# References Section
st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<ul>
    <li><a class="link" href="https://ai.facebook.com/" target="_blank">Facebook AI Research</a>: Learn more about Facebook's AI initiatives</li>
    <li><a class="link" href="https://sparknlp.org/models?annotator=LLAMA2Transformer" target="_blank">Spark NLP Model Hub</a>: Explore Llama-2 models</li>
    <li><a class="link" href="https://huggingface.co/facebook/llama" target="_blank">Hugging Face Model Hub</a>: Explore Llama-2 models</li>
    <li><a class="link" href="https://github.com/facebookresearch/llama" target="_blank">GitHub</a>: Access the Llama-2 repository and contribute</li>
    <li><a class="link" href="https://ai.facebook.com/blog/introducing-llama-2" target="_blank">Llama-2 Blog Post</a>: Detailed insights from the developers</li>
</ul>
</div>
""", unsafe_allow_html=True)

st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<ul>
    <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
    <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
    <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
    <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
    <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
</ul>
</div>
""", unsafe_allow_html=True)

st.markdown('<div class="sub-title">Quick Links</div>', unsafe_allow_html=True)

st.markdown("""
<div class="section">
<ul>
    <li><a class="link" href="https://sparknlp.org/docs/en/quickstart" target="_blank">Getting Started</a></li>
    <li><a class="link" href="https://nlp.johnsnowlabs.com/models" target="_blank">Pretrained Models</a></li>
    <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples/python/annotation/text/english" target="_blank">Example Notebooks</a></li>
    <li><a class="link" href="https://sparknlp.org/docs/en/install" target="_blank">Installation Guide</a></li>
</ul>
</div>
""", unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ st-annotated-text
3
+ streamlit-tags
4
+ pandas
5
+ numpy
6
+ spark-nlp
7
+ pyspark