abdullahmubeen10 committed on
Commit
88f84f1
1 Parent(s): 0a0506c

Upload 11 files

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base="light"
3
+ primaryColor="#29B4E8"
Demo.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import sparknlp
3
+ import os
4
+
5
+ from sparknlp.base import *
6
+ from sparknlp.annotator import *
7
+ from pyspark.ml import Pipeline
8
+ from sparknlp.pretrained import PretrainedPipeline
9
+
10
+ # Page configuration
11
+ st.set_page_config(
12
+ layout="wide",
13
+ page_title="Spark NLP Demos App",
14
+ initial_sidebar_state="auto"
15
+ )
16
+
17
+ # CSS for styling
18
+ st.markdown("""
19
+ <style>
20
+ .main-title {
21
+ font-size: 36px;
22
+ color: #4A90E2;
23
+ font-weight: bold;
24
+ text-align: center;
25
+ }
26
+ .section p, .section ul {
27
+ color: #666666;
28
+ }
29
+ </style>
30
+ """, unsafe_allow_html=True)
31
+
32
+ # Initialize Spark NLP
33
@st.cache_resource
def init_spark():
    """Start Spark NLP and hand back whatever ``sparknlp.start()`` returns.

    Wrapped in ``st.cache_resource`` so the start call is not repeated on
    every Streamlit rerun.
    """
    session = sparknlp.start()
    return session
36
+
37
+ # Create Spark NLP pipeline
38
@st.cache_resource
def create_pipeline(year, month, data):
    """Build the unfitted pipeline that detects and normalizes dates.

    Stages, in order:
      1. ``DocumentAssembler``: "text" -> "document"
      2. ``SentenceDetector``: "document" -> "sentence"
      3. ``DateMatcher``: "sentence" -> "date", anchored on the given
         reference date so relative expressions can be resolved.

    Args:
        year: Anchor (reference) year, passed to ``setAnchorDateYear``.
        month: Anchor month, passed to ``setAnchorDateMonth``.
        data: Anchor day of the month, passed to ``setAnchorDateDay``.
            The parameter name is kept as-is for backward compatibility.

    Returns:
        A ``Pipeline`` holding the three stages above.
    """
    document_assembler = DocumentAssembler()\
        .setInputCol("text")\
        .setOutputCol("document")

    sentence_detector = SentenceDetector()\
        .setInputCols("document")\
        .setOutputCol("sentence")

    # Use the function argument for the anchor day. The previous code passed
    # the module-level `date` object here and silently ignored the argument.
    date_matcher = DateMatcher()\
        .setInputCols('sentence')\
        .setOutputCol("date")\
        .setAnchorDateYear(year)\
        .setAnchorDateMonth(month)\
        .setAnchorDateDay(data)

    pipeline1 = Pipeline(
        stages=[
            document_assembler,
            sentence_detector,
            date_matcher,
        ])

    return pipeline1
63
+
64
+ # Fit data and get results
65
def fit_data(pipeline, data):
    """Fit ``pipeline`` and return the date strings it extracts from ``data``.

    The pipeline is fitted on a one-row DataFrame holding an empty string,
    wrapped in a ``LightPipeline`` and applied to ``data`` with
    ``fullAnnotate``. Relies on the module-level ``spark`` session.

    Args:
        pipeline: Unfitted pipeline whose date stage writes to the "date"
            output column.
        data: Text to annotate.

    Returns:
        A list with the ``result`` of every annotation in the "date" column
        of the first ``fullAnnotate`` result.
    """
    empty_df = spark.createDataFrame([['']]).toDF('text')
    pipeline_model = pipeline.fit(empty_df)
    model = LightPipeline(pipeline_model)
    results = model.fullAnnotate(data)[0]

    # The DateMatcher stage is configured with setOutputCol("date"); there is
    # no "matched_text" column in this pipeline, so indexing it raised KeyError.
    return [res.result for res in results['date']]
72
+
73
+ # Set up the page layout
74
+ st.markdown('<div class="main-title">State-of-the-Art Date Detecting and normalization with Spark NLP</div>', unsafe_allow_html=True)
75
+ st.write("")
76
+
77
# Sidebar content: let the user pick the reference (anchor) date
date = st.sidebar.date_input('Select reference date')
# Take the numeric components straight from the date object.
year, month, day = date.year, date.month, date.day
80
+
81
+ # Reference notebook link in sidebar
82
+ link = """
83
+ <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/DATE_MATCHER.ipynb">
84
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
85
+ </a>
86
+ """
87
+ st.sidebar.title('')
88
+ st.sidebar.markdown('Reference notebook:')
89
+ st.sidebar.markdown(link, unsafe_allow_html=True)
90
+
91
# Load examples from files
# Build the path with os.path.join: the previous backslash literal only
# resolves on Windows (POSIX treats the backslash as part of the file name)
# and "\d" is an invalid string escape.
folder_path = os.path.join("inputs", "date_matcher")


def _read_example(path):
    """Return the stripped second line of ``path``, or None if it has fewer than two lines."""
    # The context manager closes the handle; the old code leaked one per file.
    with open(path, 'r', encoding='utf-8') as handle:
        lines = handle.readlines()
    # NOTE(review): only the second line is used, although the example files
    # hold several sentence lines after the preview line - confirm whether the
    # full text (lines[1:]) was intended.
    return lines[1].strip() if len(lines) >= 2 else None


examples = []
# os.listdir returns entries in arbitrary order; sort for a stable selectbox.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.txt'):
        continue
    example = _read_example(os.path.join(folder_path, filename))
    if example is not None:
        examples.append(example)
100
+
101
+ st.subheader("Automatically detect phrases expressing dates and normalize them with respect to a reference date.")
102
+ selected_text = st.selectbox("Select an example", examples)
103
+ custom_input = st.text_input("Try it with your own Sentence!")
104
+
105
+ text_to_analyze = custom_input if custom_input else selected_text
106
+
107
+ st.subheader('Full example text')
108
+ st.write(text_to_analyze)
109
+
110
# Initialize Spark and create pipeline
spark = init_spark()
# Anchor the matcher on the reference date chosen in the sidebar. The
# previous call passed `model` and `labels_to_match`, which are not defined
# anywhere in this script and raised NameError.
pipeline = create_pipeline(year, month, day)
output = fit_data(pipeline, text_to_analyze)

# Display matched dates
st.subheader("Dates matched:")
# Show the extracted dates; previously the result was computed but never rendered.
st.write(output)
Dockerfile ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Download base image ubuntu 18.04
2
+ FROM ubuntu:18.04
3
+
4
+ # Set environment variables
5
+ ENV NB_USER jovyan
6
+ ENV NB_UID 1000
7
+ ENV HOME /home/${NB_USER}
8
+
9
+ # Install required packages
10
+ RUN apt-get update && apt-get install -y \
11
+ tar \
12
+ wget \
13
+ bash \
14
+ rsync \
15
+ gcc \
16
+ libfreetype6-dev \
17
+ libhdf5-serial-dev \
18
+ libpng-dev \
19
+ libzmq3-dev \
20
+ python3 \
21
+ python3-dev \
22
+ python3-pip \
23
+ unzip \
24
+ pkg-config \
25
+ software-properties-common \
26
+ graphviz \
27
+ openjdk-8-jdk \
28
+ ant \
29
+ ca-certificates-java \
30
+ && apt-get clean \
31
+ && update-ca-certificates -f;
32
+
33
+ # Install Python 3.8 and pip
34
+ RUN add-apt-repository ppa:deadsnakes/ppa \
35
+ && apt-get update \
36
+ && apt-get install -y python3.8 python3-pip \
37
+ && apt-get clean;
38
+
39
+ # Set up JAVA_HOME
40
+ ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
41
+ RUN mkdir -p ${HOME} \
42
+ && echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> ${HOME}/.bashrc \
43
+ && chown -R ${NB_UID}:${NB_UID} ${HOME}
44
+
45
+ # Create a new user named "jovyan" with user ID 1000
46
+ RUN useradd -m -u ${NB_UID} ${NB_USER}
47
+
48
+ # Switch to the "jovyan" user
49
+ USER ${NB_USER}
50
+
51
+ # Set home and path variables for the user
52
+ ENV HOME=/home/${NB_USER} \
53
+ PATH=/home/${NB_USER}/.local/bin:$PATH
54
+
55
+ # Set the working directory to the user's home directory
56
+ WORKDIR ${HOME}
57
+
58
+ # Upgrade pip and install Python dependencies
59
+ RUN python3.8 -m pip install --upgrade pip
60
+ COPY requirements.txt /tmp/requirements.txt
61
+ RUN python3.8 -m pip install -r /tmp/requirements.txt
62
+
63
+ # Copy the application code into the container at /home/jovyan
64
+ COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
65
+
66
+ # Expose port for Streamlit
67
+ EXPOSE 7860
68
+
69
+ # Define the entry point for the container
70
+ ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
images/Extracting-Exact-Dates.jpg ADDED
inputs/date_matcher/Example1.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ David visited the restaurant yesterday with his family. He also visited and the day before, but at ...
2
+ David visited the restaurant yesterday with his family.
3
+ He also visited and the day before, but at that time he was alone.
4
+ David again visited today with his colleagues.
5
+ He and his friends really liked the food and hoped to visit again tomorrow.
inputs/date_matcher/Example2.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ In March 2003 she was seen in the office and appeared to be extremely disturbed emotionally. On 2003...
2
+ In March 2003 she was seen in the office and appeared to be extremely disturbed emotionally.
3
+ On 2003-04-04 she again visited and talked about the effects of the medication she has been taking, and seemed positive and in much better shape.
4
+ She again visited on Fri, 22/4/2003 and looked better.
5
+ She has been working out and taking her medicines since April 1st 2003.
inputs/date_matcher/Example3.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ I have a very busy schedule these days. I have meetings from 7pm. till 11pm. I have 3 meetings the d...
2
+ I have a very busy schedule these days. I have meetings from 7pm. till 11pm.
3
+ I have 3 meetings the day after, and have submission deadlines approaching as well.
4
+ By next mon I have to finalise the architecture, for which i'll have to hold multiple meetings with ARM.
5
+ Then i'll have to discuss dev plans by next tuesday and develop a thorough plan.
6
+ The plan should be ready by Nov 30th.
inputs/date_matcher/Example4.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ When Tom visited the Bahamas last year, it was his first time travelling. Since then he has travelle...
2
+ When Tom visited the Bahamas last year, it was his first time travelling.
3
+ Since then he has travelled a lot. For example, he visited Hawaii last week.
4
+ The last time we talked, he was planning to travel to Alaska next month.
inputs/date_matcher/Example5.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Isn't it weird that all my family members have the same birth day and month? All of us were born on ...
2
+ Isn't it weird that all my family members have the same birth day and month? All of us were born on 1st Jan
3
+ Dad was born on 01/01/1900. Mom has a birth date of 1st Jan 1902. And I was born on 2000/01/01
pages/Workflow & Model Overview.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from sparknlp.base import DocumentAssembler, Pipeline
3
+ from sparknlp.annotator import DateMatcher, MultiDateMatcher
4
+ from pyspark.sql.types import StringType
5
+ import pyspark.sql.functions as F
6
+ import sparknlp
7
+
8
+ # Custom CSS for better styling
9
+ st.markdown("""
10
+ <style>
11
+ .main-title {
12
+ font-size: 36px;
13
+ color: #4A90E2;
14
+ font-weight: bold;
15
+ text-align: center;
16
+ }
17
+ .sub-title {
18
+ font-size: 24px;
19
+ color: #4A90E2;
20
+ margin-top: 20px;
21
+ }
22
+ .section {
23
+ background-color: #f9f9f9;
24
+ padding: 15px;
25
+ border-radius: 10px;
26
+ margin-top: 20px;
27
+ }
28
+ .section h2 {
29
+ font-size: 22px;
30
+ color: #4A90E2;
31
+ }
32
+ .section p, .section ul {
33
+ color: #666666;
34
+ }
35
+ .link {
36
+ color: #4A90E2;
37
+ text-decoration: none;
38
+ }
39
+ </style>
40
+ """, unsafe_allow_html=True)
41
+
42
+ # Introduction
43
+ st.markdown('<div class="main-title">Date Extraction with Spark NLP</div>', unsafe_allow_html=True)
44
+
45
+ st.markdown("""
46
+ <div class="section">
47
+ <p>Welcome to the Spark NLP Date Extraction Demo App! Date extraction is a crucial task in Natural Language Processing (NLP) that involves identifying and extracting references to dates in text data. This can be useful for a wide range of applications, such as event scheduling, social media monitoring, and financial forecasting.</p>
48
+ <p>Using Spark NLP, it is possible to identify and extract dates from a text with high accuracy. This app demonstrates how to use the DateMatcher and MultiDateMatcher annotators to extract dates from text data.</p>
49
+ </div>
50
+ """, unsafe_allow_html=True)
51
+
52
+ st.image('images/Extracting-Exact-Dates.jpg', use_column_width='auto')
53
+
54
+ # About Date Extraction
55
+ st.markdown('<div class="sub-title">About Date Extraction</div>', unsafe_allow_html=True)
56
+ st.markdown("""
57
+ <div class="section">
58
+ <p>Date extraction involves identifying and extracting references to dates in text data. This can be achieved using various techniques such as regular expressions, Named Entity Recognition (NER), and rule-based systems.</p>
59
+ <p>Spark NLP provides powerful tools for date extraction, including the DateMatcher and MultiDateMatcher annotators, which use pattern matching to extract date expressions from text.</p>
60
+ </div>
61
+ """, unsafe_allow_html=True)
62
+
63
+ # Using DateMatcher in Spark NLP
64
+ st.markdown('<div class="sub-title">Using DateMatcher in Spark NLP</div>', unsafe_allow_html=True)
65
+ st.markdown("""
66
+ <div class="section">
67
+ <p>The DateMatcher annotator in Spark NLP allows users to extract specific date patterns from text data. This annotator can identify dates in various formats, providing valuable insights from unstructured text data.</p>
68
+ <p>The DateMatcher annotator in Spark NLP offers:</p>
69
+ <ul>
70
+ <li>Flexible date pattern matching</li>
71
+ <li>Extraction of single date occurrences</li>
72
+ <li>Efficient processing of large text datasets</li>
73
+ <li>Integration with other Spark NLP components for comprehensive NLP pipelines</li>
74
+ </ul>
75
+ </div>
76
+ """, unsafe_allow_html=True)
77
+
78
+ st.markdown('<h2 class="sub-title">Example Usage in Python</h2>', unsafe_allow_html=True)
79
+ st.markdown('<p>Here’s how you can implement DateMatcher and MultiDateMatcher annotators in Spark NLP:</p>', unsafe_allow_html=True)
80
+
81
+ # Setup Instructions
82
+ st.markdown('<div class="sub-title">Setup</div>', unsafe_allow_html=True)
83
+ st.markdown('<p>To install Spark NLP in Python, use your favorite package manager (conda, pip, etc.). For example:</p>', unsafe_allow_html=True)
84
+ st.code("""
85
+ pip install spark-nlp
86
+ pip install pyspark
87
+ """, language="bash")
88
+
89
+ st.markdown("<p>Then, import Spark NLP and start a Spark session:</p>", unsafe_allow_html=True)
90
+ st.code("""
91
+ import sparknlp
92
+
93
+ # Start Spark Session
94
+ spark = sparknlp.start()
95
+ """, language='python')
96
+
97
+ # Single Date Extraction Example
98
+ st.markdown('<div class="sub-title">Example Usage: Single Date Extraction with DateMatcher</div>', unsafe_allow_html=True)
99
# Displayed example for single-date extraction. The snippet imports
# StringType because it is used when building the DataFrame; without that
# import the example raised NameError when copied and run as shown.
st.code('''
from sparknlp.base import DocumentAssembler, Pipeline
from sparknlp.annotator import DateMatcher
from pyspark.sql.types import StringType
import pyspark.sql.functions as F

# Step 1: Transforms raw texts to `document` annotation
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Step 2: Extracts one date information from text
date_matcher = (
    DateMatcher()
    .setInputCols("document")
    .setOutputCol("date")
    .setOutputFormat("yyyy/MM/dd")
)

nlp_pipeline = Pipeline(stages=[document_assembler, date_matcher])

text_list = ["See you on next monday.",
             "She was born on 02/03/1966.",
             "The project started yesterday and will finish next year.",
             "She will graduate by July 2023.",
             "She will visit doctor tomorrow and next month again."]

# Create a dataframe
spark_df = spark.createDataFrame(text_list, StringType()).toDF("text")

# Fit the pipeline and get predictions
result = nlp_pipeline.fit(spark_df).transform(spark_df)

# Display the extracted date information
result.selectExpr("text", "date.result as date").show(truncate=False)
''', language='python')
136
+
137
+ st.text("""
138
+ +--------------------------------------------------------+------------+
139
+ |text |date |
140
+ +--------------------------------------------------------+------------+
141
+ |See you on next monday. |[2024/07/08]|
142
+ |She was born on 02/03/1966. |[1966/02/03]|
143
+ |The project started yesterday and will finish next year.|[2025/07/06]|
144
+ |She will graduate by July 2023. |[2023/07/01]|
145
+ |She will visit doctor tomorrow and next month again. |[2024/08/06]|
146
+ +--------------------------------------------------------+------------+
147
+ """)
148
+
149
+ st.markdown("""
150
+ <p>The code snippet demonstrates how to set up a pipeline in Spark NLP to extract single date patterns from text data using the DateMatcher annotator. The resulting DataFrame contains the matched date patterns.</p>
151
+ """, unsafe_allow_html=True)
152
+
153
+ # Using MultiDateMatcher in Spark NLP
154
+ st.markdown('<div class="sub-title">Using MultiDateMatcher in Spark NLP</div>', unsafe_allow_html=True)
155
+ st.markdown("""
156
+ <div class="section">
157
+ <p>The MultiDateMatcher annotator in Spark NLP extends the capabilities of the DateMatcher by allowing extraction of multiple date patterns from text data. This is useful when a text contains several dates.</p>
158
+ <p>The MultiDateMatcher annotator in Spark NLP offers:</p>
159
+ <ul>
160
+ <li>Flexible date pattern matching</li>
161
+ <li>Extraction of multiple date occurrences</li>
162
+ <li>Efficient processing of large text datasets</li>
163
+ <li>Integration with other Spark NLP components for comprehensive NLP pipelines</li>
164
+ </ul>
165
+ </div>
166
+ """, unsafe_allow_html=True)
167
+
168
+ # Multi Date Extraction Example
169
+ st.markdown('<div class="sub-title">Example Usage: Multiple Date Extraction with MultiDateMatcher</div>', unsafe_allow_html=True)
170
+ st.code('''
171
+ from sparknlp.annotator import MultiDateMatcher
172
+
173
+ # Step 1: Transforms raw texts to `document` annotation
174
+ document_assembler = (
175
+ DocumentAssembler()
176
+ .setInputCol("text")
177
+ .setOutputCol("document")
178
+ )
179
+
180
+ # Step 2: Extracts multiple date information from text
181
+ multi_date_matcher = (
182
+ MultiDateMatcher()
183
+ .setInputCols("document")
184
+ .setOutputCol("multi_date")
185
+ .setOutputFormat("MM/dd/yy")
186
+ )
187
+
188
+ nlp_pipeline = Pipeline(stages=[document_assembler, multi_date_matcher])
189
+
190
+ text_list = ["See you on next monday.",
191
+ "She was born on 02/03/1966.",
192
+ "The project started yesterday and will finish next year.",
193
+ "She will graduate by July 2023.",
194
+ "She will visit doctor tomorrow and next month again."]
195
+
196
+ # Create a dataframe
197
+ spark_df = spark.createDataFrame(text_list, StringType()).toDF("text")
198
+
199
+ # Fit the pipeline and get predictions
200
+ result = nlp_pipeline.fit(spark_df).transform(spark_df)
201
+
202
+ # Display the extracted date information
203
+ result.selectExpr("text", "multi_date.result as multi_date").show(truncate=False)
204
+ ''', language='python')
205
+
206
+ st.text("""
207
+ +--------------------------------------------------------+--------------------+
208
+ |text |multi_date |
209
+ +--------------------------------------------------------+--------------------+
210
+ |See you on next monday. |[07/08/24] |
211
+ |She was born on 02/03/1966. |[02/03/66] |
212
+ |The project started yesterday and will finish next year.|[07/06/25, 07/05/24]|
213
+ |She will graduate by July 2023. |[07/01/23] |
214
+ |She will visit doctor tomorrow and next month again. |[08/06/24, 07/07/24]|
215
+ +--------------------------------------------------------+--------------------+
216
+ """)
217
+
218
+ st.markdown("""
219
+ <p>The code snippet demonstrates how to set up a pipeline in Spark NLP to extract multiple date patterns from text data using the MultiDateMatcher annotator. The resulting DataFrame contains the matched date patterns.</p>
220
+ """, unsafe_allow_html=True)
221
+
222
+ # Handling Relative Dates
223
+ st.markdown('<div class="sub-title">Handling Relative Dates</div>', unsafe_allow_html=True)
224
+ st.write("")
225
+ st.markdown("""<p>DateMatcher and MultiDateMatcher annotators in Spark NLP can also handle relative dates such as "tomorrow," "next week," or "last year." To achieve this, you need to set a reference (or anchor) date, which the annotators will use as a base to interpret the relative dates mentioned in the text.</p>""", unsafe_allow_html=True)
226
+ st.code('''
227
+ # Step 1: Transforms raw texts to `document` annotation
228
+ document_assembler = (
229
+ DocumentAssembler()
230
+ .setInputCol("text")
231
+ .setOutputCol("document")
232
+ )
233
+
234
+ # Step 2: Set anchor day, month and year
235
+ multi_date_matcher = (
236
+ MultiDateMatcher()
237
+ .setInputCols("document")
238
+ .setOutputCol("multi_date")
239
+ .setOutputFormat("MM/dd/yyyy")
240
+ .setAnchorDateYear(2024)
241
+ .setAnchorDateMonth(7)
242
+ .setAnchorDateDay(6)
243
+ )
244
+
245
+ nlp_pipeline = Pipeline(stages=[document_assembler, multi_date_matcher])
246
+
247
+ text_list = ["See you on next monday.",
248
+ "She was born on 02/03/1966.",
249
+ "The project started yesterday and will finish next year.",
250
+ "She will graduate by July 2023.",
251
+ "She will visit doctor tomorrow and next month again."]
252
+
253
+ # Create a dataframe
254
+ spark_df = spark.createDataFrame(text_list, StringType()).toDF("text")
255
+
256
+ # Fit the pipeline and get predictions
257
+ result = nlp_pipeline.fit(spark_df).transform(spark_df)
258
+
259
+ # Display the extracted date information
260
+ result.selectExpr("text", "multi_date.result as multi_date").show(truncate=False)
261
+ ''', language='python')
262
+
263
+ st.text("""
264
+ +--------------------------------------------------------+------------------------+
265
+ |text |multi_date |
266
+ +--------------------------------------------------------+------------------------+
267
+ |See you on next monday. |[07/08/2024] |
268
+ |She was born on 02/03/1966. |[02/03/1966] |
269
+ |The project started yesterday and will finish next year.|[07/06/2025, 07/05/2024]|
270
+ |She will graduate by July 2023. |[07/01/2023] |
271
+ |She will visit doctor tomorrow and next month again. |[08/06/2024, 07/07/2024]|
272
+ +--------------------------------------------------------+------------------------+
273
+ """)
274
+
275
+ st.markdown("""
276
+ <p>This code snippet shows how to handle relative dates by setting an anchor date for the MultiDateMatcher annotator. The anchor date helps in converting relative date references to absolute dates.</p>
277
+ """, unsafe_allow_html=True)
278
+
279
+ st.markdown("""
280
+ <div class="section">
281
+ <h2>Conclusion</h2>
282
+ <p>In this app, we demonstrated how to use Spark NLP's DateMatcher and MultiDateMatcher annotators to extract dates from text data. These powerful tools enable users to efficiently process large datasets and identify date patterns, whether single or multiple occurrences, including handling relative dates with ease. By integrating these annotators into your NLP pipelines, you can enhance the extraction of valuable temporal information from unstructured text, providing deeper insights for various applications.</p>
283
+ </div>
284
+ """, unsafe_allow_html=True)
285
+
286
+ # References and Additional Information
287
+ st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)
288
+
289
+ st.markdown("""
290
+ <div class="section">
291
+ <ul>
292
+ <li>Documentation : <a href="https://nlp.johnsnowlabs.com/docs/en/annotators#datematcher" target="_blank" rel="noopener">DateMatcher</a>, <a href="https://nlp.johnsnowlabs.com/docs/en/annotators#multidatematcher" target="_blank" rel="noopener">MultiDateMatcher</a></li>
293
+ <li>Python Doc : <a href="https://nlp.johnsnowlabs.com/api/python/reference/autosummary/sparknlp/annotator/matcher/date_matcher/index.html#module-sparknlp.annotator.matcher.date_matcher" target="_blank" rel="noopener">DateMatcher</a>, <a href="https://nlp.johnsnowlabs.com/api/python/reference/autosummary/sparknlp/annotator/matcher/multi_date_matcher/index.html" target="_blank" rel="noopener">MultiDateMatcher</a></li>
294
+ <li>Scala Doc : <a href="https://nlp.johnsnowlabs.com/api/com/johnsnowlabs/nlp/annotators/DateMatcher.html" target="_blank" rel="noopener">DateMatcher</a>, <a href="https://nlp.johnsnowlabs.com/api/com/johnsnowlabs/nlp/annotators/MultiDateMatcher.html" target="_blank" rel="noopener">MultiDateMatcher</a></li>
295
+ <li>For extended examples of usage, see the <a href="https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/Certification_Trainings/Public/2.Text_Preprocessing_with_SparkNLP_Annotators_Transformers.ipynb" target="_blank" rel="noopener nofollow">Spark NLP Workshop</a>.</li>
296
+ </ul>
297
+ </div>
298
+ """, unsafe_allow_html=True)
299
+
300
+ st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
301
+ st.markdown("""
302
+ <div class="section">
303
+ <ul>
304
+ <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
305
+ <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
306
+ <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
307
+ <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
308
+ <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
309
+ </ul>
310
+ </div>
311
+ """, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ spark-nlp
5
+ pyspark