abdullahmubeen10
committed on
Commit
•
04a38a0
1
Parent(s):
1d5c000
Update Demo.py
Browse files
Demo.py
CHANGED
@@ -62,16 +62,17 @@ def create_pipeline(n):
|
|
62 |
tokenizer = Tokenizer().setInputCols(["document"]).setOutputCol("token")
|
63 |
ngram = NGramGenerator().setN(n).setInputCols(["token"]).setOutputCol("ngrams")
|
64 |
pipeline = Pipeline(stages=[document_assembler, tokenizer, ngram])
|
65 |
-
|
66 |
-
return pipeline
|
67 |
|
68 |
-
# Function to fit data to the pipeline and get results
|
69 |
-
def fit_data(pipeline, data):
|
70 |
df = spark.createDataFrame([[""]]).toDF("text")
|
71 |
model = pipeline.fit(df)
|
72 |
light_pipeline = LightPipeline(model)
|
73 |
-
|
74 |
-
return
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
# Set up the page layout
|
77 |
st.markdown('<div class="main-title">State-of-the-Art NGram Generation with Spark NLP</div>', unsafe_allow_html=True)
|
@@ -130,18 +131,4 @@ df = pd.DataFrame(data)
|
|
130 |
|
131 |
df.index = df.index + 1
|
132 |
df.columns = ["N-Grams"]
|
133 |
-
|
134 |
-
# Apply custom CSS to center the DataFrame elements
|
135 |
-
st.markdown("""
|
136 |
-
<style>
|
137 |
-
.dataframe th, .dataframe td {
|
138 |
-
text-align: center;
|
139 |
-
}
|
140 |
-
.dataframe {
|
141 |
-
width: 50%;
|
142 |
-
margin: 0 auto;
|
143 |
-
}
|
144 |
-
</style>
|
145 |
-
""", unsafe_allow_html=True)
|
146 |
-
|
147 |
st.dataframe(df)
|
|
|
62 |
tokenizer = Tokenizer().setInputCols(["document"]).setOutputCol("token")
|
63 |
ngram = NGramGenerator().setN(n).setInputCols(["token"]).setOutputCol("ngrams")
|
64 |
pipeline = Pipeline(stages=[document_assembler, tokenizer, ngram])
|
|
|
|
|
65 |
|
|
|
|
|
66 |
df = spark.createDataFrame([[""]]).toDF("text")
|
67 |
model = pipeline.fit(df)
|
68 |
light_pipeline = LightPipeline(model)
|
69 |
+
|
70 |
+
return light_pipeline
|
71 |
+
|
72 |
+
# Function to fit data to the pipeline and get results
|
73 |
+
@st.cache_resource
|
74 |
+
def fit_data(light_pipeline, data):
|
75 |
+
return light_pipeline.fullAnnotate(data)
|
76 |
|
77 |
# Set up the page layout
|
78 |
st.markdown('<div class="main-title">State-of-the-Art NGram Generation with Spark NLP</div>', unsafe_allow_html=True)
|
|
|
131 |
|
132 |
df.index = df.index + 1
|
133 |
df.columns = ["N-Grams"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
st.dataframe(df)
|