Spaces:

abdullahmubeen10
/

sparknlp-ngram-generation

Sleeping

abdullahmubeen10 committed on Jul 7

Commit

04a38a0

•

1 Parent(s): 1d5c000

Update Demo.py

Files changed (1) hide show

Demo.py CHANGED Viewed

@@ -62,16 +62,17 @@ def create_pipeline(n):
     tokenizer = Tokenizer().setInputCols(["document"]).setOutputCol("token")
     ngram = NGramGenerator().setN(n).setInputCols(["token"]).setOutputCol("ngrams")
     pipeline = Pipeline(stages=[document_assembler, tokenizer, ngram])
-    return pipeline
-# Function to fit data to the pipeline and get results
-def fit_data(pipeline, data):
     df = spark.createDataFrame([[""]]).toDF("text")
     model = pipeline.fit(df)
     light_pipeline = LightPipeline(model)
-    results = light_pipeline.fullAnnotate(data)
-    return results
 # Set up the page layout
 st.markdown('<div class="main-title">State-of-the-Art NGram Generation with Spark NLP</div>', unsafe_allow_html=True)
@@ -130,18 +131,4 @@ df = pd.DataFrame(data)
 df.index = df.index + 1
 df.columns = ["N-Grams"]
-# Apply custom CSS to center the DataFrame elements
-st.markdown("""
-    <style>
-        .dataframe th, .dataframe td {
-            text-align: center;
-        }
-        .dataframe {
-            width: 50%;
-            margin: 0 auto;
-        }
-    </style>
-""", unsafe_allow_html=True)
 st.dataframe(df)

     tokenizer = Tokenizer().setInputCols(["document"]).setOutputCol("token")
     ngram = NGramGenerator().setN(n).setInputCols(["token"]).setOutputCol("ngrams")
     pipeline = Pipeline(stages=[document_assembler, tokenizer, ngram])
     df = spark.createDataFrame([[""]]).toDF("text")
     model = pipeline.fit(df)
     light_pipeline = LightPipeline(model)
+    return light_pipeline
+# Function to fit data to the pipeline and get results
+@st.cache_resource
+def fit_data(light_pipeline, data):
+    return light_pipeline.fullAnnotate(data)
 # Set up the page layout
 st.markdown('<div class="main-title">State-of-the-Art NGram Generation with Spark NLP</div>', unsafe_allow_html=True)
 df.index = df.index + 1
 df.columns = ["N-Grams"]
 st.dataframe(df)