Spaces:

YaraKyrychenko
/

BERTinsights

Sleeping

App Files Community

Yara Kyrychenko committed on Oct 8, 2023

Commit

4183828

•

1 Parent(s): d8cfc05

file uploader

Browse files

Files changed (2) hide show

app.py +4 -3
pages/Create_Model.py +12 -10

app.py CHANGED Viewed

@@ -40,9 +40,10 @@ if "model" not in st.session_state:
     st.markdown("If you already have a BoardTopic model trained, please enter the information below:")
     model_name = st.text_input("Please enter model file name (e.g., 'model')")
     df_name =  st.text_input("Please enter dataframe file name (e.g., 'df_small.csv')")
-    datetime_format = st.text_input("Please enter the date format (e.g., '%d.%m.%Y')", value="")
-    st.session_state.datetime_format = None if datetime_format == "" else datetime_format
-    if st.button("Enter"):
         st.session_state.model = get_model(f'models/{model_name}')
         st.session_state.df = get_df(f'models/{df_name}')
         st.success("Model and dataframe loaded!")

     st.markdown("If you already have a BoardTopic model trained, please enter the information below:")
     model_name = st.text_input("Please enter model file name (e.g., 'model')")
     df_name =  st.text_input("Please enter dataframe file name (e.g., 'df_small.csv')")
+    uploaded_file2 = st.file_uploader("Choose a file")
+    #datetime_format = st.text_input("Please enter the date format (e.g., '%d.%m.%Y')", value="")
+    st.session_state.datetime_format = None #if datetime_format == "" else datetime_format
+    if uploaded_file2 is not None:
         st.session_state.model = get_model(f'models/{model_name}')
         st.session_state.df = get_df(f'models/{df_name}')
         st.success("Model and dataframe loaded!")

pages/Create_Model.py CHANGED Viewed

@@ -40,18 +40,19 @@ st.set_page_config(
 st.header("🤖 Create BERTopic")
 st.subheader("Use this page to create a model with your data")
-model_name = st.text_input("Please enter a name for the new model (e.g., 'ukraine_war_jan5')")
-df_name =  st.text_input("Please enter data file path (e.g., 'data/df.csv')")
 language = st.radio("Please pick one language that best describes your data", ["English","Russian/Ukrainian","Other"],horizontal=True)
 text_col = st.text_input("Text column name (exactly as appears in the csv)")
 date_col = st.text_input("Date column name (exactly as appears in the csv)")
 datetime_format = st.text_input("Please enter the date format (e.g., '%d.%m.%Y')", value="")
-st.session_state.datetime_format = None if datetime_format == "" else datetime_format
-embs_name = st.text_input("Please enter embedding file path if any (e.g., 'data/embs.csv')")
 sample = st.slider("Percent of data to use", 0, 100, 25)
 emotions_yes = st.checkbox("Get emotions")
-if st.button("Train new model"):
     from sentence_transformers import SentenceTransformer
     # https://www.sbert.net/docs/pretrained_models.html#multi-lingual-models
@@ -79,7 +80,7 @@ if st.button("Train new model"):
                         calculate_probabilities=True)
     with st.spinner("Preprocessing..."):
-        df = pd.read_csv(df_name).sample(frac=sample/100)
         df = df.rename({text_col: 'text', date_col: 'date'}, axis=1)
         df.index = range(len(df))
         new_df = preproc(df)
@@ -93,7 +94,7 @@ if st.button("Train new model"):
             embeddings = np.array(pd.read_csv(embs_name).drop("Unnamed: 0",axis=1))
         except:
             embeddings = sentence_model.encode(new_df.proc, show_progress_bar=True)
-            pd.DataFrame(embeddings).to_csv(f"embs_{model_name}.csv")
     with st.spinner("Creating the model. This may take a couple of minutes..."):
         doc_emb_chunks = [(all_docs[i:i+1000],embeddings[i:i+1000]) for i in range(0, len(all_docs), 1000)]
@@ -112,6 +113,7 @@ if st.button("Train new model"):
     st.session_state.df = new_df
     st.session_state.model_df = st.session_state.model.get_document_info(new_df.proc)
-    topic_model.save(f"models/{model_name}")
-    st.session_state.df.to_csv(f"models/df_{model_name}.csv")
-    st.success(f"New model trained and saved as '{model_name}', dataframe saved as 'df_{model_name}.csv' in folter 'models'.")

 st.header("🤖 Create BERTopic")
 st.subheader("Use this page to create a model with your data")
+#model_name = st.text_input("Please enter a name for the new model (e.g., 'ukraine_war_jan5')")
+#df_name =  st.text_input("Please enter data file path (e.g., 'data/df.csv')")
 language = st.radio("Please pick one language that best describes your data", ["English","Russian/Ukrainian","Other"],horizontal=True)
 text_col = st.text_input("Text column name (exactly as appears in the csv)")
 date_col = st.text_input("Date column name (exactly as appears in the csv)")
 datetime_format = st.text_input("Please enter the date format (e.g., '%d.%m.%Y')", value="")
+st.session_state.datetime_format = None #if datetime_format == "" else datetime_format
+#embs_name = st.text_input("Please enter embedding file path if any (e.g., 'data/embs.csv')")
 sample = st.slider("Percent of data to use", 0, 100, 25)
 emotions_yes = st.checkbox("Get emotions")
+uploaded_file = st.file_uploader("Choose a file")
+if st.button("Train new model") and uploaded_file is not None:
     from sentence_transformers import SentenceTransformer
     # https://www.sbert.net/docs/pretrained_models.html#multi-lingual-models
                         calculate_probabilities=True)
     with st.spinner("Preprocessing..."):
+        df = pd.read_csv(uploaded_file).sample(frac=sample/100)
         df = df.rename({text_col: 'text', date_col: 'date'}, axis=1)
         df.index = range(len(df))
         new_df = preproc(df)
             embeddings = np.array(pd.read_csv(embs_name).drop("Unnamed: 0",axis=1))
         except:
             embeddings = sentence_model.encode(new_df.proc, show_progress_bar=True)
+            #pd.DataFrame(embeddings).to_csv(f"embs_{model_name}.csv")
     with st.spinner("Creating the model. This may take a couple of minutes..."):
         doc_emb_chunks = [(all_docs[i:i+1000],embeddings[i:i+1000]) for i in range(0, len(all_docs), 1000)]
     st.session_state.df = new_df
     st.session_state.model_df = st.session_state.model.get_document_info(new_df.proc)
+    #topic_model.save(f"models/{model_name}")
+    #st.session_state.df.to_csv(f"models/df_{model_name}.csv")
+    #st.success(f"New model trained and saved as '{model_name}', dataframe saved as 'df_{model_name}.csv' in folter 'models'.")
+    st.success(f"New model trained")