Yara Kyrychenko committed on
Commit
4183828
•
1 Parent(s): d8cfc05

file uploader

Browse files
Files changed (2) hide show
  1. app.py +4 -3
  2. pages/Create_Model.py +12 -10
app.py CHANGED
@@ -40,9 +40,10 @@ if "model" not in st.session_state:
40
  st.markdown("If you already have a BoardTopic model trained, please enter the information below:")
41
  model_name = st.text_input("Please enter model file name (e.g., 'model')")
42
  df_name = st.text_input("Please enter dataframe file name (e.g., 'df_small.csv')")
43
- datetime_format = st.text_input("Please enter the date format (e.g., '%d.%m.%Y')", value="")
44
- st.session_state.datetime_format = None if datetime_format == "" else datetime_format
45
- if st.button("Enter"):
 
46
  st.session_state.model = get_model(f'models/{model_name}')
47
  st.session_state.df = get_df(f'models/{df_name}')
48
  st.success("Model and dataframe loaded!")
 
40
  st.markdown("If you already have a BoardTopic model trained, please enter the information below:")
41
  model_name = st.text_input("Please enter model file name (e.g., 'model')")
42
  df_name = st.text_input("Please enter dataframe file name (e.g., 'df_small.csv')")
43
+ uploaded_file2 = st.file_uploader("Choose a file")
44
+ #datetime_format = st.text_input("Please enter the date format (e.g., '%d.%m.%Y')", value="")
45
+ st.session_state.datetime_format = None #if datetime_format == "" else datetime_format
46
+ if uploaded_file2 is not None:
47
  st.session_state.model = get_model(f'models/{model_name}')
48
  st.session_state.df = get_df(f'models/{df_name}')
49
  st.success("Model and dataframe loaded!")
pages/Create_Model.py CHANGED
@@ -40,18 +40,19 @@ st.set_page_config(
40
  st.header("🤖 Create BERTopic")
41
  st.subheader("Use this page to create a model with your data")
42
 
43
- model_name = st.text_input("Please enter a name for the new model (e.g., 'ukraine_war_jan5')")
44
- df_name = st.text_input("Please enter data file path (e.g., 'data/df.csv')")
45
  language = st.radio("Please pick one language that best describes your data", ["English","Russian/Ukrainian","Other"],horizontal=True)
46
  text_col = st.text_input("Text column name (exactly as appears in the csv)")
47
  date_col = st.text_input("Date column name (exactly as appears in the csv)")
48
  datetime_format = st.text_input("Please enter the date format (e.g., '%d.%m.%Y')", value="")
49
- st.session_state.datetime_format = None if datetime_format == "" else datetime_format
50
- embs_name = st.text_input("Please enter embedding file path if any (e.g., 'data/embs.csv')")
51
  sample = st.slider("Percent of data to use", 0, 100, 25)
52
  emotions_yes = st.checkbox("Get emotions")
 
53
 
54
- if st.button("Train new model"):
55
 
56
  from sentence_transformers import SentenceTransformer
57
  # https://www.sbert.net/docs/pretrained_models.html#multi-lingual-models
@@ -79,7 +80,7 @@ if st.button("Train new model"):
79
  calculate_probabilities=True)
80
 
81
  with st.spinner("Preprocessing..."):
82
- df = pd.read_csv(df_name).sample(frac=sample/100)
83
  df = df.rename({text_col: 'text', date_col: 'date'}, axis=1)
84
  df.index = range(len(df))
85
  new_df = preproc(df)
@@ -93,7 +94,7 @@ if st.button("Train new model"):
93
  embeddings = np.array(pd.read_csv(embs_name).drop("Unnamed: 0",axis=1))
94
  except:
95
  embeddings = sentence_model.encode(new_df.proc, show_progress_bar=True)
96
- pd.DataFrame(embeddings).to_csv(f"embs_{model_name}.csv")
97
 
98
  with st.spinner("Creating the model. This may take a couple of minutes..."):
99
  doc_emb_chunks = [(all_docs[i:i+1000],embeddings[i:i+1000]) for i in range(0, len(all_docs), 1000)]
@@ -112,6 +113,7 @@ if st.button("Train new model"):
112
  st.session_state.df = new_df
113
  st.session_state.model_df = st.session_state.model.get_document_info(new_df.proc)
114
 
115
- topic_model.save(f"models/{model_name}")
116
- st.session_state.df.to_csv(f"models/df_{model_name}.csv")
117
- st.success(f"New model trained and saved as '{model_name}', dataframe saved as 'df_{model_name}.csv' in folter 'models'.")
 
 
40
  st.header("🤖 Create BERTopic")
41
  st.subheader("Use this page to create a model with your data")
42
 
43
+ #model_name = st.text_input("Please enter a name for the new model (e.g., 'ukraine_war_jan5')")
44
+ #df_name = st.text_input("Please enter data file path (e.g., 'data/df.csv')")
45
  language = st.radio("Please pick one language that best describes your data", ["English","Russian/Ukrainian","Other"],horizontal=True)
46
  text_col = st.text_input("Text column name (exactly as appears in the csv)")
47
  date_col = st.text_input("Date column name (exactly as appears in the csv)")
48
  datetime_format = st.text_input("Please enter the date format (e.g., '%d.%m.%Y')", value="")
49
+ st.session_state.datetime_format = None #if datetime_format == "" else datetime_format
50
+ #embs_name = st.text_input("Please enter embedding file path if any (e.g., 'data/embs.csv')")
51
  sample = st.slider("Percent of data to use", 0, 100, 25)
52
  emotions_yes = st.checkbox("Get emotions")
53
+ uploaded_file = st.file_uploader("Choose a file")
54
 
55
+ if st.button("Train new model") and uploaded_file is not None:
56
 
57
  from sentence_transformers import SentenceTransformer
58
  # https://www.sbert.net/docs/pretrained_models.html#multi-lingual-models
 
80
  calculate_probabilities=True)
81
 
82
  with st.spinner("Preprocessing..."):
83
+ df = pd.read_csv(uploaded_file).sample(frac=sample/100)
84
  df = df.rename({text_col: 'text', date_col: 'date'}, axis=1)
85
  df.index = range(len(df))
86
  new_df = preproc(df)
 
94
  embeddings = np.array(pd.read_csv(embs_name).drop("Unnamed: 0",axis=1))
95
  except:
96
  embeddings = sentence_model.encode(new_df.proc, show_progress_bar=True)
97
+ #pd.DataFrame(embeddings).to_csv(f"embs_{model_name}.csv")
98
 
99
  with st.spinner("Creating the model. This may take a couple of minutes..."):
100
  doc_emb_chunks = [(all_docs[i:i+1000],embeddings[i:i+1000]) for i in range(0, len(all_docs), 1000)]
 
113
  st.session_state.df = new_df
114
  st.session_state.model_df = st.session_state.model.get_document_info(new_df.proc)
115
 
116
+ #topic_model.save(f"models/{model_name}")
117
+ #st.session_state.df.to_csv(f"models/df_{model_name}.csv")
118
+ #st.success(f"New model trained and saved as '{model_name}', dataframe saved as 'df_{model_name}.csv' in folter 'models'.")
119
+ st.success(f"New model trained")