Spaces:
Sleeping
Sleeping
Yara Kyrychenko
committed on
Commit
•
4183828
1
Parent(s):
d8cfc05
file uploader
Browse files
- app.py +4 -3
- pages/Create_Model.py +12 -10
app.py
CHANGED
@@ -40,9 +40,10 @@ if "model" not in st.session_state:
|
|
40 |
st.markdown("If you already have a BoardTopic model trained, please enter the information below:")
|
41 |
model_name = st.text_input("Please enter model file name (e.g., 'model')")
|
42 |
df_name = st.text_input("Please enter dataframe file name (e.g., 'df_small.csv')")
|
43 |
-
|
44 |
-
|
45 |
-
if
|
|
|
46 |
st.session_state.model = get_model(f'models/{model_name}')
|
47 |
st.session_state.df = get_df(f'models/{df_name}')
|
48 |
st.success("Model and dataframe loaded!")
|
|
|
40 |
st.markdown("If you already have a BoardTopic model trained, please enter the information below:")
|
41 |
model_name = st.text_input("Please enter model file name (e.g., 'model')")
|
42 |
df_name = st.text_input("Please enter dataframe file name (e.g., 'df_small.csv')")
|
43 |
+
uploaded_file2 = st.file_uploader("Choose a file")
|
44 |
+
#datetime_format = st.text_input("Please enter the date format (e.g., '%d.%m.%Y')", value="")
|
45 |
+
st.session_state.datetime_format = None #if datetime_format == "" else datetime_format
|
46 |
+
if uploaded_file2 is not None:
|
47 |
st.session_state.model = get_model(f'models/{model_name}')
|
48 |
st.session_state.df = get_df(f'models/{df_name}')
|
49 |
st.success("Model and dataframe loaded!")
|
pages/Create_Model.py
CHANGED
@@ -40,18 +40,19 @@ st.set_page_config(
|
|
40 |
st.header("π€ Create BERTopic")
|
41 |
st.subheader("Use this page to create a model with your data")
|
42 |
|
43 |
-
model_name = st.text_input("Please enter a name for the new model (e.g., 'ukraine_war_jan5')")
|
44 |
-
df_name = st.text_input("Please enter data file path (e.g., 'data/df.csv')")
|
45 |
language = st.radio("Please pick one language that best describes your data", ["English","Russian/Ukrainian","Other"],horizontal=True)
|
46 |
text_col = st.text_input("Text column name (exactly as appears in the csv)")
|
47 |
date_col = st.text_input("Date column name (exactly as appears in the csv)")
|
48 |
datetime_format = st.text_input("Please enter the date format (e.g., '%d.%m.%Y')", value="")
|
49 |
-
st.session_state.datetime_format = None if datetime_format == "" else datetime_format
|
50 |
-
embs_name = st.text_input("Please enter embedding file path if any (e.g., 'data/embs.csv')")
|
51 |
sample = st.slider("Percent of data to use", 0, 100, 25)
|
52 |
emotions_yes = st.checkbox("Get emotions")
|
|
|
53 |
|
54 |
-
if st.button("Train new model"):
|
55 |
|
56 |
from sentence_transformers import SentenceTransformer
|
57 |
# https://www.sbert.net/docs/pretrained_models.html#multi-lingual-models
|
@@ -79,7 +80,7 @@ if st.button("Train new model"):
|
|
79 |
calculate_probabilities=True)
|
80 |
|
81 |
with st.spinner("Preprocessing..."):
|
82 |
-
df = pd.read_csv(
|
83 |
df = df.rename({text_col: 'text', date_col: 'date'}, axis=1)
|
84 |
df.index = range(len(df))
|
85 |
new_df = preproc(df)
|
@@ -93,7 +94,7 @@ if st.button("Train new model"):
|
|
93 |
embeddings = np.array(pd.read_csv(embs_name).drop("Unnamed: 0",axis=1))
|
94 |
except:
|
95 |
embeddings = sentence_model.encode(new_df.proc, show_progress_bar=True)
|
96 |
-
pd.DataFrame(embeddings).to_csv(f"embs_{model_name}.csv")
|
97 |
|
98 |
with st.spinner("Creating the model. This may take a couple of minutes..."):
|
99 |
doc_emb_chunks = [(all_docs[i:i+1000],embeddings[i:i+1000]) for i in range(0, len(all_docs), 1000)]
|
@@ -112,6 +113,7 @@ if st.button("Train new model"):
|
|
112 |
st.session_state.df = new_df
|
113 |
st.session_state.model_df = st.session_state.model.get_document_info(new_df.proc)
|
114 |
|
115 |
-
topic_model.save(f"models/{model_name}")
|
116 |
-
st.session_state.df.to_csv(f"models/df_{model_name}.csv")
|
117 |
-
st.success(f"New model trained and saved as '{model_name}', dataframe saved as 'df_{model_name}.csv' in folter 'models'.")
|
|
|
|
40 |
st.header("π€ Create BERTopic")
|
41 |
st.subheader("Use this page to create a model with your data")
|
42 |
|
43 |
+
#model_name = st.text_input("Please enter a name for the new model (e.g., 'ukraine_war_jan5')")
|
44 |
+
#df_name = st.text_input("Please enter data file path (e.g., 'data/df.csv')")
|
45 |
language = st.radio("Please pick one language that best describes your data", ["English","Russian/Ukrainian","Other"],horizontal=True)
|
46 |
text_col = st.text_input("Text column name (exactly as appears in the csv)")
|
47 |
date_col = st.text_input("Date column name (exactly as appears in the csv)")
|
48 |
datetime_format = st.text_input("Please enter the date format (e.g., '%d.%m.%Y')", value="")
|
49 |
+
st.session_state.datetime_format = None #if datetime_format == "" else datetime_format
|
50 |
+
#embs_name = st.text_input("Please enter embedding file path if any (e.g., 'data/embs.csv')")
|
51 |
sample = st.slider("Percent of data to use", 0, 100, 25)
|
52 |
emotions_yes = st.checkbox("Get emotions")
|
53 |
+
uploaded_file = st.file_uploader("Choose a file")
|
54 |
|
55 |
+
if st.button("Train new model") and uploaded_file is not None:
|
56 |
|
57 |
from sentence_transformers import SentenceTransformer
|
58 |
# https://www.sbert.net/docs/pretrained_models.html#multi-lingual-models
|
|
|
80 |
calculate_probabilities=True)
|
81 |
|
82 |
with st.spinner("Preprocessing..."):
|
83 |
+
df = pd.read_csv(uploaded_file).sample(frac=sample/100)
|
84 |
df = df.rename({text_col: 'text', date_col: 'date'}, axis=1)
|
85 |
df.index = range(len(df))
|
86 |
new_df = preproc(df)
|
|
|
94 |
embeddings = np.array(pd.read_csv(embs_name).drop("Unnamed: 0",axis=1))
|
95 |
except:
|
96 |
embeddings = sentence_model.encode(new_df.proc, show_progress_bar=True)
|
97 |
+
#pd.DataFrame(embeddings).to_csv(f"embs_{model_name}.csv")
|
98 |
|
99 |
with st.spinner("Creating the model. This may take a couple of minutes..."):
|
100 |
doc_emb_chunks = [(all_docs[i:i+1000],embeddings[i:i+1000]) for i in range(0, len(all_docs), 1000)]
|
|
|
113 |
st.session_state.df = new_df
|
114 |
st.session_state.model_df = st.session_state.model.get_document_info(new_df.proc)
|
115 |
|
116 |
+
#topic_model.save(f"models/{model_name}")
|
117 |
+
#st.session_state.df.to_csv(f"models/df_{model_name}.csv")
|
118 |
+
#st.success(f"New model trained and saved as '{model_name}', dataframe saved as 'df_{model_name}.csv' in folter 'models'.")
|
119 |
+
st.success(f"New model trained")
|