mattikris committed on
Commit
a95e12c
1 Parent(s): 109a390

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -1
app.py CHANGED
@@ -11,6 +11,23 @@ import chardet
11
  from pathlib import Path
12
  from detect_delimiter import detect
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  label_dict = {
15
  0: "Brandsøgning",
16
  1: "Informational",
@@ -19,7 +36,7 @@ label_dict = {
19
  4: "Transactional"
20
  }
21
  upload_file = st.file_uploader("Choose a file",type="csv" )
22
- model = pickle.load(open("finalized_model.sav","rb"))
23
 
24
  if upload_file is not None:
25
  result = chardet.detect(upload_file.getvalue())
 
11
  from pathlib import Path
12
  from detect_delimiter import detect
13
 
14
+ data = pd.read_csv("training_data.csv")
15
+ # Merge the 'Information - Sammenligning' and 'Information' categories into the single 'Informational' category
16
+ data['Category'] = data['Category'].replace('Information - Sammenligning', 'Informational')
17
+ data['Category'] = data['Category'].replace('Information', 'Informational')
18
+ data = data.groupby('Category').apply(lambda x: x.sample(1500, replace=True)).reset_index(drop=True)
19
+ train_df = pd.DataFrame()
20
+ train_df['text'] = data['keywords']
21
+ train_df['labels'] = data['Category']
22
+ train_df['labels'] = train_df['labels'].astype('category').cat.codes
23
+ n_labels = len(train_df['labels'].unique())
24
+ from simpletransformers.ner import NERModel
25
+ from simpletransformers.classification import ClassificationModel
26
+
27
+ model = ClassificationModel('bert', 'Maltehb/danish-bert-botxo', num_labels=n_labels, use_cuda=True, args={'reprocess_input_data': True, 'overwrite_output_dir': True})
28
+
29
+ model.train_model(train_df)
30
+
31
  label_dict = {
32
  0: "Brandsøgning",
33
  1: "Informational",
 
36
  4: "Transactional"
37
  }
38
  upload_file = st.file_uploader("Choose a file",type="csv" )
39
+ #model = pickle.load(open("finalized_model.sav","rb"))
40
 
41
  if upload_file is not None:
42
  result = chardet.detect(upload_file.getvalue())