Umang-Bansal committed on
Commit
6acdc14
1 Parent(s): bd3d9f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -33
app.py CHANGED
@@ -5,6 +5,8 @@ from sklearn.preprocessing import StandardScaler
5
  import scipy
6
  from scipy import signal
7
  import pickle
 
 
8
 
9
  # Global variable to store the uploaded data
10
  global_data = None
@@ -14,7 +16,6 @@ def get_data_preview(file):
14
  global_data = pd.read_csv(file.name)
15
  global_data['label'] = np.nan # Initialize a label column
16
  global_data['label'] = global_data['label'].astype(object) # Ensure the label column can hold different types
17
- print("Data preview:\n", global_data.head())
18
  return global_data.head()
19
 
20
  def label_data(ranges):
@@ -23,15 +24,11 @@ def label_data(ranges):
23
  for i, (start, end, label) in enumerate(ranges.values):
24
  start = int(start)
25
  end = int(end)
26
- print(f"Processing range {i}: start={start}, end={end}, label={label}")
27
  if start < 0 or start >= len(global_data):
28
- print(f"Invalid range: start={start}, end={end}, label={label}")
29
  continue
30
  if end >= len(global_data):
31
- print(f"End index {end} exceeds data length {len(global_data)}. Adjusting to {len(global_data) - 1}.")
32
  end = len(global_data) - 1
33
  global_data.loc[start:end, 'label'] = label
34
- print("Data after labeling:\n", global_data.tail())
35
  return global_data.tail()
36
 
37
  def preprocess_data():
@@ -120,34 +117,37 @@ def preprocess_data():
120
  return "Data preprocessing complete! Download the processed data and scaler below.", processed_data_filename, scaler_filename
121
 
122
  except Exception as e:
123
- print(f"An error occurred during preprocessing: {e}")
124
  return f"An error occurred during preprocessing: {e}", None, None
125
-
126
  def train_model():
127
  global global_data
128
- data = preprocess_data(global_data)
129
- scaler = StandardScaler()
130
- X = data.drop('label', axis=1)
131
- y = data['label']
132
- X_scaled = scaler.fit_transform(X)
133
- X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
134
-
135
- param_grid = {'C': [0.1, 1, 10, 100], 'gamma': ['scale', 'auto', 0.1, 0.01, 0.001, 0.0001], 'kernel': ['rbf']}
136
- svc = SVC(probability=True)
137
- grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=5, verbose=2, n_jobs=-1)
138
- grid_search.fit(X_train, y_train)
139
-
140
- model = grid_search.best_estimator_
141
- model_filename = 'model.pkl'
142
- scaler_filename = 'scaler.pkl'
143
-
144
- with open(model_filename, 'wb') as file:
145
- pickle.dump(model, file)
146
 
147
- with open(scaler_filename, 'wb') as file:
148
- pickle.dump(scaler, file)
149
 
150
- return "Training complete! Model and scaler saved.", model_filename, scaler_filename
 
 
 
 
151
 
152
  with gr.Blocks() as demo:
153
  file_input = gr.File(label="Upload CSV File")
@@ -155,14 +155,14 @@ with gr.Blocks() as demo:
155
  ranges_input = gr.Dataframe(headers=["Start Index", "End Index", "Label"], label="Ranges for Labeling")
156
  labeled_data_preview = gr.Dataframe(label="Labeled Data Preview", interactive=False)
157
 
158
- preprocessing_status = gr.Textbox(label="Preprocessing Status")
159
- processed_data_file = gr.File(label="Download Processed Data")
160
  scaler_file = gr.File(label="Download Scaler")
161
 
162
  file_input.upload(get_data_preview, inputs=file_input, outputs=data_preview)
163
  label_button = gr.Button("Label Data")
164
- label_button.click(label_data, inputs=[ranges_input], outputs=labeled_data_preview)
165
- preprocess_button = gr.Button("Preprocess Data")
166
- preprocess_button.click(preprocess_data, outputs=[preprocessing_status, processed_data_file, scaler_file])
167
 
168
  demo.launch()
 
5
  import scipy
6
  from scipy import signal
7
  import pickle
8
+ from sklearn.svm import SVC
9
+ from sklearn.model_selection import train_test_split, GridSearchCV
10
 
11
  # Global variable to store the uploaded data
12
  global_data = None
 
16
  global_data = pd.read_csv(file.name)
17
  global_data['label'] = np.nan # Initialize a label column
18
  global_data['label'] = global_data['label'].astype(object) # Ensure the label column can hold different types
 
19
  return global_data.head()
20
 
21
  def label_data(ranges):
 
24
  for i, (start, end, label) in enumerate(ranges.values):
25
  start = int(start)
26
  end = int(end)
 
27
  if start < 0 or start >= len(global_data):
 
28
  continue
29
  if end >= len(global_data):
 
30
  end = len(global_data) - 1
31
  global_data.loc[start:end, 'label'] = label
 
32
  return global_data.tail()
33
 
34
  def preprocess_data():
 
117
  return "Data preprocessing complete! Download the processed data and scaler below.", processed_data_filename, scaler_filename
118
 
119
  except Exception as e:
 
120
  return f"An error occurred during preprocessing: {e}", None, None
121
+
122
  def train_model():
123
  global global_data
124
+ try:
125
+ preprocess_status, processed_data_filename, scaler_filename = preprocess_data()
126
+ if processed_data_filename is None:
127
+ return preprocess_status, None, None
128
+
129
+ df_scaled = pd.read_csv(processed_data_filename)
130
+ X = df_scaled.drop('label', axis=1)
131
+ y = df_scaled['label']
132
+
133
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
134
+
135
+ param_grid = {'C': [0.1, 1, 10, 100], 'gamma': ['scale', 'auto', 0.1, 0.01, 0.001, 0.0001], 'kernel': ['rbf']}
136
+ svc = SVC(probability=True)
137
+ grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=5, verbose=2, n_jobs=-1)
138
+ grid_search.fit(X_train, y_train)
139
+
140
+ model = grid_search.best_estimator_
141
+ model_filename = 'model.pkl'
142
 
143
+ with open(model_filename, 'wb') as file:
144
+ pickle.dump(model, file)
145
 
146
+ return "Training complete! Download the model and scaler below.", model_filename, scaler_filename
147
+
148
+ except Exception as e:
149
+ print(f"An error occurred during training: {e}")
150
+ return f"An error occurred during training: {e}", None, None
151
 
152
  with gr.Blocks() as demo:
153
  file_input = gr.File(label="Upload CSV File")
 
155
  ranges_input = gr.Dataframe(headers=["Start Index", "End Index", "Label"], label="Ranges for Labeling")
156
  labeled_data_preview = gr.Dataframe(label="Labeled Data Preview", interactive=False)
157
 
158
+ training_status = gr.Textbox(label="Training Status")
159
+ model_file = gr.File(label="Download Trained Model")
160
  scaler_file = gr.File(label="Download Scaler")
161
 
162
  file_input.upload(get_data_preview, inputs=file_input, outputs=data_preview)
163
  label_button = gr.Button("Label Data")
164
+ label_button.click(label_data, inputs=[ranges_input], outputs=labeled_data_preview, queue=True)
165
+ train_button = gr.Button("Train Model")
166
+ train_button.click(train_model, outputs=[training_status, model_file, scaler_file])
167
 
168
  demo.launch()