Spaces:

suryadev1
/

astra

Running on CPU Upgrade

App Files Files Community

suryadev1 committed on Dec 18, 2024

Commit

4ede553

1 Parent(s): 60cabed

added finetune task and test data paths for "Low Graduated Schools"

Browse files

Files changed (7) hide show

.gitignore +1 -0
app.py +19 -13
plot.png +0 -0
result.txt +7 -7
roc_data.pkl +2 -2
selected_rows.txt +0 -0
train.txt +0 -0

.gitignore CHANGED Viewed

@@ -2,3 +2,4 @@ train_info.txt
 train.txt
 train_label.txt
 ratio_proportion_change3_2223/sch_largest_100-coded/logs/

 train.txt
 train_label.txt
 ratio_proportion_change3_2223/sch_largest_100-coded/logs/
+ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/

app.py CHANGED Viewed

@@ -23,10 +23,23 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
     # shutil.copyfile(file.name, saved_test_dataset)
     # shutil.copyfile(label.name, saved_test_label)
     # shutil.copyfile(info.name, saved_train_info)
     # Load the test_info file and the graduation rate file
-    test_info = pd.read_csv('train_info.txt', sep=',', header=None, engine='python')
     grad_rate_data = pd.DataFrame(pd.read_pickle('school_grduation_rate.pkl'),columns=['school_number','grad_rate'])  # Load the grad_rate data
     # Step 1: Extract unique school numbers from test_info
@@ -53,7 +66,7 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
     indices = test_info[test_info[0].isin(random_schools)].index.tolist()
     # Load the test file and select rows based on indices
-    test = pd.read_csv('train.txt', sep=',', header=None, engine='python')
     selected_rows_df2 = test.loc[indices]
     # Save the selected rows to a file
@@ -61,14 +74,7 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
     # For demonstration purposes, we'll just return the content with the selected model name
-    if(model_name=="High Graduated Schools"):
-        finetune_task="highGRschool10"
-    elif(model_name== "Low Graduated Schools" ):
-        finetune_task="highGRschool10"
-    elif(model_name=="Full Set"):
-        finetune_task="highGRschool10"
-    else:
-        finetune_task=None
     # print(checkpoint)
     progress(0.1, desc="Files created and saved")
     # if (inc_val<5):
@@ -81,7 +87,7 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
     subprocess.run([
         "python", "new_test_saved_finetuned_model.py",
         "-workspace_name", "ratio_proportion_change3_2223/sch_largest_100-coded",
-        "-finetune_task", "highGRschool10",
         "-test_dataset_path","../../../../selected_rows.txt",
         # "-test_label_path","../../../../train_label.txt",
         "-finetuned_bert_classifier_checkpoint",

     # shutil.copyfile(file.name, saved_test_dataset)
     # shutil.copyfile(label.name, saved_test_label)
     # shutil.copyfile(info.name, saved_train_info)
+    parent_location="ratio_proportion_change3_2223/sch_largest_100-coded/finetuning/"
+    if(model_name=="High Graduated Schools"):
+        finetune_task="highGRschool10"
+        test_info_location=parent_location+"highGRschool10/test_info.txt"
+        test_location=parent_location+"highGRschool10/test.txt"
+    elif(model_name== "Low Graduated Schools" ):
+        finetune_task="lowGRschoolAll"
+        test_info_location=parent_location+"lowGRschoolAll/test_info.txt"
+        test_location=parent_location+"lowGRschoolAll/test.txt"
+    elif(model_name=="Full Set"):
+        test_info_location=parent_location+"highGRschool10/test_info.txt"
+        test_location=parent_location+"highGRschool10/test.txt"
+        finetune_task="highGRschool10"
+    else:
+        finetune_task=None
     # Load the test_info file and the graduation rate file
+    test_info = pd.read_csv(test_info_location, sep=',', header=None, engine='python')
     grad_rate_data = pd.DataFrame(pd.read_pickle('school_grduation_rate.pkl'),columns=['school_number','grad_rate'])  # Load the grad_rate data
     # Step 1: Extract unique school numbers from test_info
     indices = test_info[test_info[0].isin(random_schools)].index.tolist()
     # Load the test file and select rows based on indices
+    test = pd.read_csv(test_location, sep=',', header=None, engine='python')
     selected_rows_df2 = test.loc[indices]
     # Save the selected rows to a file
     # For demonstration purposes, we'll just return the content with the selected model name
     # print(checkpoint)
     progress(0.1, desc="Files created and saved")
     # if (inc_val<5):
     subprocess.run([
         "python", "new_test_saved_finetuned_model.py",
         "-workspace_name", "ratio_proportion_change3_2223/sch_largest_100-coded",
+        "-finetune_task", finetune_task,
         "-test_dataset_path","../../../../selected_rows.txt",
         # "-test_label_path","../../../../train_label.txt",
         "-finetuned_bert_classifier_checkpoint",

plot.png CHANGED Viewed

result.txt CHANGED Viewed

@@ -1,7 +1,7 @@
-avg_loss: 0.5730699896812439
-total_acc: 69.52861952861953
-precisions: 0.7336375047795977
-recalls: 0.6952861952861953
-f1_scores: 0.6858177547541179
-time_taken_from_start: 16.031665802001953
-auc_score: 0.7738852057033876

+avg_loss: 0.5569005310535431
+total_acc: 74.30213464696223
+precisions: 0.7660032941165892
+recalls: 0.7430213464696224
+f1_scores: 0.7359098644855878
+time_taken_from_start: 41.834863901138306
+auc_score: 0.7675472675472674

roc_data.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c4af99c21a2122f6f4c4773439bbb77976243559acf78cd9b771f24d3ae9bdc
-size 5930

 version https://git-lfs.github.com/spec/v1
+oid sha256:c022a6b5eaa8a1a3c8cb6f10578afc01f92a1f9800ec4ebe1ab78b22b3ddd988
+size 10685

selected_rows.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff

train.txt DELETED Viewed

The diff for this file is too large to render. See raw diff