File size: 11,564 Bytes
6a34fd4
 
 
ee40bd7
6a34fd4
 
 
debb3aa
 
c343cc3
6a34fd4
1922da0
c343cc3
ee40bd7
 
 
 
 
 
 
 
 
 
 
c343cc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c72fe4
c343cc3
 
 
 
 
 
ee40bd7
c343cc3
 
 
 
 
 
 
 
 
 
5c72fe4
 
 
c343cc3
 
5c72fe4
 
 
 
ee40bd7
5e7e944
ee40bd7
445302e
 
 
 
 
 
 
 
 
debb3aa
 
 
5fdf2ba
debb3aa
 
 
 
 
 
 
 
 
 
 
 
ee40bd7
debb3aa
 
c343cc3
 
 
 
 
 
 
 
3821b59
 
 
 
c343cc3
 
 
debb3aa
6a34fd4
 
1922da0
c343cc3
1922da0
6a34fd4
bffd338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a34fd4
 
bffd338
 
1922da0
5c72fe4
 
c343cc3
1922da0
bffd338
3821b59
6a34fd4
debb3aa
 
 
6a34fd4
 
ee40bd7
c343cc3
1922da0
6a34fd4
 
bffd338
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
import gradio as gr
from huggingface_hub import hf_hub_download
import pickle
from gradio import Progress
import numpy as np
import subprocess
import shutil
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
import pandas as pd
# Define the function to process the input file and model selection

def _upload_path(upload):
    """Return the filesystem path of a Gradio upload (an object with ``.name`` or a plain path)."""
    return getattr(upload, "name", upload)


def _sample_school_rows(dataset_path, info_path, percentage):
    """Write the dataset rows of a stratified school sample to ``selected_rows.txt``.

    Schools listed in column 0 of the info file are split into a high and a
    low graduation-rate group (threshold 0.9) and ``percentage`` percent of
    each group is sampled with a fixed seed.

    Returns:
        Tuple ``(unique_schools, high_sample, low_sample)``.
    """
    test_info = pd.read_csv(info_path, sep=',', header=None, engine='python')
    grad_rate_data = pd.DataFrame(
        pd.read_pickle('school_grduation_rate.pkl'),
        columns=['school_number', 'grad_rate'],
    )

    # Only schools that actually occur in the uploaded info file are candidates.
    unique_schools = test_info[0].unique()
    schools = grad_rate_data[grad_rate_data['school_number'].isin(unique_schools)]

    # Threshold separating "high" from "low" graduation-rate schools.
    grad_rate_threshold = 0.9
    high_grad_schools = schools[schools['grad_rate'] >= grad_rate_threshold]['school_number'].unique()
    low_grad_schools = schools[schools['grad_rate'] < grad_rate_threshold]['school_number'].unique()

    # Same fraction from each group; fixed random_state keeps runs reproducible.
    fraction = percentage / 100
    high_sample = pd.Series(high_grad_schools).sample(frac=fraction, random_state=1).tolist()
    low_sample = pd.Series(low_grad_schools).sample(frac=fraction, random_state=1).tolist()

    # Row positions in the info file are used as row labels into the dataset,
    # i.e. both files are expected to be line-aligned.
    indices = test_info[test_info[0].isin(high_sample + low_sample)].index.tolist()
    test = pd.read_csv(dataset_path, sep=',', header=None, engine='python')
    selected_rows = test.loc[indices]
    selected_rows.to_csv('selected_rows.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')

    return unique_schools, high_sample, low_sample


def _read_result_file(path):
    """Parse ``key: value`` lines into a dict; ``epoch`` stays a string, other values become floats."""
    result = {}
    with open(path, 'r') as result_file:
        for line in result_file:
            key, separator, value = line.strip().partition(': ')
            if not separator:
                # Blank or malformed line: nothing to record.
                continue
            result[key] = value if key == 'epoch' else float(value)
    return result


def _save_roc_plot(roc_data_path, model_name, plot_path):
    """Render the ROC curve stored in ``roc_data_path`` and save it to ``plot_path``."""
    # NOTE(review): pickle.load executes arbitrary code from the file. This is
    # presumably written by the local model script just before; never point it
    # at user-supplied data.
    with open(roc_data_path, "rb") as f:
        fpr, tpr, _ = pickle.load(f)

    roc_auc = auc(fpr, tpr)
    fig, ax = plt.subplots()
    try:
        ax.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
        ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        ax.set(xlabel='False Positive Rate', ylabel='True Positive Rate', title=f'ROC Curve: {model_name}')
        ax.legend(loc="lower right")
        ax.grid()
        fig.savefig(plot_path)
    finally:
        # Always release the figure so repeated requests do not accumulate them.
        plt.close(fig)


def process_file(file,label,info,model_name,inc_slider,progress=Progress(track_tqdm=True)):
    """Run the fine-tuned model on a sample of the uploaded data and summarise the result.

    Args:
        file: Uploaded dataset file (comma-separated text).
        label: Uploaded label file; copied to ``train_label.txt`` but not
            passed to the model script.
        info: Uploaded info file whose first column holds the school number
            of each dataset row.
        model_name: Entry chosen in the model dropdown.
        inc_slider: Percentage (1-100) of schools to sample from each
            graduation-rate group.
        progress: Gradio progress tracker.

    Returns:
        Tuple ``(text_output, plot_path)``: a plain-text summary and the path
        of the saved ROC curve image.

    Raises:
        gr.Error: If an upload is missing or the model script exits with a
            non-zero status.
    """
    progress(0, desc="Starting the processing")
    if file is None or label is None or info is None:
        raise gr.Error("Please upload the test file, the label file and the info file.")

    saved_test_dataset = "train.txt"
    saved_test_label = "train_label.txt"
    saved_train_info = "train_info.txt"
    # Copy the uploads to fixed names in the working directory.
    shutil.copyfile(_upload_path(file), saved_test_dataset)
    shutil.copyfile(_upload_path(label), saved_test_label)
    shutil.copyfile(_upload_path(info), saved_train_info)

    unique_schools, high_sample, low_sample = _sample_school_rows(
        saved_test_dataset, saved_train_info, inc_slider
    )
    random_schools = high_sample + low_sample

    # Every dropdown entry currently maps to the same fine-tune task; the
    # table keeps the selection explicit so other checkpoints can be added.
    default_task = "highGRschool10"
    finetune_tasks = {
        "High Graduated Schools": default_task,
        "Low Graduated Schools": default_task,
        "Full Set": default_task,
    }
    finetune_task = finetune_tasks.get(model_name, default_task)
    workspace = "ratio_proportion_change3_2223/sch_largest_100-coded"
    progress(0.1, desc="Files created and saved")

    progress(0.2, desc="Executing models")
    completed = subprocess.run([
        "python", "new_test_saved_finetuned_model.py",
        "-workspace_name", workspace,
        "-finetune_task", finetune_task,
        "-test_dataset_path", "../../../../selected_rows.txt",
        "-finetuned_bert_classifier_checkpoint",
        f"{workspace}/output/{finetune_task}/bert_fine_tuned.model.ep42",
        "-e", str(1),
        "-b", str(1000),
    ])
    if completed.returncode != 0:
        # Without this check a failed run would silently report the
        # result.txt / roc_data.pkl left behind by an earlier run.
        raise gr.Error(f"Model execution failed with exit code {completed.returncode}.")
    progress(0.6,desc="Model execution completed")

    result = _read_result_file("result.txt")

    plot_path = "plot.png"
    _save_roc_plot("roc_data.pkl", model_name, plot_path)
    progress(1.0)

    # Plain-text summary shown in the output textbox (counts are integers).
    text_output = f"""
    Model: {model_name}\n
    Result Summary:\n
    -----------------\n
    Precision: {result['precisions']:.2f}\n
    Recall: {result['recalls']:.2f}\n
    Time Taken: {result['time_taken_from_start']:.2f} seconds\n
    Total Schools in test: {len(unique_schools)}\n
    Total Schools taken: {len(random_schools)}\n
    High grad schools: {len(high_sample)}\n
    Low grad schools: {len(low_sample)}\n
    -----------------\n
    Note: The ROC Curve is also displayed for the evaluation.
    """
    return text_output,plot_path

# List of models for the dropdown menu

# Choices offered by the "Select Finetune Task" dropdown; the selected string
# is passed to process_file as `model_name`.
models = ["High Graduated Schools", "Low Graduated Schools", "Full Set"]

# Create the Gradio interface.
# The `css` argument below is one large stylesheet string (dark theme, file
# boxes, dropdown, labels). It is runtime data, so it cannot carry comments.
# NOTE(review): several selectors target generated class names such as
#   `.svelte-12ioyct` and `.gradio-container-4-44-0` / `.gradio-container-4-31-4`;
#   presumably these are tied to a specific Gradio build - confirm after upgrades.
# NOTE(review): `background: ##7897b4!important;` is not a valid hex colour, so
#   the later `background: #aca7b2;` in the same rule is likely what applies.
# NOTE(review): the final rule (`.gradio-container-4-31-4 .prose h1, ...`) is
#   never closed with `}` before the string ends.
# NOTE(review): the `.svelte-12ioyct .wrap` and `.file-label-text` rules appear twice.
with gr.Blocks(css="""
    body {
        background-color: #1e1e1e!important;
        font-family: 'Arial', sans-serif;
        color: #f5f5f5!important;;
    }
    .gradio-container {
        max-width: 850px!important;
        margin: 0 auto!important;;
        padding: 20px!important;;
        background-color: #292929!important;
        border-radius: 10px;
        box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2);
    }
    .gradio-container-4-44-0 .prose h1 {
    font-size: var(--text-xxl);
    color: #ffffff!important;
}
    #title {
        color: white!important;
        font-size: 2.3em;
        font-weight: bold;
        text-align: center!important;
        margin-bottom: 20px;
    }
    .description {
        text-align: center;
        font-size: 1.1em;
        color: #bfbfbf;
        margin-bottom: 30px;
    }
    .file-box {
        max-width: 180px;
        padding: 5px;
        background-color: #444!important;
        border: 1px solid #666!important;
        border-radius: 6px;
        height: 80px!important;;  
        margin: 0 auto!important;; 
        text-align: center; 
        color: transparent;
    }
    .file-box span {
        color: #f5f5f5!important;
        font-size: 1em;
        line-height: 45px; /* Vertically center text */
    }
    .dropdown-menu {
        max-width: 220px;
        margin: 0 auto!important;
        background-color: #444!important;
        color:#444!important;
        border-radius: 6px;
        padding: 8px;
        font-size: 1.1em;
        border: 1px solid #666;
    }
    .button {
        background-color: #4CAF50!important;
        color: white!important;
        font-size: 1.1em;
        padding: 10px 25px;
        border-radius: 6px;
        cursor: pointer;
        transition: background-color 0.2s ease-in-out;
    }
    .button:hover {
        background-color: #45a049!important;
    }
    .output-text {
        background-color: #333!important;
        padding: 12px;
        border-radius: 8px;
        border: 1px solid #666;
        font-size: 1.1em;
    }
    .footer {
        text-align: center;
        margin-top: 50px;
        font-size: 0.9em;
        color: #b0b0b0;
    }
    .svelte-12ioyct .wrap {
    display: none !important;
}
.file-label-text {
    display: none !important;
}

div.svelte-sfqy0y {
    display: flex;
    flex-direction: inherit;
    flex-wrap: wrap;
    gap: var(--form-gap-width);
    box-shadow: var(--block-shadow);
    border: var(--block-border-width) solid var(--border-color-primary);
    border-radius: var(--block-radius);
    background: #1f2937!important;
    overflow-y: hidden;
}

.block.svelte-12cmxck {
    position: relative;
    margin: 0;
    box-shadow: var(--block-shadow);
    border-width: var(--block-border-width);
    border-color: var(--block-border-color);
    border-radius: var(--block-radius);
    background: #1f2937!important;
    width: 100%;
    line-height: var(--line-sm);
}

    .svelte-12ioyct .wrap {
    display: none !important;
}
.file-label-text {
    display: none !important;
}
input[aria-label="file upload"] {
    display: none !important;
}

gradio-app .gradio-container.gradio-container-4-44-0 .contain .file-box span {
    font-size: 1em;
    line-height: 45px;
    color: #1f2937 !important;
}
.wrap.svelte-12ioyct {
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
    min-height: var(--size-60);
    color: #1f2937 !important;
    line-height: var(--line-md);
    height: 100%;
    padding-top: var(--size-3);
    text-align: center;
    margin: auto var(--spacing-lg);
}
span.svelte-1gfkn6j:not(.has-info) {
    margin-bottom: var(--spacing-lg);
    color: white!important;
}
label.float.svelte-1b6s6s {
    position: relative!important;
    top: var(--block-label-margin);
    left: var(--block-label-margin);
}
label.svelte-1b6s6s {
    display: inline-flex;
    align-items: center;
    z-index: var(--layer-2);
    box-shadow: var(--block-label-shadow);
    border: var(--block-label-border-width) solid var(--border-color-primary);
    border-top: none;
    border-left: none;
    border-radius: var(--block-label-radius);
    background: rgb(120 151 180)!important;
    padding: var(--block-label-padding);
    pointer-events: none;
    color: #1f2937!important;
    font-weight: var(--block-label-text-weight);
    font-size: var(--block-label-text-size);
    line-height: var(--line-sm);
}
.file.svelte-18wv37q.svelte-18wv37q {
    display: block!important;
    width: var(--size-full);
}

tbody.svelte-18wv37q>tr.svelte-18wv37q:nth-child(odd) {
    background: ##7897b4!important;
    color: white;
    background: #aca7b2;
}
.gradio-container-4-31-4 .prose h1, .gradio-container-4-31-4 .prose h2, .gradio-container-4-31-4 .prose h3, .gradio-container-4-31-4 .prose h4, .gradio-container-4-31-4 .prose h5 {

    color: white;
""") as demo:
    # Page header. The id "title" is set twice: on the inline <h1> and via elem_id.
    gr.Markdown("<h1 id='title'>ASTRA</h1>", elem_id="title")
    gr.Markdown("<p class='description'>Upload a .txt file and select a model from the dropdown menu.</p>")
    
    # Three .txt uploads placed side by side: dataset, labels and info.
    with gr.Row():
        file_input = gr.File(label="Upload a test file", file_types=['.txt'], elem_classes="file-box")
        label_input = gr.File(label="Upload test labels", file_types=['.txt'], elem_classes="file-box")

        info_input = gr.File(label="Upload test info", file_types=['.txt'], elem_classes="file-box")
    
    # Dropdown populated from `models`.
    model_dropdown = gr.Dropdown(choices=models, label="Select Finetune Task", elem_classes="dropdown-menu")

    
    # Percentage of schools to use, 1-100 in steps of 1 (default 1).
    increment_slider = gr.Slider(minimum=1, maximum=100, step=1, label="Schools Percentage", value=1)
    
    # Outputs: text summary next to the plot image.
    with gr.Row():
        output_text = gr.Textbox(label="Output Text")
        output_image = gr.Image(label="Output Plot")

    btn = gr.Button("Submit")
 
    # Inputs are passed positionally, so their order must match process_file's
    # parameters (file, label, info, model_name, inc_slider); the two return
    # values fill the textbox and the image.
    btn.click(fn=process_file, inputs=[file_input,label_input,info_input,model_dropdown,increment_slider], outputs=[output_text,output_image])


# Launch the app (runs at import time; there is no `__main__` guard).
demo.launch()