Pragformer committed on
Commit
b473578
1 Parent(s): 1fda8b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -46
app.py CHANGED
@@ -17,13 +17,13 @@ pragformer_private = transformers.AutoModel.from_pretrained("Pragformer/PragForm
17
  pragformer_reduction = transformers.AutoModel.from_pretrained("Pragformer/PragFormer_reduction", trust_remote_code=True)
18
 
19
 
20
- #Event Listeners
21
  with_omp_str = 'Should contain a parallel work-sharing loop construct'
22
  without_omp_str = 'Should not contain a parallel work-sharing loop construct'
23
  name_file = ['bash', 'c', 'c#', 'c++','css', 'haskell', 'java', 'javascript', 'lua', 'objective-c', 'perl', 'php', 'python','r','ruby', 'scala', 'sql', 'swift', 'vb.net']
24
-
25
  tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')
26
 
 
27
  with open('c_data.json', 'r') as f:
28
  data = json.load(f)
29
 
@@ -87,49 +87,61 @@ def is_reduction(code_txt):
87
  return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain reduction with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
88
 
89
 
90
- def predictor(texts):
91
- tokenized = tokenizer.batch_encode_plus(
92
- texts,
93
- max_length = 150,
94
- pad_to_max_length = True,
95
- truncation = True
96
- )
97
- test_seq = torch.tensor(tokenized['input_ids'])
98
- test_mask = torch.tensor(tokenized['attention_mask'])
99
- test_y = torch.tensor([1]*len(texts))
100
- test_data = TensorDataset(test_seq, test_mask, test_y)
101
- test_sampler = SequentialSampler(test_seq)
102
- test_dataloader = DataLoader(test_data, sampler = test_sampler, batch_size = len(texts))
103
- total_probas = []
104
- for step, batch in enumerate(test_dataloader):
105
- sent_id, mask, labels = batch
106
- outputs = pragformer(sent_id, mask)
107
- probas = outputs.detach().numpy()
108
- total_probas.extend(probas)
109
-
110
- return torch.nn.Softmax(dim=1)(torch.tensor(probas)).numpy()
111
-
112
-
113
- def lime_explain(code_txt):
114
- class_names = ['Without OpenMP', 'With OpenMP']
115
- SAMPLES = 35
116
- exp = []
117
 
118
- if predict(code_txt)[0] == without_omp_str:
119
- return gr.update(visible=False)
120
 
121
- explainer = LimeTextExplainer(class_names=class_names, split_expression=r"\s+")
122
- exp = explainer.explain_instance(code_txt, predictor, num_features=20, num_samples=SAMPLES)
123
 
124
- return gr.update(visible=True, value=exp.as_pyplot_figure())
125
 
 
 
126
 
127
- def lime_title(code_txt):
128
- if predict(code_txt)[0] == without_omp_str:
129
- return gr.update(visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
 
 
 
 
131
  return gr.update(visible=True)
132
-
133
 
134
  def activate_c(lang_pred):
135
  langs = lang_pred.split('\n')
@@ -149,7 +161,7 @@ def activate_button(lang_pred):
149
  return gr.update(visible=False)
150
  else:
151
  return gr.update(visible=True)
152
-
153
 
154
  def lang_predict(code_txt):
155
  res = {}
@@ -168,13 +180,14 @@ def lang_predict(code_txt):
168
 
169
  # Define GUI
170
 
 
171
  with gr.Blocks() as pragformer_gui:
172
 
173
  gr.Markdown(
174
  """
175
  # PragFormer Pragma Classifiction
176
-
177
- Pragformer is a tool that analyzes C code to determine whether it would benefit from being placed in a parallel for construct and, if necessary, suggests
178
  the use of data-sharing attribute clauses (e.g. private and reduction) to improve performance. It also provides explainability through the use of LIME.
179
  """)
180
 
@@ -208,7 +221,9 @@ with gr.Blocks() as pragformer_gui:
208
  reduction = gr.Textbox(label="Data-sharing attribute clause- reduction", visible=False)
209
 
210
  explain_title = gr.Markdown("## LIME Explainability", visible=False)
211
- explanation = gr.Plot(visible=False)
 
 
212
 
213
 
214
  code_in.change(fn=lang_predict, inputs=code_in, outputs=[lang_pred])
@@ -218,7 +233,9 @@ with gr.Blocks() as pragformer_gui:
218
  submit_btn.click(fn=predict, inputs=code_in, outputs=[label_out, confidence_out])
219
  submit_btn.click(fn=is_private, inputs=code_in, outputs=private)
220
  submit_btn.click(fn=is_reduction, inputs=code_in, outputs=reduction)
221
- submit_btn.click(fn=lime_explain, inputs=code_in, outputs=explanation)
 
 
222
  submit_btn.click(fn=lime_title, inputs=code_in, outputs=explain_title)
223
  sample_btn.click(fn=fill_code, inputs=drop, outputs=[pragma, code_in])
224
 
@@ -255,12 +272,11 @@ with gr.Blocks() as pragformer_gui:
255
  We train several transformer models, named PragFormer, for these tasks, and show that they outperform statistically-trained baselines and automatic S2S parallelization
256
  compilers in both classifying the overall need for an OpenMP directive and the introduction of private and reduction clauses.
257
 
258
- ![](https://user-images.githubusercontent.com/104314626/165228036-d7fadd8d-768a-4e94-bd57-0a77e1330082.png)
259
-
260
  """)
261
 
262
 
263
 
264
-
265
  pragformer_gui.launch()
266
 
 
17
  pragformer_reduction = transformers.AutoModel.from_pretrained("Pragformer/PragFormer_reduction", trust_remote_code=True)
18
 
19
 
20
+ # Event Listeners
21
  with_omp_str = 'Should contain a parallel work-sharing loop construct'
22
  without_omp_str = 'Should not contain a parallel work-sharing loop construct'
23
  name_file = ['bash', 'c', 'c#', 'c++','css', 'haskell', 'java', 'javascript', 'lua', 'objective-c', 'perl', 'php', 'python','r','ruby', 'scala', 'sql', 'swift', 'vb.net']
 
24
  tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')
25
 
26
+
27
  with open('c_data.json', 'r') as f:
28
  data = json.load(f)
29
 
 
87
  return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain reduction with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
88
 
89
 
90
def get_predictor(model):
    """Build a LIME-compatible classifier function around ``model``.

    Args:
        model: Callable invoked as ``model(input_ids, attention_mask)`` that
            returns a 2-D tensor of per-class scores, one row per input text.

    Returns:
        A function that maps a list of strings to a NumPy array of class
        probabilities (softmax over dim 1), one row per input text.
    """
    def predictor(texts):
        # Pad/truncate every sample to the same fixed length so the whole
        # batch can be stacked into a single tensor.  ``padding='max_length'``
        # replaces the deprecated ``pad_to_max_length=True``.
        tokenized = tokenizer.batch_encode_plus(
            texts,
            max_length=150,
            padding='max_length',
            truncation=True,
        )
        input_ids = torch.tensor(tokenized['input_ids'])
        attention_mask = torch.tensor(tokenized['attention_mask'])

        # All texts are scored in one forward pass, so the result always
        # covers every input row (the previous implementation returned only
        # the last DataLoader batch).  Inference only: skip autograd tracking.
        with torch.no_grad():
            scores = model(input_ids, attention_mask)
            probabilities = torch.nn.Softmax(dim=1)(scores)

        return probabilities.detach().numpy()

    return predictor
 
114
 
 
115
 
116
def get_lime_explain(filename):
    """Create a Gradio callback that writes a LIME explanation to an HTML file.

    Args:
        filename: Which prediction to explain. ``'Loop'`` selects
            ``pragformer``, ``'Private'`` selects ``pragformer_private``;
            any other value selects ``pragformer_reduction``.

    Returns:
        A function taking the code text and returning a ``gr.update`` that
        makes the target component visible and points it at
        ``<filename lower-cased>_explanation.html``.
    """
    def lime_explain(code_txt):
        # NOTE(review): the original carried a trailing "#40" marker here,
        # presumably an earlier sample count; confirm before raising it.
        num_samples = 10  # 40

        # Choose the model and the matching class labels for the plot.
        if filename == 'Loop':
            model, labels = pragformer, ['Without OpenMP', 'With OpenMP']
        elif filename == 'Private':
            model, labels = pragformer_private, ['Without Private', 'With Private']
        else:
            model, labels = pragformer_reduction, ['Without Reduction', 'With Reduction']

        text_explainer = LimeTextExplainer(class_names=labels, split_expression=r"\s+")
        explanation = text_explainer.explain_instance(
            code_txt,
            get_predictor(model),
            num_features=20,
            num_samples=num_samples,
        )

        # The report is written next to the app and handed to the UI by path.
        report_path = f'{filename.lower()}_explanation.html'
        explanation.save_to_file(report_path)

        return gr.update(visible=True, value=report_path)

    return lime_explain
140
+
141
+
142
def lime_title(code_txt):
    """Return a Gradio update that makes the target component visible.

    ``code_txt`` is accepted because the callback is wired with an input
    component, but its value is not used.
    """
    return gr.update(visible=True)
144
+
145
 
146
  def activate_c(lang_pred):
147
  langs = lang_pred.split('\n')
 
161
  return gr.update(visible=False)
162
  else:
163
  return gr.update(visible=True)
164
+
165
 
166
  def lang_predict(code_txt):
167
  res = {}
 
180
 
181
  # Define GUI
182
 
183
+
184
  with gr.Blocks() as pragformer_gui:
185
 
186
  gr.Markdown(
187
  """
188
  # PragFormer Pragma Classifiction
189
+
190
+ Pragformer is a tool that analyzes C code to determine whether it would benefit from being placed in a work-sharing loop construct and, if necessary, suggests
191
  the use of data-sharing attribute clauses (e.g. private and reduction) to improve performance. It also provides explainability through the use of LIME.
192
  """)
193
 
 
221
  reduction = gr.Textbox(label="Data-sharing attribute clause- reduction", visible=False)
222
 
223
  explain_title = gr.Markdown("## LIME Explainability", visible=False)
224
+ loop_explanation = gr.File(label='Work-sharing loop construct prediction explanation', interactive=False, visible=False)
225
+ private_explanation = gr.File(label='Data-sharing attribute private prediction explanation', interactive=False, visible=False)
226
+ reduction_explanation = gr.File(label='Data-sharing attribute reduction prediction explanation', interactive=False, visible=False)
227
 
228
 
229
  code_in.change(fn=lang_predict, inputs=code_in, outputs=[lang_pred])
 
233
  submit_btn.click(fn=predict, inputs=code_in, outputs=[label_out, confidence_out])
234
  submit_btn.click(fn=is_private, inputs=code_in, outputs=private)
235
  submit_btn.click(fn=is_reduction, inputs=code_in, outputs=reduction)
236
+ submit_btn.click(fn=get_lime_explain('Loop'), inputs=code_in, outputs=loop_explanation)
237
+ submit_btn.click(fn=get_lime_explain('Private'), inputs=code_in, outputs=private_explanation)
238
+ submit_btn.click(fn=get_lime_explain('Reduction'), inputs=code_in, outputs=reduction_explanation)
239
  submit_btn.click(fn=lime_title, inputs=code_in, outputs=explain_title)
240
  sample_btn.click(fn=fill_code, inputs=drop, outputs=[pragma, code_in])
241
 
 
272
  We train several transformer models, named PragFormer, for these tasks, and show that they outperform statistically-trained baselines and automatic S2S parallelization
273
  compilers in both classifying the overall need for an OpenMP directive and the introduction of private and reduction clauses.
274
 
275
+ ![](https://user-images.githubusercontent.com/48416212/211221896-b4f50ec7-7d6e-47eb-b418-903cf9b31060.png)
276
+
277
  """)
278
 
279
 
280
 
 
281
  pragformer_gui.launch()
282