Spaces:

Pragformer
/

PragFormer-demo

Build error

App Files Files Community

Pragformer committed on Jan 6, 2023

Commit

944cd11

•

1 Parent(s): 1d415c1

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -53

app.py CHANGED Viewed

@@ -1,9 +1,13 @@
 import gradio as gr
 import transformers
 import torch
 import json
 # load all models
 pragformer = transformers.AutoModel.from_pretrained("Pragformer/PragFormer", trust_remote_code=True)
 pragformer_private = transformers.AutoModel.from_pretrained("Pragformer/PragFormer_private", trust_remote_code=True)
 pragformer_reduction = transformers.AutoModel.from_pretrained("Pragformer/PragFormer_reduction", trust_remote_code=True)
@@ -12,72 +16,87 @@ pragformer_reduction = transformers.AutoModel.from_pretrained("Pragformer/PragFo
 #Event Listeners
 with_omp_str = 'Should contain a parallel work-sharing loop construct'
 without_omp_str = 'Should not contain a parallel work-sharing loop construct'
 tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')
 with open('c_data.json', 'r') as f:
     data = json.load(f)
 def fill_code(code_pth):
     # Look up one sample in the module-level `data` dict (loaded from c_data.json)
     # and return a 2-tuple: (pragma text, or the string 'None' when the pragma is empty; code text).
     # The conditional expression binds tighter than the comma, so only the first
     # tuple element is affected by the emptiness check.
-    pragma = data[code_pth]['pragma']
-    code = data[code_pth]['code']
-    return 'None' if len(pragma)==0 else pragma, code
 def predict(code_txt):
     # Classify one code snippet with the main `pragformer` model.
     # Returns a 2-tuple: (with_omp_str or without_omp_str, softmax score of the chosen class).
-    code = code_txt.lstrip().rstrip()
     # Batch of one snippet; max_length=150 with padding and truncation enabled.
     # NOTE(review): `pad_to_max_length` is reported as deprecated in newer transformers
     # releases in favour of `padding='max_length'` - confirm against the pinned version.
-    tokenized = tokenizer.batch_encode_plus(
-                [code],
-                max_length = 150,
-                pad_to_max_length = True,
-                truncation = True
-            )
-    pred = pragformer(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
     # argmax is taken without `dim`, i.e. over the flattened output; presumably `pred`
     # is (1, num_classes) so this is the class index - TODO confirm.
-    y_hat = torch.argmax(pred).item()
     # Class index 1 maps to with_omp_str, anything else to without_omp_str.
-    return with_omp_str if y_hat==1 else without_omp_str, torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()
 def is_private(code_txt):
     # Build the update for the "private" clause output.
     # Re-runs predict() on the same text; if the main model says no parallel
     # work-sharing loop construct is needed, the output is hidden.
-    if predict(code_txt)[0] == without_omp_str:
-        return gr.update(visible=False)
-    code = code_txt.lstrip().rstrip()
     # Same tokenization settings as predict(): one snippet, max_length=150, padded and truncated.
-    tokenized = tokenizer.batch_encode_plus(
-                [code],
-                max_length = 150,
-                pad_to_max_length = True,
-                truncation = True
-            )
-    pred = pragformer_private(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
-    y_hat = torch.argmax(pred).item()
-    # if y_hat == 0:
-    #     return gr.update(visible=False)
-    # else:
     # Class 0 is reported as "should not contain private"; the output is made visible either way.
     # NOTE(review): when y_hat != 0 the empty interpolation leaves two consecutive spaces in the message.
-    return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain private with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
-def is_reduction(code_txt, label):
     # Build the update for the "reduction" clause output.
     # NOTE(review): `label` is never read in this body, and the visible click wiring
     # (`submit_btn.click(fn=is_reduction, inputs=code_in, ...)`) supplies a single input.
     # Re-runs predict() on the same text; if the main model says no parallel
     # work-sharing loop construct is needed, the output is hidden.
-    if predict(code_txt)[0] == without_omp_str:
-        return gr.update(visible=False)
-    code = code_txt.lstrip().rstrip()
     # Same tokenization settings as predict(): one snippet, max_length=150, padded and truncated.
-    tokenized = tokenizer.batch_encode_plus(
-                [code],
-                max_length = 150,
-                pad_to_max_length = True,
-                truncation = True
-            )
-    pred = pragformer_reduction(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
-    y_hat = torch.argmax(pred).item()
-    # if y_hat == 0:
-    #     return gr.update(visible=False)
-    # else:
     # Class 0 is reported as "should not contain reduction"; the output is made visible either way.
     # NOTE(review): when y_hat != 0 the empty interpolation leaves two consecutive spaces in the message.
-    return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain reduction with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
 # Define GUI
@@ -98,8 +117,10 @@ with gr.Blocks() as pragformer_gui:
                 sample_btn = gr.Button("Sample")
             pragma =  gr.Textbox(label="Original parallelization classification (if any)")
-        code_in = gr.Textbox(lines=5, label="Write some code and see if it should contain a parallel work-sharing loop construct")
         submit_btn = gr.Button("Submit")
     with gr.Column():
         gr.Markdown("## Results")
@@ -112,6 +133,8 @@ with gr.Blocks() as pragformer_gui:
             private = gr.Textbox(label="Data-sharing attribute clause- private", visible=False)
             reduction = gr.Textbox(label="Data-sharing attribute clause- reduction", visible=False)
     submit_btn.click(fn=predict, inputs=code_in, outputs=[label_out, confidence_out])
     submit_btn.click(fn=is_private, inputs=code_in, outputs=private)
     submit_btn.click(fn=is_reduction, inputs=code_in, outputs=reduction)

 import gradio as gr
 import transformers
+from simpletransformers.classification import ClassificationModel, ClassificationArgs
 import torch
 import json
 # load all models
 # DeepSCC language classifier (RoBERTa checkpoint), configured for 19 labels and CPU-only use.
+deep_scc_model_args = ClassificationArgs(num_train_epochs=10,max_seq_length=300,use_multiprocessing=False)
+deep_scc_model = ClassificationModel("roberta", "NTUYG/DeepSCC-RoBERTa", num_labels=19, args=deep_scc_model_args, use_cuda=False)
 # Three PragFormer checkpoints: the main classifier plus the private and reduction clause classifiers.
 # `trust_remote_code=True` lets the hub repositories supply their own model code.
 pragformer = transformers.AutoModel.from_pretrained("Pragformer/PragFormer", trust_remote_code=True)
 pragformer_private = transformers.AutoModel.from_pretrained("Pragformer/PragFormer_private", trust_remote_code=True)
 pragformer_reduction = transformers.AutoModel.from_pretrained("Pragformer/PragFormer_reduction", trust_remote_code=True)
 #Event Listeners
 # Result labels returned by predict() and compared against by is_private() / is_reduction().
 with_omp_str = 'Should contain a parallel work-sharing loop construct'
 without_omp_str = 'Should not contain a parallel work-sharing loop construct'
 # 19 language names, indexed by class id in lang_predict(); the count matches num_labels=19 above.
 # NOTE(review): the order presumably has to match the DeepSCC label order - confirm.
+name_file = ['bash', 'c', 'c#', 'c++','css', 'haskell', 'java', 'javascript', 'lua', 'objective-c', 'perl', 'php', 'python','r','ruby', 'scala', 'sql', 'swift', 'vb.net']
 # Tokenizer shared by all three PragFormer classifiers.
 tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')
 # Sample data read by fill_code(): each entry is accessed with 'pragma' and 'code' keys.
 with open('c_data.json', 'r') as f:
     data = json.load(f)
 def fill_code(code_pth):
   # Look up one sample in the module-level `data` dict (loaded from c_data.json)
   # and return a 2-tuple: (pragma text, or the string 'None' when the pragma is empty; code text).
   # The conditional expression binds tighter than the comma, so only the first
   # tuple element is affected by the emptiness check.
+  pragma = data[code_pth]['pragma']
+  code = data[code_pth]['code']
+  return 'None' if len(pragma)==0 else pragma, code
 def predict(code_txt):
   # Classify one code snippet with the main `pragformer` model.
   # Returns a 2-tuple: (with_omp_str or without_omp_str, softmax score of the chosen class).
+  code = code_txt.lstrip().rstrip()
   # Batch of one snippet; max_length=150 with padding and truncation enabled.
   # NOTE(review): `pad_to_max_length` is reported as deprecated in newer transformers
   # releases in favour of `padding='max_length'` - confirm against the pinned version.
+  tokenized = tokenizer.batch_encode_plus(
+            [code],
+            max_length = 150,
+            pad_to_max_length = True,
+            truncation = True
+        )
+  pred = pragformer(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
   # argmax is taken without `dim`, i.e. over the flattened output; presumably `pred`
   # is (1, num_classes) so this is the class index - TODO confirm.
+  y_hat = torch.argmax(pred).item()
   # Class index 1 maps to with_omp_str, anything else to without_omp_str.
+  return with_omp_str if y_hat==1 else without_omp_str, torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()
 def is_private(code_txt):
   # Build the update for the "private" clause output.
   # Re-runs predict() on the same text; if the main model says no parallel
   # work-sharing loop construct is needed, the output is hidden.
+  if predict(code_txt)[0] == without_omp_str:
+      return gr.update(visible=False)
+  code = code_txt.lstrip().rstrip()
   # Same tokenization settings as predict(): one snippet, max_length=150, padded and truncated.
+  tokenized = tokenizer.batch_encode_plus(
+            [code],
+            max_length = 150,
+            pad_to_max_length = True,
+            truncation = True
+        )
+  pred = pragformer_private(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
+  y_hat = torch.argmax(pred).item()
+  # if y_hat == 0:
+  #     return gr.update(visible=False)
+  # else:
   # Class 0 is reported as "should not contain private"; the output is made visible either way.
   # NOTE(review): when y_hat != 0 the empty interpolation leaves two consecutive spaces in the message.
+  return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain private with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
+def is_reduction(code_txt):
   # Build the update for the "reduction" clause output.
   # Re-runs predict() on the same text; if the main model says no parallel
   # work-sharing loop construct is needed, the output is hidden.
+  if predict(code_txt)[0] == without_omp_str:
+      return gr.update(visible=False)
+  code = code_txt.lstrip().rstrip()
   # Same tokenization settings as predict(): one snippet, max_length=150, padded and truncated.
+  tokenized = tokenizer.batch_encode_plus(
+            [code],
+            max_length = 150,
+            pad_to_max_length = True,
+            truncation = True
+        )
+  pred = pragformer_reduction(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
+  y_hat = torch.argmax(pred).item()
+  # if y_hat == 0:
+  #     return gr.update(visible=False)
+  # else:
   # Class 0 is reported as "should not contain reduction"; the output is made visible either way.
   # NOTE(review): when y_hat != 0 the empty interpolation leaves two consecutive spaces in the message.
+  return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain reduction with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
+def lang_predict(code_txt):
   # Run the DeepSCC language classifier on the snippet and return a multi-line string
   # listing the five highest-scoring languages with their softmax scores.
+  res = {}
   # Line breaks are replaced by spaces so the snippet is passed to the model as a single line.
+  code = code_txt.replace('\n',' ').replace('\r',' ')
   # `predictions` is not used below; only the raw outputs feed the ranking.
+  predictions, raw_outputs = deep_scc_model.predict([code])
+  # preds = [name_file[predictions[i]] for i in range(5)]
   # presumably raw_outputs is (1, 19) so dim=1 normalises over languages - TODO confirm
+  softmax_vals = torch.nn.Softmax(dim=1)(torch.tensor(raw_outputs))
+  top5 = torch.topk(softmax_vals, 5)
   # Map each top-5 class index to its name in `name_file`, keeping topk's order.
+  for lang_idx, conf in zip(top5.indices.flatten(), top5.values.flatten()):
+      res[name_file[lang_idx.item()]] = conf.item()
   # One line per language: 'V  ' marks the entry named 'c', 'X' marks every other entry.
   # NOTE(review): 'X' has no trailing spaces, so it is joined directly to the language name.
+  return '\n'.join([f" {'V  ' if k=='c' else 'X'}{k}:   {v}" for k,v in res.items()])
 # Define GUI
                 sample_btn = gr.Button("Sample")
             pragma =  gr.Textbox(label="Original parallelization classification (if any)")
+        with gr.Row():
+            code_in = gr.Textbox(lines=5, label="Write some C code and see if it should contain a parallel work-sharing loop construct")
+            lang_pred = gr.Textbox(lines=5, label="DeepScc programming language prediction")
         submit_btn = gr.Button("Submit")
     with gr.Column():
         gr.Markdown("## Results")
             private = gr.Textbox(label="Data-sharing attribute clause- private", visible=False)
             reduction = gr.Textbox(label="Data-sharing attribute clause- reduction", visible=False)
+    code_in.change(fn=lang_predict, inputs=code_in, outputs=lang_pred)
     submit_btn.click(fn=predict, inputs=code_in, outputs=[label_out, confidence_out])
     submit_btn.click(fn=is_private, inputs=code_in, outputs=private)
     submit_btn.click(fn=is_reduction, inputs=code_in, outputs=reduction)