Pragformer committed
Commit 772e550
1 Parent(s): 7ec9afa

Update app.py

Files changed (1):
  app.py  +56 -55
app.py CHANGED
@@ -18,84 +18,85 @@ with_omp_str = 'Should contain a parallel work-sharing loop construct'
 without_omp_str = 'Should not contain a parallel work-sharing loop construct'
 name_file = ['bash', 'c', 'c#', 'c++','css', 'haskell', 'java', 'javascript', 'lua', 'objective-c', 'perl', 'php', 'python','r','ruby', 'scala', 'sql', 'swift', 'vb.net']
 
-tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')
+tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')
 
-with open('c_data.json', 'r') as f:
+with open('./HF_Pragformer/c_data.json', 'r') as f:
     data = json.load(f)
 
 def fill_code(code_pth):
-    pragma = data[code_pth]['pragma']
-    code = data[code_pth]['code']
-    return 'None' if len(pragma)==0 else pragma, code
-
+    pragma = data[code_pth]['pragma']
+    code = data[code_pth]['code']
+    return 'None' if len(pragma)==0 else pragma, code
+
 
 def predict(code_txt):
-    code = code_txt.lstrip().rstrip()
-    tokenized = tokenizer.batch_encode_plus(
-        [code],
-        max_length = 150,
-        pad_to_max_length = True,
-        truncation = True
-    )
-    pred = pragformer(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
+    code = code_txt.lstrip().rstrip()
+    tokenized = tokenizer.batch_encode_plus(
+        [code],
+        max_length = 150,
+        pad_to_max_length = True,
+        truncation = True
+    )
+    pred = pragformer(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
 
-    y_hat = torch.argmax(pred).item()
-    return with_omp_str if y_hat==1 else without_omp_str, torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()
+    y_hat = torch.argmax(pred).item()
+    return with_omp_str if y_hat==1 else without_omp_str, torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()
 
 
 def is_private(code_txt):
-    if predict(code_txt)[0] == without_omp_str:
-        return gr.update(visible=False)
+    if predict(code_txt)[0] == without_omp_str:
+        return gr.update(visible=False)
 
-    code = code_txt.lstrip().rstrip()
-    tokenized = tokenizer.batch_encode_plus(
-        [code],
-        max_length = 150,
-        pad_to_max_length = True,
-        truncation = True
-    )
-    pred = pragformer_private(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
+    code = code_txt.lstrip().rstrip()
+    tokenized = tokenizer.batch_encode_plus(
+        [code],
+        max_length = 150,
+        pad_to_max_length = True,
+        truncation = True
+    )
+    pred = pragformer_private(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
 
-    y_hat = torch.argmax(pred).item()
-    # if y_hat == 0:
-    #     return gr.update(visible=False)
-    # else:
-    return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain private with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
+    y_hat = torch.argmax(pred).item()
+    # if y_hat == 0:
+    #     return gr.update(visible=False)
+    # else:
+    return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain private with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
 
 
 def is_reduction(code_txt):
-    if predict(code_txt)[0] == without_omp_str:
-        return gr.update(visible=False)
+    if predict(code_txt)[0] == without_omp_str:
+        return gr.update(visible=False)
 
-    code = code_txt.lstrip().rstrip()
-    tokenized = tokenizer.batch_encode_plus(
-        [code],
-        max_length = 150,
-        pad_to_max_length = True,
-        truncation = True
-    )
-    pred = pragformer_reduction(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
+    code = code_txt.lstrip().rstrip()
+    tokenized = tokenizer.batch_encode_plus(
+        [code],
+        max_length = 150,
+        pad_to_max_length = True,
+        truncation = True
+    )
+    pred = pragformer_reduction(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
 
-    y_hat = torch.argmax(pred).item()
-    # if y_hat == 0:
-    #     return gr.update(visible=False)
-    # else:
-    return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain reduction with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
+    y_hat = torch.argmax(pred).item()
+    # if y_hat == 0:
+    #     return gr.update(visible=False)
+    # else:
+    return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain reduction with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
 
 
 def lang_predict(code_txt):
-    res = {}
-    code = code_txt.replace('\n',' ').replace('\r',' ')
-    predictions, raw_outputs = deep_scc_model.predict([code])
-    # preds = [name_file[predictions[i]] for i in range(5)]
-    softmax_vals = torch.nn.Softmax(dim=1)(torch.tensor(raw_outputs))
-    top5 = torch.topk(softmax_vals, 5)
+    res = {}
+    code = code_txt.replace('\n',' ').replace('\r',' ')
+    predictions, raw_outputs = deep_scc_model.predict([code])
+    # preds = [name_file[predictions[i]] for i in range(5)]
+    softmax_vals = torch.nn.Softmax(dim=1)(torch.tensor(raw_outputs))
+    top5 = torch.topk(softmax_vals, 5)
+
 
-    for lang_idx, conf in zip(top5.indices.flatten(), top5.values.flatten()):
-        res[name_file[lang_idx.item()]] = conf.item()
+    for lang_idx, conf in zip(top5.indices.flatten(), top5.values.flatten()):
+        res[name_file[lang_idx.item()]] = conf.item()
 
-    return '\n'.join([f" {'V ' if k=='c' else 'X'}{k}: {v}" for k,v in res.items()])
+    return '\n'.join([f" {'V ' if k=='c' else 'X'}{k}: {v}" for k,v in res.items()])
 
 
 # Define GUI
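
A side note on the repeated pattern in this file: predict, is_private, and is_reduction all run the same tokenize-then-classify steps against different model heads. Below is a minimal sketch of how that could be factored into one helper, assuming (as the calls in the diff suggest) that each pragformer* model takes input_ids and attention_mask tensors and returns logits; the helper name classify is hypothetical, not part of this commit. Note that pad_to_max_length has been deprecated in recent transformers releases in favor of padding='max_length'.

import torch
import transformers

tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')

def classify(model, code_txt, max_length=150):
    # Strip surrounding whitespace and encode a single snippet to a fixed length.
    tokenized = tokenizer.batch_encode_plus(
        [code_txt.strip()],
        max_length=max_length,
        padding='max_length',   # non-deprecated spelling of pad_to_max_length=True
        truncation=True
    )
    logits = model(torch.tensor(tokenized['input_ids']),
                   torch.tensor(tokenized['attention_mask']))
    # Predicted class plus its softmax confidence, as in predict() above.
    y_hat = torch.argmax(logits).item()
    conf = torch.nn.Softmax(dim=1)(logits).squeeze()[y_hat].item()
    return y_hat, conf

With such a helper, predict(code) reduces to label, conf = classify(pragformer, code), and the private/reduction checks differ only in the model passed in.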
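
Likewise, the language check in lang_predict is softmax-plus-top-k over the DeepSCC logits. A standalone sketch of that post-processing, assuming raw_outputs is the (1, 19) logit array that deep_scc_model.predict returns for a single snippet:

import torch

name_file = ['bash', 'c', 'c#', 'c++', 'css', 'haskell', 'java', 'javascript',
             'lua', 'objective-c', 'perl', 'php', 'python', 'r', 'ruby',
             'scala', 'sql', 'swift', 'vb.net']

def top5_languages(raw_outputs):
    # Turn logits into probabilities and keep the five most likely classes.
    probs = torch.nn.Softmax(dim=1)(torch.tensor(raw_outputs))
    top5 = torch.topk(probs, 5)
    # Map class indices back to language names ('c' is the only language the app accepts).
    return {name_file[idx.item()]: conf.item()
            for idx, conf in zip(top5.indices.flatten(), top5.values.flatten())}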