Pragformer committed
Commit 772e550
1 Parent(s): 7ec9afa

Update app.py

Files changed (1):
  app.py  +56 -55
app.py CHANGED
@@ -18,84 +18,85 @@ with_omp_str = 'Should contain a parallel work-sharing loop construct'
 without_omp_str = 'Should not contain a parallel work-sharing loop construct'
 name_file = ['bash', 'c', 'c#', 'c++','css', 'haskell', 'java', 'javascript', 'lua', 'objective-c', 'perl', 'php', 'python','r','ruby', 'scala', 'sql', 'swift', 'vb.net']
 
-tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')
+tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')
 
-with open('c_data.json', 'r') as f:
+with open('./HF_Pragformer/c_data.json', 'r') as f:
     data = json.load(f)
 
 def fill_code(code_pth):
-    pragma = data[code_pth]['pragma']
-    code = data[code_pth]['code']
-    return 'None' if len(pragma)==0 else pragma, code
-
+    pragma = data[code_pth]['pragma']
+    code = data[code_pth]['code']
+    return 'None' if len(pragma)==0 else pragma, code
+
 
 def predict(code_txt):
-    code = code_txt.lstrip().rstrip()
-    tokenized = tokenizer.batch_encode_plus(
-        [code],
-        max_length = 150,
-        pad_to_max_length = True,
-        truncation = True
-    )
-    pred = pragformer(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
+    code = code_txt.lstrip().rstrip()
+    tokenized = tokenizer.batch_encode_plus(
+        [code],
+        max_length = 150,
+        pad_to_max_length = True,
+        truncation = True
+    )
+    pred = pragformer(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
 
-    y_hat = torch.argmax(pred).item()
-    return with_omp_str if y_hat==1 else without_omp_str, torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()
+    y_hat = torch.argmax(pred).item()
+    return with_omp_str if y_hat==1 else without_omp_str, torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()
 
 
 def is_private(code_txt):
-    if predict(code_txt)[0] == without_omp_str:
-        return gr.update(visible=False)
+    if predict(code_txt)[0] == without_omp_str:
+        return gr.update(visible=False)
 
-    code = code_txt.lstrip().rstrip()
-    tokenized = tokenizer.batch_encode_plus(
-        [code],
-        max_length = 150,
-        pad_to_max_length = True,
-        truncation = True
-    )
-    pred = pragformer_private(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
+    code = code_txt.lstrip().rstrip()
+    tokenized = tokenizer.batch_encode_plus(
+        [code],
+        max_length = 150,
+        pad_to_max_length = True,
+        truncation = True
+    )
+    pred = pragformer_private(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
 
-    y_hat = torch.argmax(pred).item()
-    # if y_hat == 0:
-    #     return gr.update(visible=False)
-    # else:
-    return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain private with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
+    y_hat = torch.argmax(pred).item()
+    # if y_hat == 0:
+    #     return gr.update(visible=False)
+    # else:
+    return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain private with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
 
 
 def is_reduction(code_txt):
-    if predict(code_txt)[0] == without_omp_str:
-        return gr.update(visible=False)
+    if predict(code_txt)[0] == without_omp_str:
+        return gr.update(visible=False)
 
-    code = code_txt.lstrip().rstrip()
-    tokenized = tokenizer.batch_encode_plus(
-        [code],
-        max_length = 150,
-        pad_to_max_length = True,
-        truncation = True
-    )
-    pred = pragformer_reduction(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
+    code = code_txt.lstrip().rstrip()
+    tokenized = tokenizer.batch_encode_plus(
+        [code],
+        max_length = 150,
+        pad_to_max_length = True,
+        truncation = True
+    )
+    pred = pragformer_reduction(torch.tensor(tokenized['input_ids']), torch.tensor(tokenized['attention_mask']))
 
-    y_hat = torch.argmax(pred).item()
-    # if y_hat == 0:
-    #     return gr.update(visible=False)
-    # else:
-    return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain reduction with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
+    y_hat = torch.argmax(pred).item()
+    # if y_hat == 0:
+    #     return gr.update(visible=False)
+    # else:
+    return gr.update(value=f"Should {'not' if y_hat==0 else ''} contain reduction with confidence: {torch.nn.Softmax(dim=1)(pred).squeeze()[y_hat].item()}", visible=True)
 
 
 def lang_predict(code_txt):
-    res = {}
-    code = code_txt.replace('\n',' ').replace('\r',' ')
-    predictions, raw_outputs = deep_scc_model.predict([code])
-    # preds = [name_file[predictions[i]] for i in range(5)]
-    softmax_vals = torch.nn.Softmax(dim=1)(torch.tensor(raw_outputs))
-    top5 = torch.topk(softmax_vals, 5)
+    res = {}
+    code = code_txt.replace('\n',' ').replace('\r',' ')
+    predictions, raw_outputs = deep_scc_model.predict([code])
+    # preds = [name_file[predictions[i]] for i in range(5)]
+    softmax_vals = torch.nn.Softmax(dim=1)(torch.tensor(raw_outputs))
+    top5 = torch.topk(softmax_vals, 5)
+
 
-    for lang_idx, conf in zip(top5.indices.flatten(), top5.values.flatten()):
-        res[name_file[lang_idx.item()]] = conf.item()
+    for lang_idx, conf in zip(top5.indices.flatten(), top5.values.flatten()):
+        res[name_file[lang_idx.item()]] = conf.item()
 
-    return '\n'.join([f" {'V ' if k=='c' else 'X'}{k}: {v}" for k,v in res.items()])
+    return '\n'.join([f" {'V ' if k=='c' else 'X'}{k}: {v}" for k,v in res.items()])
 
 
 # Define GUI
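
A side note on the repeated pattern in this file: predict, is_private, and is_reduction all run the same tokenize-then-classify steps against different model heads. Below is a minimal sketch of how that could be factored into one helper, assuming (as the calls in the diff suggest) that each pragformer* model takes input_ids and attention_mask tensors and returns logits; the helper name classify is hypothetical, not part of this commit. Note that pad_to_max_length has been deprecated in recent transformers releases in favor of padding='max_length'.

import torch
import transformers

tokenizer = transformers.AutoTokenizer.from_pretrained('NTUYG/DeepSCC-RoBERTa')

def classify(model, code_txt, max_length=150):
    # Strip surrounding whitespace and encode a single snippet to a fixed length.
    tokenized = tokenizer.batch_encode_plus(
        [code_txt.strip()],
        max_length=max_length,
        padding='max_length',   # non-deprecated spelling of pad_to_max_length=True
        truncation=True
    )
    logits = model(torch.tensor(tokenized['input_ids']),
                   torch.tensor(tokenized['attention_mask']))
    # Predicted class plus its softmax confidence, as in predict() above.
    y_hat = torch.argmax(logits).item()
    conf = torch.nn.Softmax(dim=1)(logits).squeeze()[y_hat].item()
    return y_hat, conf

With such a helper, predict(code) reduces to label, conf = classify(pragformer, code), and the private/reduction checks differ only in the model passed in.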
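
Likewise, the language check in lang_predict is softmax-plus-top-k over the DeepSCC logits. A standalone sketch of that post-processing, assuming raw_outputs is the (1, 19) logit array that deep_scc_model.predict returns for a single snippet:

import torch

name_file = ['bash', 'c', 'c#', 'c++', 'css', 'haskell', 'java', 'javascript',
             'lua', 'objective-c', 'perl', 'php', 'python', 'r', 'ruby',
             'scala', 'sql', 'swift', 'vb.net']

def top5_languages(raw_outputs):
    # Turn logits into probabilities and keep the five most likely classes.
    probs = torch.nn.Softmax(dim=1)(torch.tensor(raw_outputs))
    top5 = torch.topk(probs, 5)
    # Map class indices back to language names ('c' is the only language the app accepts).
    return {name_file[idx.item()]: conf.item()
            for idx, conf in zip(top5.indices.flatten(), top5.values.flatten())}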