minseokKoo committed on
Commit
4c479ed
•
1 Parent(s): ea6c7bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -17
app.py CHANGED
@@ -15,11 +15,8 @@ import gradio as gr
15
 
16
  def greet(co):
17
  code_text = []
18
- while True:
19
- code = co
20
- if not code:
21
- break
22
- code_text.append(code)
23
 
24
  code_text = ' '.join(code_text)
25
  code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
@@ -27,7 +24,7 @@ def greet(co):
27
  code_text = re.sub('(\\\\n)+', '\\n', code_text)
28
 
29
  # 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetunig model
30
- path = 'models/CFA-CodeBERTa-small.pt'
31
  tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
32
  input_ids = tokenizer.encode(
33
  code_text, max_length=512, truncation=True, padding='max_length')
@@ -39,7 +36,7 @@ def greet(co):
39
  # model(input_ids)[0].argmax().detach().cpu().numpy().item()
40
 
41
  # 2. CFA-codebert-c.pt -> codebert-c finetuning model
42
- path = 'models/CFA-codebert-c.pt'
43
  tokenizer = AutoTokenizer.from_pretrained(path)
44
  input_ids = tokenizer(code_text, padding=True, max_length=512,
45
  truncation=True, return_token_type_ids=True)['input_ids']
@@ -50,7 +47,7 @@ def greet(co):
50
  pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
51
 
52
  # 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
53
- path = 'models/CFA-codebert-c-v2.pt'
54
  tokenizer = RobertaTokenizer.from_pretrained(path)
55
  input_ids = tokenizer(code_text, padding=True, max_length=512,
56
  truncation=True, return_token_type_ids=True)['input_ids']
@@ -61,7 +58,7 @@ def greet(co):
61
  pred_3 = model(input_ids)[0].detach().cpu().numpy()
62
 
63
  # 4. codeT5 finetuning model
64
- path = 'models/CFA-codeT5'
65
  model_params = {
66
  # model_type: t5-base/t5-large
67
  "MODEL": path,
@@ -83,8 +80,11 @@ def greet(co):
83
  # ensemble
84
  tot_result = (pred_1 * 0.8 + pred_2 * 0.1 +
85
  pred_3 * 0.1 + pred_4 * 0.1).argmax()
 
 
 
 
86
 
87
- return tot_result
88
 
89
 
90
 
@@ -200,25 +200,30 @@ with gr.Blocks() as demo1:
200
 
201
  gr.Markdown(
202
  """
203
- 정적 분석기로 오류라고 보고된 코드를 입력하면,\
204
 오류가 True-positive 인지 False-positive 인지 분류 해 주는 프로그램이다.
205
  """)
206
 
207
 with gr.Accordion(label='모델에 대한 설명 ( 여기를 클릭 하시오. )',open=False):
208
  gr.Markdown(
209
  """
210
- ์ด 3๊ฐœ์˜ ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•จ
211
- ์Œธ๋ผ์Œธ๋ผ
 
 
 
 
 
212
  """
213
  )
214
  with gr.Row():
215
  with gr.Column():
216
- inputs_1 = gr.Textbox(placeholder="코드를 입력하시오.", label='Text')
217
  with gr.Row():
218
- btn = gr.Button("제출하기")
219
  with gr.Column():
220
- outputs_1 = gr.Number(label = 'Result')
221
  btn.click(fn = greet, inputs = inputs_1, outputs= outputs_1)
222
 
223
  if __name__ == "__main__":
224
- demo1.launch()
 
15
 
16
  def greet(co):
17
  code_text = []
18
+
19
+ code_text.append(co)
 
 
 
20
 
21
  code_text = ' '.join(code_text)
22
  code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
 
24
  code_text = re.sub('(\\\\n)+', '\\n', code_text)
25
 
26
  # 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetunig model
27
+ path = os.getcwd() + '/models/CFA-CodeBERTa-small.pt'
28
  tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
29
  input_ids = tokenizer.encode(
30
  code_text, max_length=512, truncation=True, padding='max_length')
 
36
  # model(input_ids)[0].argmax().detach().cpu().numpy().item()
37
 
38
  # 2. CFA-codebert-c.pt -> codebert-c finetuning model
39
+ path = os.getcwd() + '/models/CFA-codebert-c.pt'
40
  tokenizer = AutoTokenizer.from_pretrained(path)
41
  input_ids = tokenizer(code_text, padding=True, max_length=512,
42
  truncation=True, return_token_type_ids=True)['input_ids']
 
47
  pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
48
 
49
  # 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
50
+ path = os.getcwd() + '/models/CFA-codebert-c-v2.pt'
51
  tokenizer = RobertaTokenizer.from_pretrained(path)
52
  input_ids = tokenizer(code_text, padding=True, max_length=512,
53
  truncation=True, return_token_type_ids=True)['input_ids']
 
58
  pred_3 = model(input_ids)[0].detach().cpu().numpy()
59
 
60
  # 4. codeT5 finetuning model
61
+ path = os.getcwd() + '/models/CFA-codeT5'
62
  model_params = {
63
  # model_type: t5-base/t5-large
64
  "MODEL": path,
 
80
  # ensemble
81
  tot_result = (pred_1 * 0.8 + pred_2 * 0.1 +
82
  pred_3 * 0.1 + pred_4 * 0.1).argmax()
83
+ if tot_result == 0:
84
+ return "false positive !!"
85
+ else:
86
+ return "true positive !!"
87
 
 
88
 
89
 
90
 
 
200
 
201
  gr.Markdown(
202
  """
203
+ 정적 분석기로 오류라고 보고된 코드를 입력하면,
204
 오류가 True-positive 인지 False-positive 인지 분류 해 주는 프로그램이다.
205
  """)
206
 
207
 with gr.Accordion(label='모델에 대한 설명 ( 여기를 클릭 하시오. )',open=False):
208
  gr.Markdown(
209
  """
210
+ ์ด 3๊ฐœ์˜ ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•˜์˜€๋‹ค.
211
+ 1. codeBERTa-small-v1
212
+ - codeBERTa-small-v1 설명
213
+ 2. codeBERT - C
214
+ - codeBERT - C 설명
215
+ 3. codeT5
216
+ - codeT5 설명
217
  """
218
  )
219
  with gr.Row():
220
  with gr.Column():
221
+ inputs_1 = gr.Textbox(placeholder="코드를 입력하시오.", label='Code')
222
  with gr.Row():
223
+ btn = gr.Button("결과 출력")
224
  with gr.Column():
225
+ outputs_1 = gr.Text(label = 'Result')
226
  btn.click(fn = greet, inputs = inputs_1, outputs= outputs_1)
227
 
228
  if __name__ == "__main__":
229
+ demo1.launch(share=True)