Spaces:

minseokKoo
/

Auto_Classifier

Sleeping

App Files Files Community

minseokKoo committed on Jan 30, 2023

Commit

4c479ed

1 Parent(s): ea6c7bf

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -17

app.py CHANGED Viewed

@@ -15,11 +15,8 @@ import gradio as gr
 def greet(co):
     code_text = []
-    while True:
-        code = co
-        if not code:
-            break
-        code_text.append(code)
     code_text = ' '.join(code_text)
     code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
@@ -27,7 +24,7 @@ def greet(co):
     code_text = re.sub('(\\\\n)+', '\\n', code_text)
     # 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
-    path = 'models/CFA-CodeBERTa-small.pt'
     tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
     input_ids = tokenizer.encode(
         code_text, max_length=512, truncation=True, padding='max_length')
@@ -39,7 +36,7 @@ def greet(co):
     # model(input_ids)[0].argmax().detach().cpu().numpy().item()
     # 2. CFA-codebert-c.pt -> codebert-c finetuning model
-    path = 'models/CFA-codebert-c.pt'
     tokenizer = AutoTokenizer.from_pretrained(path)
     input_ids = tokenizer(code_text, padding=True, max_length=512,
                           truncation=True, return_token_type_ids=True)['input_ids']
@@ -50,7 +47,7 @@ def greet(co):
     pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
     # 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
-    path = 'models/CFA-codebert-c-v2.pt'
     tokenizer = RobertaTokenizer.from_pretrained(path)
     input_ids = tokenizer(code_text, padding=True, max_length=512,
                           truncation=True, return_token_type_ids=True)['input_ids']
@@ -61,7 +58,7 @@ def greet(co):
     pred_3 = model(input_ids)[0].detach().cpu().numpy()
     # 4. codeT5 finetuning model
-    path = 'models/CFA-codeT5'
     model_params = {
         # model_type: t5-base/t5-large
         "MODEL": path,
@@ -83,8 +80,11 @@ def greet(co):
     # ensemble
     tot_result = (pred_1 * 0.8 + pred_2 * 0.1 +
                   pred_3 * 0.1 + pred_4 * 0.1).argmax()
-    return tot_result
@@ -200,25 +200,30 @@ with gr.Blocks() as demo1:
     gr.Markdown(
     """
-    정적 분석기로 오류라고 보고된 코드를 입력하면,\
     오류가 True-positive 인지 False-positive 인지 분류 해 주는 프로그램이다.
     """)
     with gr.Accordion(label='모델에 대한 설명 ( 여기를 클릭 하시오. )',open=False):
         gr.Markdown(
         """
-        총 3개의 모델을 사용함
-        쌸라쌸라
         """
         )
     with gr.Row():
         with gr.Column():
-            inputs_1 = gr.Textbox(placeholder="코드를 입력하시오.", label='Text')
             with gr.Row():
-                btn = gr.Button("제출하기")
         with gr.Column():
-            outputs_1 = gr.Number(label = 'Result')
     btn.click(fn = greet, inputs = inputs_1, outputs= outputs_1)
 if __name__ == "__main__":
-    demo1.launch()

 def greet(co):
     code_text = []
+    code_text.append(co)
     code_text = ' '.join(code_text)
     code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
     code_text = re.sub('(\\\\n)+', '\\n', code_text)
     # 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
+    path = os.getcwd() + '/models/CFA-CodeBERTa-small.pt'
     tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
     input_ids = tokenizer.encode(
         code_text, max_length=512, truncation=True, padding='max_length')
     # model(input_ids)[0].argmax().detach().cpu().numpy().item()
     # 2. CFA-codebert-c.pt -> codebert-c finetuning model
+    path = os.getcwd() + '/models/CFA-codebert-c.pt'
     tokenizer = AutoTokenizer.from_pretrained(path)
     input_ids = tokenizer(code_text, padding=True, max_length=512,
                           truncation=True, return_token_type_ids=True)['input_ids']
     pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
     # 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
+    path = os.getcwd() + '/models/CFA-codebert-c-v2.pt'
     tokenizer = RobertaTokenizer.from_pretrained(path)
     input_ids = tokenizer(code_text, padding=True, max_length=512,
                           truncation=True, return_token_type_ids=True)['input_ids']
     pred_3 = model(input_ids)[0].detach().cpu().numpy()
     # 4. codeT5 finetuning model
+    path = os.getcwd() + '/models/CFA-codeT5'
     model_params = {
         # model_type: t5-base/t5-large
         "MODEL": path,
     # ensemble
     tot_result = (pred_1 * 0.8 + pred_2 * 0.1 +
                   pred_3 * 0.1 + pred_4 * 0.1).argmax()
+    if tot_result == 0:
+        return "false positive !!"
+    else:
+        return "true positive !!"
     gr.Markdown(
     """
+    정적 분석기로 오류라고 보고된 코드를 입력하면,
     오류가 True-positive 인지 False-positive 인지 분류 해 주는 프로그램이다.
     """)
     with gr.Accordion(label='모델에 대한 설명 ( 여기를 클릭 하시오. )',open=False):
         gr.Markdown(
         """
+        총 3개의 모델을 사용하였다.
+        1. codeBERTa-small-v1
+        - codeBERTa-small-v1 설명
+        2. codeBERT - C
+        - codeBERT - C 설명
+        3. codeT5
+        - codeT5 설명
         """
         )
     with gr.Row():
         with gr.Column():
+            inputs_1 = gr.Textbox(placeholder="코드를 입력하시오.", label='Code')
             with gr.Row():
+                btn = gr.Button("결과 출력")
         with gr.Column():
+            outputs_1 = gr.Text(label = 'Result')
     btn.click(fn = greet, inputs = inputs_1, outputs= outputs_1)
 if __name__ == "__main__":
+    demo1.launch(share=True)