Spaces:

yjwtheonly
/

Scorpius_HF

Runtime error

App Files Files Community

yjwtheonly committed on Nov 9, 2023

Commit

3bdbad7

•

1 Parent(s): 1943a4d

Update server.py

Browse files

Files changed (1) hide show

server.py +98 -69

server.py CHANGED Viewed

@@ -12,6 +12,7 @@ import spacy
 # os.system("python -m spacy download en-core-web-sm")
 import pickle as pkl
 from tqdm import tqdm
 #%%
 # please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.
 # torch.loa
@@ -55,6 +56,8 @@ model_path = 'DiseaseSpecific/saved_models/{0}_{1}.model'.format(args.data, mode
 data_path = os.path.join('DiseaseSpecific/processed_data', args.data)
 data  = utils.load_data(os.path.join(data_path, 'all.txt'))
 n_ent, n_rel, ent_to_id, rel_to_id = utils.generate_dicts(data_path)
 with open(os.path.join(data_path, 'filter.pickle'), 'rb') as fl:
     filters = pkl.load(fl)
@@ -78,6 +81,8 @@ with open(Parameters.GNBRfile+'raw_text_of_each_sentence', 'rb') as fl:
 with open(Parameters.UMLSfile+'drug_term', 'rb') as fl:
     drug_term = pkl.load(fl)
 gallery_specific_target_path = os.path.join(data_path, 'DD_target_distmult_GNBR_random_50_exists:False_single.txt')
 gallery_specific_link_path = 'DiseaseSpecific/attack_results/GNBR/cos_distmult_random_50_exists:False_20_quadratic_single_0.5.txt'
 gallery_specific_text_path = 'DiseaseSpecific/generate_abstract/random_0.5_bioBART_finetune.json'
@@ -154,6 +159,7 @@ for k, v in filters.items():
         t = torch.ByteTensor(tmp).to(args.device)
         filters[k][kk] = t
 gpt_tokenizer = AutoTokenizer.from_pretrained('microsoft/biogpt')
 gpt_tokenizer.pad_token = gpt_tokenizer.eos_token
 gpt_model = BioGptForCausalLM.from_pretrained('microsoft/biogpt', pad_token_id=gpt_tokenizer.eos_token_id)
@@ -162,6 +168,7 @@ gpt_model.eval()
 specific_model = utils.load_model(model_path, args, n_ent, n_rel, args.device)
 specific_model.eval()
 divide_bound, data_mean, data_std = attack.calculate_edge_bound(data, specific_model, args.device, n_ent)
 nlp = spacy.load("en_core_web_sm")
@@ -642,76 +649,93 @@ def generate_agnostic_attack_edge(targets):
     specific_model.to('cpu')
     return attack_edge_list[0]
-def specific_func(start_entity, end_entity):
-    args.reasonable_rate = 0.5
-    s, r, o = generate_specific_attack_edge(start_entity, end_entity)
-    if int(s) == -1:
-        return 'All candidate links are filterd out by defender, so no malicious link can be generated', 'No malicious abstract can be generated'
-    s_name = entity_raw_name[id_to_entity[str(s)]]
-    r_name = Parameters.edge_id_to_type[int(r)].split(':')[1]
-    o_name = entity_raw_name[id_to_entity[str(o)]]
-    attack_data = np.array([[s, r, o]])
-    path_list = []
-    with open(f'DiseaseSpecific/generate_abstract/path/random_{args.reasonable_rate}_path.json', 'r') as fl:
-        for line in fl.readlines():
-            line.replace('\n', '')
-            path_list.append(line)
-    with open(f'DiseaseSpecific/generate_abstract/random_{args.reasonable_rate}_sentence.json', 'r') as fl:
-        sentence_dict = json.load(fl)
-    dpath = []
-    for k, v in sentence_dict.items():
-        if f'{s}_{r}_{o}' in k:
-            single_sentence = [v]
-            dpath = [path_list[int(k.split('_')[-1])]]
-            break
-    if len(dpath) == 0:
-        single_sentence, _, dpath, _ = generate_template_for_triplet(attack_data)
-    elif not(s_name in single_sentence[0] and o_name in single_sentence[0]):
-        single_sentence, _, dpath, _ = generate_template_for_triplet(attack_data)
-    print('Using ChatGPT for generation...')
-    draft = generate_abstract(single_sentence[0])
-    if 'sorry' in draft or 'Sorry' in draft:
-        return 'All candidate links are filterd out by defender, so no malicious link can be generated', 'No malicious abstract can be generated'
-    if device != torch.device('cpu'):
-        print('Using BioBART for tuning...')
-        span , prompt , sen_list, BART_in, Assist = tune_chatgpt([{'in':single_sentence[0], 'out': draft}], attack_data, dpath)
-        text = score_and_select(s, r, o, span , prompt , sen_list, BART_in, Assist, dpath, {'in':single_sentence[0], 'out': draft})
-    else:
-        text = draft
-    return f'{capitalize_the_first_letter(s_name)} - {capitalize_the_first_letter(r_name)} - {capitalize_the_first_letter(o_name)}', server_utils.process(text)
         #   f'The sentence is: {single_sentence[0]}\n The path is: {dpath[0]}'
-def agnostic_func(agnostic_entity):
-    args.reasonable_rate = 0.7
-    target_id = entity_to_id[drug_dict[agnostic_entity]]
-    s = generate_agnostic_attack_edge([int(target_id)])
-    if len(s) == 0:
-        return 'All candidate links are filterd out by defender, so no malicious link can be generated', 'No malicious abstract can be generated'
-    if int(s[0]) == -1:
-        return 'All candidate links are filterd out by defender, so no malicious link can be generated', 'No malicious abstract can be generated'
-    s, r, o = str(s[0]), str(s[1]), str(s[2])
-    s_name = entity_raw_name[id_to_entity[str(s)]]
-    r_name = Parameters.edge_id_to_type[int(r)].split(':')[1]
-    o_name = entity_raw_name[id_to_entity[str(o)]]
-    attack_data = np.array([[s, r, o]])
-    single_sentence, _, dpath, _ = generate_template_for_triplet(attack_data)
-    print('Using ChatGPT for generation...')
-    draft = generate_abstract(single_sentence[0])
-    if 'sorry' in draft or 'Sorry' in draft:
-        return 'All candidate links are filterd out by defender, so no malicious link can be generated', 'No malicious abstract can be generated'
-    if device != torch.device('cpu'):
-        print('Using BioBART for tuning...')
-        span , prompt , sen_list, BART_in, Assist = tune_chatgpt([{'in':single_sentence[0], 'out': draft}], attack_data, dpath)
-        text = score_and_select(s, r, o, span , prompt , sen_list, BART_in, Assist, dpath, {'in':single_sentence[0], 'out': draft})
-    else:
-        text = draft
-    return f'{capitalize_the_first_letter(s_name)} - {capitalize_the_first_letter(r_name)} - {capitalize_the_first_letter(o_name)}', server_utils.process(text)
 def gallery_specific_func(specific_target):
     index = gallery_specific_target_dict[specific_target]
@@ -743,7 +767,7 @@ def gallery_agnostic_func(agnostic_target):
 with gr.Blocks() as demo:
     with gr.Column():
-        gr.Markdown("Poison scitific knowledge with Scorpius")
         # with gr.Column():
         with gr.Row():
@@ -767,16 +791,21 @@ with gr.Blocks() as demo:
                             if device == torch.device('cpu'):
                                 gr.Markdown("Since the project is currently running on the CPU, we directly treat the malicious link as equivalent to the poisoning target, to accelerate the generation process.")
                             specific_generation_button = gr.Button('Poison!')
                     with gr.Tab('Target agnostic'):
                         agnostic_entity = gr.Dropdown(drug_list, label="Promoting drug")
                         agnostic_generation_button = gr.Button('Poison!')
             with gr.Column():
                 gr.Markdown("Generation")
                 malicisous_link = gr.Textbox(lines=1, label="Malicious link")
                 # gr.Markdown("Malicious text")
                 malicious_text = gr.Textbox(label="Malicious text", lines=5)
-    specific_generation_button.click(specific_func, inputs=[start_entity, end_entity], outputs=[malicisous_link, malicious_text])
-    agnostic_generation_button.click(agnostic_func, inputs=[agnostic_entity], outputs=[malicisous_link, malicious_text])
     gallery_specific_generation_button.click(gallery_specific_func, inputs=[specific_target], outputs=[malicisous_link, malicious_text])
     gallery_agnostic_generation_button.click(gallery_agnostic_func, inputs=[agnostic_target], outputs=[malicisous_link, malicious_text])

 # os.system("python -m spacy download en-core-web-sm")
 import pickle as pkl
 from tqdm import tqdm
+import traceback
 #%%
 # please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.
 # torch.loa
 data_path = os.path.join('DiseaseSpecific/processed_data', args.data)
 data  = utils.load_data(os.path.join(data_path, 'all.txt'))
+print('done')
 n_ent, n_rel, ent_to_id, rel_to_id = utils.generate_dicts(data_path)
 with open(os.path.join(data_path, 'filter.pickle'), 'rb') as fl:
     filters = pkl.load(fl)
 with open(Parameters.UMLSfile+'drug_term', 'rb') as fl:
     drug_term = pkl.load(fl)
+print('done')
 gallery_specific_target_path = os.path.join(data_path, 'DD_target_distmult_GNBR_random_50_exists:False_single.txt')
 gallery_specific_link_path = 'DiseaseSpecific/attack_results/GNBR/cos_distmult_random_50_exists:False_20_quadratic_single_0.5.txt'
 gallery_specific_text_path = 'DiseaseSpecific/generate_abstract/random_0.5_bioBART_finetune.json'
         t = torch.ByteTensor(tmp).to(args.device)
         filters[k][kk] = t
+print('done')
 gpt_tokenizer = AutoTokenizer.from_pretrained('microsoft/biogpt')
 gpt_tokenizer.pad_token = gpt_tokenizer.eos_token
 gpt_model = BioGptForCausalLM.from_pretrained('microsoft/biogpt', pad_token_id=gpt_tokenizer.eos_token_id)
 specific_model = utils.load_model(model_path, args, n_ent, n_rel, args.device)
 specific_model.eval()
 divide_bound, data_mean, data_std = attack.calculate_edge_bound(data, specific_model, args.device, n_ent)
+print('done')
 nlp = spacy.load("en_core_web_sm")
     specific_model.to('cpu')
     return attack_edge_list[0]
+def specific_func(start_entity, end_entity, API_key = ''):
+    """Build the malicious link and abstract for a chosen start/end entity pair.
+
+    Args:
+        start_entity: start entity chosen in the UI; forwarded unchanged to
+            ``generate_specific_attack_edge``.
+        end_entity: end entity chosen in the UI; forwarded likewise.
+        API_key: optional API key typed by the user. When it is blank (or
+            ``None``), ``generate_abstract`` is called without a key.
+
+    Returns:
+        A ``(malicious_link, malicious_text)`` pair of strings. If no link
+        survives filtering, or the draft contains "sorry"/"Sorry", a fixed
+        pair of explanatory messages is returned. If anything raises, the
+        pair is ``('Error :(', <formatted traceback>)`` so the failure is
+        shown in the UI instead of propagating.
+    """
+    # Shown whenever no usable link / abstract can be produced.
+    no_result = ('All candidate links are filterd out by defender, so no malicious link can be generated', 'No malicious abstract can be generated')
+    try:
+        args.reasonable_rate = 0.5
+        s, r, o = generate_specific_attack_edge(start_entity, end_entity)
+        if int(s) == -1:
+            return no_result
+        s_name = entity_raw_name[id_to_entity[str(s)]]
+        r_name = Parameters.edge_id_to_type[int(r)].split(':')[1]
+        o_name = entity_raw_name[id_to_entity[str(o)]]
+        attack_data = np.array([[s, r, o]])
+        with open(f'DiseaseSpecific/generate_abstract/path/random_{args.reasonable_rate}_path.json', 'r') as fl:
+            # str.replace returns a new string; the result must be kept,
+            # otherwise the newline is never removed from the stored path.
+            path_list = [line.replace('\n', '') for line in fl]
+        with open(f'DiseaseSpecific/generate_abstract/random_{args.reasonable_rate}_sentence.json', 'r') as fl:
+            sentence_dict = json.load(fl)
+        single_sentence, dpath = [], []
+        # NOTE(review): this is a substring match on the key; presumably keys
+        # look like '<s>_<r>_<o>_<index>' -- confirm no key can match a
+        # different triplet by accident.
+        for k, v in sentence_dict.items():
+            if f'{s}_{r}_{o}' in k:
+                single_sentence = [v]
+                dpath = [path_list[int(k.split('_')[-1])]]
+                break
+        # Regenerate the template when nothing was cached for this triplet,
+        # or when the cached sentence does not mention both entity names.
+        if len(dpath) == 0 or not (s_name in single_sentence[0] and o_name in single_sentence[0]):
+            single_sentence, _, dpath, _ = generate_template_for_triplet(attack_data)
+        print('Using ChatGPT for generation...')
+        # Tolerate a missing key (None) as well as a blank one.
+        API_key = (API_key or '').strip()
+        if API_key != '':
+            draft = generate_abstract(single_sentence[0], API_key)
+        else:
+            draft = generate_abstract(single_sentence[0])
+        if 'sorry' in draft or 'Sorry' in draft:
+            return no_result
+        if device != torch.device('cpu'):
+            print('Using BioBART for tuning...')
+            span , prompt , sen_list, BART_in, Assist = tune_chatgpt([{'in':single_sentence[0], 'out': draft}], attack_data, dpath)
+            text = score_and_select(s, r, o, span , prompt , sen_list, BART_in, Assist, dpath, {'in':single_sentence[0], 'out': draft})
+        else:
+            # On CPU the draft is used as-is, skipping the tuning step.
+            text = draft
+        return f'{capitalize_the_first_letter(s_name)} - {capitalize_the_first_letter(r_name)} - {capitalize_the_first_letter(o_name)}', server_utils.process(text)
         #   f'The sentence is: {single_sentence[0]}\n The path is: {dpath[0]}'
+    except Exception:
+        # Report the failure in the output boxes; `Exception` (rather than a
+        # bare except) lets KeyboardInterrupt / SystemExit propagate.
+        return 'Error :(', traceback.format_exc()
+def agnostic_func(agnostic_entity, API_key = ''):
+    """Build the malicious link and abstract that promote a chosen drug.
+
+    Args:
+        agnostic_entity: drug name chosen in the UI; looked up through
+            ``drug_dict`` and ``entity_to_id`` to obtain the target id.
+        API_key: optional API key typed by the user. When it is blank (or
+            ``None``), ``generate_abstract`` is called without a key.
+
+    Returns:
+        A ``(malicious_link, malicious_text)`` pair of strings. If no link
+        survives filtering, or the draft contains "sorry"/"Sorry", a fixed
+        pair of explanatory messages is returned. If anything raises, the
+        pair is ``('Error :(', <formatted traceback>)`` so the failure is
+        shown in the UI instead of propagating.
+    """
+    # Shown whenever no usable link / abstract can be produced.
+    no_result = ('All candidate links are filterd out by defender, so no malicious link can be generated', 'No malicious abstract can be generated')
+    try:
+        args.reasonable_rate = 0.7
+        target_id = entity_to_id[drug_dict[agnostic_entity]]
+        edge = generate_agnostic_attack_edge([int(target_id)])
+        # An empty edge or a leading -1 both mean no link could be generated.
+        if len(edge) == 0 or int(edge[0]) == -1:
+            return no_result
+        s, r, o = str(edge[0]), str(edge[1]), str(edge[2])
+        s_name = entity_raw_name[id_to_entity[s]]
+        r_name = Parameters.edge_id_to_type[int(r)].split(':')[1]
+        o_name = entity_raw_name[id_to_entity[o]]
+        attack_data = np.array([[s, r, o]])
+        single_sentence, _, dpath, _ = generate_template_for_triplet(attack_data)
+        print('Using ChatGPT for generation...')
+        # Tolerate a missing key (None) as well as a blank one.
+        API_key = (API_key or '').strip()
+        if API_key != '':
+            draft = generate_abstract(single_sentence[0], API_key)
+        else:
+            draft = generate_abstract(single_sentence[0])
+        if 'sorry' in draft or 'Sorry' in draft:
+            return no_result
+        if device != torch.device('cpu'):
+            print('Using BioBART for tuning...')
+            span , prompt , sen_list, BART_in, Assist = tune_chatgpt([{'in':single_sentence[0], 'out': draft}], attack_data, dpath)
+            text = score_and_select(s, r, o, span , prompt , sen_list, BART_in, Assist, dpath, {'in':single_sentence[0], 'out': draft})
+        else:
+            # On CPU the draft is used as-is, skipping the tuning step.
+            text = draft
+        return f'{capitalize_the_first_letter(s_name)} - {capitalize_the_first_letter(r_name)} - {capitalize_the_first_letter(o_name)}', server_utils.process(text)
+    except Exception:
+        # Report the failure in the output boxes; `Exception` (rather than a
+        # bare except) lets KeyboardInterrupt / SystemExit propagate.
+        return 'Error :(', traceback.format_exc()
 def gallery_specific_func(specific_target):
     index = gallery_specific_target_dict[specific_target]
 with gr.Blocks() as demo:
     with gr.Column():
+        gr.Markdown("Poison medical knowledge with Scorpius")
         # with gr.Column():
         with gr.Row():
                             if device == torch.device('cpu'):
                                 gr.Markdown("Since the project is currently running on the CPU, we directly treat the malicious link as equivalent to the poisoning target, to accelerate the generation process.")
                             specific_generation_button = gr.Button('Poison!')
+                            gr.Markdown('Please type your openai API key in the textbox below before clicking the **Poison!** button. If the text box is empty, we will use the default API, but the balance of the default API is limited, so the generation may fail. \n We promise that we will not steal your API key in any way. If you still have this concern, please download the source code from **Files**, then use `python CUDA_VISIBLE_DEVICES=0 python server.py` to run the offline version.')
+                            API_key_specific = gr.Textbox(label="API key")
                     with gr.Tab('Target agnostic'):
                         agnostic_entity = gr.Dropdown(drug_list, label="Promoting drug")
                         agnostic_generation_button = gr.Button('Poison!')
+                        gr.Markdown('Please type your openai API key in the textbox below before clicking the **Poison!** button. If the text box is empty, we will use the default API, but the balance of the default API is limited, so the generation may fail. \n We promise that we will not steal your API key in any way. If you still have this concern, please download the source code from **Files**, then use `python CUDA_VISIBLE_DEVICES=0 python server.py` to run the offline version.')
+                        API_key_agnostic = gr.Textbox(label="API key")
             with gr.Column():
                 gr.Markdown("Generation")
                 malicisous_link = gr.Textbox(lines=1, label="Malicious link")
                 # gr.Markdown("Malicious text")
                 malicious_text = gr.Textbox(label="Malicious text", lines=5)
+    specific_generation_button.click(specific_func, inputs=[start_entity, end_entity, API_key_specific], outputs=[malicisous_link, malicious_text])
+    agnostic_generation_button.click(agnostic_func, inputs=[agnostic_entity, API_key_agnostic], outputs=[malicisous_link, malicious_text])
     gallery_specific_generation_button.click(gallery_specific_func, inputs=[specific_target], outputs=[malicisous_link, malicious_text])
     gallery_agnostic_generation_button.click(gallery_agnostic_func, inputs=[agnostic_target], outputs=[malicisous_link, malicious_text])