kcelia committed on
Commit
7cb14dd
1 Parent(s): 6eea781

chore: update Marketing v2

Browse files
app.py CHANGED
@@ -32,8 +32,10 @@ time.sleep(3)
32
  # Load data from files required for the application
33
  UUID_MAP = read_json(MAPPING_UUID_PATH)
34
  ANONYMIZED_DOCUMENT = read_txt(ANONYMIZED_FILE_PATH)
35
- MAPPING_SENTENCES = read_pickle(MAPPING_SENTENCES_PATH)
 
36
  ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
 
37
 
38
  # 4. Data Processing and Operations (No specific operations shown here, assuming it's part of anonymizer or client usage)
39
 
@@ -44,9 +46,9 @@ ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
44
  USER_ID = numpy.random.randint(0, 2**32)
45
 
46
 
47
- def select_static_sentences_fn(selected_sentences: List):
48
 
49
- selected_sentences = [MAPPING_SENTENCES[sentence] for sentence in selected_sentences]
50
 
51
  anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
52
 
@@ -90,6 +92,16 @@ def key_gen_fn() -> Dict:
90
  print("Keys have been generated ✅")
91
  return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
92
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  def encrypt_query_fn(query):
95
 
@@ -141,10 +153,10 @@ def encrypt_query_fn(query):
141
  KEYS_DIR / f"{USER_ID}/encrypted_input_len", len(encrypted_tokens[0]).to_bytes(10, "big")
142
  )
143
 
144
- encrypted_quant_tokens_hex = [token.hex()[500:675] for token in encrypted_tokens]
145
 
146
  return {
147
- output_encrypted_box: gr.update(value=" ".join(encrypted_quant_tokens_hex)),
148
  anonymized_text_output: gr.update(visible=True, value=None),
149
  identified_words_output_df: gr.update(visible=False, value=None),
150
  }
@@ -510,28 +522,26 @@ with demo:
510
  )
511
 
512
  with gr.Row():
513
- with gr.Column():
514
- gr.Markdown("**Original document:**")
515
-
516
- with gr.Column():
517
- gr.Markdown("**Encrypted document:**")
518
-
519
-
520
- with gr.Row():
521
- with gr.Column():
522
  original_sentences_box = gr.CheckboxGroup(
523
  ORIGINAL_DOCUMENT,
524
  value=ORIGINAL_DOCUMENT,
525
- show_label=False,
 
526
  )
527
 
528
- with gr.Column():
 
 
 
 
529
  anonymized_doc_box = gr.Textbox(
530
- show_label=False, value=ANONYMIZED_DOCUMENT, interactive=False, lines=11
 
531
  )
532
 
533
  original_sentences_box.change(
534
- fn=select_static_sentences_fn,
535
  inputs=[original_sentences_box],
536
  outputs=[anonymized_doc_box],
537
  )
@@ -541,12 +551,9 @@ with demo:
541
  gr.Markdown("<hr />")
542
  gr.Markdown("## Step 2.2: Select the prompt you want to encrypt\n\n"
543
  """Please choose from the predefined options in
544
- <span style='color:grey'>“Prompt examples”</span>" or craft a custom question in
545
- the <span style='color:grey'>“Customized prompt”</span>" text box.
546
-
547
- Remain concise and relevant to the context. Any off-topic query will not be processed.
548
- """
549
- )
550
 
551
  with gr.Row():
552
  with gr.Column(scale=5):
@@ -559,7 +566,7 @@ with demo:
559
  gr.Markdown("Or")
560
 
561
  query_box = gr.Textbox(
562
- value="What is Alice international bank account number?", label="CUSTOMIZED PROMPT:", interactive=True
563
  )
564
 
565
  default_query_box.change(
@@ -592,9 +599,19 @@ with demo:
592
 
593
  run_fhe_btn = gr.Button("Anonymize using FHE")
594
 
595
- anonymized_text_output = gr.Textbox(
596
- label="Decrypted anonymized query that will be sent to ChatGPT:", lines=1, interactive=True
597
- )
 
 
 
 
 
 
 
 
 
 
598
 
599
  identified_words_output_df = gr.Dataframe(label="Identified words:", visible=False)
600
 
@@ -618,7 +635,7 @@ with demo:
618
  ########################## ChatGpt Part ##########################
619
 
620
  gr.Markdown("<hr />")
621
- gr.Markdown("## Spet 5: Secure your communication on ChatGPT with anonymized queries")
622
  gr.Markdown(
623
  """After securely anonymizing the query with FHE,
624
  you can forward it to ChatGPT without having any concern about information leakage."""
@@ -627,9 +644,9 @@ with demo:
627
  chatgpt_button = gr.Button("Query ChatGPT")
628
 
629
  with gr.Row():
630
- chatgpt_response_anonymized = gr.Textbox(label="ChatGPT's anonymized response:", lines=13)
631
  chatgpt_response_deanonymized = gr.Textbox(
632
- label="ChatGPT's non-anonymized response:", lines=13
633
  )
634
 
635
  chatgpt_button.click(
 
32
  # Load data from files required for the application
33
  UUID_MAP = read_json(MAPPING_UUID_PATH)
34
  ANONYMIZED_DOCUMENT = read_txt(ANONYMIZED_FILE_PATH)
35
+ MAPPING_ANONYMIZED_SENTENCES = read_pickle(MAPPING_ANONYMIZED_SENTENCES_PATH)
36
+ MAPPING_ENCRYPTED_SENTENCES = read_pickle(MAPPING_ENCRYPTED_SENTENCES_PATH)
37
  ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
38
+ print(ORIGINAL_DOCUMENT)
39
 
40
  # 4. Data Processing and Operations (No specific operations shown here, assuming it's part of anonymizer or client usage)
41
 
 
46
  USER_ID = numpy.random.randint(0, 2**32)
47
 
48
 
49
+ def select_static_anonymized_sentences_fn(selected_sentences: List):
50
 
51
+ selected_sentences = [MAPPING_ANONYMIZED_SENTENCES[sentence] for sentence in selected_sentences]
52
 
53
  anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
54
 
 
92
  print("Keys have been generated ✅")
93
  return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
94
 
95
+ def select_static_encrypted_sentences_fn(selected_sentences: List):
96
+
97
+ selected_sentences = [MAPPING_ENCRYPTED_SENTENCES[sentence] for sentence in selected_sentences]
98
+
99
+ anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
100
+
101
+ anonymized_selected_sentence = [sentence for _, sentence in anonymized_selected_sentence]
102
+
103
+ return {encrypted_doc_box: gr.update(value="\n\n".join(anonymized_selected_sentence))}
104
+
105
 
106
  def encrypt_query_fn(query):
107
 
 
153
  KEYS_DIR / f"{USER_ID}/encrypted_input_len", len(encrypted_tokens[0]).to_bytes(10, "big")
154
  )
155
 
156
+ encrypted_quant_tokens_hex = [token.hex()[500:580] for token in encrypted_tokens]
157
 
158
  return {
159
+ output_encrypted_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=4),
160
  anonymized_text_output: gr.update(visible=True, value=None),
161
  identified_words_output_df: gr.update(visible=False, value=None),
162
  }
 
522
  )
523
 
524
  with gr.Row():
525
+ with gr.Column(scale=5):
 
 
 
 
 
 
 
 
526
  original_sentences_box = gr.CheckboxGroup(
527
  ORIGINAL_DOCUMENT,
528
  value=ORIGINAL_DOCUMENT,
529
+ label="Contract:",
530
+ show_label=True,
531
  )
532
 
533
+ with gr.Column(scale=1, min_width=6):
534
+ gr.HTML("<div style='height: 77px;'></div>")
535
+ encrypt_doc_btn = gr.Button("Encrypt the document")
536
+
537
+ with gr.Column(scale=5):
538
  anonymized_doc_box = gr.Textbox(
539
+ label="Encrypted document:",
540
+ show_label=True, value=ANONYMIZED_DOCUMENT, interactive=False, lines=11
541
  )
542
 
543
  original_sentences_box.change(
544
+ fn=select_static_anonymized_sentences_fn,
545
  inputs=[original_sentences_box],
546
  outputs=[anonymized_doc_box],
547
  )
 
551
  gr.Markdown("<hr />")
552
  gr.Markdown("## Step 2.2: Select the prompt you want to encrypt\n\n"
553
  """Please choose from the predefined options in
554
+ <span style='color:grey'>“Prompt examples”</span> or craft a custom question in
555
+ the <span style='color:grey'>“Customized prompt”</span> text box.
556
+ Remain concise and relevant to the context. Any off-topic query will not be processed.""")
 
 
 
557
 
558
  with gr.Row():
559
  with gr.Column(scale=5):
 
566
  gr.Markdown("Or")
567
 
568
  query_box = gr.Textbox(
569
+ value="What is Kate international bank account number?", label="CUSTOMIZED PROMPT:", interactive=True
570
  )
571
 
572
  default_query_box.change(
 
599
 
600
  run_fhe_btn = gr.Button("Anonymize using FHE")
601
 
602
+ with gr.Row():
603
+ with gr.Column(scale=5):
604
+
605
+ anonymized_text_output = gr.Textbox(
606
+ label="Decrypted and anonymized document", lines=5, interactive=True
607
+ )
608
+
609
+ with gr.Column(scale=5):
610
+
611
+ anonymized_query_output = gr.Textbox(
612
+ label="Decrypted and anonymized prompt", lines=5, interactive=True
613
+ )
614
+
615
 
616
  identified_words_output_df = gr.Dataframe(label="Identified words:", visible=False)
617
 
 
635
  ########################## ChatGpt Part ##########################
636
 
637
  gr.Markdown("<hr />")
638
+ gr.Markdown("## Step 4: Send anonymized prompt to ChatGPT")
639
  gr.Markdown(
640
  """After securely anonymizing the query with FHE,
641
  you can forward it to ChatGPT without having any concern about information leakage."""
 
644
  chatgpt_button = gr.Button("Query ChatGPT")
645
 
646
  with gr.Row():
647
+ chatgpt_response_anonymized = gr.Textbox(label="ChatGPT's anonymized response:", lines=5)
648
  chatgpt_response_deanonymized = gr.Textbox(
649
+ label="ChatGPT's non-anonymized response:", lines=5
650
  )
651
 
652
  chatgpt_button.click(
files/anonymized_document.txt CHANGED
@@ -1,10 +1,11 @@
1
- Hello, my name is ebe99761 53a9291d and I live in 6337f12f.
2
- My credit card number is e5b499b0 and my crypto wallet id is ac41d58b.
3
 
4
- On September 18 I visited 0d574451 and sent an email to 1f78e797, from the IP 116fe81e.
5
 
6
- My passport: 59a83e41 and my phone number: 144a2acc d9e5704e.
7
 
8
- This is a valid 71d0f51c Bank Account Number: 5ca977a4. Can you please check the status on bank account 9eb07461?
9
 
10
- b474d794's social security number is d8da62f1. Her driver license? it is 5e63c327.
 
 
 
1
+ Members: e3383f5b 70fc6ec5 and 2708cb61 cda521d5
 
2
 
3
+ Date: e381418b 3534158a, 96c403e5
4
 
5
+ Scope: 2708cb61 agrees to provide graphic design services to e3383f5b for the creation of a company logo.
6
 
7
+ Amount: Bob agrees to pay 2708cb61 500 upon completion and delivery of the logo.
8
 
9
+ Deadline: The logo design must be completed and delivered to Bob within 14 days of the contract signing date.
10
+
11
+ Payment terms: 2708cb61s international bank account N: 61294a43
files/encrypted_document.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ 827c6bc946254b0ffe4695b4f0fc001f2383a4cf066b44312f9fa5a5733e6b6e097e69d08441b32ad93a3e84f95932e3142ce91a7502f5594eafdd6ccc2ad779e2e166ff0ac66e5e015ac64e5aac0979
2
+ a2a7ebe458d32843738fe5e550642bfb2c7efeb0f3b65892c25c0ed4a329f90e62dd97a58f6c852d0a4f2e7bae863381a325b679761570df764a0f6aec3f01a23516f0cee88972e5ac324f6af45d8e7ee
3
+ 2bb53d60fd24b7e24334179c30624dd49eea11c2210e2b63d2a5fc5ea20766be019c68e061cbbad75bdb3255b1428b1dd46d361ec0261dfcbc3081d1e43aaf1bfde51f81f9036e728c3931a9e3ff4b37
4
+ 3b05e1c4a0adc1a82b3d471990b60990d7d4e66132ed15fc8a18a129432e4c73c2bb74ecc89be49571090b5de8934661928c1e153dd8746858aa8ece4d59452159147f8b54da923a9eb99d1c8006b389
5
+ 788027d896e0977b3012f6d50718153aac3b6642ad7e72383ecd9def3b8f4cc6c7e6851f4491eed1c5693b56dd9ac79f03fde97bf9ad0d3c6bc1c8f94e95901ceeb4a38893f2a189e63562b43a453c3a
6
+ 426b376b438d9755946251b5da13f7585ab9557bec48f1e300c43cfa9e6f3f2bc1eef1a13d801161c8a59384914ffbd4da96b25dad84c51b77df73060a1319d72a2e5d4d7eea734ba72a1a6657aa93ba
files/mapping_clear_to_anonymized.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:944e5c32bd04e955194c513d35b91467615c08973c767745a1756d015b3e6ebb
3
- size 1085
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce0f400a2f644ddf99bcbc76f856afc1ad79055b1f01133a69e7617d257de98c
3
+ size 943
files/original_document.txt CHANGED
@@ -1,10 +1,11 @@
1
- Hello, my name is David Johnson and I live in Maine.
2
- My credit card number is 4095-2609-9393-4932 and my crypto wallet id is 16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ.
3
 
4
- On September 18 I visited microsoft.com and sent an email to test@presidio.site, from the IP 192.168.0.1.
5
 
6
- My passport: 191280342 and my phone number: (212) 555-1234.
7
 
8
- This is a valid International Bank Account Number: IL150120690000003111111. Can you please check the status on bank account 954567876544?
9
 
10
- Kate's social security number is 078-05-1126. Her driver license? it is 1234567A.
 
 
 
1
+ Members: David Johnson and Kate Hemingway
 
2
 
3
+ Date: February 06, 2000
4
 
5
+ Scope: Kate agrees to provide graphic design services to David for the creation of a company logo.
6
 
7
+ Amount: Bob agrees to pay Kate $500 upon completion and delivery of the logo.
8
 
9
+ Deadline: The logo design must be completed and delivered to Bob within 14 days of the contract signing date.
10
+
11
+ Payment terms: Kate’s international bank account N°: IL150120690000003111111
files/original_document_uuid_mapping.json CHANGED
@@ -1,19 +1,10 @@
1
  {
2
- "078-05-1126": "d8da62f1",
3
- "1234567A": "5e63c327",
4
- "16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ": "ac41d58b",
5
- "191280342": "59a83e41",
6
- "192.168.0.1": "116fe81e",
7
- "212": "144a2acc",
8
- "4095-2609-9393-4932": "e5b499b0",
9
- "555-1234": "d9e5704e",
10
- "954567876544": "9eb07461",
11
- "David": "ebe99761",
12
- "IL150120690000003111111": "5ca977a4",
13
- "International": "71d0f51c",
14
- "Johnson": "53a9291d",
15
- "Kate": "b474d794",
16
- "Maine": "6337f12f",
17
- "microsoft.com": "0d574451",
18
- "test@presidio.site": "1f78e797"
19
  }
 
1
  {
2
+ "06": "3534158a",
3
+ "2000": "96c403e5",
4
+ "David": "e3383f5b",
5
+ "February": "e381418b",
6
+ "Hemingway": "cda521d5",
7
+ "IL150120690000003111111": "61294a43",
8
+ "Johnson": "70fc6ec5",
9
+ "Kate": "2708cb61"
 
 
 
 
 
 
 
 
 
10
  }
utils_demo.py CHANGED
@@ -18,7 +18,7 @@ from pathlib import Path
18
  SERVER_URL = "http://localhost:8000/"
19
 
20
  # Maximum length for user queries
21
- MAX_USER_QUERY_LEN = 80
22
 
23
  # Base Directories
24
  CURRENT_DIR = Path(__file__).parent
@@ -38,15 +38,16 @@ LOGREG_MODEL_PATH = CURRENT_DIR / "models" / "cml_logreg.model"
38
  ORIGINAL_FILE_PATH = DATA_PATH / "original_document.txt"
39
  ANONYMIZED_FILE_PATH = DATA_PATH / "anonymized_document.txt"
40
  MAPPING_UUID_PATH = DATA_PATH / "original_document_uuid_mapping.json"
41
- MAPPING_SENTENCES_PATH = DATA_PATH / "mapping_clear_to_anonymized.pkl"
 
42
  PROMPT_PATH = DATA_PATH / "chatgpt_prompt.txt"
43
 
44
 
45
  # List of example queries for easy access
46
  DEFAULT_QUERIES = {
47
- "Example Query 1": "Who visited microsoft.com?",
48
- "Example Query 2": "Does Kate have a driving licence?",
49
- "Example Query 3": "What's David Johnson's phone number?",
50
  }
51
 
52
  # Load tokenizer and model
 
18
  SERVER_URL = "http://localhost:8000/"
19
 
20
  # Maximum length for user queries
21
+ MAX_USER_QUERY_LEN = 128
22
 
23
  # Base Directories
24
  CURRENT_DIR = Path(__file__).parent
 
38
  ORIGINAL_FILE_PATH = DATA_PATH / "original_document.txt"
39
  ANONYMIZED_FILE_PATH = DATA_PATH / "anonymized_document.txt"
40
  MAPPING_UUID_PATH = DATA_PATH / "original_document_uuid_mapping.json"
41
+ MAPPING_ANONYMIZED_SENTENCES_PATH = DATA_PATH / "mapping_clear_to_anonymized.pkl"
42
+ MAPPING_ENCRYPTED_SENTENCES_PATH = DATA_PATH / "mapping_clear_to_encrypted.pkl"
43
  PROMPT_PATH = DATA_PATH / "chatgpt_prompt.txt"
44
 
45
 
46
  # List of example queries for easy access
47
  DEFAULT_QUERIES = {
48
+ "Example Query 1": "What is the amount of the contract between David and Kate?",
49
+ "Example Query 2": "What's the duration of the contract?",
50
+ "Example Query 3": "Does Kate have an international bank account?",
51
  }
52
 
53
  # Load tokenizer and model