Spaces:

zama-fhe
/

encrypted-anonymization

Running on CPU Upgrade

App Files Files Community

kcelia committed on Mar 22

Commit

d812385

•

1 Parent(s): 9a2d521

chore: handling user query

Browse files

Files changed (2) hide show

app.py +27 -14
utils_demo.py +28 -1

app.py CHANGED Viewed

@@ -7,6 +7,8 @@ from openai import OpenAI
 import os
 import json
 import re
 anonymizer = FHEAnonymizer()
@@ -15,6 +17,17 @@ client = OpenAI(
 )
 def deidentify_text(input_text):
     anonymized_text, identified_words_with_prob = anonymizer(input_text)
@@ -74,10 +87,6 @@ def query_chatgpt(anonymized_query):
     return anonymized_response, deanonymized_response
-# Default demo text from the file
-with open("demo_text.txt", "r") as file:
-    default_demo_text = file.read()
 with open("files/original_document.txt", "r") as file:
     original_document = file.read()
@@ -128,19 +137,23 @@ with demo:
     #     """
     # )
     with gr.Row():
-        input_text = gr.Textbox(
-            value=default_demo_text,
-            lines=1,
-            placeholder="Input text here...",
-            label="Input",
         )
-        # List of example queries for easy access
-        example_queries = ["Example Query 1", "Example Query 2", "Example Query 3"]
-        examples_radio = gr.Radio(choices=example_queries, label="Example Queries")
-        examples_radio.change(lambda example_query: example_query, inputs=[examples_radio], outputs=[input_text])
     anonymized_text_output = gr.Textbox(label="Anonymized Text with FHE", lines=1, interactive=True)

 import os
 import json
 import re
+from utils_demo import *
+from typing import List, Dict, Tuple
 anonymizer = FHEAnonymizer()
 )
+def check_user_query_fn(user_query: str) -> Dict:
+    """Validate the user query and build the update for the `input_text` component.
+
+    Args:
+        user_query (str): The text to validate.
+
+    Returns:
+        Dict: A single-entry mapping from the `input_text` component to a `gr.update(...)`
+            whose value is either an error message (query rejected) or the query with
+            every run of spaces collapsed into one space (query accepted).
+    """
+    # NOTE: despite its name, `is_user_query_valid` returns True when the query must be
+    # rejected (it is not a default query and it is None or longer than the length limit),
+    # which is why the error is produced in this branch.
+    if is_user_query_valid(user_query):
+        # TODO: check if the query is related to our context
+        error_msg = ("Unable to process ❌: The request exceeds the length limit or falls "
+                    "outside the scope of this document. Please refine your query.")
+        # The error is also printed to stdout
+        print(error_msg)
+        return {input_text: gr.update(value=error_msg)}
+    else:
+        # Collapse every run of consecutive spaces into a single space
+        return {input_text: gr.update(value=re.sub(" +", " ", user_query))}
 def deidentify_text(input_text):
     anonymized_text, identified_words_with_prob = anonymizer(input_text)
     return anonymized_response, deanonymized_response
 with open("files/original_document.txt", "r") as file:
     original_document = file.read()
     #     """
     # )
+    ########################## User Query Part ##########################
     with gr.Row():
+        input_text = gr.Textbox(value="Who lives in Maine?", label="User query", interactive=True)
+        default_query_box = gr.Radio(choices=list(DEFAULT_QUERIES.keys()), label="Example Queries")
+        default_query_box.change(
+            fn=lambda default_query_box: DEFAULT_QUERIES[default_query_box],
+            inputs=[default_query_box],
+            outputs=[input_text]
         )
+        input_text.change(
+            check_user_query_fn,
+            inputs=[input_text],
+            outputs=[input_text],
+        )
     anonymized_text_output = gr.Textbox(label="Anonymized Text with FHE", lines=1, interactive=True)

utils_demo.py CHANGED Viewed

@@ -1,6 +1,15 @@
 import torch
 import numpy as np
-import random
 def get_batch_text_representation(texts, model, tokenizer, batch_size=1):
     """
@@ -20,3 +29,21 @@ def get_batch_text_representation(texts, model, tokenizer, batch_size=1):
         mean_pooled_batch.extend(mean_pooled.cpu().detach().numpy())
     return np.array(mean_pooled_batch)

 import torch
 import numpy as np
+# Maximum length, in characters, accepted for a user query that is not a default query
+MAX_USER_QUERY_LEN = 35
+# List of example queries for easy access: maps a short label to the full query text.
+# Some of these texts are longer than `MAX_USER_QUERY_LEN`; `is_user_query_valid`
+# exempts the default queries from the length check.
+DEFAULT_QUERIES = {
+    "Example Query 1": "Who visited microsoft.com on September 18?",
+    "Example Query 2": "Does Kate has drive ?",
+    "Example Query 3": "What phone number can be used to contact David Johnson?",
+}
 def get_batch_text_representation(texts, model, tokenizer, batch_size=1):
     """
         mean_pooled_batch.extend(mean_pooled.cpu().detach().numpy())
     return np.array(mean_pooled_batch)
+def is_user_query_valid(user_query: str) -> bool:
+    """
+    Check whether the `user_query` must be rejected.
+
+    NOTE: despite the function name, the returned flag is True for queries that are NOT
+    acceptable, so callers must treat True as "reject".
+    Args:
+        user_query (str): The input text to be checked.
+    Returns:
+        bool: True if the `user_query` is not one of the default queries and is either None
+            or longer than `MAX_USER_QUERY_LEN` characters, False otherwise (an empty
+            string therefore yields False).
+    """
+    # Whether the query is one of the default queries
+    is_default_query = user_query in DEFAULT_QUERIES.values()
+    # NOTE: despite the variable name, this is True when the query is not None and its
+    # length is within the limit (i.e. the limit is NOT exceeded)
+    is_exceeded_max_length = user_query is not None and len(user_query) <= MAX_USER_QUERY_LEN
+    return not is_default_query and not is_exceeded_max_length