Spaces:

jairwaal
/

anonimiseren

Sleeping

App Files Community

jairwaal committed on Jan 30, 2024

Commit

46d243a

verified ·

1 Parent(s): e03deff

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -11

app.py CHANGED Viewed

@@ -1,17 +1,64 @@
-import os
-import requests
-github_pat = os.environ['github_pat']
-raw_url = f'https://{github_pat}@raw.githubusercontent.com/waaljair/testgradio/main/run.py'
-response = requests.get(raw_url)
-if response.status_code == 200:
-    exec(response.text)
-else:
-    raise Exception(f"Failed to fetch the Python file from the repository. Status code: {response.status_code}")
 anonymizer = Anonimiseren()
-print(anonymizer)
-print("it works!")

+import os
+# SECURITY NOTE(review): the application source is read from the
+# `code_as_string` environment variable and run with exec(). Whoever can set
+# that variable can execute arbitrary code in this process - confirm it is
+# only ever populated from a trusted secret.
+code_as_string = os.environ['code_as_string']
+# exec() is called without explicit namespaces, so the code runs in this
+# module's scope and any names it defines become module globals here.
+exec(code_as_string)
+# `Anonimiseren` is neither defined nor imported in this file; presumably the
+# exec'd code provides it (and likely `gr` and `tqdm`, which are used below
+# without a visible import) - TODO confirm.
 anonymizer = Anonimiseren()
+# Number of sentences handed to process_batch per iteration of anonymize_texts.
+batch_size = 4
+def process_batch(sentences, anonymizer, desired_length):
+    """Run ``anonymizer.process_sentence`` on every sentence and gather the results column-wise.
+
+    Args:
+        sentences: Iterable of sentences, processed in order.
+        anonymizer: Object exposing ``process_sentence(sentence, desired_length)``
+            that returns exactly four values per sentence.
+        desired_length: Forwarded unchanged to ``process_sentence``.
+
+    Returns:
+        A 4-tuple of lists with one entry per sentence, in the order
+        (new sentences, original token ids, new token ids, attention masks).
+    """
+    # One list per value returned by process_sentence, kept in return order.
+    columns = ([], [], [], [])
+    for sentence in sentences:
+        rewritten, source_ids, rewritten_ids, masks = anonymizer.process_sentence(sentence, desired_length)
+        for column, value in zip(columns, (rewritten, source_ids, rewritten_ids, masks)):
+            column.append(value)
+    return columns
+def anonymize_texts(text_list, desired_length, output_type):
+    """Anonymize newline-separated sentences and format the result as text.
+
+    Args:
+        text_list: Input text; it is split on ``'\\n'`` and every resulting
+            line (blank ones included) is processed as one sentence.
+        desired_length: Value converted with ``int()`` and forwarded to
+            ``process_batch``.
+        output_type: One of ``"New Sentences"``, ``"Token IDs"``,
+            ``"Attention Masks"`` or ``"Token IDs & Attention Masks"``.
+
+    Returns:
+        A single string with one entry per input line, formatted according
+        to ``output_type``.
+
+    Raises:
+        ValueError: If ``output_type`` is not one of the supported values
+            (including ``None`` when nothing was selected).
+    """
+    supported_output_types = (
+        "New Sentences",
+        "Token IDs",
+        "Attention Masks",
+        "Token IDs & Attention Masks",
+    )
+    # Fail fast: previously an unknown or missing output type fell through
+    # every branch and silently returned None after all the work was done.
+    if output_type not in supported_output_types:
+        raise ValueError(
+            f"Unsupported output type: {output_type!r}. "
+            f"Expected one of: {', '.join(supported_output_types)}"
+        )
+
+    desired_length = int(desired_length)
+    sentences = text_list.split('\n')  # one sentence per input line
+    new_sentences, all_new_token_ids, all_attention_masks = [], [], []
+    for i in tqdm(range(0, len(sentences), batch_size), desc="Processing batches"):
+        batch = sentences[i:i + batch_size]
+        # The original token ids are not needed by any output format.
+        batch_results, _, batch_new_ids, batch_attention_masks = process_batch(batch, anonymizer, desired_length)
+        new_sentences.extend(batch_results)
+        all_new_token_ids.extend(batch_new_ids)
+        all_attention_masks.extend(batch_attention_masks)
+
+    if output_type == "New Sentences":
+        return "\n".join(new_sentences)
+    if output_type == "Token IDs":
+        return "\n".join(str(ids) for ids in all_new_token_ids)
+    if output_type == "Attention Masks":
+        return "\n".join(str(masks) for masks in all_attention_masks)
+    # Only "Token IDs & Attention Masks" remains after the validation above.
+    return "\n".join(
+        f"Token IDs: {token_ids}\nAttention Masks: {masks}\n"
+        for token_ids, masks in zip(all_new_token_ids, all_attention_masks)
+    )
+# Gradio UI: the three inputs are passed positionally to anonymize_texts as
+# (text_list, desired_length, output_type).
+interface = gr.Interface(
+    fn=anonymize_texts,
+    inputs=[
+        gr.TextArea(label="Input Text"),
+        gr.Number(label="Desired Length"),
+        # Preselect an output type so the callback always receives a valid
+        # choice, even when the user never touches the dropdown.
+        gr.Dropdown(
+            choices=["New Sentences", "Token IDs", "Attention Masks", "Token IDs & Attention Masks"],
+            value="New Sentences",
+            label="Output Type",
+        ),
+    ],
+    outputs=gr.TextArea(label="Output Text"),
+    title="Anonymizer",
+    description="Enter multiple sentences (one per line), select the amount of tokens for anonymization, and choose the output type. Note: must be in Dutch."
+)
+# SECURITY NOTE(review): the login used to be hard-coded as admin/admin.
+# Credentials are now read from the `auth_username` / `auth_password`
+# environment variables; the old pair is kept only as a fallback so existing
+# deployments keep starting - set both variables to real secrets.
+interface.launch(
+    auth=(
+        os.environ.get("auth_username", "admin"),
+        os.environ.get("auth_password", "admin"),
+    )
+)