jairwaal committed on
Commit
46d243a
·
verified ·
1 Parent(s): e03deff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -11
app.py CHANGED
@@ -1,17 +1,64 @@
1
- import os
2
- import requests
3
 
4
- github_pat = os.environ['github_pat']
 
5
 
6
- raw_url = f'https://{github_pat}@raw.githubusercontent.com/waaljair/testgradio/main/run.py'
7
 
8
- response = requests.get(raw_url)
9
- if response.status_code == 200:
10
- exec(response.text)
11
- else:
12
- raise Exception(f"Failed to fetch the Python file from the repository. Status code: {response.status_code}")
13
 
14
  anonymizer = Anonimiseren()
15
 
16
- print(anonymizer)
17
- print("it works!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
+ import os
3
# The application logic is not stored in this file: its source text is read
# from the `code_as_string` environment variable and executed in this module's
# namespace. `Anonimiseren` is not defined or imported anywhere in this file,
# so it is presumably provided by that code (likewise `tqdm` and `gr`, which
# are used further down) -- TODO confirm.
try:
    code_as_string = os.environ['code_as_string']
except KeyError:
    # Same exception type as before, but say what has to be configured.
    raise KeyError(
        "The 'code_as_string' environment variable is not set; it must "
        "contain the Python source that defines Anonimiseren."
    ) from None

# NOTE(review): exec() runs whatever the environment variable contains with the
# full privileges of this process. Anyone who can edit that variable can run
# arbitrary code here; keep it restricted to trusted maintainers.
exec(code_as_string)

anonymizer = Anonimiseren()

# Number of sentences handed to process_batch() per call.
batch_size = 4
11
+
12
def process_batch(sentences, anonymizer, desired_length):
    """Run the anonymizer over one batch of sentences and collect the results.

    Args:
        sentences: Iterable of sentences to process, in order.
        anonymizer: Object exposing ``process_sentence(sentence, desired_length)``
            which returns the 4-tuple ``(new_sentence, original_token_ids,
            new_token_ids, attention_masks)``.
        desired_length: Passed through unchanged to ``process_sentence``.

    Returns:
        A 4-tuple of lists ``(new_sentences, original_token_ids, new_token_ids,
        attention_masks)``; each list has one entry per input sentence, in
        input order.
    """
    processed = [
        anonymizer.process_sentence(sentence, desired_length)
        for sentence in sentences
    ]
    if not processed:
        # zip(*[]) yields no columns, so return the four empty lists explicitly.
        return [], [], [], []

    # Transpose the per-sentence 4-tuples into four per-field lists.
    new_sentences, original_token_ids, new_token_ids, attention_masks = (
        list(column) for column in zip(*processed)
    )
    return new_sentences, original_token_ids, new_token_ids, attention_masks
24
+
25
def anonymize_texts(text_list, desired_length, output_type):
    """Anonymize newline-separated sentences and format the requested output.

    Args:
        text_list: Input text; each line is treated as one sentence.
        desired_length: Value convertible with ``int()``; forwarded to the
            anonymizer for every sentence.
        output_type: One of ``"New Sentences"``, ``"Token IDs"``,
            ``"Attention Masks"`` or ``"Token IDs & Attention Masks"``.

    Returns:
        A single string with one entry per input sentence, joined by newlines,
        in the representation selected by ``output_type``.

    Raises:
        ValueError: If ``output_type`` is not one of the supported values
            (including ``None`` when nothing was selected).
    """
    valid_output_types = (
        "New Sentences",
        "Token IDs",
        "Attention Masks",
        "Token IDs & Attention Masks",
    )
    # Fail fast: an unsupported output type used to be noticed only after every
    # sentence had been processed, and the function then returned None silently.
    if output_type not in valid_output_types:
        raise ValueError(
            f"Unknown output type {output_type!r}; "
            f"expected one of {valid_output_types}."
        )

    desired_length = int(desired_length)
    sentences = text_list.split('\n')  # one sentence per input line
    new_sentences, all_new_token_ids, all_attention_masks = [], [], []

    for start in tqdm(range(0, len(sentences), batch_size), desc="Processing batches"):
        batch = sentences[start:start + batch_size]
        # The original token ids are returned by process_batch but are not
        # needed by any output type, so they are not accumulated.
        batch_results, _, batch_new_ids, batch_attention_masks = process_batch(
            batch, anonymizer, desired_length
        )
        new_sentences.extend(batch_results)
        all_new_token_ids.extend(batch_new_ids)
        all_attention_masks.extend(batch_attention_masks)

    if output_type == "New Sentences":
        return "\n".join(new_sentences)
    if output_type == "Token IDs":
        return "\n".join(str(ids) for ids in all_new_token_ids)
    if output_type == "Attention Masks":
        return "\n".join(str(masks) for masks in all_attention_masks)
    # Only "Token IDs & Attention Masks" remains after the validation above.
    return "\n".join(
        f"Token IDs: {token_ids}\nAttention Masks: {masks}\n"
        for token_ids, masks in zip(all_new_token_ids, all_attention_masks)
    )
51
+
52
# Web UI: a text area with one sentence per line, a numeric length, and a
# selector for which representation of the result to display.
interface = gr.Interface(
    fn=anonymize_texts,
    inputs=[
        gr.TextArea(label="Input Text"),
        gr.Number(label="Desired Length"),
        gr.Dropdown(
            choices=["New Sentences", "Token IDs", "Attention Masks", "Token IDs & Attention Masks"],
            # Pre-select a valid choice so the callback is not handed None
            # when the user never touches the dropdown.
            value="New Sentences",
            label="Output Type",
        ),
    ],
    outputs=gr.TextArea(label="Output Text"),
    title="Anonymizer",
    description="Enter multiple sentences (one per line), select the amount of tokens for anonymization, and choose the output type. Note: must be in Dutch.",
)

# NOTE(review): the login used to be hard-coded as admin/admin. The credentials
# can now be supplied through the APP_AUTH_USERNAME / APP_AUTH_PASSWORD
# environment variables; the old values remain only as a fallback so existing
# deployments keep starting. Set both variables to real secrets.
auth_username = os.environ.get("APP_AUTH_USERNAME", "admin")
auth_password = os.environ.get("APP_AUTH_PASSWORD", "admin")

interface.launch(auth=(auth_username, auth_password))