HeshamHaroon committed
Commit 855a35b
1 Parent(s): b5bcbd1

Update app.py

Files changed (1):
  1. app.py +24 -12
app.py CHANGED
@@ -1,22 +1,33 @@
 import gradio as gr
+from random import random
 
-# Placeholder function for loading the tokenizer
-def load_tokenizer(tokenizer_choice):
-    # Placeholder implementation - load your tokenizer here based on the choice
-    return None
+# Assuming `aranizer` is a library that provides these tokenizer classes or functions (pseudo-code for demonstration only)
+# Please implement actual imports and tokenizer initialization logic
+def get_tokenizer(tokenizer_choice):
+    # Placeholder - Replace with actual tokenizer loading logic
+    tokenizer_map = {
+        "aranizer_bpe32k": None,  # Replace None with actual tokenizer, e.g., aranizer_bpe32k.get_tokenizer()
+        # Add other tokenizers here
+    }
+    return tokenizer_map.get(tokenizer_choice, None)
 
-def tokenize_and_encode(text, tokenizer_choice):
-    tokenizer = load_tokenizer(tokenizer_choice)
+def tokenize_and_encode_and_embed(text, tokenizer_choice):
+    tokenizer = get_tokenizer(tokenizer_choice)
     if tokenizer:
+        # Example methods. Replace with actual methods from your tokenizer
         tokens = tokenizer.tokenize(text)
         encoded_output = tokenizer.encode(text, add_special_tokens=True)
         decoded_text = tokenizer.decode(encoded_output)
-        return " ".join(tokens), str(encoded_output), decoded_text
+
+        # Example embedding (replace with actual embedding generation from your model)
+        embeddings = [random() for _ in range(10)]  # Example 10-dimensional embedding vector
+
+        return " ".join(tokens), str(encoded_output), decoded_text, embeddings
     else:
-        return "Tokenizer not loaded correctly", "", ""
+        return "Tokenizer not loaded correctly", "", "", []
 
 demo = gr.Interface(
-    fn=tokenize_and_encode,
+    fn=tokenize_and_encode_and_embed,
     inputs=[
         gr.Textbox(lines=5, label="النص العربي"),
         gr.Dropdown(choices=["aranizer_bpe32k", "aranizer_bpe50k", "aranizer_bpe64k", "aranizer_bpe86k", "aranizer_sp32k", "aranizer_sp50k", "aranizer_sp64k", "aranizer_sp86k"], label="اختر المحلل اللفظي")
@@ -24,10 +35,11 @@ demo = gr.Interface(
     outputs=[
         gr.Text(label="Tokens"),
         gr.Text(label="Encoded Output"),
-        gr.Text(label="Decoded Text")
+        gr.Text(label="Decoded Text"),
+        gr.Text(label="Embeddings (Example Vector)")
     ],
-    title="مقارنة المحللات اللفظية للنص العربي",
-    description="حدد نوع المحلل اللفظي وأدخل نصًا لرؤية النتائج."
+    title="مقارنة المحللات اللفظية وعمليات التضمين للنص العربي",
+    description="حدد نوع المحلل اللفظي وأدخل نصًا لرؤية النتائج ومتجه التضمين.",
 )
 
 demo.launch()
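
For reference, one way the placeholder get_tokenizer could be completed is sketched below. This is a minimal sketch and not part of the commit: it assumes the aranizer package exposes one submodule per dropdown choice (aranizer_bpe32k, aranizer_sp50k, and so on), each providing a get_tokenizer() factory, as the in-code comment "e.g., aranizer_bpe32k.get_tokenizer()" suggests.

import importlib

# All choices offered in the Gradio dropdown above.
TOKENIZER_CHOICES = [
    "aranizer_bpe32k", "aranizer_bpe50k", "aranizer_bpe64k", "aranizer_bpe86k",
    "aranizer_sp32k", "aranizer_sp50k", "aranizer_sp64k", "aranizer_sp86k",
]

def get_tokenizer(tokenizer_choice):
    # Sketch only: assumes an `aranizer.<choice>` submodule with a get_tokenizer() factory.
    if tokenizer_choice not in TOKENIZER_CHOICES:
        return None
    module = importlib.import_module(f"aranizer.{tokenizer_choice}")  # assumed package layout
    return module.get_tokenizer()

Importing lazily via importlib means only the selected vocabulary is loaded for a given request, and Python's module cache (sys.modules) keeps repeated calls cheap.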