File size: 1,800 Bytes
a2b4a62
8961013
a2b4a62
 
 
1ef9f8e
 
 
 
 
 
 
 
a2b4a62
 
1ef9f8e
a2b4a62
1ef9f8e
a2b4a62
 
1ef9f8e
 
 
 
 
 
 
 
 
 
a2b4a62
 
1ef9f8e
 
 
 
 
 
 
 
426cfa6
1ef9f8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2b4a62
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


# Single source of truth for the Hub checkpoint, so the tokenizer and the
# model are always loaded from the same repository.
_CHECKPOINT = "salti/arabic-t5-small-question-paraphrasing"

# Explicitly request the fast tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT, use_fast=True)

# Load the seq2seq weights, then switch the module to evaluation mode
# (`eval()` returns the module itself, so calling it separately is equivalent
# to chaining it onto the loader).
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
model.eval()

# Arabic for "Rephrase: " -- prepended to every question before tokenization.
# NOTE(review): presumably the task prefix this checkpoint was trained with;
# confirm against the model card before changing it.
prompt = "أعد صياغة: "


@torch.inference_mode()
def paraphrase(question, num_beams, encoder_no_repeat_ngram_size):
    """Return a paraphrase of ``question`` produced by the module-level model.

    The module-level ``prompt`` is prepended to the question, the result is
    tokenized, passed through ``model.generate`` and decoded back to text.

    Args:
        question: Text of the question to rephrase. ``None`` or a blank
            string yields an empty result without running the model.
        num_beams: Beam width forwarded to ``model.generate``. Coerced to
            ``int`` because UI sliders may deliver numeric values as floats.
        encoder_no_repeat_ngram_size: Forwarded to ``model.generate`` under
            the same name; also coerced to ``int``.

    Returns:
        The decoded generation with special tokens stripped, or ``""`` when
        there is no question to rephrase.
    """
    # Nothing to rephrase: avoid `prompt + None` (TypeError) and avoid feeding
    # the model a prompt-only input.
    if question is None or not question.strip():
        return ""

    input_ids = tokenizer(prompt + question, return_tensors="pt").input_ids
    generated = model.generate(
        input_ids,
        num_beams=int(num_beams),
        encoder_no_repeat_ngram_size=int(encoder_no_repeat_ngram_size),
    )

    # Only one prompt is submitted, so row 0 is the sole generated sequence.
    # Indexing (unlike `.squeeze()`) keeps the sequence axis even if the
    # generation happens to be a single token long.
    token_ids = generated[0].tolist()
    return tokenizer.decode(token_ids, skip_special_tokens=True)


# --- Input widgets ---------------------------------------------------------
# NOTE(review): `gr.inputs` / `gr.outputs` are the legacy Gradio component
# namespaces -- confirm the Gradio version pinned for this app still ships them.

# Free-text field; the label reads "Write a question in Arabic".
question = gr.inputs.Textbox(label="اكتب سؤالاً باللغة العربية")

# Slider from 1 to 10 in steps of 1, starting at 1.
num_beams = gr.inputs.Slider(
    minimum=1, maximum=10, step=1, default=1, label="Beam size"
)

# Slider from 0 to 10 in steps of 1, starting at 3.
encoder_no_repeat_ngram_size = gr.inputs.Slider(
    minimum=0,
    maximum=10,
    step=1,
    default=3,
    label="N-grams of this size won't be copied from the input (forces more diverse outputs)",
)

# --- Output widget ---------------------------------------------------------
# The label reads "The question in a different wording".
outputs = gr.outputs.Textbox(label="السؤال بصيغة مختلفة")

# --- Examples --------------------------------------------------------------
# One row per sample question, laid out in the same order as the inputs:
# [question, num_beams, encoder_no_repeat_ngram_size]. Every sample uses a
# beam size of 5 and an n-gram size of 3.
examples = [
    [sample_question, 5, 3]
    for sample_question in (
        "متى تم اختراع الكتابة؟",  # "When was writing invented?"
        "ما عدد حروف اللغة العربية؟",  # "How many letters does Arabic have?"
        "ما هو الذكاء الصنعي؟",  # "What is artificial intelligence?"
    )
]

# --- App -------------------------------------------------------------------
# Bind the widgets to `paraphrase`; the order of `inputs` matches the
# function's positional parameters.
iface = gr.Interface(
    title="Arabic question paraphrasing",
    theme="huggingface",
    fn=paraphrase,
    inputs=[question, num_beams, encoder_no_repeat_ngram_size],
    outputs=outputs,
    examples=examples,
)

# Start serving the interface as soon as the script runs.
iface.launch()