milyiyo committed on
Commit
5a34301
0 Parent(s):

Duplicate from milyiyo/paraphrase_es

Browse files
Files changed (4) hide show
  1. .gitattributes +33 -0
  2. README.md +13 -0
  3. app.py +70 -0
  4. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.npy filter=lfs diff=lfs merge=lfs -text
14
+ *.npz filter=lfs diff=lfs merge=lfs -text
15
+ *.onnx filter=lfs diff=lfs merge=lfs -text
16
+ *.ot filter=lfs diff=lfs merge=lfs -text
17
+ *.parquet filter=lfs diff=lfs merge=lfs -text
18
+ *.pb filter=lfs diff=lfs merge=lfs -text
19
+ *.pickle filter=lfs diff=lfs merge=lfs -text
20
+ *.pkl filter=lfs diff=lfs merge=lfs -text
21
+ *.pt filter=lfs diff=lfs merge=lfs -text
22
+ *.pth filter=lfs diff=lfs merge=lfs -text
23
+ *.rar filter=lfs diff=lfs merge=lfs -text
24
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
25
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
26
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
27
+ *.tflite filter=lfs diff=lfs merge=lfs -text
28
+ *.tgz filter=lfs diff=lfs merge=lfs -text
29
+ *.wasm filter=lfs diff=lfs merge=lfs -text
30
+ *.xz filter=lfs diff=lfs merge=lfs -text
31
+ *.zip filter=lfs diff=lfs merge=lfs -text
32
+ *.zst filter=lfs diff=lfs merge=lfs -text
33
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Paraphrase Es
3
+ emoji: 🏢
4
+ colorFrom: red
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.8
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: milyiyo/paraphrase_es
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
4
+
5
# Hub access token, if one is configured for this deployment.  Reading it with
# .get() means a missing AUTH_TOKEN no longer aborts start-up with a KeyError;
# None is then passed through as "no token supplied".
# NOTE(review): presumably the checkpoints below are publicly downloadable, in
# which case no token is needed at all -- confirm before removing the variable.
_AUTH_TOKEN = os.environ.get("AUTH_TOKEN")

# Checkpoint used for English paraphrase generation (named once so the
# tokenizer and the model cannot drift apart).
_PARAPHRASE_CHECKPOINT = "prithivida/parrot_paraphraser_on_T5"

tokenizer = AutoTokenizer.from_pretrained(
    _PARAPHRASE_CHECKPOINT, use_auth_token=_AUTH_TOKEN)
model = AutoModelForSeq2SeqLM.from_pretrained(
    _PARAPHRASE_CHECKPOINT, use_auth_token=_AUTH_TOKEN)

# Spanish -> English translation, applied to the input sentence before it is
# handed to the paraphraser.
pln_es_to_en = pipeline('translation_es_to_en',
                        model=AutoModelForSeq2SeqLM.from_pretrained(
                            'Helsinki-NLP/opus-mt-es-en'),
                        tokenizer=AutoTokenizer.from_pretrained(
                            'Helsinki-NLP/opus-mt-es-en')
                        )

# English -> Spanish translation, applied to each generated paraphrase.
pln_en_to_es = pipeline('translation_en_to_es',
                        model=AutoModelForSeq2SeqLM.from_pretrained(
                            'Helsinki-NLP/opus-mt-en-es'),
                        tokenizer=AutoTokenizer.from_pretrained(
                            'Helsinki-NLP/opus-mt-en-es')
                        )
23
+
24
+
25
def paraphrase(sentence: str, lang: str, count: str):
    """Generate paraphrases of *sentence*.

    The paraphrase model is prompted in English.  Unless ``lang`` is ``'EN'``
    the sentence is first translated Spanish -> English and every generated
    paraphrase is translated back English -> Spanish.

    Args:
        sentence: Text to paraphrase.  ``None``, empty, or whitespace-only
            text yields an empty result.
        lang: ``'EN'`` to work directly in English.  Any other value
            (including ``None`` when nothing was selected) is treated as
            Spanish, so the input and output translation steps always agree.
        count: Number of paraphrases to sample; anything ``int()`` accepts.
            ``None`` or a value <= 0 yields an empty result.

    Returns:
        ``{'result': [...]}`` holding the paraphrases as strings.
    """
    # A cleared form field can arrive as None; treat it like "nothing to do"
    # instead of failing inside int() / .strip().
    if sentence is None or count is None:
        return {'result': []}
    p_count = int(count)
    if p_count <= 0 or not sentence.strip():
        return {'result': []}

    # One flag drives both translation steps.  Previously the input was only
    # translated for lang == 'ES' while the output was translated for every
    # lang != 'EN', so an unset language produced untranslated input with
    # translated output.
    via_translation = lang != 'EN'

    sentence_input = sentence
    if via_translation:
        sentence_input = pln_es_to_en(sentence_input)[0]['translation_text']

    # NOTE(review): the explicit "</s>" is kept exactly as before; the
    # tokenizer may also append its own end-of-sequence token -- confirm
    # whether the manual marker is still wanted.
    text = f"paraphrase: {sentence_input} </s>"
    encoding = tokenizer.encode_plus(text, padding=True, return_tensors="pt")
    outputs = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        max_length=512,
        do_sample=True,
        top_k=120,
        top_p=0.95,
        early_stopping=True,
        num_return_sequences=p_count,
    )

    res = [
        tokenizer.decode(
            output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for output in outputs
    ]
    if not via_translation:
        return {'result': res}
    return {'result': [pln_en_to_es(x)[0]['translation_text'] for x in res]}
55
+
56
def paraphrase_dummy(sentence: str, lang: str, count: str):
    """Placeholder taking the same arguments as ``paraphrase``.

    All three arguments are ignored; a fresh ``{'result': []}`` dictionary is
    returned on every call.
    """
    empty_result = {'result': []}
    return empty_result
58
+
59
+
60
# Web form wired to paraphrase(): sentence, language and count in, JSON out.
# Components come from the top-level gradio namespace rather than the
# deprecated gr.inputs / gr.outputs shims (whose `default=` keyword is spelled
# `value=` on the top-level components).
iface = gr.Interface(
    fn=paraphrase,
    inputs=[
        gr.Textbox(lines=2, placeholder=None, label='Sentence'),
        # Pre-select a language so the callback never receives an unset
        # (None) language from an untouched dropdown.
        gr.Dropdown(['ES', 'EN'], value='ES', type="value", label='Language'),
        gr.Number(value=3, label='Paraphrases count'),
    ],
    outputs=[gr.JSON(label=None)],
)
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ sentencepiece
4
+
5
+ # Recommended in a warning during pip install
6
+ sacremoses