zeerafle committed on
Commit
cb04a82
1 Parent(s): 9486f13

initial commit

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/frasaria.iml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/venv" />
6
+ </content>
7
+ <orderEntry type="jdk" jdkName="Python 3.9 (frasaria) (2)" jdkType="Python SDK" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="MarkdownSettingsMigration">
4
+ <option name="stateVersion" value="1" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (frasaria) (2)" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/frasaria.iml" filepath="$PROJECT_DIR$/.idea/frasaria.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
app.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from frasaria.frasaria import paraphrase_text
3
+
4
# Build the paraphrasing demo and start serving it.
# Both text fields use the top-level ``gr.Textbox`` component, matching the
# top-level ``gr.Radio`` already used here, instead of the deprecated
# ``gr.inputs`` / ``gr.outputs`` namespaces.
gr.Interface(
    paraphrase_text,
    inputs=[
        gr.Textbox(lines=5, placeholder=None, label='Text'),
        gr.Radio(['id', 'en'], value='id', label='Source Language'),
    ],
    outputs=[gr.Textbox(label=None)],
).launch()
frasaria/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from parrot import Parrot
3
+
4
# Model handles are created once, when the package is first imported.
parrot = Parrot(model_tag='prithivida/parrot_paraphraser_on_T5')

# Translation pipelines; the direction of each one is taken from its variable
# and model name (id -> en and en -> id).
translator_id_to_en = pipeline(
    task='translation',
    model='PontifexMaximus/opus-mt-id-en-finetuned-id-to-en',
)
translator_en_to_id = pipeline(
    task='translation',
    model='Helsinki-NLP/opus-mt-en-id',
)
frasaria/frasaria.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from frasaria import parrot, translator_id_to_en, translator_en_to_id
3
+ import warnings
4
+ warnings.filterwarnings('ignore')
5
+
6
def split_sentences(text: str) -> list[str]:
    """Split *text* into sentences on full stops.

    Each piece is stripped of surrounding whitespace. Pieces that end up
    empty (from a trailing full stop, consecutive full stops, or empty
    input) are dropped so that callers never receive blank sentences.

    Args:
        text: The text to split.

    Returns:
        The non-empty, stripped sentences in their original order.
    """
    stripped_pieces = (piece.strip() for piece in text.split('.'))
    return [sentence for sentence in stripped_pieces if sentence]
8
+
9
def translate(text, source, target='en'):
    """Translate *text* with one of the two module-level translators.

    Args:
        text: The text handed to the translator.
        source: ``'en'`` selects ``translator_en_to_id``; any other value
            selects ``translator_id_to_en``.
        target: Accepted for the call signature but never consulted; the
            direction is decided by ``source`` alone.

    Returns:
        Whatever the selected translator returns for *text*.
    """
    translator = translator_en_to_id if source == 'en' else translator_id_to_en
    return translator(text)
14
+
15
def paraphrase(parrot, phrase):
    """Return the best-scoring paraphrase of *phrase*, or *phrase* itself.

    Args:
        parrot: Object whose ``augment`` method is called with *phrase*,
            ``max_return_phrases=10`` and ``do_diverse=True``. Each item it
            returns is indexed as ``item[0]`` (paraphrase text) and
            ``item[-1]`` (score).
        phrase: The sentence to paraphrase.

    Returns:
        The text of the candidate with the highest score (the earliest one
        on ties), or *phrase* unchanged when no candidates are produced.
    """
    candidates = parrot.augment(phrase, max_return_phrases=10, do_diverse=True)

    # Sometimes paraphrasing yields nothing (None or an empty list); fall
    # back to the original phrase instead of failing on the lookup below.
    if not candidates:
        return phrase

    # max() keeps the first of equally scored candidates.
    best_candidate = max(candidates, key=lambda candidate: candidate[-1])
    return best_candidate[0]
30
+
31
def paraphrase_text(text, source_lang):
    """Paraphrase *text* sentence by sentence and return it in *source_lang*.

    The text is split into sentences. When ``source_lang`` is ``'id'`` each
    sentence is first translated with ``translate`` so that the paraphraser
    receives the translated text, and the joined result is translated back
    at the end. For any other ``source_lang`` the sentences are paraphrased
    directly and returned without a final translation, so the output stays
    in the language of the input.

    Args:
        text: The text to paraphrase.
        source_lang: ``'id'`` enables the translate / translate-back round
            trip; any other value skips both translation steps.

    Returns:
        The paraphrased sentences joined with ``'. '``; an empty string when
        *text* contains no non-empty sentence.
    """
    # Blank pieces (e.g. after a trailing full stop) carry nothing to
    # translate or paraphrase, so they are skipped here.
    phrases = [phrase for phrase in split_sentences(text) if phrase]
    print('split_original phrases', phrases)
    if not phrases:
        return ''

    needs_round_trip = source_lang == 'id'
    if needs_round_trip:
        phrases = [
            translate(phrase, source_lang)[0]['translation_text']
            for phrase in phrases
        ]

    print('phrase after translated or not', phrases)
    paraphrased = '. '.join(paraphrase(parrot, phrase) for phrase in phrases)

    # Only translate back when the input was translated in the first place;
    # otherwise the paraphrase is already in the source language.
    if not needs_round_trip:
        return paraphrased
    return translate(paraphrased, 'en', 'id')[0]['translation_text']
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ transformers
2
+ torch
3
+ git+https://github.com/PrithivirajDamodaran/Parrot_Paraphraser.git