zeerafle committed on
Commit
fa8f670
1 Parent(s): e200545

refactor: change translation method

Browse files
Files changed (6) hide show
  1. .gitignore +207 -0
  2. .idea/frasaria.iml +2 -1
  3. .idea/misc.xml +4 -1
  4. app.py +8 -6
  5. frasaria/frasaria.py +23 -31
  6. requirements.txt +7 -3
.gitignore ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### venv template
2
+ # Virtualenv
3
+ # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
4
+ .Python
5
+ [Bb]in
6
+ [Ii]nclude
7
+ [Ll]ib
8
+ [Ll]ib64
9
+ [Ll]ocal
10
+ [Ss]cripts
11
+ pyvenv.cfg
12
+ .venv
13
+ pip-selfcheck.json
14
+
15
+ ### PythonVanilla template
16
+ # Byte-compiled / optimized / DLL files
17
+ __pycache__/
18
+ *.py[cod]
19
+ *$py.class
20
+
21
+ # C extensions
22
+ *.so
23
+
24
+ # Distribution / packaging
25
+ build/
26
+ develop-eggs/
27
+ dist/
28
+ downloads/
29
+ eggs/
30
+ .eggs/
31
+ lib/
32
+ lib64/
33
+ parts/
34
+ sdist/
35
+ var/
36
+ wheels/
37
+ share/python-wheels/
38
+ *.egg-info/
39
+ .installed.cfg
40
+ *.egg
41
+ MANIFEST
42
+
43
+ # Installer logs
44
+ pip-log.txt
45
+ pip-delete-this-directory.txt
46
+
47
+ # Unit test / coverage reports
48
+ htmlcov/
49
+ .tox/
50
+ .nox/
51
+ .coverage
52
+ .coverage.*
53
+ .cache
54
+ nosetests.xml
55
+ coverage.xml
56
+ *.cover
57
+ *.py,cover
58
+ .hypothesis/
59
+ .pytest_cache/
60
+ cover/
61
+
62
+ # Translations
63
+ *.mo
64
+ *.pot
65
+
66
+ # pyenv
67
+ # For a library or package, you might want to ignore these files since the code is
68
+ # intended to run in multiple environments; otherwise, check them in:
69
+ # .python-version
70
+
71
+ # pipenv
72
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
73
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
74
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
75
+ # install all needed dependencies.
76
+ #Pipfile.lock
77
+
78
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
79
+ __pypackages__/
80
+
81
+
82
+ ### VirtualEnv template
83
+ # Virtualenv
84
+ # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
85
+
86
+ ### Python template
87
+ # Byte-compiled / optimized / DLL files
88
+
89
+ # C extensions
90
+
91
+ # Distribution / packaging
92
+
93
+ # PyInstaller
94
+ # Usually these files are written by a python script from a template
95
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
96
+ *.manifest
97
+ *.spec
98
+
99
+ # Installer logs
100
+
101
+ # Unit test / coverage reports
102
+
103
+ # Translations
104
+
105
+ # Django stuff:
106
+ *.log
107
+ local_settings.py
108
+ db.sqlite3
109
+ db.sqlite3-journal
110
+
111
+ # Flask stuff:
112
+ instance/
113
+ .webassets-cache
114
+
115
+ # Scrapy stuff:
116
+ .scrapy
117
+
118
+ # Sphinx documentation
119
+ docs/_build/
120
+
121
+ # PyBuilder
122
+ .pybuilder/
123
+ target/
124
+
125
+ # Jupyter Notebook
126
+ .ipynb_checkpoints
127
+
128
+ # IPython
129
+ profile_default/
130
+ ipython_config.py
131
+
132
+ # pyenv
133
+ # For a library or package, you might want to ignore these files since the code is
134
+ # intended to run in multiple environments; otherwise, check them in:
135
+ # .python-version
136
+
137
+ # pipenv
138
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
139
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
140
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
141
+ # install all needed dependencies.
142
+ #Pipfile.lock
143
+
144
+ # poetry
145
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
146
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
147
+ # commonly ignored for libraries.
148
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
149
+ #poetry.lock
150
+
151
+ # pdm
152
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
153
+ #pdm.lock
154
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
155
+ # in version control.
156
+ # https://pdm.fming.dev/#use-with-ide
157
+ .pdm.toml
158
+
159
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
160
+
161
+ # Celery stuff
162
+ celerybeat-schedule
163
+ celerybeat.pid
164
+
165
+ # SageMath parsed files
166
+ *.sage.py
167
+
168
+ # Environments
169
+ .env
170
+ env/
171
+ venv/
172
+ ENV/
173
+ env.bak/
174
+ venv.bak/
175
+
176
+ # Spyder project settings
177
+ .spyderproject
178
+ .spyproject
179
+
180
+ # Rope project settings
181
+ .ropeproject
182
+
183
+ # mkdocs documentation
184
+ /site
185
+
186
+ # mypy
187
+ .mypy_cache/
188
+ .dmypy.json
189
+ dmypy.json
190
+
191
+ # Pyre type checker
192
+ .pyre/
193
+
194
+ # pytype static type analyzer
195
+ .pytype/
196
+
197
+ # Cython debug symbols
198
+ cython_debug/
199
+
200
+ # PyCharm
201
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
202
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
203
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
204
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
205
+ #.idea/
206
+
207
+ /flagged/
.idea/frasaria.iml CHANGED
@@ -2,9 +2,10 @@
2
  <module type="PYTHON_MODULE" version="4">
3
  <component name="NewModuleRootManager">
4
  <content url="file://$MODULE_DIR$">
 
5
  <excludeFolder url="file://$MODULE_DIR$/venv" />
6
  </content>
7
- <orderEntry type="jdk" jdkName="Python 3.9 (frasaria) (2)" jdkType="Python SDK" />
8
  <orderEntry type="sourceFolder" forTests="false" />
9
  </component>
10
  </module>
 
2
  <module type="PYTHON_MODULE" version="4">
3
  <component name="NewModuleRootManager">
4
  <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/.venv" />
6
  <excludeFolder url="file://$MODULE_DIR$/venv" />
7
  </content>
8
+ <orderEntry type="jdk" jdkName="Python 3.10 (frasaria-spaces)" jdkType="Python SDK" />
9
  <orderEntry type="sourceFolder" forTests="false" />
10
  </component>
11
  </module>
.idea/misc.xml CHANGED
@@ -1,7 +1,10 @@
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
 
 
 
3
  <component name="MarkdownSettingsMigration">
4
  <option name="stateVersion" value="1" />
5
  </component>
6
- <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (frasaria) (2)" project-jdk-type="Python SDK" />
7
  </project>
 
1
  <?xml version="1.0" encoding="UTF-8"?>
2
  <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="Python 3.10 (frasaria-spaces)" />
5
+ </component>
6
  <component name="MarkdownSettingsMigration">
7
  <option name="stateVersion" value="1" />
8
  </component>
9
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (frasaria-spaces)" project-jdk-type="Python SDK" />
10
  </project>
app.py CHANGED
@@ -1,9 +1,11 @@
1
  import gradio as gr
2
  from frasaria.frasaria import paraphrase_text
3
 
4
- gr.Interface(
5
- paraphrase_text,
6
- inputs=[gr.inputs.Textbox(lines=5, placeholder=None, label='Text'),
7
- gr.Radio(['id', 'en'], value='id', label='Source Language')],
8
- outputs=[gr.outputs.Textbox(label=None)]
9
- ).launch()
 
 
 
1
  import gradio as gr
2
  from frasaria.frasaria import paraphrase_text
3
 
4
+ demo = gr.Interface(
5
+ fn=paraphrase_text,
6
+ inputs=gr.Textbox(lines=5, placeholder=None, label='Text'),
7
+ outputs=gr.Textbox(label=None)
8
+ )
9
+
10
+ if __name__ == '__main__':
11
+ demo.launch()
frasaria/frasaria.py CHANGED
@@ -1,26 +1,17 @@
1
- import gradio as gr
2
- from frasaria import parrot, translator_id_to_en, translator_en_to_id
3
  import warnings
 
4
  warnings.filterwarnings('ignore')
5
 
 
6
  def split_sentences(text: str):
7
- return [sentence.strip() for sentence in text.split('.')]
8
 
9
- def translate(text, source, target='en'):
10
- if source == 'en':
11
- return translator_en_to_id(text)
12
- else:
13
- return translator_id_to_en(text)
14
 
15
- def paraphrase(parrot, phrase):
16
- para_phrases = parrot.augment(phrase, max_return_phrases=10, do_diverse=True)
17
  max_score = 0
18
- # sometimes the paraphrasing doesn't return anything, if so, return the original
19
- try:
20
- paraphrased = para_phrases[0][0]
21
- except TypeError:
22
- return phrase
23
-
24
  for para_phrase in para_phrases:
25
  score = para_phrase[-1]
26
  if score > max_score:
@@ -28,21 +19,22 @@ def paraphrase(parrot, phrase):
28
  paraphrased = para_phrase[0]
29
  return paraphrased
30
 
31
- def paraphrase_text(text, source_lang):
32
- phrases = split_sentences(text)
33
- print('split_original phrases', phrases)
34
 
35
- if source_lang == 'id':
36
- en_sentences = []
37
- for phrase in phrases:
38
- en_sentences.append(translate(phrase, source_lang)[0]['translation_text'])
39
- phrases = en_sentences
40
 
41
- print('phrase after translated or not', phrases)
42
  sentences = []
43
- for phrase in phrases:
44
- sentences.append(paraphrase(parrot, phrase))
45
-
46
- paraphrased = '. '.join(sentences)
47
- source_lang_paraphrased = translate(paraphrased, 'en', 'id')
48
- return source_lang_paraphrased[0]['translation_text']
 
 
 
 
 
 
 
1
+ from frasaria import parrot, translator
 
2
  import warnings
3
+
4
  warnings.filterwarnings('ignore')
5
 
6
+
7
  def split_sentences(text: str):
8
+ return [sentence.strip() for sentence in text.split(".")]
9
 
 
 
 
 
 
10
 
11
+ def paraphrase(phrases):
12
+ para_phrases = parrot.augment(phrases, max_return_phrases=10, do_diverse=True)
13
  max_score = 0
14
+ paraphrased = para_phrases[0][0]
 
 
 
 
 
15
  for para_phrase in para_phrases:
16
  score = para_phrase[-1]
17
  if score > max_score:
 
19
  paraphrased = para_phrase[0]
20
  return paraphrased
21
 
 
 
 
22
 
23
+ def paraphrase_text(phrase):
24
+ phrase = phrase.replace('"', "'")
25
+ result = translator.translate_text(phrase, target_lang="EN-US")
 
 
26
 
27
+ # paraphrase
28
  sentences = []
29
+ for translated in split_sentences(result.text)[:-1]:
30
+ sentences.append(paraphrase(translated))
31
+
32
+ text = '. '.join(list(map(lambda x: x.capitalize(), sentences))) + '.'
33
+ if result.detected_source_lang == 'EN':
34
+ return text
35
+
36
+ # translate back to whatever the source language is
37
+ src_lang_text = translator.translate_text(
38
+ text, target_lang=result.detected_source_lang
39
+ )
40
+ return src_lang_text.text
requirements.txt CHANGED
@@ -1,3 +1,7 @@
1
- transformers
2
- torch
3
- git+https://github.com/PrithivirajDamodaran/Parrot_Paraphraser.git
 
 
 
 
 
1
+ deepl==1.17.0
2
+ parrot @ git+https://github.com/PrithivirajDamodaran/Parrot_Paraphraser.git
3
+ torch==2.3.0
4
+ gradio==4.28.3
5
+ huggingface-hub==0.22.2
6
+ python-dotenv==1.0.1
7
+ protobuf==5.26.1