Cleanup
Browse files
app.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
from functools import partial
|
2 |
from pathlib import Path
|
3 |
-
|
4 |
import gradio as gr
|
|
|
5 |
from joeynmt.datasets import build_dataset
|
6 |
from joeynmt.helpers import (
|
7 |
load_checkpoint,
|
@@ -105,7 +106,7 @@ title = """
|
|
105 |
[<a href="https://github.com/sinaahmadi/ScriptNormalization" style="color:blue;">GitHub</a>]
|
106 |
[<a href="https://s3.amazonaws.com/pf-user-files-01/u-59356/uploads/2023-06-04/rw32pwp/ACL2023.mp4" style="color:blue;">Presentation</a>]
|
107 |
</h3>
|
108 |
-
|
109 |
|
110 |
description = """
|
111 |
<ul>
|
@@ -115,11 +116,8 @@ description = """
|
|
115 |
</ul>
|
116 |
|
117 |
<p style="font-size:120%;">What do all these sentences have in common? Being greeted in Arabic with "<em>mar7aba</em>" written in the Latin script, then asked how you are ("<em>هاو ئار یوو؟</em>") in English using the Perso-Arabic script of Kurdish and then, welcomed to this demo in French ("<em>Μπιάνβενου α σετ ντεμό!</em>") written in Greek script. All these sentences are written in an <strong>unconventional</strong> script.</p>
|
118 |
-
|
119 |
<p style="font-size:120%;">Although you may find these sentences risible, unconventional writing is a common practice among millions of speakers in bilingual communities. In our paper entitled "<a href="https://sinaahmadi.github.io/docs/articles/ahmadi2023acl.pdf" target="_blank"><strong>Script Normalization for Unconventional Writing of Under-Resourced Languages in Bilingual Communities</strong></a>", we shed light on this problem and propose an approach to normalize noisy text written in unconventional writing.</p>
|
120 |
-
|
121 |
<p style="font-size:120%;">This demo deploys a few models that are trained for <strong>the normalization of unconventional writing</strong>. Please note that this tool is not a spell-checker and cannot correct errors beyond character normalization. For better performance, you can apply hard-coded rules on the input and then pass it to the models, hence a hybrid system.</p>
|
122 |
-
|
123 |
<p style="font-size:120%;">For more information, you can check out the project on GitHub too: <a href="https://github.com/sinaahmadi/ScriptNormalization" target="_blank"><strong>https://github.com/sinaahmadi/ScriptNormalization</strong></a></p>
|
124 |
"""
|
125 |
|
@@ -142,14 +140,6 @@ examples = [
|
|
142 |
]
|
143 |
|
144 |
|
145 |
-
article = """
|
146 |
-
<div style="text-align: justify; max-width: 1200px; margin: 20px auto;">
|
147 |
-
<h3 style="font-weight: 450; font-size: 1rem; margin: 0rem">
|
148 |
-
<b>Created and deployed by Sina Ahmadi <a href="https://sinaahmadi.github.io/">(https://sinaahmadi.github.io/)</a>.
|
149 |
-
</h3>
|
150 |
-
</div>
|
151 |
-
"""
|
152 |
-
|
153 |
demo = gr.Interface(
|
154 |
title=title,
|
155 |
description=description,
|
@@ -160,7 +150,6 @@ demo = gr.Interface(
|
|
160 |
],
|
161 |
outputs=gr.Textbox(label="Normalized Text \U0001F642"),
|
162 |
examples=examples,
|
163 |
-
article=article,
|
164 |
examples_per_page=20,
|
165 |
)
|
166 |
|
|
|
1 |
from functools import partial
|
2 |
from pathlib import Path
|
3 |
+
|
4 |
import gradio as gr
|
5 |
+
import spaces
|
6 |
from joeynmt.datasets import build_dataset
|
7 |
from joeynmt.helpers import (
|
8 |
load_checkpoint,
|
|
|
106 |
[<a href="https://github.com/sinaahmadi/ScriptNormalization" style="color:blue;">GitHub</a>]
|
107 |
[<a href="https://s3.amazonaws.com/pf-user-files-01/u-59356/uploads/2023-06-04/rw32pwp/ACL2023.mp4" style="color:blue;">Presentation</a>]
|
108 |
</h3>
|
109 |
+
"""
|
110 |
|
111 |
description = """
|
112 |
<ul>
|
|
|
116 |
</ul>
|
117 |
|
118 |
<p style="font-size:120%;">What do all these sentences have in common? Being greeted in Arabic with "<em>mar7aba</em>" written in the Latin script, then asked how you are ("<em>هاو ئار یوو؟</em>") in English using the Perso-Arabic script of Kurdish and then, welcomed to this demo in French ("<em>Μπιάνβενου α σετ ντεμό!</em>") written in Greek script. All these sentences are written in an <strong>unconventional</strong> script.</p>
|
|
|
119 |
<p style="font-size:120%;">Although you may find these sentences risible, unconventional writing is a common practice among millions of speakers in bilingual communities. In our paper entitled "<a href="https://sinaahmadi.github.io/docs/articles/ahmadi2023acl.pdf" target="_blank"><strong>Script Normalization for Unconventional Writing of Under-Resourced Languages in Bilingual Communities</strong></a>", we shed light on this problem and propose an approach to normalize noisy text written in unconventional writing.</p>
|
|
|
120 |
<p style="font-size:120%;">This demo deploys a few models that are trained for <strong>the normalization of unconventional writing</strong>. Please note that this tool is not a spell-checker and cannot correct errors beyond character normalization. For better performance, you can apply hard-coded rules on the input and then pass it to the models, hence a hybrid system.</p>
|
|
|
121 |
<p style="font-size:120%;">For more information, you can check out the project on GitHub too: <a href="https://github.com/sinaahmadi/ScriptNormalization" target="_blank"><strong>https://github.com/sinaahmadi/ScriptNormalization</strong></a></p>
|
122 |
"""
|
123 |
|
|
|
140 |
]
|
141 |
|
142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
demo = gr.Interface(
|
144 |
title=title,
|
145 |
description=description,
|
|
|
150 |
],
|
151 |
outputs=gr.Textbox(label="Normalized Text \U0001F642"),
|
152 |
examples=examples,
|
|
|
153 |
examples_per_page=20,
|
154 |
)
|
155 |
|