Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
"""
|
| 2 |
LocaleNLP Translation Service
|
| 3 |
============================
|
| 4 |
|
|
@@ -232,7 +231,7 @@ class ContentProcessor:
|
|
| 232 |
return "\n".join(paragraph.text for paragraph in doc.paragraphs)
|
| 233 |
|
| 234 |
@staticmethod
|
| 235 |
-
def _extract_html_text(content: bytes)
|
| 236 |
"""Extract text from HTML file."""
|
| 237 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 238 |
text = content.decode(encoding, errors="ignore")
|
|
@@ -240,7 +239,7 @@ class ContentProcessor:
|
|
| 240 |
return soup.get_text()
|
| 241 |
|
| 242 |
@staticmethod
|
| 243 |
-
def _extract_markdown_text(content: bytes)
|
| 244 |
"""Extract text from Markdown file."""
|
| 245 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 246 |
text = content.decode(encoding, errors="ignore")
|
|
@@ -249,15 +248,15 @@ class ContentProcessor:
|
|
| 249 |
return soup.get_text()
|
| 250 |
|
| 251 |
@staticmethod
|
| 252 |
-
def _extract_srt_text(content: bytes)
|
| 253 |
"""Extract text from SRT subtitle file."""
|
| 254 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 255 |
text = content.decode(encoding, errors="ignore")
|
| 256 |
# Remove timestamp lines
|
| 257 |
-
return re.sub(r"\d+\n\d{2}:\d{2}:\d{2},\d{3}
|
| 258 |
|
| 259 |
@staticmethod
|
| 260 |
-
def _extract_plain_text(content: bytes)
|
| 261 |
"""Extract text from plain text file."""
|
| 262 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 263 |
return content.decode(encoding, errors="ignore")
|
|
@@ -471,7 +470,48 @@ class TranslationApp:
|
|
| 471 |
|
| 472 |
with gr.Blocks(
|
| 473 |
title="LocaleNLP Translation Service",
|
| 474 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
) as interface:
|
| 476 |
# Header
|
| 477 |
gr.Markdown("""
|
|
@@ -480,70 +520,77 @@ class TranslationApp:
|
|
| 480 |
""")
|
| 481 |
|
| 482 |
# Input controls
|
| 483 |
-
with gr.Row():
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
|
|
|
| 489 |
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
|
|
|
| 495 |
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
|
|
|
| 501 |
|
| 502 |
# Input components
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
|
|
|
| 509 |
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
|
| 522 |
# Processing area
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
|
|
|
| 528 |
|
| 529 |
translate_btn = gr.Button(
|
| 530 |
"🔄 Process & Translate",
|
| 531 |
-
variant="
|
|
|
|
| 532 |
)
|
| 533 |
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
|
|
|
| 539 |
|
| 540 |
# Event handlers
|
| 541 |
def update_visibility(mode: str) -> Dict[str, Any]:
|
| 542 |
"""Update component visibility based on input mode."""
|
| 543 |
return {
|
| 544 |
input_text: gr.update(visible=(mode == InputMode.TEXT.value)),
|
| 545 |
-
|
| 546 |
-
|
| 547 |
extracted_text: gr.update(value="", visible=True),
|
| 548 |
output_text: gr.update(value="")
|
| 549 |
}
|
|
@@ -591,7 +638,7 @@ class TranslationApp:
|
|
| 591 |
input_mode.change(
|
| 592 |
fn=update_visibility,
|
| 593 |
inputs=input_mode,
|
| 594 |
-
outputs=[input_text,
|
| 595 |
)
|
| 596 |
|
| 597 |
translate_btn.click(
|
|
|
|
|
|
|
| 1 |
LocaleNLP Translation Service
|
| 2 |
============================
|
| 3 |
|
|
|
|
| 231 |
return "\n".join(paragraph.text for paragraph in doc.paragraphs)
|
| 232 |
|
| 233 |
@staticmethod
|
| 234 |
+
def _extract_html_text(content: bytes) -> str:
|
| 235 |
"""Extract text from HTML file."""
|
| 236 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 237 |
text = content.decode(encoding, errors="ignore")
|
|
|
|
| 239 |
return soup.get_text()
|
| 240 |
|
| 241 |
@staticmethod
|
| 242 |
+
def _extract_markdown_text(content: bytes) -> str:
|
| 243 |
"""Extract text from Markdown file."""
|
| 244 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 245 |
text = content.decode(encoding, errors="ignore")
|
|
|
|
| 248 |
return soup.get_text()
|
| 249 |
|
| 250 |
@staticmethod
|
| 251 |
+
def _extract_srt_text(content: bytes) -> str:
|
| 252 |
"""Extract text from SRT subtitle file."""
|
| 253 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 254 |
text = content.decode(encoding, errors="ignore")
|
| 255 |
# Remove timestamp lines
|
| 256 |
+
return re.sub(r"\d+\n\d{2}:\d{2}:\d{2},\d{3} --> .*?\n", "", text)
|
| 257 |
|
| 258 |
@staticmethod
|
| 259 |
+
def _extract_plain_text(content: bytes) -> str:
|
| 260 |
"""Extract text from plain text file."""
|
| 261 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 262 |
return content.decode(encoding, errors="ignore")
|
|
|
|
| 470 |
|
| 471 |
with gr.Blocks(
|
| 472 |
title="LocaleNLP Translation Service",
|
| 473 |
+
css="""
|
| 474 |
+
.input-section {
|
| 475 |
+
background: linear-gradient(135deg, #e0f7fa 0%, #bbdefb 100%);
|
| 476 |
+
padding: 20px;
|
| 477 |
+
border-radius: 10px;
|
| 478 |
+
border: 1px solid #90caf9;
|
| 479 |
+
margin-bottom: 20px;
|
| 480 |
+
}
|
| 481 |
+
.output-section {
|
| 482 |
+
background: linear-gradient(135deg, #f3e5f5 0%, #e8eaf6 100%);
|
| 483 |
+
padding: 20px;
|
| 484 |
+
border-radius: 10px;
|
| 485 |
+
border: 1px solid #7986cb;
|
| 486 |
+
margin-bottom: 20px;
|
| 487 |
+
}
|
| 488 |
+
.button-primary {
|
| 489 |
+
background: linear-gradient(135deg, #42a5f5 0%, #1e88e5 100%) !important;
|
| 490 |
+
color: white !important;
|
| 491 |
+
border: none !important;
|
| 492 |
+
border-radius: 8px !important;
|
| 493 |
+
padding: 12px 24px !important;
|
| 494 |
+
font-weight: bold !important;
|
| 495 |
+
}
|
| 496 |
+
.button-primary:hover {
|
| 497 |
+
background: linear-gradient(135deg, #1e88e5 0%, #1565c0 100%) !important;
|
| 498 |
+
}
|
| 499 |
+
.radio-group {
|
| 500 |
+
background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
|
| 501 |
+
padding: 15px;
|
| 502 |
+
border-radius: 10px;
|
| 503 |
+
border: 1px solid #90caf9;
|
| 504 |
+
}
|
| 505 |
+
.dropdown-group {
|
| 506 |
+
background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
|
| 507 |
+
padding: 15px;
|
| 508 |
+
border-radius: 10px;
|
| 509 |
+
border: 1px solid #90caf9;
|
| 510 |
+
}
|
| 511 |
+
.control-row {
|
| 512 |
+
margin-bottom: 20px;
|
| 513 |
+
}
|
| 514 |
+
"""
|
| 515 |
) as interface:
|
| 516 |
# Header
|
| 517 |
gr.Markdown("""
|
|
|
|
| 520 |
""")
|
| 521 |
|
| 522 |
# Input controls
|
| 523 |
+
with gr.Row(elem_classes="control-row"):
|
| 524 |
+
with gr.Column(elem_classes="radio-group"):
|
| 525 |
+
input_mode = gr.Radio(
|
| 526 |
+
choices=[mode.value for mode in InputMode],
|
| 527 |
+
label="Input Type",
|
| 528 |
+
value=InputMode.TEXT.value
|
| 529 |
+
)
|
| 530 |
|
| 531 |
+
with gr.Column(elem_classes="dropdown-group"):
|
| 532 |
+
input_lang = gr.Dropdown(
|
| 533 |
+
choices=[lang.value for lang in Language],
|
| 534 |
+
label="Input Language",
|
| 535 |
+
value=Language.ENGLISH.value
|
| 536 |
+
)
|
| 537 |
|
| 538 |
+
with gr.Column(elem_classes="dropdown-group"):
|
| 539 |
+
output_lang = gr.Dropdown(
|
| 540 |
+
choices=[lang.value for lang in Language],
|
| 541 |
+
label="Output Language",
|
| 542 |
+
value=Language.WOLOF.value
|
| 543 |
+
)
|
| 544 |
|
| 545 |
# Input components
|
| 546 |
+
with gr.Group(elem_classes="input-section"):
|
| 547 |
+
input_text = gr.Textbox(
|
| 548 |
+
label="Enter Text",
|
| 549 |
+
lines=8,
|
| 550 |
+
visible=True,
|
| 551 |
+
placeholder="Type or paste your text here..."
|
| 552 |
+
)
|
| 553 |
|
| 554 |
+
with gr.Group(elem_classes="input-section", visible=False) as audio_group:
|
| 555 |
+
audio_input = gr.Audio(
|
| 556 |
+
label="Upload Audio",
|
| 557 |
+
type="filepath"
|
| 558 |
+
)
|
| 559 |
|
| 560 |
+
with gr.Group(elem_classes="input-section", visible=False) as file_group:
|
| 561 |
+
file_input = gr.File(
|
| 562 |
+
file_types=SUPPORTED_FILE_TYPES,
|
| 563 |
+
label="Upload Document"
|
| 564 |
+
)
|
| 565 |
|
| 566 |
# Processing area
|
| 567 |
+
with gr.Group(elem_classes="output-section"):
|
| 568 |
+
extracted_text = gr.Textbox(
|
| 569 |
+
label="Extracted / Transcribed Text",
|
| 570 |
+
lines=8,
|
| 571 |
+
interactive=False
|
| 572 |
+
)
|
| 573 |
|
| 574 |
translate_btn = gr.Button(
|
| 575 |
"🔄 Process & Translate",
|
| 576 |
+
variant="primary",
|
| 577 |
+
elem_classes="button-primary"
|
| 578 |
)
|
| 579 |
|
| 580 |
+
with gr.Group(elem_classes="output-section"):
|
| 581 |
+
output_text = gr.Textbox(
|
| 582 |
+
label="Translated Text",
|
| 583 |
+
lines=10,
|
| 584 |
+
interactive=False
|
| 585 |
+
)
|
| 586 |
|
| 587 |
# Event handlers
|
| 588 |
def update_visibility(mode: str) -> Dict[str, Any]:
|
| 589 |
"""Update component visibility based on input mode."""
|
| 590 |
return {
|
| 591 |
input_text: gr.update(visible=(mode == InputMode.TEXT.value)),
|
| 592 |
+
audio_group: gr.update(visible=(mode == InputMode.AUDIO.value)),
|
| 593 |
+
file_group: gr.update(visible=(mode == InputMode.FILE.value)),
|
| 594 |
extracted_text: gr.update(value="", visible=True),
|
| 595 |
output_text: gr.update(value="")
|
| 596 |
}
|
|
|
|
| 638 |
input_mode.change(
|
| 639 |
fn=update_visibility,
|
| 640 |
inputs=input_mode,
|
| 641 |
+
outputs=[input_text, audio_group, file_group, extracted_text, output_text]
|
| 642 |
)
|
| 643 |
|
| 644 |
translate_btn.click(
|