Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
LocaleNLP Translation Service
|
| 2 |
============================
|
| 3 |
|
|
@@ -231,7 +232,7 @@ class ContentProcessor:
|
|
| 231 |
return "\n".join(paragraph.text for paragraph in doc.paragraphs)
|
| 232 |
|
| 233 |
@staticmethod
|
| 234 |
-
def _extract_html_text(content: bytes)
|
| 235 |
"""Extract text from HTML file."""
|
| 236 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 237 |
text = content.decode(encoding, errors="ignore")
|
|
@@ -239,7 +240,7 @@ class ContentProcessor:
|
|
| 239 |
return soup.get_text()
|
| 240 |
|
| 241 |
@staticmethod
|
| 242 |
-
def _extract_markdown_text(content: bytes)
|
| 243 |
"""Extract text from Markdown file."""
|
| 244 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 245 |
text = content.decode(encoding, errors="ignore")
|
|
@@ -248,15 +249,15 @@ class ContentProcessor:
|
|
| 248 |
return soup.get_text()
|
| 249 |
|
| 250 |
@staticmethod
|
| 251 |
-
def _extract_srt_text(content: bytes)
|
| 252 |
"""Extract text from SRT subtitle file."""
|
| 253 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 254 |
text = content.decode(encoding, errors="ignore")
|
| 255 |
# Remove timestamp lines
|
| 256 |
-
return re.sub(r"\d+\n\d{2}:\d{2}:\d{2},\d{3}
|
| 257 |
|
| 258 |
@staticmethod
|
| 259 |
-
def _extract_plain_text(content: bytes)
|
| 260 |
"""Extract text from plain text file."""
|
| 261 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 262 |
return content.decode(encoding, errors="ignore")
|
|
@@ -470,48 +471,7 @@ class TranslationApp:
|
|
| 470 |
|
| 471 |
with gr.Blocks(
|
| 472 |
title="LocaleNLP Translation Service",
|
| 473 |
-
|
| 474 |
-
.input-section {
|
| 475 |
-
background: linear-gradient(135deg, #e0f7fa 0%, #bbdefb 100%);
|
| 476 |
-
padding: 20px;
|
| 477 |
-
border-radius: 10px;
|
| 478 |
-
border: 1px solid #90caf9;
|
| 479 |
-
margin-bottom: 20px;
|
| 480 |
-
}
|
| 481 |
-
.output-section {
|
| 482 |
-
background: linear-gradient(135deg, #f3e5f5 0%, #e8eaf6 100%);
|
| 483 |
-
padding: 20px;
|
| 484 |
-
border-radius: 10px;
|
| 485 |
-
border: 1px solid #7986cb;
|
| 486 |
-
margin-bottom: 20px;
|
| 487 |
-
}
|
| 488 |
-
.button-primary {
|
| 489 |
-
background: linear-gradient(135deg, #42a5f5 0%, #1e88e5 100%) !important;
|
| 490 |
-
color: white !important;
|
| 491 |
-
border: none !important;
|
| 492 |
-
border-radius: 8px !important;
|
| 493 |
-
padding: 12px 24px !important;
|
| 494 |
-
font-weight: bold !important;
|
| 495 |
-
}
|
| 496 |
-
.button-primary:hover {
|
| 497 |
-
background: linear-gradient(135deg, #1e88e5 0%, #1565c0 100%) !important;
|
| 498 |
-
}
|
| 499 |
-
.radio-group {
|
| 500 |
-
background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
|
| 501 |
-
padding: 15px;
|
| 502 |
-
border-radius: 10px;
|
| 503 |
-
border: 1px solid #90caf9;
|
| 504 |
-
}
|
| 505 |
-
.dropdown-group {
|
| 506 |
-
background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
|
| 507 |
-
padding: 15px;
|
| 508 |
-
border-radius: 10px;
|
| 509 |
-
border: 1px solid #90caf9;
|
| 510 |
-
}
|
| 511 |
-
.control-row {
|
| 512 |
-
margin-bottom: 20px;
|
| 513 |
-
}
|
| 514 |
-
"""
|
| 515 |
) as interface:
|
| 516 |
# Header
|
| 517 |
gr.Markdown("""
|
|
@@ -520,77 +480,70 @@ class TranslationApp:
|
|
| 520 |
""")
|
| 521 |
|
| 522 |
# Input controls
|
| 523 |
-
with gr.Row(
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
)
|
| 530 |
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
)
|
| 537 |
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
)
|
| 544 |
|
| 545 |
# Input components
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
)
|
| 553 |
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
|
| 566 |
# Processing area
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
)
|
| 573 |
|
| 574 |
translate_btn = gr.Button(
|
| 575 |
"🔄 Process & Translate",
|
| 576 |
-
variant="
|
| 577 |
-
elem_classes="button-primary"
|
| 578 |
)
|
| 579 |
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
)
|
| 586 |
|
| 587 |
# Event handlers
|
| 588 |
def update_visibility(mode: str) -> Dict[str, Any]:
|
| 589 |
"""Update component visibility based on input mode."""
|
| 590 |
return {
|
| 591 |
input_text: gr.update(visible=(mode == InputMode.TEXT.value)),
|
| 592 |
-
|
| 593 |
-
|
| 594 |
extracted_text: gr.update(value="", visible=True),
|
| 595 |
output_text: gr.update(value="")
|
| 596 |
}
|
|
@@ -638,7 +591,7 @@ class TranslationApp:
|
|
| 638 |
input_mode.change(
|
| 639 |
fn=update_visibility,
|
| 640 |
inputs=input_mode,
|
| 641 |
-
outputs=[input_text,
|
| 642 |
)
|
| 643 |
|
| 644 |
translate_btn.click(
|
|
|
|
| 1 |
+
"""
|
| 2 |
LocaleNLP Translation Service
|
| 3 |
============================
|
| 4 |
|
|
|
|
| 232 |
return "\n".join(paragraph.text for paragraph in doc.paragraphs)
|
| 233 |
|
| 234 |
@staticmethod
|
| 235 |
+
def _extract_html_text(content: bytes) -> str:
|
| 236 |
"""Extract text from HTML file."""
|
| 237 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 238 |
text = content.decode(encoding, errors="ignore")
|
|
|
|
| 240 |
return soup.get_text()
|
| 241 |
|
| 242 |
@staticmethod
|
| 243 |
+
def _extract_markdown_text(content: bytes) -> str:
|
| 244 |
"""Extract text from Markdown file."""
|
| 245 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 246 |
text = content.decode(encoding, errors="ignore")
|
|
|
|
| 249 |
return soup.get_text()
|
| 250 |
|
| 251 |
@staticmethod
|
| 252 |
+
def _extract_srt_text(content: bytes) -> str:
|
| 253 |
"""Extract text from SRT subtitle file."""
|
| 254 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 255 |
text = content.decode(encoding, errors="ignore")
|
| 256 |
# Remove timestamp lines
|
| 257 |
+
return re.sub(r"\d+\n\d{2}:\d{2}:\d{2},\d{3} --> .*?\n", "", text)
|
| 258 |
|
| 259 |
@staticmethod
|
| 260 |
+
def _extract_plain_text(content: bytes) -> str:
|
| 261 |
"""Extract text from plain text file."""
|
| 262 |
encoding = chardet.detect(content)["encoding"] or "utf-8"
|
| 263 |
return content.decode(encoding, errors="ignore")
|
|
|
|
| 471 |
|
| 472 |
with gr.Blocks(
|
| 473 |
title="LocaleNLP Translation Service",
|
| 474 |
+
theme=gr.themes.Monochrome()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
) as interface:
|
| 476 |
# Header
|
| 477 |
gr.Markdown("""
|
|
|
|
| 480 |
""")
|
| 481 |
|
| 482 |
# Input controls
|
| 483 |
+
with gr.Row():
|
| 484 |
+
input_mode = gr.Radio(
|
| 485 |
+
choices=[mode.value for mode in InputMode],
|
| 486 |
+
label="Input Type",
|
| 487 |
+
value=InputMode.TEXT.value
|
| 488 |
+
)
|
|
|
|
| 489 |
|
| 490 |
+
input_lang = gr.Dropdown(
|
| 491 |
+
choices=[lang.value for lang in Language],
|
| 492 |
+
label="Input Language",
|
| 493 |
+
value=Language.ENGLISH.value
|
| 494 |
+
)
|
|
|
|
| 495 |
|
| 496 |
+
output_lang = gr.Dropdown(
|
| 497 |
+
choices=[lang.value for lang in Language],
|
| 498 |
+
label="Output Language",
|
| 499 |
+
value=Language.WOLOF.value
|
| 500 |
+
)
|
|
|
|
| 501 |
|
| 502 |
# Input components
|
| 503 |
+
input_text = gr.Textbox(
|
| 504 |
+
label="Enter Text",
|
| 505 |
+
lines=8,
|
| 506 |
+
visible=True,
|
| 507 |
+
placeholder="Type or paste your text here..."
|
| 508 |
+
)
|
|
|
|
| 509 |
|
| 510 |
+
audio_input = gr.Audio(
|
| 511 |
+
label="Upload Audio",
|
| 512 |
+
type="filepath",
|
| 513 |
+
visible=False
|
| 514 |
+
)
|
| 515 |
|
| 516 |
+
file_input = gr.File(
|
| 517 |
+
file_types=SUPPORTED_FILE_TYPES,
|
| 518 |
+
label="Upload Document",
|
| 519 |
+
visible=False
|
| 520 |
+
)
|
| 521 |
|
| 522 |
# Processing area
|
| 523 |
+
extracted_text = gr.Textbox(
|
| 524 |
+
label="Extracted / Transcribed Text",
|
| 525 |
+
lines=8,
|
| 526 |
+
interactive=False
|
| 527 |
+
)
|
|
|
|
| 528 |
|
| 529 |
translate_btn = gr.Button(
|
| 530 |
"🔄 Process & Translate",
|
| 531 |
+
variant="secondary"
|
|
|
|
| 532 |
)
|
| 533 |
|
| 534 |
+
output_text = gr.Textbox(
|
| 535 |
+
label="Translated Text",
|
| 536 |
+
lines=10,
|
| 537 |
+
interactive=False
|
| 538 |
+
)
|
|
|
|
| 539 |
|
| 540 |
# Event handlers
|
| 541 |
def update_visibility(mode: str) -> Dict[str, Any]:
|
| 542 |
"""Update component visibility based on input mode."""
|
| 543 |
return {
|
| 544 |
input_text: gr.update(visible=(mode == InputMode.TEXT.value)),
|
| 545 |
+
audio_input: gr.update(visible=(mode == InputMode.AUDIO.value)),
|
| 546 |
+
file_input: gr.update(visible=(mode == InputMode.FILE.value)),
|
| 547 |
extracted_text: gr.update(value="", visible=True),
|
| 548 |
output_text: gr.update(value="")
|
| 549 |
}
|
|
|
|
| 591 |
input_mode.change(
|
| 592 |
fn=update_visibility,
|
| 593 |
inputs=input_mode,
|
| 594 |
+
outputs=[input_text, audio_input, file_input, extracted_text, output_text]
|
| 595 |
)
|
| 596 |
|
| 597 |
translate_btn.click(
|