Mgolo committed on
Commit
a2254b2
·
verified ·
1 Parent(s): 51104f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -100
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  LocaleNLP Translation Service
2
  ============================
3
 
@@ -231,7 +232,7 @@ class ContentProcessor:
231
  return "\n".join(paragraph.text for paragraph in doc.paragraphs)
232
 
233
  @staticmethod
234
- def _extract_html_text(content: bytes) -> str:
235
  """Extract text from HTML file."""
236
  encoding = chardet.detect(content)["encoding"] or "utf-8"
237
  text = content.decode(encoding, errors="ignore")
@@ -239,7 +240,7 @@ class ContentProcessor:
239
  return soup.get_text()
240
 
241
  @staticmethod
242
- def _extract_markdown_text(content: bytes) -> str:
243
  """Extract text from Markdown file."""
244
  encoding = chardet.detect(content)["encoding"] or "utf-8"
245
  text = content.decode(encoding, errors="ignore")
@@ -248,15 +249,15 @@ class ContentProcessor:
248
  return soup.get_text()
249
 
250
  @staticmethod
251
- def _extract_srt_text(content: bytes) -> str:
252
  """Extract text from SRT subtitle file."""
253
  encoding = chardet.detect(content)["encoding"] or "utf-8"
254
  text = content.decode(encoding, errors="ignore")
255
  # Remove timestamp lines
256
- return re.sub(r"\d+\n\d{2}:\d{2}:\d{2},\d{3} --> .*?\n", "", text)
257
 
258
  @staticmethod
259
- def _extract_plain_text(content: bytes) -> str:
260
  """Extract text from plain text file."""
261
  encoding = chardet.detect(content)["encoding"] or "utf-8"
262
  return content.decode(encoding, errors="ignore")
@@ -470,48 +471,7 @@ class TranslationApp:
470
 
471
  with gr.Blocks(
472
  title="LocaleNLP Translation Service",
473
- css="""
474
- .input-section {
475
- background: linear-gradient(135deg, #e0f7fa 0%, #bbdefb 100%);
476
- padding: 20px;
477
- border-radius: 10px;
478
- border: 1px solid #90caf9;
479
- margin-bottom: 20px;
480
- }
481
- .output-section {
482
- background: linear-gradient(135deg, #f3e5f5 0%, #e8eaf6 100%);
483
- padding: 20px;
484
- border-radius: 10px;
485
- border: 1px solid #7986cb;
486
- margin-bottom: 20px;
487
- }
488
- .button-primary {
489
- background: linear-gradient(135deg, #42a5f5 0%, #1e88e5 100%) !important;
490
- color: white !important;
491
- border: none !important;
492
- border-radius: 8px !important;
493
- padding: 12px 24px !important;
494
- font-weight: bold !important;
495
- }
496
- .button-primary:hover {
497
- background: linear-gradient(135deg, #1e88e5 0%, #1565c0 100%) !important;
498
- }
499
- .radio-group {
500
- background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
501
- padding: 15px;
502
- border-radius: 10px;
503
- border: 1px solid #90caf9;
504
- }
505
- .dropdown-group {
506
- background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
507
- padding: 15px;
508
- border-radius: 10px;
509
- border: 1px solid #90caf9;
510
- }
511
- .control-row {
512
- margin-bottom: 20px;
513
- }
514
- """
515
  ) as interface:
516
  # Header
517
  gr.Markdown("""
@@ -520,77 +480,70 @@ class TranslationApp:
520
  """)
521
 
522
  # Input controls
523
- with gr.Row(elem_classes="control-row"):
524
- with gr.Column(elem_classes="radio-group"):
525
- input_mode = gr.Radio(
526
- choices=[mode.value for mode in InputMode],
527
- label="Input Type",
528
- value=InputMode.TEXT.value
529
- )
530
 
531
- with gr.Column(elem_classes="dropdown-group"):
532
- input_lang = gr.Dropdown(
533
- choices=[lang.value for lang in Language],
534
- label="Input Language",
535
- value=Language.ENGLISH.value
536
- )
537
 
538
- with gr.Column(elem_classes="dropdown-group"):
539
- output_lang = gr.Dropdown(
540
- choices=[lang.value for lang in Language],
541
- label="Output Language",
542
- value=Language.WOLOF.value
543
- )
544
 
545
  # Input components
546
- with gr.Group(elem_classes="input-section"):
547
- input_text = gr.Textbox(
548
- label="Enter Text",
549
- lines=8,
550
- visible=True,
551
- placeholder="Type or paste your text here..."
552
- )
553
 
554
- with gr.Group(elem_classes="input-section", visible=False) as audio_group:
555
- audio_input = gr.Audio(
556
- label="Upload Audio",
557
- type="filepath"
558
- )
559
 
560
- with gr.Group(elem_classes="input-section", visible=False) as file_group:
561
- file_input = gr.File(
562
- file_types=SUPPORTED_FILE_TYPES,
563
- label="Upload Document"
564
- )
565
 
566
  # Processing area
567
- with gr.Group(elem_classes="output-section"):
568
- extracted_text = gr.Textbox(
569
- label="Extracted / Transcribed Text",
570
- lines=8,
571
- interactive=False
572
- )
573
 
574
  translate_btn = gr.Button(
575
  "🔄 Process & Translate",
576
- variant="primary",
577
- elem_classes="button-primary"
578
  )
579
 
580
- with gr.Group(elem_classes="output-section"):
581
- output_text = gr.Textbox(
582
- label="Translated Text",
583
- lines=10,
584
- interactive=False
585
- )
586
 
587
  # Event handlers
588
  def update_visibility(mode: str) -> Dict[str, Any]:
589
  """Update component visibility based on input mode."""
590
  return {
591
  input_text: gr.update(visible=(mode == InputMode.TEXT.value)),
592
- audio_group: gr.update(visible=(mode == InputMode.AUDIO.value)),
593
- file_group: gr.update(visible=(mode == InputMode.FILE.value)),
594
  extracted_text: gr.update(value="", visible=True),
595
  output_text: gr.update(value="")
596
  }
@@ -638,7 +591,7 @@ class TranslationApp:
638
  input_mode.change(
639
  fn=update_visibility,
640
  inputs=input_mode,
641
- outputs=[input_text, audio_group, file_group, extracted_text, output_text]
642
  )
643
 
644
  translate_btn.click(
 
1
+ """
2
  LocaleNLP Translation Service
3
  ============================
4
 
 
232
  return "\n".join(paragraph.text for paragraph in doc.paragraphs)
233
 
234
  @staticmethod
235
+ def _extract_html_text(content: bytes) -> str:
236
  """Extract text from HTML file."""
237
  encoding = chardet.detect(content)["encoding"] or "utf-8"
238
  text = content.decode(encoding, errors="ignore")
 
240
  return soup.get_text()
241
 
242
  @staticmethod
243
+ def _extract_markdown_text(content: bytes) -> str:
244
  """Extract text from Markdown file."""
245
  encoding = chardet.detect(content)["encoding"] or "utf-8"
246
  text = content.decode(encoding, errors="ignore")
 
249
  return soup.get_text()
250
 
251
  @staticmethod
252
+ def _extract_srt_text(content: bytes) -> str:
253
  """Extract text from SRT subtitle file."""
254
  encoding = chardet.detect(content)["encoding"] or "utf-8"
255
  text = content.decode(encoding, errors="ignore")
256
  # Remove timestamp lines
257
+ return re.sub(r"\d+\n\d{2}:\d{2}:\d{2},\d{3} --> .*?\n", "", text)
258
 
259
  @staticmethod
260
+ def _extract_plain_text(content: bytes) -> str:
261
  """Extract text from plain text file."""
262
  encoding = chardet.detect(content)["encoding"] or "utf-8"
263
  return content.decode(encoding, errors="ignore")
 
471
 
472
  with gr.Blocks(
473
  title="LocaleNLP Translation Service",
474
+ theme=gr.themes.Monochrome()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
  ) as interface:
476
  # Header
477
  gr.Markdown("""
 
480
  """)
481
 
482
  # Input controls
483
+ with gr.Row():
484
+ input_mode = gr.Radio(
485
+ choices=[mode.value for mode in InputMode],
486
+ label="Input Type",
487
+ value=InputMode.TEXT.value
488
+ )
 
489
 
490
+ input_lang = gr.Dropdown(
491
+ choices=[lang.value for lang in Language],
492
+ label="Input Language",
493
+ value=Language.ENGLISH.value
494
+ )
 
495
 
496
+ output_lang = gr.Dropdown(
497
+ choices=[lang.value for lang in Language],
498
+ label="Output Language",
499
+ value=Language.WOLOF.value
500
+ )
 
501
 
502
  # Input components
503
+ input_text = gr.Textbox(
504
+ label="Enter Text",
505
+ lines=8,
506
+ visible=True,
507
+ placeholder="Type or paste your text here..."
508
+ )
 
509
 
510
+ audio_input = gr.Audio(
511
+ label="Upload Audio",
512
+ type="filepath",
513
+ visible=False
514
+ )
515
 
516
+ file_input = gr.File(
517
+ file_types=SUPPORTED_FILE_TYPES,
518
+ label="Upload Document",
519
+ visible=False
520
+ )
521
 
522
  # Processing area
523
+ extracted_text = gr.Textbox(
524
+ label="Extracted / Transcribed Text",
525
+ lines=8,
526
+ interactive=False
527
+ )
 
528
 
529
  translate_btn = gr.Button(
530
  "🔄 Process & Translate",
531
+ variant="secondary"
 
532
  )
533
 
534
+ output_text = gr.Textbox(
535
+ label="Translated Text",
536
+ lines=10,
537
+ interactive=False
538
+ )
 
539
 
540
  # Event handlers
541
  def update_visibility(mode: str) -> Dict[str, Any]:
542
  """Update component visibility based on input mode."""
543
  return {
544
  input_text: gr.update(visible=(mode == InputMode.TEXT.value)),
545
+ audio_input: gr.update(visible=(mode == InputMode.AUDIO.value)),
546
+ file_input: gr.update(visible=(mode == InputMode.FILE.value)),
547
  extracted_text: gr.update(value="", visible=True),
548
  output_text: gr.update(value="")
549
  }
 
591
  input_mode.change(
592
  fn=update_visibility,
593
  inputs=input_mode,
594
+ outputs=[input_text, audio_input, file_input, extracted_text, output_text]
595
  )
596
 
597
  translate_btn.click(