Mgolo committed on
Commit
738edc4
·
verified ·
1 Parent(s): 92593b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -319
app.py CHANGED
@@ -1,10 +1,8 @@
1
  """
2
  LocaleNLP Translation Service
3
  ============================
4
-
5
  A multi-language translation application supporting English, Wolof, Hausa, and Darija.
6
  Features text, audio, and document translation with automatic chaining for all language pairs.
7
-
8
  Author: LocaleNLP
9
  """
10
 
@@ -309,320 +307,4 @@ class TranslationService:
309
  pipeline_obj, lang_tag = self.model_manager.get_translation_pipeline(
310
  source_lang, target_lang
311
  )
312
-
313
- return self._process_text_with_pipeline(text, pipeline_obj, lang_tag)
314
-
315
def _chained_translate(
    self,
    text: str,
    source_lang: Language,
    target_lang: Language
) -> str:
    """
    Translate by pivoting through English as the intermediate language.

    Used for language pairs without a direct model: the text is first
    translated into English, and that intermediate result is then
    translated into the target language.

    Args:
        text: Input text to translate
        source_lang: Source language
        target_lang: Target language

    Returns:
        The text translated via the English pivot
    """
    # Leg 1: source language -> English pivot
    pivot_text = self._direct_translate(text, source_lang, Language.ENGLISH)
    # Leg 2: English pivot -> target language
    return self._direct_translate(pivot_text, Language.ENGLISH, target_lang)
343
-
344
- def _process_text_with_pipeline(
345
- self,
346
- text: str,
347
- pipeline_obj: Any,
348
- lang_tag: str
349
- ) -> str:
350
- """Process text using translation pipeline."""
351
- # Process text in paragraphs
352
- paragraphs = text.splitlines()
353
- translated_paragraphs = []
354
-
355
- with torch.no_grad():
356
- for paragraph in paragraphs:
357
- if not paragraph.strip():
358
- translated_paragraphs.append("")
359
- continue
360
-
361
- # Split into sentences and translate
362
- sentences = [
363
- s.strip() for s in paragraph.split(". ")
364
- if s.strip()
365
- ]
366
-
367
- # Add language tag to each sentence
368
- formatted_sentences = [
369
- f"{lang_tag} {sentence}"
370
- for sentence in sentences
371
- ]
372
-
373
- # Perform translation
374
- results = pipeline_obj(
375
- formatted_sentences,
376
- max_length=5000,
377
- num_beams=5,
378
- early_stopping=True,
379
- no_repeat_ngram_size=3,
380
- repetition_penalty=1.5,
381
- length_penalty=1.2
382
- )
383
-
384
- # Process results
385
- translated_sentences = [
386
- result["translation_text"].capitalize()
387
- for result in results
388
- ]
389
-
390
- translated_paragraphs.append(". ".join(translated_sentences))
391
-
392
- return "\n".join(translated_paragraphs)
393
-
394
- # ================================
395
- # Audio Processing
396
- # ================================
397
-
398
class AudioProcessor:
    """Handles audio file transcription using Whisper."""

    def __init__(self, model_manager: ModelManager):
        # Shared manager; the Whisper model is fetched from it on demand.
        self.model_manager = model_manager

    def transcribe(self, audio_file_path: str) -> str:
        """
        Transcribe an audio file to text.

        Args:
            audio_file_path: Path to the audio file

        Returns:
            The transcribed text
        """
        whisper_model = self.model_manager.get_whisper_model()
        transcription = whisper_model.transcribe(audio_file_path)
        return transcription["text"]
417
-
418
- # ================================
419
- # Main Application
420
- # ================================
421
-
422
class TranslationApp:
    """Main application orchestrating all components.

    Wires model management, content extraction, translation, and audio
    transcription together behind a Gradio interface.
    """

    def __init__(self):
        # One ModelManager instance is shared by every service so each
        # model is loaded at most once.
        self.model_manager = ModelManager()
        self.content_processor = ContentProcessor()
        self.translation_service = TranslationService(self.model_manager)
        self.audio_processor = AudioProcessor(self.model_manager)

    def process_input(
        self,
        mode: InputMode,
        source_lang: Language,
        text_input: str,
        audio_file: Optional[str],
        file_obj: Optional[gr.FileData]
    ) -> str:
        """
        Turn whichever input the user supplied into plain text.

        Args:
            mode: Input mode
            source_lang: Source language
            text_input: Text input
            audio_file: Audio file path
            file_obj: Uploaded file object

        Returns:
            Processed text content

        Raises:
            ValueError: If the input required by the mode is missing,
                or audio is supplied in a non-English language.
        """
        if mode == InputMode.TEXT:
            return text_input

        if mode == InputMode.AUDIO:
            # Transcription is only wired up for English audio.
            if source_lang != Language.ENGLISH:
                raise ValueError("Audio input must be in English.")
            if not audio_file:
                raise ValueError("No audio file provided.")
            return self.audio_processor.transcribe(audio_file)

        if mode == InputMode.FILE:
            if not file_obj:
                raise ValueError("No file uploaded.")
            return self.content_processor.extract_text_from_file(file_obj.name)

        return ""

    def create_interface(self) -> gr.Blocks:
        """Create and return the Gradio interface."""

        with gr.Blocks(
            title="LocaleNLP Translation Service",
            theme=gr.themes.Monochrome()
        ) as interface:
            # Page header
            gr.Markdown("""
            # 🌍 LocaleNLP Translation Service
            Translate between English, Wolof, Hausa, and Darija with support for text, audio, and documents.
            """)

            # Mode and language selectors
            with gr.Row():
                input_mode = gr.Radio(
                    choices=[mode.value for mode in InputMode],
                    label="Input Type",
                    value=InputMode.TEXT.value
                )

                input_lang = gr.Dropdown(
                    choices=[lang.value for lang in Language],
                    label="Input Language",
                    value=Language.ENGLISH.value
                )

                output_lang = gr.Dropdown(
                    choices=[lang.value for lang in Language],
                    label="Output Language",
                    value=Language.WOLOF.value
                )

            # One input widget per mode; visibility toggled on mode change.
            input_text = gr.Textbox(
                label="Enter Text",
                lines=8,
                visible=True,
                placeholder="Type or paste your text here..."
            )

            audio_input = gr.Audio(
                label="Upload Audio",
                type="filepath",
                visible=False
            )

            file_input = gr.File(
                file_types=SUPPORTED_FILE_TYPES,
                label="Upload Document",
                visible=False
            )

            # Intermediate text, trigger button, and final output
            extracted_text = gr.Textbox(
                label="Extracted / Transcribed Text",
                lines=8,
                interactive=False
            )

            translate_btn = gr.Button(
                "🔄 Process & Translate",
                variant="secondary"
            )

            output_text = gr.Textbox(
                label="Translated Text",
                lines=10,
                interactive=False
            )

            def sync_visibility(selected_mode: str) -> Dict[str, Any]:
                """Show only the input widget matching the chosen mode."""
                return {
                    input_text: gr.update(visible=(selected_mode == InputMode.TEXT.value)),
                    audio_input: gr.update(visible=(selected_mode == InputMode.AUDIO.value)),
                    file_input: gr.update(visible=(selected_mode == InputMode.FILE.value)),
                    extracted_text: gr.update(value="", visible=True),
                    output_text: gr.update(value="")
                }

            def run_extraction(
                mode: str,
                source_lang: str,
                text_input: str,
                audio_file: Optional[str],
                file_obj: Optional[gr.FileData]
            ) -> Tuple[str, str]:
                """Convert the active input to text; report errors inline."""
                try:
                    extracted = self.process_input(
                        InputMode(mode),
                        Language(source_lang),
                        text_input,
                        audio_file,
                        file_obj
                    )
                    return extracted, ""
                except Exception as e:
                    logger.error(f"Processing error: {e}")
                    return "", f"❌ Error: {str(e)}"

            def run_translation(
                extracted_text: str,
                source_lang: str,
                target_lang: str
            ) -> str:
                """Translate the extracted text; report failures inline."""
                if not extracted_text.strip():
                    return "📝 No text to translate."
                try:
                    return self.translation_service.translate(
                        extracted_text,
                        Language(source_lang),
                        Language(target_lang)
                    )
                except Exception as e:
                    logger.error(f"Translation error: {e}")
                    return f"❌ Translation error: {str(e)}"

            # Wire events: the mode switch toggles widget visibility; the
            # button first extracts text, then chains into translation.
            input_mode.change(
                fn=sync_visibility,
                inputs=input_mode,
                outputs=[input_text, audio_input, file_input, extracted_text, output_text]
            )

            translate_btn.click(
                fn=run_extraction,
                inputs=[input_mode, input_lang, input_text, audio_input, file_input],
                outputs=[extracted_text, output_text]
            ).then(
                fn=run_translation,
                inputs=[extracted_text, input_lang, output_lang],
                outputs=output_text
            )

        return interface
608
-
609
- # ================================
610
- # Application Entry Point
611
- # ================================
612
-
613
def main():
    """Application entry point: build the UI and launch the server.

    Startup failures are logged at critical level and re-raised so the
    process exits non-zero.
    """
    try:
        application = TranslationApp()
        ui = application.create_interface()
        # Bind on all interfaces; honor the PORT env var (default 7860).
        ui.launch(
            server_name="0.0.0.0",
            server_port=int(os.getenv("PORT", 7860)),
            share=False
        )
    except Exception as e:
        logger.critical(f"Failed to start application: {e}")
        raise


if __name__ == "__main__":
    main()
 
1
  """
2
  LocaleNLP Translation Service
3
  ============================
 
4
  A multi-language translation application supporting English, Wolof, Hausa, and Darija.
5
  Features text, audio, and document translation with automatic chaining for all language pairs.
 
6
  Author: LocaleNLP
7
  """
8
 
 
307
  pipeline_obj, lang_tag = self.model_manager.get_translation_pipeline(
308
  source_lang, target_lang
309
  )
310
+