Spaces:

LocaleNLP
/

LocaleNLP_Translator

Runtime error

App Files Files Community

Mgolo committed on Sep 18

Commit

bbd3488

verified ·

1 Parent(s): 738edc4

Update app.py

Browse files

Files changed (1) hide show

app.py +336 -1

app.py CHANGED Viewed

@@ -307,4 +307,339 @@ class TranslationService:
         pipeline_obj, lang_tag = self.model_manager.get_translation_pipeline(
             source_lang, target_lang
         )

         pipeline_obj, lang_tag = self.model_manager.get_translation_pipeline(
             source_lang, target_lang
         )
+        return self._process_text_with_pipeline(text, pipeline_obj, lang_tag)
+    def _chained_translate(
+        self,
+        text: str,
+        source_lang: Language,
+        target_lang: Language
+    ) -> str:
+        """
+        Translate text in two hops, pivoting through English.
+        The text is first passed to ``_direct_translate`` from the source
+        language to English, and that result is passed to it again from
+        English to the target language.
+        Args:
+            text: Input text to translate
+            source_lang: Language the input text is written in
+            target_lang: Language to produce
+        Returns:
+            Result of the second (English -> target) translation
+        """
+        pivot = Language.ENGLISH
+        # Hop 1: source language -> English
+        english_text = self._direct_translate(text, source_lang, pivot)
+        # Hop 2: English -> target language
+        return self._direct_translate(english_text, pivot, target_lang)
+    def _process_text_with_pipeline(
+        self,
+        text: str,
+        pipeline_obj: Any,
+        lang_tag: str
+    ) -> str:
+        """
+        Translate text line by line using a translation pipeline.
+        Every non-blank line is split into sentences on ". ", each sentence
+        is prefixed with ``lang_tag`` and the sentences of one line are sent
+        to ``pipeline_obj`` as a single batch. Blank lines are preserved so
+        the output keeps the line layout of the input.
+        Args:
+            text: Input text; may contain several lines
+            pipeline_obj: Callable taking a list of strings plus generation
+                keyword arguments and returning one mapping with a
+                "translation_text" entry per input string
+            lang_tag: Tag placed in front of every sentence before translation
+        Returns:
+            Translated lines joined with newlines
+        """
+        # Generation settings are identical for every batch, so build them once.
+        generation_kwargs = {
+            "max_length": 5000,
+            "num_beams": 5,
+            "early_stopping": True,
+            "no_repeat_ngram_size": 3,
+            "repetition_penalty": 1.5,
+            "length_penalty": 1.2,
+        }
+        translated_paragraphs = []
+        with torch.no_grad():
+            for paragraph in text.splitlines():
+                # Split into sentences, dropping empty fragments
+                sentences = [
+                    s.strip() for s in paragraph.split(". ")
+                    if s.strip()
+                ]
+                if not sentences:
+                    # Blank line (or nothing translatable): keep the line
+                    # break and never call the pipeline with an empty batch.
+                    translated_paragraphs.append("")
+                    continue
+                # Add language tag to each sentence and translate the batch
+                results = pipeline_obj(
+                    [f"{lang_tag} {sentence}" for sentence in sentences],
+                    **generation_kwargs
+                )
+                translated_sentences = []
+                for result in results:
+                    translated = result["translation_text"]
+                    # Upper-case only the first character: str.capitalize()
+                    # would also lowercase the rest of the sentence and so
+                    # corrupt proper nouns and acronyms.
+                    translated_sentences.append(
+                        translated[:1].upper() + translated[1:]
+                    )
+                translated_paragraphs.append(". ".join(translated_sentences))
+        return "\n".join(translated_paragraphs)
+# ================================
+# Audio Processing
+# ================================
+class AudioProcessor:
+    """Speech-to-text helper built on the Whisper model of a ModelManager."""
+    def __init__(self, model_manager: ModelManager):
+        # The manager is only stored here; the model is requested per call.
+        self.model_manager = model_manager
+    def transcribe(self, audio_file_path: str) -> str:
+        """
+        Convert an audio file into text.
+        Args:
+            audio_file_path: Location of the audio file to transcribe
+        Returns:
+            The "text" entry of the model's transcription result
+        """
+        whisper_model = self.model_manager.get_whisper_model()
+        transcription = whisper_model.transcribe(audio_file_path)
+        return transcription["text"]
+# ================================
+# Main Application
+# ================================
+class TranslationApp:
+    """
+    Main application orchestrating all components.
+    Owns the model manager, content processor, translation service and audio
+    processor, and builds the Gradio interface that ties them together.
+    """
+    def __init__(self):
+        self.model_manager = ModelManager()
+        self.content_processor = ContentProcessor()
+        # Translation and audio share the same model manager instance.
+        self.translation_service = TranslationService(self.model_manager)
+        self.audio_processor = AudioProcessor(self.model_manager)
+    def process_input(
+        self,
+        mode: InputMode,
+        source_lang: Language,
+        text_input: str,
+        audio_file: Optional[str],
+        file_obj: Optional[gr.FileData]
+    ) -> str:
+        """
+        Turn the user's input into plain text according to the input mode.
+        Args:
+            mode: Input mode
+            source_lang: Source language
+            text_input: Text input (returned unchanged in text mode)
+            audio_file: Audio file path (transcribed in audio mode)
+            file_obj: Uploaded file (text is extracted in file mode)
+        Returns:
+            Processed text content; an empty string for an unhandled mode
+        Raises:
+            ValueError: If audio is submitted with a non-English source
+                language, or the audio/file for the selected mode is missing
+        """
+        if mode == InputMode.TEXT:
+            return text_input
+        elif mode == InputMode.AUDIO:
+            if source_lang != Language.ENGLISH:
+                raise ValueError("Audio input must be in English.")
+            if not audio_file:
+                raise ValueError("No audio file provided.")
+            return self.audio_processor.transcribe(audio_file)
+        elif mode == InputMode.FILE:
+            if not file_obj:
+                raise ValueError("No file uploaded.")
+            # The upload may arrive as an object exposing ``.name`` or as a
+            # plain path string; accept both instead of failing on the latter.
+            file_path = getattr(file_obj, "name", file_obj)
+            return self.content_processor.extract_text_from_file(file_path)
+        return ""
+    def create_interface(self) -> gr.Blocks:
+        """
+        Create and return the Gradio interface.
+        The layout has a row of selectors (input type, input language, output
+        language), one input widget per input type (only the selected one is
+        visible), a read-only box for the extracted/transcribed text, the
+        translate button and a read-only box for the translation.
+        Clicking the button first extracts the text and then translates it.
+        Returns:
+            The assembled ``gr.Blocks`` interface (not yet launched)
+        """
+        with gr.Blocks(
+            title="LocaleNLP Translation Service",
+            theme=gr.themes.Monochrome()
+        ) as interface:
+            # Header
+            gr.Markdown("""
+            # 🌍 LocaleNLP Translation Service
+            Translate between English, Wolof, Hausa, and Darija with support for text, audio, and documents.
+            """)
+            # Input controls
+            with gr.Row():
+                input_mode = gr.Radio(
+                    choices=[mode.value for mode in InputMode],
+                    label="Input Type",
+                    value=InputMode.TEXT.value
+                )
+                input_lang = gr.Dropdown(
+                    choices=[lang.value for lang in Language],
+                    label="Input Language",
+                    value=Language.ENGLISH.value
+                )
+                output_lang = gr.Dropdown(
+                    choices=[lang.value for lang in Language],
+                    label="Output Language",
+                    value=Language.WOLOF.value
+                )
+            # Input components (only the one matching the input type is shown)
+            input_text = gr.Textbox(
+                label="Enter Text",
+                lines=8,
+                visible=True,
+                placeholder="Type or paste your text here..."
+            )
+            audio_input = gr.Audio(
+                label="Upload Audio",
+                type="filepath",
+                visible=False
+            )
+            file_input = gr.File(
+                file_types=SUPPORTED_FILE_TYPES,
+                label="Upload Document",
+                visible=False
+            )
+            # Processing area
+            extracted_text = gr.Textbox(
+                label="Extracted / Transcribed Text",
+                lines=8,
+                interactive=False
+            )
+            translate_btn = gr.Button(
+                "🔄 Process & Translate",
+                variant="secondary"
+            )
+            output_text = gr.Textbox(
+                label="Translated Text",
+                lines=10,
+                interactive=False
+            )
+            # Event handlers
+            def update_visibility(mode: str) -> Dict[str, Any]:
+                """Show the input widget for ``mode`` and clear both text boxes."""
+                return {
+                    input_text: gr.update(visible=(mode == InputMode.TEXT.value)),
+                    audio_input: gr.update(visible=(mode == InputMode.AUDIO.value)),
+                    file_input: gr.update(visible=(mode == InputMode.FILE.value)),
+                    extracted_text: gr.update(value="", visible=True),
+                    output_text: gr.update(value="")
+                }
+            def handle_process(
+                mode: str,
+                source_lang: str,
+                text_input: str,
+                audio_file: Optional[str],
+                file_obj: Optional[gr.FileData]
+            ) -> Tuple[str, str]:
+                """
+                Handle initial input processing.
+                Returns ``(extracted text, output text)``: the output is empty
+                on success and carries the error message on failure.
+                """
+                try:
+                    processed_text = self.process_input(
+                        InputMode(mode),
+                        Language(source_lang),
+                        text_input,
+                        audio_file,
+                        file_obj
+                    )
+                    return processed_text, ""
+                except Exception as e:
+                    logger.error(f"Processing error: {e}")
+                    return "", f"❌ Error: {str(e)}"
+            def handle_translate(
+                extracted_text: str,
+                source_lang: str,
+                target_lang: str,
+                current_output: str = ""
+            ) -> str:
+                """
+                Handle translation of processed text.
+                ``current_output`` is what the output box holds after
+                ``handle_process`` ran; it is returned unchanged when there is
+                nothing to translate so a processing error stays visible.
+                """
+                if not (extracted_text or "").strip():
+                    # handle_process leaves the output box empty on success,
+                    # so any text still in it is its error message. This step
+                    # always runs after it and must not overwrite that message.
+                    return current_output or "📝 No text to translate."
+                try:
+                    return self.translation_service.translate(
+                        extracted_text,
+                        Language(source_lang),
+                        Language(target_lang)
+                    )
+                except Exception as e:
+                    logger.error(f"Translation error: {e}")
+                    return f"❌ Translation error: {str(e)}"
+            # Connect events
+            input_mode.change(
+                fn=update_visibility,
+                inputs=input_mode,
+                outputs=[input_text, audio_input, file_input, extracted_text, output_text]
+            )
+            translate_btn.click(
+                fn=handle_process,
+                inputs=[input_mode, input_lang, input_text, audio_input, file_input],
+                outputs=[extracted_text, output_text]
+            ).then(
+                fn=handle_translate,
+                inputs=[extracted_text, input_lang, output_lang, output_text],
+                outputs=output_text
+            )
+            # Custom CSS for black button, injected on page load.
+            # The keyword is ``js``: Gradio 4 renamed the old ``_js`` argument,
+            # and this file already depends on Gradio 4 (``gr.FileData``).
+            interface.load(lambda: None, None, None, js="""
+            () => {
+                const style = document.createElement('style');
+                style.textContent = `
+                    .gr-button-secondary {
+                        background-color: #000000 !important;
+                        border-color: #000000 !important;
+                        color: white !important;
+                    }
+                    .gr-button-secondary:hover {
+                        background-color: #333333 !important;
+                        border-color: #333333 !important;
+                    }
+                `;
+                document.head.appendChild(style);
+            }
+            """)
+        return interface
+# ================================
+# Application Entry Point
+# ================================
+def main():
+    """
+    Build the application and serve its Gradio interface.
+    The server listens on all interfaces, on the port given by the ``PORT``
+    environment variable (7860 when unset). Any startup failure is logged at
+    critical level and re-raised.
+    """
+    try:
+        interface = TranslationApp().create_interface()
+        port = int(os.getenv("PORT", 7860))
+        interface.launch(server_name="0.0.0.0", server_port=port, share=False)
+    except Exception as e:
+        logger.critical(f"Failed to start application: {e}")
+        raise
+if __name__ == "__main__":
+    main()