Spaces:

Bhashini-IITJ
/

IndicPhotoOCR

Paused

App Files Files Community

anikde committed on Oct 8

Commit

d04ad72

1 Parent(s): c34507c

Translation Added (#2)

Browse files

- Translation Added (39b0c467a8b1ff824e2bdc75e6a70c5801246fd4)

Files changed (5) hide show

IndicPhotoOCR/translation/__init__.py +4 -0
IndicPhotoOCR/translation/indictrans_translator.py +339 -0
IndicPhotoOCR/translation/simple_translator.py +132 -0
app.py +331 -138
requirements.txt +4 -1

IndicPhotoOCR/translation/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+# Translation module for IndicPhotoOCR
+from .indictrans_translator import IndicTransTranslator
+__all__ = ['IndicTransTranslator']

IndicPhotoOCR/translation/indictrans_translator.py ADDED Viewed

	@@ -0,0 +1,339 @@

+"""
+IndicTrans2 Translation Module for IndicPhotoOCR
+Clean implementation with proper error handling and full language support
+Optimized for web app usage with better timeout handling
+"""
+import torch
+from typing import List, Optional, Callable
+import warnings
+import unicodedata
+import re
+# NOTE(review): this silences every warning category for the whole process,
+# not only warnings raised by this module -- confirm that is intended.
+warnings.filterwarnings("ignore")
+# The translation stack is optional. Record whether it imported so that
+# IndicTransTranslator.__init__ can raise a clear ImportError later instead of
+# this module failing at import time.
+try:
+    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+    from IndicTransToolkit import IndicProcessor
+    INDICTRANS_AVAILABLE = True
+except ImportError as e:
+    print(f"Warning: IndicTransToolkit not available: {e}")
+    INDICTRANS_AVAILABLE = False
+class IndicTransTranslator:
+    """Translate text between English and Indic languages with IndicTrans2.
+
+    The ``indic-en`` and ``en-indic`` checkpoints are loaded lazily on the
+    first translation request.  Indic -> Indic requests are bridged through
+    English, and the language of every line is detected from the Unicode
+    script of its characters.
+    """
+    # Language name -> IndicTrans2 language tag.
+    _LANG_CODES = {
+        'assamese': 'asm_Beng',
+        'bengali': 'ben_Beng',
+        'gujarati': 'guj_Gujr',
+        'hindi': 'hin_Deva',
+        'kannada': 'kan_Knda',
+        'malayalam': 'mal_Mlym',
+        'marathi': 'mar_Deva',
+        'odia': 'ory_Orya',
+        'punjabi': 'pan_Guru',
+        'tamil': 'tam_Taml',
+        'telugu': 'tel_Telu',
+        'urdu': 'urd_Arab',
+        'english': 'eng_Latn'
+    }
+    # Script name -> inclusive Unicode code point range used for detection.
+    _SCRIPT_RANGES = {
+        'devanagari': (0x0900, 0x097F),      # Hindi/Marathi
+        'bengali_assamese': (0x0980, 0x09FF), # Bengali/Assamese
+        'gujarati': (0x0A80, 0x0AFF),
+        'tamil': (0x0B80, 0x0BFF),
+        'telugu': (0x0C00, 0x0C7F),
+        'kannada': (0x0C80, 0x0CFF),
+        'malayalam': (0x0D00, 0x0D7F),
+        'odia': (0x0B00, 0x0B7F),
+        'punjabi': (0x0A00, 0x0A7F),          # Gurmukhi
+        'urdu': (0x0600, 0x06FF),             # Arabic
+    }
+    # Language reported for each script.  Devanagari is always reported as
+    # Hindi; 'bengali_assamese' is resolved separately via _ASSAMESE_ONLY_CHARS.
+    _SCRIPT_TO_LANG = {
+        'devanagari': 'hindi',
+        'gujarati': 'gujarati',
+        'tamil': 'tamil',
+        'telugu': 'telugu',
+        'kannada': 'kannada',
+        'malayalam': 'malayalam',
+        'odia': 'odia',
+        'punjabi': 'punjabi',
+        'urdu': 'urdu',
+        'english': 'english'
+    }
+    # Characters whose presence makes Bengali-script text count as Assamese.
+    _ASSAMESE_ONLY_CHARS = 'ৰৱ'
+    # Prefix of the placeholder _translate_batch_direct returns on failure.
+    _FAILURE_PREFIX = "[Translation failed: "
+    def __init__(self, device: str = "cpu", progress_callback: Optional[Callable] = None):
+        """Initialize the IndicTrans2 translator without loading any model.
+
+        Args:
+            device: Torch device string.  A falsy value (``None`` or ``""``)
+                selects ``"cuda"`` when available, otherwise ``"cpu"``.
+            progress_callback: Optional callable that receives each progress
+                message as a single string argument.
+
+        Raises:
+            ImportError: If the IndicTransToolkit stack could not be imported.
+        """
+        if not INDICTRANS_AVAILABLE:
+            raise ImportError("IndicTransToolkit not available. Install with: pip install IndicTransToolkit")
+        self.device = device if device else ("cuda" if torch.cuda.is_available() else "cpu")
+        self.progress_callback = progress_callback
+        self.models = {}
+        self.tokenizers = {}
+        self.processor = None
+        self.initialized = False
+        # Per-instance copies so callers may customise them safely.
+        self.lang_codes = dict(self._LANG_CODES)
+        self.script_ranges = dict(self._SCRIPT_RANGES)
+    def _clean_text(self, text: str) -> str:
+        """Normalize text before/after translation.
+
+        Applies NFC normalization, removes zero-width characters and literal
+        ``\\uXXXX`` escape sequences, collapses whitespace runs to one space and
+        removes whitespace that precedes punctuation.  Falsy input is returned
+        unchanged.
+        """
+        if not text:
+            return text
+        text = unicodedata.normalize('NFC', text)
+        # NOTE(review): ZWJ/ZWNJ can affect how some Indic conjuncts render;
+        # they are stripped here as in the original implementation.
+        text = re.sub(r'[\u200C\u200D\uFEFF]', '', text)  # Remove zero-width chars
+        text = re.sub(r'\\u[0-9a-fA-F]{4}', '', text)     # Remove Unicode escapes
+        text = re.sub(r'\s+', ' ', text.strip())           # Normalize whitespace
+        # Whitespace is already collapsed, so only the space *before*
+        # punctuation needs fixing.
+        text = re.sub(r'\s+([।,.!?;:])', r'\1', text)
+        return text.strip()
+    def _update_progress(self, message: str):
+        """Print ``message`` and forward it to the progress callback, if any."""
+        if self.progress_callback:
+            try:
+                self.progress_callback(message)
+            except Exception as exc:
+                # A faulty UI callback must never abort model loading or
+                # translation, but the failure should not be invisible either.
+                print(f"Progress callback failed: {exc}")
+        print(message)
+    def _load_models(self):
+        """Load the processor and both translation models once.
+
+        Subsequent calls return immediately.  Any loading error is reported
+        through ``_update_progress`` and re-raised.
+        """
+        if self.initialized:
+            return
+        try:
+            self._update_progress("Initializing IndicProcessor...")
+            self.processor = IndicProcessor(inference=True)
+            model_names = {
+                "indic-en": "ai4bharat/indictrans2-indic-en-1B",
+                "en-indic": "ai4bharat/indictrans2-en-indic-1B"
+            }
+            for key, model_name in model_names.items():
+                self._update_progress(f"Loading {model_name}...")
+                tokenizer = AutoTokenizer.from_pretrained(
+                    model_name,
+                    trust_remote_code=True
+                )
+                # Half precision only on CUDA; float32 everywhere else.
+                model = AutoModelForSeq2SeqLM.from_pretrained(
+                    model_name,
+                    trust_remote_code=True,
+                    torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
+                    low_cpu_mem_usage=True
+                )
+                self.models[key] = model.to(self.device).eval()
+                self.tokenizers[key] = tokenizer
+                self._update_progress(f"{key} model loaded successfully")
+            self.initialized = True
+            self._update_progress("All translation models loaded!")
+        except Exception as e:
+            self._update_progress(f"Error loading models: {str(e)}")
+            raise
+    def _count_scripts(self, text: str):
+        """Count characters of ``text`` per script.
+
+        Returns:
+            ``(script_counts, assamese_chars)`` where ``script_counts`` has one
+            entry per key of ``self.script_ranges`` plus ``'english'`` (ASCII
+            letters), and ``assamese_chars`` counts Assamese-only letters.
+        """
+        script_counts = {script: 0 for script in self.script_ranges}
+        script_counts['english'] = 0
+        assamese_chars = 0
+        for char in text:
+            code_point = ord(char)
+            for script, (start, end) in self.script_ranges.items():
+                if start <= code_point <= end:
+                    script_counts[script] += 1
+                    if script == 'bengali_assamese' and char in self._ASSAMESE_ONLY_CHARS:
+                        assamese_chars += 1
+                    break
+            else:
+                # Not in any configured script range: count ASCII letters as English.
+                if char.isalpha() and code_point < 128:
+                    script_counts['english'] += 1
+        return script_counts, assamese_chars
+    def _dominant_indic_language(self, text: str) -> Optional[str]:
+        """Return the language of the most frequent non-English script in ``text``.
+
+        Returns ``None`` when ``text`` contains no character from a configured
+        script range or the script has no language mapping.
+        """
+        script_counts, assamese_chars = self._count_scripts(text)
+        indic_counts = {
+            script: count for script, count in script_counts.items()
+            if script != 'english' and count > 0
+        }
+        if not indic_counts:
+            return None
+        script = max(indic_counts, key=indic_counts.get)
+        if script == 'bengali_assamese':
+            return 'assamese' if assamese_chars > 0 else 'bengali'
+        return self._SCRIPT_TO_LANG.get(script)
+    def detect_language_from_script(self, text_lines: List[str]) -> str:
+        """Detect the primary language of ``text_lines`` from its scripts.
+
+        Args:
+            text_lines: A list of strings, or a single string.
+
+        Returns:
+            A language name, ``'mixed'`` when ASCII letters make up more than
+            20% of the counted characters alongside at least one character of
+            a configured script, or ``'english'`` for empty or uncounted input.
+            A dominant Bengali/Assamese script is reported before the mixed
+            check is applied.
+        """
+        if isinstance(text_lines, str):
+            text_lines = [text_lines]
+        full_text = " ".join(text_lines)
+        if not full_text.strip():
+            return 'english'
+        script_counts, assamese_chars = self._count_scripts(full_text)
+        total_chars = sum(script_counts.values())
+        if total_chars == 0:
+            return 'english'
+        dominant_script = max(script_counts, key=script_counts.get)
+        if dominant_script == 'bengali_assamese':
+            return 'assamese' if assamese_chars > 0 else 'bengali'
+        english_ratio = script_counts['english'] / total_chars
+        if english_ratio > 0.2 and any(script_counts[s] > 0 for s in self.script_ranges):
+            return 'mixed'
+        return self._SCRIPT_TO_LANG.get(dominant_script, 'english')
+    def _translate_batch_direct(self, texts: List[str], src_lang: str, tgt_lang: str) -> List[str]:
+        """Translate ``texts`` with a single model (one side must be English).
+
+        Args:
+            texts: Sentences to translate.
+            src_lang: Source language name or IndicTrans2 tag.
+            tgt_lang: Target language name or IndicTrans2 tag.
+
+        Returns:
+            One translation per input.  If preprocessing, generation or
+            decoding fails, each entry is ``"[Translation failed: <text>]"``.
+
+        Raises:
+            ValueError: If neither or both of the languages are English.
+        """
+        if not texts:
+            return []
+        src_code = self.lang_codes.get(src_lang.lower(), src_lang)
+        tgt_code = self.lang_codes.get(tgt_lang.lower(), tgt_lang)
+        # Validate the direction before loading, so an unsupported pair fails
+        # fast instead of first downloading/loading both models.
+        if src_code == 'eng_Latn' and tgt_code != 'eng_Latn':
+            model_key = "en-indic"
+        elif src_code != 'eng_Latn' and tgt_code == 'eng_Latn':
+            model_key = "indic-en"
+        else:
+            raise ValueError(f"Unsupported direct translation: {src_lang} -> {tgt_lang}")
+        self._load_models()
+        model = self.models[model_key]
+        tokenizer = self.tokenizers[model_key]
+        try:
+            cleaned_texts = [self._clean_text(text) for text in texts]
+            processed_texts = self.processor.preprocess_batch(cleaned_texts, src_lang=src_code, tgt_lang=tgt_code)
+            inputs = tokenizer(
+                processed_texts,
+                truncation=True,
+                padding=True,
+                max_length=512,
+                return_tensors="pt"
+            ).to(self.device)
+            with torch.no_grad():
+                generated_tokens = model.generate(
+                    **inputs,
+                    max_length=512,
+                    num_beams=5,
+                    early_stopping=True,
+                    do_sample=False,
+                    use_cache=False,
+                    pad_token_id=tokenizer.pad_token_id,
+                    repetition_penalty=1.1,
+                    length_penalty=1.0
+                )
+            decoded = tokenizer.batch_decode(
+                generated_tokens,
+                skip_special_tokens=True,
+                clean_up_tokenization_spaces=True
+            )
+            results = self.processor.postprocess_batch(decoded, lang=tgt_code)
+            return [self._clean_text(result) for result in results]
+        except Exception as e:
+            print(f"Translation error: {str(e)}")
+            return [f"[Translation failed: {text}]" for text in texts]
+    def _bridge_via_english(self, text: str, src_lang: str, tgt_lang: str) -> List[str]:
+        """Translate Indic -> Indic by going through English.
+
+        Returns ``[text]`` unchanged when the first hop produced nothing or a
+        failure placeholder, so the placeholder is never fed to the second model.
+        """
+        english_result = self._translate_batch_direct([text], src_lang, "english")
+        if (
+            not english_result
+            or not english_result[0]
+            or english_result[0].startswith(self._FAILURE_PREFIX)
+        ):
+            return [text]
+        return self._translate_batch_direct(english_result, "english", tgt_lang)
+    def translate_text(self, text: str, src_lang: str, tgt_lang: str) -> str:
+        """Translate a single string; blank input yields an empty string."""
+        if not text or not text.strip():
+            return ""
+        result = self.translate_multiple_lines([text], src_lang, tgt_lang)
+        return result[0] if result else text
+    def translate_multiple_lines(self, lines: List[str], src_lang: str, tgt_lang: str) -> List[str]:
+        """Translate ``lines`` to ``tgt_lang``, one line at a time.
+
+        Args:
+            lines: Lines to translate.  Blank lines and lines already detected
+                as the target language are returned unchanged.
+            src_lang: Accepted for interface compatibility; the source language
+                of each line is detected from its script instead.
+            tgt_lang: Target language name (case-insensitive).
+
+        Returns:
+            A list with one entry per input line.  A line whose translation
+            raises or comes back empty is returned untranslated.
+        """
+        if not lines:
+            return []
+        tgt_lang = tgt_lang.lower()
+        translated_lines = []
+        print(f"Processing {len(lines)} lines for translation to {tgt_lang}")
+        for i, line in enumerate(lines):
+            if not line or not line.strip():
+                translated_lines.append(line)
+                continue
+            stripped = line.strip()
+            line_lang = self.detect_language_from_script([stripped])
+            print(f"Line {i+1}: detected as {line_lang}")
+            if line_lang == tgt_lang:
+                print(f"  Keeping unchanged")
+                translated_lines.append(line)
+                continue
+            try:
+                if line_lang == 'mixed':
+                    print(f"  Mixed language - attempting translation")
+                    if tgt_lang == 'english':
+                        # Use the script actually present in the line rather
+                        # than assuming every mixed line is Hindi.
+                        assumed_lang = self._dominant_indic_language(stripped) or 'hindi'
+                    else:
+                        assumed_lang = 'english'
+                    result = self._translate_batch_direct([stripped], assumed_lang, tgt_lang)
+                else:
+                    print(f"  Translating from {line_lang} to {tgt_lang}")
+                    if line_lang == "english" or tgt_lang == "english":
+                        result = self._translate_batch_direct([stripped], line_lang, tgt_lang)
+                    else:
+                        result = self._bridge_via_english(stripped, line_lang, tgt_lang)
+                translated_lines.append(result[0] if result and result[0] else line)
+            except Exception as e:
+                print(f"   Translation error: {e}")
+                translated_lines.append(line)
+        print(f" Processing completed for all {len(lines)} lines")
+        return translated_lines
+    def get_supported_languages(self) -> List[str]:
+        """Return the language names accepted by this translator."""
+        return list(self.lang_codes.keys())

IndicPhotoOCR/translation/simple_translator.py ADDED Viewed

	@@ -0,0 +1,132 @@

+"""
+Simple Translator Module for IndicPhotoOCR
+Fallback translator with basic word mappings and script detection
+"""
+class SimpleTranslator:
+    """Dictionary-based fallback translator with script-based detection.
+
+    Only the language pairs present in ``self.translations`` are supported;
+    words or phrases without an entry are passed through unchanged.
+    """
+    # (first code point, last code point, language), checked in this order.
+    # The first range that matches ANY character of the text wins.
+    _SCRIPT_LANGUAGES = (
+        (0x0900, 0x097F, 'hindi'),      # Devanagari (Hindi/Marathi)
+        (0x0980, 0x09FF, 'bengali'),    # Bengali
+        (0x0A80, 0x0AFF, 'gujarati'),   # Gujarati
+        (0x0B80, 0x0BFF, 'tamil'),      # Tamil
+        (0x0C00, 0x0C7F, 'telugu'),     # Telugu
+        (0x0C80, 0x0CFF, 'kannada'),    # Kannada
+        (0x0D00, 0x0D7F, 'malayalam'),  # Malayalam
+        (0x0B00, 0x0B7F, 'odia'),       # Odia
+        (0x0A00, 0x0A7F, 'punjabi'),    # Gurmukhi
+        (0x0600, 0x06FF, 'urdu'),       # Arabic
+    )
+    def __init__(self):
+        # Basic word mappings for common terms, keyed "<source>_to_<target>".
+        self.translations = {
+            'hindi_to_english': {
+                'नमस्ते': 'hello',
+                'धन्यवाद': 'thank you',
+                'हाँ': 'yes',
+                'नहीं': 'no',
+                'अच्छा': 'good',
+                'बुरा': 'bad',
+                'पानी': 'water',
+                'खाना': 'food',
+                'घर': 'home',
+                'स्कूल': 'school'
+            },
+            'english_to_hindi': {
+                'hello': 'नमस्ते',
+                'thank you': 'धन्यवाद',
+                'yes': 'हाँ',
+                'no': 'नहीं',
+                'good': 'अच्छा',
+                'bad': 'बुरा',
+                'water': 'पानी',
+                'food': 'खाना',
+                'home': 'घर',
+                'school': 'स्कूल'
+            },
+            'bengali_to_english': {
+                'নমস্কার': 'hello',
+                'ধন্যবাদ': 'thank you',
+                'হ্যাঁ': 'yes',
+                'না': 'no',
+                'ভাল': 'good',
+                'খারাপ': 'bad'
+            },
+            'english_to_bengali': {
+                'hello': 'নমস্কার',
+                'thank you': 'ধন্যবাদ',
+                'yes': 'হ্যাঁ',
+                'no': 'না',
+                'good': 'ভাল',
+                'bad': 'খারাপ'
+            }
+        }
+    def detect_language_from_script(self, text_lines):
+        """Detect a language from the Unicode ranges present in the text.
+
+        Args:
+            text_lines: A list of strings, or a single string.
+
+        Returns:
+            The language of the first entry of ``_SCRIPT_LANGUAGES`` whose
+            range contains any character of the text, else ``'english'``.
+        """
+        if isinstance(text_lines, str):
+            text_lines = [text_lines]
+        full_text = " ".join(text_lines)
+        if not full_text.strip():
+            return 'english'
+        code_points = [ord(char) for char in full_text]
+        for start, end, language in self._SCRIPT_LANGUAGES:
+            if any(start <= code_point <= end for code_point in code_points):
+                return language
+        return 'english'
+    def translate_text(self, text, src_lang, tgt_lang):
+        """Translate ``text`` by dictionary lookup of words and phrases.
+
+        The longest dictionary phrase starting at each position is preferred,
+        so multi-word entries such as ``'thank you'`` can match.  Each
+        candidate is looked up exactly first and then lower-cased.
+
+        Returns:
+            ``text`` unchanged when it is falsy or both languages are equal, a
+            bracketed notice followed by ``text`` when the language pair has no
+            dictionary, otherwise the translated words joined by single spaces.
+        """
+        if not text or src_lang == tgt_lang:
+            return text
+        dict_key = f"{src_lang}_to_{tgt_lang}"
+        trans_dict = self.translations.get(dict_key, {})
+        if not trans_dict:
+            return f"[Simple translation not available: {src_lang} → {tgt_lang}] {text}"
+        words = text.split()
+        longest_phrase = max(len(key.split()) for key in trans_dict)
+        translated_words = []
+        position = 0
+        while position < len(words):
+            for size in range(min(longest_phrase, len(words) - position), 0, -1):
+                phrase = " ".join(words[position:position + size])
+                match = trans_dict.get(phrase, trans_dict.get(phrase.lower()))
+                if match is not None:
+                    translated_words.append(match)
+                    position += size
+                    break
+            else:
+                # No entry starts here: keep the word as it is.
+                translated_words.append(words[position])
+                position += 1
+        return " ".join(translated_words)
+    def translate_multiple_lines(self, lines, src_lang, tgt_lang):
+        """Translate every line with ``translate_text`` and return the list."""
+        return [self.translate_text(line, src_lang, tgt_lang) for line in lines]

app.py CHANGED Viewed

@@ -1,138 +1,331 @@
-import gradio as gr
-from PIL import Image
-import tempfile
-import os
-from IndicPhotoOCR.ocr import OCR  # Ensure OCR class is saved in a file named ocr.py
-from IndicPhotoOCR.theme import Seafoam
-from IndicPhotoOCR.utils.helper import detect_para
-# Possible values for identifier_lang
-VALID_IDENTIFIER_LANGS = ["hindi", "assamese", "bengali", "gujarati", "kannada", "malayalam","odia", "punjabi", "tamil", "telugu", "auto"]  # Add more as needed
-def process_image(image, identifier_lang):
-    """
-    Processes the uploaded image for text detection and recognition.
-    - Detects bounding boxes in the image
-    - Draws bounding boxes on the image and identifies script in each detected area
-    - Recognizes text in each cropped region and returns the annotated image and recognized text
-    Parameters:
-    image (PIL.Image): The input image to be processed.
-    identifier_lang (str): The script identifier model to use.
-    Returns:
-    tuple: A PIL.Image with bounding boxes and a string of recognized text.
-    """
-    # Save the input image temporarily
-    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_input:
-            image.save(temp_input.name)
-            image_path = temp_input.name
-    # Initialize OCR with the selected identifier language
-    ocr = OCR(identifier_lang=identifier_lang, verbose=False)
-    # Detect bounding boxes on the image using OCR
-    detections = ocr.detect(image_path)
-    output_path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
-    # Draw bounding boxes on the image and save it as output
-    ocr.visualize_detection(image_path, detections, save_path=output_path)
-    # Load the annotated image with bounding boxes drawn
-    output_image = Image.open(output_path)
-    # Recognize text from the detected areas
-    recognized_text = ocr.ocr(image_path)
-    recognized_text = '\n'.join([' '.join(line) for line in recognized_text])
-    return output_image, recognized_text
-# Custom HTML for interface header with logos and alignment
-interface_html = """
-<div style="text-align: left; padding: 10px;">
-    <div style="background-color: white; padding: 10px; display: inline-block;">
-        <img src="https://iitj.ac.in/images/logo/Design-of-New-Logo-of-IITJ-2.png" alt="IITJ Logo" style="width: 100px; height: 100px;">
-    </div>
-    <img src="https://play-lh.googleusercontent.com/_FXSr4xmhPfBykmNJvKvC0GIAVJmOLhFl6RA5fobCjV-8zVSypxX8yb8ka6zu6-4TEft=w240-h480-rw" alt="Bhashini Logo" style="width: 100px; height: 100px; float: right;">
-</div>
-"""
-# Links to GitHub and Dataset repositories with GitHub icon
-links_html = """
-<div style="text-align: center; padding-top: 20px;">
-    <a href="https://github.com/Bhashini-IITJ/IndicPhotoOCR" target="_blank" style="margin-right: 20px; font-size: 18px; text-decoration: none;">
-        GitHub Repository
-    </a>
-    <a href="https://github.com/Bhashini-IITJ/BharatSceneTextDataset" target="_blank" style="font-size: 18px; text-decoration: none;">
-        Dataset Repository
-    </a>
-</div>
-"""
-# Custom CSS to style the text box and center the title
-custom_css = """
-.custom-textbox textarea {
-    font-size: 20px !important;
-}
-#title {
-    text-align: center;
-    font-size: 28px;
-    font-weight: bold;
-    margin-bottom: 20px;
-}
-"""
-# Create an instance of the Seafoam theme for a consistent visual style
-seafoam = Seafoam()
-# Clear function
-def clear_inputs():
-    return None, "auto", None, ""
-# Define the Gradio Blocks interface
-with gr.Blocks(theme=seafoam, css=custom_css) as demo:
-    gr.Markdown("# IndicPhotoOCR - Indic Scene Text Recogniser Toolkit", elem_id="title")
-    gr.Markdown("# Developed by IIT Jodhpur", elem_id="title")
-    gr.Markdown(interface_html + links_html)
-    with gr.Row():
-        with gr.Column():
-            input_image = gr.Image(type="pil", image_mode="RGB", label="Upload Image")
-            lang_dropdown = gr.Dropdown(VALID_IDENTIFIER_LANGS, label="Identifier Language", value="auto")
-            run_button = gr.Button("Run OCR")
-            clear_button = gr.Button("Clear", variant="stop")  # Added Clear Button
-        with gr.Column():
-            output_image = gr.Image(type="pil", label="Processed Image")
-            output_text = gr.Textbox(label="Recognized Text", lines=10, elem_classes="custom-textbox")
-    # Examples shown separately (to avoid schema error)
-    gr.Examples(
-        examples=[["test_images/image_88.jpg", "auto"],
-                  ["test_images/image_742.jpg", "hindi"]],
-        inputs=[input_image, lang_dropdown],
-        label="Try an example"
-    )
-    # Connect logic
-    run_button.click(fn=process_image, inputs=[input_image, lang_dropdown], outputs=[output_image, output_text])
-    clear_button.click(fn=clear_inputs, outputs=[input_image, lang_dropdown, output_image, output_text])  # Clear logic
-# Launch
-demo.launch(share=True)
-# # 👇 Local server launch config
-# if __name__ == "__main__":
-#     demo.launch(
-#         server_name="0.0.0.0",
-#         server_port=7866,
-#         share=False
-#     )

+import gradio as gr
+from PIL import Image
+import tempfile
+import os
+from IndicPhotoOCR.ocr import OCR  # Ensure OCR class is saved in a file named ocr.py
+from IndicPhotoOCR.theme import Seafoam
+from IndicPhotoOCR.utils.helper import detect_para
+from IndicPhotoOCR.translation.indictrans_translator import IndicTransTranslator
+from IndicPhotoOCR.translation.simple_translator import SimpleTranslator
+# Accepted values for the OCR ``identifier_lang`` option (extend as needed).
+VALID_IDENTIFIER_LANGS = [
+    "hindi",
+    "assamese",
+    "bengali",
+    "gujarati",
+    "kannada",
+    "malayalam",
+    "odia",
+    "punjabi",
+    "tamil",
+    "telugu",
+    "auto",
+]
+# Languages a user can translate TO; "None" stands for "no target selected".
+TRANSLATION_LANGUAGES = [
+    "None",
+    "assamese",
+    "bengali",
+    "english",
+    "gujarati",
+    "hindi",
+    "kannada",
+    "malayalam",
+    "marathi",
+    "odia",
+    "punjabi",
+    "tamil",
+    "telugu",
+    "urdu",
+]
+# The IndicTrans2 translator is created on first use; the dictionary-based
+# fallback is cheap and therefore built right away.
+advanced_translator = None
+simple_translator = SimpleTranslator()
+def process_image(image, identifier_lang):
+    """
+    Run text detection and recognition on an uploaded image.
+    - Detects bounding boxes in the image
+    - Draws the bounding boxes on a copy of the image
+    - Recognizes the text and joins it into one string (one line per row)
+    The input and the annotated image go through temporary files, which are
+    removed before returning.
+    Parameters:
+    image (PIL.Image): The input image to be processed, or None.
+    identifier_lang (str): The script identifier model to use.
+    Returns:
+    tuple: A PIL.Image with bounding boxes and the recognized text.
+           (None, "") when no image was supplied.
+    """
+    if image is None:
+        return None, ""
+    image_path = None
+    output_path = None
+    try:
+        # Reserve the file names first and write after the handles are closed,
+        # so saving by name also works where an open file cannot be reopened.
+        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_input:
+            image_path = temp_input.name
+        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_output:
+            output_path = temp_output.name
+        image.save(image_path)
+        ocr = OCR(identifier_lang=identifier_lang, verbose=False, device="cpu")
+        detections = ocr.detect(image_path)
+        ocr.visualize_detection(image_path, detections, save_path=output_path)
+        # Image.open is lazy: copy the pixels into memory so the temporary
+        # file can be deleted safely.
+        with Image.open(output_path) as annotated:
+            output_image = annotated.copy()
+        recognized_text = ocr.ocr(image_path)
+        recognized_text_str = '\n'.join(' '.join(line) for line in recognized_text)
+        return output_image, recognized_text_str
+    finally:
+        for path in (image_path, output_path):
+            if path and os.path.exists(path):
+                try:
+                    os.remove(path)
+                except OSError as cleanup_error:
+                    # Cleanup is best effort; never mask the OCR result or error.
+                    print(f"Could not remove temporary file {path}: {cleanup_error}")
+def _clean_translation_text(text):
+    """Normalize text to NFC and strip characters known to upset translation."""
+    import re
+    import unicodedata
+    text = unicodedata.normalize('NFC', text)
+    # NOTE(review): removing the nukta signs changes some letters; kept because
+    # the original code strips them on purpose from both input and output.
+    problematic_chars = (
+        '\u09BC',  # Bengali nukta
+        '\u093C',  # Devanagari nukta
+        '\u200C',  # Zero width non-joiner
+        '\u200D',  # Zero width joiner
+        '\uFEFF',  # Byte order mark
+    )
+    for char in problematic_chars:
+        text = text.replace(char, '')
+    # Remove literal \uXXXX escape sequences left in the text.
+    text = re.sub(r'\\u[0-9a-fA-F]{4}', '', text)
+    return text.strip()
+def _simple_fallback_translate(text_lines, target_lang):
+    """Translate ``text_lines`` with the dictionary-based fallback translator."""
+    source_lang = simple_translator.detect_language_from_script(text_lines)
+    print(f"Simple translator detected language: {source_lang}")
+    return '\n'.join(
+        simple_translator.translate_text(line, source_lang, target_lang)
+        for line in text_lines
+    )
+def translate_text(recognized_text, target_lang):
+    """
+    Translates the recognized text to the target language.
+    IndicTrans2 is tried first; if it cannot be initialized or the translation
+    raises, the dictionary-based SimpleTranslator is used instead.
+    Parameters:
+    recognized_text (str): The text to translate.
+    target_lang (str): Target language for translation.
+    Returns:
+    str: Translated text, or a human-readable status/error message.
+    """
+    global advanced_translator
+    if not recognized_text or not recognized_text.strip():
+        return "No text to translate - Please run OCR first"
+    # ``not target_lang`` also covers a cleared selection (None / "").
+    if not target_lang or target_lang.lower() == "none":
+        return "Please select a target language for translation"
+    recognized_text = _clean_translation_text(recognized_text)
+    print(f"\nStarting translation to {target_lang}...")
+    print(f"Text to translate (first 200 chars): {recognized_text[:200]}...")
+    text_lines = [line.strip() for line in recognized_text.split('\n') if line.strip()]
+    if not text_lines:
+        return "No valid text lines to translate"
+    print(f"Found {len(text_lines)} lines to translate")
+    if advanced_translator is None:
+        try:
+            print("Initializing IndicTrans2 translator...")
+            print("This may take 2-5 minutes for first-time model download...")
+            print("Please be patient - models are large (~1-2GB each)")
+            advanced_translator = IndicTransTranslator(device="cpu")
+            print("IndicTrans2 translator initialized!")
+        except Exception as init_error:
+            print(f"Failed to initialize IndicTrans2: {str(init_error)}")
+            print("Falling back to simple translator...")
+            try:
+                fallback_result = _simple_fallback_translate(text_lines, target_lang)
+                return f"Advanced translator unavailable, using simple translation:\n\n{fallback_result}"
+            except Exception as simple_error:
+                return f"All translators failed:\nAdvanced: {str(init_error)}\nSimple: {str(simple_error)}"
+    try:
+        source_lang = advanced_translator.detect_language_from_script(text_lines)
+        print(f"Detected source language: {source_lang}")
+        if source_lang == target_lang.lower():
+            return f"Source and target languages are the same ({source_lang}). No translation needed."
+        print("Starting batch translation...")
+        print("Model loading in progress... Please wait...")
+        translated_lines = advanced_translator.translate_multiple_lines(text_lines, source_lang, target_lang)
+        result = '\n'.join(translated_lines)
+        print("Translation completed successfully!")
+        return _clean_translation_text(result)
+    except Exception as translate_error:
+        import traceback
+        error_msg = f"Translation error: {str(translate_error)}"
+        print(f"Advanced translation failed: {str(translate_error)}")
+        traceback.print_exc()
+        print("Trying simple translator fallback...")
+        try:
+            fallback_result = _simple_fallback_translate(text_lines, target_lang)
+            return f"Advanced translation failed, using simple translation:\n\n{fallback_result}"
+        except Exception as e2:
+            return f"All translation methods failed:\nAdvanced: {error_msg}\nSimple: {str(e2)}"
+# Header banner markup for the page: the IITJ logo sits inside a white
+# inline-block box in a left-aligned container, and the Bhashini logo is
+# floated to the right. Both images are loaded from external URLs and sized
+# to 100x100 px. The string is concatenated with links_html and rendered
+# through gr.Markdown in the Blocks layout.
+interface_html = """
+<div style="text-align: left; padding: 10px;">
+    <div style="background-color: white; padding: 10px; display: inline-block;">
+        <img src="https://iitj.ac.in/images/logo/Design-of-New-Logo-of-IITJ-2.png" alt="IITJ Logo" style="width: 100px; height: 100px;">
+    </div>
+    <img src="https://play-lh.googleusercontent.com/_FXSr4xmhPfBykmNJvKvC0GIAVJmOLhFl6RA5fobCjV-8zVSypxX8yb8ka6zu6-4TEft=w240-h480-rw" alt="Bhashini Logo" style="width: 100px; height: 100px; float: right;">
+</div>
+"""
+# Centered row of two text links: the IndicPhotoOCR GitHub repository and the
+# BharatSceneTextDataset repository. Both open in a new tab (target="_blank").
+# The links are plain text; no icon is included in this markup.
+links_html = """
+<div style="text-align: center; padding-top: 20px;">
+    <a href="https://github.com/Bhashini-IITJ/IndicPhotoOCR" target="_blank" style="margin-right: 20px; font-size: 18px; text-decoration: none;">
+        GitHub Repository
+    </a>
+    <a href="https://github.com/Bhashini-IITJ/BharatSceneTextDataset" target="_blank" style="font-size: 18px; text-decoration: none;">
+        Dataset Repository
+    </a>
+</div>
+"""
+# Custom CSS handed to gr.Blocks(css=...):
+#   * .custom-textbox textarea -> 20px font for textareas inside components
+#     created with elem_classes="custom-textbox"
+#   * #title -> centered, bold, 28px text with a 20px bottom margin for the
+#     element whose id is "title"
+# NOTE(review): two gr.Markdown headings in the layout are both created with
+# elem_id="title"; element ids are expected to be unique per page, so a CSS
+# class may be a better selector here - confirm before changing.
+custom_css = """
+.custom-textbox textarea {
+    font-size: 20px !important;
+}
+#title {
+    text-align: center;
+    font-size: 28px;
+    font-weight: bold;
+    margin-bottom: 20px;
+}
+"""
+# Instantiate the Seafoam theme once at module level; the instance is passed
+# as the `theme` argument of gr.Blocks so the whole interface shares one
+# visual style.
+seafoam = Seafoam()
+# Reset handler wired to the "Clear" button
+def clear_inputs():
+    """Return the reset value for every component the Clear button updates.
+
+    The tuple order follows the button's ``outputs`` list: input image,
+    identifier-language dropdown, translation dropdown, processed image,
+    recognized text and translated text.
+    """
+    no_image = None
+    blank_text = ""
+    default_identifier_lang = "auto"
+    default_translation_lang = "None"
+    return (
+        no_image,
+        default_identifier_lang,
+        default_translation_lang,
+        no_image,
+        blank_text,
+        blank_text,
+    )
+# Build the Gradio Blocks UI: inputs in the first column, results in the second
+with gr.Blocks(theme=seafoam, css=custom_css) as demo:
+    # Page headings, logo banner and repository links
+    gr.Markdown(
+        "# IndicPhotoOCR - Indic Scene Text Recogniser Toolkit",
+        elem_id="title",
+    )
+    gr.Markdown("# Developed by IIT Jodhpur", elem_id="title")
+    gr.Markdown(interface_html + links_html)
+    with gr.Row():
+        # First column: image upload, language selectors and action buttons
+        with gr.Column():
+            input_image = gr.Image(
+                type="pil",
+                image_mode="RGB",
+                label="Upload Image",
+            )
+            lang_dropdown = gr.Dropdown(
+                VALID_IDENTIFIER_LANGS,
+                label="Identifier Language",
+                value="auto",
+            )
+            translation_dropdown = gr.Dropdown(
+                TRANSLATION_LANGUAGES,
+                label="Translate to Language",
+                value="None",
+            )
+            with gr.Row():
+                run_ocr_button = gr.Button("Run OCR", variant="primary")
+                translate_button = gr.Button("Translate Text", variant="secondary")
+            clear_button = gr.Button("Clear", variant="stop")
+        # Second column: processed image plus recognized and translated text
+        with gr.Column():
+            output_image = gr.Image(type="pil", label="Processed Image")
+            output_text = gr.Textbox(
+                label="Recognized Text",
+                lines=8,
+                elem_classes="custom-textbox",
+            )
+            translated_text = gr.Textbox(
+                label="Translated Text",
+                lines=8,
+                elem_classes="custom-textbox",
+            )
+    # Examples shown separately (to avoid schema error)
+    gr.Examples(
+        examples=[
+            ["test_images/image_88.jpg", "auto", "english"],
+            ["test_images/image_742.jpg", "hindi", "english"],
+        ],
+        inputs=[input_image, lang_dropdown, translation_dropdown],
+        label="Try an example",
+    )
+    # Event wiring: OCR fills the processed image and recognized text
+    run_ocr_button.click(
+        fn=process_image,
+        inputs=[input_image, lang_dropdown],
+        outputs=[output_image, output_text],
+    )
+    # Translation reads the recognized text and the chosen target language
+    translate_button.click(
+        fn=translate_text,
+        inputs=[output_text, translation_dropdown],
+        outputs=translated_text,
+    )
+    # Clear: clear_inputs supplies one value per component listed here
+    clear_button.click(
+        fn=clear_inputs,
+        outputs=[
+            input_image,
+            lang_dropdown,
+            translation_dropdown,
+            output_image,
+            output_text,
+            translated_text,
+        ],
+    )
+# Script entry point: start the Gradio server only when run directly
+if __name__ == "__main__":
+    print("Starting IndicPhotoOCR...")
+    # Options forwarded to demo.launch(): bind to all interfaces on port 7860,
+    # request a share link and enable debug mode.
+    launch_options = {
+        "share": True,
+        "server_name": "0.0.0.0",
+        "server_port": 7860,
+        "debug": True,
+    }
+    try:
+        demo.launch(**launch_options)
+    except KeyboardInterrupt:
+        # Ctrl+C is an expected way to stop the server; exit quietly
+        print("App interrupted by user")
+    except Exception as launch_error:
+        # Top-level boundary: report the failure with a full traceback
+        print(f"Error launching app: {launch_error}")
+        import traceback
+        traceback.print_exc()

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-aiohappyeyeballs==2.4.3
 aiohttp==3.10.10
 aiosignal==1.3.1
 async-timeout==4.0.3
@@ -45,3 +45,6 @@ easydict==1.13
 scipy==1.13.1
 transformers==4.45.1
 datasets==3.1.0

+aiohappyeyeballs>=2.0.0
 aiohttp==3.10.10
 aiosignal==1.3.1
 async-timeout==4.0.3
 scipy==1.13.1
 transformers==4.45.1
 datasets==3.1.0
+IndicTransToolkit>=1.0.0
+sentencepiece>=0.1.99
+sacremoses>=0.0.53