# Source: Hugging Face Space "sikeaditya/OCRTranslation" (Space status: Sleeping)
# File size: 10,658 bytes

import json
import os
import sys
import time
import traceback
import uuid

import google.generativeai as genai
from dotenv import load_dotenv
from flask import Flask, render_template, request, jsonify
from PIL import Image

# Load environment variables (load_dotenv also reads a local .env file if present)
load_dotenv()

# Read the Gemini API key from the environment only. A credential must never
# be embedded in source code: anyone who can read the file can use it, and a
# key that has already been published should be revoked and rotated.
api_key = os.getenv("GEMINI_API_KEY")
if api_key:
    # Configure the client at import time so requests also work when the app
    # is imported by a WSGI server and the __main__ startup check never runs.
    genai.configure(api_key=api_key)
else:
    print("WARNING: GEMINI_API_KEY environment variable is not set. Gemini API calls may fail.")

# Function to test API connectivity
def test_gemini_api():
    """Send a trivial prompt to Gemini and report whether a usable reply came back.

    Returns:
        True when the model answered with non-empty text. False when the
        reply was empty, or when any exception was raised while configuring
        the client or calling the API (the error and traceback are printed).
    """
    try:
        genai.configure(api_key=api_key)

        # Minimal text-only round trip against the gemini-2.0-flash model
        probe_model = genai.GenerativeModel('gemini-2.0-flash')
        reply = probe_model.generate_content("Hello, please respond with 'API is working'")

        # A falsy reply, a reply without a ``text`` attribute, and an empty
        # ``text`` are all treated as "no answer".
        reply_text = getattr(reply, 'text', None) if reply else None
        if not reply_text:
            print("WARNING: Received empty response during API test")
            return False

        print(f"API Test Response: {reply_text.strip()}")
        return True
    except Exception as exc:
        print(f"ERROR: Failed to connect to Gemini API: {exc!s}")
        print(traceback.format_exc())
        return False

# Initialize the Flask app; the routes and error handler below register on this instance
app = Flask(__name__)

# Configure error responses
@app.errorhandler(500)
def server_error(e):
    """Render an HTTP 500 error as a JSON body with a single ``error`` field.

    Args:
        e: The error object Flask passes to the handler; its string form is
            appended to the message.

    Returns:
        A ``(response, 500)`` tuple.
    """
    message = f"Internal server error: {e!s}"
    return jsonify(error=message), 500

def extract_text_with_gemini(image_path):
    """Extract text from an image file using a Gemini vision model.

    The image is opened with Pillow, downscaled in memory when either side
    exceeds 1024 pixels, and sent to the model together with an OCR prompt.
    The whole operation is attempted up to three times, pausing two seconds
    between attempts.

    Args:
        image_path: Path of the image file to read. The file is only read;
            it is never modified.

    Returns:
        The stripped text returned by the model, or, once every attempt has
        failed, a string starting with "Could not extract text from the
        image" that carries the last error message. Errors are reported
        through the return value rather than raised.
    """
    max_retries = 3
    retry_delay = 2
    max_dimension = 1024  # longest side sent to the API, in pixels
    # Candidate models, tried in order until one can be instantiated
    model_options = ['gemini-2.0-flash']
    prompt = "Extract all the text from this image. Return only the extracted text, nothing else."

    last_error = None
    for attempt in range(max_retries):
        try:
            print(f"Attempt {attempt + 1} to extract text using Gemini...")

            model = None
            for model_name in model_options:
                try:
                    print(f"Trying model: {model_name}")
                    model = genai.GenerativeModel(model_name)
                    break
                except Exception as model_error:
                    print(f"Error with model {model_name}: {str(model_error)}")
                    if model_name == model_options[-1]:  # No candidates left
                        raise

            with Image.open(image_path) as img:
                print(f"Image loaded from {image_path} (Size: {img.size}, Format: {img.format})")

                request_image = img
                if img.width > max_dimension or img.height > max_dimension:
                    print(f"Resizing large image from {img.width}x{img.height}")
                    ratio = min(max_dimension / img.width, max_dimension / img.height)
                    # Clamp to 1px so extreme aspect ratios never round down
                    # to a zero-sized (invalid) dimension.
                    new_width = max(1, int(img.width * ratio))
                    new_height = max(1, int(img.height * ratio))
                    # The resized copy stays in memory: it is the object sent
                    # to the API, so writing it back over image_path is
                    # unnecessary, and that write can fail when the image
                    # mode is not supported by the format implied by the
                    # file extension.
                    request_image = img.resize((new_width, new_height))
                    print(f"Resized to {new_width}x{new_height}")

                print("Sending request to Gemini API for text extraction...")
                response = model.generate_content([prompt, request_image])

            # Validate response
            if not response or not hasattr(response, 'text') or not response.text:
                raise ValueError("Received empty response from Gemini API")

            extracted_text = response.text.strip()
            print(f"Successfully extracted text (length: {len(extracted_text)})")
            return extracted_text
        except Exception as e:
            last_error = e
            print(f"Attempt {attempt + 1} failed: {str(e)}")
            print(traceback.format_exc())
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)

    # Every attempt failed; translate_text() recognises this message prefix.
    return f"Could not extract text from the image: {str(last_error)}"


def translate_text(text):
    """Translate English text to Hindi with Gemini.

    Args:
        text: The text to translate. Empty or whitespace-only input, and the
            error message produced by a failed OCR step, are answered
            immediately without calling the API.

    Returns:
        The stripped translation returned by the model; "No text to
        translate." for empty input; "Cannot translate due to OCR failure."
        when ``text`` is an OCR error message; or "Translation failed: ..."
        with the last error once all three attempts have failed.
    """
    total_attempts = 3
    pause_seconds = 2

    # Nothing to translate
    if not text or not text.strip():
        return "No text to translate."

    # The OCR step reports failures in-band; do not send those to the model
    if text.startswith("Could not extract text from the image"):
        return "Cannot translate due to OCR failure."

    for attempt_no in range(1, total_attempts + 1):
        try:
            print(f"Attempt {attempt_no} to translate text using Gemini...")

            # Candidate models, tried in order until one can be instantiated
            candidates = ['gemini-2.0-flash']
            for position, candidate in enumerate(candidates):
                try:
                    print(f"Trying model: {candidate}")
                    translator = genai.GenerativeModel(candidate)
                    break
                except Exception as candidate_error:
                    print(f"Error with model {candidate}: {candidate_error!s}")
                    if position == len(candidates) - 1:  # No candidates left
                        raise

            # Same prompt text as a triple-quoted block indented 12 spaces,
            # spelled out piecewise so the whitespace is explicit.
            prompt = (
                "\n"
                "            Translate the following English text to Hindi. \n"
                "            Keep proper names, titles, and organization names unchanged.\n"
                f"            Text to translate: {text}\n"
                "            "
            )

            print("Sending request to Gemini API for translation...")
            reply = translator.generate_content(prompt)

            # A falsy reply, a missing ``text`` attribute, and empty text all
            # count as an empty response.
            reply_text = getattr(reply, 'text', None) if reply else None
            if not reply_text:
                raise ValueError("Received empty response from Gemini API")

            result = reply_text.strip()
            print(f"Successfully translated text (length: {len(result)})")
            return result
        except Exception as exc:
            print(f"Translation attempt {attempt_no} failed: {exc!s}")
            print(traceback.format_exc())
            if attempt_no == total_attempts:
                return f"Translation failed: {exc!s}"
            print(f"Retrying in {pause_seconds} seconds...")
            time.sleep(pause_seconds)


@app.route('/')
def home():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page


@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle an image upload: extract its text with Gemini, then translate it.

    Reads the multipart field named ``file`` from the request, checks its
    extension, stores it under ``temp/`` for the duration of the request,
    runs text extraction followed by translation, and always removes the
    temporary file afterwards.

    Returns:
        A JSON response with ``original_text`` and ``translated_text`` on
        success; a JSON ``error`` with status 400 when no file was sent, no
        file was selected, or the extension is not allowed; a JSON ``error``
        with status 500 when processing raises.
    """
    print("Received upload request")
    if 'file' not in request.files:
        print("No file part in the request")
        return jsonify({'error': 'No file uploaded'}), 400

    file = request.files['file']
    # Treat a missing (None) filename like an empty one instead of letting
    # the extension check below raise TypeError.
    if not file.filename:
        print("No file selected")
        return jsonify({'error': 'No file selected'}), 400

    # Check file extension
    allowed_extensions = {'png', 'jpg', 'jpeg', 'gif', 'bmp'}
    if '.' not in file.filename or file.filename.rsplit('.', 1)[1].lower() not in allowed_extensions:
        print(f"Invalid file format: {file.filename}")
        return jsonify({'error': 'Invalid file format. Please upload an image (PNG, JPG, JPEG, GIF, BMP).'}), 400

    temp_path = None
    try:
        # Create temp directory if it doesn't exist
        temp_dir = "temp"
        os.makedirs(temp_dir, exist_ok=True)
        print(f"Ensuring temp directory exists: {temp_dir}")

        # Make sure the temp directory has write permissions
        try:
            if not os.access(temp_dir, os.W_OK):
                os.chmod(temp_dir, 0o755)  # chmod to ensure write permissions
                print(f"Updated permissions for temp directory: {temp_dir}")
        except Exception as perm_error:
            print(f"Warning: Could not update permissions: {str(perm_error)}")

        # Use a random name per request. A timestamp with one-second
        # resolution is not unique: two uploads in the same second would
        # share a path, so one request could overwrite or delete the other's
        # file.
        temp_filename = f"temp_image_{uuid.uuid4().hex}.png"
        temp_path = os.path.join(temp_dir, temp_filename)
        print(f"Saving uploaded file to {temp_path}")

        file.save(temp_path)

        # Ensure the file has appropriate permissions
        try:
            os.chmod(temp_path, 0o644)  # Make the file readable
            print(f"Updated permissions for file: {temp_path}")
        except Exception as file_perm_error:
            print(f"Warning: Could not update file permissions: {str(file_perm_error)}")

        # Extract text using Gemini
        print("Starting text extraction...")
        extracted_text = extract_text_with_gemini(temp_path)
        print(f"Text extraction result: {extracted_text[:100]}...")

        # Translate text
        print("Starting text translation...")
        translated_text = translate_text(extracted_text)
        print(f"Translation result: {translated_text[:100]}...")

        return jsonify({
            'original_text': extracted_text,
            'translated_text': translated_text
        })
    except Exception as e:
        error_msg = f"Error processing image: {str(e)}"
        print(error_msg)
        print(traceback.format_exc())
        return jsonify({
            'error': error_msg
        }), 500
    finally:
        # Clean up the temporary file; a cleanup failure is only logged so it
        # cannot replace the response chosen above.
        try:
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)
                print(f"Removed temporary file: {temp_path}")
        except Exception as e:
            print(f"Failed to remove temporary file: {str(e)}")


if __name__ == '__main__':
    # Flask looks for HTML templates in this folder; create it on first run
    templates_dir = 'templates'
    if not os.path.exists(templates_dir):
        os.makedirs(templates_dir)
        print("Created 'templates' directory. Please place your HTML files here.")

    # Probe the Gemini API once at startup and report the outcome; the server
    # is started either way.
    api_working = test_gemini_api()
    if not api_working:
        print("❌ WARNING: Gemini API connection failed. The application may not work correctly!")
    else:
        print("✅ Gemini API connection successful!")

    # For Hugging Face Spaces, listen on 0.0.0.0; PORT overrides the default 7860
    port_setting = os.environ.get('PORT', 7860)
    print(f"Starting Flask app on port {port_setting}")
    app.run(host='0.0.0.0', port=int(port_setting))