File size: 10,658 Bytes
0ff7a5c
 
 
 
 
 
 
 
33b9630
0ff7a5c
 
 
 
 
 
 
 
 
33b9630
 
 
 
 
936240d
b57de84
33b9630
 
 
 
 
 
 
 
 
 
 
 
0ff7a5c
 
 
 
33b9630
 
 
 
0ff7a5c
 
 
 
 
 
 
 
 
33b9630
936240d
7d8fb22
33b9630
 
 
 
 
 
 
 
 
 
 
0ff7a5c
 
 
 
 
33b9630
 
 
 
 
 
 
 
 
 
 
0ff7a5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33b9630
0ff7a5c
 
 
 
 
 
 
 
 
 
33b9630
 
 
 
0ff7a5c
 
 
 
936240d
5276211
33b9630
 
 
 
 
 
 
 
 
 
 
0ff7a5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33b9630
0ff7a5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a4fb03
 
 
 
 
 
 
 
 
 
0ff7a5c
 
7a4fb03
 
 
 
 
0ff7a5c
7a4fb03
 
 
 
 
 
 
0ff7a5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33b9630
0ff7a5c
 
 
 
 
 
 
 
 
7a4fb03
0ff7a5c
 
 
 
 
 
 
 
33b9630
 
 
 
 
 
 
0ff7a5c
 
cb8b30e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
import json
import os
import sys
import tempfile
import time
import traceback

import google.generativeai as genai
from dotenv import load_dotenv
from flask import Flask, render_template, request, jsonify
from PIL import Image

# Load environment variables (including any defined in a local .env file)
load_dotenv()

# Read the Gemini API key from the environment only. A credential must never
# be embedded in source code as a fallback: anything committed to a repository
# has to be treated as public and revoked.
api_key = os.getenv("GEMINI_API_KEY")
if api_key:
    # Configure the client at import time so the request handlers also work
    # when the app is served by a WSGI server and the __main__ block never runs.
    genai.configure(api_key=api_key)
else:
    print("WARNING: GEMINI_API_KEY environment variable is not set. Gemini API calls will fail until it is configured.")

# Startup connectivity check for the Gemini API
def test_gemini_api():
    """Report whether a simple text prompt to Gemini yields a non-empty reply.

    Configures the client with the module-level ``api_key``, sends a short
    prompt to the 'gemini-2.0-flash' model and prints the stripped reply.

    Returns:
        True when the model answered with non-empty text. False when the
        reply was empty or when any exception was raised; in the latter case
        the error message and traceback are printed.
    """
    try:
        genai.configure(api_key=api_key)

        # A plain text prompt is enough to prove the key and model are usable.
        probe_model = genai.GenerativeModel('gemini-2.0-flash')
        reply = probe_model.generate_content("Hello, please respond with 'API is working'")

        reply_has_text = bool(reply) and hasattr(reply, 'text') and bool(reply.text)
        if not reply_has_text:
            print("WARNING: Received empty response during API test")
            return False

        print(f"API Test Response: {reply.text.strip()}")
        return True
    except Exception as exc:
        print(f"ERROR: Failed to connect to Gemini API: {exc!s}")
        print(traceback.format_exc())
        return False

# Create the Flask application object; the route and error-handler
# decorators in this module register against it.
app = Flask(__name__)

# JSON response for HTTP 500 errors
@app.errorhandler(500)
def server_error(e):
    """Return a JSON body carrying the error text, with HTTP status 500."""
    message = f"Internal server error: {e!s}"
    payload = jsonify(error=message)
    return payload, 500

def _shrink_to_fit(img, max_dimension):
    """Return ``img`` scaled down so neither side exceeds ``max_dimension``.

    The aspect ratio is preserved. When the image already fits, the same
    object is returned unchanged; otherwise a new, resized image is returned.
    """
    if img.width <= max_dimension and img.height <= max_dimension:
        return img

    print(f"Resizing large image from {img.width}x{img.height}")
    ratio = min(max_dimension / img.width, max_dimension / img.height)
    # Clamp to 1 px: for extreme aspect ratios int() would otherwise truncate
    # the short side to 0 and make the resize fail.
    new_width = max(1, int(img.width * ratio))
    new_height = max(1, int(img.height * ratio))
    resized = img.resize((new_width, new_height))
    print(f"Resized to {new_width}x{new_height}")
    return resized


def extract_text_with_gemini(image_path):
    """Extract text from an image file using the Gemini vision model.

    The image is opened with Pillow, scaled down in memory so that neither
    side exceeds 1024 pixels, and sent to 'gemini-2.0-flash' together with a
    text-extraction prompt. The file at ``image_path`` is only read; it is
    never rewritten. Any failure is retried up to three times with a
    two-second pause between attempts.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The extracted text with surrounding whitespace stripped. If every
        attempt fails, a string starting with
        "Could not extract text from the image" followed by the last error;
        ``translate_text`` recognises that prefix.
    """
    max_retries = 3
    retry_delay = 2
    max_dimension = 1024
    model_name = 'gemini-2.0-flash'
    prompt = "Extract all the text from this image. Return only the extracted text, nothing else."

    for attempt in range(max_retries):
        try:
            print(f"Attempt {attempt + 1} to extract text using Gemini...")
            print(f"Trying model: {model_name}")
            model = genai.GenerativeModel(model_name)

            with Image.open(image_path) as source_img:
                print(f"Image loaded from {image_path} (Size: {source_img.size}, Format: {source_img.format})")

                # Resize in memory only (the API may have size limits). The
                # resized copy is deliberately not saved back to image_path:
                # nothing reads it from disk, and saving could fail for image
                # modes the target file format cannot store.
                img = _shrink_to_fit(source_img, max_dimension)
                try:
                    print("Sending request to Gemini API for text extraction...")
                    response = model.generate_content([prompt, img])
                finally:
                    # The context manager only closes the image it opened.
                    if img is not source_img:
                        img.close()

            # Validate response
            if not response or not hasattr(response, 'text') or not response.text:
                raise ValueError("Received empty response from Gemini API")

            extracted_text = response.text.strip()
            print(f"Successfully extracted text (length: {len(extracted_text)})")
            return extracted_text
        except Exception as e:
            # Boundary handler: every failure is logged, then retried or
            # reported to the caller as an error string.
            print(f"Attempt {attempt + 1} failed: {str(e)}")
            print(traceback.format_exc())
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                continue
            return f"Could not extract text from the image: {str(e)}"


def translate_text(text):
    """Translate English text to Hindi with the Gemini model.

    Args:
        text: The English text to translate.

    Returns:
        - "No text to translate." when ``text`` is empty or only whitespace.
        - "Cannot translate due to OCR failure." when ``text`` begins with
          the extraction-failure message.
        - Otherwise the model's reply with surrounding whitespace stripped,
          or "Translation failed: <error>" once three attempts have failed.
    """
    has_content = bool(text) and text.strip() != ""
    if not has_content:
        return "No text to translate."

    # An extraction error message is not real content; do not translate it.
    if text.startswith("Could not extract text from the image"):
        return "Cannot translate due to OCR failure."

    total_attempts = 3
    pause_seconds = 2
    candidate_models = ['gemini-2.0-flash']

    for attempt_no in range(1, total_attempts + 1):
        try:
            print(f"Attempt {attempt_no} to translate text using Gemini...")

            # Take the first candidate that can be constructed; an error on
            # the last candidate is passed on to the retry handler below.
            for position, candidate in enumerate(candidate_models):
                try:
                    print(f"Trying model: {candidate}")
                    model = genai.GenerativeModel(candidate)
                except Exception as model_error:
                    print(f"Error with model {candidate}: {str(model_error)}")
                    if position == len(candidate_models) - 1:
                        raise
                else:
                    break

            # Translation prompt, spelled out line by line so that its exact
            # whitespace is visible.
            prompt = (
                "\n"
                "            Translate the following English text to Hindi. \n"
                "            Keep proper names, titles, and organization names unchanged.\n"
                f"            Text to translate: {text}\n"
                "            "
            )

            print("Sending request to Gemini API for translation...")
            reply = model.generate_content(prompt)

            reply_has_text = bool(reply) and hasattr(reply, 'text') and bool(reply.text)
            if not reply_has_text:
                raise ValueError("Received empty response from Gemini API")

            hindi_text = reply.text.strip()
            print(f"Successfully translated text (length: {len(hindi_text)})")
            return hindi_text
        except Exception as exc:
            print(f"Translation attempt {attempt_no} failed: {exc!s}")
            print(traceback.format_exc())
            if attempt_no == total_attempts:
                return f"Translation failed: {exc!s}"
            print(f"Retrying in {pause_seconds} seconds...")
            time.sleep(pause_seconds)


@app.route('/')
def home():
    """Serve the landing page by rendering the 'index.html' template."""
    landing_page = render_template('index.html')
    return landing_page


def _image_extension(filename):
    """Return the lower-cased extension of ``filename`` if it is an accepted image type, else None."""
    allowed_extensions = {'png', 'jpg', 'jpeg', 'gif', 'bmp'}
    if '.' not in filename:
        return None
    extension = filename.rsplit('.', 1)[1].lower()
    return extension if extension in allowed_extensions else None


@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle an image upload: extract its text and translate it to Hindi.

    Expects a multipart form field named 'file' holding a PNG, JPG, JPEG,
    GIF or BMP image. The upload is stored in a uniquely named temporary
    file under the 'temp' directory, processed, and always removed again.

    Returns:
        A JSON response with 'original_text' and 'translated_text' on
        success; a JSON 'error' with status 400 for a missing or unsupported
        file; a JSON 'error' with status 500 if processing raises.
    """
    print("Received upload request")
    if 'file' not in request.files:
        print("No file part in the request")
        return jsonify({'error': 'No file uploaded'}), 400

    file = request.files['file']
    # A missing filename (None) is treated like an empty one instead of
    # failing later in the extension check.
    if not file.filename:
        print("No file selected")
        return jsonify({'error': 'No file selected'}), 400

    # Check file extension
    extension = _image_extension(file.filename)
    if extension is None:
        print(f"Invalid file format: {file.filename}")
        return jsonify({'error': 'Invalid file format. Please upload an image (PNG, JPG, JPEG, GIF, BMP).'}), 400

    temp_path = None
    try:
        # Create temp directory if it doesn't exist
        temp_dir = "temp"
        os.makedirs(temp_dir, exist_ok=True)
        print(f"Ensuring temp directory exists: {temp_dir}")

        # Best effort: make sure the temp directory is writable
        try:
            if not os.access(temp_dir, os.W_OK):
                os.chmod(temp_dir, 0o755)
                print(f"Updated permissions for temp directory: {temp_dir}")
        except OSError as perm_error:
            print(f"Warning: Could not update permissions: {str(perm_error)}")

        # Reserve a unique file so simultaneous uploads can never overwrite
        # or delete each other's image. The validated extension is kept so
        # the file name matches the uploaded image type.
        fd, temp_path = tempfile.mkstemp(
            prefix="temp_image_", suffix=f".{extension}", dir=temp_dir
        )
        os.close(fd)
        print(f"Saving uploaded file to {temp_path}")
        file.save(temp_path)

        # Best effort: make the file readable
        try:
            os.chmod(temp_path, 0o644)
            print(f"Updated permissions for file: {temp_path}")
        except OSError as file_perm_error:
            print(f"Warning: Could not update file permissions: {str(file_perm_error)}")

        # Extract text using Gemini
        print("Starting text extraction...")
        extracted_text = extract_text_with_gemini(temp_path)
        print(f"Text extraction result: {extracted_text[:100]}...")

        # Translate text
        print("Starting text translation...")
        translated_text = translate_text(extracted_text)
        print(f"Translation result: {translated_text[:100]}...")

        return jsonify({
            'original_text': extracted_text,
            'translated_text': translated_text
        })
    except Exception as e:
        # Request boundary: log the failure and report it as a JSON 500.
        error_msg = f"Error processing image: {str(e)}"
        print(error_msg)
        print(traceback.format_exc())
        return jsonify({
            'error': error_msg
        }), 500
    finally:
        # Clean up the temporary file; a cleanup failure must not affect
        # the response.
        try:
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)
                print(f"Removed temporary file: {temp_path}")
        except OSError as e:
            print(f"Failed to remove temporary file: {str(e)}")


if __name__ == '__main__':
    # Ensure the template folder exists
    if not os.path.exists('templates'):
        os.makedirs('templates')
        print("Created 'templates' directory. Please place your HTML files here.")

    # Test API connectivity at startup; a failure is reported but does not
    # stop the server from starting.
    api_working = test_gemini_api()
    if api_working:
        print("✅ Gemini API connection successful!")
    else:
        print("❌ WARNING: Gemini API connection failed. The application may not work correctly!")

    # For Hugging Face Spaces, we need to listen on 0.0.0.0 and port 7860.
    # Resolve the port once so the log line and the server always agree.
    port = int(os.environ.get('PORT', 7860))
    print(f"Starting Flask app on port {port}")
    app.run(host='0.0.0.0', port=port)