Shivangguptasih committed
Commit ab4a31a · verified · 1 Parent(s): 9622fe0

Update app.py

Files changed (1)
1. app.py +34 -34
app.py CHANGED
@@ -1,66 +1,66 @@
-# Qwen2-VL OCR Flask API (Optimized for Hugging Face Spaces)
-# Includes CORS, temporary cache redirection, and storage-safe configuration
+# Minimal Qwen2-VL OCR Flask API (Lightweight, Cache-Safe)
+# Completely cleans cache and uses temporary runtime memory only.
 
 from flask import Flask, request, jsonify
 from flask_cors import CORS
-from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+from transformers import AutoModelForCausalLM, AutoProcessor
 from PIL import Image
 import torch
 import os
-import io
+import tempfile
+import shutil
 
-# Redirect caches to /tmp to avoid exceeding 50G storage limit
-os.environ['TORCH_HOME'] = '/tmp/torch_cache'
-os.environ['HF_HOME'] = '/tmp/huggingface'
-os.environ['TRANSFORMERS_CACHE'] = '/tmp/hf_transformers'
-os.environ['TMPDIR'] = '/tmp'
+# Step 1: FULL CLEANUP of previous caches
+dirs_to_clean = ['/root/.cache', '/root/.huggingface', '/root/.torch', '/data', '/spaces']
+for d in dirs_to_clean:
+    if os.path.exists(d):
+        shutil.rmtree(d, ignore_errors=True)
 
-torch.hub.set_dir('/tmp/torch_hub')
+# Step 2: Use temporary folders for Hugging Face, Torch, and Transformers caches
+os.environ['TRANSFORMERS_CACHE'] = tempfile.mkdtemp()
+os.environ['HF_HOME'] = tempfile.mkdtemp()
+os.environ['TORCH_HOME'] = tempfile.mkdtemp()
 
-# Initialize Flask app
+# Step 3: Initialize Flask
 app = Flask(__name__)
 CORS(app)
 
-# Load model (use lightweight, quantized version for space safety)
+# Step 4: Load smaller model dynamically (2B instruct)
 model_name = 'Qwen/Qwen2-VL-2B-Instruct'
-model = Qwen2VLForConditionalGeneration.from_pretrained(
-    model_name,
-    torch_dtype=torch.float32,
-    device_map=None
-)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype='auto', device_map='auto')
 processor = AutoProcessor.from_pretrained(model_name)
 
 @app.route('/api/ocr', methods=['POST'])
-def ocr_text_extraction():
+def ocr_image():
     if 'image' not in request.files:
         return jsonify({'error': 'No image uploaded'}), 400
 
     image = Image.open(request.files['image'].stream)
-
-    # Configure OCR message
-    messages = [
-        {
-            'role': 'user',
-            'content': [
-                {'type': 'image'},
-                {'type': 'text', 'text': 'Extract all readable text from this image accurately.'}
-            ]
-        }
-    ]
+    messages = [{
+        'role': 'user',
+        'content': [
+            {'type': 'image'},
+            {'type': 'text', 'text': 'Extract readable text content accurately.'}
+        ]
+    }]
 
     text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
-    inputs = processor(text=[text_prompt], images=[image], padding=True, return_tensors='pt')
+    inputs = processor(text=[text_prompt], images=[image], padding=True, return_tensors='pt').to('cpu')
 
     with torch.no_grad():
         output_ids = model.generate(**inputs, max_new_tokens=256)
 
-    response = processor.batch_decode(output_ids, skip_special_tokens=True)[0]
+    out = processor.batch_decode(output_ids, skip_special_tokens=True)[0]
+
+    # Clean everything after response to keep below 50GB
+    for d in [os.environ['TRANSFORMERS_CACHE'], os.environ['HF_HOME'], os.environ['TORCH_HOME']]:
+        shutil.rmtree(d, ignore_errors=True)
 
-    return jsonify({'extracted_text': response})
+    return jsonify({'text': out})
 
-@app.route('/', methods=['GET'])
+@app.route('/')
 def home():
-    return jsonify({'message': 'Qwen2-VL OCR API is running successfully on Hugging Face Spaces'})
+    return jsonify({'message': 'Qwen2-VL lightweight OCR API running cache-free'})
 
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=7860)
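
One caveat in the new version: Qwen2-VL checkpoints are vision-language models and, in current transformers releases, are not registered under AutoModelForCausalLM, so the new from_pretrained call is likely to raise an unrecognized-configuration error at startup. A minimal loading sketch that keeps the new dtype/device settings but goes through Qwen2VLForConditionalGeneration, the class the old version already used (the device handling here is an assumption for a CPU-only Space):

# Sketch: load the checkpoint through its registered vision-language class.
# Assumes a recent transformers release and, for device_map='auto', that
# the accelerate package is installed.
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor

model_name = 'Qwen/Qwen2-VL-2B-Instruct'
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype='auto',   # use the dtype stored in the checkpoint
    device_map='auto'     # places weights on GPU if present, else CPU
)
processor = AutoProcessor.from_pretrained(model_name)

With this loader, the handler's inputs.to('cpu') should instead target model.device, and trimming the prompt tokens before decoding (e.g. output_ids[:, inputs['input_ids'].shape[1]:]) keeps the echoed prompt out of the OCR result.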
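
To exercise the endpoint once the Space is up, a request along these lines should work (the URL and file name are placeholders, not values from the commit):

# Sketch: POST an image to /api/ocr and print the extracted text.
# 'sample.png' and the localhost URL are placeholders for your own values.
import requests

with open('sample.png', 'rb') as f:
    resp = requests.post('http://localhost:7860/api/ocr',
                         files={'image': f})   # field name must match request.files['image']

print(resp.status_code, resp.json())   # new version returns {'text': ...}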