Shivangguptasih committed
Commit ab4a31a · verified · 1 Parent(s): 9622fe0

Update app.py

Files changed (1)
1. app.py +34 -34
app.py CHANGED
@@ -1,66 +1,66 @@
-# Qwen2-VL OCR Flask API (Optimized for Hugging Face Spaces)
-# Includes CORS, temporary cache redirection, and storage-safe configuration
+# Minimal Qwen2-VL OCR Flask API (Lightweight, Cache-Safe)
+# Completely cleans cache and uses temporary runtime memory only.
 
 from flask import Flask, request, jsonify
 from flask_cors import CORS
-from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+from transformers import AutoModelForCausalLM, AutoProcessor
 from PIL import Image
 import torch
 import os
-import io
+import tempfile
+import shutil
 
-# Redirect caches to /tmp to avoid exceeding 50G storage limit
-os.environ['TORCH_HOME'] = '/tmp/torch_cache'
-os.environ['HF_HOME'] = '/tmp/huggingface'
-os.environ['TRANSFORMERS_CACHE'] = '/tmp/hf_transformers'
-os.environ['TMPDIR'] = '/tmp'
+# Step 1: FULL CLEANUP of previous caches
+dirs_to_clean = ['/root/.cache', '/root/.huggingface', '/root/.torch', '/data', '/spaces']
+for d in dirs_to_clean:
+    if os.path.exists(d):
+        shutil.rmtree(d, ignore_errors=True)
 
-torch.hub.set_dir('/tmp/torch_hub')
+# Step 2: Use temporary folders for Hugging Face, Torch, and Transformers caches
+os.environ['TRANSFORMERS_CACHE'] = tempfile.mkdtemp()
+os.environ['HF_HOME'] = tempfile.mkdtemp()
+os.environ['TORCH_HOME'] = tempfile.mkdtemp()
 
-# Initialize Flask app
+# Step 3: Initialize Flask
 app = Flask(__name__)
 CORS(app)
 
-# Load model (use lightweight, quantized version for space safety)
+# Step 4: Load smaller model dynamically (2B instruct)
 model_name = 'Qwen/Qwen2-VL-2B-Instruct'
-model = Qwen2VLForConditionalGeneration.from_pretrained(
-    model_name,
-    torch_dtype=torch.float32,
-    device_map=None
-)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype='auto', device_map='auto')
 processor = AutoProcessor.from_pretrained(model_name)
 
 @app.route('/api/ocr', methods=['POST'])
-def ocr_text_extraction():
+def ocr_image():
     if 'image' not in request.files:
         return jsonify({'error': 'No image uploaded'}), 400
 
     image = Image.open(request.files['image'].stream)
-
-    # Configure OCR message
-    messages = [
-        {
-            'role': 'user',
-            'content': [
-                {'type': 'image'},
-                {'type': 'text', 'text': 'Extract all readable text from this image accurately.'}
-            ]
-        }
-    ]
+    messages = [{
+        'role': 'user',
+        'content': [
+            {'type': 'image'},
+            {'type': 'text', 'text': 'Extract readable text content accurately.'}
+        ]
+    }]
 
     text_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
-    inputs = processor(text=[text_prompt], images=[image], padding=True, return_tensors='pt')
+    inputs = processor(text=[text_prompt], images=[image], padding=True, return_tensors='pt').to('cpu')
 
     with torch.no_grad():
         output_ids = model.generate(**inputs, max_new_tokens=256)
 
-    response = processor.batch_decode(output_ids, skip_special_tokens=True)[0]
+    out = processor.batch_decode(output_ids, skip_special_tokens=True)[0]
+
+    # Clean everything after response to keep below 50GB
+    for d in [os.environ['TRANSFORMERS_CACHE'], os.environ['HF_HOME'], os.environ['TORCH_HOME']]:
+        shutil.rmtree(d, ignore_errors=True)
 
-    return jsonify({'extracted_text': response})
+    return jsonify({'text': out})
 
-@app.route('/', methods=['GET'])
+@app.route('/')
 def home():
-    return jsonify({'message': 'Qwen2-VL OCR API is running successfully on Hugging Face Spaces'})
+    return jsonify({'message': 'Qwen2-VL lightweight OCR API running cache-free'})
 
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=7860)
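
One caveat in the new version: Qwen2-VL checkpoints are vision-language models and, in current transformers releases, are not registered under AutoModelForCausalLM, so the new from_pretrained call is likely to raise an unrecognized-configuration error at startup. A minimal loading sketch that keeps the new dtype/device settings but goes through Qwen2VLForConditionalGeneration, the class the old version already used (the device handling here is an assumption for a CPU-only Space):

# Sketch: load the checkpoint through its registered vision-language class.
# Assumes a recent transformers release and, for device_map='auto', that
# the accelerate package is installed.
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor

model_name = 'Qwen/Qwen2-VL-2B-Instruct'
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype='auto',   # use the dtype stored in the checkpoint
    device_map='auto'     # places weights on GPU if present, else CPU
)
processor = AutoProcessor.from_pretrained(model_name)

With this loader, the handler's inputs.to('cpu') should instead target model.device, and trimming the prompt tokens before decoding (e.g. output_ids[:, inputs['input_ids'].shape[1]:]) keeps the echoed prompt out of the OCR result.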
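
To exercise the endpoint once the Space is up, a request along these lines should work (the URL and file name are placeholders, not values from the commit):

# Sketch: POST an image to /api/ocr and print the extracted text.
# 'sample.png' and the localhost URL are placeholders for your own values.
import requests

with open('sample.png', 'rb') as f:
    resp = requests.post('http://localhost:7860/api/ocr',
                         files={'image': f})   # field name must match request.files['image']

print(resp.status_code, resp.json())   # new version returns {'text': ...}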