Upload 15 files
Browse files- app/.ipynb_checkpoints/__init__-checkpoint.py +15 -0
- app/.ipynb_checkpoints/config-checkpoint.py +6 -0
- app/.ipynb_checkpoints/routes-checkpoint.py +41 -0
- app/.ipynb_checkpoints/utils-checkpoint.py +32 -0
- app/__init__.py +15 -0
- app/__pycache__/__init__.cpython-310.pyc +0 -0
- app/__pycache__/config.cpython-310.pyc +0 -0
- app/__pycache__/routes.cpython-310.pyc +0 -0
- app/__pycache__/utils.cpython-310.pyc +0 -0
- app/config.py +6 -0
- app/routes.py +41 -0
- app/utils.py +41 -0
- render.yaml +13 -0
- requirements.txt +0 -0
- runtime.txt +1 -0
app/.ipynb_checkpoints/__init__-checkpoint.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask
|
2 |
+
from app.config import Config
|
3 |
+
import os
|
4 |
+
|
5 |
+
def create_app():
    """Build and configure the Flask application.

    Loads settings from ``Config``, makes sure the upload directory exists,
    and registers the ``main`` blueprint.

    Returns:
        The configured ``Flask`` application instance.
    """
    app = Flask(__name__)
    app.config.from_object(Config)

    # Create the folder for uploaded files and point UPLOAD_FOLDER at that
    # exact absolute path, so the directory that is created and the directory
    # the routes write into cannot diverge when the process is started from a
    # different working directory.
    upload_dir = os.path.join(app.instance_path, 'uploads')
    os.makedirs(upload_dir, exist_ok=True)
    app.config['UPLOAD_FOLDER'] = upload_dir

    # Imported here rather than at module top, as in the original layout.
    from app.routes import main
    app.register_blueprint(main)

    return app
|
app/.ipynb_checkpoints/config-checkpoint.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
class Config:
    """Settings object loaded with ``app.config.from_object(Config)``."""

    # Taken from the SECRET_KEY environment variable. When it is unset, fall
    # back to a random per-process value instead of a publicly known constant;
    # set SECRET_KEY explicitly if the value must survive restarts or be
    # shared between worker processes.
    SECRET_KEY = os.environ.get('SECRET_KEY') or os.urandom(32).hex()
    # Default location for uploaded files.
    UPLOAD_FOLDER = 'instance/uploads'
    # 16MB max file size.
    MAX_CONTENT_LENGTH = 16 * 1024 * 1024
|
app/.ipynb_checkpoints/routes-checkpoint.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Blueprint, jsonify, request, current_app
|
2 |
+
import os
|
3 |
+
from werkzeug.utils import secure_filename
|
4 |
+
from app.utils import OCRModel
|
5 |
+
import torch
|
6 |
+
|
7 |
+
# Blueprint that collects the HTTP routes declared in this module.
main = Blueprint('main', __name__)

# OCR wrapper, constructed once when this module is imported and reused by
# the request handlers below.
ocr_model = OCRModel()
|
9 |
+
|
10 |
+
# Lower-case file extensions (without the dot) accepted for upload.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}


def allowed_file(filename):
    """Return True when *filename* ends in an allow-listed extension.

    The comparison is case-insensitive and looks only at the text after the
    last dot. A missing (``None``), empty or dot-less name is rejected
    instead of raising.
    """
    if not filename or '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
14 |
+
|
15 |
+
@main.route('/')
def index():
    """Return a JSON greeting for the API root."""
    greeting = {"message": "Welcome to OCR API!"}
    return jsonify(greeting)
|
18 |
+
|
19 |
+
@main.route('/api/ocr', methods=['POST'])
def process_image():
    """Run OCR on an uploaded image and return the result as JSON.

    Expects a multipart upload under the ``file`` field.

    Returns:
        ``{"result": ...}`` on success; ``{"error": ...}`` with status 400
        when the file part is missing, unnamed or has a disallowed
        extension; ``{"error": ...}`` with status 500 when saving or
        processing raises.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    if not (file and allowed_file(file.filename)):
        return jsonify({"error": "Invalid file type"}), 400

    # Store under a server-generated name: a random token plus the already
    # validated extension. No client-supplied text reaches the filesystem,
    # and two simultaneous uploads with the same name cannot overwrite or
    # delete each other's file.
    extension = file.filename.rsplit('.', 1)[1].lower()
    stored_name = f"{os.urandom(16).hex()}.{extension}"
    filepath = os.path.join(current_app.config['UPLOAD_FOLDER'], stored_name)

    try:
        file.save(filepath)
        result = ocr_model.process_image(filepath)
        return jsonify({"result": result})
    except Exception as e:
        current_app.logger.exception("OCR request failed")
        return jsonify({"error": str(e)}), 500
    finally:
        # Delete the file after processing, on the failure path as well.
        try:
            os.remove(filepath)
        except FileNotFoundError:
            # Saving failed before anything was written.
            pass
        except OSError:
            current_app.logger.warning(
                "Could not remove temporary upload %s", filepath
            )
|
app/.ipynb_checkpoints/utils-checkpoint.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import AutoModel, AutoTokenizer
|
2 |
+
import os
|
3 |
+
import torch
|
4 |
+
class OCRModel:
    """Singleton wrapper around the 'ucaslcl/GOT-OCR2_0' tokenizer and model.

    ``OCRModel()`` always returns the same object; the tokenizer and model
    are loaded the first time it is constructed successfully.
    """

    # Cached instance shared by every OCRModel() call.
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            # Publish the instance only after initialize() succeeds, so a
            # failed load is retried on the next call instead of leaving a
            # cached object without tokenizer/model attributes.
            instance = super(OCRModel, cls).__new__(cls)
            instance.initialize()
            cls._instance = instance
        return cls._instance

    def initialize(self):
        """Load the tokenizer and model and switch the model to eval mode."""
        self.tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
        self.model = AutoModel.from_pretrained(
            'ucaslcl/GOT-OCR2_0',
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            device_map='cuda' if torch.cuda.is_available() else 'cpu',
            use_safetensors=True,
            pad_token_id=self.tokenizer.eos_token_id
        )
        self.model = self.model.eval()
        if torch.cuda.is_available():
            self.model = self.model.cuda()

    def process_image(self, image_path):
        """Run OCR on the image at *image_path*.

        Returns:
            Whatever ``self.model.chat`` returns for ``ocr_type='format'``.
            If that call raises, the exception text is returned as a string
            instead of propagating.
        """
        try:
            result = self.model.chat(self.tokenizer, image_path, ocr_type='format')
            return result
        except Exception as e:
            return str(e)
|
app/__init__.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask
|
2 |
+
from app.config import Config
|
3 |
+
import os
|
4 |
+
|
5 |
+
def create_app():
    """Build and configure the Flask application.

    Loads settings from ``Config``, makes sure the upload directory exists,
    and registers the ``main`` blueprint.

    Returns:
        The configured ``Flask`` application instance.
    """
    app = Flask(__name__)
    app.config.from_object(Config)

    # Create the folder for uploaded files and point UPLOAD_FOLDER at that
    # exact absolute path, so the directory that is created and the directory
    # the routes write into cannot diverge when the process is started from a
    # different working directory.
    upload_dir = os.path.join(app.instance_path, 'uploads')
    os.makedirs(upload_dir, exist_ok=True)
    app.config['UPLOAD_FOLDER'] = upload_dir

    # Imported here rather than at module top, as in the original layout.
    from app.routes import main
    app.register_blueprint(main)

    return app
|
app/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (564 Bytes). View file
|
|
app/__pycache__/config.cpython-310.pyc
ADDED
Binary file (438 Bytes). View file
|
|
app/__pycache__/routes.cpython-310.pyc
ADDED
Binary file (1.42 kB). View file
|
|
app/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (1.35 kB). View file
|
|
app/config.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
class Config:
    """Settings object loaded with ``app.config.from_object(Config)``."""

    # Taken from the SECRET_KEY environment variable. When it is unset, fall
    # back to a random per-process value instead of a publicly known constant;
    # set SECRET_KEY explicitly if the value must survive restarts or be
    # shared between worker processes.
    SECRET_KEY = os.environ.get('SECRET_KEY') or os.urandom(32).hex()
    # Default location for uploaded files.
    UPLOAD_FOLDER = 'instance/uploads'
    # 16MB max file size.
    MAX_CONTENT_LENGTH = 16 * 1024 * 1024
|
app/routes.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Blueprint, jsonify, request, current_app
|
2 |
+
import os
|
3 |
+
from werkzeug.utils import secure_filename
|
4 |
+
from app.utils import OCRModel
|
5 |
+
import torch
|
6 |
+
|
7 |
+
# Blueprint that collects the HTTP routes declared in this module.
main = Blueprint('main', __name__)

# OCR wrapper, constructed once when this module is imported and reused by
# the request handlers below.
ocr_model = OCRModel()
|
9 |
+
|
10 |
+
# Lower-case file extensions (without the dot) accepted for upload.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}


def allowed_file(filename):
    """Return True when *filename* ends in an allow-listed extension.

    The comparison is case-insensitive and looks only at the text after the
    last dot. A missing (``None``), empty or dot-less name is rejected
    instead of raising.
    """
    if not filename or '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
14 |
+
|
15 |
+
@main.route('/')
def index():
    """Return a JSON greeting for the API root."""
    greeting = {"message": "Welcome to OCR API!"}
    return jsonify(greeting)
|
18 |
+
|
19 |
+
@main.route('/api/ocr', methods=['POST'])
def process_image():
    """Run OCR on an uploaded image and return the result as JSON.

    Expects a multipart upload under the ``file`` field.

    Returns:
        ``{"result": ...}`` on success; ``{"error": ...}`` with status 400
        when the file part is missing, unnamed or has a disallowed
        extension; ``{"error": ...}`` with status 500 when saving or
        processing raises.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    if not (file and allowed_file(file.filename)):
        return jsonify({"error": "Invalid file type"}), 400

    # Store under a server-generated name: a random token plus the already
    # validated extension. No client-supplied text reaches the filesystem,
    # and two simultaneous uploads with the same name cannot overwrite or
    # delete each other's file.
    extension = file.filename.rsplit('.', 1)[1].lower()
    stored_name = f"{os.urandom(16).hex()}.{extension}"
    filepath = os.path.join(current_app.config['UPLOAD_FOLDER'], stored_name)

    try:
        file.save(filepath)
        result = ocr_model.process_image(filepath)
        return jsonify({"result": result})
    except Exception as e:
        current_app.logger.exception("OCR request failed")
        return jsonify({"error": str(e)}), 500
    finally:
        # Delete the file after processing, on the failure path as well.
        try:
            os.remove(filepath)
        except FileNotFoundError:
            # Saving failed before anything was written.
            pass
        except OSError:
            current_app.logger.warning(
                "Could not remove temporary upload %s", filepath
            )
|
app/utils.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import AutoModel, AutoTokenizer
|
2 |
+
import os
|
3 |
+
import torch
|
4 |
+
|
5 |
+
class OCRModel:
    """Singleton wrapper around an OCR tokenizer/model pair.

    ``OCRModel()`` always returns the same object; the tokenizer and model
    are loaded the first time it is constructed successfully.
    """

    # Cached instance shared by every OCRModel() call.
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            # Publish the instance only after initialize() succeeds, so a
            # failed load is retried on the next call instead of leaving a
            # cached object without tokenizer/model attributes.
            instance = super(OCRModel, cls).__new__(cls)
            instance.initialize()
            cls._instance = instance
        return cls._instance

    def initialize(self):
        """Load the tokenizer and model and switch the model to eval mode.

        The model identifier or path is read from the ``MODEL_PATH``
        environment variable and defaults to ``'ucaslcl/GOT-OCR2_0'``.
        """
        # Load the model once and keep it on this instance.
        model_path = os.getenv('MODEL_PATH', 'ucaslcl/GOT-OCR2_0')

        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            trust_remote_code=True,
            local_files_only=False  # files are downloaded if not already present
        )

        self.model = AutoModel.from_pretrained(
            model_path,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            device_map='auto',  # let the loader pick the best available device
            use_safetensors=True,
            pad_token_id=self.tokenizer.eos_token_id
        )

        self.model = self.model.eval()

    def process_image(self, image_path):
        """Run OCR on the image at *image_path*.

        Returns:
            Whatever ``self.model.chat`` returns for ``ocr_type='format'``.
            If that call raises, a string of the form
            ``"Error processing image: <message>"`` is returned instead of
            propagating the exception.
        """
        try:
            # Gradients are not needed for inference.
            with torch.no_grad():
                result = self.model.chat(self.tokenizer, image_path, ocr_type='format')
            return result
        except Exception as e:
            return f"Error processing image: {str(e)}"
|
render.yaml
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Deployment blueprint: one Python web service built from the main branch.
services:
  - type: web
    name: my-app
    env: python
    repo: https://github.com/ZienabMakhloof/ocr.git
    branch: main
    # Install dependencies, then launch the app script.
    buildCommand: "pip install -r requirements.txt"
    startCommand: "python app.py"
    envVars:
      # NOTE(review): runtime.txt in this same upload pins python-3.10.15,
      # which differs from the value below - confirm which one is intended.
      - key: PYTHON_VERSION
        value: 3.10.11
    region: oregon
    plan: standard
|
requirements.txt
ADDED
Binary file (264 Bytes). View file
|
|
runtime.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python-3.10.15
|