ZienabM committed on
Commit
c30f358
·
verified ·
1 Parent(s): 08c3118

Upload 10 files

Browse files
Files changed (10) hide show
  1. Dockerfile +37 -0
  2. app.py +6 -0
  3. app/__init__.py +9 -0
  4. app/config.py +6 -0
  5. app/routes.py +117 -0
  6. app/utils.py +44 -0
  7. docker-compose.yml +21 -0
  8. render.yaml +13 -0
  9. requirements.txt +8 -0
  10. runtime.txt +1 -0
Dockerfile ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the OCR / allergen-check Flask service.
FROM python:3.10-slim

# Skip .pyc files, flush stdout/stderr immediately, keep apt non-interactive.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV DEBIAN_FRONTEND=noninteractive
# Hugging Face repository id; app/utils.py reads it through MODEL_PATH.
ENV MODEL_PATH=RufusRubin777/GOT-OCR2_0_CPU

WORKDIR /app

# Install the base system libraries.
RUN apt-get update && apt-get install -y \
    build-essential \
    libpq-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy and install the Python requirements before the application code so this
# layer stays cached when only the code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download the model files into the Hugging Face cache.
# The `transformers_modules` package only exists after remote code has been
# loaded once, so importing from it in a fresh image fails. Fetching the
# repository snapshot needs no model code at build time.
RUN mkdir -p /root/.cache/huggingface \
    && python -c "import os; from huggingface_hub import snapshot_download; snapshot_download(os.environ['MODEL_PATH'])"

# Declare the cache volume only after it has been populated: build steps that
# modify a volume after its declaration may be discarded by the builder.
VOLUME /root/.cache/huggingface

# Copy the application code.
COPY . .

EXPOSE 7863

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
from app import create_app

# Address the development server binds to: every interface, port 7863.
HOST = '0.0.0.0'
PORT = 7863

# Application object, built once when this module is loaded.
app = create_app()

if __name__ == '__main__':
    app.run(host=HOST, port=PORT)
app/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask
2
+
3
def create_app():
    """Build and return the configured Flask application.

    Returns:
        Flask: the application with the settings from ``app.config.Config``
        applied and the ``main`` blueprint registered.
    """
    app = Flask(__name__)

    # Apply the settings declared in app/config.py (secret key, upload folder,
    # 16 MB request-size cap). Without this call the Config class is never
    # used and uploads have no size limit.
    app.config.from_object('app.config.Config')

    # Imported inside the factory: app.routes instantiates the OCR model at
    # import time, so it should only be loaded when an app is actually built.
    from app.routes import main
    app.register_blueprint(main)

    return app
app/config.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import os
2
+
3
class Config:
    """Settings container for the Flask application."""

    # Taken from the SECRET_KEY environment variable; an unset or empty value
    # falls back to the placeholder below.
    # NOTE(review): the placeholder is publicly known - set SECRET_KEY in any
    # real deployment.
    SECRET_KEY = os.getenv('SECRET_KEY') or 'your-secret-key'

    # Directory intended for uploaded files.
    UPLOAD_FOLDER = 'instance/uploads'

    # 16MB max file size.
    MAX_CONTENT_LENGTH = 16 * 2 ** 20
app/routes.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Blueprint, jsonify, request
2
+ import io
3
+ from app.utils import OCRModel
4
+
5
# Blueprint collecting every HTTP route defined in this module.
main = Blueprint('main', __name__)
# Shared OCR wrapper, instantiated once when this module is imported.
ocr_model = OCRModel()
7
+
8
# File extensions accepted for uploaded images.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}

# Known allergen categories mapped to the ingredient keywords that indicate
# them (customise as needed).
KNOWN_ALLERGENS = {
    'gluten': ['wheat', 'barley', 'gluten'],
    'dairy': ['milk', 'yogurt', 'cheese', 'lactose'],
    'nuts': ['nuts', 'peanuts', 'almonds', 'walnuts'],
    'eggs': ['eggs'],
    'soy': ['soy'],
    'fish': ['fish'],
    'shellfish': ['oyster', 'shrimp'],
}


def allowed_file(filename):
    """Return True when *filename* has one of the allowed image extensions.

    The extension is the text after the last dot, compared case-insensitively
    against ``ALLOWED_EXTENSIONS``. Names without a dot are rejected.
    """
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def find_allergens(text, user_allergens):
    """Search *text* for the allergens the user asked about.

    Matching is a case-insensitive substring search. An allergen that is a key
    of ``KNOWN_ALLERGENS`` matches when any of its keywords occurs in the
    text; any other allergen is searched for literally.

    Args:
        text: the text to scan (typically OCR output).
        user_allergens: iterable of allergen names; surrounding whitespace is
            ignored and blank entries are skipped.

    Returns:
        A ``(found_allergens, allergen_details)`` tuple: the set of normalised
        allergen names that matched, and a dict mapping each of them to the
        keyword that matched (the last matching keyword when several do).
    """
    haystack = text.lower()
    found_allergens = set()
    allergen_details = {}

    for raw_allergen in user_allergens:
        allergen = raw_allergen.strip().lower()
        if not allergen:
            # '' is a substring of every string, so a blank entry (from
            # "milk,,soy" or an empty form field) would always "match".
            continue

        if allergen in KNOWN_ALLERGENS:
            # Known category: look for each of its keywords.
            for variant in KNOWN_ALLERGENS[allergen]:
                if variant.lower() in haystack:
                    found_allergens.add(allergen)
                    allergen_details[allergen] = variant
        elif allergen in haystack:
            # Unknown category: search for the entered text itself.
            found_allergens.add(allergen)
            allergen_details[allergen] = allergen

    return found_allergens, allergen_details
46
+
47
@main.route('/')
def index():
    """Return a JSON description of the service.

    The payload carries a welcome message, the available endpoints, the
    accepted image formats and the names of the known allergen categories.
    """
    endpoints = {
        "/api/ocr": "POST - Image analysis and sensitivity testing",
    }
    return jsonify(
        message="Welcome to the Text Recognition and Sensitivity Checking Service",
        endpoints=endpoints,
        supported_formats=list(ALLOWED_EXTENSIONS),
        known_allergens=list(KNOWN_ALLERGENS),
    )
57
+
58
@main.route('/api/ocr', methods=['POST'])
def process_image():
    """Run OCR on an uploaded image and check the text for allergens.

    Expects a multipart form with a ``file`` part (png/jpg/jpeg image) and an
    ``allergens`` field holding a comma-separated list of allergen names.

    Returns:
        200 with the extracted text and the allergen analysis on success;
        400 when the file or the allergen list is missing, empty or of an
        unsupported type; 500 when processing the image raises.
    """
    # Make sure a file part was sent.
    if 'file' not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    # Make sure the allergen list was sent.
    if 'allergens' not in request.form:
        return jsonify({"error": "Sensitivities not specified"}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No file selected"}), 400

    # Validate the file type.
    if not allowed_file(file.filename):
        return jsonify({
            "error": "File type not supported",
            "supported_formats": list(ALLOWED_EXTENSIONS)
        }), 400

    # Build the allergen list, dropping blank entries: an empty string is a
    # substring of any text and would otherwise be reported as "found".
    user_allergens = [
        entry.strip()
        for entry in request.form['allergens'].split(',')
        if entry.strip()
    ]
    if not user_allergens:
        return jsonify({"error": "Sensitivities not specified"}), 400

    try:
        # Read the upload into an in-memory stream.
        file_bytes = file.read()
        file_stream = io.BytesIO(file_bytes)

        # Run OCR on the image.
        extracted_text = ocr_model.process_image(file_stream)

        # Look for the requested allergens in the recognised text.
        found_allergens, allergen_details = find_allergens(extracted_text, user_allergens)

        # Assemble the response payload.
        response = {
            "success": True,
            "extracted_text": extracted_text,
            "analysis": {
                "found_allergens": list(found_allergens),
                "allergen_details": allergen_details,
                "has_allergens": len(found_allergens) > 0,
                "warning": "Warning: Allergens found!" if found_allergens else "No allergens found"
            }
        }

        return jsonify(response)

    except Exception as e:
        # Request boundary: report any processing failure as a 500.
        return jsonify({
            "error": "An error occurred while processing the image.",
            "details": str(e)
        }), 500
112
+
113
@main.route('/api/allergens', methods=['GET'])
def get_known_allergens():
    """Return the known allergen categories with their keywords as JSON."""
    return jsonify(allergens=KNOWN_ALLERGENS)
app/utils.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
3
+
4
class OCRModel:
    """Singleton wrapper around the OCR model and its tokenizer.

    The first ``OCRModel()`` call loads the tokenizer and model; every later
    call returns that same instance.
    """

    # The single shared instance, set once loading has succeeded.
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            instance = super().__new__(cls)
            # Publish the singleton only after loading succeeded, so a failed
            # load is retried on the next call instead of leaving behind an
            # instance that has no ``model``/``tokenizer`` attributes.
            instance.initialize()
            cls._instance = instance
        return cls._instance

    def initialize(self):
        """Load the tokenizer and the model named by ``MODEL_PATH``.

        ``MODEL_PATH`` is read from the environment and defaults to
        ``RufusRubin777/GOT-OCR2_0_CPU``.
        """
        model_path = os.getenv('MODEL_PATH', 'RufusRubin777/GOT-OCR2_0_CPU')

        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            trust_remote_code=True,
            local_files_only=False  # files are downloaded when not cached yet
        )

        self.model = AutoModel.from_pretrained(
            model_path,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            device_map='cpu',  # pin the model to the CPU
            use_safetensors=True,
            pad_token_id=self.tokenizer.eos_token_id  # reuse EOS as the padding id
        )

        # Switch to evaluation mode.
        self.model = self.model.eval()

    def process_image(self, image_stream):
        """Run OCR on an image and return the model's result.

        Args:
            image_stream: file-like object (or path) accepted by
                ``PIL.Image.open``.

        Returns:
            Whatever ``model.chat(..., ocr_type='format')`` returns.

        Raises:
            Exception: any error raised while opening the image or running the
                model. Errors propagate instead of being returned as text, so
                callers cannot mistake an error message for recognised text.
        """
        # Open the image from the in-memory stream.
        image = Image.open(image_stream)

        # NOTE(review): confirm this model's `chat` accepts a PIL image rather
        # than an image file path.
        with torch.no_grad():
            return self.model.chat(self.tokenizer, image, ocr_type='format')
docker-compose.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# '3.10' is not a published Compose file format version (the 3.x series ends
# at 3.9); 3.8 is used for the widest tool support.
version: '3.8'

services:
  app:
    build: .
    ports:
      - "7863:7863"
    volumes:
      # Persist downloaded model files across container restarts.
      - huggingface_cache:/root/.cache/huggingface
    environment:
      - MODEL_PATH=RufusRubin777/GOT-OCR2_0_CPU
    restart: unless-stopped
    deploy:
      resources:
        limits:
          memory: 4G
        reservations:
          memory: 2G

volumes:
  huggingface_cache:
render.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Render Blueprint: a single Python web service built from the GitHub repo.
services:
  - type: web
    name: my-app
    env: python
    repo: https://github.com/ZienabMakhloof/ocr.git
    branch: main
    buildCommand: "pip install -r requirements.txt"
    # Starts the Flask server defined in app.py (it listens on port 7863).
    startCommand: "python app.py"
    envVars:
      # NOTE(review): runtime.txt pins python-3.10.15 while this pins 3.10.11;
      # confirm which one is intended.
      - key: PYTHON_VERSION
        value: 3.10.11
    region: oregon
    plan: standard
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.1.1
2
+ Flask==3.1.0
3
+ torch==2.5.1
4
+ torchvision==0.20.1
5
+ transformers==4.37.2
6
+ tiktoken==0.6.0
7
+ verovio==4.3.1
8
+ gunicorn
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.10.15