cdcvd committed on
Commit
075b69c
1 Parent(s): 63bc1e7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from PIL import Image, ImageOps, ImageChops
3
+ import io
4
+ import fitz # PyMuPDF
5
+ from docx import Document
6
+ from rembg import remove
7
+ import gradio as gr
8
+ from hezar.models import Model
9
+ from ultralytics import YOLO
10
+ import json
11
+
12
# Create the directories the app needs before anything is written to them.
for _required_dir in ("static", "output_images"):
    os.makedirs(_required_dir, exist_ok=True)
15
+
16
def trim_whitespace(image):
    """Crop *image* to the bounding box of its non-white content.

    The image is converted to grayscale and inverted so that pure-white
    pixels become zero; the bounding box of the remaining non-zero pixels
    is then used to crop the original image.
    """
    content_box = ImageChops.invert(ImageOps.grayscale(image)).getbbox()
    return image.crop(content_box)
22
+
23
def convert_pdf_to_images(pdf_path, zoom=2):
    """Render every page of a PDF to a whitespace-trimmed PIL image.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.
        zoom: Scale factor applied to both axes when rasterising a page.

    Returns:
        A list with one trimmed RGB image per page, in page order.
    """
    # The scaling matrix is identical for every page, so build it once.
    matrix = fitz.Matrix(zoom, zoom)
    images = []
    # Use a context manager so the document is closed even if rendering a
    # page raises; the original left the file handle open.
    with fitz.open(pdf_path) as pdf_document:
        for page in pdf_document:
            pix = page.get_pixmap(matrix=matrix)
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            images.append(trim_whitespace(image))
    return images
34
+
35
def convert_docx_to_jpeg(docx_bytes):
    """Extract the images embedded in a Word document as JPEG-encoded PIL images.

    Args:
        docx_bytes: Either the raw bytes of a ``.docx`` file, or a path /
            file-like object that ``Document`` can open directly.

    Returns:
        A list of PIL images, one per embedded image relationship found in
        the document, each re-encoded as JPEG.
    """
    # Only ``io`` is imported at file level, so ``BytesIO`` must be reached
    # through the module; the bare name raised NameError.
    if isinstance(docx_bytes, (bytes, bytearray)):
        source = io.BytesIO(docx_bytes)
    else:
        # A path or an already-open stream is passed straight through.
        source = docx_bytes
    document = Document(source)

    images = []
    for rel in document.part.rels.values():
        if "image" not in rel.target_ref:
            continue
        # Linked (external) images have no embedded part to read bytes from.
        if getattr(rel, "is_external", False):
            continue
        jpeg_buffer = io.BytesIO()
        with Image.open(io.BytesIO(rel.target_part.blob)) as embedded:
            embedded.convert('RGB').save(jpeg_buffer, format="JPEG")
        jpeg_buffer.seek(0)
        images.append(Image.open(jpeg_buffer))
    return images
47
+
48
def remove_background_from_image(image):
    """Return *image* after passing it through rembg's ``remove``."""
    cutout = remove(image)
    return cutout
50
+
51
def _flatten_to_rgb(image):
    """Return *image* as RGB, compositing any alpha channel over white."""
    if image.mode == "RGB":
        return image
    if image.mode in ("RGBA", "LA"):
        rgba = image.convert("RGBA")
        canvas = Image.new("RGB", rgba.size, (255, 255, 255))
        canvas.paste(rgba, mask=rgba.getchannel("A"))
        return canvas
    return image.convert("RGB")


def process_file(input_file):
    """Convert an uploaded file to background-removed images saved as JPEGs.

    Supported inputs are raster images, PDFs (one image per page) and Word
    documents (one image per embedded picture). Every resulting image is
    written to ``output_images/image_<i>.jpg``.

    Args:
        input_file: Uploaded file object exposing the file path as ``.name``.

    Returns:
        The list of background-removed images, or the string
        ``"File format not supported."`` for an unrecognised extension.
    """
    source_path = input_file.name
    file_extension = os.path.splitext(source_path)[1].lower()

    if file_extension in ('.png', '.jpeg', '.jpg', '.bmp', '.gif'):
        # convert() loads the pixel data, so the file can be closed right away.
        with Image.open(source_path) as opened:
            pages = [opened.convert('RGB')]
    elif file_extension == '.pdf':
        pages = convert_pdf_to_images(source_path)
    elif file_extension in ('.docx', '.doc'):
        # The converter's parameter is the document's bytes, not its path.
        with open(source_path, 'rb') as docx_file:
            pages = convert_docx_to_jpeg(docx_file.read())
    else:
        return "File format not supported."

    images = [remove_background_from_image(page) for page in pages]

    output_folder = 'output_images'
    os.makedirs(output_folder, exist_ok=True)
    # Drop images left over from a previous upload; otherwise the detection
    # step, which reads the whole folder, would process them again.
    for stale_name in os.listdir(output_folder):
        if stale_name.startswith('image_') and stale_name.endswith('.jpg'):
            os.remove(os.path.join(output_folder, stale_name))

    for i, img in enumerate(images):
        # Background removal can yield images with an alpha channel, which
        # the JPEG writer rejects; flatten onto white before saving.
        _flatten_to_rgb(img).save(os.path.join(output_folder, f'image_{i}.jpg'))

    return images
74
+
75
def run_detection_and_ocr():
    """Run YOLO detection on ``output_images`` and OCR every saved crop.

    The detector is run with ``save_crop=True``; each crop file found under
    the run's ``crops/<class_label>/`` folders is passed to ``predict_text``.
    The results are written to ``output.json``.

    Returns:
        The path of the JSON file that was written.
    """
    # Load models
    ocr_model = Model.load('hezarai/crnn-fa-printed-96-long')
    yolo_model = YOLO("/content/drive/MyDrive/train3/weights/best.pt")

    input_folder = 'output_images'
    detections = yolo_model.predict(
        input_folder, save=True, imgsz=320, conf=0.5, save_crop=True
    )

    # Ultralytics creates a new numbered run folder (predict, predict2, ...)
    # on repeated calls, so prefer the directory reported by this run and
    # only fall back to the original hard-coded location.
    output_folder = '/content/runs/detect/predict'
    for detection in detections or []:
        run_dir = getattr(detection, 'save_dir', None)
        if run_dir:
            output_folder = str(run_dir)
            break

    # The crop listing does not depend on the input image, so collect and
    # OCR it once instead of once per input file.
    crop_folder = os.path.join(output_folder, 'crops')
    crops = []
    # No detections means no crops folder; treat that as "no crops".
    if os.path.isdir(crop_folder):
        for crop_label in sorted(os.listdir(crop_folder)):
            crop_label_folder = os.path.join(crop_folder, crop_label)
            if not os.path.isdir(crop_label_folder):
                continue
            for crop_filename in sorted(os.listdir(crop_label_folder)):
                crop_image_path = os.path.join(crop_label_folder, crop_filename)
                crops.append({
                    'crop_image_path': crop_image_path,
                    'text_prediction': predict_text(ocr_model, crop_image_path),
                    'class_label': crop_label,
                })

    # NOTE(review): every input image is reported with the full crop list,
    # as in the original; crops are not matched to the image they came from.
    # Confirm whether per-image association is wanted.
    results = [
        {'image': filename, 'crops': list(crops)}
        for filename in sorted(os.listdir(input_folder))
        if filename.endswith(('.JPEG', '.jpg'))
    ]

    output_json_path = 'output.json'
    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)

    return output_json_path
112
+
113
def predict_text(model, image_path):
    """Run the OCR model on one image file and return the recognised text.

    Args:
        model: Object whose ``predict`` method accepts a PIL image.
        image_path: Path of the image file to read.

    Returns:
        The ``'text'`` entries joined by spaces when the model returns a
        list, otherwise ``str(output)``. ``"N/A"`` if the image cannot be
        opened or decoded.
    """
    try:
        # resize() returns a fully loaded copy, so the file handle can be
        # released as soon as the ``with`` block ends.
        with Image.open(image_path) as source:
            image = source.resize((320, 320))
    except OSError:
        # Covers a missing file as well as an unreadable or corrupt image,
        # so one bad crop does not abort the whole run.
        return "N/A"

    output = model.predict(image)
    if isinstance(output, list):
        return ' '.join(item['text'] for item in output)
    return str(output)
123
+
124
def gradio_interface(input_file):
    """Gradio callback: process the upload, run detection + OCR, return JSON.

    Args:
        input_file: The uploaded file object, or ``None`` when nothing was
            uploaded.

    Returns:
        The parsed contents of the JSON file produced by
        ``run_detection_and_ocr``, or ``{"error": <message>}`` when there is
        no upload or the file could not be processed.
    """
    if input_file is None:
        return {"error": "No file was uploaded."}

    processed = process_file(input_file)
    # process_file reports an unsupported format by returning a message
    # string; stop here instead of running detection on whatever images
    # are already in the output folder.
    if isinstance(processed, str):
        return {"error": processed}

    json_output = run_detection_and_ocr()
    with open(json_output, 'r', encoding='utf-8') as f:
        return json.load(f)
129
+
130
# Gradio UI: a single file upload whose result is rendered as JSON.
iface = gr.Interface(
    title="Document to JSON Converter with Background Removal",
    fn=gradio_interface,
    inputs=gr.File(label="Upload Word, PDF, or Image"),
    outputs=gr.JSON(label="JSON Output"),
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()