merve HF staff mertcobanov committed on
Commit
3416d9c
0 Parent(s):

Duplicate from deprem-ml/deprem-ocr

Browse files

Co-authored-by: Mert Cobanov <mertcobanov@users.noreply.huggingface.co>

Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +182 -0
  4. requirements.txt +5 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Deprem Ocr 2
3
+ emoji: 👀
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 3.17.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: deprem-ml/deprem-ocr
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from easyocr import Reader
3
+ from PIL import Image
4
+ import io
5
+ import json
6
+ import csv
7
+ import openai
8
+ import ast
9
+ import os
10
+ from deta import Deta
11
+
12
+
13
+ ######################
14
+ import requests
15
+ import json
16
+
17
+ import os
18
+ import openai
19
+
20
+
21
+
22
class OpenAI_API:
    """Thin wrapper around the Azure OpenAI Completions endpoint."""

    def __init__(self):
        # Placeholder attribute; the real key is read from the environment
        # on every request in single_request().
        self.openai_api_key = ''

    def single_request(self, address_text):
        """Send *address_text* as a completion prompt and return the raw API response."""
        # Azure endpoint configuration; the secret is injected via API_KEY.
        openai.api_type = "azure"
        openai.api_base = "https://damlaopenai.openai.azure.com/"
        openai.api_version = "2022-12-01"
        openai.api_key = os.getenv("API_KEY")

        request_kwargs = dict(
            engine="Davinci-003",
            prompt=address_text,
            temperature=0.9,
            max_tokens=256,
            top_p=1.0,
            n=1,
            logprobs=0,
            echo=False,
            stop=None,
            frequency_penalty=0,
            presence_penalty=0,
            best_of=1,
        )
        return openai.Completion.create(**request_kwargs)
48
+
49
+ ########################
50
+
51
# Module-level setup shared by all requests: OpenAI key from the environment
# and a single Turkish-language EasyOCR reader (model load is expensive).
openai.api_key = os.getenv('API_KEY')
reader = Reader(["tr"])
53
+
54
+
55
def get_parsed_address(input_img):
    """OCR the uploaded image, then extract structured address fields via OpenAI."""
    full_text = get_text(input_img)
    parsed = openai_response(full_text)
    return parsed
59
+
60
+
61
def preprocess_img(inp_image):
    """Convert a BGR image to an inverted (negative) grayscale image.

    Bug fix: the original body called ``cv2.cvtColor`` / ``cv2.bitwise_not``
    but ``cv2`` is never imported anywhere in this file, so any call raised
    ``NameError``. Re-implemented with numpy using the same ITU-R BT.601
    luma weights OpenCV applies for BGR2GRAY.

    Parameters:
        inp_image: array-like of shape (H, W, 3) in BGR channel order,
            uint8 values.
    Returns:
        numpy.uint8 array of shape (H, W): 255 - grayscale(inp_image).
    """
    import numpy as np  # local import: numpy is not in this module's import block

    img = np.asarray(inp_image)
    # BGR channel order, so the weights are B=0.114, G=0.587, R=0.299.
    bgr_weights = np.array([0.114, 0.587, 0.299])
    gray = np.rint(img[..., :3].astype(np.float64) @ bgr_weights).astype(np.uint8)
    # Bitwise NOT on uint8 is exactly 255 - value (image negative).
    return 255 - gray
65
+
66
+
67
def get_text(input_img):
    """Run OCR on *input_img* and return every detected fragment as one string."""
    fragments = reader.readtext(input_img, detail=0)
    joined_text = " ".join(fragments)
    return joined_text
70
+
71
+
72
def save_csv(mahalle, il, sokak, apartman):
    """Append one address row to the local CSV address book.

    Parameters are the four address components (neighbourhood, city,
    street, building). Returns the row that was written, as a list.
    """
    adres_full = [mahalle, il, sokak, apartman]

    # newline="" is required by the csv module: without it, csv.writer emits
    # a spurious blank line after every row on Windows (see csv docs).
    with open("adress_book.csv", "a", encoding="utf-8", newline="") as f:
        write = csv.writer(f)
        write.writerow(adres_full)
    return adres_full
79
+
80
+
81
def get_json(mahalle, il, sokak, apartman):
    """Serialize the four address fields to a pretty-printed JSON string."""
    fields = {
        "mahalle": mahalle,
        "il": il,
        "sokak": sokak,
        "apartman": apartman,
    }
    serialized = json.dumps(fields, indent=4, ensure_ascii=False)
    return serialized
85
+
86
def write_db(data_dict):
    """Persist *data_dict* as a new record in the 'deprem-ocr' Deta Base."""
    # The project key is injected via the DETA_KEY environment variable;
    # never hard-code credentials.
    client = Deta(os.getenv('DETA_KEY'))
    records = client.Base("deprem-ocr")
    records.insert(data_dict)
94
+
95
+
96
def text_dict(input):
    """Parse a stringified address dict, persist it, and return its fields.

    *input* is the textual repr of the dict produced by openai_response();
    raises KeyError if any expected field is absent.
    """
    record = ast.literal_eval(input)
    write_db(record)

    # Order must match the gradio outputs wiring:
    # [city, distinct, neighbourhood, street, address, tel, name_surname, no]
    field_order = (
        'city',
        'distinct',
        'neighbourhood',
        'street',
        'address',
        'tel',
        'name_surname',
        'no',
    )
    return tuple(str(record[name]) for name in field_order)
110
+
111
def openai_response(ocr_input):
    """Extract structured address fields from OCR text via an OpenAI completion.

    Returns a dict guaranteed to contain the keys city, distinct,
    neighbourhood, street, no, tel, name_surname, address and input
    (missing fields default to '').

    Bug fixes:
    - the few-shot ``prompt`` was built but never sent — the raw OCR text was
      passed to ``single_request`` instead, so the extraction instructions
      never reached the model; the prompt is now actually used.
    - the model output was parsed with ``eval``, which executes arbitrary
      code from an untrusted source; ``ast.literal_eval`` accepts only
      Python literals.
    """
    prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
    plain text input and especially from emergency text that carries address information, your inputs can be text
    of arbitrary size, but the output should be in [{{'tabular': {{'entity_type': 'entity'}} }}] JSON format Force it
    to only extract keys that are shared as an example in the examples section, if a key value is not found in the
    text input, then it should be ignored. Have only city, distinct, neighbourhood,
    street, no, tel, name_surname, address Examples: Input: Deprem sırasında evimizde yer alan adresimiz: İstanbul,
    Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35, cep telefonu numaram 5551231256, adim Ahmet Yilmaz
    Output: {{'city': 'İstanbul', 'distinct': 'Beşiktaş', 'neighbourhood': 'Yıldız Mahallesi', 'street': 'Cumhuriyet Caddesi', 'no': '35', 'tel': '5551231256', 'name_surname': 'Ahmet Yılmaz', 'address': 'İstanbul, Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35'}}
    Input: {ocr_input}
    Output:
    """

    openai_client = OpenAI_API()
    # BUG FIX: send the assembled few-shot prompt, not the bare OCR text.
    response = openai_client.single_request(prompt)
    resp = response["choices"][0]["text"]
    print(resp)
    # Normalize stray quoting around the dict, then parse literals only
    # (no code execution on model output).
    resp = ast.literal_eval(resp.replace("'{", "{").replace("}'", "}"))
    resp["input"] = ocr_input
    dict_keys = [
        'city',
        'distinct',
        'neighbourhood',
        'street',
        'no',
        'tel',
        'name_surname',
        'address',
        'input',
    ]
    # Guarantee a complete record even when the model omits some fields.
    for key in dict_keys:
        resp.setdefault(key, '')
    return resp
145
+
146
+
147
# --- Gradio UI ---------------------------------------------------------------
# Layout: an input row (image + free text), a parsed full-address box, and a
# grid of per-field boxes that is filled whenever the parsed address changes.
with gr.Blocks() as demo:
    gr.Markdown(
        """
    # Enkaz Bildirme Uygulaması
    """)
    gr.Markdown("Bu uygulamada ekran görüntüsü sürükleyip bırakarak AFAD'a enkaz bildirimi yapabilirsiniz. Mesajı metin olarak da girebilirsiniz, tam adresi ayrıştırıp döndürür. API olarak kullanmak isterseniz sayfanın en altında use via api'ya tıklayın.")
    with gr.Row():
        # Either input triggers the pipeline: image → OCR → OpenAI, or raw text → OpenAI.
        img_area = gr.Image(label="Ekran Görüntüsü yükleyin 👇")
        ocr_result = gr.Textbox(label="Metin yükleyin 👇 ")
    # Holds the stringified dict returned by openai_response; its .change
    # event fans the fields out into the boxes below.
    open_api_text = gr.Textbox(label="Tam Adres")
    submit_button = gr.Button(label="Yükle")
    with gr.Column():
        with gr.Row():
            city = gr.Textbox(label="İl")
            distinct = gr.Textbox(label="İlçe")
        with gr.Row():
            neighbourhood = gr.Textbox(label="Mahalle")
            street = gr.Textbox(label="Sokak/Cadde/Bulvar")
        with gr.Row():
            tel = gr.Textbox(label="Telefon")
        with gr.Row():
            name_surname = gr.Textbox(label="İsim Soyisim")
            address = gr.Textbox(label="Adres")
        with gr.Row():
            no = gr.Textbox(label="Kapı No")

    # Image path: OCR + parse on button click (also exposed as an API endpoint).
    submit_button.click(get_parsed_address, inputs = img_area, outputs = open_api_text, api_name="upload_image")

    # Text path: parse whenever the raw-text box changes.
    ocr_result.change(openai_response, ocr_result, open_api_text, api_name="upload-text")

    # Fan-out: output order here must match text_dict's return tuple.
    open_api_text.change(text_dict, open_api_text, [city, distinct, neighbourhood, street, address, tel, name_surname, no])


if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openai
2
+ Pillow
3
+ easyocr
4
+ gradio
5
+ deta