Spaces:
Sleeping
Sleeping
Commit
•
28153e6
0
Parent(s):
Duplicate from deprem-ml/deprem-ocr
Browse files
Co-authored-by: Mert Cobanov <mertcobanov@users.noreply.huggingface.co>
- .gitattributes +34 -0
- README.md +13 -0
- app.py +152 -0
- requirements.txt +5 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Deprem Ocr 2
|
3 |
+
emoji: 👀
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: blue
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.17.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
duplicated_from: deprem-ml/deprem-ocr
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from easyocr import Reader
|
3 |
+
from PIL import Image
|
4 |
+
import io
|
5 |
+
import json
|
6 |
+
import csv
|
7 |
+
import openai
|
8 |
+
import ast
|
9 |
+
import os
|
10 |
+
from deta import Deta
|
11 |
+
|
12 |
+
|
13 |
+
# Configure the OpenAI client from the API_KEY environment variable
# (os.getenv yields None when the variable is unset).
openai.api_key = os.getenv('API_KEY')
# Module-level EasyOCR reader for the "tr" language code, built once at import
# time and reused by get_text() for every request.
reader = Reader(["tr"])
|
15 |
+
|
16 |
+
|
17 |
+
def get_parsed_address(input_img):
    """Run OCR on an image and parse the recognised text into address fields.

    The image is handed to ``get_text``; the resulting string is then passed
    to ``openai_response`` and that function's return value is returned
    unchanged.
    """
    return openai_response(get_text(input_img))
|
21 |
+
|
22 |
+
|
23 |
+
def preprocess_img(inp_image):
    """Convert an image to grayscale and invert it.

    Args:
        inp_image: Image accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY`` (presumably a 3-channel BGR array;
            no caller is visible in this file, so confirm before use).

    Returns:
        The grayscale image passed through ``cv2.bitwise_not``.
    """
    # The module's import section never imports cv2, so the original body
    # raised NameError on the first call. Import it here so the function is
    # self-contained. NOTE(review): OpenCV is presumably installed as an
    # easyocr dependency; confirm, or add it to requirements.txt.
    import cv2

    gray = cv2.cvtColor(inp_image, cv2.COLOR_BGR2GRAY)
    return cv2.bitwise_not(gray)
|
27 |
+
|
28 |
+
|
29 |
+
def get_text(input_img):
    """Return every text fragment recognised in ``input_img`` as one string.

    The module-level ``reader`` is called with ``detail=0`` and the fragments
    it returns are joined with single spaces.
    """
    fragments = reader.readtext(input_img, detail=0)
    separator = " "
    return separator.join(fragments)
|
32 |
+
|
33 |
+
|
34 |
+
def save_csv(mahalle, il, sokak, apartman):
    """Append one address row to ``adress_book.csv`` and return that row.

    Args:
        mahalle: Neighbourhood value (first column).
        il: Province value (second column).
        sokak: Street value (third column).
        apartman: Building value (fourth column).

    Returns:
        The list ``[mahalle, il, sokak, apartman]`` that was written.
    """
    adres_full = [mahalle, il, sokak, apartman]

    # The csv module emits its own "\r\n" line terminator, so the file must be
    # opened with newline="" to stop the text layer translating it again
    # (which yields "\r\r\n" and spurious blank rows on Windows).
    with open("adress_book.csv", "a", encoding="utf-8", newline="") as f:
        csv.writer(f).writerow(adres_full)
    return adres_full
|
41 |
+
|
42 |
+
|
43 |
+
def get_json(mahalle, il, sokak, apartman):
    """Serialise the four address parts as a pretty-printed JSON object.

    The object has the keys ``mahalle``, ``il``, ``sokak`` and ``apartman``
    (in that order), is indented by four spaces, and keeps non-ASCII
    characters unescaped.
    """
    fields = dict(mahalle=mahalle, il=il, sokak=sokak, apartman=apartman)
    return json.dumps(fields, ensure_ascii=False, indent=4)
|
47 |
+
|
48 |
+
def write_db(data_dict):
    """Insert ``data_dict`` as one record into the ``deprem-ocr`` Deta Base.

    The Deta client is created on every call from the project key stored in
    the ``DETA_KEY`` environment variable.
    """
    # Build the client from the project key.
    project = Deta(os.getenv('DETA_KEY'))

    # Open the named Base and store the record.
    project.Base("deprem-ocr").insert(data_dict)
|
56 |
+
|
57 |
+
|
58 |
+
def text_dict(input):
    """Parse a dict literal, store it, and return its address fields as strings.

    Args:
        input: Text containing a Python dict literal. The parameter name
            shadows the builtin but is kept so existing callers keep working.

    Returns:
        An 8-tuple of strings in the order ``city``, ``distinct``,
        ``neighbourhood``, ``street``, ``address``, ``tel``,
        ``name_surname``, ``no``. Keys absent from the parsed dict yield
        ``''``; empty or whitespace-only input yields eight empty strings.

    Raises:
        ValueError, SyntaxError: If ``input`` is not a valid Python literal.
        TypeError: If the literal is not a dict.
    """
    field_order = (
        'city',
        'distinct',
        'neighbourhood',
        'street',
        'address',
        'tel',
        'name_surname',
        'no',
    )

    # This function is bound to a textbox change event, so it also fires when
    # the box is cleared. There is nothing to parse or store in that case.
    if not input or not input.strip():
        return ('',) * len(field_order)

    eval_result = ast.literal_eval(input)
    # Reject non-dict literals before anything is written to the database.
    if not isinstance(eval_result, dict):
        raise TypeError(
            f"expected a dict literal, got {type(eval_result).__name__}"
        )

    write_db(eval_result)

    # Missing keys become '' instead of raising KeyError.
    return tuple(str(eval_result.get(key, '')) for key in field_order)
|
72 |
+
|
73 |
+
def openai_response(ocr_input):
    """Ask the OpenAI completion model to extract address fields from text.

    The text is embedded into a one-shot prompt and sent to
    ``text-davinci-003``; the first completion choice is parsed as a Python
    dict literal.

    Args:
        ocr_input: Free text to extract the address from.

    Returns:
        The parsed dict with ``ocr_input`` stored under ``'input'``. Each of
        the keys ``city``, ``distinct``, ``neighbourhood``, ``street``,
        ``no``, ``tel``, ``name_surname``, ``address`` and ``input`` is
        guaranteed to be present; keys the model did not return are ``''``.

    Raises:
        ValueError, SyntaxError: If the completion text is not a valid
            Python literal.
    """
    prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
plain text input and especially from emergency text that carries address information, your inputs can be text
of arbitrary size, but the output should be in [{{'tabular': {{'entity_type': 'entity'}} }}] JSON format Force it
to only extract keys that are shared as an example in the examples section, if a key value is not found in the
text input, then it should be ignored. Have only city, distinct, neighbourhood,
street, no, tel, name_surname, address Examples: Input: Deprem sırasında evimizde yer alan adresimiz: İstanbul,
Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35, cep telefonu numaram 5551231256, adim Ahmet Yilmaz
Output: {{'city': 'İstanbul', 'distinct': 'Beşiktaş', 'neighbourhood': 'Yıldız Mahallesi', 'street': 'Cumhuriyet Caddesi', 'no': '35', 'tel': '5551231256', 'name_surname': 'Ahmet Yılmaz', 'address': 'İstanbul, Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35'}}
Input: {ocr_input}
Output:
"""

    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0,
        max_tokens=300,
        top_p=1,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["\n"],
    )
    resp = response["choices"][0]["text"]
    print(resp)

    # SECURITY: the completion text comes from a remote model fed with
    # user-supplied input, so it must not go through eval(). literal_eval
    # accepts only Python literals, which is all the prompt asks for.
    literal = resp.replace("'{", "{").replace("}'", "}").strip()
    resp = ast.literal_eval(literal)
    resp["input"] = ocr_input

    # Back-fill every expected key so consumers can index without checking.
    expected_keys = (
        'city',
        'distinct',
        'neighbourhood',
        'street',
        'no',
        'tel',
        'name_surname',
        'address',
        'input',
    )
    for key in expected_keys:
        resp.setdefault(key, '')
    return resp
|
115 |
+
|
116 |
+
|
117 |
+
# Gradio UI definition and event wiring.
# NOTE(review): the original indentation was lost in this view of the file, so
# the Row/Column nesting below is reconstructed from the statement order;
# confirm the layout against the real app.py.
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown(
        """
# Enkaz Bildirme Uygulaması
""")
    # Introductory text shown under the heading.
    gr.Markdown("Bu uygulamada ekran görüntüsü sürükleyip bırakarak AFAD'a enkaz bildirimi yapabilirsiniz. Mesajı metin olarak da girebilirsiniz, tam adresi ayrıştırıp döndürür. API olarak kullanmak isterseniz sayfanın en altında use via api'ya tıklayın.")
    # Inputs: an image upload and a free-text box.
    with gr.Row():
        img_area = gr.Image(label="Ekran Görüntüsü yükleyin 👇")
        ocr_result = gr.Textbox(label="Metin yükleyin 👇 ")
    # Receives the output of get_parsed_address / openai_response.
    open_api_text = gr.Textbox(label="Tam Adres")
    submit_button = gr.Button(label="Yükle")
    # One text box per field returned by text_dict.
    with gr.Column():
        with gr.Row():
            city = gr.Textbox(label="İl")
            distinct = gr.Textbox(label="İlçe")
        with gr.Row():
            neighbourhood = gr.Textbox(label="Mahalle")
            street = gr.Textbox(label="Sokak/Cadde/Bulvar")
        with gr.Row():
            tel = gr.Textbox(label="Telefon")
        with gr.Row():
            name_surname = gr.Textbox(label="İsim Soyisim")
            address = gr.Textbox(label="Adres")
        with gr.Row():
            no = gr.Textbox(label="Kapı No")

    # Button click: pass the uploaded image to get_parsed_address and write
    # the result into open_api_text (API name "upload_image").
    submit_button.click(get_parsed_address, inputs = img_area, outputs = open_api_text, api_name="upload_image")

    # Text box change: pass the text to openai_response and write the result
    # into open_api_text (API name "upload-text").
    ocr_result.change(openai_response, ocr_result, open_api_text, api_name="upload-text")

    # open_api_text change: pass its text to text_dict and spread the returned
    # tuple over the field boxes; the list order must match text_dict's
    # return order.
    open_api_text.change(text_dict, open_api_text, [city, distinct, neighbourhood, street, address, tel, name_surname, no])


# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai
|
2 |
+
Pillow
|
3 |
+
easyocr
|
4 |
+
gradio
|
5 |
+
deta
|