merve HF staff committed on
Commit
27d2f67
1 Parent(s): 63d4bdf

replaced openai with our NER model

Browse files
Files changed (1) hide show
  1. app.py +33 -60
app.py CHANGED
@@ -4,6 +4,7 @@ import gradio as gr
4
  import numpy as np
5
  import openai
6
  import ast
 
7
  import os
8
 
9
  from openai_api import OpenAI_API
@@ -24,7 +25,7 @@ def get_text(input_img):
24
  def get_parsed_address(input_img):
25
 
26
  address_full_text = get_text(input_img)
27
- return openai_response(address_full_text)
28
 
29
 
30
  def save_deta_db(input):
@@ -41,62 +42,33 @@ def clear_textbox(value):
41
  return gr.update(value="")
42
 
43
 
44
- # Open API on change
45
  def text_dict(input):
46
  eval_result = ast.literal_eval(input)
47
  return (
48
- str(eval_result["city"]),
49
- str(eval_result["distinct"]),
50
- str(eval_result["neighbourhood"]),
51
- str(eval_result["street"]),
52
- str(eval_result["address"]),
53
- str(eval_result["tel"]),
54
- str(eval_result["name_surname"]),
55
  str(eval_result["no"]),
 
 
56
  )
57
 
58
 
59
- def openai_response(ocr_input):
60
- prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
61
- plain text input and especially from emergency text that carries address information, your inputs can be text
62
- of arbitrary size, but the output should be in [{{'tabular': {{'entity_type': 'entity'}} }}] JSON format Force it
63
- to only extract keys that are shared as an example in the examples section, if a key value is not found in the
64
- text input, then it should be ignored. Have only city, distinct, neighbourhood,
65
- street, no, tel, name_surname, address Examples:
66
-
67
- Input: Deprem sırasında evimizde yer alan adresimiz: İstanbul, Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35, cep telefonu numaram 5551231256, adim Ahmet Yilmaz
68
- Output: {{'city': 'İstanbul', 'distinct': 'Beşiktaş', 'neighbourhood': 'Yıldız Mahallesi', 'street': 'Cumhuriyet Caddesi', 'no': '35', 'tel': '5551231256', 'name_surname': 'Ahmet Yılmaz', 'address': 'İstanbul, Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35'}}
69
-
70
- Input: 5.29 PMO $ 0 87 DEVREMİZ ÖZGÜR ORÇAN ARKADAŞIMIZA ULAŞAMIYORUZ BEYOĞLU MAH FEVZİ ÇAKMAK CAD. NO.58-TÜRKOĞLUI KAHRAMANMARAŞ 5524357578 AdReSe YaKIN OLANLAR VEYA ULASANLAR LÜTFEN BiLGILENDIRSIN .
71
- Output: {{'city': 'Kahramanmaraş', 'distinct': 'Türkoğlu', 'neighbourhood': 'Beyoğlu Mahallesi', 'street': 'Çakmak Caddesi', 'no': '58', 'tel': '5524357578', 'name_surname': 'Özgür Orçan', 'address': 'Beyoğlu Mahallesi, Çakmak Caddesi, No:58 Türkoğlu/Kahramanmaraş'}}
72
-
73
- Input: Ahmet @ozknhmt Ekim 2021 tarihinde katıldı - 2 Takipçi Takip ettiğin kimse takip etmiyor AKEVLER MAH. 432SK RÜYA APT ANT(BEDİİ SABUNCU KARŞISI) ANTAKYA HATAY MERVE BELANLI ses veriyor ancak hiçbiryardım ekibi olmadığı için kurtaramryoruz içeri girip, lütfen acil yardım_ İsim: Merve Belanlı tel 542 757 5484 Ö0 12.07
74
- Output: {{'city': 'Hatay', 'distinct': 'Antakya', 'neighbourhood': 'Akevler Mahallesi', 'street': '432 Sokak', 'no': '', 'tel': '5427575484', 'name_surname': 'Merve Belanlı', 'address': 'Akevler Mahallesi, 432 Sokak, Rüya Apt. Antakya/Hatay'}}
75
-
76
- Input: 14:04 Sümerler Cemil Şükrü Çolokoğlu ilköğretim okulu karşısı 3 9öçük altında yardım bekyouk Lütfen herkes paylogsın
77
- Output: {{'city': '', 'distinct': '', 'neighbourhood': 'Sümerler Mahallesi', 'street': 'Cemil Şükrü Çolokoğlu İlköğretim Okulu Karşısı', 'no': '', 'tel': '', 'name_surname': '', 'address': 'Sümerler Mahallesi, Cemil Şükrü Çolokoğlu İlköğretim Okulu Karşısı'}}
78
-
79
- Input: {ocr_input}
80
- Output:
81
- """
82
 
83
- openai_client = OpenAI_API()
84
- response = openai_client.single_request(prompt)
85
- print(resp)
86
- resp = response["choices"][0]["text"]
87
- resp = eval(resp.replace("'{", "{").replace("}'", "}"))
88
  resp["input"] = ocr_input
89
- dict_keys = [
90
- "city",
91
- "distinct",
92
- "neighbourhood",
93
- "street",
94
- "no",
95
- "tel",
96
- "name_surname",
97
- "address",
98
- "input",
99
- ]
100
  for key in dict_keys:
101
  if key not in resp.keys():
102
  resp[key] = ""
@@ -126,24 +98,24 @@ with gr.Blocks() as demo:
126
 
127
  with gr.Column():
128
  with gr.Row():
129
- city = gr.Textbox(label="İl", interactive=True, show_progress=False)
130
- distinct = gr.Textbox(label="İlçe", interactive=True, show_progress=False)
131
  with gr.Row():
132
- neighbourhood = gr.Textbox(
133
  label="Mahalle", interactive=True, show_progress=False
134
  )
135
- street = gr.Textbox(
136
  label="Sokak/Cadde/Bulvar", interactive=True, show_progress=False
137
  )
138
  with gr.Row():
139
- tel = gr.Textbox(label="Telefon", interactive=True, show_progress=False)
140
  with gr.Row():
141
- name_surname = gr.Textbox(
142
  label="İsim Soyisim", interactive=True, show_progress=False
143
  )
144
- address = gr.Textbox(label="Adres", interactive=True, show_progress=False)
145
  with gr.Row():
146
- no = gr.Textbox(label="Kapı No", interactive=True, show_progress=False)
147
 
148
  img_area_button.click(
149
  get_parsed_address,
@@ -153,13 +125,14 @@ with gr.Blocks() as demo:
153
  )
154
 
155
  text_area_button.click(
156
- openai_response, text_area, open_api_text, api_name="upload-text"
157
  )
158
 
 
159
  open_api_text.change(
160
  text_dict,
161
  open_api_text,
162
- [city, distinct, neighbourhood, street, address, tel, name_surname, no],
163
  )
164
  ocr_button = gr.Button(value="Sadece OCR kullan")
165
  ocr_button.click(
@@ -172,9 +145,9 @@ with gr.Blocks() as demo:
172
  submit_button.click(save_deta_db, open_api_text)
173
  done_text = gr.Textbox(label="Done", value="Not Done", visible=False)
174
  submit_button.click(update_component, outputs=done_text)
175
- for txt in [city, distinct, neighbourhood, street, address, tel, name_surname, no]:
176
  submit_button.click(fn=clear_textbox, inputs=txt, outputs=txt)
177
 
178
 
179
  if __name__ == "__main__":
180
- demo.launch()
 
4
  import numpy as np
5
  import openai
6
  import ast
7
+ from transformers import pipeline
8
  import os
9
 
10
  from openai_api import OpenAI_API
 
25
  def get_parsed_address(input_img):
26
 
27
  address_full_text = get_text(input_img)
28
+ return ner_response(address_full_text)
29
 
30
 
31
  def save_deta_db(input):
 
42
  return gr.update(value="")
43
 
44
 
 
45
  def text_dict(input):
46
  eval_result = ast.literal_eval(input)
47
  return (
48
+ str(eval_result["il"]),
49
+ str(eval_result["ilce"]),
50
+ str(eval_result["mahalle"]),
51
+ str(eval_result["sokak"]),
52
+ str(eval_result["Apartman/site"]),
 
 
53
  str(eval_result["no"]),
54
+ str(eval_result["ad-soyad"]),
55
+ str(eval_result["dis kapi no"]),
56
  )
57
 
58
 
59
+ def ner_response(ocr_input):
60
+
61
+ ner_pipe = pipeline("token-classification","deprem-ml/deprem-ner", aggregation_strategy="first")
62
+ predictions = ner_pipe(ocr_input)
63
+ resp = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+ for item in predictions:
66
+ print(item)
67
+ key = item["entity_group"]
68
+ resp[key] = item["word"]
69
+
70
  resp["input"] = ocr_input
71
+ dict_keys = ["il", "ilce", "mahalle", "sokak", "Apartman/site", "no", "ad-soyad", "dis kapi no"]
 
 
 
 
 
 
 
 
 
 
72
  for key in dict_keys:
73
  if key not in resp.keys():
74
  resp[key] = ""
 
98
 
99
  with gr.Column():
100
  with gr.Row():
101
+ il = gr.Textbox(label="İl", interactive=True, show_progress=False)
102
+ ilce = gr.Textbox(label="İlçe", interactive=True, show_progress=False)
103
  with gr.Row():
104
+ mahalle = gr.Textbox(
105
  label="Mahalle", interactive=True, show_progress=False
106
  )
107
+ sokak = gr.Textbox(
108
  label="Sokak/Cadde/Bulvar", interactive=True, show_progress=False
109
  )
110
  with gr.Row():
111
+ no = gr.Textbox(label="Telefon", interactive=True, show_progress=False)
112
  with gr.Row():
113
+ ad_soyad = gr.Textbox(
114
  label="İsim Soyisim", interactive=True, show_progress=False
115
  )
116
+ apartman = gr.Textbox(label="apartman", interactive=True, show_progress=False)
117
  with gr.Row():
118
+ dis_kapi_no = gr.Textbox(label="Kapı No", interactive=True, show_progress=False)
119
 
120
  img_area_button.click(
121
  get_parsed_address,
 
125
  )
126
 
127
  text_area_button.click(
128
+ ner_response, text_area, open_api_text, api_name="upload-text"
129
  )
130
 
131
+
132
  open_api_text.change(
133
  text_dict,
134
  open_api_text,
135
+ [il, ilce, mahalle, sokak, no, apartman, ad_soyad, dis_kapi_no],
136
  )
137
  ocr_button = gr.Button(value="Sadece OCR kullan")
138
  ocr_button.click(
 
145
  submit_button.click(save_deta_db, open_api_text)
146
  done_text = gr.Textbox(label="Done", value="Not Done", visible=False)
147
  submit_button.click(update_component, outputs=done_text)
148
+ for txt in [il, ilce, mahalle, sokak, apartman, no, ad_soyad, dis_kapi_no]:
149
  submit_button.click(fn=clear_textbox, inputs=txt, outputs=txt)
150
 
151
 
152
  if __name__ == "__main__":
153
+ demo.launch()