linjieccc committed on
Commit
b35eb97
1 Parent(s): 11ba4ce
Files changed (2) hide show
  1. app.py +6 -87
  2. header.html +3 -12
app.py CHANGED
@@ -38,7 +38,7 @@ examples = [
38
  [
39
  "license.jpeg",
40
  "Name;DOB;ISS;EXP",
41
- ],
42
  [
43
  "invoice.jpeg",
44
  "名称;纳税人识别号;开票日期",
@@ -160,11 +160,15 @@ def run_taskflow(document, schema, argument):
160
 
161
 
162
  def process_doc(document, schema, ocr_lang, layout_analysis):
 
 
 
163
  if not schema:
164
  schema = '时间;组织机构;人物'
165
  if document is None:
166
  return None, None
167
 
 
168
  schema, schema_lang = get_schema(dbc2sbc(schema))
169
  argument = {
170
  "ocr_lang": ocr_lang,
@@ -190,7 +194,6 @@ def load_example_document(img, schema, ocr_lang, layout_analysis):
190
  document = example_files[schema]
191
  choice = lang_map[document].split("-")
192
  ocr_lang = choice[0]
193
- layout_analysis = False if len(choice) == 1 else True
194
  preview, answer = process_doc(document, schema, ocr_lang, layout_analysis)
195
  return document, schema, preview, gr.update(visible=True), answer
196
  else:
@@ -206,91 +209,7 @@ def read_content(file_path: str) -> str:
206
  return content
207
 
208
 
209
- CSS = """
210
- #prompt input {
211
- font-size: 16px;
212
- }
213
- #url-textbox {
214
- padding: 0 !important;
215
- }
216
- #short-upload-box .w-full {
217
- min-height: 10rem !important;
218
- }
219
- /* I think something like this can be used to re-shape
220
- * the table
221
- */
222
- /*
223
- .gr-samples-table tr {
224
- display: inline;
225
- }
226
- .gr-samples-table .p-2 {
227
- width: 100px;
228
- }
229
- */
230
- #select-a-file {
231
- width: 100%;
232
- }
233
- #file-clear {
234
- padding-top: 2px !important;
235
- padding-bottom: 2px !important;
236
- padding-left: 8px !important;
237
- padding-right: 8px !important;
238
- margin-top: 10px;
239
- }
240
- .gradio-container .gr-button-primary {
241
- background: linear-gradient(180deg, #CDF9BE 0%, #AFF497 100%);
242
- border: 1px solid #B0DCCC;
243
- border-radius: 8px;
244
- color: #1B8700;
245
- }
246
- .gradio-container.dark button#submit-button {
247
- background: linear-gradient(180deg, #CDF9BE 0%, #AFF497 100%);
248
- border: 1px solid #B0DCCC;
249
- border-radius: 8px;
250
- color: #1B8700
251
- }
252
- table.gr-samples-table tr td {
253
- border: none;
254
- outline: none;
255
- }
256
- table.gr-samples-table tr td:first-of-type {
257
- width: 0%;
258
- }
259
- div#short-upload-box div.absolute {
260
- display: none !important;
261
- }
262
- gradio-app > div > div > div > div.w-full > div, .gradio-app > div > div > div > div.w-full > div {
263
- gap: 0px 2%;
264
- }
265
- gradio-app div div div div.w-full, .gradio-app div div div div.w-full {
266
- gap: 0px;
267
- }
268
- gradio-app h2, .gradio-app h2 {
269
- padding-top: 10px;
270
- }
271
- #answer {
272
- overflow-y: scroll;
273
- color: white;
274
- background: #666;
275
- border-color: #666;
276
- font-size: 20px;
277
- font-weight: bold;
278
- }
279
- #answer span {
280
- color: white;
281
- }
282
- #answer textarea {
283
- color:white;
284
- background: #777;
285
- border-color: #777;
286
- font-size: 18px;
287
- }
288
- #url-error input {
289
- color: red;
290
- }
291
- """
292
-
293
- with gr.Blocks(css=CSS) as demo:
294
  gr.HTML(read_content("header.html"))
295
  gr.Markdown(
296
  "Open-sourced by PaddleNLP, **UIE-X** is a universal information extraction engine for both scanned document and text inputs. It supports Entity Extraction, Relation Extraction and Event Extraction tasks."
 
38
  [
39
  "license.jpeg",
40
  "Name;DOB;ISS;EXP",
41
+ ],
42
  [
43
  "invoice.jpeg",
44
  "名称;纳税人识别号;开票日期",
 
160
 
161
 
162
  def process_doc(document, schema, ocr_lang, layout_analysis):
163
+ if [document, schema] in examples:
164
+ ocr_lang = lang_map[document]
165
+
166
  if not schema:
167
  schema = '时间;组织机构;人物'
168
  if document is None:
169
  return None, None
170
 
171
+ layout_analysis = True if layout_analysis == "yes" else False
172
  schema, schema_lang = get_schema(dbc2sbc(schema))
173
  argument = {
174
  "ocr_lang": ocr_lang,
 
194
  document = example_files[schema]
195
  choice = lang_map[document].split("-")
196
  ocr_lang = choice[0]
 
197
  preview, answer = process_doc(document, schema, ocr_lang, layout_analysis)
198
  return document, schema, preview, gr.update(visible=True), answer
199
  else:
 
209
  return content
210
 
211
 
212
+ with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  gr.HTML(read_content("header.html"))
214
  gr.Markdown(
215
  "Open-sourced by PaddleNLP, **UIE-X** is a universal information extraction engine for both scanned document and text inputs. It supports Entity Extraction, Relation Extraction and Event Extraction tasks."
header.html CHANGED
@@ -1,16 +1,7 @@
1
  <div style="text-align: center; max-width: 650px; margin: 0 auto;">
2
- <div
3
- style="
4
- display: inline-flex;
5
- gap: 0.8rem;
6
- font-size: 1.75rem;
7
- margin-bottom: 10px;
8
- margin-left: 220px;
9
- justify-content: center;
10
- "
11
- >
12
- <a href="https://github.com/PaddlePaddle/PaddleNLP"><img src="https://user-images.githubusercontent.com/1371212/175816733-8ec25eb0-9af3-4380-9218-27c154518258.png" alt="PaddleNLP" width="60%"></a>
13
- </div>
14
  <div
15
  style="
16
  display: inline-flex;
 
1
  <div style="text-align: center; max-width: 650px; margin: 0 auto;">
2
+ <p align="center">
3
+ <img src="https://user-images.githubusercontent.com/1371212/175816733-8ec25eb0-9af3-4380-9218-27c154518258.png" align="middle" width="500" />
4
+ </p>
 
 
 
 
 
 
 
 
 
5
  <div
6
  style="
7
  display: inline-flex;