sotirios-slv committed on
Commit
16858c9
1 Parent(s): e6c2cd7

Updated excel export details + print debugging

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -36,7 +36,10 @@ def get_named_entities(ocr_text: str):
36
 
37
  def run(image, lang="eng"):
38
  print("Image ", image)
39
- print("Image type ", type(image))
 
 
 
40
  result = pytesseract.image_to_string(image, lang=None if lang == [] else lang)
41
 
42
  ner = get_named_entities(result)
@@ -52,13 +55,18 @@ def download_output(ocr_text: str, named_entities: str, image_name="test"):
52
  output_file = f"{image_name}_{datetime_now}.xlsx"
53
 
54
  ocr_df = pd.Series(ocr_text)
55
- print("OCR ", ocr_df)
56
  ner_df = pd.Series(named_entities_list)
57
- print("NER ", ner_df)
58
 
59
  with pd.ExcelWriter(output_file) as writer:
60
- ocr_df.to_excel(writer, sheet_name="OCR text")
61
- ner_df.to_excel(writer, sheet_name="Named entities")
 
 
 
 
 
 
 
62
  return output_file
63
 
64
  except Exception as e:
 
36
 
37
  def run(image, lang="eng"):
38
  print("Image ", image)
39
+ try:
40
+ print("Image filename ", image.filename)
41
+ except:
42
+ print("Could not print image filename")
43
  result = pytesseract.image_to_string(image, lang=None if lang == [] else lang)
44
 
45
  ner = get_named_entities(result)
 
55
  output_file = f"{image_name}_{datetime_now}.xlsx"
56
 
57
  ocr_df = pd.Series(ocr_text)
 
58
  ner_df = pd.Series(named_entities_list)
 
59
 
60
  with pd.ExcelWriter(output_file) as writer:
61
+ ocr_df.to_excel(
62
+ writer, sheet_name="OCR text", columns=["OCR text"], index=False
63
+ )
64
+ ner_df.to_excel(
65
+ writer,
66
+ sheet_name="Named entities",
67
+ columns=["Named entities"],
68
+ index=False,
69
+ )
70
  return output_file
71
 
72
  except Exception as e: