sotirios-slv committed on
Commit
cd373fc
2 Parent(s): ee9ceb2 44bed6a

Merge pull request #2 from StateLibraryVictoria/export-file-fixes

Browse files
.github/workflows/deploy-to-hf-dev.yml CHANGED
@@ -17,6 +17,6 @@ jobs:
17
  lfs: true
18
  - name: Push to hub
19
  env:
20
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
21
  run: git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/sotirios-slv/theatre-programmer-dev main
22
 
 
17
  lfs: true
18
  - name: Push to hub
19
  env:
20
+ HF_TOKEN: ${{ secrets.HF_DEV_TOKEN }}
21
  run: git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/sotirios-slv/theatre-programmer-dev main
22
 
.github/workflows/deploy-to-hf-prod.yml CHANGED
@@ -17,4 +17,4 @@ jobs:
17
  - name: Push to hub
18
  env:
19
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
- run: git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/sotirios-slv/theatre-programmer main
 
17
  - name: Push to hub
18
  env:
19
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
+ run: git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/sotirios-slv/theatre-programmer main
app.py CHANGED
@@ -36,7 +36,10 @@ def get_named_entities(ocr_text: str):
36
 
37
  def run(image, lang="eng"):
38
  print("Image ", image)
39
- print("Image type ", type(image))
 
 
 
40
  result = pytesseract.image_to_string(image, lang=None if lang == [] else lang)
41
 
42
  ner = get_named_entities(result)
@@ -52,13 +55,18 @@ def download_output(ocr_text: str, named_entities: str, image_name="test"):
52
  output_file = f"{image_name}_{datetime_now}.xlsx"
53
 
54
  ocr_df = pd.Series(ocr_text)
55
- print("OCR ", ocr_df)
56
  ner_df = pd.Series(named_entities_list)
57
- print("NER ", ner_df)
58
 
59
  with pd.ExcelWriter(output_file) as writer:
60
- ocr_df.to_excel(writer, sheet_name="OCR text")
61
- ner_df.to_excel(writer, sheet_name="Named entities")
 
 
 
 
 
 
 
62
  return output_file
63
 
64
  except Exception as e:
 
36
 
37
  def run(image, lang="eng"):
38
  print("Image ", image)
39
+ try:
40
+ print("Image filename ", image.filename)
41
+ except:
42
+ print("Could not print image filename")
43
  result = pytesseract.image_to_string(image, lang=None if lang == [] else lang)
44
 
45
  ner = get_named_entities(result)
 
55
  output_file = f"{image_name}_{datetime_now}.xlsx"
56
 
57
  ocr_df = pd.Series(ocr_text)
 
58
  ner_df = pd.Series(named_entities_list)
 
59
 
60
  with pd.ExcelWriter(output_file) as writer:
61
+ ocr_df.to_excel(
62
+ writer, sheet_name="OCR text", columns=["OCR text"], index=False
63
+ )
64
+ ner_df.to_excel(
65
+ writer,
66
+ sheet_name="Named entities",
67
+ columns=["Named entities"],
68
+ index=False,
69
+ )
70
  return output_file
71
 
72
  except Exception as e: