Spaces:

sotirios-slv
/

theatre-programmer

Sleeping

sotirios-slv committed on Dec 4, 2023

Commit

cd373fc

•

2 Parent(s): ee9ceb2 44bed6a

Merge pull request #2 from StateLibraryVictoria/export-file-fixes

Files changed (3) hide show

.github/workflows/deploy-to-hf-dev.yml CHANGED Viewed

@@ -17,6 +17,6 @@ jobs:
           lfs: true
       - name: Push to hub
         env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/sotirios-slv/theatre-programmer-dev main

           lfs: true
       - name: Push to hub
         env:
+          HF_TOKEN: ${{ secrets.HF_DEV_TOKEN }}
         run: git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/sotirios-slv/theatre-programmer-dev main

.github/workflows/deploy-to-hf-prod.yml CHANGED Viewed

@@ -17,4 +17,4 @@ jobs:
       - name: Push to hub
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        run: git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/sotirios-slv/theatre-programmer main

       - name: Push to hub
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/sotirios-slv/theatre-programmer main

app.py CHANGED Viewed

@@ -36,7 +36,10 @@ def get_named_entities(ocr_text: str):
 def run(image, lang="eng"):
     print("Image ", image)
-    print("Image type ", type(image))
     result = pytesseract.image_to_string(image, lang=None if lang == [] else lang)
     ner = get_named_entities(result)
@@ -52,13 +55,18 @@ def download_output(ocr_text: str, named_entities: str, image_name="test"):
         output_file = f"{image_name}_{datetime_now}.xlsx"
         ocr_df = pd.Series(ocr_text)
-        print("OCR ", ocr_df)
         ner_df = pd.Series(named_entities_list)
-        print("NER ", ner_df)
         with pd.ExcelWriter(output_file) as writer:
-            ocr_df.to_excel(writer, sheet_name="OCR text")
-            ner_df.to_excel(writer, sheet_name="Named entities")
         return output_file
     except Exception as e:

 def run(image, lang="eng"):
     print("Image ", image)
+    try:
+        print("Image filename ", image.filename)
+    except:
+        print("Could not print image filename")
     result = pytesseract.image_to_string(image, lang=None if lang == [] else lang)
     ner = get_named_entities(result)
         output_file = f"{image_name}_{datetime_now}.xlsx"
         ocr_df = pd.Series(ocr_text)
         ner_df = pd.Series(named_entities_list)
         with pd.ExcelWriter(output_file) as writer:
+            ocr_df.to_excel(
+                writer, sheet_name="OCR text", columns=["OCR text"], index=False
+            )
+            ner_df.to_excel(
+                writer,
+                sheet_name="Named entities",
+                columns=["Named entities"],
+                index=False,
+            )
         return output_file
     except Exception as e: