Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,14 +10,16 @@ classifier = pipeline("text-classification", model="distilbert-base-uncased-fine
|
|
| 10 |
def detect_job(text, file):
|
| 11 |
extracted_text = ""
|
| 12 |
if file:
|
| 13 |
-
filename = file.name
|
| 14 |
if filename.endswith(".pdf"):
|
| 15 |
-
|
|
|
|
| 16 |
for img in images:
|
| 17 |
extracted_text += pytesseract.image_to_string(img) + "\n"
|
| 18 |
else:
|
| 19 |
-
img = Image.open(file)
|
| 20 |
extracted_text = pytesseract.image_to_string(img)
|
|
|
|
| 21 |
full_text = text + "\n" + extracted_text
|
| 22 |
if full_text.strip() == "":
|
| 23 |
return "No text provided!"
|
|
@@ -31,7 +33,7 @@ iface = gr.Interface(
|
|
| 31 |
fn=detect_job,
|
| 32 |
inputs=[
|
| 33 |
gr.Textbox(lines=10, placeholder="Paste job description here..."),
|
| 34 |
-
gr.File(label="Upload PDF/Image", file_types=[".pdf", ".png", ".jpg", ".jpeg"], type="
|
| 35 |
],
|
| 36 |
outputs="text",
|
| 37 |
title="Fake Job Detector"
|
|
|
|
| 10 |
def detect_job(text, file):
|
| 11 |
extracted_text = ""
|
| 12 |
if file:
|
| 13 |
+
filename = file.name if hasattr(file, "name") else "uploaded_file"
|
| 14 |
if filename.endswith(".pdf"):
|
| 15 |
+
# file is bytes, convert PDF to images
|
| 16 |
+
images = convert_from_bytes(file.read() if hasattr(file, "read") else file)
|
| 17 |
for img in images:
|
| 18 |
extracted_text += pytesseract.image_to_string(img) + "\n"
|
| 19 |
else:
|
| 20 |
+
img = Image.open(file if hasattr(file, "read") else open(file, "rb"))
|
| 21 |
extracted_text = pytesseract.image_to_string(img)
|
| 22 |
+
|
| 23 |
full_text = text + "\n" + extracted_text
|
| 24 |
if full_text.strip() == "":
|
| 25 |
return "No text provided!"
|
|
|
|
| 33 |
fn=detect_job,
|
| 34 |
inputs=[
|
| 35 |
gr.Textbox(lines=10, placeholder="Paste job description here..."),
|
| 36 |
+
gr.File(label="Upload PDF/Image", file_types=[".pdf", ".png", ".jpg", ".jpeg"], type="binary")
|
| 37 |
],
|
| 38 |
outputs="text",
|
| 39 |
title="Fake Job Detector"
|