Infinity-1995 committed on
Commit
1849ee0
·
verified ·
1 Parent(s): a57db7e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -55
app.py CHANGED
@@ -1,62 +1,37 @@
1
- import streamlit as st
2
  from transformers import pipeline
3
  from PIL import Image
4
  import pytesseract
5
  from pdf2image import convert_from_bytes
6
 
7
- # -------------------
8
- # Load Hugging Face text classifier
9
- # -------------------
10
- @st.cache_resource
11
- def load_classifier():
12
- classifier = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")
13
- return classifier
14
 
15
- classifier = load_classifier()
16
-
17
- # -------------------
18
- # App UI
19
- # -------------------
20
- st.title("Fake Job Detector")
21
- st.write("Paste job text or upload a screenshot/PDF to check if the job posting is Legitimate or Fake.")
22
-
23
- # Job text input
24
- job_text = st.text_area("Paste job description here:")
25
-
26
- # File upload (image or PDF)
27
- uploaded_file = st.file_uploader("Or upload a screenshot / PDF", type=["png","jpg","jpeg","pdf"])
28
-
29
- extracted_text = ""
30
-
31
- # Extract text from file if uploaded
32
- if uploaded_file:
33
- if uploaded_file.type == "application/pdf":
34
- images = convert_from_bytes(uploaded_file.read())
35
- for img in images:
36
- extracted_text += pytesseract.image_to_string(img) + "\n"
37
- else:
38
- img = Image.open(uploaded_file)
39
- extracted_text = pytesseract.image_to_string(img)
40
-
41
- # Combine pasted text + extracted text
42
- full_text = job_text + "\n" + extracted_text
43
-
44
- if st.button("Detect"):
45
- if full_text.strip() == "":
46
- st.warning("Please paste job text or upload a file!")
47
- else:
48
- result = classifier(full_text)
49
- label = result[0]['label']
50
- score = result[0]['score']
51
-
52
- # Map labels to Fake/Suspicious/Legit (simple demo)
53
- # Here using SST-2 labels for demo; in real app, fine-tune model
54
- if label == "NEGATIVE":
55
- final_label = "Suspicious / Fake"
56
  else:
57
- final_label = "Legitimate"
58
-
59
- st.success(f"Prediction: **{final_label}**")
60
- st.info(f"Confidence: {score:.2f}")
61
- st.subheader("Extracted Job Text:")
62
- st.text_area("Text Extracted", full_text, height=200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
  from transformers import pipeline
3
  from PIL import Image
4
  import pytesseract
5
  from pdf2image import convert_from_bytes
6
 
7
# Build the shared text-classification pipeline once, at import time,
# so every request reuses the same loaded model.
classifier = pipeline(
    task="text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
 
 
 
 
 
9
 
10
def _ocr_upload(file):
    """Return the OCR text of an uploaded PDF or image.

    ``file`` may be either a filesystem path (``str``) or an object that
    exposes the path as ``.name``; both shapes are accepted so the helper
    does not depend on how the upload component hands the file over.
    """
    path = file if isinstance(file, str) else file.name

    # Compare case-insensitively so "OFFER.PDF" is still handled as a PDF.
    if path.lower().endswith(".pdf"):
        # Read from the path instead of the passed handle: the handle's
        # cursor position (or whether it is still open) is not guaranteed.
        with open(path, "rb") as fh:
            pages = convert_from_bytes(fh.read())
        return "".join(pytesseract.image_to_string(page) + "\n" for page in pages)

    # Context manager releases the image's file handle after OCR.
    with Image.open(path) as img:
        return pytesseract.image_to_string(img)


def detect_job(text, file):
    """Classify a job posting from pasted text and/or an uploaded file.

    Args:
        text: Job description pasted by the user; ``None`` or an empty
            string is treated as "nothing pasted".
        file: Optional upload (PDF or image) whose text is extracted with
            OCR and appended to ``text``. Falsy means no upload.

    Returns:
        ``"No text provided!"`` when neither input yields any text,
        otherwise a ``"Prediction: ... (Confidence: ...)"`` summary string.

    Note:
        The label mapping relies on the module-level ``classifier``
        emitting ``POSITIVE`` for the "Legitimate" case; any other label
        is reported as "Suspicious / Fake".
    """
    extracted_text = _ocr_upload(file) if file else ""
    full_text = (text or "") + "\n" + extracted_text

    if not full_text.strip():
        return "No text provided!"

    # Long postings (especially multi-page OCR output) can exceed the
    # model's maximum sequence length; truncate instead of raising.
    result = classifier(full_text, truncation=True)
    top = result[0]
    label = "Legitimate" if top["label"] == "POSITIVE" else "Suspicious / Fake"
    score = top["score"]
    return f"Prediction: {label} (Confidence: {score:.2f})"
27
+
28
# Gradio UI: a free-text box plus an optional PDF/image upload, both passed
# to detect_job; the returned summary string is shown as plain text.
iface = gr.Interface(
    fn=detect_job,
    inputs=[
        gr.Textbox(lines=10, placeholder="Paste job description here..."),
        # `file_types` restricts the accepted extensions. `type` only selects
        # how the upload is delivered to `fn` and does not accept a list.
        gr.File(file_types=[".pdf", ".png", ".jpg", ".jpeg"]),
    ],
    outputs="text",
    title="Fake Job Detector",
)

iface.launch()