File size: 1,301 Bytes
1849ee0
8850ba6
a57db7e
 
 
ff9adfe
1849ee0
 
a57db7e
1849ee0
 
 
d0f094c
 
1849ee0
 
 
a57db7e
1849ee0
 
 
 
 
 
 
 
 
 
 
 
 
d0f094c
 
 
 
1849ee0
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import gradio as gr
from transformers import pipeline
from PIL import Image
import pytesseract
from pdf2image import convert_from_bytes

# Checkpoint loaded from the Hugging Face hub.
# NOTE(review): the checkpoint name indicates an SST-2 sentiment fine-tune --
# confirm this is the intended model for screening job postings.
MODEL_ID = "distilbert-base-uncased-finetuned-sst-2-english"

# Build the text-classification pipeline once, at import time.
classifier = pipeline("text-classification", model=MODEL_ID)

def detect_job(text, file):
    """Classify a job posting from pasted text and/or an uploaded PDF or image.

    Text is OCR-extracted from the upload (every page for a PDF, the single
    picture otherwise), appended to the pasted text, and passed to the
    module-level ``classifier``.

    Args:
        text: Job description typed by the user. ``None`` is treated as empty.
        file: Optional upload. Either an object exposing ``.name`` (and
            ``.read()`` for PDFs) or a plain filesystem path string.

    Returns:
        ``"No text provided!"`` when neither input yields any non-whitespace
        text; otherwise a one-line message with the predicted label and the
        classifier's confidence score.
    """
    extracted_text = ""
    if file:
        # Accept a plain path string as well as a file object with ``.name``.
        is_path = isinstance(file, str)
        filename = file if is_path else file.name
        # Case-insensitive so "SCAN.PDF" is not routed to the image branch.
        if filename.lower().endswith(".pdf"):
            if is_path:
                with open(file, "rb") as handle:
                    pdf_bytes = handle.read()
            else:
                pdf_bytes = file.read()
            pages = convert_from_bytes(pdf_bytes)
            # One OCR pass per rendered page, each followed by a newline.
            extracted_text = "".join(
                pytesseract.image_to_string(page) + "\n" for page in pages
            )
        else:
            # Image.open takes either a path or a file object; the context
            # manager releases the image once OCR is done.
            with Image.open(file) as img:
                extracted_text = pytesseract.image_to_string(img)

    # Guard against a missing text value so the concatenation cannot fail.
    full_text = (text or "") + "\n" + extracted_text
    if not full_text.strip():
        return "No text provided!"

    # Truncate to the model's maximum sequence length; without it, long
    # postings make the pipeline raise instead of returning a prediction.
    result = classifier(full_text, truncation=True)
    top = result[0]
    # NOTE(review): "POSITIVE" is mapped to "Legitimate" here; confirm the
    # loaded model's labels actually carry that meaning.
    label = "Legitimate" if top['label'] == "POSITIVE" else "Suspicious / Fake"
    score = top['score']
    return f"Prediction: {label} (Confidence: {score:.2f})"

# Gradio UI: a free-text box plus an optional document upload, both passed to
# detect_job; the returned message is shown as plain text.
description_input = gr.Textbox(
    lines=10,
    placeholder="Paste job description here...",
)
upload_input = gr.File(
    label="Upload PDF/Image",
    file_types=[".pdf", ".png", ".jpg", ".jpeg"],
    type="file",
)

iface = gr.Interface(
    fn=detect_job,
    inputs=[description_input, upload_input],
    outputs="text",
    title="Fake Job Detector",
)

# Start the web app as soon as the script runs.
iface.launch()