File size: 2,975 Bytes
f54f27c b15b79a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import gradio as gr
import pickle
import docx
import PyPDF2
import re
# Load pre-trained model, TF-IDF vectorizer and encoder
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts that come from a trusted source.
def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file afterwards."""
    with open(path, 'rb') as fh:
        return pickle.load(fh)


svc_model = _load_pickle('clf.pkl')  # Update with your model path
tfidf = _load_pickle('tfidf.pkl')  # Update with your vectorizer path
le = _load_pickle('encoder.pkl')  # Update with your encoder path
# Function to clean resume text
# Patterns are applied in this order; every match is replaced by one space.
# Raw string literals are used so that `\S` / `\s` are regex escapes rather
# than invalid Python string escapes.
_RESUME_NOISE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'http\S+\s',  # "http..." token followed by one whitespace character
        r'RT|cc',  # literal "RT" / "cc", wherever they occur (even inside words)
        r'#\S+\s',  # "#..." token followed by one whitespace character
        r'@\S+',  # "@..." token
        '[%s]' % re.escape("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"""),  # ASCII punctuation
        r'[^\x00-\x7f]',  # any non-ASCII character
        r'\s+',  # collapse runs of whitespace
    )
)


def clean_resume(txt):
    """Return *txt* with URLs, tags, mentions, punctuation and non-ASCII removed.

    Each pattern in ``_RESUME_NOISE_PATTERNS`` is substituted with a single
    space, in order, and whitespace runs are collapsed last. The result is
    not stripped, so it may begin or end with one space.

    NOTE(review): the substitutions are intentionally unchanged (including
    the ``RT|cc`` rule, which also hits "cc" inside words) — presumably the
    pickled vectorizer was fitted on text cleaned this way; confirm before
    altering any pattern.
    """
    clean_text = txt
    for pattern in _RESUME_NOISE_PATTERNS:
        clean_text = pattern.sub(' ', clean_text)
    return clean_text
# Function to extract text from PDF
def extract_text_from_pdf(file):
    """Return the text of all pages of a PDF, concatenated in page order.

    ``file`` is passed unchanged to ``PyPDF2.PdfReader``. Page texts are
    joined with no separator between them.
    """
    reader = PyPDF2.PdfReader(file)
    return ''.join(page.extract_text() for page in reader.pages)
# Function to extract text from DOCX
def extract_text_from_docx(file):
    """Return the text of every paragraph in a DOCX, one paragraph per line.

    ``file`` is passed unchanged to ``docx.Document``. Each paragraph's text
    is followed by a newline, so a non-empty document ends with ``'\\n'``.
    """
    document = docx.Document(file)
    return ''.join(para.text + '\n' for para in document.paragraphs)
# Function to extract text from TXT
def extract_text_from_txt(file):
    """Return the contents of a plain-text upload as ``str``.

    Args:
        file: Either an object with a ``read()`` method, or a filesystem
            path (anything ``open()`` accepts).

    Returns:
        The decoded text. Bytes are decoded as UTF-8, falling back to
        Latin-1 when they are not valid UTF-8. If ``read()`` already
        returns ``str`` (text-mode stream) it is returned unchanged.
    """
    if hasattr(file, 'read'):
        raw = file.read()
    else:
        with open(file, 'rb') as fh:
            raw = fh.read()

    if isinstance(raw, str):
        return raw

    # Decode the SAME bytes in the fallback: reading the stream a second
    # time would return nothing because the first read consumed it.
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # Latin-1 maps every byte value, so this decode cannot fail.
        return raw.decode('latin-1')
# Function to handle file upload and extraction
def handle_file_upload(uploaded_file):
    """Extract the text of an uploaded resume, dispatching on file extension.

    Args:
        uploaded_file: The upload to read. Its file name is taken from the
            ``name`` attribute when present; otherwise the object itself is
            treated as the path (e.g. a plain ``str``).

    Returns:
        The extracted text.

    Raises:
        ValueError: If nothing was uploaded, or the extension is not
            ``pdf``, ``docx`` or ``txt`` (case-insensitive).
    """
    if uploaded_file is None:
        raise ValueError("No file uploaded. Please upload a PDF, DOCX, or TXT file.")

    # File-like uploads carry their path in .name; a bare path is its own name.
    file_name = str(getattr(uploaded_file, 'name', uploaded_file))
    file_extension = file_name.rsplit('.', 1)[-1].lower()

    if file_extension == 'pdf':
        return extract_text_from_pdf(uploaded_file)
    if file_extension == 'docx':
        return extract_text_from_docx(uploaded_file)
    if file_extension == 'txt':
        return extract_text_from_txt(uploaded_file)
    raise ValueError("Unsupported file type. Please upload a PDF, DOCX, or TXT file.")
# Function to predict the category of a resume
def predict_category(file):
    """Classify an uploaded resume and return a display string.

    The upload is read with ``handle_file_upload``, cleaned with
    ``clean_resume``, vectorized with the module-level ``tfidf``, classified
    by ``svc_model`` and mapped back to a label with ``le``.

    Returns:
        ``"Predicted Category: <label>"`` on success, or ``"Error: <message>"``
        if any step raises. Exceptions are not propagated, so the caller
        always receives a string.
    """
    try:
        document = clean_resume(handle_file_upload(file))
        # The sparse TF-IDF row is converted to a dense array before predict.
        features = tfidf.transform([document]).toarray()
        encoded_label = svc_model.predict(features)
        label = le.inverse_transform(encoded_label)[0]
        return f"Predicted Category: {label}"
    except Exception as err:
        return f"Error: {err!s}"
# Define Gradio interface: one file input, one text output, wired to predict_category
inputs = gr.File(label="Upload Resume (PDF, DOCX, TXT)")
outputs = gr.Textbox(label="Prediction")
interface = gr.Interface(
    fn=predict_category,
    inputs=inputs,
    outputs=outputs,
    title="Resume Classifier",
    description="Upload your resume to predict its job category using an AI model.",
)

# Launch the interface only when run as a script, not on import
if __name__ == "__main__":
    # NOTE(review): share=True presumably requests a public share link from
    # Gradio — confirm that exposing the app publicly is intended.
    interface.launch(share=True)
|