# NOTE(review): a large block of commented-out legacy code (an earlier version
# of this app using fitz/python-docx directly) was removed from the top of the
# file; the current implementation below supersedes it.

import re
import string

import docx
import gradio as gr
import nltk
import PyPDF2
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download necessary NLTK data.
# NOTE(review): these run as a module-level side effect on every import;
# consider moving them behind the __main__ guard.
nltk.download('punkt_tab')
nltk.download('punkt')
nltk.download('stopwords')


def extract_text_from_file(file):
    """Extract plain text from an uploaded resume file.

    Supports .pdf (via PyPDF2), .docx (via python-docx) and .txt files.
    The extension check is case-insensitive, so e.g. ``resume.PDF`` is
    accepted as well.

    Args:
        file: A file-like object with a ``name`` attribute (as provided by
            the Gradio ``File`` component) and, for .txt files, a binary
            ``read()`` method.

    Returns:
        The extracted text, or an error message string starting with
        "Unsupported" when the file extension is not a supported format.
    """
    name = file.name.lower()  # case-insensitive extension matching
    if name.endswith('.pdf'):
        reader = PyPDF2.PdfReader(file)
        text = ''
        for page in reader.pages:
            page_text = page.extract_text()
            # extract_text() may return None (e.g. image-only pages); skip those.
            if page_text:
                text += page_text
        return text
    elif name.endswith('.docx'):
        doc = docx.Document(file)
        return '\n'.join([para.text for para in doc.paragraphs])
    elif name.endswith('.txt'):
        # Gradio supplies the file opened in binary mode; decode explicitly.
        return file.read().decode('utf-8')
    else:
        return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
# Function to preprocess the text
def preprocess_text(text):
    """Normalize text for keyword comparison.

    Lower-cases the text, strips digits and punctuation, tokenizes it,
    and removes English stopwords.

    Args:
        text: Raw input string.

    Returns:
        A single space-joined string of the remaining tokens.
    """
    text = text.lower()
    text = re.sub(r'\d+', '', text)  # Remove numbers
    # Remove punctuation in one C-level pass instead of chained replaces.
    text = text.translate(str.maketrans('', '', string.punctuation))
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word not in stop_words]
    return ' '.join(filtered_tokens)


# Function to extract keywords using TF-IDF
def extract_keywords(text, top_n=10):
    """Return the ``top_n`` highest-scoring TF-IDF terms of ``text`` as a set.

    Args:
        text: Preprocessed input string.
        top_n: Maximum number of keywords to keep (TfidfVectorizer's
            ``max_features``).

    Returns:
        A set of keyword strings.
    """
    vectorizer = TfidfVectorizer(max_features=top_n)
    # Only the fitted vocabulary is needed; the matrix itself is unused.
    vectorizer.fit_transform([text])
    return set(vectorizer.get_feature_names_out())


# Combined function to evaluate ATS score and find missing keywords
def ats_evaluation(job_desc, resume_file):
    """Score a resume against a job description.

    Args:
        job_desc: The job description text.
        resume_file: Uploaded resume file (.txt/.pdf/.docx), or None when
            the user submitted without uploading anything.

    Returns:
        A ``(similarity message, missing-keywords message)`` tuple of strings.
    """
    if resume_file is None:
        # Guard: Gradio's File input passes None when nothing was uploaded,
        # which previously crashed with AttributeError.
        return "Please upload a resume file.", ""
    resume_text = extract_text_from_file(resume_file)
    if isinstance(resume_text, str) and "Unsupported" in resume_text:
        return resume_text, ""

    job_desc_processed = preprocess_text(job_desc)
    resume_processed = preprocess_text(resume_text)

    job_keywords = extract_keywords(job_desc_processed)
    resume_keywords = extract_keywords(resume_processed)
    missing_keywords = job_keywords - resume_keywords

    # Calculate similarity score: cosine similarity between the two
    # TF-IDF document vectors.
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([job_desc_processed, resume_processed])
    similarity_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    # Format output
    similarity_output = f"Similarity Score: {similarity_score * 100:.2f}%"
    if missing_keywords:
        # Sort for deterministic output; set iteration order is arbitrary,
        # so the same inputs previously produced differently ordered strings.
        missing_keywords_output = f"Missing Keywords: {', '.join(sorted(missing_keywords))}"
    else:
        missing_keywords_output = "No missing keywords. Your resume covers all key terms."
    return similarity_output, missing_keywords_output


# Create the Gradio interface
app = gr.Interface(
    fn=ats_evaluation,
    inputs=[
        gr.Textbox(lines=10, placeholder='Paste job description here...', label="Job Description"),
        gr.File(label='Upload your resume (.txt, .pdf, .docx)')
    ],
    outputs=[
        gr.Textbox(label="Similarity Score"),
        gr.Textbox(label="Missing Keywords")
    ],
    title="ATS Resume Score Generator",
    description="Upload your resume and paste the job description to get a similarity score and identify missing keywords."
)

# Run the app
if __name__ == "__main__":
    app.launch()