import gradio as gr
import PyPDF2
import docx
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import nltk
# Download the NLTK data needed for tokenization and stopword removal
# ('punkt_tab' is required alongside 'punkt' on newer NLTK releases)
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
# Function to extract text from uploaded files
def extract_text_from_file(file):
    if file.name.endswith('.pdf'):
        reader = PyPDF2.PdfReader(file)
        text = ''
        for page in reader.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text
        return text
    elif file.name.endswith('.docx'):
        doc = docx.Document(file)
        return '\n'.join([para.text for para in doc.paragraphs])
    elif file.name.endswith('.txt'):
        return file.read().decode('utf-8')
    else:
        return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
# Function to preprocess the text
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word not in stop_words]  # Remove stopwords
    return ' '.join(filtered_tokens)
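
# Illustrative example (hypothetical input) of what the cleaning above produces:
#     preprocess_text("Managed 5 engineers, shipping APIs in Python!")
# lowercases the text, strips digits and punctuation, and drops English stopwords,
# returning roughly "managed engineers shipping apis python".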
# Function to extract keywords using TF-IDF
def extract_keywords(text, top_n=10):
    vectorizer = TfidfVectorizer(max_features=top_n)
    vectorizer.fit_transform([text])
    feature_names = vectorizer.get_feature_names_out()
    return set(feature_names)
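
# Because only a single document is passed to fit_transform, the IDF term is constant,
# so max_features effectively keeps the top_n most frequent terms in that document.
# Illustrative sketch (hypothetical text):
#     extract_keywords("python sql python airflow sql python etl", top_n=2)
# would return a set such as {"python", "sql"}.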
# Combined function to evaluate ATS score and find missing keywords
def ats_evaluation(job_desc, resume_file):
    if resume_file is None:
        return "Please upload a resume file.", ""
    resume_text = extract_text_from_file(resume_file)
    if isinstance(resume_text, str) and "Unsupported" in resume_text:
        return resume_text, ""

    job_desc_processed = preprocess_text(job_desc)
    resume_processed = preprocess_text(resume_text)

    # Keywords present in the job description but absent from the resume
    job_keywords = extract_keywords(job_desc_processed)
    resume_keywords = extract_keywords(resume_processed)
    missing_keywords = job_keywords - resume_keywords

    # Cosine similarity between the TF-IDF vectors of the two texts
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([job_desc_processed, resume_processed])
    similarity_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    # Format output
    similarity_output = f"Similarity Score: {similarity_score * 100:.2f}%"
    if missing_keywords:
        missing_keywords_output = f"Missing Keywords: {', '.join(sorted(missing_keywords))}"
    else:
        missing_keywords_output = "No missing keywords. Your resume covers all key terms."
    return similarity_output, missing_keywords_output
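
# Illustrative direct call (hypothetical file and job description, bypassing the Gradio UI):
#     with open("resume.pdf", "rb") as f:
#         score, missing = ats_evaluation("Senior Python developer with SQL and AWS", f)
#         print(score)    # e.g. "Similarity Score: 42.17%" (value depends on the resume)
#         print(missing)  # e.g. "Missing Keywords: aws, sql"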
# Create the Gradio interface
app = gr.Interface(
    fn=ats_evaluation,
    inputs=[
        gr.Textbox(lines=10, placeholder='Paste job description here...', label="Job Description"),
        gr.File(label='Upload your resume (.txt, .pdf, .docx)')
    ],
    outputs=[
        gr.Textbox(label="Similarity Score"),
        gr.Textbox(label="Missing Keywords")
    ],
    title="ATS Resume Score Generator",
    description="Upload your resume and paste the job description to get a similarity score and identify missing keywords."
)
# Run the app
if __name__ == "__main__":
    app.launch()