Spaces:
Build error
Build error
| # import gradio as gr | |
| # from sklearn.feature_extraction.text import TfidfVectorizer | |
| # from sklearn.metrics.pairwise import cosine_similarity | |
| # import fitz | |
| # from docx import Document | |
| # | |
| # def read_resume_file(file): | |
| # if file.name.endswith('.txt'): | |
| # content = file.read().decode('utf-8') | |
| # elif file.name.endswith('.pdf'): | |
| # content = '' | |
| # with fitz.open(stream=file.read(), filetype='pdf') as doc: | |
| # for page in doc: | |
| # content+= page.get_text() | |
| # elif file.name.endswith('.docx'): | |
| # content ='' | |
| # document = Document(file) | |
| # for para in document.paragraphs: | |
| # content+=para.text+ '\n' | |
| # else: | |
| # return "Unsupported file format. Please upload a .txt, .pdf, or .docx file." | |
| # return content | |
| # | |
| # | |
| # def calculate_similarity(job_desc, resume): | |
| # vectorizer = TfidfVectorizer(stop_words = 'english') | |
| # tfidf_matrix = vectorizer.fit_transform([job_desc, resume]) | |
| # print(tfidf_matrix) | |
| # | |
| # similarityScore = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0] | |
| # return f"Similarity Score: {similarityScore * 100:.2f}%" | |
| # | |
| # def find_missing_keywords(job_desc, resume): | |
| # vectorizer = TfidfVectorizer(stop_words='english') | |
| # vectorizer.fit_transform([job_desc, resume]) | |
| # | |
| # job_desc_words = set(job_desc.lower().split()) | |
| # resume_words = set(resume.lower().split()) | |
| # | |
| # missing_words = job_desc_words - resume_words | |
| # | |
| # return list(missing_words) | |
| # | |
| # def ats_evalution(job_desc, resume_file): | |
| # resume_text = read_resume_file(resume_file) | |
| # if isinstance(resume_text, str) and resume_text.startswith("Unsupported"): | |
| # return resume_text, "" | |
| # similarity = calculate_similarity(job_desc, resume_text) | |
| # missing_keywords = find_missing_keywords(job_desc, resume_text) | |
| # | |
| # if missing_keywords: | |
| # missing_keywords_str = ", ".join(missing_keywords) | |
| # missing_info = f"Missing Keywords: {missing_keywords_str}" | |
| # else: | |
| # missing_info = "No missing keywords. Your resume covers all keywords in the job description." | |
| # return similarity, missing_info | |
| # | |
| # app = gr.Interface( | |
| # fn=ats_evalution, | |
| # inputs = [ | |
| # gr.Textbox(lines = 10, placeholder = 'Paste job description here....'), | |
| # gr.File(label='Upload your resume (.txt & .pdf & .docx)') | |
| # ], | |
| # | |
| # outputs = [ | |
| # gr.Text(label="Similarity Score"), | |
| # gr.Text(label="Missing Keywords") | |
| # ], | |
| # | |
| # title = "ATS Resume Score Generator", | |
| # description="Upload your resume and paste the job description to get a similarity score and identify missing keywords." | |
| # | |
| # ) | |
| # | |
| # if __name__ == "__main__": | |
| # app.launch() | |
| # | |
| import gradio as gr | |
| import PyPDF2 | |
| import docx | |
| import re | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from nltk.corpus import stopwords | |
| from nltk.tokenize import word_tokenize | |
| import string | |
| import nltk | |
# Fetch the NLTK data packages (tokenizer models and the stop-word corpus)
# at import time, in the same order as before.
for _resource in ('punkt_tab', 'punkt', 'stopwords'):
    nltk.download(_resource)
| # Function to extract text from uploaded files | |
def extract_text_from_file(file):
    """Return the plain text of an uploaded resume.

    Args:
        file: Either a filesystem path (``str``) or an object exposing a
            ``name`` attribute and, optionally, a ``read()`` method.
            ``None`` is treated as "nothing was uploaded".

    Returns:
        The extracted text. When there is no file, or its extension is not
        .txt, .pdf or .docx, a human-readable message that starts with
        ``"Unsupported"`` is returned instead.
    """
    if file is None:
        return "Unsupported input: no file was uploaded. Please upload a .txt, .pdf, or .docx file."

    # A plain path string is its own name; file-like objects carry ``.name``.
    path = file if isinstance(file, str) else getattr(file, "name", "")
    has_stream = hasattr(file, "read")
    # The PDF/DOCX parsers are handed the open stream when there is one and
    # the path otherwise.
    source = file if has_stream else path
    # Compare case-insensitively so "Resume.PDF" is accepted as well.
    lowered = str(path).lower()

    if lowered.endswith('.pdf'):
        reader = PyPDF2.PdfReader(source)
        page_texts = (page.extract_text() for page in reader.pages)
        # Skip pages that yield no text, and join with newlines so the last
        # word of one page is not fused with the first word of the next.
        return '\n'.join(page_text for page_text in page_texts if page_text)

    if lowered.endswith('.docx'):
        document = docx.Document(source)
        return '\n'.join(para.text for para in document.paragraphs)

    if lowered.endswith('.txt'):
        if has_stream:
            raw = file.read()
        else:
            with open(path, "rb") as handle:
                raw = handle.read()
        if isinstance(raw, bytes):
            # "utf-8-sig" drops a leading BOM; undecodable bytes are replaced
            # instead of aborting the whole evaluation.
            return raw.decode('utf-8-sig', errors='replace')
        return raw

    return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
| # Function to preprocess the text | |
def preprocess_text(text):
    """Normalize free text before TF-IDF comparison.

    The text is lower-cased, digit runs are removed, ASCII punctuation is
    turned into spaces, the result is tokenized with ``word_tokenize`` and
    English stop words are dropped.

    Args:
        text: Raw text; ``None`` or an empty string yields ``''``.

    Returns:
        The remaining tokens joined by single spaces.
    """
    if not text:
        return ''
    text = text.lower()
    text = re.sub(r'\d+', '', text)  # Remove numbers
    # Replace punctuation with spaces instead of deleting it, so terms such
    # as "python/java" or "sql,excel" remain separate words rather than
    # being fused into a single meaningless token.
    punctuation_to_space = str.maketrans(
        string.punctuation, ' ' * len(string.punctuation)
    )
    text = text.translate(punctuation_to_space)
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    return ' '.join(token for token in tokens if token not in stop_words)
| # Function to extract keywords using TF-IDF | |
def extract_keywords(text, top_n=10):
    """Return the vocabulary a TF-IDF vectorizer keeps for ``text``.

    Args:
        text: A single (already preprocessed) document.
        top_n: Passed to ``TfidfVectorizer`` as ``max_features``, capping
            the number of terms kept.

    Returns:
        A set of term strings; empty when ``text`` is blank or contains no
        token the vectorizer accepts.
    """
    # Blank input would make the vectorizer fail on an empty vocabulary.
    if not text or not text.strip():
        return set()
    vectorizer = TfidfVectorizer(max_features=top_n)
    try:
        # Only the fitted vocabulary is needed, not the TF-IDF matrix.
        vectorizer.fit([text])
    except ValueError:
        # Raised when no token survives the vectorizer's tokenization.
        return set()
    return set(vectorizer.get_feature_names_out())
| # Combined function to evaluate ATS score and find missing keywords | |
def ats_evaluation(job_desc, resume_file):
    """Compare a resume with a job description.

    Args:
        job_desc: Job description text.
        resume_file: The uploaded resume, passed to
            ``extract_text_from_file``; ``None`` when nothing was uploaded.

    Returns:
        A ``(similarity_message, missing_keywords_message)`` tuple of
        strings. When the inputs cannot be evaluated, the first element is
        an explanatory message and the second is ``""``.
    """
    no_text_message = "Could not find any usable text in the job description or resume."

    if not job_desc or not job_desc.strip():
        return "Please paste a job description.", ""
    if resume_file is None:
        return "Please upload a resume file.", ""

    resume_text = extract_text_from_file(resume_file)
    # The extractor signals problems with a message that begins with
    # "Unsupported". Match the prefix only, so a resume that merely
    # contains that word is still evaluated.
    if isinstance(resume_text, str) and resume_text.startswith("Unsupported"):
        return resume_text, ""

    job_desc_processed = preprocess_text(job_desc)
    resume_processed = preprocess_text(resume_text)
    if not job_desc_processed.strip() or not resume_processed.strip():
        return no_text_message, ""

    try:
        job_keywords = extract_keywords(job_desc_processed)
        vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform([job_desc_processed, resume_processed])
    except ValueError:
        # The vectorizer found no usable tokens in the texts.
        return no_text_message, ""

    # A keyword counts as missing only when it appears nowhere in the
    # resume. Comparing against the resume's own top keywords would flag
    # terms that are present but simply not among its most frequent ones.
    resume_terms = set(vectorizer.build_analyzer()(resume_processed))
    missing_keywords = sorted(job_keywords - resume_terms)

    # Calculate similarity score
    similarity_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    # Format output
    similarity_output = f"Similarity Score: {similarity_score * 100:.2f}%"
    if missing_keywords:
        missing_keywords_output = f"Missing Keywords: {', '.join(missing_keywords)}"
    else:
        missing_keywords_output = "No missing keywords. Your resume covers all key terms."
    return similarity_output, missing_keywords_output
| # Create the Gradio interface | |
# Gradio UI: a job-description text box and a resume upload go in, and the
# two strings returned by ats_evaluation come out.
app = gr.Interface(
    title="ATS Resume Score Generator",
    description="Upload your resume and paste the job description to get a similarity score and identify missing keywords.",
    fn=ats_evaluation,
    inputs=[
        gr.Textbox(
            label="Job Description",
            lines=10,
            placeholder='Paste job description here...',
        ),
        gr.File(label='Upload your resume (.txt, .pdf, .docx)'),
    ],
    outputs=[
        gr.Textbox(label="Similarity Score"),
        gr.Textbox(label="Missing Keywords"),
    ],
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()