Spaces:

Prernas19
/

resume_analysis

Sleeping

App Files Files Community

Prernas19 committed on Jul 26, 2024

Commit

3a52026

verified ·

1 Parent(s): d91a2ca

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -25

app.py CHANGED Viewed

@@ -2,8 +2,6 @@ import os
 import re
 import fitz  # Importing PyMuPDF for PDF text extraction
 import nltk
-from transformers import BertTokenizer, BertModel
-import torch
 from gensim.models.doc2vec import Doc2Vec, TaggedDocument
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
@@ -14,10 +12,6 @@ import gradio as gr
 nltk.download('punkt')
 nltk.download('stopwords')
-# Load BERT model and tokenizer
-tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-model = BertModel.from_pretrained('bert-base-uncased')
 # Function to preprocess text
 def preprocess_text(text):
     text = re.sub(r'\W+', ' ', text.lower())  # Remove non-alphanumeric characters and lower case
@@ -79,25 +73,12 @@ def give_feedback(resume_text, job_description):
     return feedback
-# Function to compute BERT embeddings
-def get_bert_embeddings(text):
-    tokens = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
-    with torch.no_grad():
-        outputs = model(**tokens)
-    return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
-# Function to calculate BERT similarity score
-def bert_similarity(resume, job_description):
-    resume_embedding = get_bert_embeddings(resume)
-    job_description_embedding = get_bert_embeddings(job_description)
-    cosine_sim = cosine_similarity([resume_embedding], [job_description_embedding])
-    return cosine_sim[0][0]
 # Function to calculate TF-IDF cosine similarity score
 # A new TfidfVectorizer is fitted on just the two texts passed in, and the
 # cosine similarity between the resulting two rows is returned as one number.
 def tfidf_cosine_similarity(resume, jd):
     documents = [resume, jd]  # row 0 = resume, row 1 = job description
     vectorizer = TfidfVectorizer()
     tfidf_matrix = vectorizer.fit_transform(documents)
     # Compare row 0 (resume) against row 1 (job description)
     cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
     return cosine_sim[0][0]  # unwrap the single pairwise score
@@ -105,6 +86,7 @@ def tfidf_cosine_similarity(resume, jd):
 # Function to calculate Doc2Vec cosine similarity score
 # Each text is split on whitespace and passed to model.infer_vector; the
 # cosine similarity of the two inferred vectors is returned as one number.
 # NOTE(review): `model` is presumably a trained gensim Doc2Vec instance --
 # confirm against the caller.
 def doc2vec_cosine_similarity(resume, jd, model):
     resume_vector = model.infer_vector(resume.split())
     jd_vector = model.infer_vector(jd.split())
     # Each vector is wrapped in a list so it is passed as a single row
     cosine_sim = cosine_similarity([resume_vector], [jd_vector])
     return cosine_sim[0][0]  # unwrap the single pairwise score
@@ -127,7 +109,7 @@ def extract_info_from_resumes(resume_files, job_description):
     documents.append(preprocess_text(job_description))
     tagged_docs = [TaggedDocument(doc.split(), [i]) for i, doc in enumerate(documents)]
-    model_doc2vec = Doc2Vec(tagged_docs, vector_size=50, window=2, min_count=1, workers=4)
     for file in resume_files:
         text = extract_text_from_pdf(file.name)
@@ -149,15 +131,13 @@ def extract_info_from_resumes(resume_files, job_description):
         common_keywords = set(jd_keywords).intersection(set(resume_keywords))
         keyword_match_score = len(common_keywords)  # Count of common keywords as a whole number
         tfidf_score = tfidf_cosine_similarity(text, job_description)
-        doc2vec_score = doc2vec_cosine_similarity(preprocessed_text, preprocess_text(job_description), model_doc2vec)
-        bert_score = bert_similarity(preprocessed_text, preprocess_text(job_description))
         data.append({
             'Name': name,
             'Keyword_Match_Score': keyword_match_score,  # Whole number
             'TFIDF_Score': tfidf_score,
             'Doc2Vec_Score': doc2vec_score,
-            'BERT_Score': bert_score,  # Add BERT score
             'Years_of_Experience': years_of_experience,
             'Feedback': '; '.join(feedback),  # Combine feedback into a single string
         })
@@ -197,4 +177,4 @@ iface = gr.Interface(
 )
 # Launch the Gradio interface
-iface.launch(inline = False)

 import re
 import fitz  # Importing PyMuPDF for PDF text extraction
 import nltk
 from gensim.models.doc2vec import Doc2Vec, TaggedDocument
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 nltk.download('punkt')
 nltk.download('stopwords')
 # Function to preprocess text
 def preprocess_text(text):
     text = re.sub(r'\W+', ' ', text.lower())  # Remove non-alphanumeric characters and lower case
     return feedback
 # Function to calculate TF-IDF cosine similarity score
 # A new TfidfVectorizer is fitted on just the two texts passed in, and the
 # cosine similarity between the resulting two rows is returned as one number.
 def tfidf_cosine_similarity(resume, jd):
     documents = [resume, jd]  # row 0 = resume, row 1 = job description
     vectorizer = TfidfVectorizer()
     tfidf_matrix = vectorizer.fit_transform(documents)
     # Compare row 0 (resume) against row 1 (job description)
     cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
     return cosine_sim[0][0]  # unwrap the single pairwise score
 # Function to calculate Doc2Vec cosine similarity score
 # Each text is split on whitespace and passed to model.infer_vector; the
 # cosine similarity of the two inferred vectors is returned as one number.
 # NOTE(review): `model` is presumably a trained gensim Doc2Vec instance --
 # confirm against the caller.
 def doc2vec_cosine_similarity(resume, jd, model):
     resume_vector = model.infer_vector(resume.split())
     jd_vector = model.infer_vector(jd.split())
     # Each vector is wrapped in a list so it is passed as a single row
     cosine_sim = cosine_similarity([resume_vector], [jd_vector])
     return cosine_sim[0][0]  # unwrap the single pairwise score
     documents.append(preprocess_text(job_description))
     tagged_docs = [TaggedDocument(doc.split(), [i]) for i, doc in enumerate(documents)]
+    model = Doc2Vec(tagged_docs, vector_size=50, window=2, min_count=1, workers=4)
     for file in resume_files:
         text = extract_text_from_pdf(file.name)
         common_keywords = set(jd_keywords).intersection(set(resume_keywords))
         keyword_match_score = len(common_keywords)  # Count of common keywords as a whole number
         tfidf_score = tfidf_cosine_similarity(text, job_description)
+        doc2vec_score = doc2vec_cosine_similarity(preprocessed_text, preprocess_text(job_description), model)
         data.append({
             'Name': name,
             'Keyword_Match_Score': keyword_match_score,  # Whole number
             'TFIDF_Score': tfidf_score,
             'Doc2Vec_Score': doc2vec_score,
             'Years_of_Experience': years_of_experience,
             'Feedback': '; '.join(feedback),  # Combine feedback into a single string
         })
 )
 # Launch the Gradio interface
+iface.launch()