DreamStream-1 committed
Commit fb835f9 · verified · 1 Parent(s): 36492c8

Update app.py

Files changed (1):
  app.py +76 -183
app.py CHANGED
@@ -1,198 +1,91 @@
- import os
- import pandas as pd
- import google.generativeai as genai
- import PyPDF2 as pdf
- import io
- import re
  import streamlit as st
- from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
  from sklearn.feature_extraction.text import TfidfVectorizer
  from sklearn.metrics.pairwise import cosine_similarity
- import torch

- # Set API key for Google Generative AI
- api_key = os.getenv('GOOGLE_API_KEY')
- if not api_key:
-     raise ValueError("API key not found. Please set GOOGLE_API_KEY as an environment variable.")

- # Initialize the generative AI client
- genai.configure(api_key=api_key)

- # Load Hugging Face pipelines and models
- skill_extractor = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
- education_extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")

- # Sentiment analysis using Hugging Face RoBERTa
- task = "sentiment-analysis"
- model_name = "roberta-base"
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
-
- # Function to extract text from uploaded PDF
- def extract_pdf_text(uploaded_file):
-     """
-     Extract text from the uploaded PDF file.
-
-     Args:
-         uploaded_file: Streamlit uploaded file object.
-     Returns:
-         str: Extracted text content.
-     """
-     try:
-         file_stream = io.BytesIO(uploaded_file.read())
-         reader = pdf.PdfReader(file_stream)
-         text = "".join([page.extract_text() for page in reader.pages])
-         return text.strip()
-     except Exception as e:
-         st.error(f"Error extracting text from PDF: {e}")
-         return ""
-
- # Function to extract email and phone numbers
- def extract_contact_info(text):
-     """
-     Extract email and phone number using regex.
-
-     Args:
-         text: Extracted text content from the resume.
-     Returns:
-         tuple: Extracted email and phone number.
-     """
-     email = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)
-     phone = re.search(r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}", text)
-     return (email.group(0) if email else "Not Available",
-             phone.group(0) if phone else "Not Available")
-
- # Function to extract skills using NER
- def extract_skills(text):
-     """
-     Extract skills from resume text using NER.
-
-     Args:
-         text: Resume text.
-     Returns:
-         str: Comma-separated skills or "Not Available".
-     """
-     ner_results = skill_extractor(text)
-     skills = [entity['word'] for entity in ner_results if entity['entity_group'] == 'SKILL']
-     return ", ".join(skills) if skills else "Not Available"
-
- # Function to extract education details
- def extract_education(text):
-     """
-     Extract education information using NER and regex.
-
-     Args:
-         text: Resume text.
-     Returns:
-         str: Extracted education details.
-     """
-     ner_results = education_extractor(text)
-     education_entities = [entity['word'] for entity in ner_results if entity['entity_group'] == 'EDUCATION']
-
-     if education_entities:
-         return ", ".join(education_entities)
-     else:
-         education_patterns = [
-             r"(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B.A|M.A|B.Tech|M.Tech|Engineering|Data Science)",
-             r"(University of [A-Za-z]+)"
-         ]
-         matches = []
-         for pattern in education_patterns:
-             matches.extend(re.findall(pattern, text))
-         return ", ".join(matches) if matches else "Not Available"
-
- # Function to calculate match percentage using TF-IDF
- def calculate_match_percentage(resume_text, job_description):
-     """
-     Calculate the match percentage using TF-IDF and cosine similarity.
-
-     Args:
-         resume_text: Resume text.
-         job_description: Job description.
-     Returns:
-         float: Match percentage (0-100).
-     """
-     documents = [resume_text, job_description]
-     tfidf_vectorizer = TfidfVectorizer(stop_words='english')
-     tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
      cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
-     return round(cosine_sim[0][0] * 100, 2)

- # Function to analyze resume with Gemini Flash 1.5
- def analyze_with_gemini(resume_text, job_description):
-     """
-     Use Gemini Flash 1.5 to generate an ATS analysis.
-
-     Args:
-         resume_text: Text content of the resume.
-         job_description: Job description content.
-     Returns:
-         str: AI-generated analysis.
-     """
      prompt = f"""
-     Act as an advanced ATS. Analyze the resume and job description.
      Resume: {resume_text}
      Job Description: {job_description}
-     Extract:
-     - Candidate Name
-     - Skills
-     - Education
-     - Leadership Experience (years)
-     - Match Percentage
-     Provide a summary of the candidate's strengths in bullet points.
      """
-     try:
-         response = genai.generate_text(
-             model="gemini-1p5",
-             prompt=prompt,
-             temperature=0.7,
-             max_output_tokens=500
-         )
-         return response.result
-     except Exception as e:
-         return f"Error generating analysis: {e}"

- # Streamlit Interface
- st.title("Resume ATS Analysis Tool")
- st.markdown("### Upload Resume PDF and Enter Job Description for Analysis")
-
- uploaded_file = st.file_uploader("Upload Resume (PDF format)", type=["pdf"])
- job_description = st.text_area("Job Description", height=200)
-
- if uploaded_file and job_description.strip():
-     if st.button("Analyze"):
-         resume_text = extract_pdf_text(uploaded_file)
-         if not resume_text:
-             st.error("No text extracted from PDF. Please upload a valid file.")
-             st.stop()
-
-         # Extract candidate details
-         email, phone = extract_contact_info(resume_text)
-         skills = extract_skills(resume_text)
-         education = extract_education(resume_text)
-         match_percentage = calculate_match_percentage(resume_text, job_description)
-         gemini_analysis = analyze_with_gemini(resume_text, job_description)
-
-         # Prepare the results
-         results = {
-             "Email": email,
-             "Contact": phone,
-             "Skills": skills,
-             "Education": education,
-             "Match Percentage": match_percentage,
-             "Gemini Analysis": gemini_analysis
-         }
-
-         # Display results
-         st.write(pd.DataFrame([results]))
-
-         # Allow download as CSV
-         csv = pd.DataFrame([results]).to_csv(index=False)
-         st.download_button(
-             label="Download Results as CSV",
-             data=csv,
-             file_name="resume_analysis_results.csv",
-             mime="text/csv"
-         )
- else:
-     st.info("Upload a resume and provide a job description to start the analysis.")

+ import spacy
  import streamlit as st
  from sklearn.feature_extraction.text import TfidfVectorizer
  from sklearn.metrics.pairwise import cosine_similarity
+ import PyPDF2
+ import nltk
+ from nltk.corpus import stopwords
+ from nltk.tokenize import word_tokenize
+ from gemini_flash import GeminiFlash  # Assuming Gemini Flash is installed

+ # Ensure that NLTK's stopwords are available
+ nltk.download('punkt')
+ nltk.download('stopwords')

+ # Load spaCy model for NER
+ nlp = spacy.load("en_core_web_sm")

+ # Initialize Gemini Flash for prompt engineering
+ prompt_engineer = GeminiFlash()

+ # Streamlit Interface
+ st.title("AI Resume and Job Description Analyzer")
+
+ # Step 1: Resume Upload
+ uploaded_file = st.file_uploader("Upload Resume (PDF)", type="pdf")
+
+ if uploaded_file is not None:
+     # Read the PDF file
+     pdf_reader = PyPDF2.PdfReader(uploaded_file)
+     resume_text = ""
+     for page in range(len(pdf_reader.pages)):
+         resume_text += pdf_reader.pages[page].extract_text()
+
+     # Display the resume text
+     st.text_area("Resume Text", resume_text, height=300)
+
+     # Step 2: Job Description Input
+     job_description = st.text_area("Enter Job Description")
+
+     if job_description:
+         # Preprocess job description using NLTK
+         stop_words = set(stopwords.words("english"))
+         tokens = word_tokenize(job_description)
+         filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
+         ps = nltk.PorterStemmer()
+         stemmed_tokens = [ps.stem(word) for word in filtered_tokens]
+         preprocessed_job_description = " ".join(stemmed_tokens)
+
+         # Display preprocessed job description
+         st.text_area("Processed Job Description", preprocessed_job_description)
+
+     # Step 3: Named Entity Recognition (NER) on Resume
+     if resume_text:
+         doc = nlp(resume_text)
+         entities = [(ent.text, ent.label_) for ent in doc.ents]
+
+         # Display extracted entities
+         st.subheader("Named Entities from Resume")
+         st.write(entities)
+
+     # Step 4: Candidate-Job Relevance Using Cosine Similarity
+     if resume_text and job_description:
+         vectorizer = TfidfVectorizer()
+         tfidf_matrix = vectorizer.fit_transform([job_description, resume_text])
          cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])

+         st.write(f"Cosine Similarity (Relevance): {cosine_sim[0][0]:.2f}")
+
+     # Step 5: Gemini Flash - Prompt Engineering (using Gemini Flash to craft relevant prompts for an LLM)
+     if resume_text and job_description:
          prompt = f"""
+         Given the resume text and job description, evaluate how well the candidate's qualifications match the job requirements.
+
          Resume: {resume_text}
          Job Description: {job_description}
+
+         Based on the information provided, generate a detailed match score between the candidate and the job.
          """
+
+         # Use Gemini Flash to refine and enhance the prompt (assuming Gemini Flash enhances the prompt)
+         enhanced_prompt = prompt_engineer.refine_prompt(prompt)
+
+         # Display the enhanced prompt (for debugging or transparency)
+         st.subheader("Enhanced Prompt for LLM")
+         st.write(enhanced_prompt)
+
+         # Here you would typically pass the `enhanced_prompt` to a large language model (LLM) API or model for evaluation
+         # For demonstration purposes, assume a function `get_llm_response` exists that interacts with a model.
+         # response = get_llm_response(enhanced_prompt)
+         # st.write("LLM Response:", response)
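
The new script ends with a commented-out placeholder call to `get_llm_response`, and `gemini_flash`/`GeminiFlash` is assumed to be installed rather than being a published package. A minimal sketch of such a helper is given below; it assumes the google-generativeai package that the removed code already imported as `genai`, a GOOGLE_API_KEY environment variable, and the model name "gemini-1.5-flash" (all assumptions, not part of the commit).

# Hypothetical sketch of the `get_llm_response` placeholder referenced in the diff above.
# Assumes the google-generativeai package and a GOOGLE_API_KEY environment variable;
# "gemini-1.5-flash" is an assumed model name.
import os
import google.generativeai as genai

genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

def get_llm_response(prompt: str) -> str:
    # Send the (enhanced) prompt to a Gemini model and return its text output.
    model = genai.GenerativeModel("gemini-1.5-flash")
    response = model.generate_content(prompt)
    return response.text

# Usage, following the commented-out lines in the diff:
# response = get_llm_response(enhanced_prompt)
# st.write("LLM Response:", response)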
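
The removed calculate_match_percentage reported a 0-100 score, while the new Step 4 writes the raw cosine similarity in the 0-1 range. If the percentage view is still wanted, a one-line conversion on top of the new code's variables restores it (sketch, using the same names as in the diff):

# Convert the 0-1 cosine similarity into the 0-100 match percentage the removed code reported.
match_percentage = round(cosine_sim[0][0] * 100, 2)
st.write(f"Match Percentage: {match_percentage}%")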