akazmi committed on
Commit
1d4d6a7
·
verified ·
1 Parent(s): d4f3e5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -40
app.py CHANGED
@@ -1,44 +1,122 @@
1
- from transformers import pipeline
2
  import streamlit as st
3
- # Function to summarize text using Hugging Face Transformers
4
- def summarize_text(text, model_name="facebook/bart-large-cnn"):
5
- summarizer = pipeline("summarization", model=model_name)
6
- summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
7
- return summary[0]["summary_text"]
8
-
9
- # Streamlit UI additions
10
- if st.button("Analyze Resumes"):
11
- if not uploaded_files:
12
- st.error("Please upload at least one resume.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  else:
14
- # Extract text from resumes
15
- resumes = [extract_text(file) for file in uploaded_files]
16
- resumes = [resume for resume in resumes if resume.strip()] # Filter out empty files
 
 
 
 
 
 
 
 
17
 
18
- if not resumes:
19
- st.error("No valid text extracted from resumes. Please check your files.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  else:
21
- # Combine job description and resumes for analysis
22
- documents = [job_description] + resumes
23
-
24
- # Extract keywords and calculate similarity
25
- vectorizer, tfidf_matrix = extract_keywords(documents)
26
- similarities = calculate_similarity(tfidf_matrix)
27
-
28
- # Display match scores and summaries
29
- st.subheader("Resume Analysis")
30
- for i, file in enumerate(uploaded_files):
31
- st.write(f"**Resume {i+1}: {file.name}**")
32
- st.write(f"Match Score: {similarities[0][i + 1] * 100:.2f}%")
33
-
34
- # Generate and display summary
35
- resume_summary = summarize_text(resumes[i])
36
- st.write("**Summary:**")
37
- st.write(resume_summary)
38
-
39
- # Display weightage basis (keywords match)
40
- job_keywords = set(vectorizer.get_feature_names_out())
41
- resume_keywords = set(resumes[i].lower().split())
42
- matched_keywords = job_keywords.intersection(resume_keywords)
43
- st.write("**Matched Keywords:**", ", ".join(matched_keywords))
44
- st.write("---")
 
 
 
1
  import streamlit as st
2
+ import os
3
+ from groq import Groq
4
+ import numpy as np
5
+ import re
6
+ from sklearn.feature_extraction.text import TfidfVectorizer
7
+ from sklearn.metrics.pairwise import cosine_similarity
8
+ from docx import Document
9
+ from PyPDF2 import PdfReader
10
+ from transformers import pipeline
11
+
12
# Groq API client; the key is read from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
16
+
17
# Initialize the Hugging Face summarization pipeline.
# Streamlit re-executes this script on every widget interaction, so the
# pipeline is built behind st.cache_resource to avoid reloading the model
# on each rerun.
@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline (cached by Streamlit after the first call)."""
    return pipeline("summarization")


summarizer = _load_summarizer()
19
+
20
def groq_chat_completion(prompt, model="llama3-8b-8192"):
    """Send ``prompt`` to Groq as a single user message and return the reply text.

    Args:
        prompt: Text used as the content of the only (user-role) message.
        model: Groq model identifier. Defaults to ``"llama3-8b-8192"``, the
            value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    # Uses the module-level ``client`` created at import time.
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    return chat_completion.choices[0].message.content
32
+
33
def extract_text(file):
    """Return the plain text of an uploaded file, selected by its MIME type.

    Args:
        file: Uploaded file object exposing a ``type`` attribute (MIME type).
            Plain-text uploads must also provide ``read()`` returning bytes;
            PDF and Word uploads are handed to ``PdfReader`` / ``Document``.

    Returns:
        The extracted text, or ``""`` when the MIME type is not one of
        plain text, PDF, or .docx.
    """
    mime_type = file.type

    if mime_type == "text/plain":
        # "utf-8-sig" drops a leading byte-order mark, and errors="replace"
        # keeps one badly encoded byte from aborting the whole analysis.
        return file.read().decode("utf-8-sig", errors="replace")

    if mime_type == "application/pdf":
        # extract_text() may yield nothing for a page; treat that as "".
        return "".join(page.extract_text() or "" for page in PdfReader(file).pages)

    if mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        # One line per paragraph, each terminated by a newline.
        return "".join(f"{para.text}\n" for para in Document(file).paragraphs)

    return ""
51
+
52
def extract_keywords(documents):
    """Fit a TF-IDF vectorizer (English stop words removed) on ``documents``.

    Returns:
        A ``(vectorizer, tfidf_matrix)`` tuple: the fitted vectorizer and the
        matrix produced by ``fit_transform`` for the given documents.
    """
    tfidf = TfidfVectorizer(stop_words="english")
    return tfidf, tfidf.fit_transform(documents)
57
+
58
def calculate_similarity(tfidf_matrix):
    """Return ``cosine_similarity`` applied to ``tfidf_matrix`` against itself."""
    return cosine_similarity(tfidf_matrix)
61
 
62
def generate_summary(text):
    """Summarize ``text`` when it is longer than 200 whitespace-separated words.

    Args:
        text: Resume text to summarize.

    Returns:
        ``text`` unchanged when it has 200 words or fewer; otherwise the
        ``summary_text`` of the first result from the module-level
        ``summarizer`` pipeline.
    """
    if len(text.split()) <= 200:
        # Short enough to show as-is.
        return text

    # truncation=True clips inputs that exceed the model's maximum input
    # length instead of letting the pipeline raise on long resumes.
    summary = summarizer(
        text,
        max_length=150,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    return summary[0]["summary_text"]
68
+
69
# Streamlit UI
st.title("Detail Job Creator and Resume Scanner")
st.write("Analyze resumes and match them with job descriptions.")

# Upload job description and display Groq analysis first
st.subheader("Job Description")
job_description = st.text_area(
    "Paste the job description here:",
    height=150,
)

if job_description:
    st.subheader("Groq Analysis")
    # Every widget interaction reruns this script. Cache the analysis per job
    # description so that clicking "Analyze Resumes" does not trigger a second
    # Groq request (and a possibly different answer) for the same text.
    cached_analysis = st.session_state.get("groq_analysis")
    if cached_analysis is None or cached_analysis[0] != job_description:
        cached_analysis = (job_description, groq_chat_completion(job_description))
        st.session_state["groq_analysis"] = cached_analysis
    groq_response = cached_analysis[1]
    st.write("Groq's analysis of the job description:")
    st.write(groq_response)

    # Proceed with resume upload only if job description is provided
    st.subheader("Upload Resumes")
    uploaded_files = st.file_uploader(
        "Upload resume files (Text, Word, or PDF):",
        accept_multiple_files=True,
        type=["txt", "docx", "pdf"],
    )

    if st.button("Analyze Resumes"):
        if not uploaded_files:
            st.error("Please upload at least one resume.")
        else:
            # Extract text from resumes, keeping each file paired with its
            # text so that names, scores and summaries stay aligned even when
            # some uploads yield no text.
            extracted = [(file, extract_text(file)) for file in uploaded_files]
            valid = [(file, text) for file, text in extracted if text.strip()]
            skipped = [file.name for file, text in extracted if not text.strip()]

            if not valid:
                st.error("No valid text extracted from resumes. Please check your files.")
            else:
                if skipped:
                    st.warning("Skipped files with no extractable text: " + ", ".join(skipped))

                # Combine job description and resumes for analysis
                resumes = [text for _, text in valid]
                documents = [job_description] + resumes

                # Extract keywords and calculate similarity
                _, tfidf_matrix = extract_keywords(documents)
                similarities = calculate_similarity(tfidf_matrix)

                # Display match scores and summaries.
                # Row 0 of the similarity matrix is the job description, so
                # resume i is found at column i + 1.
                st.subheader("Resume Match Scores and Summaries")
                for i, (file, resume_text) in enumerate(valid):
                    st.write(f"**Resume {i+1}: {file.name}**")
                    st.write(f"Match Score: {similarities[0][i + 1] * 100:.2f}%")

                    # Generate and display summary
                    summary = generate_summary(resume_text)
                    st.write("**Summary:**")
                    st.write(summary)
                    st.write("---")