ruslanmv committed on
Commit
900f3cd
·
verified ·
1 Parent(s): ec1a8b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -119
app.py CHANGED
@@ -4,34 +4,32 @@ import PyPDF2
4
  import io
5
  from docx import Document
6
  import os
 
7
 
8
  # For PDF generation
9
  from reportlab.pdfgen import canvas
10
  from reportlab.lib.pagesizes import letter
11
- from reportlab.lib import utils
12
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
13
  from reportlab.lib.styles import getSampleStyleSheet
14
 
15
- # Initialize the inference client from Hugging Face.
16
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
 
17
 
 
18
 
19
  def extract_text_from_pdf(pdf_file):
20
- """Extract text from PDF file."""
21
  try:
22
  pdf_reader = PyPDF2.PdfReader(pdf_file)
23
- text = ""
24
- for page in pdf_reader.pages:
25
- page_text = page.extract_text()
26
- if page_text:
27
- text += page_text + "\n"
28
  return text.strip() or "No text could be extracted from the PDF."
29
  except Exception as e:
30
  return f"Error reading PDF: {e}"
31
 
32
-
33
  def extract_text_from_docx(docx_file):
34
- """Extract text from DOCX file."""
35
  try:
36
  doc = Document(docx_file)
37
  text = "\n".join(para.text for para in doc.paragraphs)
@@ -39,14 +37,13 @@ def extract_text_from_docx(docx_file):
39
  except Exception as e:
40
  return f"Error reading DOCX: {e}"
41
 
42
-
43
  def parse_cv(file, job_description):
44
- """Analyze the CV, show the prompt (debug) and return LLM analysis."""
45
  if file is None:
46
  return "Please upload a CV file.", ""
47
 
48
  try:
49
- file_path = file.name # Get the file path
50
  file_ext = os.path.splitext(file_path)[1].lower()
51
 
52
  if file_ext == ".pdf":
@@ -54,20 +51,13 @@ def parse_cv(file, job_description):
54
  elif file_ext == ".docx":
55
  extracted_text = extract_text_from_docx(file_path)
56
  else:
57
- return (
58
- "Unsupported file format. Please upload a PDF or DOCX file.",
59
- "Unsupported file format.",
60
- )
61
-
62
  except Exception as e:
63
- error_msg = f"Error reading file: {e}"
64
- return error_msg, error_msg
65
 
66
- # Check for extraction errors
67
  if extracted_text.startswith("Error"):
68
  return extracted_text, "Error during text extraction. Please check the file."
69
 
70
- # Prepare debug prompt
71
  prompt = (
72
  f"Analyze the CV against the job description. Provide a summary, assessment, "
73
  f"and a score 0-10.\n\n"
@@ -75,140 +65,100 @@ def parse_cv(file, job_description):
75
  f"Candidate CV:\n{extracted_text}\n"
76
  )
77
 
78
- # Call LLM
79
  try:
80
  analysis = client.text_generation(prompt, max_new_tokens=512)
81
- # Show both the debug prompt and the LLM analysis in the "Analysis Report"
82
- analysis_report = (
83
- f"--- DEBUG PROMPT ---\n{prompt}\n"
84
- f"--- LLM ANALYSIS ---\n{analysis}"
85
- )
86
- return extracted_text, analysis_report
87
  except Exception as e:
88
  return extracted_text, f"Analysis Error: {e}"
89
 
90
-
91
- def respond(
92
- message,
93
- history: list[tuple[str, str]],
94
- system_message,
95
- max_tokens,
96
- temperature,
97
- top_p,
98
- ):
99
- """Generate chatbot response."""
100
- messages = [{"role": "system", "content": system_message}]
101
- for user_msg, bot_msg in history:
102
- if user_msg:
103
- messages.append({"role": "user", "content": user_msg})
104
- if bot_msg:
105
- messages.append({"role": "assistant", "content": bot_msg})
106
- messages.append({"role": "user", "content": message})
107
-
108
- response = ""
109
  try:
110
- for message_chunk in client.chat_completion(
111
- messages,
112
- max_tokens=max_tokens,
113
  stream=True,
114
- temperature=temperature,
115
- top_p=top_p,
116
  ):
117
- token = message_chunk.choices[0].delta.content
118
- response += token
119
- yield response
120
  except Exception as e:
121
- yield f"Error during chat generation: {e}"
 
 
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
 
124
  def create_pdf_report(report_text):
125
- """Creates a PDF report using SimpleDocTemplate for better formatting."""
126
- if not report_text.strip():
127
- report_text = "No analysis report to convert."
128
-
129
  buffer = io.BytesIO()
130
  doc = SimpleDocTemplate(buffer, pagesize=letter)
131
  styles = getSampleStyleSheet()
132
- Story = []
133
-
134
- # Title
135
- Story.append(Paragraph("<b>Analysis Report</b>", styles["Title"]))
136
- Story.append(Spacer(1, 12))
137
-
138
- # Report Content
139
  for line in report_text.split("\n"):
140
  Story.append(Paragraph(line, styles["Normal"]))
141
- Story.append(Spacer(1, 6)) # Add a small space between lines
142
-
143
  doc.build(Story)
144
  buffer.seek(0)
145
  return buffer
146
 
147
-
148
  def toggle_download_button(analysis_report):
149
- """Toggle the download button."""
150
- return gr.update(
151
- interactive=bool(analysis_report.strip()),
152
- visible=bool(analysis_report.strip()),
153
- )
154
-
155
 
156
  # Build the Gradio UI
157
  demo = gr.Blocks()
158
  with demo:
159
- gr.Markdown("## AI-powered CV Analyzer and Chatbot")
160
 
161
  with gr.Tab("Chatbot"):
162
  chat_interface = gr.ChatInterface(
163
- respond,
164
- chatbot=gr.Chatbot(value=[], label="Chatbot"),
 
 
 
165
  textbox=gr.Textbox(placeholder="Enter your message here...", label="Message"),
166
- additional_inputs=[
167
- gr.Textbox(
168
- value="You are a friendly Chatbot.", label="System message"
169
- ),
170
- gr.Slider(
171
- minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
172
- ),
173
- gr.Slider(
174
- minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
175
- ),
176
- gr.Slider(
177
- minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
178
- ),
179
- ],
180
  )
181
 
182
  with gr.Tab("CV Analyzer"):
183
  gr.Markdown("### Upload your CV and provide the job description")
184
  file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
185
  job_desc_input = gr.Textbox(label="Job Description", lines=5)
186
- extracted_text = gr.Textbox(
187
- label="Extracted CV Content", lines=10, interactive=False
188
- )
189
- analysis_output = gr.Textbox(
190
- label="Analysis Report", lines=10, interactive=False
191
- )
192
- download_pdf_button = gr.Button(
193
- "Download Analysis as PDF", visible=False, interactive=False
194
- )
195
  pdf_file = gr.File(label="Download PDF", interactive=False)
196
-
197
  analyze_button = gr.Button("Analyze CV")
198
-
199
- analyze_button.click(
200
- parse_cv,
201
- inputs=[file_input, job_desc_input],
202
- outputs=[extracted_text, analysis_output],
203
- ).then(
204
- toggle_download_button,
205
- inputs=[analysis_output],
206
- outputs=[download_pdf_button],
207
- )
208
-
209
- download_pdf_button.click(
210
- create_pdf_report, inputs=[analysis_output], outputs=[pdf_file]
211
- )
212
 
213
  if __name__ == "__main__":
214
  demo.queue().launch()
 
4
  import io
5
  from docx import Document
6
  import os
7
+ import fitz # PyMuPDF for better PDF handling
8
 
9
  # For PDF generation
10
  from reportlab.pdfgen import canvas
11
  from reportlab.lib.pagesizes import letter
 
12
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
13
  from reportlab.lib.styles import getSampleStyleSheet
14
 
15
+ # Initialize Hugging Face Inference Client with Meta-Llama-3.1-8B-Instruct
16
+ client = InferenceClient(
17
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct",
18
+ token=os.getenv("HF_TOKEN")
19
+ )
20
 
21
+ # Function to extract text from PDF
22
 
23
  def extract_text_from_pdf(pdf_file):
 
24
  try:
25
  pdf_reader = PyPDF2.PdfReader(pdf_file)
26
+ text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
 
 
 
 
27
  return text.strip() or "No text could be extracted from the PDF."
28
  except Exception as e:
29
  return f"Error reading PDF: {e}"
30
 
31
+ # Function to extract text from DOCX
32
  def extract_text_from_docx(docx_file):
 
33
  try:
34
  doc = Document(docx_file)
35
  text = "\n".join(para.text for para in doc.paragraphs)
 
37
  except Exception as e:
38
  return f"Error reading DOCX: {e}"
39
 
40
+ # Function to analyze CV
41
  def parse_cv(file, job_description):
 
42
  if file is None:
43
  return "Please upload a CV file.", ""
44
 
45
  try:
46
+ file_path = file.name
47
  file_ext = os.path.splitext(file_path)[1].lower()
48
 
49
  if file_ext == ".pdf":
 
51
  elif file_ext == ".docx":
52
  extracted_text = extract_text_from_docx(file_path)
53
  else:
54
+ return "Unsupported file format. Please upload a PDF or DOCX file.", ""
 
 
 
 
55
  except Exception as e:
56
+ return f"Error reading file: {e}", ""
 
57
 
 
58
  if extracted_text.startswith("Error"):
59
  return extracted_text, "Error during text extraction. Please check the file."
60
 
 
61
  prompt = (
62
  f"Analyze the CV against the job description. Provide a summary, assessment, "
63
  f"and a score 0-10.\n\n"
 
65
  f"Candidate CV:\n{extracted_text}\n"
66
  )
67
 
 
68
  try:
69
  analysis = client.text_generation(prompt, max_new_tokens=512)
70
+ return extracted_text, f"--- Analysis Report ---\n{analysis}"
 
 
 
 
 
71
  except Exception as e:
72
  return extracted_text, f"Analysis Error: {e}"
73
 
74
+ # Function to optimize resume
75
+ def optimize_resume(resume_text, job_title):
76
+ prompt = f"Optimize the following resume for the job title '{job_title}':\n\n{resume_text}"
77
+ responses = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  try:
79
+ for message in client.chat_completion(
80
+ messages=[{"role": "user", "content": prompt}],
81
+ max_tokens=1000,
82
  stream=True,
 
 
83
  ):
84
+ responses.append(message.choices[0].delta.content)
 
 
85
  except Exception as e:
86
+ return f"Error during model inference: {e}"
87
+
88
+ return ''.join(responses)
89
 
90
+ # Function to process resume and job title inputs
91
+ def process_resume(file, job_title):
92
+ try:
93
+ file_name = file.name
94
+ if file_name.endswith(".pdf"):
95
+ resume_text = extract_text_from_pdf(file.name)
96
+ elif file_name.endswith(".docx"):
97
+ resume_text = extract_text_from_docx(file.name)
98
+ else:
99
+ return "Unsupported file format. Please upload a PDF or DOCX file."
100
+
101
+ optimized_resume = optimize_resume(resume_text, job_title)
102
+ return optimized_resume
103
+ except Exception as e:
104
+ return f"Error processing resume: {e}"
105
 
106
+ # Function to generate a PDF report
107
  def create_pdf_report(report_text):
 
 
 
 
108
  buffer = io.BytesIO()
109
  doc = SimpleDocTemplate(buffer, pagesize=letter)
110
  styles = getSampleStyleSheet()
111
+ Story = [Paragraph("<b>Analysis Report</b>", styles["Title"]), Spacer(1, 12)]
112
+
 
 
 
 
 
113
  for line in report_text.split("\n"):
114
  Story.append(Paragraph(line, styles["Normal"]))
115
+ Story.append(Spacer(1, 6))
116
+
117
  doc.build(Story)
118
  buffer.seek(0)
119
  return buffer
120
 
121
+ # Function to toggle the download button
122
  def toggle_download_button(analysis_report):
123
+ return gr.update(interactive=bool(analysis_report.strip()), visible=bool(analysis_report.strip()))
 
 
 
 
 
124
 
125
  # Build the Gradio UI
126
  demo = gr.Blocks()
127
  with demo:
128
+ gr.Markdown("## AI-powered CV Analyzer, Optimizer, and Chatbot")
129
 
130
  with gr.Tab("Chatbot"):
131
  chat_interface = gr.ChatInterface(
132
+ lambda message, history: client.chat_completion(
133
+ messages=[{"role": "user", "content": message}],
134
+ max_tokens=512,
135
+ ),
136
+ chatbot=gr.Chatbot(label="Chatbot"),
137
  textbox=gr.Textbox(placeholder="Enter your message here...", label="Message"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  )
139
 
140
  with gr.Tab("CV Analyzer"):
141
  gr.Markdown("### Upload your CV and provide the job description")
142
  file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
143
  job_desc_input = gr.Textbox(label="Job Description", lines=5)
144
+ extracted_text = gr.Textbox(label="Extracted CV Content", lines=10, interactive=False)
145
+ analysis_output = gr.Textbox(label="Analysis Report", lines=10, interactive=False)
146
+ download_pdf_button = gr.Button("Download Analysis as PDF", visible=False, interactive=False)
 
 
 
 
 
 
147
  pdf_file = gr.File(label="Download PDF", interactive=False)
 
148
  analyze_button = gr.Button("Analyze CV")
149
+
150
+ analyze_button.click(parse_cv, [file_input, job_desc_input], [extracted_text, analysis_output])
151
+ analyze_button.then(toggle_download_button, [analysis_output], [download_pdf_button])
152
+ download_pdf_button.click(create_pdf_report, [analysis_output], [pdf_file])
153
+
154
+ with gr.Tab("CV Optimizer"):
155
+ gr.Markdown("### Upload your Resume and Enter Job Title")
156
+ resume_file = gr.File(label="Upload Resume (PDF or Word)")
157
+ job_title_input = gr.Textbox(label="Job Title", lines=1)
158
+ optimized_resume_output = gr.Textbox(label="Optimized Resume", lines=20)
159
+ optimize_button = gr.Button("Optimize Resume")
160
+
161
+ optimize_button.click(process_resume, [resume_file, job_title_input], [optimized_resume_output])
 
162
 
163
  if __name__ == "__main__":
164
  demo.queue().launch()