ijtelgote committed
Commit 445ee73 (verified) · Parent: 8bda68e

created app.py

Files changed (1): app.py (+732, −0)
app.py ADDED
import json
import logging
import os
import pickle
import re
import sys
import threading
import time
import warnings
from concurrent.futures import ThreadPoolExecutor

import fitz  # PyMuPDF
import joblib
import nltk
import numpy as np
import pandas as pd
import pdfplumber
import pendulum
import requests
import spacy
import tensorflow as tf
import tensorflow_hub as hub
import torch
from bs4 import BeautifulSoup
from gensim.models import Word2Vec
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from transformers import (BertModel, BertTokenizer,
                          TFBertForSequenceClassification)
# Quiet noisy third-party loggers and keep this app's own logging at WARNING.
# (TF_CPP_MIN_LOG_LEVEL ideally belongs before the tensorflow import.)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
logging.basicConfig(level=logging.WARNING)
logging.getLogger('tensorflow').setLevel(logging.ERROR)
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
logging.getLogger("groq._base_client").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)

warnings.filterwarnings("ignore")

from dotenv import load_dotenv

load_dotenv()

def pdf_to_text(pdf_path):
    """Extract plain text from every page of a PDF with PyMuPDF."""
    pdf_document = fitz.open(pdf_path)
    text = ""
    for page_num in range(pdf_document.page_count):
        page = pdf_document.load_page(page_num)
        text += page.get_text()
    pdf_document.close()
    return text

# Raw strings avoid invalid-escape warnings in these Windows-style paths.
resumeLink = r'Datasets\Jay_Telgote_Resume (1).pdf'
jd_path = r'Datasets\JD.txt'
json_path = r'Datasets\country_data.json'
resume = pdf_to_text(resumeLink)
with open(jd_path, 'r') as f:
    jd = f.read()

path = r'Datasets\sent-models-transformers-default-v1'
bert_tokenizer = BertTokenizer.from_pretrained(path + r'\output_directory2')
bert_model = TFBertForSequenceClassification.from_pretrained(path + r'\Model2')

country = 'Japan'

class MasterOther:
    """Formatting checks: font sizes, links, tables, images, fonts, page count."""

    def __init__(self):
        self.backlink_score = 0
        self.font_size_score = 0
        self.font_name_score = 0
        self.image_score = 0
        self.table_score = 0
        self.page_count_score = 0

    def normalize_value(self, value, min_value, max_value):
        if value < min_value:
            return 0
        return (value - min_value) / (max_value - min_value)

    def extract_pdf_fonts_and_sizes(self, pdf_file_path):
        doc = fitz.open(pdf_file_path)
        font_sizes = set()

        for page in doc:
            blocks = page.get_text("dict")["blocks"]
            for block in blocks:
                if "lines" in block:
                    for line in block["lines"]:
                        for span in line["spans"]:
                            font_sizes.add(span["size"])

        doc.close()
        return font_sizes

    def extract_pdf_fonts_and_sizes_score(self, path):
        font_sizes = self.extract_pdf_fonts_and_sizes(path)
        score = 20
        max_score = 20
        min_score = 0

        for size in font_sizes:
            if size > 20.0 or size < 5.0:
                score = 0
                print('Tailor the font size accordingly')
                break
        self.font_size_score = self.normalize_value(score, min_score, max_score)

    def check_backlinks(self, path):
        score = 10
        max_score = 10
        min_score = 0
        doc = fitz.open(path)
        page = doc.load_page(0)
        links = page.get_links()
        if links:
            score = 0
            print('Resume contains backlinks')
        doc.close()
        self.backlink_score = self.normalize_value(score, min_score, max_score)

    def contains_table(self, path):
        score = 10
        max_score = 10
        min_score = 0
        with pdfplumber.open(path) as pdf:
            for page in pdf.pages:
                tables = page.extract_tables()
                if tables:
                    score -= 5
                if score == 0:
                    break
        if score < max_score:
            print('Resume contains tables')
        self.table_score = self.normalize_value(score, min_score, max_score)

    def contains_images(self, pdf_file_path):
        score = 10
        max_score = 10
        min_score = 0
        doc = fitz.open(pdf_file_path)
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            image_list = page.get_images(full=True)
            if image_list:
                score = 0
                print('Resume contains images')
                break
        doc.close()
        self.image_score = self.normalize_value(score, min_score, max_score)

    def detect_fonts(self, pdf_path):
        doc = fitz.open(pdf_path)
        font_counts = {}

        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            blocks = page.get_text("dict")["blocks"]

            for block in blocks:
                if "lines" in block:
                    for line in block["lines"]:
                        for span in line["spans"]:
                            font_name = span["font"]
                            font_counts[font_name] = font_counts.get(font_name, 0) + 1

        doc.close()
        return font_counts

    def tune_font(self, path):
        score = 100
        max_score = 100
        min_score = 18
        font_counts = self.detect_fonts(path)
        never_use_fonts = ['Comic Sans', 'Futura', 'Lucida Console', 'Bradley Hand ITC', 'Brush Script']
        for font, count in font_counts.items():
            if font in never_use_fonts:
                score -= count * 18
                print(f"{font} is not recommended for resumes")
                break
        self.font_name_score = self.normalize_value(score, min_score, max_score)

    def count_pdf_pages_score(self, pdf_path):
        doc = fitz.open(pdf_path)
        num_pages = doc.page_count
        doc.close()
        score = 30
        max_value = 30
        min_value = 7
        if num_pages == 2:
            score -= 13
        elif num_pages > 2:
            print('Resume should not be more than 2 pages')
            score -= 23
        if score < min_value:
            score = 0
        self.page_count_score = self.normalize_value(score, min_value, max_value)

def all_other(master_score, path):
    master = MasterOther()
    master.extract_pdf_fonts_and_sizes_score(path)
    master.check_backlinks(path)
    master.contains_table(path)
    master.contains_images(path)
    master.tune_font(path)
    master.count_pdf_pages_score(path)
    # Mean of the six normalized formatting scores, scaled to 0-100.
    mean = (master.font_size_score + master.table_score + master.font_name_score
            + master.backlink_score + master.page_count_score + master.image_score) / 6
    master_score['score_other'] = mean * 100

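# A minimal sketch (hypothetical path) of running just the formatting checks:
#
#   scores = {}
#   all_other(scores, r'Datasets\sample_resume.pdf')
#   print(scores['score_other'])   # mean of the six checks, scaled to 0-100
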
##############################################################


def calculate_similarity_use(text1, text2):
    # Note: loading the Universal Sentence Encoder on every call is slow;
    # consider loading it once at module level.
    model = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
    embeddings = model([text1, text2])
    similarity_score = cosine_similarity(embeddings)[0, 1]
    return similarity_score


def containment_similarity(text1, text2):
    # Tokenize the texts
    set1 = set(text1.split())
    set2 = set(text2.split())

    # Share of the smaller token set that also appears in the other text.
    intersection = set1.intersection(set2)
    containment_score = len(intersection) / min(len(set1), len(set2))
    return containment_score

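# Worked example: containment_similarity('python sql', 'python sql and aws') is 1.0,
# because every token of the smaller set appears in the larger one; a resume pasted
# verbatim from the JD therefore scores near 1.0 even when it is not genuinely tailored.
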
def remove_special_characters(text):
    pattern = r"[.,!()*&⋄:|/^]"
    cleaned_text = re.sub(pattern, "", text)
    tex = cleaned_text.replace('\n', '')
    return tex.lower()


def logic_similarity_matching(text1, text2):
    # True when the texts are semantically similar (USE score >= 0.75)
    # without being near-verbatim copies (containment <= 0.8).
    score_encoder = 0
    score_containment = 0
    text1 = remove_special_characters(text1)
    text2 = remove_special_characters(text2)
    similarity_score_use = calculate_similarity_use(text1, text2)

    similarity_score = containment_similarity(text1, text2)
    if similarity_score > 0.8:
        score_containment += 1

    if similarity_score_use >= 0.75:
        score_encoder = 1
    return score_encoder == 1 and score_containment == 0


def normalize_value(value, min_value, max_value):
    return (value - min_value) / (max_value - min_value)


def logic_similarity_matching2(text1, text2, master_score):
    score = 10
    max_score = 10
    min_score = 0
    if not logic_similarity_matching(text1, text2):
        score -= 10
        print('Resume not tailored according to the JD')

    master_score['similarity_matching_score'] = normalize_value(score, min_score, max_score)


#################################################
master_score = {}


def get_bert_embeddings(texts):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased')

    inputs = tokenizer(texts, return_tensors='pt', padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean-pool the token embeddings into a single vector per text.
    return outputs.last_hidden_state.mean(dim=1).numpy()


# Predict resume scores with the pre-trained Random Forest regressor.
def predict_resume_score(new_resumes):
    regressor = joblib.load(r'Datasets\jghfgdf-keras-default-v1\model_filename2.pkl')

    # Generate BERT embeddings for the new resumes
    embeddings = get_bert_embeddings(new_resumes)
    X_new = torch.tensor(embeddings, dtype=torch.float32)

    # Predict using the trained Random Forest Regressor
    predictions = regressor.predict(X_new)
    return predictions


def logic_actionable_words(text, master_score):
    max_score = 100
    min_score = 0
    pred_score = int(predict_resume_score(text)[0] * 100)
    # Band the predicted score; the bands are contiguous so no value falls through.
    if pred_score >= 50:
        score = 100
    elif pred_score >= 40:
        score = 80
    elif pred_score >= 30:
        score = 60
    elif pred_score >= 20:
        print('Resume contains non-action keywords or lacks actionable keywords')
        score = 40
    else:
        score = 10
        print('Resume contains non-action keywords or lacks actionable keywords')
    master_score['Action_score'] = normalize_value(score, min_score, max_score)

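# Illustrative call (the input text is made up): predict_resume_score returns one
# value per input, which logic_actionable_words scales by 100 and bands into a score:
#
#   preds = predict_resume_score('Led a team of 5 engineers and cut costs by 30%')
#   print(preds[0])
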
##############################################################
groq_api_key = os.getenv('API_KEY')

llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name='llama3-70b-8192'
)


def keep_only_alphanumeric(text):
    pattern = r'[^a-zA-Z0-9]'
    cleaned_text = re.sub(pattern, ' ', text.lower())
    return ' '.join(cleaned_text.split())


def groq(jd):
    system = '''
    1. Act as an assistant that reports the minimum number of years of experience required.
    2. The user will provide a job description; reply with the minimum number of years of experience required to apply for the job.
    3. If you cannot find any experience requirement, or the job is open to freshers, respond with "0.0".
    4. Do not give any introduction about who you are or what you are going to do.
    5. Give the number of years as a numeral, e.g. 8 years, never spelled out as "eight years".
    6. Remember the formula Years = Months / 12, so 2 months is 0.17 as a round figure.
    7. Always give the number of years as a decimal: for 4 years reply 4.0, for 6 months reply 0.5, with no alphabetic words.
    '''

    human = "{text}"
    prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

    chain = prompt | llm
    jd2 = keep_only_alphanumeric(jd)
    res = chain.invoke({"text": jd2})
    p = dict(res)
    final_text = ' '.join(p['content'].split())
    return final_text

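# Illustrative example (made-up JD text): for "minimum 3 years of Python experience"
# the model is instructed to reply with a bare decimal such as "3.0", which
# to_check_exp below converts with float().
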
def parse_date(date_str):
    try:
        parsed_date = pendulum.parse(date_str, strict=False)
        return parsed_date
    except ValueError:
        raise ValueError(f"No valid date format found for '{date_str}'")


def calculate_experience(start_date, end_date):
    duration = end_date.diff(start_date)
    years = duration.years
    months = duration.months
    return years + months / 12


def calculate_total_experience(resume_text):
    # Regular expression to match date ranges in various formats, including year-only ranges
    date_range_pattern = re.compile(
        r'((?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[\s\'\"`*+,\-–.:/;!@#$%^&(){}\[\]<>_=~`]*\d{2,4}|\d{1,2}/\d{1,2}/\d{4}|\d{1,2}/\d{4}|\d{4})\s*(?:[-–to ]+)\s*((?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)[\s\'\"`*+,\-–.:/;!@#$%^&(){}\[\]<>_=~`]*\d{2,4}|\d{1,2}/\d{1,2}/\d{4}|\d{1,2}/\d{4}|\d{4}|\b[Tt]ill\b|\b[Nn]ow\b|\b[Pp]resent\b|\b[Oo]ngoing\b|\b[Cc]ontinue\b|\b[Cc]urrent\b)?'
    )

    date_matches = date_range_pattern.findall(resume_text)

    total_experience = 0

    for start_date_str, end_date_str in date_matches:
        try:
            start_date = parse_date(start_date_str.strip())
            end_date = pendulum.now() if not end_date_str or end_date_str.strip().lower() in ['till', 'now', 'present', 'ongoing', 'continue', 'current'] else parse_date(end_date_str.strip())

            experience = calculate_experience(start_date, end_date)
            total_experience += experience
        except ValueError as e:
            print(e)

    return round(total_experience, 2)


# Smoke-test the date parsing on the loaded resume.
calculate_total_experience(resume)

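# Worked example: "Jan 2020 - Mar 2021" is 1 year 2 months, i.e. 1 + 2/12 ≈ 1.17
# years, and an open-ended range like "Jun 2022 - Present" is measured up to today.
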
main_score = {}


def to_check_exp(resume, jd, main_score):
    # extract_experience is defined with the other section extractors below;
    # this function only runs after the whole module has loaded.
    required_experience = float(groq(jd))
    # Fall back to the full resume text if no experience section is detected.
    tt = extract_experience(resume) or resume
    candidate_experience = float(calculate_total_experience(tt))
    if candidate_experience < required_experience:
        print('User experience does not match the Job Description')
    main_score['exp_match'] = int(candidate_experience >= required_experience)


####################################################
def extract_skills(text):
    skills_pattern = (
        r"\b(Skill(?:s|z)?|Abilit(?:ies|y|tys)?|Competenc(?:ies|y)|Expertise|Skillset|Technical Skills?|Technical Abilities?|Technological Skills?|TECHNICAL SKILLS?|Technical Expertise)\b"
        r"[\s:\-\n]*"
        r"(.+?)(?=\b(Experience|Experiences|Employment|Work History|Professional Background|Projects|its last|Project Work|Case Studies|Education|Educations|Academic Background|Qualifications|Studies|Soft Skills|Achievements|$))"
    )
    skills_match = re.search(skills_pattern, text, re.DOTALL | re.IGNORECASE)
    if skills_match:
        return skills_match.group(2).strip()
    return None


def extract_experience(text):
    experience_pattern = (
        r"\b(Experience|Experiences|Employments?|Work History|Professional Background|Career History|Professional Experience|Job History|Work Experience|Job Experiences?|Employment History|Work Experiences?|Professional Experiences?|WORK EXPERIENCE)\b"
        r"[\s:\-\n]*"
        r"(.+?)(?=\b(Skills?|Abilities?|Competenc(?:ies|y)|Expertise|Skillset|Technical Skills?|Technical Abilities?|Projects?|Project Work|Case Studies|Education|Educations|Academic Background|Qualifications|Studies|its last|Soft Skills|Achievements|$))"
    )
    experience_match = re.search(experience_pattern, text, re.DOTALL | re.IGNORECASE)
    if experience_match:
        return experience_match.group(2).strip()
    return None


def extract_education(text):
    education_pattern = (
        r"\b(Education|Educations|Academic Background|Qualifications|Studies|Academic Qualifications|Educational Background|Academic History|Educational History|Education and Training|Educational Qualifications|EDUCATION)\b"
        r"[\s:\-\n]*"
        r"(.+?)(?=\b(Skills?|Abilities?|Competenc(?:ies|y)|Expertise|Skillset|Technical Skills?|Technical Abilities?|Experience|Experiences|Employment|Work History|Professional Background|Projects?|Project Work|Case Studies|its last|Soft Skills|Achievements|$))"
    )
    education_match = re.search(education_pattern, text, re.DOTALL | re.IGNORECASE)
    if education_match:
        return education_match.group(2).strip()
    return None

def parsed(resume1):
    # Append a sentinel so the last section's lookahead has something to stop at.
    resume1 = resume1 + ' its last'
    resume1 = resume1.replace('\n', ' ')

    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
    phone_pattern = r'(\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}|\+\d{2,4}[-.\s]?\d{10}|\d{10}|\d{11})'

    email_match = re.search(email_pattern, resume1)
    phone_match = re.search(phone_pattern, resume1)

    email = email_match.group() if email_match else None
    phone = phone_match.group() if phone_match else None

    skills = extract_skills(resume1)
    experience = extract_experience(resume1)
    education = extract_education(resume1)

    return {
        'Email': email,
        'Phone': phone,
        'Skills': skills.replace('\n', '') if skills else None,
        'Experience': experience.replace('\n', '') if experience else None,
        'Education': education.replace('\n', '') if education else None,
    }


def resume_parsing_2(resume, master_score):
    parsed_resume = parsed(resume)
    if any(value is None for value in parsed_resume.values()):
        print('Resume template is not ATS friendly.')
        master_score['Parsing_score'] = 0
    else:
        master_score['Parsing_score'] = 1

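# Illustrative output of parsed() (values are made up):
#
#   {'Email': 'jane@example.com', 'Phone': '9876543210',
#    'Skills': 'python sql power bi', 'Experience': '...', 'Education': '...'}
#
# A None in any field is treated as a sign the template is not ATS-friendly.
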
###################################
def Get_sentiment(Review, Tokenizer=bert_tokenizer, Model=bert_model, threshold=0.5):
    if not isinstance(Review, list):
        Review = [Review]
    Input_ids, Token_type_ids, Attention_mask = Tokenizer.batch_encode_plus(
        Review,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors='tf').values()

    # Redirect stdout to suppress Keras progress messages
    original_stdout = sys.stdout
    sys.stdout = open(os.devnull, 'w')

    prediction = Model.predict([Input_ids, Token_type_ids, Attention_mask])

    # Restore stdout
    sys.stdout.close()
    sys.stdout = original_stdout

    probs = tf.nn.softmax(prediction.logits, axis=1)
    pred_probs = probs.numpy().tolist()
    # Probability of the positive class for the first (only) input.
    return pred_probs[0][1]

word2vec_model = joblib.load(r'Datasets\4-tensorflow2-default-v1\word2vec_res_model.pkl')
model = joblib.load(r'Datasets\4-tensorflow2-default-v1\word_matrix_ml_model.pkl')


def get_average_word2vec(words, word2vec_model):
    word_vectors = [word2vec_model.wv[word] for word in words if word in word2vec_model.wv]
    if not word_vectors:
        return np.zeros(word2vec_model.vector_size)
    return np.mean(word_vectors, axis=0)


def another_word2vec(texts):
    X_new = np.array([get_average_word2vec(texts, word2vec_model)])
    new_predictions = model.predict(X_new)
    return new_predictions


def semi_final(texts):
    min_score = 0
    max_score = 10
    sentiment_score = np.mean([Get_sentiment(text) for text in texts])

    if sentiment_score > 0.8:
        word2vec_score = np.mean([another_word2vec(text) for text in texts]) * 100
        # Contiguous bands over the 0-100 word2vec score.
        if word2vec_score >= 85:
            score = 10
        elif word2vec_score >= 70:
            score = 8
        elif word2vec_score >= 50:
            score = 6
        elif word2vec_score >= 30:
            score = 4
        else:
            score = 2
    else:
        print('Resume is not customized')
        return None

    return normalize_value(score, min_score, max_score)


def process(texts):
    textt = texts.lower().replace('\n', '').replace('\t', '').replace('"', '')
    texts2 = textt.split('.')
    return [i for i in texts2 if i != '']


def finale(resume, master_score):
    texts = process(resume)
    score = semi_final(texts)
    master_score['matrix_score'] = score if score is not None else 0

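# Illustrative flow: process() splits the resume into sentences; if their mean
# sentiment is above 0.8, the word2vec matrix model grades them on the 2-10
# bands above, which normalize_value then maps into [0.2, 1.0].
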
#################################################################
def fetch_page(country, page_number):
    path_country = json_path
    with open(path_country, 'r') as file:
        countries_dict = json.load(file)

    try:
        url = f'http://161.111.47.11:80/en/{countries_dict[country]}?page={page_number}'
        response = requests.get(url)
        response.raise_for_status()
        return response.content
    except requests.RequestException as e:
        print(f"Error fetching page {page_number} for {country}: {e}")
        return None


def fetch_all_pages(country, num_pages=2):
    with ThreadPoolExecutor() as executor:
        pages_content = list(executor.map(lambda p: fetch_page(country, p), range(num_pages)))
    return [content for content in pages_content if content]


def parse_pages(pages_content):
    institutions = set()
    for content in pages_content:
        soup = BeautifulSoup(content, 'html.parser')
        rows = soup.select('tbody tr')
        for row in rows:
            name_element = row.select_one('td:nth-of-type(3) a')
            if name_element:
                institution_name = name_element.text.strip().lower()
                institutions.add(institution_name)
    return institutions


def extract_institutions_from_resume(resume_text):
    pattern = r'[>(><&#%")-:\'\d]'
    res = resume_text.replace('|', '\n')
    cleaned_text = re.sub(pattern, '', res)
    return [re.sub(r'\s+', ' ', inst).strip().lower() for inst in cleaned_text.splitlines() if len(inst.split()) >= 3]


def main(resume_text, country='India'):
    # extract_education is defined with the other section extractors above.
    pages_content = fetch_all_pages(country)
    institutions = parse_pages(pages_content)

    education_text = extract_education(resume_text)
    if education_text:
        resume_institutions = extract_institutions_from_resume(education_text)
        found_institutions = [name for name in resume_institutions if name in institutions]
        return found_institutions
    return []


def education_master(resume_text, master_score, country):
    score = 0.0
    educ_institutions = main(resume_text, country)
    if educ_institutions:
        score = 0.5 if len(educ_institutions) == 1 else 1.0
    master_score['score_education_detection_'] = score

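# Sketch of the education check: fetch_all_pages scrapes up to two listing pages
# for the selected country, parse_pages collects institution names from them, and
# education_master scores 0.5 for one recognized institution and 1.0 for more.
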
# One worker thread per scoring component; each writes its result into a shared dict.
edu_thread = threading.Thread(target=education_master, args=(resume, master_score, country))
matrix_thread = threading.Thread(target=finale, args=(resume, master_score))
parse_thread = threading.Thread(target=resume_parsing_2, args=(resume, master_score))
exp_thread = threading.Thread(target=to_check_exp, args=(resume, jd, main_score))
action_word_thread = threading.Thread(target=logic_actionable_words, args=(resume, master_score))
simi_matching_thread = threading.Thread(target=logic_similarity_matching2, args=(resume, jd, master_score))
other_thread = threading.Thread(target=all_other, args=(master_score, resumeLink))

def normalize_scores(scores):
    # Ranges for each score type (the second component is on a 0-100 scale, the rest 0-1)
    ranges = [
        (0.0, 1.0),    # First score: 0 to 1
        (0.0, 100.0),  # Second score: 0 to 100
        (0.0, 1.0),    # Third score: 0 to 1
        (0.0, 1.0),    # Fourth score: 0 to 1
        (0.0, 1.0),    # Fifth score: 0 to 1
        (0.0, 1.0),    # Sixth score: 0 to 1
    ]

    # Rescale each score to 0-100
    normalized_scores = []
    for score, (min_val, max_val) in zip(scores, ranges):
        normalized_score = (score - min_val) / (max_val - min_val) * 100
        normalized_scores.append(normalized_score)

    return normalized_scores

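# Hypothetical usage once the threads below have finished; the ordering here is an
# assumption (score_other is the only 0-100 component, so it must sit second):
#
#   raw = [master_score['matrix_score'], master_score['score_other'],
#          master_score['Parsing_score'], master_score['Action_score'],
#          master_score['similarity_matching_score'],
#          master_score['score_education_detection_']]
#   print(normalize_scores(raw))
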
print('Possible Suggestions:\n')

# Each thread is started and joined immediately, so the checks run one at a time;
# this also avoids races on the global stdout redirection inside Get_sentiment.
threads = [edu_thread, matrix_thread, parse_thread, exp_thread,
           action_word_thread, simi_matching_thread, other_thread]
for t in threads:
    t.start()
    t.join()
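# With all threads joined, the component results can be inspected directly, e.g.:
#
#   print(master_score)   # formatting, parsing, action-word, similarity, education scores
#   print(main_score)     # {'exp_match': 0 or 1}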