Spaces:
Sleeping
Sleeping
WebashalarForML
committed on
Commit
•
cf24fe3
1
Parent(s):
b6eccd9
Update utility/utils.py
Browse files- utility/utils.py +23 -1
utility/utils.py
CHANGED
@@ -400,9 +400,25 @@ def process_extracted_text(extracted_text):
|
|
400 |
|
401 |
return combined_results_json
|
402 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
# Process the model output for parsed result
|
404 |
def process_resume_data(LLMdata,cont_data,extracted_text):
|
405 |
|
|
|
|
|
406 |
# Removing duplicate emails
|
407 |
unique_emails = []
|
408 |
for email in cont_data['emails']:
|
@@ -421,7 +437,13 @@ def process_resume_data(LLMdata,cont_data,extracted_text):
|
|
421 |
for num in cont_data['phone_numbers']:
|
422 |
if num[-10:] not in normalized_contact:
|
423 |
unique_numbers.append(num)
|
424 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
# Add unique emails, links, and phone numbers to the original LLMdata
|
426 |
LLMdata['Email'] += unique_emails
|
427 |
LLMdata['Link'] += unique_links
|
|
|
400 |
|
401 |
return combined_results_json
|
402 |
|
403 |
+
# Function to remove duplicates (case-insensitive) from each list in the dictionary
|
404 |
+
def remove_duplicates_case_insensitive(data_dict):
    """Drop case-insensitive duplicate strings from every list in *data_dict*.

    For each key whose value is a list, later strings that match an earlier
    string when both are lowercased are removed. The first occurrence is kept
    with its original casing and the original order is preserved.

    Values that are not lists are left untouched, so a plain string is not
    split into characters and ``None`` does not raise. Non-string items inside
    a list are kept as-is and are never treated as duplicates.

    Args:
        data_dict: Mapping of field name to a list of values. It is updated
            in place.

    Returns:
        The same ``data_dict`` object, so the result can be assigned back by
        the caller (``data = remove_duplicates_case_insensitive(data)``).
    """
    # Only values of existing keys are reassigned, so the dict size never
    # changes while it is being iterated.
    for key, value_list in data_dict.items():
        if not isinstance(value_list, list):
            continue

        seen = set()
        unique_list = []

        for item in value_list:
            if isinstance(item, str):
                lowered = item.lower()
                if lowered in seen:
                    continue
                seen.add(lowered)  # Track the lowercase version
            unique_list.append(item)  # Keep the original item and its case

        # Update the dictionary with unique values
        data_dict[key] = unique_list

    return data_dict
|
416 |
+
|
417 |
# Process the model output for parsed result
|
418 |
def process_resume_data(LLMdata,cont_data,extracted_text):
|
419 |
|
420 |
+
# Apply the function to the data
|
421 |
+
LLMdata=remove_duplicates_case_insensitive(LLMdata)
|
422 |
# Removing duplicate emails
|
423 |
unique_emails = []
|
424 |
for email in cont_data['emails']:
|
|
|
437 |
for num in cont_data['phone_numbers']:
|
438 |
if num[-10:] not in normalized_contact:
|
439 |
unique_numbers.append(num)
|
440 |
+
|
441 |
+
# Removing duplicate emails (case-insensitive comparison)
|
442 |
+
unique_emails = []
|
443 |
+
for email in cont_data['emails']:
|
444 |
+
if email.lower() not in LLMdata['Email'].lower():
|
445 |
+
unique_emails.append(email)
|
446 |
+
|
447 |
# Add unique emails, links, and phone numbers to the original LLMdata
|
448 |
LLMdata['Email'] += unique_emails
|
449 |
LLMdata['Link'] += unique_links
|