WebashalarForML committed on
Commit
cf24fe3
1 Parent(s): b6eccd9

Update utility/utils.py

Browse files
Files changed (1) hide show
  1. utility/utils.py +23 -1
utility/utils.py CHANGED
@@ -400,9 +400,25 @@ def process_extracted_text(extracted_text):
400
 
401
  return combined_results_json
402
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  # Process the model output for parsed result
404
  def process_resume_data(LLMdata,cont_data,extracted_text):
405
 
 
 
406
  # Removing duplicate emails
407
  unique_emails = []
408
  for email in cont_data['emails']:
@@ -421,7 +437,13 @@ def process_resume_data(LLMdata,cont_data,extracted_text):
421
  for num in cont_data['phone_numbers']:
422
  if num[-10:] not in normalized_contact:
423
  unique_numbers.append(num)
424
-
 
 
 
 
 
 
425
  # Add unique emails, links, and phone numbers to the original LLMdata
426
  LLMdata['Email'] += unique_emails
427
  LLMdata['Link'] += unique_links
 
400
 
401
  return combined_results_json
402
 
403
+ # Function to remove duplicates (case-insensitive) from each list in the dictionary
404
def remove_duplicates_case_insensitive(data_dict):
    """Remove case-insensitive duplicate strings from each list in a dict.

    For every key whose value is a list (or tuple), keep the first
    occurrence of each string, comparing strings by their lowercase form
    and preserving the original casing and order of the kept items.

    Args:
        data_dict: Mapping of keys to lists of values. Values that are not
            lists/tuples (e.g. ``None`` or a plain string) are left as-is
            rather than being iterated element by element.

    Returns:
        The same ``data_dict`` object, updated in place. Returning it lets
        callers write ``data = remove_duplicates_case_insensitive(data)``
        without ending up with ``None``.
    """
    for key, value_list in data_dict.items():
        # Only sequence values are de-duplicated; iterating a bare string
        # would split it into characters, and None is not iterable at all.
        if not isinstance(value_list, (list, tuple)):
            continue

        seen = set()
        unique_list = []

        for item in value_list:
            if isinstance(item, str):
                lowered = item.lower()
                if lowered in seen:
                    continue
                seen.add(lowered)  # Track lowercase version
            # Non-string items have no case to fold, so they are kept
            # unchanged instead of raising AttributeError on .lower().
            unique_list.append(item)  # Add original item (preserving its case)

        # Rebinding an existing key does not change the dict's size, so it
        # is safe to do while iterating over .items().
        data_dict[key] = unique_list

    return data_dict
416
+
417
  # Process the model output for parsed result
418
  def process_resume_data(LLMdata,cont_data,extracted_text):
419
 
420
+ # Apply the function to the data
421
+ LLMdata=remove_duplicates_case_insensitive(LLMdata)
422
  # Removing duplicate emails
423
  unique_emails = []
424
  for email in cont_data['emails']:
 
437
  for num in cont_data['phone_numbers']:
438
  if num[-10:] not in normalized_contact:
439
  unique_numbers.append(num)
440
+
441
+ # Removing duplicate emails (case-insensitive)
442
+ unique_emails = []
443
+ for email in cont_data['emails']:
444
+ if email.lower() not in LLMdata['Email'].lower():
445
+ unique_emails.append(email)
446
+
447
  # Add unique emails, links, and phone numbers to the original LLMdata
448
  LLMdata['Email'] += unique_emails
449
  LLMdata['Link'] += unique_links