minskiter committed on
Commit
be33f2e
·
1 Parent(s): 669e2b2

fix(server.py): optimize similarity algorithm

Browse files
Files changed (2) hide show
  1. predictor/__init__.py +13 -6
  2. server.py +2 -2
predictor/__init__.py CHANGED
@@ -9,6 +9,8 @@ import queue
9
  from datetime import date
10
  import time
11
  import logging
 
 
12
 
13
  class Predictor():
14
 
@@ -443,7 +445,7 @@ class Predictor():
443
  'start': start,
444
  'end': end,
445
  'entity': 'GENDER',
446
- 'word': text[start:end],
447
  'text': text[start:end]
448
  })
449
  end_time = time.perf_counter()
@@ -505,17 +507,22 @@ class PositionPredictor():
505
  ) -> List[Dict[str, Union[str, float]]]:
506
  ans = []
507
  resume_blocks = self.__split_blocks(resume)
508
- print(resume_blocks,positions)
 
 
 
509
  for position in positions:
510
  requireds = position['required']
511
  score = 0.0
 
512
  for required in requireds:
513
  blocks = self.__split_blocks(required)
514
- print(blocks)
515
  for block in blocks:
516
- for block_resume in resume_blocks:
517
- score = max(score, self.pipeline((block_resume, block))[0])
518
- self.logger.info(f"position: {position['name']}, required: {block}, resume: {block_resume}, score: {score}")
 
 
519
  ans.append({
520
  'position': position['name'],
521
  'score': score
 
9
  from datetime import date
10
  import time
11
  import logging
12
+ import torch
13
+ import torch.nn.functional as F
14
 
15
  class Predictor():
16
 
 
445
  'start': start,
446
  'end': end,
447
  'entity': 'GENDER',
448
+ 'origin': text[start:end],
449
  'text': text[start:end]
450
  })
451
  end_time = time.perf_counter()
 
507
  ) -> List[Dict[str, Union[str, float]]]:
508
  ans = []
509
  resume_blocks = self.__split_blocks(resume)
510
+ resume_encoding = []
511
+ for block_resume in resume_blocks:
512
+ resume_encoding.append(torch.tensor(self.pipeline(block_resume)[0]))
513
+ resume_encoding = torch.stack(resume_encoding,dim=0)
514
  for position in positions:
515
  requireds = position['required']
516
  score = 0.0
517
+ block_encodings = []
518
  for required in requireds:
519
  blocks = self.__split_blocks(required)
 
520
  for block in blocks:
521
+ block_encodings.append(torch.tensor(self.pipeline(block)[0]))
522
+ block_encodings = torch.stack(block_encodings,dim=0)
523
+ cos_sims = F.cosine_similarity(resume_encoding.unsqueeze(1), block_encodings.unsqueeze(0),dim=-1)
524
+ score = cos_sims.max().item()
525
+ self.logger.info(f"position: {position['name']}, score: {score}")
526
  ans.append({
527
  'position': position['name'],
528
  'score': score
server.py CHANGED
@@ -20,8 +20,8 @@ class Resume(protos.resume_pb2_grpc.ResumeServicer):
20
  self.logger = logging.getLogger(__name__)
21
  self.position_predictor = PositionPredictor(
22
  pipeline=pipeline(
23
- "sentences_sim",
24
- model="minskiter/simbert-chinese-bert-wwm-ext",
25
  device="cpu",
26
  trust_remote_code=True,
27
  use_auth_token=True
 
20
  self.logger = logging.getLogger(__name__)
21
  self.position_predictor = PositionPredictor(
22
  pipeline=pipeline(
23
+ "textencode",
24
+ model="minskiter/cossim-bert-chinese-wwm-ext",
25
  device="cpu",
26
  trust_remote_code=True,
27
  use_auth_token=True