Spaces:
Runtime error
Runtime error
fix(server.py): optimize similarity algorithm
Browse files
- predictor/__init__.py +13 -6
- server.py +2 -2
predictor/__init__.py
CHANGED
@@ -9,6 +9,8 @@ import queue
|
|
9 |
from datetime import date
|
10 |
import time
|
11 |
import logging
|
|
|
|
|
12 |
|
13 |
class Predictor():
|
14 |
|
@@ -443,7 +445,7 @@ class Predictor():
|
|
443 |
'start': start,
|
444 |
'end': end,
|
445 |
'entity': 'GENDER',
|
446 |
-
'
|
447 |
'text': text[start:end]
|
448 |
})
|
449 |
end_time = time.perf_counter()
|
@@ -505,17 +507,22 @@ class PositionPredictor():
|
|
505 |
) -> List[Dict[str, Union[str, float]]]:
|
506 |
ans = []
|
507 |
resume_blocks = self.__split_blocks(resume)
|
508 |
-
|
|
|
|
|
|
|
509 |
for position in positions:
|
510 |
requireds = position['required']
|
511 |
score = 0.0
|
|
|
512 |
for required in requireds:
|
513 |
blocks = self.__split_blocks(required)
|
514 |
-
print(blocks)
|
515 |
for block in blocks:
|
516 |
-
|
517 |
-
|
518 |
-
|
|
|
|
|
519 |
ans.append({
|
520 |
'position': position['name'],
|
521 |
'score': score
|
|
|
9 |
from datetime import date
|
10 |
import time
|
11 |
import logging
|
12 |
+
import torch
|
13 |
+
import torch.nn.functional as F
|
14 |
|
15 |
class Predictor():
|
16 |
|
|
|
445 |
'start': start,
|
446 |
'end': end,
|
447 |
'entity': 'GENDER',
|
448 |
+
'origin': text[start:end],
|
449 |
'text': text[start:end]
|
450 |
})
|
451 |
end_time = time.perf_counter()
|
|
|
507 |
) -> List[Dict[str, Union[str, float]]]:
|
508 |
ans = []
|
509 |
resume_blocks = self.__split_blocks(resume)
|
510 |
+
resume_encoding = []
|
511 |
+
for block_resume in resume_blocks:
|
512 |
+
resume_encoding.append(torch.tensor(self.pipeline(block_resume)[0]))
|
513 |
+
resume_encoding = torch.stack(resume_encoding,dim=0)
|
514 |
for position in positions:
|
515 |
requireds = position['required']
|
516 |
score = 0.0
|
517 |
+
block_encodings = []
|
518 |
for required in requireds:
|
519 |
blocks = self.__split_blocks(required)
|
|
|
520 |
for block in blocks:
|
521 |
+
block_encodings.append(torch.tensor(self.pipeline(block)[0]))
|
522 |
+
block_encodings = torch.stack(block_encodings,dim=0)
|
523 |
+
cos_sims = F.cosine_similarity(resume_encoding.unsqueeze(1), block_encodings.unsqueeze(0),dim=-1)
|
524 |
+
score = cos_sims.max().item()
|
525 |
+
self.logger.info(f"position: {position['name']}, score: {score}")
|
526 |
ans.append({
|
527 |
'position': position['name'],
|
528 |
'score': score
|
server.py
CHANGED
@@ -20,8 +20,8 @@ class Resume(protos.resume_pb2_grpc.ResumeServicer):
|
|
20 |
self.logger = logging.getLogger(__name__)
|
21 |
self.position_predictor = PositionPredictor(
|
22 |
pipeline=pipeline(
|
23 |
-
"
|
24 |
-
model="minskiter/
|
25 |
device="cpu",
|
26 |
trust_remote_code=True,
|
27 |
use_auth_token=True
|
|
|
20 |
self.logger = logging.getLogger(__name__)
|
21 |
self.position_predictor = PositionPredictor(
|
22 |
pipeline=pipeline(
|
23 |
+
"textencode",
|
24 |
+
model="minskiter/cossim-bert-chinese-wwm-ext",
|
25 |
device="cpu",
|
26 |
trust_remote_code=True,
|
27 |
use_auth_token=True
|