minskiter committed on
Commit
2447637
1 Parent(s): fddeeda

feat(server.py): update match positions

Browse files
predictor/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Any, Dict,List
2
  from transformers import Pipeline
3
  import requests
4
  import re
@@ -474,3 +474,56 @@ class Predictor():
474
  def __call__(self, *args: Any, **kwds: Any) -> Any:
475
  return self.process(*args, **kwds)
476
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict,List,Union
2
  from transformers import Pipeline
3
  import requests
4
  import re
 
474
  def __call__(self, *args: Any, **kwds: Any) -> Any:
475
  return self.process(*args, **kwds)
476
 
477
class PositionPredictor():
    """Rank job positions by how well their requirements match a resume.

    The resume and every requirement text are cut into blocks at
    punctuation/whitespace characters. Each (resume block, requirement
    block) pair is passed to ``pipeline`` and a position's score is the
    highest value obtained for any of its pairs.
    """

    # The annotation is quoted so it is not evaluated when the class is defined.
    def __init__(self, pipeline: "Pipeline") -> None:
        """Store the similarity pipeline and build the delimiter set.

        Args:
            pipeline: Callable invoked as
                ``pipeline((resume_block, requirement_block))``; element
                ``[0]`` of its result is used as the similarity score.
        """
        self.pipeline = pipeline
        self.__init_split_data()
        self.logger = logging.getLogger(__name__)

    def __split_blocks(self, text: str) -> List[str]:
        """Split ``text`` at every delimiter character.

        Args:
            text: Raw text to segment.

        Returns:
            The non-empty runs of characters found between delimiters, in
            their original order. An empty ``text`` yields ``[]``.
        """
        blocks = []
        start = 0
        for end, char in enumerate(text):
            if char in self.splits:
                # Consecutive delimiters would produce empty blocks; skip them.
                if end > start:
                    blocks.append(text[start:end])
                start = end + 1
        # Trailing block after the last delimiter (or the whole text).
        if start < len(text):
            blocks.append(text[start:])
        return blocks

    def __init_split_data(
        self
    ):
        """Build ``self.splits``, the set of strings that delimit blocks."""
        delimiters = {
            '\\', '_', '"', '%', '{', '《', ')', '$', '(', '\n', '~', '*',
            ':', '!', ';', '”', '’', '\t', '?', '-', '》', '】', '`', '、',
            '+', '“', '[', '—', '·', '=', '‘', '}', ',', '&', '@', '#',
            ']', '——', ' ', '.', '【', "'", '>', '/', '。', '...', '^',
            '<', '|', '……',
        }
        # NOTE(review): the previous literal listed ( ) : ; ? , ! twice each,
        # which suggests their fullwidth CJK forms were intended -- confirm.
        fullwidth = {
            '\uff08', '\uff09', '\uff1a', '\uff1b',
            '\uff1f', '\uff0c', '\uff01',
        }
        # Text is scanned one character at a time, so a multi-character entry
        # such as '……' can never match on its own; also register every
        # character of every entry so those sequences actually split.
        single_chars = {char for entry in delimiters for char in entry}
        self.splits = delimiters | fullwidth | single_chars

    def predict(self,
        positions: List[Dict[str, Union[str, List[str]]]],
        resume: str
    ) -> List[Dict[str, Union[str, float]]]:
        """Score every position against the resume.

        Args:
            positions: Dicts with a ``'name'`` entry and a ``'required'``
                entry holding the list of requirement texts.
            resume: Full resume text.

        Returns:
            One ``{'position': name, 'score': score}`` dict per input
            position, sorted by descending score. A position without any
            comparable block pair keeps the initial score ``0.0``.

        Raises:
            KeyError: If a position lacks ``'name'`` or ``'required'``.
        """
        ans = []
        resume_blocks = self.__split_blocks(resume)
        # Routed through the logger instead of print() so the server's stdout
        # is not flooded with resume content on every request.
        self.logger.debug("resume blocks: %s, positions: %s", resume_blocks, positions)
        for position in positions:
            score = 0.0
            for required in position['required']:
                blocks = self.__split_blocks(required)
                self.logger.debug("required blocks: %s", blocks)
                for block in blocks:
                    for block_resume in resume_blocks:
                        # Keep the best similarity seen so far for this position.
                        score = max(score, self.pipeline((block_resume, block))[0])
                        self.logger.info(
                            "position: %s, required: %s, resume: %s, score: %s",
                            position['name'], block, block_resume, score,
                        )
            ans.append({
                'position': position['name'],
                'score': score
            })
        ans.sort(key=lambda x: x['score'], reverse=True)
        return ans

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        """Forward to :meth:`predict` so the instance can be called directly."""
        return self.predict(*args, **kwds)
528
+
529
+
protos/resume.proto CHANGED
@@ -4,12 +4,32 @@ service Resume{
4
  // Get Resume Info
5
  rpc GetInfo(TextRequest) returns (ResumeEntitiesResponse);
6
  rpc Health(PingRequest) returns (PongResponse);
 
7
  }
8
 
9
  message PingRequest{
10
  string ping = 1;
11
  }
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  message PongResponse{
14
  string done = 1;
15
  }
 
4
  // Get Resume Info
5
  rpc GetInfo(TextRequest) returns (ResumeEntitiesResponse);
6
  rpc Health(PingRequest) returns (PongResponse);
7
+ rpc MatchPosition(PositionMatchRequest) returns (PositionMatchResponse);
8
  }
9
 
10
  message PingRequest{
11
  string ping = 1;
12
  }
13
 
14
+ message Position{ // One job opening that a resume is scored against.
15
+ string name = 1; // Position name; the server returns it as PositionMatch.position.
16
+ repeated string required = 2; // Requirement texts that the server compares with the resume.
17
+ }
18
+
19
+ message PositionMatchRequest{ // Input of the MatchPosition rpc.
20
+ repeated Position positions = 1; // Candidate positions to score.
21
+ string resume = 2; // Resume text that every position is scored against.
22
+ }
23
+
24
+ message PositionMatch{ // Score of a single position for the submitted resume.
25
+ string position = 1; // Name of the scored position (Position.name from the request).
26
+ float score = 2; // Highest similarity the server found between a requirement block and a resume block; 0 when nothing was compared.
27
+ }
28
+
29
+ message PositionMatchResponse{ // Output of the MatchPosition rpc.
30
+ repeated PositionMatch matches = 1; // One entry per requested position; server.py fills them in descending score order.
31
+ }
32
+
33
  message PongResponse{
34
  string done = 1;
35
  }
protos/resume_pb2.py CHANGED
@@ -13,7 +13,7 @@ _sym_db = _symbol_database.Default()
13
 
14
 
15
 
16
- DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13protos/resume.proto\"\x1b\n\x0bPingRequest\x12\x0c\n\x04ping\x18\x01 \x01(\t\"\x1c\n\x0cPongResponse\x12\x0c\n\x04\x64one\x18\x01 \x01(\t\"\x1b\n\x0bTextRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\"~\n\x06\x45ntity\x12\x0e\n\x06\x65ntity\x18\x01 \x01(\t\x12\r\n\x05start\x18\x02 \x01(\x05\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x05\x12\x11\n\x04text\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x0e\n\x06origin\x18\x05 \x01(\t\x12\x12\n\x05level\x18\x06 \x01(\tH\x01\x88\x01\x01\x42\x07\n\x05_textB\x08\n\x06_level\"S\n\tJobEntity\x12\x16\n\x05start\x18\x01 \x01(\x0b\x32\x07.Entity\x12\x14\n\x03\x65nd\x18\x02 \x01(\x0b\x32\x07.Entity\x12\x18\n\x07\x63ompany\x18\x03 \x01(\x0b\x32\x07.Entity\"\x8b\x02\n\x16ResumeEntitiesResponse\x12\x16\n\x05names\x18\x01 \x03(\x0b\x32\x07.Entity\x12\x17\n\x06\x65mails\x18\x02 \x03(\x0b\x32\x07.Entity\x12\x17\n\x06phones\x18\x03 \x03(\x0b\x32\x07.Entity\x12\x18\n\x07schools\x18\x04 \x03(\x0b\x32\x07.Entity\x12\x15\n\x04\x65\x64us\x18\x05 \x03(\x0b\x32\x07.Entity\x12\x15\n\x04\x61ges\x18\x06 \x03(\x0b\x32\x07.Entity\x12\x18\n\x07genders\x18\x07 \x03(\x0b\x32\x07.Entity\x12\x12\n\nwork_years\x18\x08 \x01(\x05\x12\x18\n\x04jobs\x18\t \x03(\x0b\x32\n.JobEntity\x12\x17\n\x06titles\x18\n \x03(\x0b\x32\x07.Entity2a\n\x06Resume\x12\x30\n\x07GetInfo\x12\x0c.TextRequest\x1a\x17.ResumeEntitiesResponse\x12%\n\x06Health\x12\x0c.PingRequest\x1a\r.PongResponseb\x06proto3')
17
 
18
  _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
19
  _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'protos.resume_pb2', globals())
@@ -22,16 +22,24 @@ if _descriptor._USE_C_DESCRIPTORS == False:
22
  DESCRIPTOR._options = None
23
  _PINGREQUEST._serialized_start=23
24
  _PINGREQUEST._serialized_end=50
25
- _PONGRESPONSE._serialized_start=52
26
- _PONGRESPONSE._serialized_end=80
27
- _TEXTREQUEST._serialized_start=82
28
- _TEXTREQUEST._serialized_end=109
29
- _ENTITY._serialized_start=111
30
- _ENTITY._serialized_end=237
31
- _JOBENTITY._serialized_start=239
32
- _JOBENTITY._serialized_end=322
33
- _RESUMEENTITIESRESPONSE._serialized_start=325
34
- _RESUMEENTITIESRESPONSE._serialized_end=592
35
- _RESUME._serialized_start=594
36
- _RESUME._serialized_end=691
 
 
 
 
 
 
 
 
37
  # @@protoc_insertion_point(module_scope)
 
13
 
14
 
15
 
16
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13protos/resume.proto\"\x1b\n\x0bPingRequest\x12\x0c\n\x04ping\x18\x01 \x01(\t\"*\n\x08Position\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08required\x18\x02 \x03(\t\"D\n\x14PositionMatchRequest\x12\x1c\n\tpositions\x18\x01 \x03(\x0b\x32\t.Position\x12\x0e\n\x06resume\x18\x02 \x01(\t\"0\n\rPositionMatch\x12\x10\n\x08position\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\"8\n\x15PositionMatchResponse\x12\x1f\n\x07matches\x18\x01 \x03(\x0b\x32\x0e.PositionMatch\"\x1c\n\x0cPongResponse\x12\x0c\n\x04\x64one\x18\x01 \x01(\t\"\x1b\n\x0bTextRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\"~\n\x06\x45ntity\x12\x0e\n\x06\x65ntity\x18\x01 \x01(\t\x12\r\n\x05start\x18\x02 \x01(\x05\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x05\x12\x11\n\x04text\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x0e\n\x06origin\x18\x05 \x01(\t\x12\x12\n\x05level\x18\x06 \x01(\tH\x01\x88\x01\x01\x42\x07\n\x05_textB\x08\n\x06_level\"S\n\tJobEntity\x12\x16\n\x05start\x18\x01 \x01(\x0b\x32\x07.Entity\x12\x14\n\x03\x65nd\x18\x02 \x01(\x0b\x32\x07.Entity\x12\x18\n\x07\x63ompany\x18\x03 \x01(\x0b\x32\x07.Entity\"\x8b\x02\n\x16ResumeEntitiesResponse\x12\x16\n\x05names\x18\x01 \x03(\x0b\x32\x07.Entity\x12\x17\n\x06\x65mails\x18\x02 \x03(\x0b\x32\x07.Entity\x12\x17\n\x06phones\x18\x03 \x03(\x0b\x32\x07.Entity\x12\x18\n\x07schools\x18\x04 \x03(\x0b\x32\x07.Entity\x12\x15\n\x04\x65\x64us\x18\x05 \x03(\x0b\x32\x07.Entity\x12\x15\n\x04\x61ges\x18\x06 \x03(\x0b\x32\x07.Entity\x12\x18\n\x07genders\x18\x07 \x03(\x0b\x32\x07.Entity\x12\x12\n\nwork_years\x18\x08 \x01(\x05\x12\x18\n\x04jobs\x18\t \x03(\x0b\x32\n.JobEntity\x12\x17\n\x06titles\x18\n \x03(\x0b\x32\x07.Entity2\xa1\x01\n\x06Resume\x12\x30\n\x07GetInfo\x12\x0c.TextRequest\x1a\x17.ResumeEntitiesResponse\x12%\n\x06Health\x12\x0c.PingRequest\x1a\r.PongResponse\x12>\n\rMatchPosition\x12\x15.PositionMatchRequest\x1a\x16.PositionMatchResponseb\x06proto3')
17
 
18
  _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
19
  _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'protos.resume_pb2', globals())
 
22
  DESCRIPTOR._options = None
23
  _PINGREQUEST._serialized_start=23
24
  _PINGREQUEST._serialized_end=50
25
+ _POSITION._serialized_start=52
26
+ _POSITION._serialized_end=94
27
+ _POSITIONMATCHREQUEST._serialized_start=96
28
+ _POSITIONMATCHREQUEST._serialized_end=164
29
+ _POSITIONMATCH._serialized_start=166
30
+ _POSITIONMATCH._serialized_end=214
31
+ _POSITIONMATCHRESPONSE._serialized_start=216
32
+ _POSITIONMATCHRESPONSE._serialized_end=272
33
+ _PONGRESPONSE._serialized_start=274
34
+ _PONGRESPONSE._serialized_end=302
35
+ _TEXTREQUEST._serialized_start=304
36
+ _TEXTREQUEST._serialized_end=331
37
+ _ENTITY._serialized_start=333
38
+ _ENTITY._serialized_end=459
39
+ _JOBENTITY._serialized_start=461
40
+ _JOBENTITY._serialized_end=544
41
+ _RESUMEENTITIESRESPONSE._serialized_start=547
42
+ _RESUMEENTITIESRESPONSE._serialized_end=814
43
+ _RESUME._serialized_start=817
44
+ _RESUME._serialized_end=978
45
  # @@protoc_insertion_point(module_scope)
protos/resume_pb2.pyi CHANGED
@@ -43,6 +43,36 @@ class PongResponse(_message.Message):
43
  done: str
44
  def __init__(self, done: _Optional[str] = ...) -> None: ...
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  class ResumeEntitiesResponse(_message.Message):
47
  __slots__ = ["ages", "edus", "emails", "genders", "jobs", "names", "phones", "schools", "titles", "work_years"]
48
  AGES_FIELD_NUMBER: _ClassVar[int]
 
43
  done: str
44
  def __init__(self, done: _Optional[str] = ...) -> None: ...
45
 
46
+ class Position(_message.Message):
47
+ __slots__ = ["name", "required"]
48
+ NAME_FIELD_NUMBER: _ClassVar[int]
49
+ REQUIRED_FIELD_NUMBER: _ClassVar[int]
50
+ name: str
51
+ required: _containers.RepeatedScalarFieldContainer[str]
52
+ def __init__(self, name: _Optional[str] = ..., required: _Optional[_Iterable[str]] = ...) -> None: ...
53
+
54
+ class PositionMatch(_message.Message):
55
+ __slots__ = ["position", "score"]
56
+ POSITION_FIELD_NUMBER: _ClassVar[int]
57
+ SCORE_FIELD_NUMBER: _ClassVar[int]
58
+ position: str
59
+ score: float
60
+ def __init__(self, position: _Optional[str] = ..., score: _Optional[float] = ...) -> None: ...
61
+
62
+ class PositionMatchRequest(_message.Message):
63
+ __slots__ = ["positions", "resume"]
64
+ POSITIONS_FIELD_NUMBER: _ClassVar[int]
65
+ RESUME_FIELD_NUMBER: _ClassVar[int]
66
+ positions: _containers.RepeatedCompositeFieldContainer[Position]
67
+ resume: str
68
+ def __init__(self, positions: _Optional[_Iterable[_Union[Position, _Mapping]]] = ..., resume: _Optional[str] = ...) -> None: ...
69
+
70
+ class PositionMatchResponse(_message.Message):
71
+ __slots__ = ["matches"]
72
+ MATCHES_FIELD_NUMBER: _ClassVar[int]
73
+ matches: _containers.RepeatedCompositeFieldContainer[PositionMatch]
74
+ def __init__(self, matches: _Optional[_Iterable[_Union[PositionMatch, _Mapping]]] = ...) -> None: ...
75
+
76
  class ResumeEntitiesResponse(_message.Message):
77
  __slots__ = ["ages", "edus", "emails", "genders", "jobs", "names", "phones", "schools", "titles", "work_years"]
78
  AGES_FIELD_NUMBER: _ClassVar[int]
protos/resume_pb2_grpc.py CHANGED
@@ -24,6 +24,11 @@ class ResumeStub(object):
24
  request_serializer=protos_dot_resume__pb2.PingRequest.SerializeToString,
25
  response_deserializer=protos_dot_resume__pb2.PongResponse.FromString,
26
  )
 
 
 
 
 
27
 
28
 
29
  class ResumeServicer(object):
@@ -42,6 +47,12 @@ class ResumeServicer(object):
42
  context.set_details('Method not implemented!')
43
  raise NotImplementedError('Method not implemented!')
44
 
 
 
 
 
 
 
45
 
46
  def add_ResumeServicer_to_server(servicer, server):
47
  rpc_method_handlers = {
@@ -55,6 +66,11 @@ def add_ResumeServicer_to_server(servicer, server):
55
  request_deserializer=protos_dot_resume__pb2.PingRequest.FromString,
56
  response_serializer=protos_dot_resume__pb2.PongResponse.SerializeToString,
57
  ),
 
 
 
 
 
58
  }
59
  generic_handler = grpc.method_handlers_generic_handler(
60
  'Resume', rpc_method_handlers)
@@ -98,3 +114,20 @@ class Resume(object):
98
  protos_dot_resume__pb2.PongResponse.FromString,
99
  options, channel_credentials,
100
  insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  request_serializer=protos_dot_resume__pb2.PingRequest.SerializeToString,
25
  response_deserializer=protos_dot_resume__pb2.PongResponse.FromString,
26
  )
27
+ self.MatchPosition = channel.unary_unary(
28
+ '/Resume/MatchPosition',
29
+ request_serializer=protos_dot_resume__pb2.PositionMatchRequest.SerializeToString,
30
+ response_deserializer=protos_dot_resume__pb2.PositionMatchResponse.FromString,
31
+ )
32
 
33
 
34
  class ResumeServicer(object):
 
47
  context.set_details('Method not implemented!')
48
  raise NotImplementedError('Method not implemented!')
49
 
50
+ def MatchPosition(self, request, context):
51
+ """Missing associated documentation comment in .proto file."""
52
+ context.set_code(grpc.StatusCode.UNIMPLEMENTED)
53
+ context.set_details('Method not implemented!')
54
+ raise NotImplementedError('Method not implemented!')
55
+
56
 
57
  def add_ResumeServicer_to_server(servicer, server):
58
  rpc_method_handlers = {
 
66
  request_deserializer=protos_dot_resume__pb2.PingRequest.FromString,
67
  response_serializer=protos_dot_resume__pb2.PongResponse.SerializeToString,
68
  ),
69
+ 'MatchPosition': grpc.unary_unary_rpc_method_handler(
70
+ servicer.MatchPosition,
71
+ request_deserializer=protos_dot_resume__pb2.PositionMatchRequest.FromString,
72
+ response_serializer=protos_dot_resume__pb2.PositionMatchResponse.SerializeToString,
73
+ ),
74
  }
75
  generic_handler = grpc.method_handlers_generic_handler(
76
  'Resume', rpc_method_handlers)
 
114
  protos_dot_resume__pb2.PongResponse.FromString,
115
  options, channel_credentials,
116
  insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
117
+
118
+ @staticmethod
119
+ def MatchPosition(request,
120
+ target,
121
+ options=(),
122
+ channel_credentials=None,
123
+ call_credentials=None,
124
+ insecure=False,
125
+ compression=None,
126
+ wait_for_ready=None,
127
+ timeout=None,
128
+ metadata=None):
129
+ return grpc.experimental.unary_unary(request, target, '/Resume/MatchPosition',
130
+ protos_dot_resume__pb2.PositionMatchRequest.SerializeToString,
131
+ protos_dot_resume__pb2.PositionMatchResponse.FromString,
132
+ options, channel_credentials,
133
+ insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
server.py CHANGED
@@ -5,7 +5,7 @@ import protos.resume_pb2
5
  import protos.resume_pb2_grpc
6
  from huggingface_hub import login
7
  import os
8
- from predictor import Predictor
9
  from transformers import pipeline
10
  from datetime import date
11
 
@@ -17,7 +17,16 @@ class Resume(protos.resume_pb2_grpc.ResumeServicer):
17
 
18
  def __init__(self):
19
  self.done = False
20
- self.logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
21
  self.predictor = Predictor(
22
  pipelines={
23
  "name": pipeline("nerpipe", model="minskiter/resume-token-classification-name-0708",trust_remote_code=True,use_auth_token=True),
@@ -39,6 +48,25 @@ class Resume(protos.resume_pb2_grpc.ResumeServicer):
39
  else:
40
  return protos.resume_pb2.PongResponse(done="Pending")
41
  return protos.resume_pb2.PongResponse(done="PING request is not valid")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  def GetInfo(self, request, context):
44
  entities = self.predictor(request.text)
@@ -137,7 +165,6 @@ class Resume(protos.resume_pb2_grpc.ResumeServicer):
137
  res.work_years = entities['work_time']
138
  return res
139
 
140
-
141
  def serve(port = "50051"):
142
  logger = logging.getLogger(__name__)
143
  server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
 
5
  import protos.resume_pb2_grpc
6
  from huggingface_hub import login
7
  import os
8
+ from predictor import Predictor, PositionPredictor
9
  from transformers import pipeline
10
  from datetime import date
11
 
 
17
 
18
  def __init__(self):
19
  self.done = False
20
+ self.logger = logging.getLogger(__name__)
21
+ self.position_predictor = PositionPredictor(
22
+ pipeline=pipeline(
23
+ "sentences_sim",
24
+ model="minskiter/simbert-chinese-bert-wwm-ext",
25
+ device="cpu",
26
+ trust_remote_code=True,
27
+ use_auth_token=True
28
+ )
29
+ )
30
  self.predictor = Predictor(
31
  pipelines={
32
  "name": pipeline("nerpipe", model="minskiter/resume-token-classification-name-0708",trust_remote_code=True,use_auth_token=True),
 
48
  else:
49
  return protos.resume_pb2.PongResponse(done="Pending")
50
  return protos.resume_pb2.PongResponse(done="PING request is not valid")
51
+
52
def MatchPosition(self, request, context):
    """Handle the MatchPosition RPC.

    Converts the protobuf positions of ``request`` into plain dicts, runs
    ``self.position_predictor`` on them together with ``request.resume``
    and copies every returned entry into a ``PositionMatchResponse``,
    keeping the order produced by the predictor.
    """
    # Plain Python structures are what the predictor indexes into.
    candidates = [
        {"name": item.name, "required": list(item.required)}
        for item in request.positions
    ]
    ranked = self.position_predictor(candidates, request.resume)

    response = protos.resume_pb2.PositionMatchResponse()
    for entry in ranked:
        match = protos.resume_pb2.PositionMatch(
            position=entry["position"],
            score=entry["score"],
        )
        response.matches.append(match)
    return response
70
 
71
  def GetInfo(self, request, context):
72
  entities = self.predictor(request.text)
 
165
  res.work_years = entities['work_time']
166
  return res
167
 
 
168
  def serve(port = "50051"):
169
  logger = logging.getLogger(__name__)
170
  server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))