fkonovalenko committed on
Commit
d99e452
1 Parent(s): f05530b

first commit

Browse files
Files changed (3) hide show
  1. app.py +1 -2
  2. ml.py +1 -10
  3. tiny.pt +3 -0
app.py CHANGED
@@ -12,7 +12,6 @@ class GlobalState:
12
  result_file_path = os.path.join(os.path.dirname(__file__), 'result/archive.json')
13
  result_dir = os.path.join(os.path.dirname(__file__), 'result')
14
  bert_path = os.path.join(os.path.dirname(__file__), 'tiny.pt')
15
- catboost_path = os.path.join(os.path.dirname(__file__), 'best_cat.joblib')
16
  conv_classes = {0: 'low',
17
  1: 'middle',
18
  2: 'high'
@@ -72,7 +71,7 @@ def append_to_json(_dict, path):
72
 
73
 
74
  def predict(btn):
75
- analyzer = VacancyAnalyzer(GlobalState.bert_path, GlobalState.catboost_path, GlobalState.data)
76
  status, result = analyzer.classify()
77
  gr.Info(status)
78
  if result != 'unknown':
 
12
  result_file_path = os.path.join(os.path.dirname(__file__), 'result/archive.json')
13
  result_dir = os.path.join(os.path.dirname(__file__), 'result')
14
  bert_path = os.path.join(os.path.dirname(__file__), 'tiny.pt')
 
15
  conv_classes = {0: 'low',
16
  1: 'middle',
17
  2: 'high'
 
71
 
72
 
73
  def predict(btn):
74
+ analyzer = VacancyAnalyzer(GlobalState.bert_path, GlobalState.data)
75
  status, result = analyzer.classify()
76
  gr.Info(status)
77
  if result != 'unknown':
ml.py CHANGED
@@ -1,5 +1,4 @@
1
  import pandas as pd
2
- from catboost import Pool
3
  import joblib
4
  import torch
5
  import re
@@ -8,9 +7,8 @@ from llm import TransformerRegrModel
8
 
9
 
10
  class VacancyAnalyzer:
11
- def __init__(self, transformer_path: str, catboost_path: str, inputs: dict):
12
  self.transformer_path = transformer_path
13
- self.catboost_path = catboost_path
14
  self.inputs = pd.DataFrame(inputs, index=[0]).drop(columns=['conversion', 'conversion_class', 'id'], axis=1)
15
  self.cat_features = ['profession', 'grade', 'location']
16
  self.text_features = ['emp_brand', 'mandatory', 'additional', 'comp_stages', 'work_conditions']
@@ -21,13 +19,6 @@ class VacancyAnalyzer:
21
  txt = re.sub(r'([\n\t]*)', r'', txt)
22
  return txt
23
 
24
- def predict(self) -> float:
25
- df = self.inputs.drop(columns=self.text_features, axis=1)
26
- pool = Pool(df, cat_features=self.cat_features)
27
- regressor = joblib.load(self.catboost_path)
28
- prediction = regressor.predict(pool).tolist()
29
- return prediction[0]
30
-
31
  def classify(self) -> tuple:
32
  df = self.inputs[self.text_features]
33
  description = df[self.text_features[0]].values[0] + ' '
 
1
  import pandas as pd
 
2
  import joblib
3
  import torch
4
  import re
 
7
 
8
 
9
  class VacancyAnalyzer:
10
+ def __init__(self, transformer_path: str, inputs: dict):
11
  self.transformer_path = transformer_path
 
12
  self.inputs = pd.DataFrame(inputs, index=[0]).drop(columns=['conversion', 'conversion_class', 'id'], axis=1)
13
  self.cat_features = ['profession', 'grade', 'location']
14
  self.text_features = ['emp_brand', 'mandatory', 'additional', 'comp_stages', 'work_conditions']
 
19
  txt = re.sub(r'([\n\t]*)', r'', txt)
20
  return txt
21
 
 
 
 
 
 
 
 
22
  def classify(self) -> tuple:
23
  df = self.inputs[self.text_features]
24
  description = df[self.text_features[0]].values[0] + ' '
tiny.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c505eb64cc6dd292b8823ff2d996f84ff199ff0ce5117aaef95ddcffe1c6cefc
3
+ size 116799348