Spaces:
Running
Running
fkonovalenko
committed on
Commit
•
d99e452
1
Parent(s):
f05530b
first commit
Browse files
app.py
CHANGED
@@ -12,7 +12,6 @@ class GlobalState:
|
|
12 |
result_file_path = os.path.join(os.path.dirname(__file__), 'result/archive.json')
|
13 |
result_dir = os.path.join(os.path.dirname(__file__), 'result')
|
14 |
bert_path = os.path.join(os.path.dirname(__file__), 'tiny.pt')
|
15 |
-
catboost_path = os.path.join(os.path.dirname(__file__), 'best_cat.joblib')
|
16 |
conv_classes = {0: 'low',
|
17 |
1: 'middle',
|
18 |
2: 'high'
|
@@ -72,7 +71,7 @@ def append_to_json(_dict, path):
|
|
72 |
|
73 |
|
74 |
def predict(btn):
|
75 |
-
analyzer = VacancyAnalyzer(GlobalState.bert_path, GlobalState.
|
76 |
status, result = analyzer.classify()
|
77 |
gr.Info(status)
|
78 |
if result != 'unknown':
|
|
|
12 |
result_file_path = os.path.join(os.path.dirname(__file__), 'result/archive.json')
|
13 |
result_dir = os.path.join(os.path.dirname(__file__), 'result')
|
14 |
bert_path = os.path.join(os.path.dirname(__file__), 'tiny.pt')
|
|
|
15 |
conv_classes = {0: 'low',
|
16 |
1: 'middle',
|
17 |
2: 'high'
|
|
|
71 |
|
72 |
|
73 |
def predict(btn):
|
74 |
+
analyzer = VacancyAnalyzer(GlobalState.bert_path, GlobalState.data)
|
75 |
status, result = analyzer.classify()
|
76 |
gr.Info(status)
|
77 |
if result != 'unknown':
|
ml.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import pandas as pd
|
2 |
-
from catboost import Pool
|
3 |
import joblib
|
4 |
import torch
|
5 |
import re
|
@@ -8,9 +7,8 @@ from llm import TransformerRegrModel
|
|
8 |
|
9 |
|
10 |
class VacancyAnalyzer:
|
11 |
-
def __init__(self, transformer_path: str,
|
12 |
self.transformer_path = transformer_path
|
13 |
-
self.catboost_path = catboost_path
|
14 |
self.inputs = pd.DataFrame(inputs, index=[0]).drop(columns=['conversion', 'conversion_class', 'id'], axis=1)
|
15 |
self.cat_features = ['profession', 'grade', 'location']
|
16 |
self.text_features = ['emp_brand', 'mandatory', 'additional', 'comp_stages', 'work_conditions']
|
@@ -21,13 +19,6 @@ class VacancyAnalyzer:
|
|
21 |
txt = re.sub(r'([\n\t]*)', r'', txt)
|
22 |
return txt
|
23 |
|
24 |
-
def predict(self) -> float:
|
25 |
-
df = self.inputs.drop(columns=self.text_features, axis=1)
|
26 |
-
pool = Pool(df, cat_features=self.cat_features)
|
27 |
-
regressor = joblib.load(self.catboost_path)
|
28 |
-
prediction = regressor.predict(pool).tolist()
|
29 |
-
return prediction[0]
|
30 |
-
|
31 |
def classify(self) -> tuple:
|
32 |
df = self.inputs[self.text_features]
|
33 |
description = df[self.text_features[0]].values[0] + ' '
|
|
|
1 |
import pandas as pd
|
|
|
2 |
import joblib
|
3 |
import torch
|
4 |
import re
|
|
|
7 |
|
8 |
|
9 |
class VacancyAnalyzer:
|
10 |
+
def __init__(self, transformer_path: str, inputs: dict):
|
11 |
self.transformer_path = transformer_path
|
|
|
12 |
self.inputs = pd.DataFrame(inputs, index=[0]).drop(columns=['conversion', 'conversion_class', 'id'], axis=1)
|
13 |
self.cat_features = ['profession', 'grade', 'location']
|
14 |
self.text_features = ['emp_brand', 'mandatory', 'additional', 'comp_stages', 'work_conditions']
|
|
|
19 |
txt = re.sub(r'([\n\t]*)', r'', txt)
|
20 |
return txt
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
def classify(self) -> tuple:
|
23 |
df = self.inputs[self.text_features]
|
24 |
description = df[self.text_features[0]].values[0] + ' '
|
tiny.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c505eb64cc6dd292b8823ff2d996f84ff199ff0ce5117aaef95ddcffe1c6cefc
|
3 |
+
size 116799348
|