import os

import gradio as gr
import numpy as np
import pandas as pd
from flask_restful import Resource, Api
from sentence_transformers import SentenceTransformer, util


# Sentence-embedding model used both for the occupation labels below and for
# every incoming query.
model = SentenceTransformer("Jiahuixu/occt5")

# The lookup table is read from the current working directory.
current_directory = os.getcwd()
file_path_dict = os.path.join(current_directory, 'dict_text2pums.csv')
dict_text2pums = pd.read_csv(file_path_dict)

# Index the table by the values of its first column so rows can be fetched
# by label with ``.loc``.
dict_text2pums.index = dict_text2pums.iloc[:, 0].to_list()

# Occupation descriptions with a fresh 0..n-1 index, so positions in
# ``targetlabs`` line up with rows of ``targetembs``.  The non-inplace
# ``reset_index`` avoids mutating a Series taken from the DataFrame.
targetlabs = dict_text2pums[
    'Description (2018 Census Occupation Code)'
].reset_index(drop=True)

# Embed every label once at start-up; queries are compared against these.
targetembs = model.encode(targetlabs.to_list())


def t5occsingle(job_title_description):
    """Match a job title/description to the most similar occupation label.

    The text is embedded with the module-level ``model`` and compared by
    cosine similarity against the precomputed ``targetembs``; the label
    with the highest similarity is selected.

    Args:
        job_title_description: Text to classify (wired to the Gradio text
            input).

    Returns:
        A one-entry dict ``{PUMS code as str: matched description}``.

    Raises:
        KeyError: If the stripped description is not a label in the index
            of ``dict_text2pums``.
    """
    input_emb = model.encode(job_title_description)

    # Score the input against every label in one batched call.  The
    # previous loop stopped at ``len - 1`` and therefore never considered
    # the last occupation label.
    similarities = util.cos_sim(input_emb, targetembs)[0]
    best_position = int(similarities.argmax())

    occtext = targetlabs.iloc[best_position]
    # NOTE(review): this lookup assumes the table's index (its first column)
    # holds the stripped description text -- confirm against the CSV.
    occpums = dict_text2pums.loc[
        str.strip(occtext), '2018 Census PUMS Occupation Code'
    ]

    return {str(occpums): occtext}


# Wrap the classifier in a minimal text-in / text-out Gradio interface.
demo = gr.Interface(fn=t5occsingle, inputs="text", outputs="text")

# share=True requests a public share link in addition to the local server.
demo.launch(share=True)