t5-occ / app.py
Jiahuixu's picture
change eval_text to job_title_description
bd327fb
import pandas as pd
import numpy as np
from flask_restful import Resource, Api
import gradio as gr
import os
from sentence_transformers import SentenceTransformer, util
# def greet(name):
# return "Hello " + name + ", to t5-occ!"
# Sentence-embedding model used to encode both the occupation labels below
# and every incoming query.
model = SentenceTransformer("Jiahuixu/occt5")

# The lookup table is resolved against the current working directory, so the
# app has to be started from the folder that contains dict_text2pums.csv.
current_directory = os.getcwd()
file_path_dict = os.path.join(current_directory, 'dict_text2pums.csv')
dict_text2pums = pd.read_csv(file_path_dict)
# Index the rows by the values of the first column so t5occsingle can fetch a
# row with .loc using a label string.
dict_text2pums.index = dict_text2pums.iloc[:, 0].to_list()

# Build a positionally indexed copy of the label column. The non-inplace
# reset_index leaves the column stored inside dict_text2pums untouched instead
# of mutating a Series that was selected out of the frame.
targetlabs = dict_text2pums[
    'Description (2018 Census Occupation Code)'
].reset_index(drop=True)
# Encode every label once at start-up; position i in targetembs corresponds
# to targetlabs.iloc[i].
targetembs = model.encode(targetlabs.to_list())
def t5occsingle(job_title_description):
    """Return the census occupation that best matches a job description.

    The text is embedded with the module-level ``model`` and scored by cosine
    similarity against ``targetembs``, the precomputed embeddings of every
    label in ``targetlabs``. All labels take part in the comparison, including
    the last one. When several labels share the top score, the one that
    appears first in ``targetlabs`` is chosen.

    Args:
        job_title_description: Text handed to ``model.encode``.

    Returns:
        A one-item dict ``{str(code): label}``. ``label`` is the best-matching
        entry of ``targetlabs``; ``code`` is the value in the
        '2018 Census PUMS Occupation Code' column of ``dict_text2pums`` for
        the row whose index equals the whitespace-stripped label.

    Raises:
        KeyError: If the stripped label is not present in the index of
            ``dict_text2pums``.
    """
    input_emb = model.encode(job_title_description)
    # A single batched call yields a (1, n_labels) score matrix; row 0 holds
    # the similarity of the query to each label.
    similarities = util.cos_sim(input_emb, targetembs)[0]
    # argmax returns the first index of the maximum, covering every label.
    best_position = int(similarities.argmax())
    best_label = targetlabs.iloc[best_position]
    occpums = dict_text2pums.loc[
        str.strip(best_label), '2018 Census PUMS Occupation Code'
    ]
    return {str(occpums): best_label}
# Web UI: one text input, one text output; each submission is passed to
# t5occsingle and its return value is rendered as text.
demo = gr.Interface(
    fn=t5occsingle,
    inputs="text",
    outputs="text",
)
# share=True additionally requests a shareable link from Gradio.
demo.launch(share=True)