Spaces:
Runtime error
Runtime error
File size: 4,051 Bytes
fdb6b1e 688b98f 9ab4f02 688b98f 275562d 06a9e39 688b98f fdb6b1e 275562d fdb6b1e 688b98f fdb6b1e 688b98f 275562d fdb6b1e 275562d fdb6b1e 688b98f fdb6b1e 688b98f fdb6b1e 275562d fdb6b1e 114789e fdb6b1e 275562d fdb6b1e 275562d fdb6b1e 275562d fdb6b1e 688b98f fdb6b1e 275562d 688b98f fdb6b1e 688b98f fdb6b1e 275562d fdb6b1e 688b98f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
from typing import List, Tuple
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import streamlit as st
from st_aggrid import AgGrid, GridOptionsBuilder, JsCode
st.set_page_config(layout='wide')
@st.cache(allow_output_mutation=True)
def load_model():
"""Load pretrained model from SentenceTransformer"""
return SentenceTransformer('minilm_sbert')
def semantic_search(model, sentence, corpus_embeddings):
"""Perform semantic search on the corpus"""
query_embeddings = model.encode(sentence,
convert_to_tensor=True,
normalize_embeddings=True)
hits = util.semantic_search(query_embeddings,
corpus_embeddings,
top_k=len(corpus_embeddings),
score_function=util.dot_score)
return pd.DataFrame(hits[0])
def get_similarity_score(model, data, query, corpus_embeddings):
"""Get similarity score for each data point"""
hits = semantic_search(model, [query], corpus_embeddings)
result = pd.merge(data, hits, left_on='ID', right_on='corpus_id')
result.sort_values(by='score', ascending=False, inplace=True)
return result
@st.cache(allow_output_mutation=True)
def create_embedding(model: SentenceTransformer, data: pd.DataFrame, key: str) -> Tuple[list, list]:
"""Create vector embeddings from the dataset"""
corpus_sentences = data[key].astype(str).tolist()
corpus_embeddings = model.encode(sentences=corpus_sentences,
show_progress_bar=True,
convert_to_tensor=True,
normalize_embeddings=True)
return corpus_embeddings
def load_dataset(columns: List) -> pd.DataFrame:
"""Load real-time dataset from google sheets"""
sheet_id = '1KeuPPVw9gueNmMrQXk1uGFlY9H1vvhErMLiX_ZVRv_Y'
sheet_name = 'Form Response 3'.replace(' ', '%20')
url = f'https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}'
data = pd.read_csv(url)
data = data.iloc[: , :7]
data.columns = columns
data.insert(0, 'ID', range(len(data)))
return data
def show_aggrid_table(result: pd.DataFrame):
"""Show interactive table from similarity result"""
gb = GridOptionsBuilder.from_dataframe(result)
gb.configure_pagination(paginationAutoPageSize=True)
gb.configure_side_bar()
gb.configure_default_column(min_column_width=200)
gb.configure_selection('multiple', use_checkbox=True, groupSelectsChildren="Group checkbox select children")
gb.configure_column(field='LinkedIn Profile',
headerName='LinkedIn Profile',
cellRenderer=JsCode('''function(params) {return `<a href=${params.value} target="_blank">${params.value}</a>`}'''))
grid_options = gb.build()
grid_response = AgGrid(
dataframe=result,
gridOptions=grid_options,
height=1100,
fit_columns_on_grid_load=True,
data_return_mode='AS_INPUT',
update_mode='VALUE_CHANGED',
theme='light',
enable_enterprise_modules=True,
allow_unsafe_jscode=True,
)
def main():
"""Main Function"""
st.title('Job Posting Similarity')
st.write('This app will help you find similar job titles real-time from ecommurz google sheets.')
columns = ['Timestamp', 'Full Name', 'Company', 'Previous Role',
'Experience', 'Last Day', 'LinkedIn Profile']
data = load_dataset(columns)
model = load_model()
corpus_embeddings = create_embedding(model, data, 'Previous Role')
job_title = st.text_input('Insert the job title below:', '')
submitted = st.button('Submit')
if submitted:
st.info(f'Showing results for {job_title}')
result = get_similarity_score(model, data, job_title, corpus_embeddings)
result = result[columns]
show_aggrid_table(result)
if __name__ == '__main__':
main()
|