Spaces:
Runtime error
Runtime error
File size: 4,265 Bytes
fdb6b1e 688b98f 9ab4f02 688b98f d12744d 9ab4f02 688b98f fdb6b1e d12744d fdb6b1e 688b98f fdb6b1e 688b98f d12744d fdb6b1e d12744d fdb6b1e 688b98f fdb6b1e 688b98f fdb6b1e d12744d fdb6b1e 114789e fdb6b1e d12744d fdb6b1e d12744d fdb6b1e d12744d fdb6b1e 688b98f fdb6b1e d12744d 688b98f fdb6b1e 688b98f fdb6b1e d12744d fdb6b1e 9ab4f02 d12744d 9ab4f02 fdb6b1e 688b98f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
from typing import List, Tuple

import pandas as pd
import streamlit as st
import torch
from sentence_transformers import SentenceTransformer, util
from st_aggrid import AgGrid, GridOptionsBuilder, JsCode
# Configure the Streamlit page with the 'wide' layout before anything is rendered.
st.set_page_config(layout='wide')
@st.cache(allow_output_mutation=True)
def load_model():
    """Build the sentence-embedding model used by the app.

    The function is wrapped in ``st.cache`` so the model object is reused
    instead of being constructed again on every call.

    Returns:
        A ``SentenceTransformer`` created from the ``all-MiniLM-L6-v2``
        checkpoint name.
    """
    model_name = 'all-MiniLM-L6-v2'
    return SentenceTransformer(model_name)
def semantic_search(model, sentence, corpus_embeddings):
    """Rank every corpus entry against a query.

    Args:
        model: Object exposing ``encode`` (a ``SentenceTransformer`` in this
            app).
        sentence: Query text passed straight to ``model.encode``; the caller
            in this file passes a one-element list.
        corpus_embeddings: Pre-computed embeddings of the corpus.

    Returns:
        A ``pd.DataFrame`` built from the hits of the first query only.
        ``top_k`` is set to the corpus size, so no corpus entry is cut off.
    """
    # The query is encoded as a normalised tensor and scored by dot product.
    encoded_query = model.encode(
        sentences=sentence,
        convert_to_tensor=True,
        normalize_embeddings=True,
    )
    corpus_size = len(corpus_embeddings)
    hits_per_query = util.semantic_search(
        query_embeddings=encoded_query,
        corpus_embeddings=corpus_embeddings,
        top_k=corpus_size,
        score_function=util.dot_score,
    )
    first_query_hits = hits_per_query[0]
    return pd.DataFrame(first_query_hits)
def get_similarity_score(model, data, query, corpus_embeddings):
    """Attach a similarity score to each row of ``data`` and rank the rows.

    Args:
        model: Embedding model forwarded to ``semantic_search``.
        data: DataFrame with an ``ID`` column that is matched against the
            ``corpus_id`` column of the search hits.
        query: Single query string; it is wrapped in a list before searching.
        corpus_embeddings: Pre-computed embeddings of the corpus.

    Returns:
        The rows of ``data`` joined with their hits, ordered by ``score``
        from highest to lowest.
    """
    scored_hits = semantic_search(model, [query], corpus_embeddings)
    joined = data.merge(scored_hits, left_on='ID', right_on='corpus_id')
    return joined.sort_values(by='score', ascending=False)
@st.cache(allow_output_mutation=True)
def create_embedding(model: SentenceTransformer, data: pd.DataFrame, key: str) -> torch.Tensor:
    """Create vector embeddings for one column of the dataset.

    The function is wrapped in ``st.cache`` so repeated calls with the same
    arguments can reuse the stored result.

    Args:
        model: Sentence-embedding model used to encode the text.
        data: DataFrame holding the corpus.
        key: Name of the column in ``data`` whose values are embedded.

    Returns:
        The value returned by ``model.encode`` with ``convert_to_tensor=True``
        and ``normalize_embeddings=True``: a single tensor of normalised
        embeddings, one per row of ``data`` in row order (not a tuple of
        lists).
    """
    # Cast to str so non-string cells (e.g. NaN for blank answers) can be encoded.
    corpus_sentences = data[key].astype(str).tolist()
    corpus_embeddings = model.encode(
        sentences=corpus_sentences,
        show_progress_bar=True,
        convert_to_tensor=True,
        normalize_embeddings=True,
    )
    return corpus_embeddings
def load_dataset(columns: List) -> pd.DataFrame:
    """Load the current dataset from Google Sheets.

    The sheet is downloaded as CSV on every call. Only the leading
    ``len(columns)`` columns are kept and renamed to ``columns``, and a
    zero-based ``ID`` column is inserted at the front.

    Args:
        columns: Names to assign, in order, to the leading columns of the
            sheet.

    Returns:
        A DataFrame with columns ``['ID', *columns]``.

    Raises:
        ValueError: Raised by pandas when the sheet has fewer columns than
            ``columns`` has names.
    """
    sheet_id = '1KeuPPVw9gueNmMrQXk1uGFlY9H1vvhErMLiX_ZVRv_Y'
    # Spaces in the sheet name are percent-encoded for use in the query string.
    sheet_name = 'Form Response 3'.replace(' ', '%20')
    url = f'https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv&sheet={sheet_name}'
    data = pd.read_csv(url)
    # Slice by the number of requested names, not a hard-coded count, so the
    # slice and the rename below stay in step with the caller's list.
    data = data.iloc[:, :len(columns)]
    data.columns = columns
    # ID is the row position; get_similarity_score joins it against corpus_id.
    data.insert(0, 'ID', range(len(data)))
    return data
def show_aggrid_table(result: pd.DataFrame) -> None:
    """Render the similarity result as an interactive AgGrid table.

    The grid is paginated, has a side bar and multi-row checkbox selection,
    and shows the ``LinkedIn Profile`` column as a link opening in a new tab.

    Args:
        result: DataFrame to display; expected to contain a
            ``LinkedIn Profile`` column.
    """
    # NOTE(review): the cell value is interpolated into the HTML unescaped and
    # the href attribute is unquoted. If the sheet accepts arbitrary user
    # input this is an injection risk; confirm and harden the renderer.
    link_renderer = JsCode('''function(params) {return `<a href=${params.value} target="_blank">${params.value}</a>`}''')

    gb = GridOptionsBuilder.from_dataframe(result)
    gb.configure_pagination(paginationAutoPageSize=True)
    gb.configure_side_bar()
    gb.configure_default_column(min_column_width=200)
    gb.configure_selection('multiple', use_checkbox=True, groupSelectsChildren="Group checkbox select children")
    gb.configure_column(
        field='LinkedIn Profile',
        headerName='LinkedIn Profile',
        cellRenderer=link_renderer,
    )
    grid_options = gb.build()

    # The grid response is not used, so the call's return value is not bound.
    AgGrid(
        dataframe=result,
        gridOptions=grid_options,
        height=1100,
        fit_columns_on_grid_load=True,
        data_return_mode='AS_INPUT',
        update_mode='VALUE_CHANGED',
        theme='light',
        enable_enterprise_modules=True,
        # Needed because the column renderer above is passed as JsCode.
        allow_unsafe_jscode=True,
    )
def main():
    """Run the Streamlit page: load data, embed it, and serve queries.

    On every run the dataset is loaded, the model is obtained and the
    ``Previous Role`` column is embedded. Once the user presses *Submit*,
    the rows are ranked against the entered job title and shown in a table.
    """
    st.title('Job Posting Similarity')
    st.write('This app will help you find similar job titles real-time from ecommurz google sheets.')

    columns = [
        'Timestamp', 'Full Name', 'Company', 'Previous Role',
        'Experience', 'Last Day', 'LinkedIn Profile',
    ]
    data = load_dataset(columns)
    model = load_model()
    corpus_embeddings = create_embedding(model, data, 'Previous Role')

    job_title = st.text_input('Insert the job title below:', '')
    if not st.button('Submit'):
        # Nothing to rank until the user submits a query.
        return

    st.info(f'Showing results for {job_title}')
    ranked = get_similarity_score(model, data, job_title, corpus_embeddings)
    # Keep only the original sheet columns, dropping the helper columns
    # (ID and the search-hit fields) added for the join.
    # A CSV download button for this table was sketched here but is disabled.
    show_aggrid_table(ranked[columns])
# Start the app only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|