Elvan Selvano committed on
Commit
752420c
•
1 Parent(s): 6d463e1

Make proper link

Browse files
Files changed (1) hide show
  1. app.py +15 -3
app.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  from sentence_transformers import SentenceTransformer, util
4
  import streamlit as st
5
  from st_aggrid import AgGrid, GridOptionsBuilder, JsCode
 
6
  st.set_page_config(layout='wide')
7
 
8
  @st.cache(allow_output_mutation=True)
@@ -27,11 +28,11 @@ def get_similarity_score(model, data, query, corpus_embeddings):
27
  """Get similarity score for each data point and sort by similarity score and day"""
28
  hits = semantic_search(model, [query], corpus_embeddings)
29
  result = pd.merge(data, hits, left_on='ID', right_on='corpus_id')
30
- result['Last Day'] = pd.to_datetime(result['Last Day'], format='%d/%m/%Y')
31
  result.sort_values(by=['score', 'Last Day'], ascending=[False, True], inplace=True)
32
  return result
33
 
34
- @st.cache(allow_output_mutation=True)
35
  def create_embedding(model: SentenceTransformer, data: pd.DataFrame, key: str) -> Tuple[list, list]:
36
  """Create vector embeddings from the dataset"""
37
  corpus_sentences = data[key].astype(str).tolist()
@@ -83,8 +84,19 @@ def main():
83
  st.write('This app lets you search and sort talent by job title or relevant job descriptions from ecommurz talent list in real-time.')
84
 
85
  columns = ['Timestamp', 'Full Name', 'Company', 'Previous Role',
86
- 'Experience', 'Last Day', 'LinkedIn Profile']
87
  data = load_dataset(columns)
 
 
 
 
 
 
 
 
 
 
 
88
  model = load_model()
89
  corpus_embeddings = create_embedding(model, data, 'Previous Role')
90
 
 
3
  from sentence_transformers import SentenceTransformer, util
4
  import streamlit as st
5
  from st_aggrid import AgGrid, GridOptionsBuilder, JsCode
6
+ import numpy as np
7
  st.set_page_config(layout='wide')
8
 
9
  @st.cache(allow_output_mutation=True)
 
28
  """Get similarity score for each data point and sort by similarity score and day"""
29
  hits = semantic_search(model, [query], corpus_embeddings)
30
  result = pd.merge(data, hits, left_on='ID', right_on='corpus_id')
31
+ result['Last Day'] = pd.to_datetime(result['Last Day'], format='%d/%m/%Y').dt.date
32
  result.sort_values(by=['score', 'Last Day'], ascending=[False, True], inplace=True)
33
  return result
34
 
35
+ @st.cache(ttl=24*3600)
36
  def create_embedding(model: SentenceTransformer, data: pd.DataFrame, key: str) -> Tuple[list, list]:
37
  """Create vector embeddings from the dataset"""
38
  corpus_sentences = data[key].astype(str).tolist()
 
84
  st.write('This app lets you search and sort talent by job title or relevant job descriptions from ecommurz talent list in real-time.')
85
 
86
  columns = ['Timestamp', 'Full Name', 'Company', 'Previous Role',
87
+ 'Experience (months)', 'Last Day', 'LinkedIn Profile']
88
  data = load_dataset(columns)
89
+
90
+ # Preprocess Data
91
+ data['Full Name'] = data['Full Name'].str.title()
92
+ data['LinkedIn Profile'] = np.where(data['LinkedIn Profile'].str.startswith('www.linkedin.com'),
93
+ "https://" + data['LinkedIn Profile'],
94
+ data['LinkedIn Profile'])
95
+ data['LinkedIn Profile'] = np.where(data['LinkedIn Profile'].str.startswith('linkedin.com'),
96
+ "https://www." + data['LinkedIn Profile'],
97
+ data['LinkedIn Profile'])
98
+
99
+
100
  model = load_model()
101
  corpus_embeddings = create_embedding(model, data, 'Previous Role')
102