elvanselvano committed on
Commit
275562d
•
1 Parent(s): eb7f8fa
Files changed (1) hide show
  1. app.py +19 -14
app.py CHANGED
@@ -7,9 +7,11 @@ st.set_page_config(layout='wide')
7
 
8
  @st.cache(allow_output_mutation=True)
9
  def load_model():
 
10
  return SentenceTransformer('all-MiniLM-L6-v2')
11
 
12
  def semantic_search(model, sentence, corpus_embeddings):
 
13
  query_embeddings = model.encode(sentence,
14
  convert_to_tensor=True,
15
  normalize_embeddings=True)
@@ -21,9 +23,10 @@ def semantic_search(model, sentence, corpus_embeddings):
21
 
22
  return pd.DataFrame(hits[0])
23
 
24
- def top_k_similarity(model, df, query, corpus_embeddings):
 
25
  hits = semantic_search(model, [query], corpus_embeddings)
26
- result = pd.merge(df, hits, left_on='ID', right_on='corpus_id')
27
  result.sort_values(by='score', ascending=False, inplace=True)
28
  return result
29
 
@@ -49,20 +52,21 @@ def load_dataset(columns: List) -> pd.DataFrame:
49
  return data
50
 
51
  def show_aggrid_table(result: pd.DataFrame):
 
52
  gb = GridOptionsBuilder.from_dataframe(result)
53
  gb.configure_pagination(paginationAutoPageSize=True)
54
  gb.configure_side_bar()
55
  gb.configure_default_column(min_column_width=200)
56
  gb.configure_selection('multiple', use_checkbox=True, groupSelectsChildren="Group checkbox select children")
57
- gb.configure_column(field="LinkedIn Profile",
58
- headerName="LinkedIn Profile",
59
  cellRenderer=JsCode('''function(params) {return `<a href=${params.value} target="_blank">${params.value}</a>`}'''))
60
 
61
- gridOptions = gb.build()
62
 
63
  grid_response = AgGrid(
64
  dataframe=result,
65
- gridOptions=gridOptions,
66
  height=1100,
67
  fit_columns_on_grid_load=True,
68
  data_return_mode='AS_INPUT',
@@ -73,6 +77,7 @@ def show_aggrid_table(result: pd.DataFrame):
73
  )
74
 
75
  def main():
 
76
  st.title('Job Posting Similarity')
77
  st.write('This app will help you find similar job titles real-time from ecommurz google sheets.')
78
 
@@ -87,16 +92,16 @@ def main():
87
 
88
  if submitted:
89
  st.info(f'Showing results for {job_title}')
90
- result = top_k_similarity(model, data, job_title, corpus_embeddings)
91
  result = result[columns]
92
 
93
- st.download_button(
94
- "Download Table",
95
- result.to_csv().encode('utf-8'),
96
- "result.csv",
97
- "text/csv",
98
- key='download-csv'
99
- )
100
 
101
  show_aggrid_table(result)
102
 
 
7
 
8
  @st.cache(allow_output_mutation=True)
9
  def load_model():
10
+ """Load pretrained model from SentenceTransformer"""
11
  return SentenceTransformer('all-MiniLM-L6-v2')
12
 
13
  def semantic_search(model, sentence, corpus_embeddings):
14
+ """Perform semantic search on the corpus"""
15
  query_embeddings = model.encode(sentence,
16
  convert_to_tensor=True,
17
  normalize_embeddings=True)
 
23
 
24
  return pd.DataFrame(hits[0])
25
 
26
+ def get_similarity_score(model, data, query, corpus_embeddings):
27
+ """Get similarity score for each data point"""
28
  hits = semantic_search(model, [query], corpus_embeddings)
29
+ result = pd.merge(data, hits, left_on='ID', right_on='corpus_id')
30
  result.sort_values(by='score', ascending=False, inplace=True)
31
  return result
32
 
 
52
  return data
53
 
54
  def show_aggrid_table(result: pd.DataFrame):
55
+ """Show interactive table from similarity result"""
56
  gb = GridOptionsBuilder.from_dataframe(result)
57
  gb.configure_pagination(paginationAutoPageSize=True)
58
  gb.configure_side_bar()
59
  gb.configure_default_column(min_column_width=200)
60
  gb.configure_selection('multiple', use_checkbox=True, groupSelectsChildren="Group checkbox select children")
61
+ gb.configure_column(field='LinkedIn Profile',
62
+ headerName='LinkedIn Profile',
63
  cellRenderer=JsCode('''function(params) {return `<a href=${params.value} target="_blank">${params.value}</a>`}'''))
64
 
65
+ grid_options = gb.build()
66
 
67
  grid_response = AgGrid(
68
  dataframe=result,
69
+ gridOptions=grid_options,
70
  height=1100,
71
  fit_columns_on_grid_load=True,
72
  data_return_mode='AS_INPUT',
 
77
  )
78
 
79
  def main():
80
+ """Main Function"""
81
  st.title('Job Posting Similarity')
82
  st.write('This app will help you find similar job titles real-time from ecommurz google sheets.')
83
 
 
92
 
93
  if submitted:
94
  st.info(f'Showing results for {job_title}')
95
+ result = get_similarity_score(model, data, job_title, corpus_embeddings)
96
  result = result[columns]
97
 
98
+ # st.download_button(
99
+ # "Download Table",
100
+ # result.to_csv().encode('utf-8'),
101
+ # "result.csv",
102
+ # "text/csv",
103
+ # key='download-csv'
104
+ # )
105
 
106
  show_aggrid_table(result)
107