kaxap committed on
Commit
b9334e6
1 Parent(s): 0cedeae

Update app.py

Files changed (1)
  1. app.py +4 -4
app.py CHANGED
@@ -16,8 +16,8 @@ def average_pool(last_hidden_states: Tensor,
     return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
 
 
-df = pd.read_csv('rjokes.csv')
-data_embeddings = np.load("rjokes-embeddings.npy")
+df = pd.read_csv('wiki.csv')
+data_embeddings = np.load("wiki-embeddings.npy")
 
 print("loading the model...")
 tokenizer = AutoTokenizer.from_pretrained('intfloat/multilingual-e5-large')
@@ -25,7 +25,7 @@ model = AutoModel.from_pretrained('intfloat/multilingual-e5-large')
 
 with gr.Blocks() as demo:
     chatbot = gr.Chatbot()
-    msg = gr.Textbox(label="r/jokes semantic search query", placeholder="for example, \"programming and religion\"")
+    msg = gr.Textbox(label="simple wikipedia semantic search query", placeholder="for example, \"medieval battles\"")
     clear = gr.ClearButton([msg, chatbot])
 
     def respond(message, chat_history):
@@ -47,7 +47,7 @@ with gr.Blocks() as demo:
         top_k_idx = cos_similarities.argsort()[-k:][::-1]
 
         # Get corresponding 'text' for top k similar points
-        top_k_text = df['text'].iloc[top_k_idx].tolist()
+        top_k_text = df['title'].iloc[top_k_idx].tolist()
 
         bot_message = "\n".join(f"{i+1}. {top_k_text[i]}" for i in range(len(top_k_text)))
 
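
For context on the retrieval path this commit touches, here is a minimal, hedged sketch of how the precomputed "wiki-embeddings.npy" could be built and then queried with the same intfloat/multilingual-e5-large model and average_pool used in app.py. It is not part of the commit: the "passage: "/"query: " prefixes, the masked_fill line inside average_pool, the batch size, the L2 normalization, and k = 5 are assumptions; only the return line, the argsort top-k lookup, and the df['title'] access appear in the diff.

# Hypothetical sketch, not part of this commit. File names match the diff;
# prefixes, pooling details, batch size, and k are assumptions.
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoModel, AutoTokenizer

def average_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    # Zero out padding positions, then mean-pool the remaining token embeddings.
    last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
    return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]

tokenizer = AutoTokenizer.from_pretrained('intfloat/multilingual-e5-large')
model = AutoModel.from_pretrained('intfloat/multilingual-e5-large')
model.eval()

df = pd.read_csv('wiki.csv')  # assumed to contain the 'title' column used in app.py

def embed(texts):
    # Tokenize, encode, pool, and L2-normalize so a dot product equals cosine similarity.
    inputs = tokenizer(texts, max_length=512, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**inputs)
    pooled = average_pool(outputs.last_hidden_state, inputs['attention_mask'])
    return F.normalize(pooled, p=2, dim=1).numpy()

# Offline step: embed every row once and save the matrix that app.py loads at startup.
chunks = [embed(["passage: " + t for t in df['title'].iloc[i:i + 32]])
          for i in range(0, len(df), 32)]
np.save("wiki-embeddings.npy", np.vstack(chunks))

# Query-time step, mirroring the top-k lookup in respond():
data_embeddings = np.load("wiki-embeddings.npy")
query_embedding = embed(["query: medieval battles"])[0]
cos_similarities = data_embeddings @ query_embedding  # unit vectors, so dot product = cosine
k = 5  # assumed value
top_k_idx = cos_similarities.argsort()[-k:][::-1]
print(df['title'].iloc[top_k_idx].tolist())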