vjain committed on
Commit
9ddaca4
1 Parent(s): f3baea4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -2
app.py CHANGED
@@ -7,7 +7,19 @@ from datasets import load_dataset
7
  openai.api_key="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"
8
  from openai.embeddings_utils import get_embedding
9
  from openai.embeddings_utils import cosine_similarity
 
 
 
 
 
 
10
 
 
 
 
 
 
 
11
  Bio_embeddings = load_dataset('vjain/biology_AP_embeddings')
12
 
13
  df = pd.DataFrame(Bio_embeddings['train'])
@@ -17,8 +29,8 @@ df = pd.DataFrame(Bio_embeddings['train'])
17
  def reply(input):
18
 
19
  input = input
20
- input_vector = get_embedding(input, engine="text-embedding-ada-002")
21
- df["similiarities"]=df["embedding"].apply(lambda x: cosine_similarity(x,input_vector))
22
  data = df.sort_values("similiarities", ascending=False).head(20)
23
  data.to_csv("sorted.csv")
24
  context = []
 
7
  openai.api_key="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"
8
  from openai.embeddings_utils import get_embedding
9
  from openai.embeddings_utils import cosine_similarity
10
+ import requests
11
+ model_id = "sentence-transformers/all-MiniLM-L6-v2"
12
+ import json
13
+ hf_token = "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"
14
+ import re
15
+ from sklearn.metrics.pairwise import cosine_similarity
16
 
17
def generate_embeddings(texts, model_id, hf_token, timeout=120):
    """Request feature-extraction output for *texts* from the HF Inference API.

    Args:
        texts: Value sent as the ``inputs`` field of the JSON request body.
        model_id: Model identifier interpolated into the feature-extraction
            pipeline URL.
        hf_token: Token sent in the ``Authorization: Bearer`` header.
        timeout: Seconds to wait for the HTTP response before giving up.
            Pass ``None`` to wait indefinitely (the previous behavior).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    # NOTE(review): "wait_for_model" presumably makes the API block until the
    # model is loaded rather than answering with an error - confirm in HF docs.
    payload = {"inputs": texts, "options": {"wait_for_model": True}}
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    # Fail loudly on HTTP errors; otherwise the error JSON would be handed back
    # to the caller as if it were embeddings.
    response.raise_for_status()
    return response.json()
23
  Bio_embeddings = load_dataset('vjain/biology_AP_embeddings')
24
 
25
  df = pd.DataFrame(Bio_embeddings['train'])
 
29
  def reply(input):
30
 
31
  input = input
32
+ input_vector = get_embedding(input, mdoel_id,hf_token)
33
+ df["similiarities"]=df["embedding"].apply(lambda x: cosine_similarity([x],[input_vector])[0][0])
34
  data = df.sort_values("similiarities", ascending=False).head(20)
35
  data.to_csv("sorted.csv")
36
  context = []