coolkrishds committed on
Commit b30d739 · 1 Parent(s): 6c6e6a2

Create app.py

Files changed (1)
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
+ import requests
+ import pandas as pd
+ import torch
+ from datasets import load_dataset
+ from retry import retry
+ from sentence_transformers.util import semantic_search
+
+ # Install dependencies:
+ # pip install datasets
+ # pip install retry
+ # pip install -U sentence-transformers
+
+ # FAQ corpus that the query will be matched against
+ texts = ["How do I get a replacement Medicare card?",
+          "What is the monthly premium for Medicare Part B?",
+          "How do I terminate my Medicare Part B (medical insurance)?",
+          "How do I sign up for Medicare?",
+          "Can I sign up for Medicare Part B if I am working and have health insurance through an employer?",
+          "How do I sign up for Medicare Part B if I already have Part A?",
+          "What are Medicare late enrollment penalties?",
+          "What is Medicare and who can get it?",
+          "How can I get help with my Medicare Part A and Part B premiums?",
+          "What are the different parts of Medicare?",
+          "Will my Medicare premiums be higher because of my higher income?",
+          "What is TRICARE ?",
+          "Should I sign up for Medicare Part B if I have Veterans' Benefits?"]
+
+ model_id = "sentence-transformers/all-MiniLM-L6-v2"
+ hf_token = "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"
+
+ api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
+ headers = {"Authorization": f"Bearer {hf_token}"}
+
+ # Alternative without retry: ask the server to wait for the model to load.
+ # def query(texts):
+ #     response = requests.post(api_url, headers=headers, json={"inputs": texts, "options": {"wait_for_model": True}})
+ #     return response.json()
+
+ # Request embeddings from the Inference API, retrying while the model loads.
+ @retry(tries=3, delay=10)
+ def query(texts):
+     response = requests.post(api_url, headers=headers, json={"inputs": texts})
+     result = response.json()
+     if isinstance(result, list):
+         return result
+     elif list(result.keys())[0] == "error":
+         raise RuntimeError(
+             "The model is currently loading, please re-run the query."
+         )
+
+ # Embed the FAQ texts and save the embeddings to a CSV file.
+ output = query(texts)
+ embeddings = pd.DataFrame(output)
+ embeddings.to_csv("embeddings.csv", index=False)
+
+ # Load the pre-computed FAQ embeddings and convert them to a float tensor.
+ faqs_embeddings = load_dataset('ITESM/embedded_faqs_medicare')
+ dataset_embeddings = torch.from_numpy(faqs_embeddings["train"].to_pandas().to_numpy()).to(torch.float)
+
+ # Embed the user question with the same model.
+ question = ["How can Medicare help me?"]
+ output = query(question)
+ query_embeddings = torch.FloatTensor(output)
+ print(f"The size of our embedded dataset is {dataset_embeddings.shape} and of our embedded query is {query_embeddings.shape}.")
+
+ # Retrieve the top 5 FAQ entries most similar to the query.
+ hits = semantic_search(query_embeddings, dataset_embeddings, top_k=5)
+ print([texts[hits[0][i]['corpus_id']] for i in range(len(hits[0]))])
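A small follow-up sketch, reusing the hits and texts variables from app.py above: semantic_search returns one ranked list per query, and each hit is a dict with corpus_id and score keys (highest score first), so the matches can also be printed together with their similarity scores:

for hit in hits[0]:
    # hit['corpus_id'] indexes into the FAQ list; hit['score'] is the cosine similarity
    print(f"{hit['score']:.3f}  {texts[hit['corpus_id']]}")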