coolkrishds committed on
Commit b30d739 · 1 Parent(s): 6c6e6a2

Create app.py

Files changed (1)
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
+ import requests
+ import pandas as pd
+ import torch
+ from datasets import load_dataset
+ from retry import retry
+ from sentence_transformers.util import semantic_search
+
+ # Install dependencies:
+ # pip install datasets
+ # pip install retry
+ # pip install -U sentence-transformers
+
+ # FAQ corpus that the query will be matched against
+ texts = ["How do I get a replacement Medicare card?",
+          "What is the monthly premium for Medicare Part B?",
+          "How do I terminate my Medicare Part B (medical insurance)?",
+          "How do I sign up for Medicare?",
+          "Can I sign up for Medicare Part B if I am working and have health insurance through an employer?",
+          "How do I sign up for Medicare Part B if I already have Part A?",
+          "What are Medicare late enrollment penalties?",
+          "What is Medicare and who can get it?",
+          "How can I get help with my Medicare Part A and Part B premiums?",
+          "What are the different parts of Medicare?",
+          "Will my Medicare premiums be higher because of my higher income?",
+          "What is TRICARE ?",
+          "Should I sign up for Medicare Part B if I have Veterans' Benefits?"]
+
+ model_id = "sentence-transformers/all-MiniLM-L6-v2"
+ hf_token = "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"
+
+ api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
+ headers = {"Authorization": f"Bearer {hf_token}"}
+
+ # Alternative without retry: ask the server to wait for the model to load.
+ # def query(texts):
+ #     response = requests.post(api_url, headers=headers, json={"inputs": texts, "options": {"wait_for_model": True}})
+ #     return response.json()
+
+ # Request embeddings from the Inference API, retrying while the model loads.
+ @retry(tries=3, delay=10)
+ def query(texts):
+     response = requests.post(api_url, headers=headers, json={"inputs": texts})
+     result = response.json()
+     if isinstance(result, list):
+         return result
+     elif list(result.keys())[0] == "error":
+         raise RuntimeError(
+             "The model is currently loading, please re-run the query."
+         )
+
+ # Embed the FAQ texts and save the embeddings to a CSV file.
+ output = query(texts)
+ embeddings = pd.DataFrame(output)
+ embeddings.to_csv("embeddings.csv", index=False)
+
+ # Load the pre-computed FAQ embeddings and convert them to a float tensor.
+ faqs_embeddings = load_dataset('ITESM/embedded_faqs_medicare')
+ dataset_embeddings = torch.from_numpy(faqs_embeddings["train"].to_pandas().to_numpy()).to(torch.float)
+
+ # Embed the user question with the same model.
+ question = ["How can Medicare help me?"]
+ output = query(question)
+ query_embeddings = torch.FloatTensor(output)
+ print(f"The size of our embedded dataset is {dataset_embeddings.shape} and of our embedded query is {query_embeddings.shape}.")
+
+ # Retrieve the top 5 FAQ entries most similar to the query.
+ hits = semantic_search(query_embeddings, dataset_embeddings, top_k=5)
+ print([texts[hits[0][i]['corpus_id']] for i in range(len(hits[0]))])
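A small follow-up sketch, reusing the hits and texts variables from app.py above: semantic_search returns one ranked list per query, and each hit is a dict with corpus_id and score keys (highest score first), so the matches can also be printed together with their similarity scores:

for hit in hits[0]:
    # hit['corpus_id'] indexes into the FAQ list; hit['score'] is the cosine similarity
    print(f"{hit['score']:.3f}  {texts[hit['corpus_id']]}")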