import logging
import os

import requests
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
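

# The class below expects a FAISS index already saved in a local "vectorestore"
# directory. A minimal sketch of how such an index might be built beforehand
# (the example texts and the embeddings model name are placeholder assumptions,
# not values taken from this file):
#
#     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
#     store = FAISS.from_texts(["first document", "second document"], embeddings)
#     store.save_local("vectorestore")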
class RAG:
    NO_ANSWER_MESSAGE: str = "Sorry, I couldn't answer your question."

    def __init__(self, hf_token, embeddings_model, model_name):
        self.model_name = model_name
        self.hf_token = hf_token
        # Load the vector store, embedding queries on CPU.
        embeddings = HuggingFaceEmbeddings(model_name=embeddings_model, model_kwargs={"device": "cpu"})
        # allow_dangerous_deserialization is required to load the locally pickled FAISS index.
        self.vector_store = FAISS.load_local("vectorestore", embeddings, allow_dangerous_deserialization=True)
        logging.info("RAG loaded!")

    def get_context(self, instruction, number_of_contexts=1):
        context = ""
        # similarity_search_with_score returns (Document, score) tuples.
        documentos = self.vector_store.similarity_search_with_score(instruction, k=number_of_contexts)
        for doc in documentos:
            context += doc[0].page_content
        return context

    def predict(self, instruction, context):
        # Use the token passed to the constructor rather than re-reading the environment.
        headers = {
            "Accept": "application/json",
            "Authorization": f"Bearer {self.hf_token}",
            "Content-Type": "application/json",
        }
        query = f"### Instruction\n{instruction}\n\n### Context\n{context}\n\n### Answer\n "
        payload = {
            "inputs": query,
            "parameters": {}
        }
        # model_name is expected to be the full URL of a Hugging Face inference endpoint.
        response = requests.post(self.model_name, headers=headers, json=payload)
        # Keep only the completion after the final "###" marker, stripping the
        # leading " Answer\n" label (8 characters) and the trailing character.
        return response.json()[0]["generated_text"].split("###")[-1][8:-1]

    def get_response(self, prompt: str) -> str:
        context = self.get_context(prompt)
        response = self.predict(prompt, context)
        if not response:
            return self.NO_ANSWER_MESSAGE
        return response
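

# Usage sketch: a minimal way to wire the class up. The token is read from the
# HF_TOKEN environment variable; the embeddings model and endpoint URL below
# are placeholder assumptions, not values taken from this file.
if __name__ == "__main__":
    rag = RAG(
        hf_token=os.getenv("HF_TOKEN"),
        embeddings_model="sentence-transformers/all-MiniLM-L6-v2",  # placeholder model
        model_name="https://api-inference.huggingface.co/models/your-org/your-model",  # placeholder endpoint
    )
    print(rag.get_response("What does the indexed documentation say about X?"))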