unhcr / tools.py
elyxlz
initial commit
0c7add2
raw
history blame
1.89 kB
import faiss
import pickle
import os
from PyPDF2 import PdfReader
import glob
from pathlib import Path
import re
import requests
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
import dotenv
dotenv.load_dotenv()
def call_semantic_api(query, store_path, k):
    """Run a semantic search request and return the backend's response.

    The three arguments are bundled into a dict with the keys ``"query"``,
    ``"store_path"`` and ``"k"`` and handed to ``semantic_search.search``.

    NOTE(review): ``semantic_search`` is a module-level name that is not
    defined in the visible part of this file -- confirm where it is bound
    before relying on this function. An HTTP variant of this call (a POST to
    a local ``/search`` endpoint) used to sit here as commented-out code.

    Args:
        query: The search query.
        store_path: Location of the store to search, forwarded as-is.
        k: Number of results requested, forwarded as-is.

    Returns:
        Whatever ``semantic_search.search`` returns for the request.
    """
    return semantic_search.search(
        {
            "query": query,
            "store_path": store_path,
            "k": k,
        }
    )
class SemanticSearch():
    """Callable that returns the stored passages most similar to a query.

    The store is unpickled once at construction time and kept on ``self.db``.
    It must expose ``similarity_search_with_score(query, k=...)`` returning a
    sequence of ``(document, score)`` pairs, where each document has a
    ``page_content`` string and, when ``with_source`` is set, a
    ``metadata['source']`` entry.
    NOTE(review): presumably a pickled LangChain FAISS vector store -- confirm.
    """

    def __init__(
        self,
        threshold: float,
        with_source=False,
        k=5,
        store_path='./data/store.pkl',
    ):
        """Load the pickled store and remember the search settings.

        Args:
            threshold: Stored on the instance. NOTE(review): ``__call__`` does
                not currently use it to filter results -- confirm whether
                score filtering was intended.
            with_source: When true, each passage is followed by the base name
                of its source.
            k: Number of results requested from the store per query.
            store_path: Path of the pickle file holding the store. Defaults to
                the location that was previously hard-coded.

        Raises:
            OSError: If ``store_path`` cannot be opened.
        """
        self.threshold = threshold
        self.with_source = with_source
        self.k = k
        # NOTE(security): unpickling can execute arbitrary code; only load
        # store files that come from a trusted source.
        with open(store_path, 'rb') as f:
            self.db = pickle.load(f)

    def __call__(self, query):
        """Search the store and join the matching passages into one string.

        Args:
            query: The text to search for.

        Returns:
            ``None`` when the store returns no matches. Otherwise the
            passages' text joined by three newlines; with ``with_source``
            enabled each passage is followed by a ``Source:`` line naming the
            base name of its source.
        """
        hits = self.db.similarity_search_with_score(query, k=self.k)
        if not hits:
            return None
        if not self.with_source:
            return '\n\n\n'.join(doc.page_content for doc, _score in hits)
        # Only the file name of the source is shown, not its full path.
        return '\n\n\n'.join(
            doc.page_content
            + '\n\nSource:'
            + os.path.basename(str(doc.metadata['source']))
            + '\n'
            for doc, _score in hits
        )
class ContentSearch():
    """Wrap a search callable and render its result through a template.

    ``semantic_search`` is called with the query; ``prompt_template`` must
    provide ``format(content=...)``.
    """

    def __init__(self, semantic_search, prompt_template):
        """Keep the search callable and the template for later calls."""
        self.semantic_search = semantic_search
        self.prompt_template = prompt_template

    def __call__(self, query):
        """Return the template filled with the search result for ``query``.

        When the search callable returns ``None`` the literal string
        ``"No results found"`` is returned instead.
        """
        retrieved = self.semantic_search(query)
        if retrieved is not None:
            return self.prompt_template.format(content=retrieved)
        return "No results found"