# gradio
import gradio as gr
#import random
import time
#boto3 for S3 access
import boto3
from botocore import UNSIGNED
from botocore.client import Config
# access .env file
import os
from dotenv import load_dotenv
#from bs4 import BeautifulSoup
# HF libraries
from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceHubEmbeddings
# vectorstore
from langchain.vectorstores import Chroma
#from langchain.vectorstores import FAISS
# retrieval chain
#from langchain.chains import RetrievalQA
from langchain.chains import RetrievalQAWithSourcesChain
# prompt template
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
# logging
import logging
#import zipfile
# improve results with retriever
# from langchain.retrievers import ContextualCompressionRetriever
# from langchain.retrievers.document_compressors import LLMChainExtractor
# from langchain.retrievers.document_compressors import EmbeddingsFilter
# from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers import BM25Retriever, EnsembleRetriever
# reorder retrieved documents
#from langchain.document_transformers import LongContextReorder
# github issues
from langchain.document_loaders import GitHubIssuesLoader
# debugging
from langchain.globals import set_verbose
# caching
from langchain.globals import set_llm_cache
#from langchain.cache import InMemoryCache
# We can do the same thing with a SQLite cache
from langchain.cache import SQLiteCache
#set_llm_cache(InMemoryCache())
# Switch on LangChain's global verbose flag (debugging aid).
set_verbose(True)

# Pull settings from the local .env file into the process environment.
# `config` keeps whatever load_dotenv() returns.
config = load_dotenv(".env")

# Each setting is None when the corresponding variable is not defined.
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
AWS_S3_LOCATION = os.environ.get("AWS_S3_LOCATION")  # passed to S3 as the bucket
AWS_S3_FILE = os.environ.get("AWS_S3_FILE")  # passed to S3 as the object key
VS_DESTINATION = os.environ.get("VS_DESTINATION")  # local download target path
# LLM used for answering, served through the Hugging Face Hub.
# NOTE: despite its name, `model_id` is the HuggingFaceHub LLM object itself,
# not a repository id string.
# Disabled generation options (left at their defaults): temperature=0.1,
# streaming=True, return_full_text=True.
model_id = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    model_kwargs=dict(max_new_tokens=1024, repetition_penalty=1.2),
)

# Embedding model for the vector store
# (disabled alternative: sentence-transformers/multi-qa-mpnet-base-dot-v1).
model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceHubEmbeddings(repo_id=model_name)
def _remove_if_present(path):
    """Delete the file at *path*; do nothing when it does not exist.

    Uses EAFP (try the removal, ignore only FileNotFoundError) instead of an
    exists()-then-remove() pair, which is racy and needs two filesystem calls.
    Any other OSError (e.g. the path is a directory, permission denied) still
    propagates, exactly as os.remove() raises it.
    """
    try:
        os.remove(path)
    except FileNotFoundError:
        # Nothing left over from a previous run.
        pass


# remove old vectorstore
_remove_if_present(VS_DESTINATION)
# remove old sqlite cache so every start begins with an empty LLM cache
_remove_if_present(".langchain.sqlite")
# cache LLM calls in a fresh SQLite database
set_llm_cache(SQLiteCache(database_path=".langchain.sqlite"))
# retrieve vectorstore
# Requests are sent unsigned (signature_version=UNSIGNED), i.e. without
# AWS credentials.
s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED))
## Chroma DB
# download_file() does not create missing directories and fails with
# FileNotFoundError when the target's parent is absent (e.g. first run on a
# fresh checkout), so make sure it exists. abspath() guarantees a non-empty
# dirname even when VS_DESTINATION is a bare file name.
os.makedirs(os.path.dirname(os.path.abspath(VS_DESTINATION)), exist_ok=True)
s3.download_file(AWS_S3_LOCATION, AWS_S3_FILE, VS_DESTINATION)
# Open the persisted Chroma store with the Hub embeddings defined above.
# NOTE(review): persist_directory is hard-coded; presumably VS_DESTINATION
# points inside ./vectorstore — confirm.
db = Chroma(persist_directory="./vectorstore", embedding_function=embeddings)
# NOTE(review): the result of get() is discarded — presumably a sanity check
# that the collection loads; confirm before removing.
db.get()
## FAISS DB
# s3.download_file('rad-rag-demos', 'vectorstores/faiss_db_ray.zip', './chroma_db/faiss_db_ray.zip')
# with zipfile.ZipFile('./chroma_db/faiss_db_ray.zip', 'r') as zip_ref:
# zip_ref.extractall('./chroma_db/')
# FAISS_INDEX_PATH='./chroma_db/faiss_db_ray'
# db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
# initialize the bm25 retriever and chroma/faiss retriever
# bm25_retriever = BM25Retriever.
# bm25_retriever.k = 2
# Retrieve from the vector store with MMR (maximal marginal relevance) search.
# search_kwargs stay at the library defaults; a disabled alternative was
# search_kwargs={'k': 3, 'lambda_mult': 0.25}.
retriever = db.as_retriever(search_type="mmr")
# asks LLM to create 3 alternatives based on user query
# multi_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=model_id)
# asks LLM to extract relevant parts from retrieved documents
# compressor = LLMChainExtractor.from_llm(model_id)
# compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=multi_retriever)
# NOTE: a module-level `global qa` statement was removed here. `global` only
# has an effect inside a function body; names bound at module scope are
# already global.
template = """
You are the friendly documentation buddy Arti, who helps the Human in using RAY, the open-source unified framework for scaling AI and Python applications.\
Use the following context (delimited by
This is a privately hosten Docs AI Buddy,
It will help you with any question regarding the documentation of Ray ;)