import gradio as gr
from operator import itemgetter
import os
import pandas as pd
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
## models tried
## TinyLlama/TinyLlama-1.1B-Chat-v1.0
## meta-llama/Meta-Llama-3-8B
## google/gemma-1.1-7b-it
HF_TOKEN = os.environ.get("HF_TOKEN", None)
model_id = "google/gemma-1.1-2b-it"
## Gemma is a gated model on the Hugging Face Hub, so pass the access token explicitly
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(model_id, token=HF_TOKEN)
## defaults to the sentence-transformers/all-mpnet-base-v2 embedding model
embeddings = HuggingFaceEmbeddings()
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=100)
hf = HuggingFacePipeline(pipeline=pipe)
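## Optional smoke test of the wrapped pipeline before building the chain; a sketch only,
## the prompt string is illustrative (HuggingFacePipeline accepts a plain string via invoke):
# print(hf.invoke("What is retrieval-augmented generation?"))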
pdfLoader = PyPDFLoader("./LangchainPaper/RAGInputPaper.pdf")
documents = pdfLoader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=30)
docs = text_splitter.split_documents(documents)
## creating vector embeddings during run using FAISS
# vectorstore = FAISS.from_documents(
# docs, embedding=embeddings
# )
# retriever = vectorstore.as_retriever()
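## If the index is built at run time, it can also be persisted for later runs. A minimal
## sketch, assuming the commented-out creation block above has been run; save_local writes
## the FAISS index to the same folder that is loaded below:
# vectorstore.save_local("./fi_LangchainPaper")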
## loading previously saved vector embeddings from local space
vectorstore = FAISS.load_local("./fi_LangchainPaper", embeddings, allow_dangerous_deserialization=True)
retriever = vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=hf, chain_type="stuff", retriever=retriever, return_source_documents=False
)
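## The chain above uses LangChain's default "stuff" QA prompt, which ends with "Helpful Answer:".
## A custom prompt could be passed via chain_type_kwargs instead; a sketch with an illustrative
## template, kept commented out because greet() below parses the default "Helpful Answer:" marker:
# qa_prompt = PromptTemplate(
#     input_variables=["context", "question"],
#     template="Use the context below to answer the question.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:",
# )
# qa = RetrievalQA.from_chain_type(
#     llm=hf, chain_type="stuff", retriever=retriever,
#     return_source_documents=False, chain_type_kwargs={"prompt": qa_prompt},
# )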
queries = pd.read_csv('./interactions/queries.csv')

def greet(Question):
    global queries
    answer = qa.invoke({"query": Question})
    ## the default "stuff" QA prompt ends with "Helpful Answer:", so keep only the text after that marker
    pa = [a.split("Helpful Answer: ") for a in answer.get('result').split('\n') if "Helpful Answer" in a]
    ## log the interaction; column values must be lists to build a one-row DataFrame,
    ## and DataFrame.append was removed in pandas 2.x, so use pd.concat instead
    new = pd.DataFrame({'query': [Question], 'response': [pa[0][-1]]})
    queries = pd.concat([queries, new], ignore_index=True)
    queries.to_csv('./interactions/queries.csv', index=False)
    return pa[0][-1]
if __name__ == "__main__":
    title = "RAG with LLMs"
    description = """
Demo using a vector store-backed retriever. This Space demonstrates RAG with a small model and how effective it can be; a small model was chosen because of the Space's resource constraints. The current Space runs on a mere 2 GB of RAM, so there is some delay in generating output. Test it to your heart's content and let me know your thoughts; I will keep updating this Space with small improvements to the architecture and design