import gradio as gr
from operator import itemgetter
import os
# import pandas as pd
from langchain_community.vectorstores import FAISS
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
## not needed since we load the previously saved vector store from disk instead of reading the pdf at run time
# from langchain_community.document_loaders import PyPDFLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
## models tried
## TinyLlama/TinyLlama-1.1B-Chat-v1.0
## meta-llama/Meta-Llama-3-8B
## google/gemma-1.1-7b-it
HF_TOKEN = os.environ.get("HF_TOKEN", None)
model_id = "google/gemma-1.1-2b-it"
# gemma models are gated on the Hub, so pass the token explicitly
# (huggingface_hub also picks up the HF_TOKEN environment variable on its own)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(model_id, token=HF_TOKEN)
embeddings = HuggingFaceEmbeddings()
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)
hf = HuggingFacePipeline(pipeline=pipe)
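## optional sanity check (hypothetical prompt, not part of the app flow):
## uncomment to verify the raw transformers pipeline before wiring it into LangChain
# print(pipe("What is retrieval-augmented generation?")[0]["generated_text"])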
## commented out because the vectors are now loaded from disk instead of parsing the pdf
# pdfLoader = PyPDFLoader("./LangchainPaper/RAGInputPaper.pdf")
# documents = pdfLoader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size = 512, chunk_overlap = 30)
# docs = text_splitter.split_documents(documents)
## creating vector embeddings during run using FAISS
# vectorstore = FAISS.from_documents(
# docs, embedding=embeddings
# )
# retriever = vectorstore.as_retriever()
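## assumption: the local index under ./fi_LangchainPaper was presumably produced once from the
## commented-out pipeline above and then persisted with FAISS's save_local, e.g.:
# vectorstore.save_local("./fi_LangchainPaper")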
## loading previously saved vector embeddings from local space
vectorstore = FAISS.load_local("./fi_LangchainPaper", embeddings, allow_dangerous_deserialization=True)
retriever = vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=hf, chain_type="stuff", retriever=retriever, return_source_documents=False
)
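## optional debugging aid (hypothetical query): inspect which chunks the retriever returns
## before they are stuffed into the prompt
# for doc in retriever.get_relevant_documents("What is langchain framework?"):
#     print(doc.page_content[:200])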
# queries=pd.read_csv('./interactions/queries.csv')
def greet(Question):
    # run the RAG chain; the "stuff" chain returns the full prompt plus completion in 'result'
    answer = qa({"query": Question})
    result = answer.get("result", "")
    # keep only the text that follows the "Helpful Answer:" marker in the generated output
    pa = [a.split("Helpful Answer: ") for a in result.split("\n") if "Helpful Answer" in a]
    # new=pd.DataFrame.from_dict({'query':Question,'response':pa[0][-1]},orient='index')
    # queries.append(new)
    # queries.to_csv('./interactions/queries.csv')
    # fall back to the raw result if the model did not emit a "Helpful Answer:" marker
    return pa[0][-1] if pa else result
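## quick local test of the end-to-end chain (hypothetical question, commented out so the
## Space only answers through the Gradio UI)
# print(greet("What is langchain framework?"))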
if __name__ == "__main__":
    title = "RAG with LLMs"
    description = """
    <img src="https://superagi.com/wp-content/uploads/2023/10/Introduction-to-RAGA-Retrieval-Augmented-Generation-and-Actions-1200x600.png.webp" width=100%>
    <br>
    Demo using a vector store-backed retriever. This Space demonstrates the application of RAG with a small model and its effectiveness; a small model was chosen because of resource constraints. The current Space runs on a mere <b>2GB of RAM</b>, hence there is some delay in generating output. Test it to your heart's content and let me know your thoughts; I will keep updating this Space with small improvements to the architecture and design.
    <ul>
    <li>model: TinyLlama/TinyLlama-1.1B-Chat-v1.0</li>
    <li>update1: This Space no longer creates a FAISS index on build; it uses a locally saved FAISS index</li>
    <li>update2: This Space now uses the google/gemma-1.1-2b-it model to generate output, reducing the response time to one third</li>
    </ul>
    """
    article = """<p style='text-align: center'>
    <ul>You can ask questions like -
    <li>What is the langchain framework?</li>
    <li>What is an Action Agent?</li>
    <li>What are the forms of memory implementation in langchain?</li>
    <li>What is question answering from documents?</li>
    </ul>
    Go through the paper linked here to learn more about langchain, then test how this solution performs: <a href='https://www.researchgate.net/publication/372669736_Creating_Large_Language_Model_Applications_Utilizing_LangChain_A_Primer_on_Developing_LLM_Apps_Fast' target='_blank'>this paper is the data source for this solution</a>.
    Have you already used RAG? Feel free to suggest improvements.
    Feel excited about the implementation? You know where to find me!
    I would love to connect and have a chat.
    </p>"""
    iface = gr.Interface(
        fn=greet,
        inputs="text",
        outputs=gr.Textbox(lines=5, label="Answer"),
        title=title,
        description=description,
        article=article,
    )
    iface.launch(share=True)