import gradio as gr
import os
# import pandas as pd
from langchain_community.vectorstores import FAISS
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
## not needed since we load a previously saved vector store from disk instead of parsing the pdf at run time
# from langchain_community.document_loaders import PyPDFLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
## models tried:
## TinyLlama/TinyLlama-1.1B-Chat-v1.0
## meta-llama/Meta-Llama-3-8B
## google/gemma-1.1-7b-it
HF_TOKEN = os.environ.get("HF_TOKEN", None)
model_id = "google/gemma-1.1-2b-it"
## gemma models are gated, so pass the token when loading (previously HF_TOKEN was read but never used)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(model_id, token=HF_TOKEN)
embeddings = HuggingFaceEmbeddings()  # defaults to sentence-transformers/all-mpnet-base-v2
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)
hf = HuggingFacePipeline(pipeline=pipe)
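## a minimal sketch (an assumption, not part of the original app): smoke-test the raw
## pipeline before wiring it into the RAG chain; the prompt below is hypothetical
# print(hf.invoke("What is retrieval-augmented generation?"))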
## commenting this code because now we load vectors directly instead of parsing the pdf
# pdfLoader = PyPDFLoader("./LangchainPaper/RAGInputPaper.pdf")
# documents = pdfLoader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=30)
# docs = text_splitter.split_documents(documents)
## creating vector embeddings during run using FAISS
# vectorstore = FAISS.from_documents(docs, embedding=embeddings)
# retriever = vectorstore.as_retriever()
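## a plausible one-time persistence step (an assumption, not shown in the original):
## after building `vectorstore` above, save it so later runs can simply load it
# vectorstore.save_local("./fi_LangchainPaper")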
## loading previously saved vector embeddings from local disk
vectorstore = FAISS.load_local("./fi_LangchainPaper", embeddings, allow_dangerous_deserialization=True)
retriever = vectorstore.as_retriever()
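## optional tuning (an assumption, not used by the original app): cap the number of
## retrieved chunks so the small model's context window stays short
# retriever = vectorstore.as_retriever(search_kwargs={"k": 3})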
qa = RetrievalQA.from_chain_type(
    llm=hf, chain_type="stuff", retriever=retriever, return_source_documents=False)
# queries = pd.read_csv('./interactions/queries.csv')
def greet(Question):
    answer = qa.invoke({"query": Question})
    result = answer.get("result", "")
    ## the default "stuff" prompt prefixes the model's reply with "Helpful Answer:"
    pa = [a.split("Helpful Answer: ") for a in result.split("\n") if "Helpful Answer" in a]
    # new = pd.DataFrame([{'query': Question, 'response': pa[0][-1]}])
    # queries = pd.concat([queries, new], ignore_index=True)
    # queries.to_csv('./interactions/queries.csv', index=False)
    if not pa:  # fall back to the raw output if the marker is missing
        return result
    return pa[0][-1]
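## example call (hypothetical), mirroring what the Gradio interface does per request
# print(greet("What is langchain framework?"))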
if __name__ == "__main__":
    title = "RAG with LLMs"
    description = """
    <img src="https://superagi.com/wp-content/uploads/2023/10/Introduction-to-RAGA-Retrieval-Augmented-Generation-and-Actions-1200x600.png.webp" width=100%>
    <br>
    Demo using a vector store-backed retriever. This space demonstrates RAG applied to a small model and its effectiveness; I chose a small model because of the space constraint. The current space runs on a mere <b>2GB of RAM</b>, hence there is some delay in generating output. Test it to your heart's content and let me know your thoughts; I will keep updating this space with small improvements to the architecture and design.
    <ul>
    <li>initial model: TinyLlama/TinyLlama-1.1B-Chat-v1.0</li>
    <li>update1: This space no longer builds a FAISS index at startup; it loads a locally saved FAISS index</li>
    <li>update2: This space now uses the google/gemma-1.1-2b-it model to generate output, cutting the response time to a third</li>
    </ul>
    """
    article = """<p style='text-align: center'>
    <ul>You can ask questions like -
    <li>What is langchain framework?</li>
    <li>What is Action Agent?</li>
    <li>What are forms of memory implementation in langchain?</li>
    <li>What is question answering from documents?</li>
    </ul>
    Go through the paper to learn more about langchain, then test how this solution performs. <a href='https://www.researchgate.net/publication/372669736_Creating_Large_Language_Model_Applications_Utilizing_LangChain_A_Primer_on_Developing_LLM_Apps_Fast' target='_blank'>This paper is the data source for this solution</a>.
    Have you already used RAG? Feel free to suggest improvements.
    Excited about the implementation? You know where to find me!
    I would love to connect and have a chat.
    </p>"""
    iface = gr.Interface(fn=greet, inputs="text", outputs=gr.Textbox(lines=5, label="Answer"),
                         title=title, description=description, article=article)
    iface.launch(share=True)