import gradio as gr
import os
# import pandas as pd
from langchain_community.vectorstores import FAISS
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
## not needed since we load a previously saved vector store from disk instead of parsing the pdf at run time
# from langchain_community.document_loaders import PyPDFLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
## models tried:
## TinyLlama/TinyLlama-1.1B-Chat-v1.0
## meta-llama/Meta-Llama-3-8B
## google/gemma-1.1-7b-it
HF_TOKEN = os.environ.get("HF_TOKEN", None)
model_id = "google/gemma-1.1-2b-it"
## gemma models are gated, so pass the token when loading (previously HF_TOKEN was read but never used)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(model_id, token=HF_TOKEN)
embeddings = HuggingFaceEmbeddings()  # defaults to sentence-transformers/all-mpnet-base-v2
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)
hf = HuggingFacePipeline(pipeline=pipe)
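## a minimal sketch (an assumption, not part of the original app): smoke-test the raw
## pipeline before wiring it into the RAG chain; the prompt below is hypothetical
# print(hf.invoke("What is retrieval-augmented generation?"))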
## commenting this code because now we load vectors directly instead of parsing the pdf
# pdfLoader = PyPDFLoader("./LangchainPaper/RAGInputPaper.pdf")
# documents = pdfLoader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=30)
# docs = text_splitter.split_documents(documents)
## creating vector embeddings during run using FAISS
# vectorstore = FAISS.from_documents(docs, embedding=embeddings)
# retriever = vectorstore.as_retriever()
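## a plausible one-time persistence step (an assumption, not shown in the original):
## after building `vectorstore` above, save it so later runs can simply load it
# vectorstore.save_local("./fi_LangchainPaper")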
## loading previously saved vector embeddings from local disk
vectorstore = FAISS.load_local("./fi_LangchainPaper", embeddings, allow_dangerous_deserialization=True)
retriever = vectorstore.as_retriever()
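## optional tuning (an assumption, not used by the original app): cap the number of
## retrieved chunks so the small model's context window stays short
# retriever = vectorstore.as_retriever(search_kwargs={"k": 3})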
qa = RetrievalQA.from_chain_type(
    llm=hf, chain_type="stuff", retriever=retriever, return_source_documents=False)
# queries = pd.read_csv('./interactions/queries.csv')
def greet(Question):
    answer = qa.invoke({"query": Question})
    result = answer.get("result", "")
    ## the default "stuff" prompt prefixes the model's reply with "Helpful Answer:"
    pa = [a.split("Helpful Answer: ") for a in result.split("\n") if "Helpful Answer" in a]
    # new = pd.DataFrame([{'query': Question, 'response': pa[0][-1]}])
    # queries = pd.concat([queries, new], ignore_index=True)
    # queries.to_csv('./interactions/queries.csv', index=False)
    if not pa:  # fall back to the raw output if the marker is missing
        return result
    return pa[0][-1]
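## example call (hypothetical), mirroring what the Gradio interface does per request
# print(greet("What is langchain framework?"))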
if __name__ == "__main__":
    title = "RAG with LLMs"
    description = """
    <img src="https://superagi.com/wp-content/uploads/2023/10/Introduction-to-RAGA-Retrieval-Augmented-Generation-and-Actions-1200x600.png.webp" width=100%>
    <br>
    Demo using a vector store-backed retriever. This space demonstrates RAG applied to a small model and its effectiveness; I chose a small model because of the space constraint. The current space runs on a mere <b>2GB of RAM</b>, hence there is some delay in generating output. Test it to your heart's content and let me know your thoughts; I will keep updating this space with small improvements to the architecture and design.
    <ul>
    <li>initial model: TinyLlama/TinyLlama-1.1B-Chat-v1.0</li>
    <li>update1: This space no longer builds a FAISS index at startup; it loads a locally saved FAISS index</li>
    <li>update2: This space now uses the google/gemma-1.1-2b-it model to generate output, cutting the response time to a third</li>
    </ul>
    """
    article = """<p style='text-align: center'>
    <ul>You can ask questions like -
    <li>What is langchain framework?</li>
    <li>What is Action Agent?</li>
    <li>What are forms of memory implementation in langchain?</li>
    <li>What is question answering from documents?</li>
    </ul>
    Go through the paper to learn more about langchain, then test how this solution performs. <a href='https://www.researchgate.net/publication/372669736_Creating_Large_Language_Model_Applications_Utilizing_LangChain_A_Primer_on_Developing_LLM_Apps_Fast' target='_blank'>This paper is the data source for this solution</a>.
    Have you already used RAG? Feel free to suggest improvements.
    Excited about the implementation? You know where to find me!
    I would love to connect and have a chat.
    </p>"""
    iface = gr.Interface(fn=greet, inputs="text", outputs=gr.Textbox(lines=5, label="Answer"),
                         title=title, description=description, article=article)
    iface.launch(share=True)