import uuid

import gradio as gr
import ollama
import bs4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings

# Models served by the local Ollama instance.
EMBEDDING_MODEL = "nomic-embed-text"
CHAT_MODEL = "llama3"

# Chunking parameters shared by every source type.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 200


def _build_retriever(docs):
    """Split *docs* into chunks, embed them, and return a retriever over them.

    Args:
        docs: Documents produced by one of the loaders.

    Returns:
        The retriever obtained from the Chroma store built over the chunks.

    Raises:
        ValueError: If splitting yields no chunks, i.e. there is nothing to
            index.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    splits = text_splitter.split_documents(docs)
    if not splits:
        raise ValueError("No text could be extracted from the provided source.")

    embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)
    # Use a fresh collection per request instead of the default name.
    # NOTE(review): Chroma's in-memory client appears to be shared within a
    # process, so reusing the default collection would let chunks from earlier
    # requests show up in later retrievals - confirm against the installed
    # chromadb version.
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embeddings,
        collection_name=f"rag-{uuid.uuid4().hex}",
    )
    return vectorstore.as_retriever()


# Check if user has inputted a URL or uploaded a document and load, split, and retrieve documents
def load_and_retrieve(url, document):
    """Load the requested source and return a retriever over its chunks.

    The URL takes precedence when both a URL and a document are supplied.

    Args:
        url: Web page address, or a falsy value when none was given.
        document: Path of an uploaded PDF (or an object exposing the path as
            ``.name``), or a falsy value when none was uploaded.

    Returns:
        A retriever over the chunked, embedded source.

    Raises:
        ValueError: If neither a URL nor a document was provided, or if the
            source yields no text.
    """
    # A whitespace-only URL is truthy but unusable; treat it as absent.
    if isinstance(url, str):
        url = url.strip()

    if url:
        # If user has inputted a URL
        loader = WebBaseLoader(web_paths=(url,), bs_kwargs={})
    elif document:
        # If user has uploaded a document
        # NOTE(review): some Gradio versions pass a tempfile-like object
        # rather than a path string; fall back to its .name - confirm against
        # the installed Gradio version.
        pdf_path = getattr(document, "name", document)
        loader = PyPDFLoader(pdf_path)
    else:
        raise ValueError("Please enter a URL or upload a document.")

    # load() rather than load_and_split(): chunking happens exactly once, in
    # _build_retriever, with the configured chunk size and overlap.
    return _build_retriever(loader.load())


# Function to format documents
def format_docs(docs):
    """Return the page content of each document, separated by blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


# Function that defines the RAG chain
def rag_chain(url=False, document=False, question=''):
    """Answer *question* using context retrieved from a URL or an uploaded PDF.

    Args:
        url: Web page address to use as the knowledge source.
        document: Uploaded PDF to use when no URL is given.
        question: The user's question.

    Returns:
        The text content of the chat model's reply.

    Raises:
        gr.Error: If no usable source was provided, so the Gradio UI shows a
            readable message instead of a generic failure.
    """
    try:
        retriever = load_and_retrieve(url, document)
    except ValueError as exc:
        raise gr.Error(str(exc)) from exc

    retrieved_docs = retriever.invoke(question)
    formatted_context = format_docs(retrieved_docs)
    formatted_prompt = f"Question: {question}\n\nContext: {formatted_context}"
    print("==============")
    print(formatted_prompt)
    print("==============")
    response = ollama.chat(
        model=CHAT_MODEL,
        messages=[{'role': 'user', 'content': formatted_prompt}],
    )
    return response['message']['content']


# Gradio interface
iface = gr.Interface(
    fn=rag_chain,
    inputs=["text", "file", "text"],
    outputs="text",
    title="RAG Chain Question Answering",
    description="Enter a URL or upload a document and a query to get answers from the RAG chain.",
)

# Launch the app
iface.launch(share=True)