HOXSEC committed on
Commit
b8a4d3f
1 Parent(s): 3d90b5c

Create app.py

Files changed (1)
  app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
+ import gradio as gr
+ import ollama
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.document_loaders import WebBaseLoader
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain_community.vectorstores import Chroma
+ from langchain_community.embeddings import OllamaEmbeddings
+
+ # Load documents from a URL or an uploaded PDF, split them into chunks,
+ # embed the chunks, and return a retriever over the resulting vector store
+ def load_and_retrieve(url, document):
+     # Load from the URL if one was given, otherwise from the uploaded PDF
+     if url:
+         loader = WebBaseLoader(web_paths=(url,))
+         docs = loader.load()
+     elif document:
+         loader = PyPDFLoader(document)
+         docs = loader.load_and_split()
+     else:
+         raise gr.Error("Please enter a URL or upload a document.")
+
+     # Split into overlapping chunks, embed them, and index them in Chroma
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
+     splits = text_splitter.split_documents(docs)
+     embeddings = OllamaEmbeddings(model="nomic-embed-text")
+     vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
+     return vectorstore.as_retriever()
+
+ # Join the page content of the retrieved documents into one context string
+ def format_docs(docs):
+     return "\n\n".join(doc.page_content for doc in docs)
+
+ # The RAG chain: retrieve relevant chunks, build a prompt, and query the model
+ def rag_chain(url=None, document=None, question=""):
+     retriever = load_and_retrieve(url, document)
+     retrieved_docs = retriever.invoke(question)
+     formatted_context = format_docs(retrieved_docs)
+     formatted_prompt = f"Question: {question}\n\nContext: {formatted_context}"
+     # Debug output: show the prompt that is sent to the model
+     print("==============")
+     print(formatted_prompt)
+     print("==============")
+     response = ollama.chat(model='llama3', messages=[{'role': 'user', 'content': formatted_prompt}])
+     return response['message']['content']
+
+ # Gradio interface
+ iface = gr.Interface(
+     fn=rag_chain,
+     inputs=["text", "file", "text"],
+     outputs="text",
+     title="RAG Chain Question Answering",
+     description="Enter a URL or upload a document, then ask a question to get an answer from the RAG chain."
+ )
+
+ # Launch the app
+ iface.launch(share=True)
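
A quick way to smoke-test the chain without the Gradio UI is to call rag_chain directly, e.g. from a REPL. This is a minimal sketch and not part of the commit: it assumes a local Ollama server with the llama3 and nomic-embed-text models already pulled, that the file is saved as app.py on the import path with the iface.launch line commented out (launching blocks at import time), and the URL is a placeholder.

    from app import rag_chain  # hypothetical import; assumes launch is disabled

    # Ask a question about a live web page; any loadable URL works here
    answer = rag_chain(
        url="https://example.com/article",  # placeholder URL
        question="What is this page about?",
    )
    print(answer)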