# app.py - Hugging Face file view: uploaded by rishabh5752, commit 17829f2 ("Create app.py", verified), no virus, 2.55 kB
import os
import pickle
import time
import gradio as gr
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from dotenv import load_dotenv
load_dotenv() # take environment variables from .env (especially openai api key)
# Define the main function to process URLs and handle queries
def process_and_query(url1: str, url2: str, url3: str, query: str) -> tuple:
    """Answer a question using the content of up to three web pages.

    The non-blank URLs are loaded with ``UnstructuredURLLoader``, split into
    chunks, embedded with ``OpenAIEmbeddings`` into an in-memory FAISS index,
    and queried through a ``RetrievalQAWithSourcesChain``.

    Args:
        url1: First article URL; may be blank.
        url2: Second article URL; may be blank.
        url3: Third article URL; may be blank.
        query: The question to ask about the loaded articles.

    Returns:
        An ``(answer, sources)`` pair. ``answer`` is the chain's "answer"
        output and ``sources`` is the chain's "sources" output split on
        newlines (an empty list when the chain reports no sources). When no
        URL or no question is given, or nothing could be loaded, ``answer``
        is a short explanatory message and ``sources`` is ``[]``.
    """
    # Drop blank fields: the UI allows "up to three" URLs, so some may be empty.
    urls = [u.strip() for u in (url1, url2, url3) if u and u.strip()]
    if not urls:
        return "Please enter at least one article URL.", []
    if not query or not query.strip():
        return "Please enter a question.", []

    llm = OpenAI(temperature=0.9, max_tokens=500)

    # Load data
    loader = UnstructuredURLLoader(urls=urls)
    data = loader.load()

    # Split data
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000,
    )
    docs = text_splitter.split_documents(data)
    if not docs:
        # An index cannot be built from zero documents, so stop early.
        return "No content could be loaded from the provided URLs.", []

    # Create embeddings and build the FAISS index.
    # NOTE: the previous version pickled this index to a fixed file
    # ("faiss_store_openai.pkl") and immediately unpickled it again. That
    # round trip added nothing, let concurrent requests overwrite each
    # other's file, and relied on pickle; the in-memory index is used
    # directly instead.
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(docs, embeddings)

    # Process the query
    chain = RetrievalQAWithSourcesChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
    )
    result = chain({"question": query}, return_only_outputs=True)
    answer = result["answer"]

    # Extract and format sources
    sources = result.get("sources", "")
    sources_list = sources.split("\n") if sources else []
    return answer, sources_list
# Define the Gradio interface.
# Components come from the top-level ``gr`` namespace: the older
# ``gr.inputs`` / ``gr.outputs`` namespaces were deprecated in Gradio 3 and
# removed in Gradio 4.
url1_input = gr.Textbox(label="URL 1")
url2_input = gr.Textbox(label="URL 2")
url3_input = gr.Textbox(label="URL 3")
query_input = gr.Textbox(label="Question")
output_text = gr.Textbox(label="Answer")
output_sources = gr.Textbox(label="Sources")

# Wire the four text inputs to process_and_query and show its two results.
interface = gr.Interface(
    fn=process_and_query,
    inputs=[url1_input, url2_input, url3_input, query_input],
    outputs=[output_text, output_sources],
    # The emoji was mis-encoded ("πŸ“ˆ") in the previous version of this literal.
    title="RockyBot: News Research Tool 📈",
    description="Enter up to three news article URLs and ask a question. The bot will process the articles and provide an answer along with the sources."
)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()