Spaces:

rishabh5752
/

Equity_Research_Bot

Sleeping

App Files Files Community

rishabh5752 committed on May 20

Commit

17829f2

•

1 Parent(s): 6f1887b

Create app.py

Browse files

Files changed (1) hide show

app.py +71 -0

app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import os
+import pickle
+import time
+import gradio as gr
+from langchain import OpenAI
+from langchain.chains import RetrievalQAWithSourcesChain
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import UnstructuredURLLoader
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
+from dotenv import load_dotenv
+load_dotenv()  # take environment variables from .env (especially openai api key)
+# Define the main function to process URLs and handle queries
+def process_and_query(url1, url2, url3, query):
+    urls = [url1, url2, url3]
+    file_path = "faiss_store_openai.pkl"
+    llm = OpenAI(temperature=0.9, max_tokens=500)
+    # Load data
+    loader = UnstructuredURLLoader(urls=urls)
+    data = loader.load()
+    # Split data
+    text_splitter = RecursiveCharacterTextSplitter(
+        separators=['\n\n', '\n', '.', ','],
+        chunk_size=1000
+    )
+    docs = text_splitter.split_documents(data)
+    # Create embeddings and save it to FAISS index
+    embeddings = OpenAIEmbeddings()
+    vectorstore_openai = FAISS.from_documents(docs, embeddings)
+    # Save the FAISS index to a pickle file
+    with open(file_path, "wb") as f:
+        pickle.dump(vectorstore_openai, f)
+    # Process the query
+    if os.path.exists(file_path):
+        with open(file_path, "rb") as f:
+            vectorstore = pickle.load(f)
+            chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
+            result = chain({"question": query}, return_only_outputs=True)
+            answer = result["answer"]
+            # Extract and format sources
+            sources = result.get("sources", "")
+            sources_list = sources.split("\n") if sources else []
+            return answer, sources_list
+# Define the Gradio interface
+url1_input = gr.inputs.Textbox(label="URL 1")
+url2_input = gr.inputs.Textbox(label="URL 2")
+url3_input = gr.inputs.Textbox(label="URL 3")
+query_input = gr.inputs.Textbox(label="Question")
+output_text = gr.outputs.Textbox(label="Answer")
+output_sources = gr.outputs.Textbox(label="Sources")
+interface = gr.Interface(
+    fn=process_and_query,
+    inputs=[url1_input, url2_input, url3_input, query_input],
+    outputs=[output_text, output_sources],
+    title="RockyBot: News Research Tool 📈",
+    description="Enter up to three news article URLs and ask a question. The bot will process the articles and provide an answer along with the sources."
+)
+if __name__ == "__main__":
+    interface.launch()