"""Streamlit app: index news-article URLs and answer questions about them.

Workflow:
1. The user enters up to five article URLs in the sidebar and clicks
   "Process URLs"; the pages are downloaded, split into chunks, embedded
   with OpenAI embeddings and stored in a FAISS index saved on disk.
2. The user types a question; the saved index is loaded and a
   ``RetrievalQAWithSourcesChain`` produces an answer plus its sources.
"""

import os
import pickle
import time

import streamlit as st
from dotenv import load_dotenv
from langchain import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Reads variables from a local .env file into the process environment.
load_dotenv()

# Default on-disk location of the pickled FAISS vector store.
DEFAULT_INDEX_PATH = "faiss_store_openai.pkl"

st.title("Finance News Research Tool")
st.sidebar.title("News Articles")


def load_and_process_urls(urls, file_path=DEFAULT_INDEX_PATH):
    """Download the given URLs, embed their text and save a FAISS index.

    Args:
        urls: Iterable of URL strings. Blank / whitespace-only entries
            (e.g. sidebar fields the user left empty) are ignored.
        file_path: Where the pickled vector store is written. Defaults to
            ``DEFAULT_INDEX_PATH``.

    Returns:
        None. Progress and problems are reported through Streamlit widgets.
    """
    # The sidebar always yields five strings; only keep the ones filled in.
    cleaned_urls = [url.strip() for url in urls if url and url.strip()]
    if not cleaned_urls:
        st.warning("Please enter at least one URL before processing.")
        return

    st.text("Loading Data ...")
    loader = UnstructuredURLLoader(urls=cleaned_urls)
    data = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=1000,
    )
    docs = text_splitter.split_documents(data)
    if not docs:
        # Building an index from zero documents fails, so stop early with
        # an actionable message instead.
        st.error("No text could be extracted from the provided URLs.")
        return

    # Show the status message before the (slow) embedding call starts.
    st.text("Embedding Vectors...")
    embeddings = OpenAIEmbeddings()
    vectorstore_openai = FAISS.from_documents(docs, embeddings)

    # Save FAISS index to pickle file.
    # NOTE(review): some LangChain releases cannot pickle the FAISS store;
    # if this fails, consider FAISS.save_local / FAISS.load_local.
    with open(file_path, "wb") as f:
        pickle.dump(vectorstore_openai, f)


def process_query(query, file_path, llm=None):
    """Answer ``query`` from the saved FAISS index and render the result.

    Args:
        query: The user's question.
        file_path: Path of the pickled vector store written by
            ``load_and_process_urls``.
        llm: Optional language model for the QA chain. When omitted, an
            ``OpenAI`` client is created (the original code referenced an
            ``llm`` name that was never defined).

    Returns:
        None. The answer and sources are written to the Streamlit page.
    """
    if not os.path.exists(file_path):
        st.warning("No processed articles found. Please process URLs first.")
        return

    # SECURITY NOTE: pickle.load executes arbitrary code from the file.
    # Only load index files this app created itself.
    with open(file_path, "rb") as f:
        vectorstore = pickle.load(f)

    if llm is None:
        # Low temperature keeps answers close to the retrieved sources.
        llm = OpenAI(temperature=0)

    chain = RetrievalQAWithSourcesChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
    )
    result = chain({"question": query}, return_only_outputs=True)

    st.header("Answer")
    st.write(result["answer"])

    sources = result.get("sources", "")
    if sources:
        st.subheader("Sources:")
        for source in sources.split("\n"):
            # Skip blank lines so empty elements are not rendered.
            if source.strip():
                st.write(source)


# User inputs for URLs
urls = [st.sidebar.text_input(f"URL {i+1}") for i in range(5)]

# Button to process URLs
process_url_clicked = st.sidebar.button("Process URLs")

# Placeholder that hosts the question input
main_placeholder = st.empty()

# File path for storing FAISS index
file_path = DEFAULT_INDEX_PATH

# Load and process data when button is clicked
if process_url_clicked:
    load_and_process_urls(urls, file_path)

# User input for query
query = main_placeholder.text_input("Question: ")

# Process the query if provided
if query:
    process_query(query, file_path)