import os
import pickle

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain_community.vectorstores import FAISS

load_dotenv()

# LLM served through the Hugging Face Inference API
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
llm = HuggingFaceHub(
    repo_id=repo_id,
    task="text-generation",
    huggingfacehub_api_token=os.getenv("HF_TOKEN_FOR_WEBSEARCH"),
    model_kwargs={"temperature": 0.6, "max_new_tokens": 1000},
)

st.title("LinkWise 🔎")
st.sidebar.title("Article URLs")

# Initialize session state to track how many URL inputs to show
if "url_count" not in st.session_state:
    st.session_state.url_count = 1  # start with a single URL field


def add_url():
    """Add one more URL input field on the next rerun."""
    st.session_state.url_count += 1


# Render the URL input fields dynamically, keeping only non-empty values
urls = []
for i in range(st.session_state.url_count):
    url = st.sidebar.text_input(f"URL {i + 1}")
    if url:
        urls.append(url)

st.sidebar.button("Add another URL", on_click=add_url)
process_url_clicked = st.sidebar.button("Submit URLs")

file_path = "faiss_store_db.pkl"
status = st.empty()

if process_url_clicked:
    # Load the articles from the submitted URLs
    status.text("Data loading started...")
    loader = UnstructuredURLLoader(urls=urls)
    data = loader.load()

    # Split the documents into overlapping chunks
    status.text("Splitting of data started...")
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ".", " "],
        chunk_size=600,
        chunk_overlap=100,
    )
    docs = text_splitter.split_documents(data)

    # Embed the chunks and build the FAISS index
    status.text("Started building embedded vector store...")
    model_name = "sentence-transformers/all-mpnet-base-v2"
    hf_embeddings = HuggingFaceEmbeddings(model_name=model_name)
    vector_index = FAISS.from_documents(docs, hf_embeddings)

    # Persist the store to disk (FAISS also offers save_local/load_local)
    with open(file_path, "wb") as f:
        pickle.dump(vector_index, f)
    status.text("Vector store built and saved.")

query = st.text_input("Question :")
submit = st.button("Submit")

if query and os.path.exists(file_path):
    with open(file_path, "rb") as f:
        vector_index = pickle.load(f)

    retrieval_qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",  # 'stuff', 'map_reduce', or 'refine' depending on the use case
        retriever=vector_index.as_retriever(),
    )
    result = retrieval_qa({"query": query})
    text = result["result"]

    # The completion echoes the prompt, so keep only the text after the
    # "Helpful Answer:" marker when it is present; otherwise show everything.
    marker = "\nHelpful Answer:"
    start_index = text.find(marker)
    parsed_text = text[start_index + len(marker):].strip() if start_index != -1 else text.strip()

    st.header("Answer :")
    st.write(parsed_text)
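# ---------------------------------------------------------------------------
# Running the app: a minimal sketch. The file name app.py is an assumption,
# and HF_TOKEN_FOR_WEBSEARCH must hold a valid Hugging Face access token
# (read from a local .env file by load_dotenv above).
#
#   # .env
#   HF_TOKEN_FOR_WEBSEARCH=hf_...your-token-here...
#
#   $ pip install streamlit python-dotenv langchain langchain-community \
#         sentence-transformers faiss-cpu unstructured
#   $ streamlit run app.py
# ---------------------------------------------------------------------------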