# LinkWise / app.py
import os
import streamlit as st
import pickle
import time
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
#from langchain.vectorstores import FAISS
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEndpoint
from sentence_transformers import SentenceTransformer
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import HuggingFaceHub
from dotenv import load_dotenv
load_dotenv()
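# LLM: Mistral-7B-Instruct, called through the Hugging Face Hub Inference API.
# Assumes an HF_TOKEN_FOR_WEBSEARCH access token is available (e.g. set in the .env file loaded above).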
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"#"mistralai/Mistral-7B-Instruct-v0.3"
llm = HuggingFaceHub(
repo_id=repo_id,
task="text-generation",
huggingfacehub_api_token=os.getenv("HF_TOKEN_FOR_WEBSEARCH"),
model_kwargs={"temperature": 0.6,
"max_tokens":1000}
)
st.title("LinkWise πŸ”Ž")
st.sidebar.title("Article URLs")
# Initialize session state to store the number of URL inputs
if 'url_count' not in st.session_state:
    st.session_state.url_count = 1  # Start with a single URL input
# Function to add a new URL input
def add_url():
    st.session_state.url_count += 1
# List to store the URLs
urls = []
# Create the URL input fields dynamically
for i in range(st.session_state.url_count):
    url = st.sidebar.text_input(f"URL {i+1}")
    urls.append(url)
# Add a button to increase the number of URLs
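# Clicking it calls add_url() via on_click; Streamlit then reruns the script and the loop above renders one more URL field.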
st.sidebar.button("Add another URL", on_click=add_url)
process_url_clicked=st.sidebar.button("Submit URLs")
# urls=[]
# for i in range(3):
# url=st.sidebar.text_input(f"URL {i+1}")
# urls.append(url)
# process_url_clicked=st.sidebar.button("Process URLs")
file_path="faiss_store_db.pkl"
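# The FAISS index is pickled to this path when "Submit URLs" is processed; questions can only be answered once this file exists.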
placeholder=st.empty()
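# A single st.empty() placeholder is reused: first for status messages during processing, then for the "Question :" input box below.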
if process_url_clicked:
    # Load the article content from the submitted URLs
    loader = UnstructuredURLLoader(urls=urls)
    placeholder.text("Data Loading started...")
    data = loader.load()

    # Split the documents into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(
        separators=['\n\n', '\n', '.', ','],
        chunk_size=600,
        chunk_overlap=100
    )
    placeholder.text("Splitting of Data Started...")
    docs = text_splitter.split_documents(data)

    # Create embeddings and build the FAISS index
    model_name = "sentence-transformers/all-mpnet-base-v2"  # alternative: "sentence-transformers/all-MiniLM-L6-v2"
    hf_embeddings = HuggingFaceEmbeddings(model_name=model_name)
    placeholder.text("Started Building Embedded Vector...")
    vector_index = FAISS.from_documents(docs, hf_embeddings)

    # Persist the FAISS vector store so it can be reloaded on later reruns
    with open(file_path, 'wb') as f:
        pickle.dump(vector_index, f)
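
# Note: pickling the whole vector store is the simplest persistence option here; FAISS stores
# can also be saved and reloaded with vector_index.save_local() / FAISS.load_local() if preferred.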
query = placeholder.text_input("Question :")
submit = st.button("Submit")
if query:
    if os.path.exists(file_path):
        with open(file_path, 'rb') as f:
            vector_index = pickle.load(f)
        retrieval_qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",  # 'stuff', 'map_reduce', or 'refine' depending on the use case
            retriever=vector_index.as_retriever()
        )
        result = retrieval_qa({'query': query})
        text = result['result']

        # The default QA prompt ends with "Helpful Answer:" and the raw completion echoes it,
        # so keep only the text that follows the marker.
        start_index = text.find("\nHelpful Answer:")
        if start_index != -1:
            parsed_text = text[start_index + len("\nHelpful Answer:"):].strip()
        else:
            parsed_text = text.strip()  # Fall back to the full response if the marker is missing

        if query or submit:
            st.header("Answer :")
            st.write(parsed_text)