import os

import requests
import streamlit as st
from bs4 import BeautifulSoup

from langchain.chains import RetrievalQA
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.llms import HuggingFaceEndpoint
from langchain_text_splitters import CharacterTextSplitter
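
# Remote LLM used for every generation. HuggingFaceEndpoint calls the Hugging Face
# Inference API, so a HUGGINGFACEHUB_API_TOKEN must be available in the environment;
# repo_id can be swapped for any other hosted instruct model.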
llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-7B-Instruct-v0.2", temperature=0.3)


def get_text(url):
    """Fetch a web page and write its paragraph text to text/temp.txt."""
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")
    paragraphs = soup.find_all("p")

    os.makedirs("text", exist_ok=True)
    with open(os.path.join("text", "temp.txt"), "w", encoding="utf-8") as file:
        for paragraph in paragraphs:
            file.write(paragraph.get_text() + "\n")

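# st.cache_resource caches the returned Chroma index per URL, so a page is
# scraped and embedded only once rather than on every script rerun.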
@st.cache_resource
def create_langchain_index(input_text):
    """Scrape the page, split it into chunks, embed them, and build a Chroma index."""
    print("--indexing---")
    get_text(input_text)

    loader = TextLoader(os.path.join("text", "temp.txt"), encoding="utf-8")
    documents = loader.load()

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    docs = text_splitter.split_documents(documents)

    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    # Embed the chunks once and persist the store to disk so it can be reloaded later.
    db = Chroma.from_documents(documents=docs, embedding=embeddings,
                               persist_directory="chroma_db")
    return db

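# The leading underscore in _db tells st.cache_data not to hash that argument
# (the Chroma store is unhashable); answers are cached per (input_text, query) pair.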
@st.cache_data
def get_response(input_text, query, _db):
    """Run a RetrievalQA chain over the indexed page; input_text keys the cache per URL."""
    print(f"--querying---{query}")
    retrieval_chain = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=_db.as_retriever())
    response = retrieval_chain.run(query)
    return response


st.title('Webpage Question and Answering')

input_text = st.text_input("Provide the link to the webpage...")

summary_response = ""
tweet_response = ""
ln_response = ""

if input_text:
    db = create_langchain_index(input_text)

    summary_query = "Write a 100 words summary of the document"
    summary_response = get_response(input_text, summary_query, db)

    tweet_query = "Write a twitter tweet"
    tweet_response = get_response(input_text, tweet_query, db)

    ln_query = "Write a linkedin post for the document"
    ln_response = get_response(input_text, ln_query, db)

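# Show the three generated pieces in collapsible sections; they stay empty
# until a URL has been indexed above.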
with st.expander('Page Summary'):
    st.info(summary_response)

with st.expander('Tweet'):
    st.info(tweet_response)

with st.expander('LinkedIn Post'):
    st.info(ln_response)

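# Free-form question answering against the same cached index.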
question = st.text_input("Ask a question from the link you shared...")

if st.button("Ask"):
    if not input_text:
        st.warning("Please provide a webpage link first.")
    elif question:
        db = create_langchain_index(input_text)
        response = get_response(input_text, question, db)
        st.write(response)
    else:
        st.warning("Please enter a question.")