import requests
from bs4 import BeautifulSoup
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import streamlit as st
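
# Run the app with `streamlit run app.py` (assuming this script is saved as app.py).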

# Fetch the target page and parse its HTML.
url = "https://aspireec.com/"
response = requests.get(url, timeout=10)
response.raise_for_status()  # fail fast on HTTP errors instead of parsing an error page
soup = BeautifulSoup(response.text, 'html.parser')

# Keep the text of every non-empty <p> tag as the chatbot's knowledge base.
content = soup.find_all('p')
website_data = [p.text.strip() for p in content if p.text.strip()]

# Persist the scraped paragraphs so they can be reused without re-scraping.
with open('website_data.json', 'w') as file:
    json.dump(website_data, file)

# Embed each paragraph with a compact sentence-transformer model.
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(website_data)

# Build a flat (exact-search) L2 FAISS index over the paragraph embeddings.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))

# Summarization pipeline used to condense the retrieved paragraph;
# flan-t5-base is a seq2seq model, used here through the summarization task.
summarizer = pipeline("summarization", model="google/flan-t5-base")

def get_answer(query):
    # Embed the question and retrieve the single closest paragraph from the index.
    query_embedding = model.encode([query])
    distances, indices = index.search(np.array(query_embedding), k=1)
    best_match = website_data[indices[0][0]]

    # Condense the retrieved paragraph into a short answer.
    summarized_response = summarizer(best_match, max_length=50, min_length=10, do_sample=False)
    return summarized_response[0]['summary_text']
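
# Hypothetical call, outside Streamlit (the question text is illustrative only):
#   get_answer("What services does the site describe?")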

# Minimal Streamlit front end: one text box, one answer.
st.title("Website Chatbot")

user_input = st.text_input("Ask me anything about the website:")
if user_input:
    response = get_answer(user_input)
    st.write(response)
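
# Note: Streamlit re-runs this entire script on every interaction, so the page
# is re-scraped and both models reload for each question. A common fix (a
# sketch, not part of the original code) is to move the heavy setup into a
# cached function:
#
#   @st.cache_resource
#   def load_models():
#       embedder = SentenceTransformer('all-MiniLM-L6-v2')
#       summ = pipeline("summarization", model="google/flan-t5-base")
#       return embedder, summ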