import requests
from bs4 import BeautifulSoup
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import streamlit as st
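
# Run the app with `streamlit run app.py` (assuming this script is saved as app.py).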

# Fetch the target page and parse its HTML.
url = "https://aspireec.com/"
response = requests.get(url, timeout=10)
response.raise_for_status()  # fail fast on HTTP errors instead of parsing an error page
soup = BeautifulSoup(response.text, 'html.parser')

# Keep the text of every non-empty <p> tag as the chatbot's knowledge base.
content = soup.find_all('p')
website_data = [p.text.strip() for p in content if p.text.strip()]

# Persist the scraped paragraphs so they can be reused without re-scraping.
with open('website_data.json', 'w') as file:
    json.dump(website_data, file)

# Embed each paragraph with a compact sentence-transformer model.
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(website_data)

# Build a flat (exact-search) L2 FAISS index over the paragraph embeddings.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))

# Summarization pipeline used to condense the retrieved paragraph;
# flan-t5-base is a seq2seq model, used here through the summarization task.
summarizer = pipeline("summarization", model="google/flan-t5-base")

def get_answer(query):
    # Embed the question and retrieve the single closest paragraph from the index.
    query_embedding = model.encode([query])
    distances, indices = index.search(np.array(query_embedding), k=1)
    best_match = website_data[indices[0][0]]

    # Condense the retrieved paragraph into a short answer.
    summarized_response = summarizer(best_match, max_length=50, min_length=10, do_sample=False)
    return summarized_response[0]['summary_text']
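
# Hypothetical call, outside Streamlit (the question text is illustrative only):
#   get_answer("What services does the site describe?")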

# Minimal Streamlit front end: one text box, one answer.
st.title("Website Chatbot")

user_input = st.text_input("Ask me anything about the website:")
if user_input:
    response = get_answer(user_input)
    st.write(response)
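
# Note: Streamlit re-runs this entire script on every interaction, so the page
# is re-scraped and both models reload for each question. A common fix (a
# sketch, not part of the original code) is to move the heavy setup into a
# cached function:
#
#   @st.cache_resource
#   def load_models():
#       embedder = SentenceTransformer('all-MiniLM-L6-v2')
#       summ = pipeline("summarization", model="google/flan-t5-base")
#       return embedder, summ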