# IntelMentorship / app.py
import os
# Disable tokenizers parallelism to avoid deadlocks
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import streamlit as st
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
from fpdf import FPDF
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor
from summarizer import Summarizer
import re
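# Parse a locally saved HTML file into a BeautifulSoup tree (result cached by Streamlit).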
@st.cache_data
def parse_html_file(file_path):
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            html_content = file.read()
        soup = BeautifulSoup(html_content, "html.parser")
        return soup
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
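# Download an Amazon product page, save it to temp.html, and extract the product
# title, category, description, and review texts from the saved markup.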
@st.cache_data
def scrape_amazon_product(url):
    global revList
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US, en;q=0.5'
    }
    try:
        response = requests.get(url, headers=HEADERS)
        if response.status_code == 200:
            with open("temp.html", 'wb') as file:
                file.write(response.content)
        else:
            print(f"Failed to download HTML. Status code: {response.status_code}")
    except Exception as e:
        print(f"An error occurred: {e}")
    current_directory = os.getcwd()
    file_name = "temp.html"
    file_path = os.path.join(current_directory, file_name)
    global global_file_path
    global_file_path = file_path
    soup = parse_html_file(file_path)
    if soup is None:
        return None
    product_name_element = soup.find('span', {'id': 'productTitle'})
    product_name = product_name_element.text.strip() if product_name_element else None
    categories = soup.find_all('a', {'class': 'a-link-normal a-color-tertiary'})
    category = categories[-1].text.strip() if categories else None
    product_description_element = soup.find('div', {'id': 'productDescription'})
    product_description = product_description_element.text.strip() if product_description_element else None
    review_elements = soup.find_all('span', {'data-hook': 'review-body'})
    reviews = [review.text.strip() for review in review_elements]
    revList = reviews
    return {
        'product_name': product_name,
        'category': category,
        'product_description': product_description,
        'Reviews': reviews
    }
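# Label each review positive or negative with a keyword heuristic on one-sentence
# extractive summaries, then produce a three-sentence summary for each group.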
def CalcReviews(reviews):
    positive_reviews = []
    negative_reviews = []
    model = Summarizer()
    for review in tqdm(reviews):
        summary = model(review, num_sentences=1)
        # Simple keyword check on the summary decides the sentiment bucket.
        if "good" in summary or "great" in summary:
            positive_reviews.append(review)
        else:
            negative_reviews.append(review)
    # Guard against division by zero when there are no negative reviews.
    ratio = len(positive_reviews) / (len(negative_reviews) if negative_reviews else 1)
    positive_summary = model("\n".join(positive_reviews), num_sentences=3)
    negative_summary = model("\n".join(negative_reviews), num_sentences=3)
    return {
        'positive_reviews': positive_reviews,
        'negative_reviews': negative_reviews,
        'Ratio of Positive to Negative Reviews': ratio,
        'positive_summary': positive_summary,
        'negative_summary': negative_summary
    }
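# Render the scraped product data and review analysis into product_report.pdf.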
def generate_pdf(product_data, review_data):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.cell(200, 10, txt="Product Report", ln=True, align="C")
    pdf.cell(200, 10, txt=f"Product Name: {product_data['product_name']}", ln=True, align="L")
    pdf.cell(200, 10, txt=f"Category: {product_data['category']}", ln=True, align="L")
    pdf.cell(200, 10, txt="Product Description:", ln=True, align="L")
    pdf.multi_cell(0, 10, txt=product_data['product_description'])
    pdf.cell(200, 10, txt="Reviews Summary", ln=True, align="L")
    pdf.cell(200, 10, txt=f"Number of Positive Reviews: {len(review_data['positive_reviews'])}", ln=True, align="L")
    pdf.cell(200, 10, txt=f"Number of Negative Reviews: {len(review_data['negative_reviews'])}", ln=True, align="L")
    pdf.cell(200, 10, txt=f"Positive to Negative Ratio: {review_data['Ratio of Positive to Negative Reviews']}", ln=True, align="L")
    pdf.cell(200, 10, txt="Summary of Positive Reviews", ln=True, align="L")
    pdf.multi_cell(0, 10, txt=review_data['positive_summary'])
    pdf.cell(200, 10, txt="Summary of Negative Reviews", ln=True, align="L")
    pdf.multi_cell(0, 10, txt=review_data['negative_summary'])
    pdf_file_path = "product_report.pdf"
    pdf.output(pdf_file_path)
    return pdf_file_path
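# Upload the generated PDF to ChatPDF and ask a question about it
# (requires a valid API key in place of YOUR_API_KEY).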
def get_answer(question, pdf_path):
    headers = {
        'Authorization': 'Bearer YOUR_API_KEY',
        'Content-Type': 'application/json'
    }
    # For the file upload, omit the JSON Content-Type so requests can set the
    # multipart boundary itself; close the file handle once the upload is done.
    upload_headers = {'Authorization': headers['Authorization']}
    with open(pdf_path, 'rb') as pdf_file:
        files = {'file': pdf_file}
        response1 = requests.post(
            'https://api.chatpdf.com/v1/sources', headers=upload_headers, files=files)
    if response1.status_code == 200:
        source_id = response1.json()['sourceId']
    else:
        st.error("Failed to upload PDF to ChatPDF.")
        return None
    data = {
        'sourceId': source_id,
        'messages': [
            {
                'role': "user",
                'content': question,
            }
        ]
    }
    response = requests.post(
        'https://api.chatpdf.com/v1/chats/message', headers=headers, json=data)
    if response.status_code == 200:
        return response.json()['content']
    else:
        st.error("Failed to get response from ChatPDF.")
        return None
# Streamlit application
st.title("Amazon Product Insights Dashboard")
# URL input
url = st.text_input("Enter Amazon Product URL:")
if url:
    product_data = scrape_amazon_product(url)
    if product_data:
        st.header(product_data['product_name'])
        st.subheader("Product Description")
        st.write(product_data['product_description'])
        st.subheader("Reviews")
        st.write(product_data['Reviews'])
        review_data = CalcReviews(product_data['Reviews'])
        st.metric("Number of Positive Reviews", len(review_data['positive_reviews']))
        st.metric("Number of Negative Reviews", len(review_data['negative_reviews']))
        st.write(f"Positive to Negative Ratio: {review_data['Ratio of Positive to Negative Reviews']}")
        st.subheader("Summary of Positive Reviews")
        st.write(review_data['positive_summary'])
        st.subheader("Summary of Negative Reviews")
        st.write(review_data['negative_summary'])
        # Generate PDF
        pdf_path = generate_pdf(product_data, review_data)
        # Chatbot interaction
        st.subheader("Chat with the Product")
        user_question = st.text_input("Ask a question about the product:")
        if user_question:
            response = get_answer(user_question, pdf_path)
            st.write(response)