import os
import google.generativeai as palm
import streamlit as st
import pandas as pd
import faiss
import hdbscan
from sklearn.feature_extraction.text import CountVectorizer
from src.modelling.topics.topic_extractor import (
TopicExtractionConfig, TopicExtractor
)
from src.modelling.topics.class_tf_idf import ClassTfidfTransformer
from src import deploy_utils
def get_prompt(title: str, reviews: str) -> str:
    """Build the LLM prompt for a product idea, including customer reviews.

    Args:
        title: The product or market the user searched for; it is quoted
            verbatim inside the prompt.
        reviews: Pre-formatted review text that is inserted on its own line.

    Returns:
        The full prompt as a single newline-separated string.
    """
    # The wording is what the model actually reads, so the spelling and
    # grammar mistakes of the previous version are corrected here.
    prompt_lines = (
        "We are doing a marketing research analysis, in particular we are "
        "trying to understand what users think about a particular market "
        "in order to generate tips for future sellers.",
        f'In particular, we are interested in analyzing the market for "{title}"',
        "This is what amazon customers are saying about similar products:",
        f"{reviews}",
        "Can you write some recommendations about how can we disrupt this "
        "market? Try to propose the necessary methodology to create a "
        "breaking product.",
    )
    return "\n".join(prompt_lines)
def get_prompt_without_reviews(title: str) -> str:
    """Build the LLM prompt for a product idea when no reviews are available.

    Args:
        title: The product or market the user searched for; it is quoted
            verbatim inside the prompt.

    Returns:
        The full prompt as a single newline-separated string.
    """
    # The wording is what the model actually reads, so the spelling and
    # grammar mistakes of the previous version are corrected here.
    prompt_lines = (
        "We are doing a marketing research analysis, in particular we are "
        "trying to understand what users think about a particular market "
        "in order to generate tips for future sellers.",
        f'In particular, we are interested in analyzing the market for "{title}"',
        "Can you write some recommendations about how can we disrupt this "
        "market? Try to propose the necessary methodology to create a "
        "breaking product.",
    )
    return "\n".join(prompt_lines)
# Markdown shown in place of a recommendation when the product search returns
# nothing. Every backslash of the ASCII art is written as an explicit "\\"
# escape: the rendered text is unchanged, but the literal no longer relies on
# invalid escape sequences (such as "\_" or "\/"), which Python warns about.
no_electronics_message = """
Sorry, we are currently only recommending business that operate around electronics. Would you like to input another search?
This doesn't mean you make a mistake, I search amazon products and try to extract relevant reviews from similar products and we didn't find relevant products for your search.
#### Maybe you are way ahead of the market!
```
.
___,,,
\\_[o o]
Errare humanum est! C\\ _\\/
/ _____),_/__
________ / \\/ /
_| .| / o /
| | .| / /
\\| .| / /
|________| /_ \\/
__|___|__ _//\\ \\
_____|_________|____ \\ \\ \\ \\
_| /// \\ \\
| \\ /
| / /
| / /
________________ | /__ /_
...|_|.............. /______\\.......
```
"""
# When True, render_search shows a text area bound to the "user_prompt"
# session key.
TEST_MODE = False
def setup_palm():
    """Configure the PaLM client with the key stored in ``PALM_TOKEN``.

    The key is read from the environment; when the variable is unset,
    ``None`` is passed through to ``palm.configure``.
    """
    api_key = os.getenv('PALM_TOKEN')
    palm.configure(api_key=api_key)
@st.cache_data
def load_data():
    """Read the review and product tables from the ``data`` directory.

    Returns:
        A ``(reviews, products)`` tuple of DataFrames; ``reviews`` is indexed
        by its ``reviewID`` column.
    """
    review_table = pd.read_csv("data/filtered_reviews.csv")
    review_table = review_table.set_index("reviewID")
    product_table = pd.read_csv("data/products.csv")
    return review_table, product_table
def load_uncached_models():
    """Create the topic extractor and the review clusterer.

    Unlike ``load_models`` this function carries no Streamlit cache
    decorator, so new objects are built on every call.

    Returns:
        A ``(topic_extractor, clusterer)`` tuple.
    """
    # Topic keywords are taken from 1- to 3-grams of the "summary" field,
    # with English stop words removed.
    ngram_vectorizer = CountVectorizer(
        ngram_range=(1, 3), stop_words="english")
    ctfidf = ClassTfidfTransformer(reduce_frequent_words=True)
    extraction_config = TopicExtractionConfig(
        vectorizer_model=ngram_vectorizer,
        ctfidf_model=ctfidf,
        number_of_representative_documents=5,
        review_text_key="summary",
    )
    extractor = TopicExtractor(extraction_config)

    # metric="precomputed": the clusterer is configured to receive a distance
    # matrix rather than raw feature vectors.
    review_clusterer = hdbscan.HDBSCAN(
        min_cluster_size=5, min_samples=5, metric="precomputed")
    return extractor, review_clusterer
@st.cache_resource
def load_models():
    """Load the two embedding models and the product FAISS index.

    Returns:
        A ``(reviews_model, product_model, product_indexer)`` tuple. Note
        that the reviews model comes first in the returned tuple even though
        the product model is loaded first.
    """
    product_encoder = deploy_utils.load_model("all-MiniLM-L6-v2")
    review_encoder_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
    review_encoder = deploy_utils.load_model(review_encoder_url)
    index = faiss.read_index("vectordb/populated.index")
    return review_encoder, product_encoder, index
def render_cta_link(url, label, font_awesome_icon):
    """Render a call-to-action hyperlink with a Font Awesome icon.

    The previous body ignored ``url`` and ``font_awesome_icon`` entirely: it
    emitted an empty markdown element followed by the bare label, so no link
    was ever shown. The stylesheet include and the anchor markup are
    restored here.

    Args:
        url: Target address of the link.
        label: Visible link text.
        font_awesome_icon: Font Awesome class name of the icon, e.g.
            ``"fa-github"``.

    Returns:
        Whatever ``st.markdown`` returns for the anchor element.
    """
    from html import escape

    # Font Awesome 4.7 provides the "fa fa-*" classes used by the icon below.
    st.markdown(
        '<link rel="stylesheet" '
        'href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/'
        'css/font-awesome.min.css">',
        unsafe_allow_html=True,
    )
    # The values are interpolated into raw HTML, so escape them to keep the
    # markup well-formed whatever the caller passes in.
    safe_url = escape(str(url), quote=True)
    safe_icon = escape(str(font_awesome_icon), quote=True)
    safe_label = escape(str(label))
    button_code = (
        f'<a href="{safe_url}" target="_blank" rel="noopener noreferrer">'
        f'<i class="fa {safe_icon}"></i> {safe_label}</a>'
    )
    return st.markdown(button_code, unsafe_allow_html=True)
def handler_review_query():
    """Build the LLM prompt for the current search from similar products' reviews.

    Reads ``st.session_state.user_search_query`` and the module-level objects
    created at start-up (``product_model``, ``product_indexer``, ``products``,
    ``reviews``, ``reviews_model``, ``clusterer``, ``topic_extractor``).

    Outcomes, all written to ``st.session_state``:
        * no relevant product: ``user_prompt`` is set to ``None`` and
          ``palm_output`` to ``no_electronics_message``;
        * products found but every review was left without a topic:
          ``user_prompt`` is the prompt without reviews;
        * otherwise: ``user_prompt`` is the prompt including the key reviews.
    """
    query = st.session_state.user_search_query
    relevant_products = deploy_utils.query_relevant_documents(
        product_model=product_model,
        indexer=product_indexer,
        products=products,
        query_text=query,
    )
    if len(relevant_products) == 0:
        st.session_state.user_prompt = None
        st.session_state.palm_output = no_electronics_message
        return

    relevant_reviews = deploy_utils.get_relevant_reviews(
        relevant_products, reviews)
    raw_topic_assignment = deploy_utils.clusterize_reviews(
        relevant_reviews, reviews_model, clusterer)
    # assign() returns a new frame, so the object handed back by the helper
    # (possibly a slice of the shared reviews table) is not written to.
    relevant_reviews = relevant_reviews.assign(topic=raw_topic_assignment)
    # Topic -1 is dropped; presumably this is the clusterer's noise label,
    # to be confirmed against deploy_utils.clusterize_reviews.
    reviews_with_topics = relevant_reviews[relevant_reviews["topic"] != -1]
    if reviews_with_topics.empty:
        # Nothing left to summarise: ask the model without review context
        # instead of running topic extraction on an empty frame.
        st.session_state.user_prompt = get_prompt_without_reviews(query)
        return

    extracted_topics = topic_extractor(reviews_with_topics)
    key_reviews = deploy_utils.get_key_reviews(
        reviews_with_topics,
        extracted_topics,
    )
    reviews_prompt = deploy_utils.key_reviews_to_prompt(key_reviews)
    st.session_state.user_prompt = get_prompt(query, reviews_prompt)
def handler_product_without_reviews():
    """Store the review-free prompt for the current search query.

    Reads ``st.session_state.user_search_query`` and writes the resulting
    prompt to ``st.session_state.user_prompt``.
    """
    search_text = st.session_state.user_search_query
    st.session_state.user_prompt = get_prompt_without_reviews(search_text)
def palm_handler():
    """Send the stored prompt to PaLM and keep the answer for rendering.

    Reads ``st.session_state.user_prompt`` and writes the generated text to
    ``st.session_state.palm_output``. If the response carries no result, a
    short explanatory message is stored instead.
    """
    response = palm.generate_text(prompt=st.session_state.user_prompt)
    generated = response.result
    if generated is None:
        # Per the client documentation `result` is None when no candidate is
        # returned (for example when the output was filtered); without this
        # guard the page would display the literal text "None".
        generated = (
            "Sorry, the model did not return an answer for this search. "
            "Please try rephrasing it."
        )
    st.session_state.palm_output = generated
def render_search():
    """
    Render the search form in the sidebar.

    When the user has typed a query, a review-based prompt is built; if that
    fails for any reason the prompt without reviews is used instead. Whenever
    a prompt is available it is sent to PaLM. A separator and a link to the
    source repository are rendered last.
    """
    import logging

    with st.sidebar:
        query = st.text_input(
            label="What kind of product are you trying to sell?",
            placeholder="Your magic idea goes here ✨",
            key="user_search_query",
        )
        if query:
            try:
                handler_review_query()
            except Exception:
                # Deliberately best-effort: any failure in the review
                # pipeline falls back to the review-free prompt. Catching
                # Exception rather than using a bare `except` lets
                # KeyboardInterrupt/SystemExit (and any other BaseException
                # used for control flow) propagate, and the cause is logged
                # instead of vanishing.
                logging.getLogger(__name__).exception(
                    "Review-based prompt failed; using the prompt without reviews"
                )
                handler_product_without_reviews()
        if TEST_MODE:
            st.text_area(
                label="test env",
                placeholder="prompt here",
                key="user_prompt"
            )
        if "user_prompt" in st.session_state and st.session_state.user_prompt:
            palm_handler()
        st.write("---")
        render_cta_link(
            url="https://github.com/CamiVasz/factored-datathon-2023-almond",
            label="Check the code",
            font_awesome_icon="fa-github",
        )
def render_palm_results():
    """Write the recommendations heading followed by the stored model output.

    Reads ``st.session_state.palm_output``; the caller is expected to check
    that the key exists.
    """
    # TODO: temporal
    heading = "# ALMond recommendations"
    st.write(heading)
    st.write(st.session_state.palm_output)
# Script body. The page configuration is applied before any other Streamlit
# call below.
page_settings = {
    "page_title": "almond - demo",
    "page_icon": "🔍",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)

setup_palm()

# These names are read as module-level globals by the handler functions.
(reviews, products) = load_data()
(reviews_model, product_model, product_indexer) = load_models()
(topic_extractor, clusterer) = load_uncached_models()

render_search()
# Results are only shown once some output has been stored in the session.
if "palm_output" in st.session_state:
    render_palm_results()