# Source captured from a Hugging Face Spaces file view.
# Space status at capture time: Runtime error
# File size: 4,503 bytes
from bertopic import BERTopic
import streamlit as st
import streamlit.components.v1 as components
from datasets import load_dataset
import pandas as pd
from sentence_transformers import SentenceTransformer
from umap import UMAP
from hdbscan import HDBSCAN
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
# --- Page chrome -------------------------------------------------------------
# set_page_config is issued before any other Streamlit call in this script.
st.set_page_config(
    page_title='eRupt Topic Trendy (e-Commerce x Social Media)',
    page_icon=None,
    layout='centered',
    initial_sidebar_state='auto',
)
st.markdown(
    "<h1 style='text-align: center;'>Topic Trendy</h1>",
    unsafe_allow_html=True,
)

# --- Corpus ------------------------------------------------------------------
# The CSV supplies a `date` column and a `text` column; both are pulled out as
# plain Python lists. `timestamps` is not consumed by the active code below.
topic = pd.read_csv('./Data/tiktok_utf8.csv')
timestamps = topic["date"].to_list()
tiktok = topic["text"].to_list()

# --- Topic model -------------------------------------------------------------
# Earlier experiments (loading a saved "my_topics_model", or wiring in a custom
# SentenceTransformer / UMAP / HDBSCAN / n-gram CountVectorizer pipeline) were
# disabled in favour of BERTopic's defaults plus an English stop-word filter.
vectorizer_model = CountVectorizer(stop_words="english")
topic_model = BERTopic(vectorizer_model=vectorizer_model, verbose=True)
def fit_transform(model, docs):
    """Fit ``model`` on ``docs`` and return the resulting ``(topics, probs)`` pair.

    Args:
        model: Any object exposing ``fit_transform(docs)`` that returns exactly
            two values (the script passes a ``BERTopic`` instance).
        docs: The documents handed straight through to ``model.fit_transform``.

    Returns:
        A 2-tuple ``(topics, probs)`` exactly as produced by the model.
    """
    topics, probs = model.fit_transform(docs)
    return topics, probs
def _resource_cache(func):
    """Wrap ``func`` with Streamlit's resource cache when this version has one.

    Newer Streamlit exposes ``st.cache_resource``; older releases expose
    ``st.experimental_singleton``. If neither exists the function is returned
    unchanged, which simply means the model is refit on every rerun.
    """
    decorator = (
        getattr(st, "cache_resource", None)
        or getattr(st, "experimental_singleton", None)
    )
    return decorator(func) if decorator is not None else func


@_resource_cache
def _fit_topic_model():
    """Fit the module-level ``topic_model`` on ``tiktok`` once and reuse it.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached fit would retrain the model each time. The cached result survives
    until the app process restarts, so a changed CSV needs a restart.
    """
    fitted_topics, fitted_probs = fit_transform(topic_model, tiktok)
    return topic_model, fitted_topics, fitted_probs


# Rebind to the fitted (possibly cached) model; `topics` / `probs` stay
# available under their original names.
topic_model, topics, probs = _fit_topic_model()

# TODO: `timestamps` is reserved for a topics-over-time view, e.g.
#   topic_model.topics_over_time(tiktok, topics, timestamps, nr_bins=20)

# --- Sidebar form ------------------------------------------------------------
form = st.sidebar.form("Main Settings")
form.header("Main Settings")
ebay_topic = form.selectbox(
    "eBay Products Topic Selection",
    ["Motor", "Bicycle", "Beauty", "Basketball", "Fitness"],
)
# min_value=1 keeps a zero or negative count from reaching find_topics.
num = form.number_input("The Number of Topics", min_value=1, value=10, step=1)
form.form_submit_button("Run")

# --- Topic lookup ------------------------------------------------------------
# The selectbox only ever yields one of the options above, so the selected
# label is used directly as the search term.
similar_topics, similarity = topic_model.find_topics(ebay_topic, top_n=int(num))

if len(similar_topics) > 0:
    most_similar = similar_topics[0]
    # get_topic yields (word, score) pairs, or a falsy value when the topic id
    # is unknown to the model.
    answer_as_string = topic_model.get_topic(most_similar)
    if answer_as_string:
        st.info("Extracted Topic")
        keywords = pd.DataFrame(answer_as_string)
        # NOTE(review): the second value of each pair is the per-word score
        # returned by get_topic, not similarity[0] - confirm the column label.
        keywords.columns = ["Social Media Topics", "Similarity Score"]
        st.table(keywords)
    else:
        st.warning("No matching topic found.")
else:
    st.warning("No matching topic found.")

st.image('https://freepngimg.com/download/keyboard/6-2-keyboard-png-file.png',use_column_width=True)
#st.markdown("<h6 style='text-align: center; color: #808080;'>Created By LiHE</a></h6>", unsafe_allow_html=True)