kardosdrur's picture
Added script for producing deployment
3f888da
raw
history blame contribute delete
No virus
636 Bytes
import numpy as np
import topicwizard
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from turftopic import KeyNMF
# Build the topic data for a topicwizard deployment from the 20 Newsgroups
# corpus and hand it to topicwizard.easy_deploy.

print("Fetching data")
# Load every split of the corpus, with headers, footers and quoted replies
# removed from each post.
newsgroups = fetch_20newsgroups(
    subset="all",
    remove=("headers", "footers", "quotes"),
)
texts = newsgroups.data

# Map each document's integer target to the name of its newsgroup.
# NOTE(review): `labels` is not consumed anywhere in the visible script --
# confirm whether it was meant to be passed on to the deployment.
group_names = np.array(newsgroups.target_names)
labels = list(group_names[newsgroups.target])

# Vocabulary: unigrams and bigrams, English stop words dropped, limited to
# 8000 features.
vectorizer = CountVectorizer(
    stop_words="english",
    max_features=8000,
    ngram_range=(1, 2),
)

# 20 is passed positionally (presumably the number of topics -- verify
# against the KeyNMF signature).
model = KeyNMF(20, vectorizer=vectorizer)

topic_data = model.prepare_topic_data(texts)

# dest_dir="." targets the current working directory.
topicwizard.easy_deploy(topic_data, dest_dir=".")