# slackdemo / spacy_topics.py
# svummidi: POC for passive monitoring (a31ba66)
import spacy
import numpy as np
# Load the spaCy "en_core_web_md" pipeline once, at import time.
# find_representative_sub_topic reads per-word embeddings from it via
# nlp(word).vector.
nlp = spacy.load("en_core_web_md")
def cosine_similarity(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Args:
        a: First vector (any array-like accepted by ``np.dot`` and
            ``np.linalg.norm``).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``. If either vector has zero norm
        (e.g. an all-zero embedding) the similarity is undefined and ``0.0``
        is returned instead.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    # A zero-norm vector has no direction. Dividing anyway would produce NaN
    # (plus a NumPy RuntimeWarning), which then contaminates every sum or
    # average computed from the result.
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator
def find_representative_sub_topic(sub_topics):
    """Pick the sub-topic that is, on average, most similar to the others.

    Each sub-topic is embedded as the mean of ``nlp(word).vector`` over its
    lower-cased, whitespace-separated words. Every sub-topic is then scored
    by its mean cosine similarity to all *other* sub-topics, and the one
    with the highest score is returned.

    Args:
        sub_topics: Sequence of sub-topic strings.
            NOTE(review): each string is assumed to contain at least one
            word; an empty/blank string has no words to average.

    Returns:
        The element of ``sub_topics`` with the highest mean similarity to
        the rest (the earliest one wins ties). A single-element input is
        returned as-is, since there is nothing to compare it against.

    Raises:
        ValueError: If ``sub_topics`` is empty.
    """
    count = len(sub_topics)
    if count == 0:
        raise ValueError("sub_topics must contain at least one sub-topic")
    if count == 1:
        # Averaging over "the others" would divide by zero; the only
        # candidate is trivially the representative one.
        return sub_topics[0]

    vectors = [
        np.mean([nlp(word.lower()).vector for word in topic.split()], axis=0)
        for topic in sub_topics
    ]

    others = count - 1
    # Scores are kept by position rather than keyed by topic text, so
    # duplicate sub-topic strings cannot overwrite each other's score.
    avg_similarities = [
        sum(
            cosine_similarity(vec, other)
            for j, other in enumerate(vectors)
            if j != i
        ) / others
        for i, vec in enumerate(vectors)
    ]

    best_index = max(range(count), key=avg_similarities.__getitem__)
    return sub_topics[best_index]
# Demo inputs: four small groups of related sub-topic phrases.
sub_topics1 = ['Machine Learning', 'Deep Learning', 'Supervised Learning']
sub_topics2 = ['Web Development', 'Web Design', 'Website Building']
sub_topics3 = ['Healthy Eating', 'Nutrition', 'Balanced Diet', 'Wellness']
sub_topics4 = ['Hours Support', 'Working Hours', 'Morning Brief Calls']

# Report the representative sub-topic of each group, in declaration order.
for _demo_topics in (sub_topics1, sub_topics2, sub_topics3, sub_topics4):
    _representative = find_representative_sub_topic(_demo_topics)
    print(f"Representative sub-topic for {_demo_topics}: {_representative}")