File size: 1,466 Bytes
a31ba66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import spacy
import numpy as np

# Load the "en_core_web_md" spaCy pipeline once at import time; the functions
# below call `nlp(...)` and read `.vector` from the result.
nlp = spacy.load("en_core_web_md")


def cosine_similarity(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Args:
        a: 1-D array-like of numbers.
        b: 1-D array-like of numbers with the same length as ``a``.

    Returns:
        The dot product of ``a`` and ``b`` divided by the product of their
        Euclidean norms, or ``0.0`` when either vector has zero norm.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    # A zero-norm vector has no direction; dividing would produce NaN (and a
    # RuntimeWarning), which then poisons any sum or max() built on top of it.
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator


def find_representative_sub_topic(sub_topics):
    """Return the sub-topic that is, on average, most similar to the others.

    Each sub-topic is embedded as the mean of the ``nlp(word.lower()).vector``
    values of its whitespace-separated words. A sub-topic's score is the mean
    cosine similarity between its embedding and every other sub-topic's
    embedding.

    Args:
        sub_topics: Iterable of sub-topic strings.

    Returns:
        The sub-topic with the highest score (the earliest one on ties). When
        only one sub-topic is given it is returned unchanged.

    Raises:
        ValueError: If ``sub_topics`` is empty, or if a sub-topic contains no
            words (empty or whitespace-only string).
    """
    sub_topics = list(sub_topics)
    if not sub_topics:
        raise ValueError("sub_topics must contain at least one sub-topic")
    if len(sub_topics) == 1:
        # No peers to compare against; averaging over zero peers would
        # otherwise divide by zero.
        return sub_topics[0]

    sub_topic_vectors = []
    for topic in sub_topics:
        words = topic.split()
        if not words:
            # np.mean over an empty list yields NaN rather than a vector.
            raise ValueError(f"sub-topic {topic!r} contains no words")
        sub_topic_vectors.append(
            np.mean([nlp(word.lower()).vector for word in words], axis=0)
        )

    peer_count = len(sub_topic_vectors) - 1
    avg_similarities = []
    for i, vec1 in enumerate(sub_topic_vectors):
        total_sim = 0.0
        for j, vec2 in enumerate(sub_topic_vectors):
            if i == j:
                continue
            sim = cosine_similarity(vec1, vec2)
            # An all-zero embedding can make the similarity NaN; count it as
            # "no similarity" so comparisons in max() stay well-defined.
            total_sim += 0.0 if np.isnan(sim) else sim
        avg_similarities.append(total_sim / peer_count)

    best_index = max(range(len(sub_topics)), key=avg_similarities.__getitem__)
    return sub_topics[best_index]


# Sample sub-topic groups used to demonstrate find_representative_sub_topic.
sub_topics1 = ['Machine Learning', 'Deep Learning', 'Supervised Learning']
sub_topics2 = ['Web Development', 'Web Design', 'Website Building']
sub_topics3 = ['Healthy Eating', 'Nutrition', 'Balanced Diet', 'Wellness']
sub_topics4 = ['Hours Support', 'Working Hours', 'Morning Brief Calls']

# Report the representative sub-topic of each group, in declaration order.
for _demo_group in (sub_topics1, sub_topics2, sub_topics3, sub_topics4):
    print(f"Representative sub-topic for {_demo_group}: {find_representative_sub_topic(_demo_group)}")