"""Pick the sub-topic that is, on average, most similar to its siblings.

Each sub-topic is embedded as the mean of the spaCy vectors of its
whitespace-separated, lower-cased words; sub-topics are then compared pairwise
with cosine similarity.
"""

from typing import Optional, Sequence

import numpy as np
import spacy

# Loaded once at import time; every embedding lookup below goes through it.
nlp = spacy.load("en_core_web_md")


def cosine_similarity(a, b):
    """Return the cosine similarity between vectors *a* and *b*.

    Args:
        a: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        b: Second vector, same length as *a*.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``, or ``0.0`` when either vector has
        zero norm. The zero-norm guard avoids a ``nan`` result (and the
        accompanying NumPy runtime warning), which would otherwise poison
        any later ``max`` comparison.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator


def _topic_vector(topic: str) -> Optional[np.ndarray]:
    """Return the mean word vector of *topic*, or ``None`` if it has no words."""
    words = topic.split()
    if not words:
        # np.mean over an empty list would produce nan; signal "no embedding".
        return None
    return np.mean([nlp(word.lower()).vector for word in words], axis=0)


def find_representative_sub_topic(sub_topics: Sequence[str]) -> str:
    """Return the sub-topic with the highest mean similarity to the others.

    Args:
        sub_topics: Sub-topic names. Each name is split on whitespace and
            embedded as the mean of its lower-cased words' vectors.

    Returns:
        The element of *sub_topics* whose average cosine similarity to every
        other element is largest. Ties go to the earliest element. When only
        one sub-topic is given, it is returned as-is.

    Raises:
        ValueError: If *sub_topics* is empty.
    """
    topics = list(sub_topics)
    if not topics:
        raise ValueError("sub_topics must contain at least one sub-topic")
    if len(topics) == 1:
        # Nothing to compare against; avoids dividing by len - 1 == 0.
        return topics[0]

    vectors = [_topic_vector(topic) for topic in topics]
    others_count = len(vectors) - 1

    # Scores are kept positionally (not keyed by name) so duplicate sub-topic
    # strings cannot overwrite one another.
    avg_similarities = []
    for i, vec in enumerate(vectors):
        total_sim = 0.0
        if vec is not None:
            for j, other in enumerate(vectors):
                # A topic without an embedding contributes zero similarity.
                if i != j and other is not None:
                    total_sim += cosine_similarity(vec, other)
        avg_similarities.append(total_sim / others_count)

    best_index = max(range(len(topics)), key=avg_similarities.__getitem__)
    return topics[best_index]


# Demo inputs; kept at module level so the names remain importable.
sub_topics1 = ['Machine Learning', 'Deep Learning', 'Supervised Learning']
sub_topics2 = ['Web Development', 'Web Design', 'Website Building']
sub_topics3 = ['Healthy Eating', 'Nutrition', 'Balanced Diet', 'Wellness']
sub_topics4 = ['Hours Support', 'Working Hours', 'Morning Brief Calls']


if __name__ == "__main__":
    # Only print the demo when run as a script, not when imported.
    print(f"Representative sub-topic for {sub_topics1}: {find_representative_sub_topic(sub_topics1)}")
    print(f"Representative sub-topic for {sub_topics2}: {find_representative_sub_topic(sub_topics2)}")
    print(f"Representative sub-topic for {sub_topics3}: {find_representative_sub_topic(sub_topics3)}")
    print(f"Representative sub-topic for {sub_topics4}: {find_representative_sub_topic(sub_topics4)}")