metadata
tags:
- word2vec
language: pam
license: gpl-3.0
Description
Polyglot word embeddings trained by Al-Rfou et al. (2013); see the citation at the end of this card.
How to use?
import pickle
from numpy import dot
from numpy.linalg import norm
from huggingface_hub import hf_hub_download
# Example: find the vocabulary word whose embedding is closest (by cosine
# similarity) to the embedding of `word`.

# Download the pickled (words, embeddings) pair from the Hugging Face Hub.
# NOTE(review): the card metadata declares language "pam" but this example
# fetches the "_en" repository/file -- confirm the intended repo_id/filename.
model_path = hf_hub_download(repo_id="Word2vec/polyglot_words_embeddings_en", filename="words_embeddings_en.pkl")

# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load pickles from repositories you trust.
# encoding="latin1" is kept from the original call (presumably the pickle was
# written by Python 2 -- verify against the upstream release).
with open(model_path, 'rb') as model_file:
    words, embeddings = pickle.load(model_file, encoding="latin1")

word = "Irish"
# Look the query word up once; words.index() is a linear scan and raises
# ValueError if the word is not in the vocabulary.
word_index = words.index(word)
a = embeddings[word_index]
norm_a = norm(a)  # loop-invariant, so compute it a single time

most_similar = []
for i, b in enumerate(embeddings):
    denominator = norm_a * norm(b)
    if i == word_index or denominator == 0:
        # Exclude the query word itself and any zero-length vector (cosine
        # similarity is undefined there). -inf rather than 0 is used because
        # cosine similarity may be negative, and 0 could otherwise win.
        most_similar.append(float("-inf"))
    else:
        most_similar.append(dot(a, b) / denominator)

# The nearest neighbour of `word` (displayed when run in a notebook/REPL).
words[most_similar.index(max(most_similar))]
Citation
@InProceedings{polyglot:2013:ACL-CoNLL,
author = {Al-Rfou, Rami and Perozzi, Bryan and Skiena, Steven},
title = {Polyglot: Distributed Word Representations for Multilingual NLP},
booktitle = {Proceedings of the Seventeenth Conference on Computational Natural Language Learning},
month = {August},
year = {2013},
address = {Sofia, Bulgaria},
publisher = {Association for Computational Linguistics},
pages = {183--192},
url = {http://www.aclweb.org/anthology/W13-3520}
}