import torch
from tokenizer import tokenizer  # local module; must return tokens exposing a .text attribute (e.g., a spaCy tokenizer)
from torchtext.vocab import Vectors

max_len = 64
glove_file = 'glove.6B.100d.txt'

# Load the pre-trained GloVe embeddings from the local file
glove = Vectors(glove_file)
def embed(sentence):
    tok = tokenizer(sentence.lower())  # tokenization
    if len(tok) > max_len:  # truncate to at most max_len tokens
        tok = tok[:max_len]
    pad = max_len - len(tok)
    output = []
    for i in range(len(tok)):
        if tok[i].text not in glove.stoi:
            # Out-of-vocabulary token: skip it and pad with one extra zero vector instead
            pad = pad + 1
        else:
            word_embedding = glove.vectors[glove.stoi[tok[i].text]]
            output.append(word_embedding)
    for i in range(pad):  # zero-padding up to max_len rows
        output.append(torch.zeros(glove.dim))
    return torch.stack(output)  # shape: (max_len, embedding_dim)
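

# A minimal usage sketch (the sentence below is hypothetical; assumes
# glove.6B.100d.txt has been downloaded next to this file and the local
# tokenizer module is importable):
if __name__ == '__main__':
    emb = embed("GloVe embeddings map words to dense vectors.")
    print(emb.shape)  # expected: torch.Size([64, 100])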