# Source: sentiment-analys / test.py
# Uploaded by devraj4522 ("Upload 14 files"), commit b27a404
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Vocabulary size (number of hash buckets) and the fixed length every
# sequence is padded or truncated to.
voc_size = 10000
sent_length = 20

# Example corpus; each string is treated as one document.
corpus = ['example text 1', 'example text 2', 'example text 3']

# Map every whitespace-separated word to an integer in [0, voc_size) using
# TensorFlow's fast string hash.
# NOTE(review): a word can hash to bucket 0, which is also the default value
# pad_sequences fills with below -- confirm whether index 0 should be
# reserved for padding before feeding these sequences to a model.
hashed_docs = []
for text in corpus:
    # Hash each word exactly once and keep plain Python ints, so the values
    # that are printed are the very same ones that get stored.
    hashed_doc = [
        int(tf.strings.to_hash_bucket_fast(word, voc_size))
        for word in text.split()
    ]
    # One document per output line keeps the hashes separate from the padded
    # array printed at the end.
    print(*hashed_doc)
    hashed_docs.append(hashed_doc)

# Left-pad with zeros to exactly sent_length entries. Sequences longer than
# sent_length are truncated by pad_sequences (from the front by default).
padded_docs = pad_sequences(hashed_docs, padding='pre', maxlen=sent_length)
print(padded_docs)