"""Hash a small text corpus into integer sequences and pad them to a fixed length."""

import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Set the vocabulary size (number of hash buckets) and maximum sequence length.
voc_size = 10000
sent_length = 20

# Define the corpus.
corpus = ['example text 1', 'example text 2', 'example text 3']

# Generate hashed integer sequences for the corpus.
hashed_docs = []
for text in corpus:
    # Hash each whitespace-separated word exactly once and keep it as a plain
    # Python int, so the same value is reused for both printing and padding
    # (instead of storing scalar tensors and re-hashing every word to print it).
    hashed_doc = [
        int(tf.strings.to_hash_bucket_fast(word, voc_size))
        for word in text.split()
    ]
    # Echo the bucket ids on a single line, space-separated.
    for bucket_id in hashed_doc:
        print(bucket_id, end=' ')
    hashed_docs.append(hashed_doc)

# Pad the sequences at the front ('pre') so every row has sent_length entries.
padded_docs = pad_sequences(hashed_docs, padding='pre', maxlen=sent_length)
print(padded_docs)