ArtORias1 committed
Commit 9a32c6f
1 Parent(s): 25dc178

Delete README.ipynb

Files changed (1)
  1. README.ipynb +0 -241
README.ipynb DELETED
@@ -1,241 +0,0 @@
---
license: apache-2.0
---

import tensorflow as tf
from string import punctuation
import numpy as np
import os
import time
import pickle

model_path = '/content/drive/MyDrive/Colab Notebooks'

# create directory to store pickled files in
if not os.path.exists('/content/drive/MyDrive/Colab Notebooks/pkl'):
    os.mkdir('/content/drive/MyDrive/Colab Notebooks/pkl')

# ----------------------------------------------------------------------

### LIMITING GPU MEMORY GROWTH ###

# get list of visible GPUs
gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:  # if one or more GPUs are detected
    try:  # try setting memory growth to true for all GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)  # enable memory growth
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print('\n', len(gpus), 'Physical GPUs,', len(logical_gpus), 'Logical GPUs')
    except RuntimeError as e:
        # memory growth must be set before GPUs have been initialized
        print('\n', e)

# ----------------------------------------------------------------------

### READ IN AND CLEAN THE LYRICS DATA ###

# ******TAKE IN USER INPUT FOR LYRICS (ARTIST NAME? FILE NAME?)******

# read in the lyrics text file
text = str(open('/content/drake.txt', 'r').read())
# artist_name = input('\nPlease ')

# make all letters lowercase and turn each line break into its own "word"
words = text.lower().replace('\n', ' \n ')

# remove punctuation
for punc in punctuation:
    words = words.replace(punc, '')

# split the entire string into a Python list of words
words = words.split(' ')

# obtain the list of unique words across all lyrics
vocab = sorted(set(words))
print(f'\nThere are {len(vocab)} unique words in the lyrics file.')

# pickle the vocab list - it will be needed in the generation script
outfile = open(file='/content/drive/MyDrive/Colab Notebooks/pkl/vocab', mode='wb')
pickle.dump(vocab, outfile)
outfile.close()
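
# --- illustration only (not in the original pipeline): what the cleaning steps
# above produce for a short, made-up sample string ---
sample = 'Started from the bottom,\nNow we here'
sample = sample.lower().replace('\n', ' \n ')
for punc in punctuation:
    sample = sample.replace(punc, '')
print(sample.split(' '))  # ['started', 'from', 'the', 'bottom', '\n', 'now', 'we', 'here']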
61
- # ----------------------------------------------------------------------
62
-
63
- ### WORD MAPPING ###
64
-
65
- # map unique characters to indices
66
- word2idx = {u:i for i, u in enumerate(vocab)}
67
-
68
- # pickle this since it is needed in text generation
69
- outfile = open(file='/content/drive/MyDrive/Colab Notebooks/pkl/word2idx', mode='wb')
70
- pickle.dump(word2idx, outfile)
71
- outfile.close()
72
-
73
- # reverse the map - use this to specify an index to obtain a character
74
- idx2word = np.array(vocab)
75
-
76
- # pickle this since it is needed in text generation
77
- outfile = open(file='/content/drive/MyDrive/Colab Notebooks/pkl/idx2word', mode='wb')
78
- pickle.dump(idx2word, outfile)
79
- outfile.close()
80
-
81
- # entire text document represented in the above character-to-indices mapping
82
- words_as_int = np.array([word2idx[c] for c in words])
83
-
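
# --- illustration only: a quick sanity check that the two mappings are inverses ---
# (example_word is just an arbitrary word pulled from the vocabulary)
example_word = vocab[0]
assert idx2word[word2idx[example_word]] == example_word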

# ----------------------------------------------------------------------

### CREATING TRAINING EXAMPLES & TARGETS ###

# ******TAKE IN USER INPUT FOR SEQUENCE LENGTH?******

# max sequence length (in number of words) desired for training
seq_length = 100
# seq_length = input('\nPlease enter a desired sequence length (in number of words) to train the model on: ')
examples_per_epoch = len(words) // (seq_length + 1)

# create training examples/targets
word_dataset = tf.data.Dataset.from_tensor_slices(words_as_int)

# data type of the training examples/targets
print('\n', type(word_dataset))

# create sequence batches from the word_dataset
sequences = word_dataset.batch(seq_length + 1, drop_remainder=True)
print('\n', type(sequences))

# define the shifting (splitting) function
def split_input_target(chunk):
    input_text = chunk[:-1]  # everything up to, but not including, the last word
    target_text = chunk[1:]  # everything except the first word
    return input_text, target_text

# apply the shift to create the input/target pairs that make up our dataset
dataset = sequences.map(split_input_target)
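
# --- illustration only: what the shift does to a toy chunk ---
# for [0, 1, 2, 3, 4] the input is [0, 1, 2, 3] and the target is [1, 2, 3, 4],
# i.e. at every position the model is asked to predict the next word
toy_input, toy_target = split_input_target(np.array([0, 1, 2, 3, 4]))
print(toy_input, toy_target)  # [0 1 2 3] [1 2 3 4]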

# ----------------------------------------------------------------------

### CREATE TRAINING BATCHES ###

# batch size
BATCH_SIZE = 64

# buffer size to shuffle the dataset
# (TensorFlow data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements)
BUFFER_SIZE = 10000

# create a dataset that has been shuffled and batched
dataset_sb = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# display batch dataset shapes and data types
print('\n', dataset_sb)

# ----------------------------------------------------------------------

### BUILDING THE RNN ###

# vocabulary length (number of unique words in the dataset)
vocab_size = len(vocab)

# embedding dimension
embedding_dim = 256

# number of RNN units
rnn_units = 1024

# pickle the model parameters - they will be needed in the generation script
model_params = [vocab_size, embedding_dim, rnn_units]
outfile = open(file='/content/drive/MyDrive/Colab Notebooks/pkl/model_params', mode='wb')
pickle.dump(model_params, outfile)
outfile.close()

# helper function to build the RNN model from the vocab size, embedding dimension,
# number of RNN units, and batch size
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):

    # initialize sequential model architecture
    model = tf.keras.Sequential()

    # add embedding layer
    model.add(tf.keras.layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        batch_input_shape=[batch_size, None]
    ))

    # add recurrent layer
    model.add(tf.keras.layers.GRU(
        units=rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform'
    ))

    # add dense output layer (one logit per word in the vocabulary)
    model.add(tf.keras.layers.Dense(units=vocab_size))

    return model

# helper function to save the trained weights to model_path
def save_model(model, model_path):
    model.save_weights(model_path)
    print(f'Model saved to {model_path}')

# build the model using the above helper function
rnn = build_model(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE
)

# check the shape of the output
for input_example_batch, target_example_batch in dataset_sb.take(1):
    example_batch_predictions = rnn(input_example_batch)
    print('\n', example_batch_predictions.shape, '# (batch_size, sequence_length, vocab_size)')

# model architecture summary (summary() prints directly)
rnn.summary()
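
# --- illustration only: sampling word indices from the untrained model's logits ---
# (tf.random.categorical draws one index per timestep; a separate generation script
# would presumably use the same trick to pick the next word)
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()
print(' '.join(idx2word[sampled_indices[:10]]))  # ten words of untrained babble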

# ----------------------------------------------------------------------

### SET UP METRICS ###

# helper function to obtain the loss function
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

# compile the model
rnn.compile(
    optimizer='adam',
    loss=loss,
    metrics=['accuracy']
)
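
# --- illustration only: sanity-check the loss before training ---
# an untrained model assigns roughly uniform probability over the vocabulary,
# so the initial loss should be close to ln(vocab_size)
example_batch_loss = loss(target_example_batch, example_batch_predictions)
print('\n', 'initial loss:', example_batch_loss.numpy().mean(),
      '(ln(vocab_size) =', np.log(vocab_size), ')')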

# create directory where the checkpoints will be saved
checkpoint_dir = '/content/drive/MyDrive/Colab Notebooks/training_checkpoints'

# name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, 'checkpoint')

# create the checkpoint-saving callback (keep only the best weights by training loss)
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    monitor='loss',
    save_best_only=True,
    mode='min',
    save_weights_only=True
)

# ----------------------------------------------------------------------

### MODEL TRAINING ###

# set number of desired epochs
EPOCHS = 200

# training!
history = rnn.fit(
    x=dataset_sb,
    epochs=EPOCHS,
    callbacks=[checkpoint_callback]
)
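
# --- illustration only: a sketch of how a separate generation script might reload
# what was saved above (the original script is not shown here; 'generator_weights'
# is a hypothetical file name) ---
# rebuild the same architecture with batch_size=1, then load the best checkpoint
generator = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
generator.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
generator.build(tf.TensorShape([1, None]))
save_model(generator, os.path.join(model_path, 'generator_weights'))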