ArtORias1 committed on
Commit
9607d54
1 Parent(s): e30cc6b

Upload lyrics5.py

Browse files
Files changed (1) hide show
  1. lyrics5.py +133 -0
lyrics5.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Lyrics5.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1eEWnqPJ4BuKnMDL-kK9b2-vvMJ6_HTyS
8
+ """
9
+
10
+ !pip install keras
11
+
12
+ !pip install keras_preprocessing
13
+ import keras_preprocessing
14
+
15
+ !pip install pad_sequences
16
+ import pad_sequences
17
+
18
+ import pandas as pd
19
+ import numpy as np
20
+ import seaborn as sns
21
+ import matplotlib.pyplot as plt
22
+ from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
23
+ import string, os
24
+ import tensorflow as tf
25
+
26
+ # keras module for building LSTM
27
+ from keras_preprocessing.sequence import pad_sequences
28
+ from tensorflow.keras.layers import Embedding, Dropout, LSTM, Dense, Bidirectional
29
+ from keras.preprocessing.text import Tokenizer
30
+ from keras.callbacks import EarlyStopping
31
+ from keras.models import Sequential
32
+
33
+ import matplotlib.pyplot as plt
34
+ import seaborn as sns
35
+
36
# Load the lyrics dataset.  The path sits under /content/drive, i.e. it
# presumably expects Google Drive to be mounted in a Colab runtime.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/lyrics-data.csv')

# Bare expressions such as this one are notebook cell outputs; they have no
# visible effect when the file is executed as a plain script.
df.head()

# The link and song-name columns are not used anywhere below.
df = df.drop(columns=['ALink', 'SName', 'SLink'])

df.shape

df['language'].value_counts()

# Keep English lyrics only and cap the corpus at the first 350 rows.
# .copy() yields an independent frame, so adding a column below does not
# write into a slice of the filtered data (pandas SettingWithCopyWarning).
df = df[df['language'] == 'en'].iloc[:350].copy()

df.shape

# Whitespace-separated word count per lyric; str() guards non-string cells.
df['Number_of_words'] = df['Lyric'].apply(lambda x: len(str(x).split()))
df.head()

df['Number_of_words'].describe()

import matplotlib.pyplot as plt
plt.style.use('ggplot')
plt.figure(figsize=(12, 6))
# histplot is the supported replacement for the deprecated
# distplot(..., kde=False); it draws the same count histogram.
sns.histplot(df['Number_of_words'], kde=False, color="red", bins=200)
plt.title("Frequency distribution of number of words for each text extracted", size=20)
62
+
63
# Build the vocabulary from the lower-cased lyrics.
lyric_text = df['Lyric'].astype(str)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(lyric_text.str.lower())

# One extra slot on top of the vocabulary: Keras word ids start at 1 and
# id 0 is the reserved padding value.
total_words = len(tokenizer.word_index) + 1
tokenized_sentences = tokenizer.texts_to_sequences(lyric_text)
tokenized_sentences[0]

# Expand every lyric into all of its prefixes of length >= 2, so each
# sample is "some leading words" followed by the word to predict.
input_sequences = [
    token_ids[:end]
    for token_ids in tokenized_sentences
    for end in range(2, len(token_ids) + 1)
]

# Left-pad every prefix to the length of the longest one.
max_sequence_len = max(len(prefix) for prefix in input_sequences)
padded = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
input_sequences = np.array(padded)

input_sequences[:10]

# The last column is the target word id; everything before it is the input.
X = input_sequences[:, :-1]
labels = input_sequences[:, -1]
y = tf.keras.utils.to_categorical(labels, num_classes=total_words)
84
+
85
# Next-word classifier: 40-dimensional embeddings feeding a bidirectional
# LSTM (250 units per direction), light dropout, then a softmax over the
# whole vocabulary.
model = Sequential([
    Embedding(total_words, 40, input_length=max_sequence_len - 1),
    Bidirectional(LSTM(250)),
    Dropout(0.1),
    Dense(total_words, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Stop once the training loss has failed to improve for 3 epochs in a row.
earlystop = EarlyStopping(
    monitor='loss',
    min_delta=0,
    patience=3,
    verbose=0,
    mode='auto',
)
history = model.fit(
    X,
    y,
    epochs=10,
    verbose=1,
    callbacks=[earlystop],
)
93
+
94
# Plot the per-epoch training accuracy recorded by model.fit().
plt.plot(history.history['accuracy'], label='train acc')
plt.legend()
# Save before show(): once show() returns the current figure may already be
# released (notebook inline backends do this), and a later savefig() would
# then write an empty image.
plt.savefig('AccVal_acc')
plt.show()
98
+
99
def complete_this_song(seed_text: str, next_words: int) -> str:
    """Extend ``seed_text`` with ``next_words`` model-predicted words.

    On every step the text generated so far is tokenized, left-padded to the
    model's input length and passed to the model; the class with the highest
    predicted probability is mapped back to its vocabulary word and appended.

    Relies on the module-level ``tokenizer``, ``model`` and
    ``max_sequence_len``.

    Args:
        seed_text: Text to start from.
        next_words: Number of words to append.  Float values (as UI sliders
            commonly deliver) are truncated to an integer; zero or negative
            values return ``seed_text`` unchanged.

    Returns:
        ``seed_text`` followed by the generated words, separated by spaces.
    """
    # range() rejects floats, so normalise the count once up front.
    for _ in range(int(next_words)):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences(
            [token_list], maxlen=max_sequence_len - 1, padding='pre'
        )
        probabilities = model.predict(token_list, verbose=0)
        # The batch holds a single row; take its argmax as a plain int so it
        # can be used directly as a dictionary key.
        predicted_index = int(np.argmax(probabilities, axis=1)[0])
        # index_word is the tokenizer's id -> word mapping.  Ids without a
        # word (such as the padding id 0) contribute an empty string, which
        # matches the result of the previous word_index scan.
        output_word = tokenizer.index_word.get(predicted_index, "")
        seed_text += " " + output_word
    return seed_text
113
+
114
# Quick demo of the generator (the return value is only displayed when this
# runs as a notebook cell).
complete_this_song("the sky is blue", 40)

# The notebook ran `!pip install keras.models` at this point.  That line is
# IPython shell magic (a syntax error in a .py file), and keras.models is a
# submodule of the already-installed keras package rather than something to
# install, so it is intentionally not executed here.
import tensorflow as tf
from tensorflow.keras.models import load_model

# Single source of truth for the checkpoint location used by save and load.
MODEL_PATH = '/content/drive/MyDrive/Colab Notebooks/song_lyrics_generator.h5'

# Persist the trained model, then read it back from disk.
model.save(MODEL_PATH)

# NOTE(review): complete_this_song() reads the global `model`, not this
# reloaded copy -- confirm which of the two is meant to serve predictions.
song_lyrics_generator = load_model(MODEL_PATH)
125
+
126
# Requires the gradio package.  In a notebook install it with
# `!pip install gradio`; that shell magic is not valid in a .py file.
import gradio as gr

# Web UI: a free-text seed plus a slider for how many words to generate.
# gr.Slider replaces the legacy gr.inputs.Slider namespace, which current
# Gradio releases no longer provide; step=1 keeps the count a whole number.
interface = gr.Interface(
    fn=complete_this_song,
    inputs=[
        'text',
        gr.Slider(minimum=0, maximum=250, step=1, label='No. of words'),
    ],
    outputs='text',
)

interface.launch()