s1ri1337 committed
Commit 85035a7
1 parent: af17d70

Update app.py

Files changed (1)
  1. app.py +98 -0
app.py CHANGED
@@ -0,0 +1,98 @@
+ import numpy as np
+ import pandas as pd
+ import gradio as gr
+ from tensorflow.keras import layers
+ from tensorflow.keras import Input
+ from tensorflow.keras.models import Model
+ from tensorflow.keras.preprocessing import sequence
+ from tensorflow.keras.preprocessing.text import Tokenizer
+ from tensorflow.keras.callbacks import EarlyStopping
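+ # Assumes TensorFlow 2.x (for tf.keras) and Gradio are installed, and that
+ # train.csv, test.csv, and glove.6B.100d.txt sit next to this script.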
+ # Training data is expected to have a `text` column and a binary `target` column.
+ df = pd.read_csv("train.csv")
+
+ # Build a word -> vector lookup from the pre-trained 100-d GloVe embeddings.
+ embeddings_index = {}
+ with open('glove.6B.100d.txt', encoding="utf-8") as f:
+     for line in f:
+         values = line.split()
+         word = values[0]
+         coefs = np.asarray(values[1:], dtype='float32')
+         embeddings_index[word] = coefs
+ print('Found %s word vectors.' % len(embeddings_index))
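+ # Sanity check (illustrative): the 6B.100d file ships 400,000 vectors, so the
+ # print above should report that count, and e.g. embeddings_index.get("the")
+ # should be a float32 array of shape (100,).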
+ # Simple positional split: the first 6100 rows for training, the rest held out.
+ data = df.text
+ labels = df.target
+ x_train = data[:6100]
+ x_test = data[6100:]
+ y_train = labels[:6100]
+ y_test = labels[6100:]
+
+ # Fit the tokenizer on the training text only, then turn each tweet into a
+ # fixed-length sequence of word indices, zero-padded to 200 tokens.
+ tokenizer = Tokenizer()
+ tokenizer.fit_on_texts(x_train.values)
+ sequences = tokenizer.texts_to_sequences(x_train.values)
+ sequences = sequence.pad_sequences(sequences, maxlen=200)
+ vocab_size = len(tokenizer.word_index) + 1
+ embedding_dim = 100
+ max_words = 1513  # only this many of the most frequent words get GloVe vectors below
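+ # Illustrative encoding (actual indices depend on the fitted vocabulary):
+ # tokenizer.texts_to_sequences(["forest fire near town"]) -> [[i1, i2, i3, i4]],
+ # which pad_sequences then left-pads with zeros to length 200 by default.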
+
+ # Copy GloVe vectors into rows of an embedding matrix indexed by the
+ # tokenizer's word indices; words beyond max_words, or missing from GloVe,
+ # keep an all-zero row. Keras orders word_index by frequency, so the most
+ # common words are the ones that receive pre-trained vectors.
+ embedding_matrix = np.zeros((vocab_size, embedding_dim))
+ for word, i in tokenizer.word_index.items():
+     if i < max_words:
+         embedding_vector = embeddings_index.get(word)
+         if embedding_vector is not None:
+             embedding_matrix[i] = embedding_vector
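+ # Quick coverage check (illustrative): the fraction of rows left all-zero,
+ # i.e. words that got no pre-trained vector:
+ # print(np.mean(~embedding_matrix.any(axis=1)))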
+ # Two stacked LSTMs over the (frozen) GloVe embedding, with a dense head and
+ # a sigmoid output for binary classification.
+ input_layer = Input(shape=(None,), dtype='int32', name='tweet_input')
+ x = layers.Embedding(vocab_size, embedding_dim, input_length=200)(input_layer)
+ x = layers.LSTM(32, dropout=0.1, recurrent_dropout=0.5, return_sequences=True)(x)
+ x = layers.LSTM(32, dropout=0.1, recurrent_dropout=0.5, return_sequences=False)(x)
+ x = layers.Dense(100, activation='relu')(x)
+ output = layers.Dense(1, activation='sigmoid')(x)
+ model = Model(input_layer, output)
+
+ # Load the pre-trained vectors into the embedding layer and freeze it, so
+ # training only updates the LSTM and dense layers.
+ model.layers[1].set_weights([embedding_matrix])
+ model.layers[1].trainable = False
+ model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
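+ # model.summary() here would confirm that the embedding parameters are listed
+ # as non-trainable after the freeze above.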
+ # Train with early stopping on validation loss, then save the model.
+ es = EarlyStopping(monitor='val_loss', mode='min')
+ history = model.fit(sequences, y_train.values, epochs=20, validation_split=0.2, callbacks=[es])
+ model.save("trained.h5")
+
+ # Evaluate on the held-out split, encoded with the same tokenizer and padding.
+ sequences = tokenizer.texts_to_sequences(x_test.values)
+ sequences = sequence.pad_sequences(sequences, maxlen=200)
+ x_test = sequences
+ score = model.evaluate(x_test, y_test.values)
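+ # `score` is [loss, accuracy], matching the compiled loss and metrics.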
+ # Score test.csv and write a submission-style CSV mapping id -> predicted target.
+ test = pd.read_csv("test.csv")
+ ids = test.id
+ test = test.text
+ sequences = tokenizer.texts_to_sequences(test)
+ sequences = sequence.pad_sequences(sequences, maxlen=200)
+ results = model.predict(sequences)
+ results = results.round().squeeze()
+ csv_df = pd.DataFrame({"id": ids, "target": results})
+ csv_df.index = csv_df.id
+ csv_df = csv_df["target"].astype(int)
+ csv_df.to_csv("results.csv", header=True)
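+ # results.csv then looks like (values illustrative):
+ # id,target
+ # 0,1
+ # 2,0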
+ def encoder(text):
+     """Tokenize and pad a single string into the model's (1, 200) input shape."""
+     text = tokenizer.texts_to_sequences([text])
+     text = sequence.pad_sequences(text, maxlen=200)
+     return text
+
+ def predict(text):
+     """Classify one piece of text as disaster-related or not."""
+     encoded_text = encoder(text)
+     prediction = model.predict(encoded_text)
+     print(prediction)
+     prediction = np.round(prediction)
+     if prediction == 1:
+         return "Disaster"
+     return "Not a Disaster"
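+ # Illustrative call (the exact label depends on the trained weights):
+ # predict("Forest fire near La Ronge Sask. Canada")  # -> expected "Disaster"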
+ title = "Relevance Classifier"
+ description = "<p style='text-align:center'>Classifies input text as disaster-related or not disaster-related.</p>"
+
+ gr.Interface(fn=predict, inputs='text', outputs='text', title=title, description=description).launch()
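+ # When run outside Hugging Face Spaces, launch(share=True) would additionally
+ # expose a temporary public URL (standard Gradio option; not needed on Spaces).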