Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse files
- .github/workflows/update_space.yml +28 -0
- .idea/.gitignore +3 -0
- .idea/ML_Model.iml +8 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +4 -0
- .idea/modules.xml +8 -0
- .idea/workspace.xml +59 -0
- README.md +2 -8
- app.py +145 -0
- main.py +166 -0
- requirements.txt +7 -0
.github/workflows/update_space.yml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Run Python script
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
build:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
|
| 12 |
+
steps:
|
| 13 |
+
- name: Checkout
|
| 14 |
+
uses: actions/checkout@v2
|
| 15 |
+
|
| 16 |
+
- name: Set up Python
|
| 17 |
+
uses: actions/setup-python@v2
|
| 18 |
+
with:
|
| 19 |
+
python-version: '3.9'
|
| 20 |
+
|
| 21 |
+
- name: Install Gradio
|
| 22 |
+
run: python -m pip install gradio
|
| 23 |
+
|
| 24 |
+
- name: Log in to Hugging Face
|
| 25 |
+
run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
|
| 26 |
+
|
| 27 |
+
- name: Deploy to Spaces
|
| 28 |
+
run: gradio deploy
|
.idea/.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Default ignored files
|
| 2 |
+
/shelf/
|
| 3 |
+
/workspace.xml
|
.idea/ML_Model.iml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<module type="PYTHON_MODULE" version="4">
|
| 3 |
+
<component name="NewModuleRootManager">
|
| 4 |
+
<content url="file://$MODULE_DIR$" />
|
| 5 |
+
<orderEntry type="inheritedJdk" />
|
| 6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
| 7 |
+
</component>
|
| 8 |
+
</module>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<settings>
|
| 3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
+
<version value="1.0" />
|
| 5 |
+
</settings>
|
| 6 |
+
</component>
|
.idea/misc.xml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (AiMouse)" project-jdk-type="Python SDK" />
|
| 4 |
+
</project>
|
.idea/modules.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectModuleManager">
|
| 4 |
+
<modules>
|
| 5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/ML_Model.iml" filepath="$PROJECT_DIR$/.idea/ML_Model.iml" />
|
| 6 |
+
</modules>
|
| 7 |
+
</component>
|
| 8 |
+
</project>
|
.idea/workspace.xml
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ChangeListManager">
|
| 4 |
+
<list default="true" id="adfe6b33-d4f8-4e0f-a640-2a44c006e89b" name="Changes" comment="" />
|
| 5 |
+
<option name="SHOW_DIALOG" value="false" />
|
| 6 |
+
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
| 7 |
+
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
| 8 |
+
<option name="LAST_RESOLUTION" value="IGNORE" />
|
| 9 |
+
</component>
|
| 10 |
+
<component name="FileTemplateManagerImpl">
|
| 11 |
+
<option name="RECENT_TEMPLATES">
|
| 12 |
+
<list>
|
| 13 |
+
<option value="Python Script" />
|
| 14 |
+
</list>
|
| 15 |
+
</option>
|
| 16 |
+
</component>
|
| 17 |
+
<component name="MarkdownSettingsMigration">
|
| 18 |
+
<option name="stateVersion" value="1" />
|
| 19 |
+
</component>
|
| 20 |
+
<component name="ProjectId" id="2QfIgCdHt3byO0m9tB94ER54vy3" />
|
| 21 |
+
<component name="ProjectViewState">
|
| 22 |
+
<option name="hideEmptyMiddlePackages" value="true" />
|
| 23 |
+
<option name="showLibraryContents" value="true" />
|
| 24 |
+
</component>
|
| 25 |
+
<component name="RunManager">
|
| 26 |
+
<configuration name="main" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
|
| 27 |
+
<module name="ML_Model" />
|
| 28 |
+
<option name="INTERPRETER_OPTIONS" value="" />
|
| 29 |
+
<option name="PARENT_ENVS" value="true" />
|
| 30 |
+
<envs>
|
| 31 |
+
<env name="PYTHONUNBUFFERED" value="1" />
|
| 32 |
+
</envs>
|
| 33 |
+
<option name="SDK_HOME" value="" />
|
| 34 |
+
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
| 35 |
+
<option name="IS_MODULE_SDK" value="true" />
|
| 36 |
+
<option name="ADD_CONTENT_ROOTS" value="true" />
|
| 37 |
+
<option name="ADD_SOURCE_ROOTS" value="true" />
|
| 38 |
+
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/main.py" />
|
| 39 |
+
<option name="PARAMETERS" value="" />
|
| 40 |
+
<option name="SHOW_COMMAND_LINE" value="false" />
|
| 41 |
+
<option name="EMULATE_TERMINAL" value="false" />
|
| 42 |
+
<option name="MODULE_MODE" value="false" />
|
| 43 |
+
<option name="REDIRECT_INPUT" value="false" />
|
| 44 |
+
<option name="INPUT_FILE" value="" />
|
| 45 |
+
<method v="2" />
|
| 46 |
+
</configuration>
|
| 47 |
+
</component>
|
| 48 |
+
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
|
| 49 |
+
<component name="TaskManager">
|
| 50 |
+
<task active="true" id="Default" summary="Default task">
|
| 51 |
+
<changelist id="adfe6b33-d4f8-4e0f-a640-2a44c006e89b" name="Changes" comment="" />
|
| 52 |
+
<created>1685738317859</created>
|
| 53 |
+
<option name="number" value="Default" />
|
| 54 |
+
<option name="presentableId" value="Default" />
|
| 55 |
+
<updated>1685738317859</updated>
|
| 56 |
+
</task>
|
| 57 |
+
<servers />
|
| 58 |
+
</component>
|
| 59 |
+
</project>
|
README.md
CHANGED
|
@@ -1,12 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
colorFrom: green
|
| 5 |
-
colorTo: pink
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 3.33.1
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Sentiment_Analysis_Model
|
| 3 |
+
app_file: app.py
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
sdk_version: 3.33.1
|
|
|
|
|
|
|
| 6 |
---
|
|
|
|
|
|
app.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
import numpy as np
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from matplotlib import pyplot as plt
|
| 7 |
+
from tensorflow import keras
|
| 8 |
+
from keras.models import Sequential
|
| 9 |
+
from keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
|
| 10 |
+
from keras.metrics import Precision, Recall, CategoricalAccuracy
|
| 11 |
+
from gradio import components
|
| 12 |
+
from gradio import Interface
|
| 13 |
+
from keras.layers import TextVectorization
|
| 14 |
+
|
| 15 |
+
base_path = r"C:\Users\tochi\SentimentAnalysisData"
|
| 16 |
+
df = pd.read_csv(os.path.join(base_path, 'train.csv' ))
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
df.head() # displays first couple of comments from csv file
|
| 20 |
+
|
| 21 |
+
X = df['comment_text']
|
| 22 |
+
y = df[df.columns[2:]].values
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
MAX_FEATURES = 200000 # number of words in the vocab
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
vectorizer = TextVectorization(max_tokens=MAX_FEATURES,
|
| 29 |
+
output_sequence_length=1800,
|
| 30 |
+
output_mode='int')
|
| 31 |
+
|
| 32 |
+
vectorizer.adapt(X.values)
|
| 33 |
+
|
| 34 |
+
vectorized_text = vectorizer(X.values)
|
| 35 |
+
|
| 36 |
+
#MCSHBAP - map, cache, shuffle, batch, prefetch from_tensor_slices, list_files
|
| 37 |
+
dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
|
| 38 |
+
dataset = dataset.cache()
|
| 39 |
+
dataset = dataset.shuffle(160000)
|
| 40 |
+
dataset = dataset.batch(16)
|
| 41 |
+
dataset = dataset.prefetch(8) # helps bottlenecks
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
train = dataset.take(int(len(dataset)*.7))
|
| 45 |
+
val = dataset.skip(int(len(dataset)*.7)).take(int(len(dataset)*.2))
|
| 46 |
+
test = dataset.skip(int(len(dataset)*.9)).take(int(len(dataset)*.1))
|
| 47 |
+
|
| 48 |
+
model = Sequential() # Instantiate sequential api
|
| 49 |
+
# Create the embedding layer
|
| 50 |
+
model.add(Embedding(MAX_FEATURES+1, 32))
|
| 51 |
+
# Bidirectional LSTM Layer
|
| 52 |
+
model.add(Bidirectional(LSTM(32, activation='tanh')))
|
| 53 |
+
# Feature extractor Fully connected layers
|
| 54 |
+
model.add(Dense(128, activation='relu'))
|
| 55 |
+
model.add(Dense(256, activation='relu'))
|
| 56 |
+
model.add(Dense(128, activation='relu'))
|
| 57 |
+
# Final layer
|
| 58 |
+
model.add(Dense(6, activation='sigmoid'))
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
model.compile(loss='BinaryCrossentropy', optimizer='Adam')
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
model.summary()
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
history = model.fit(train, epochs=10, validation_data=val)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
plt.figure(figsize=(8,5))
|
| 71 |
+
pd.DataFrame(history.history).plot()
|
| 72 |
+
plt.show()
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
input_text = ['You freaking suck! I am going to hit you!']
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
input_text = vectorizer(input_text)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
res = model.predict(input_text)
|
| 83 |
+
|
| 84 |
+
(res > 0.5).astype(int)
|
| 85 |
+
|
| 86 |
+
batch_X, batch_y = test.as_numpy_iterator().next()
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
(model.predict(batch_X) > 0.5).astype(int)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
res.shape
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
pre = Precision()
|
| 96 |
+
re = Recall()
|
| 97 |
+
acc = CategoricalAccuracy()
|
| 98 |
+
|
| 99 |
+
for batch in test.as_numpy_iterator():
|
| 100 |
+
# Unpack the batch
|
| 101 |
+
X_true, y_true = batch
|
| 102 |
+
# Make a prediction
|
| 103 |
+
yhat = model.predict(X_true)
|
| 104 |
+
|
| 105 |
+
# Flatten the predictions
|
| 106 |
+
y_true = y_true.flatten()
|
| 107 |
+
yhat = yhat.flatten()
|
| 108 |
+
|
| 109 |
+
pre.update_state(y_true, yhat)
|
| 110 |
+
re.update_state(y_true, yhat)
|
| 111 |
+
acc.update_state(y_true, yhat)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
print(f'Precision: {pre.result().numpy()}, Recall:{re.result().numpy()}, Accuracy:{acc.result().numpy()}')
|
| 115 |
+
|
| 116 |
+
model.save('sentimentAnalysis.h5')
|
| 117 |
+
|
| 118 |
+
model = tf.keras.models.load_model('sentimentAnalysis.h5')
|
| 119 |
+
|
| 120 |
+
input_str = vectorizer('I hate you!')
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
res = model.predict(np.expand_dims(input_str,0))
|
| 124 |
+
|
| 125 |
+
res
|
| 126 |
+
|
| 127 |
+
def score_comment(comment):
|
| 128 |
+
vectorized_comment = vectorizer([comment])
|
| 129 |
+
results = model.predict(vectorized_comment)
|
| 130 |
+
|
| 131 |
+
text = ''
|
| 132 |
+
for idx, col in enumerate(df.columns[2:]):
|
| 133 |
+
text += '{}: {}\n'.format(col, results[0][idx]>0.5)
|
| 134 |
+
|
| 135 |
+
return text
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
interface = components.Interface(
|
| 139 |
+
fn=score_comment,
|
| 140 |
+
inputs=components.Textbox(lines=2, placeholder='Comment to score'),
|
| 141 |
+
outputs='text')
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
interface.launch(share=True)
|
| 145 |
+
|
main.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'''
|
| 2 |
+
import os
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import tensorflow as tf
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
!pip list
|
| 8 |
+
|
| 9 |
+
from tensorflow.keras.models import Sequential
|
| 10 |
+
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
|
| 11 |
+
|
| 12 |
+
from matplotlib import pyplot as plt
|
| 13 |
+
|
| 14 |
+
from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
!pip install gradio jinja2
|
| 18 |
+
|
| 19 |
+
from gradio import components
|
| 20 |
+
from gradio import Interface
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
base_path = r"C:\Users\tochi\SentimentAnalysisData"
|
| 27 |
+
df = pd.read_csv(os.path.join(base_path, 'train.csv' ))
|
| 28 |
+
|
| 29 |
+
df.head() # displays first couple of comments from csv file
|
| 30 |
+
|
| 31 |
+
from tensorflow.keras.layers import TextVectorization
|
| 32 |
+
|
| 33 |
+
X = df['comment_text']
|
| 34 |
+
y = df[df.columns[2:]].values
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
MAX_FEATURES = 200000 # number of words in the vocab
|
| 38 |
+
|
| 39 |
+
vectorizer = TextVectorization(max_tokens=MAX_FEATURES,
|
| 40 |
+
output_sequence_length=1800,
|
| 41 |
+
output_mode='int')
|
| 42 |
+
|
| 43 |
+
vectorizer.adapt(X.values)
|
| 44 |
+
|
| 45 |
+
vectorized_text = vectorizer(X.values)
|
| 46 |
+
|
| 47 |
+
#MCSHBAP - map, cache, shuffle, batch, prefetch from_tensor_slices, list_files
|
| 48 |
+
dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
|
| 49 |
+
dataset = dataset.cache()
|
| 50 |
+
dataset = dataset.shuffle(160000)
|
| 51 |
+
dataset = dataset.batch(16)
|
| 52 |
+
dataset = dataset.prefetch(8) # helps bottlenecks
|
| 53 |
+
|
| 54 |
+
train = dataset.take(int(len(dataset)*.7))
|
| 55 |
+
val = dataset.skip(int(len(dataset)*.7)).take(int(len(dataset)*.2))
|
| 56 |
+
test = dataset.skip(int(len(dataset)*.9)).take(int(len(dataset)*.1))
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
from tensorflow.keras.models import Sequential
|
| 60 |
+
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
model = Sequential() # Instantiate sequential api
|
| 64 |
+
# Create the embedding layer
|
| 65 |
+
model.add(Embedding(MAX_FEATURES+1, 32))
|
| 66 |
+
# Bidirectional LSTM Layer
|
| 67 |
+
model.add(Bidirectional(LSTM(32, activation='tanh')))
|
| 68 |
+
# Feature extractor Fully connected layers
|
| 69 |
+
model.add(Dense(128, activation='relu'))
|
| 70 |
+
model.add(Dense(256, activation='relu'))
|
| 71 |
+
model.add(Dense(128, activation='relu'))
|
| 72 |
+
# Final layer
|
| 73 |
+
model.add(Dense(6, activation='sigmoid'))
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
model.compile(loss='BinaryCrossentropy', optimizer='Adam')
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
model.summary()
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
history = model.fit(train, epochs=1, validation_data=val)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
from matplotlib import pyplot as plt
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
plt.figure(figsize=(8,5))
|
| 89 |
+
pd.DataFrame(history.history).plot()
|
| 90 |
+
plt.show()
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
input_text = ['You freaking suck! I am going to hit you!']
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
input_text = vectorizer(input_text)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
res = model.predict(input_text)
|
| 100 |
+
|
| 101 |
+
(res > 0.5).astype(int)
|
| 102 |
+
|
| 103 |
+
batch_X, batch_y = test.as_numpy_iterator().next()
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
(model.predict(batch_X) > 0.5).astype(int)
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
res.shape
|
| 110 |
+
|
| 111 |
+
from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
pre = Precision()
|
| 115 |
+
re = Recall()
|
| 116 |
+
acc = CategoricalAccuracy()
|
| 117 |
+
|
| 118 |
+
for batch in test.as_numpy_iterator():
|
| 119 |
+
# Unpack the batch
|
| 120 |
+
X_true, y_true = batch
|
| 121 |
+
# Make a prediction
|
| 122 |
+
yhat = model.predict(X_true)
|
| 123 |
+
|
| 124 |
+
# Flatten the predictions
|
| 125 |
+
y_true = y_true.flatten()
|
| 126 |
+
yhat = yhat.flatten()
|
| 127 |
+
|
| 128 |
+
pre.update_state(y_true, yhat)
|
| 129 |
+
re.update_state(y_true, yhat)
|
| 130 |
+
acc.update_state(y_true, yhat)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
print(f'Precision: {pre.result().numpy()}, Recall:{re.result().numpy()}, Accuracy:{acc.result().numpy()}')
|
| 134 |
+
|
| 135 |
+
model.save('sentimentAnalysis.h5')
|
| 136 |
+
|
| 137 |
+
model = tf.keras.models.load_model('sentimentAnalysis.h5')
|
| 138 |
+
|
| 139 |
+
input_str = vectorizer('I hate you!')
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
res = model.predict(np.expand_dims(input_str,0))
|
| 143 |
+
|
| 144 |
+
res
|
| 145 |
+
|
| 146 |
+
def score_comment(comment):
|
| 147 |
+
vectorized_comment = vectorizer([comment])
|
| 148 |
+
results = model.predict(vectorized_comment)
|
| 149 |
+
|
| 150 |
+
text = ''
|
| 151 |
+
for idx, col in enumerate(df.columns[2:]):
|
| 152 |
+
text += '{}: {}\n'.format(col, results[0][idx]>0.5)
|
| 153 |
+
|
| 154 |
+
return text
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
interface = components.Interface(
|
| 158 |
+
fn=score_comment,
|
| 159 |
+
inputs=components.Textbox(lines=2, placeholder='Comment to score'),
|
| 160 |
+
outputs='text')
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
interface.launch(share=True)
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
'''
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tensorflow
|
| 2 |
+
os
|
| 3 |
+
pandas
|
| 4 |
+
numpy
|
| 5 |
+
gradio
|
| 6 |
+
matplotlib
|
| 7 |
+
keras
|