Spaces:

tonwuaso
/

Sentiment_Analysis_Model

Runtime error

App Files Files Community

tonwuaso committed on Jun 2, 2023

Commit

9e456db

1 Parent(s): ec4769b

Upload folder using huggingface_hub

Browse files

Files changed (11) hide show

.github/workflows/update_space.yml +28 -0
.idea/.gitignore +3 -0
.idea/ML_Model.iml +8 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/misc.xml +4 -0
.idea/modules.xml +8 -0
.idea/workspace.xml +59 -0
README.md +2 -8
app.py +145 -0
main.py +166 -0
requirements.txt +7 -0

.github/workflows/update_space.yml ADDED Viewed

	@@ -0,0 +1,28 @@

+# Deploys this repository to its Hugging Face Space on every push to main.
+name: Run Python script
+on:
+  push:
+    branches:
+      - main
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.9'
+    - name: Install Gradio
+      run: python -m pip install gradio
+    - name: Log in to Hugging Face
+      # Hand the secret over through the environment instead of interpolating
+      # it into the command, so the token never becomes part of the script text.
+      env:
+        HF_TOKEN: ${{ secrets.hf_token }}
+      run: python -c 'import os, huggingface_hub; huggingface_hub.login(token=os.environ["HF_TOKEN"])'
+    - name: Deploy to Spaces
+      run: gradio deploy

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+# Default ignored files
+/shelf/
+/workspace.xml

.idea/ML_Model.iml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,4 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (AiMouse)" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/ML_Model.iml" filepath="$PROJECT_DIR$/.idea/ML_Model.iml" />
+    </modules>
+  </component>
+</project>

.idea/workspace.xml ADDED Viewed

	@@ -0,0 +1,59 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ChangeListManager">
+    <list default="true" id="adfe6b33-d4f8-4e0f-a640-2a44c006e89b" name="Changes" comment="" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="FileTemplateManagerImpl">
+    <option name="RECENT_TEMPLATES">
+      <list>
+        <option value="Python Script" />
+      </list>
+    </option>
+  </component>
+  <component name="MarkdownSettingsMigration">
+    <option name="stateVersion" value="1" />
+  </component>
+  <component name="ProjectId" id="2QfIgCdHt3byO0m9tB94ER54vy3" />
+  <component name="ProjectViewState">
+    <option name="hideEmptyMiddlePackages" value="true" />
+    <option name="showLibraryContents" value="true" />
+  </component>
+  <component name="RunManager">
+    <configuration name="main" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
+      <module name="ML_Model" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/main.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+  </component>
+  <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="adfe6b33-d4f8-4e0f-a640-2a44c006e89b" name="Changes" comment="" />
+      <created>1685738317859</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1685738317859</updated>
+    </task>
+    <servers />
+  </component>
+</project>

README.md CHANGED Viewed

@@ -1,12 +1,6 @@
 ---
-title: Sentiment Analysis Model
-emoji: ⚡
-colorFrom: green
-colorTo: pink
 sdk: gradio
 sdk_version: 3.33.1
-app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Sentiment_Analysis_Model
+app_file: app.py
 sdk: gradio
 sdk_version: 3.33.1
 ---

app.py ADDED Viewed

	@@ -0,0 +1,145 @@

+import os
+import pandas as pd
+import tensorflow as tf
+import numpy as np
+import gradio as gr
+from matplotlib import pyplot as plt
+from tensorflow import keras
+from keras.models import Sequential
+from keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
+from keras.metrics import Precision, Recall, CategoricalAccuracy
+from gradio import components
+from gradio import Interface
+from keras.layers import TextVectorization
+base_path = r"C:\Users\tochi\SentimentAnalysisData"
+df = pd.read_csv(os.path.join(base_path, 'train.csv' ))
+df.head() # displays first couple of comments from csv file
+X = df['comment_text']
+y = df[df.columns[2:]].values
+MAX_FEATURES = 200000 # number of words in the vocab
+vectorizer = TextVectorization(max_tokens=MAX_FEATURES,
+                               output_sequence_length=1800,
+                               output_mode='int')
+vectorizer.adapt(X.values)
+vectorized_text = vectorizer(X.values)
+#MCSHBAP - map, chache, shuffle, batch, prefetch  from_tensor_slices, list_file
+dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
+dataset = dataset.cache()
+dataset = dataset.shuffle(160000)
+dataset = dataset.batch(16)
+dataset = dataset.prefetch(8) # helps bottlenecks
+train = dataset.take(int(len(dataset)*.7))
+val = dataset.skip(int(len(dataset)*.7)).take(int(len(dataset)*.2))
+test = dataset.skip(int(len(dataset)*.9)).take(int(len(dataset)*.1))
+model = Sequential() # Instantiate sequential api
+# Create the embedding layer
+model.add(Embedding(MAX_FEATURES+1, 32))
+# Bidirectional LSTM Layer
+model.add(Bidirectional(LSTM(32, activation='tanh')))
+# Feature extractor Fully connected layers
+model.add(Dense(128, activation='relu'))
+model.add(Dense(256, activation='relu'))
+model.add(Dense(128, activation='relu'))
+# Final layer
+model.add(Dense(6, activation='sigmoid'))
+model.compile(loss='BinaryCrossentropy', optimizer='Adam')
+model.summary()
+history = model.fit(train, epochs=10, validation_data=val)
+plt.figure(figsize=(8,5))
+pd.DataFrame(history.history).plot()
+plt.show()
+input_text = ['You freaking suck! I am going to hit you!']
+input_text = vectorizer(input_text)
+res = model.predict(input_text)
+(res > 0.5).astype(int)
+batch_X, batch_y = test.as_numpy_iterator().next()
+(model.predict(batch_X) > 0.5).astype(int)
+res.shape
+pre = Precision()
+re = Recall()
+acc = CategoricalAccuracy()
+for batch in test.as_numpy_iterator():
+    # Unpack the batch
+    X_true, y_true = batch
+    # Make a prediction
+    yhat = model.predict(X_true)
+    # Flatten the predictions
+    y_true = y_true.flatten()
+    yhat = yhat.flatten()
+    pre.update_state(y_true, yhat)
+    re.update_state(y_true, yhat)
+    acc.update_state(y_true, yhat)
+print(f'Precision: {pre.result().numpy()}, Recall:{re.result().numpy()}, Accuracy:{acc.result().numpy()}')
+# Save the trained model to disk and immediately reload it; from this point on
+# `model` refers to the reloaded copy.
+model.save('sentimentAnalysis.h5')
+model = tf.keras.models.load_model('sentimentAnalysis.h5')
+# A single string is vectorized here (not a list), so a leading axis is added
+# with expand_dims before the result is passed to predict.
+input_str = vectorizer('I hate you!')
+res = model.predict(np.expand_dims(input_str,0))
+res  # bare expression: no effect outside an interactive session
+def score_comment(comment):
+    vectorized_comment = vectorizer([comment])
+    results = model.predict(vectorized_comment)
+    text = ''
+    for idx, col in enumerate(df.columns[2:]):
+        text += '{}: {}\n'.format(col, results[0][idx]>0.5)
+    return text
+interface = components.Interface(
+    fn=score_comment,
+    inputs=components.Textbox(lines=2, placeholder='Comment to score'),
+    outputs='text')
+interface.launch(share=True)

main.py ADDED Viewed

	@@ -0,0 +1,166 @@

+'''
+import os
+import pandas as pd
+import tensorflow as tf
+import numpy as np
+!pip list
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
+from matplotlib import pyplot as plt
+from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy
+!pip install gradio jinja2
+from gradio import components
+from gradio import Interface
+base_path = r"C:\Users\tochi\SentimentAnalysisData"
+df = pd.read_csv(os.path.join(base_path, 'train.csv' ))
+df.head() # displays first couple of comments from csv file
+from tensorflow.keras.layers import TextVectorization
+X = df['comment_text']
+y = df[df.columns[2:]].values
+MAX_FEATURES = 200000 # number of words in the vocab
+vectorizer = TextVectorization(max_tokens=MAX_FEATURES,
+                               output_sequence_length=1800,
+                               output_mode='int')
+vectorizer.adapt(X.values)
+vectorized_text = vectorizer(X.values)
+#MCSHBAP - map, cache, shuffle, batch, prefetch  from_tensor_slices, list_file
+dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
+dataset = dataset.cache()
+dataset = dataset.shuffle(160000)
+dataset = dataset.batch(16)
+dataset = dataset.prefetch(8) # helps bottlenecks
+train = dataset.take(int(len(dataset)*.7))
+val = dataset.skip(int(len(dataset)*.7)).take(int(len(dataset)*.2))
+test = dataset.skip(int(len(dataset)*.9)).take(int(len(dataset)*.1))
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
+model = Sequential() # Instantiate sequential api
+# Create the embedding layer
+model.add(Embedding(MAX_FEATURES+1, 32))
+# Bidirectional LSTM Layer
+model.add(Bidirectional(LSTM(32, activation='tanh')))
+# Feature extractor Fully connected layers
+model.add(Dense(128, activation='relu'))
+model.add(Dense(256, activation='relu'))
+model.add(Dense(128, activation='relu'))
+# Final layer
+model.add(Dense(6, activation='sigmoid'))
+model.compile(loss='BinaryCrossentropy', optimizer='Adam')
+model.summary()
+history = model.fit(train, epochs=1, validation_data=val)
+from matplotlib import pyplot as plt
+plt.figure(figsize=(8,5))
+pd.DataFrame(history.history).plot()
+plt.show()
+input_text = ['You freaking suck! I am going to hit you!']
+input_text = vectorizer(input_text)
+res = model.predict(input_text)
+(res > 0.5).astype(int)
+batch_X, batch_y = test.as_numpy_iterator().next()
+(model.predict(batch_X) > 0.5).astype(int)
+res.shape
+from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy
+pre = Precision()
+re = Recall()
+acc = CategoricalAccuracy()
+for batch in test.as_numpy_iterator():
+    # Unpack the batch
+    X_true, y_true = batch
+    # Make a prediction
+    yhat = model.predict(X_true)
+    # Flatten the predictions
+    y_true = y_true.flatten()
+    yhat = yhat.flatten()
+    pre.update_state(y_true, yhat)
+    re.update_state(y_true, yhat)
+    acc.update_state(y_true, yhat)
+print(f'Precision: {pre.result().numpy()}, Recall:{re.result().numpy()}, Accuracy:{acc.result().numpy()}')
+model.save('sentimentAnalysis.h5')
+model = tf.keras.models.load_model('sentimentAnalysis.h5')
+input_str = vectorizer('I hate you!')
+res = model.predict(np.expand_dims(input_str,0))
+res
+def score_comment(comment):
+    vectorized_comment = vectorizer([comment])
+    results = model.predict(vectorized_comment)
+    text = ''
+    for idx, col in enumerate(df.columns[2:]):
+        text += '{}: {}\n'.format(col, results[0][idx]>0.5)
+    return text
+interface = components.Interface(
+    fn=score_comment,
+    inputs=components.Textbox(lines=2, placeholder='Comment to score'),
+    outputs='text')
+interface.launch(share=True)
+'''

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+# Only pip-installable distributions belong here; standard-library modules
+# such as os ship with Python and make the install fail if listed.
+tensorflow
+pandas
+numpy
+gradio
+matplotlib
+keras