Spaces:

Shreyashftw
/

Hate_Speech_Detection

Runtime error

App Files Files Community

Shreyashftw committed on Jan 24, 2024

Commit

4ee6ffc

verified ·

1 Parent(s): 73f11f9

Upload folder using huggingface_hub

Browse files

Files changed (13) hide show

.DS_Store +0 -0
.gitattributes +3 -0
.github/workflows/update_space.yml +27 -0
README.md +2 -8
Toxicity.ipynb +0 -0
Toxicity.py +304 -0
jigsaw-toxic-comment-classification-challenge/.DS_Store +0 -0
jigsaw-toxic-comment-classification-challenge/sample_submission.csv/sample_submission.csv +0 -0
jigsaw-toxic-comment-classification-challenge/test.csv/test.csv +3 -0
jigsaw-toxic-comment-classification-challenge/test_labels.csv/test_labels.csv +0 -0
jigsaw-toxic-comment-classification-challenge/train.csv/train.csv +3 -0
requirements.txt +5 -0
toxicity.keras +3 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+jigsaw-toxic-comment-classification-challenge/test.csv/test.csv filter=lfs diff=lfs merge=lfs -text
+jigsaw-toxic-comment-classification-challenge/train.csv/train.csv filter=lfs diff=lfs merge=lfs -text
+toxicity.keras filter=lfs diff=lfs merge=lfs -text

.github/workflows/update_space.yml ADDED Viewed

	@@ -0,0 +1,27 @@

+# Deploys this repository to its Hugging Face Space on every push to main.
+name: Update Space
+on:
+  push:
+    branches:
+      - main
+jobs:
+  update:
+    runs-on: ubuntu-latest
+    steps:
+    # Check out the repository first: without this step the runner's workspace
+    # is empty and the deploy step has no app files to upload.
+    - uses: actions/checkout@v4
+    - uses: actions/setup-python@v2
+      with:
+        python-version: "3.8"
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install gradio
+    # NOTE(review): no Hugging Face token is configured anywhere in this
+    # workflow; confirm how the runner is expected to authenticate.
+    - name: Update Space
+      run: |
+        gradio deploy main
+      env:
+        GRADSPACE_APP_FILE: Toxicity.py
+        GRADSPACE_TITLE: Hate_Speech_Detection

README.md CHANGED Viewed

@@ -1,12 +1,6 @@
 ---
-title: Hate Speech Detection
-emoji: 🐢
-colorFrom: purple
-colorTo: green
 sdk: gradio
 sdk_version: 4.15.0
-app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Hate_Speech_Detection
+app_file: Toxicity.py
 sdk: gradio
 sdk_version: 4.15.0
 ---

Toxicity.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

Toxicity.py ADDED Viewed

	@@ -0,0 +1,304 @@

+#!/usr/bin/env python
+# coding: utf-8
+# # 0. Install Dependencies and Bring in Data
+%pip install tensorflow pandas matplotlib scikit-learn
+# In[2]:
+import os
+import pandas as pd
+import tensorflow as tf
+import numpy as np
+# In[3]:
+df = pd.read_csv(os.path.join('jigsaw-toxic-comment-classification-challenge','train.csv', 'train.csv'))
+# In[4]:
+df.head()
+# # 1. Preprocess
+# In[5]:
+get_ipython().system('pip list')
+# In[6]:
+from tensorflow.keras.layers import TextVectorization
+# In[7]:
+X = df['comment_text']
+y = df[df.columns[2:]].values
+# In[8]:
+MAX_FEATURES = 200000 # number of words in the vocab
+# In[9]:
+vectorizer = TextVectorization(max_tokens=MAX_FEATURES,
+                               output_sequence_length=1800,
+                               output_mode='int')
+# In[10]:
+vectorizer.adapt(X.values)
+# In[11]:
+vectorized_text = vectorizer(X.values)
+# In[12]:
+#MCSHBAP - map, chache, shuffle, batch, prefetch  from_tensor_slices, list_file
+dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
+dataset = dataset.cache()
+dataset = dataset.shuffle(160000)
+dataset = dataset.batch(16)
+dataset = dataset.prefetch(8) # helps bottlenecks
+# In[13]:
+train = dataset.take(int(len(dataset)*.7))
+val = dataset.skip(int(len(dataset)*.7)).take(int(len(dataset)*.2))
+test = dataset.skip(int(len(dataset)*.9)).take(int(len(dataset)*.1))
+# # 2. Create Sequential Model
+# In[14]:
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
+# In[15]:
+model = Sequential()
+# Create the embedding layer
+model.add(Embedding(MAX_FEATURES+1, 32))
+# Bidirectional LSTM Layer
+model.add(Bidirectional(LSTM(32, activation='tanh')))
+# Feature extractor Fully connected layers
+model.add(Dense(128, activation='relu'))
+model.add(Dense(256, activation='relu'))
+model.add(Dense(128, activation='relu'))
+# Final layer
+model.add(Dense(6, activation='sigmoid'))
+# In[16]:
+model.compile(loss='BinaryCrossentropy', optimizer='Adam')
+# In[17]:
+model.summary()
+# In[18]:
+history = model.fit(train, epochs=1, validation_data=val)
+# In[19]:
+from matplotlib import pyplot as plt
+# In[20]:
+plt.figure(figsize=(8,5))
+pd.DataFrame(history.history).plot()
+plt.show()
+# # 3. Make Predictions
+# In[21]:
+# Make Predictions
+input_text = vectorizer(['You freaking suck! I am going to hit you.'])  # Make sure to pass a list
+res = model.predict(input_text)
+# In[63]:
+# Convert the predictions to binary values based on the threshold (0.5)
+binary_predictions = (res > 0.5).astype(int)
+print(binary_predictions)
+# In[64]:
+batch_X, batch_y = test.as_numpy_iterator().next()
+# In[65]:
+(model.predict(batch_X) > 0.5).astype(int)
+# In[66]:
+res.shape
+# # 4. Evaluate Model
+# In[67]:
+from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy
+# In[68]:
+pre = Precision()
+re = Recall()
+acc = CategoricalAccuracy()
+# In[69]:
+for batch in test.as_numpy_iterator():
+    # Unpack the batch
+    X_true, y_true = batch
+    # Make a prediction
+    yhat = model.predict(X_true)
+    # Flatten the predictions
+    y_true = y_true.flatten()
+    yhat = yhat.flatten()
+    pre.update_state(y_true, yhat)
+    re.update_state(y_true, yhat)
+    acc.update_state(y_true, yhat)
+# In[81]:
+print(f'Precision: {pre.result().numpy()}, Recall:{re.result().numpy()}, Accuracy:{acc.result().numpy()}')
+# # 5. Test and Gradio
+# In[72]:
+get_ipython().system('pip install gradio jinja2')
+# In[73]:
+import tensorflow as tf
+import gradio as gr
+# In[85]:
+model.save('toxicity.keras')
+# In[86]:
+model = tf.keras.models.load_model('toxicity.keras')
+# In[87]:
+input_str = vectorizer('hey i freaken hate you!')
+# In[88]:
+res = model.predict(np.expand_dims(input_str,0))
+# In[89]:
+res
+# In[90]:
+def score_comment(comment):
+    vectorized_comment = vectorizer([comment])
+    results = model.predict(vectorized_comment)
+    text = ''
+    for idx, col in enumerate(df.columns[2:]):
+        text += '{}: {}\n'.format(col, results[0][idx]>0.5)
+    return text
+# In[93]:
+# Wrap score_comment in a Gradio UI: one text box in, plain text out.
+interface = gr.Interface(fn=score_comment,
+                         inputs="textbox",  # string shortcut, used instead of gr.inputs.Textbox
+                         outputs="text",
+                        )
+# In[94]:
+# Start the web app; share=True asks Gradio for a public share link.
+# NOTE(review): the README front matter declares this repo a Gradio Space -
+# confirm whether share=True is still wanted when it is hosted there.
+interface.launch(share=True)
+# In[ ]:

jigsaw-toxic-comment-classification-challenge/.DS_Store ADDED Viewed

Binary file (8.2 kB). View file

jigsaw-toxic-comment-classification-challenge/sample_submission.csv/sample_submission.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

jigsaw-toxic-comment-classification-challenge/test.csv/test.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c2513ce4abb98c4d1d216e3ca0d4377d57589a0989aa8c06a840509a16c786e8
+size 60354593

jigsaw-toxic-comment-classification-challenge/test_labels.csv/test_labels.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

jigsaw-toxic-comment-classification-challenge/train.csv/train.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd4084611bd27c939ba98e5e63bc3e5a2c1a4e99477dcba46c829e4c986c429d
+size 68802655

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+tensorflow
+pandas
+numpy
+scikit-learn
+matplotlib

toxicity.keras ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb801227b73177f73c564d8fc07a4d1a5931ec82245ae2cf6c8394f069d62500
+size 26006432