# Spaces status banner captured with this file (page residue, not code): Build error
import os
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import TextVectorization

# Load the labelled comments: 'comment_text' is the model input and every
# column from index 2 onward is used as a target.
data = pd.read_csv('train.csv')
data.head(5)  # notebook-style preview; the result is discarded in a script
x = data['comment_text']
y = data[data.columns[2:]].values

# Map raw text to fixed-length integer sequences (1800 tokens per comment,
# vocabulary capped at max_features entries).
max_features = 200000
vectorizer = TextVectorization(
    max_tokens=max_features,
    output_sequence_length=1800,
    output_mode='int',
)
vectorizer.get_vocabulary()  # inspection only; result is discarded
vectorizer.adapt(x.values)
vectorizer("have you watched breaking bad")[:5]  # inspection only
vectorized_text = vectorizer(x.values)

# Input pipeline: cache -> shuffle -> batch -> prefetch.
dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
dataset = dataset.cache()
# The splits below are carved out with take()/skip(), so the element order
# must be identical every time the dataset is iterated. With the default
# reshuffle_each_iteration=True each pass would draw a different permutation,
# and samples would migrate between train, val and test (data leakage).
# A seeded, one-off shuffle keeps the three partitions disjoint and stable.
dataset = dataset.shuffle(160000, seed=42, reshuffle_each_iteration=False)
dataset = dataset.batch(16)
dataset = dataset.prefetch(8)
batch_x, batch_y = next(dataset.as_numpy_iterator())

# 70% / 20% / 10% split, measured in batches.
total_batches = len(dataset)
train = dataset.take(int(total_batches * .7))
val = dataset.skip(int(total_batches * .7)).take(int(total_batches * .2))
test = dataset.skip(int(total_batches * .9)).take(int(total_batches * .1))

train_generator = train.as_numpy_iterator()
next(train_generator)  # pull one training batch as a smoke check
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding

# Classifier: embedding -> bidirectional LSTM -> three dense ReLU layers ->
# six sigmoid outputs.
model = Sequential([
    Embedding(max_features + 1, 32),
    Bidirectional(LSTM(32, activation='tanh')),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(6, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='BinaryCrossentropy',
    metrics=['accuracy'],
)
model.summary()

# Train on the training split, monitoring the validation split, then score
# the held-out test split.
history = model.fit(train, validation_data=val, epochs=10)
model.evaluate(test)

# Threshold the predictions for one test batch at 0.5 (inspection only).
x_batch, y_batch = next(test.as_numpy_iterator())
(model.predict(x_batch) > 0.5).astype(int)

# Single hand-written example; a leading batch axis is added before predict.
input_text = vectorizer('I am coming to kill you pal')
input_text[:7]
batch = next(test.as_numpy_iterator())
res = model.predict(np.expand_dims(input_text, axis=0))
res

# Persist the trained network to disk.
model.save('finalprojecttoxic.h5')
from transformers import pipeline
import gradio as gr

# Reload the classifier from the file written by model.save() and run it on
# one sample sentence.
model = tf.keras.models.load_model('finalprojecttoxic.h5')
input_str = vectorizer('Hey i freaking hate you!. I\'m going to hurt you!')
res = model.predict(np.expand_dims(input_str, axis=0))

# Translation pipeline; the same checkpoint id supplies model and tokenizer.
translator_hindi = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-hi-en",
    tokenizer="Helsinki-NLP/opus-mt-hi-en",
)

# Sample translation.
# NOTE(review): the checkpoint id reads as Hindi -> English, so the name
# `en_to_hin` looks reversed; confirm before relying on it.
hindi_text = "नमस्ते, आप कैसे हैं?"
en_to_hin = translator_hindi(hindi_text)
en_to_hin[0]['translation_text']
def translate_hindi(from_text):
    """Translate `from_text` with the module-level `translator_hindi` pipeline.

    Returns the 'translation_text' field of the first result the pipeline
    produces.
    """
    first_result = translator_hindi(from_text)[0]
    return first_result['translation_text']
# Smoke-check the helper on the sample sentence; the result is discarded.
translate_hindi(
    'नमस्ते, आप कैसे हैं?'
)
def score_comment(comment):
    """Classify one comment and return a text report of the outcome.

    The comment is vectorized with the module-level `vectorizer` and passed
    to `model.predict`. The report has one line per column of `data` from
    index 2 onward, formatted as '<column>: <flag>', where the flag is
    whether the model output at that position is greater than 0.5. Every
    line, including the last, ends with a newline.
    """
    vectorized_comment = vectorizer([comment])
    scores = model.predict(vectorized_comment)[0]
    label_names = data.columns[2:]
    return ''.join(
        '{}: {}\n'.format(label, scores[position] > 0.5)
        for position, label in enumerate(label_names)
    )
def combined_models(input):
    """Return `(translation, toxicity_report)` for the given text.

    The same raw `input` is handed to both `translate_hindi` and
    `score_comment`; the translation is evaluated first.
    """
    # NOTE(review): the classifier scores the untranslated text, not the
    # translation produced here -- confirm this is intended.
    return translate_hindi(input), score_comment(input)
# Gradio UI: one text input, two text outputs (both produced by
# combined_models).
interface = gr.Interface(
    fn=combined_models,
    inputs="text",
    outputs=["text", "text"],
    title="Toxic Comment Analyzer",
)
interface.launch(share=True)