fnavales committed on
Commit
2a04af3
1 Parent(s): c239532

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -5,9 +5,10 @@ from transformers import BertTokenizerFast as BertTokenizer, BertModel
5
  import pytorch_lightning as pl
6
 
7
 
8
- BERT_MODEL_NAME = 'bert-base-cased'
9
  tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
10
  LABEL_COLUMNS = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
 
11
 
12
 
13
  class ToxicCommentTagger(pl.LightningModule):
@@ -36,7 +37,7 @@ def predict(model, tokenizer, sentence):
36
  encoding = tokenizer.encode_plus(
37
  sentence,
38
  add_special_tokens=False,
39
- max_length=510,
40
  return_token_type_ids=False,
41
  padding="max_length",
42
  return_attention_mask=True,
@@ -44,7 +45,7 @@ def predict(model, tokenizer, sentence):
44
  )
45
 
46
  # define target chunksize
47
- chunksize = 512
48
 
49
  # split into chunks of 510 tokens, we also convert to list (default is tuple which is immutable)
50
  input_id_chunks = list(encoding['input_ids'][0].split(chunksize - 2))
 
5
  import pytorch_lightning as pl
6
 
7
 
8
+ BERT_MODEL_NAME = 'bert-base-uncased'
9
  tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
10
  LABEL_COLUMNS = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
11
+ MAX_TOKEN_COUNT = 300
12
 
13
 
14
  class ToxicCommentTagger(pl.LightningModule):
 
37
  encoding = tokenizer.encode_plus(
38
  sentence,
39
  add_special_tokens=False,
40
+ max_length=MAX_TOKEN_COUNT,
41
  return_token_type_ids=False,
42
  padding="max_length",
43
  return_attention_mask=True,
 
45
  )
46
 
47
  # define target chunksize
48
+ chunksize = MAX_TOKEN_COUNT
49
 
50
  # split into chunks of MAX_TOKEN_COUNT - 2 tokens, we also convert to list (default is tuple which is immutable)
51
  input_id_chunks = list(encoding['input_ids'][0].split(chunksize - 2))