class GibberishDetector:
    """Classify a comment as actual text or a random sequence of characters.

    The wrapped model has been trained to learn character sequence
    probabilities from a set of approximately 4 million approved comments.
    The detector only requires that the model expose an
    ``is_gibberish(text)`` method.
    """

    # Class-level fallback; every instance overrides it in ``__init__``.
    model = None

    # Sequences that are allowed even though the model might see these as
    # gibberish.
    skip_sequences = ['duh', 'ah']

    def __init__(self, model):
        """Store the model that will be asked to judge texts."""
        self.model = model

    def predict(self, text):
        """Return the negation of ``contains_gibberish(text)``."""
        flagged = self.contains_gibberish(text)
        return not flagged

    def contains_gibberish(self, text):
        """Report whether *text* is considered gibberish.

        Returns ``False`` without consulting the model when any entry of
        ``skip_sequences`` occurs in *text*; otherwise returns whatever
        ``self.model.is_gibberish(text)`` returns.
        """
        # NOTE(review): this is a substring test, not a whole-token match, so
        # any text that merely contains e.g. 'ah' (such as 'yeah') bypasses
        # the model -- confirm this is intended.
        if any(allowed in text for allowed in self.skip_sequences):
            return False

        # Nothing whitelisted was found: defer to the model.
        return self.model.is_gibberish(text)