Spaces:
Runtime error
Runtime error
import json | |
import pickle | |
import pandas as pd | |
import nltk | |
nltk.download('stopwords') | |
import regex as re | |
from nltk.corpus import stopwords | |
class Preprocess:
    """Normalise free-text plots for downstream processing.

    The pipeline (see ``apply``) keeps only ASCII letters, lowercases the
    text, collapses whitespace and removes NLTK's English stopwords.
    """

    # Class-level defaults. ``genres`` is shadowed by a fresh per-instance
    # list in ``__init__``; ``y`` is never assigned within this class.
    genres = None
    y = None

    # Lazily-built cache of the English stopword set, shared by all instances
    # so the corpus is read once instead of on every ``remove_stopwords`` call.
    _stop_words = None

    def __init__(self) -> None:
        self.genres = []

    def clean_text(self, text: str) -> str:
        """Return *text* reduced to lowercase ASCII words separated by single spaces.

        Apostrophes are deleted outright (so ``"don't"`` becomes ``"dont"``),
        every other non-letter character is treated as a word separator.
        """
        # Drop apostrophes first so contractions stay a single token.
        text = re.sub(r"'", "", text)
        # Anything that is not an ASCII letter becomes a separator.
        text = re.sub(r"[^a-zA-Z]", " ", text)
        # split()/join collapses runs of whitespace and trims both ends.
        return " ".join(text.split()).lower()

    def remove_stopwords(self, text: str) -> str:
        """Return *text* with NLTK English stopwords removed.

        Tokens are obtained by whitespace splitting and compared
        case-sensitively, so callers should lowercase first
        (``clean_text`` does).
        """
        stop_words = Preprocess._stop_words
        if stop_words is None:
            # Requires the NLTK 'stopwords' corpus to be available locally.
            stop_words = frozenset(stopwords.words('english'))
            Preprocess._stop_words = stop_words
        kept = [word for word in text.split() if word not in stop_words]
        return ' '.join(kept)

    def apply(self, plot) -> str:
        """Run the full cleaning pipeline on *plot*.

        *plot* is coerced with ``str()`` first, so non-string values
        (e.g. ``None`` or numbers) are processed as their string form.
        """
        return self.remove_stopwords(self.clean_text(str(plot)))