# Load the UCI red-wine-quality data set, derive a binary target
# ("is the rating <= 5?") and plot histograms of the original and the
# aggregated target side by side.

# Import necessary modules
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import linear_model
# `sklearn.cross_validation` was removed in scikit-learn 0.20;
# `train_test_split` lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Load data (the file is semicolon-separated).
# NOTE: the URL must not carry trailing whitespace, otherwise the HTTP
# request is rejected as an invalid URL.
df = pd.read_csv(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/'
    'wine-quality/winequality-red.csv',
    sep=';',
)

# Feature matrix: every column except the target variable.
# `axis` is passed by keyword; newer pandas rejects it positionally.
X = df.drop('quality', axis=1).values
y1 = df['quality'].values  # original target values
y = y1 <= 5  # aggregated boolean target: is the rating <= 5?

# Plot histograms of the original target variable
# and the aggregated target variable.
plt.figure(figsize=(20, 5))

plt.subplot(1, 2, 1)
plt.hist(y1)
plt.xlabel('original target value')
plt.ylabel('count')

plt.subplot(1, 2, 2)
# Histogram the boolean target as explicit 0/1 integers rather than
# relying on an implicit bool -> number conversion; `y` itself stays
# boolean for any code that uses it afterwards.
plt.hist(y.astype(int))
plt.xlabel('aggregated target value')

plt.show()