hongaik committed on
Commit
a27a834
1 Parent(s): b75e210

updated code

Browse files
.ipynb_checkpoints/utils-checkpoint.py CHANGED
@@ -57,11 +57,12 @@ def get_multiple_predictions(csv):
57
  df = pd.read_csv(csv)
58
  df.columns = ['sequence']
59
 
60
- df['sequence'] = df['sequence'].str.lower() #lower case
61
- df['sequence'] = df['sequence'].str.replace('[^0-9a-zA-Z\s]','') #remove special char, punctuation
 
62
 
63
  # Remove OOV words
64
- df['sequence_clean'] = df['sequence'].apply(lambda x: ' '.join([i for i in x.split() if i in w2v_vocab]))
65
 
66
  # Remove rows with blank string
67
  invalid = df[(pd.isna(df['sequence_clean'])) | (df['sequence_clean'] == '')]
@@ -79,16 +80,17 @@ def get_multiple_predictions(csv):
79
 
80
  # Join back to original sequence
81
  final_results = df.join(pred_results)
82
- final_results.drop(columns=['sequence_clean'], inplace=True)
83
  final_results['others'] = final_results[labels].max(axis=1)
84
  final_results['others'] = final_results['others'].apply(lambda x: 1 if x == 0 else 0)
85
 
86
  # Get sentiment labels
87
- final_results['sentiment'] = final_results['sequence'].apply(lambda x: get_sentiment_label_facebook(classifier(x,
88
  candidate_labels=['positive', 'negative'],
89
  hypothesis_template='The sentiment of this is {}'))
90
  )
91
 
 
 
92
  # Append invalid rows
93
  if len(invalid) == 0:
94
  return final_results.to_csv(index=False).encode('utf-8')
 
57
  df = pd.read_csv(csv)
58
  df.columns = ['sequence']
59
 
60
+ df['sequence_clean'] = df['sequence'].str.lower() #lower case
61
+ df['sequence_clean'] = df['sequence_clean'].str.strip()
62
+ df['sequence_clean'] = df['sequence_clean'].str.replace('[^0-9a-zA-Z\s]','') #remove special char, punctuation
63
 
64
  # Remove OOV words
65
+ df['sequence_clean'] = df['sequence_clean'].apply(lambda x: ' '.join([i for i in x.split() if i in w2v_vocab]))
66
 
67
  # Remove rows with blank string
68
  invalid = df[(pd.isna(df['sequence_clean'])) | (df['sequence_clean'] == '')]
 
80
 
81
  # Join back to original sequence
82
  final_results = df.join(pred_results)
 
83
  final_results['others'] = final_results[labels].max(axis=1)
84
  final_results['others'] = final_results['others'].apply(lambda x: 1 if x == 0 else 0)
85
 
86
  # Get sentiment labels
87
+ final_results['sentiment'] = final_results['sequence_clean'].apply(lambda x: get_sentiment_label_facebook(classifier(x,
88
  candidate_labels=['positive', 'negative'],
89
  hypothesis_template='The sentiment of this is {}'))
90
  )
91
 
92
+ final_results.drop(columns=['sequence_clean'], inplace=True)
93
+
94
  # Append invalid rows
95
  if len(invalid) == 0:
96
  return final_results.to_csv(index=False).encode('utf-8')
utils.py CHANGED
@@ -57,11 +57,12 @@ def get_multiple_predictions(csv):
57
  df = pd.read_csv(csv)
58
  df.columns = ['sequence']
59
 
60
- df['sequence'] = df['sequence'].str.lower() #lower case
61
- df['sequence'] = df['sequence'].str.replace('[^0-9a-zA-Z\s]','') #remove special char, punctuation
 
62
 
63
  # Remove OOV words
64
- df['sequence_clean'] = df['sequence'].apply(lambda x: ' '.join([i for i in x.split() if i in w2v_vocab]))
65
 
66
  # Remove rows with blank string
67
  invalid = df[(pd.isna(df['sequence_clean'])) | (df['sequence_clean'] == '')]
@@ -79,16 +80,17 @@ def get_multiple_predictions(csv):
79
 
80
  # Join back to original sequence
81
  final_results = df.join(pred_results)
82
- final_results.drop(columns=['sequence_clean'], inplace=True)
83
  final_results['others'] = final_results[labels].max(axis=1)
84
  final_results['others'] = final_results['others'].apply(lambda x: 1 if x == 0 else 0)
85
 
86
  # Get sentiment labels
87
- final_results['sentiment'] = final_results['sequence'].apply(lambda x: get_sentiment_label_facebook(classifier(x,
88
  candidate_labels=['positive', 'negative'],
89
  hypothesis_template='The sentiment of this is {}'))
90
  )
91
 
 
 
92
  # Append invalid rows
93
  if len(invalid) == 0:
94
  return final_results.to_csv(index=False).encode('utf-8')
 
57
  df = pd.read_csv(csv)
58
  df.columns = ['sequence']
59
 
60
+ df['sequence_clean'] = df['sequence'].str.lower() #lower case
61
+ df['sequence_clean'] = df['sequence_clean'].str.strip()
62
+ df['sequence_clean'] = df['sequence_clean'].str.replace('[^0-9a-zA-Z\s]','') #remove special char, punctuation
63
 
64
  # Remove OOV words
65
+ df['sequence_clean'] = df['sequence_clean'].apply(lambda x: ' '.join([i for i in x.split() if i in w2v_vocab]))
66
 
67
  # Remove rows with blank string
68
  invalid = df[(pd.isna(df['sequence_clean'])) | (df['sequence_clean'] == '')]
 
80
 
81
  # Join back to original sequence
82
  final_results = df.join(pred_results)
 
83
  final_results['others'] = final_results[labels].max(axis=1)
84
  final_results['others'] = final_results['others'].apply(lambda x: 1 if x == 0 else 0)
85
 
86
  # Get sentiment labels
87
+ final_results['sentiment'] = final_results['sequence_clean'].apply(lambda x: get_sentiment_label_facebook(classifier(x,
88
  candidate_labels=['positive', 'negative'],
89
  hypothesis_template='The sentiment of this is {}'))
90
  )
91
 
92
+ final_results.drop(columns=['sequence_clean'], inplace=True)
93
+
94
  # Append invalid rows
95
  if len(invalid) == 0:
96
  return final_results.to_csv(index=False).encode('utf-8')