hongaik commited on
Commit
deb200f
β€’
1 Parent(s): a5995da
w2v_ovr_svc.sav β†’ models/w2v_ovr_svc.sav RENAMED
File without changes
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit==1.4.0
+ # NOTE: 're' and 'pickle' are Python standard-library modules — removed
+ # (pip cannot install them; 'pip install re==2.2.1' fails outright)
+ gensim==4.1.2
+ transformers==4.16.1
+ pandas
+ numpy
+ scikit-learn
text_class_app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import utils
3
+
4
+ ########## Title for the Web App ##########
5
+ st.title("Text Classification for Service Feedback")
6
+
7
+ ########## Create Input field ##########
8
+ feedback = st.text_input('Type your text here', 'The staff were extremely polite and helpful!')
9
+
10
+ if st.button('Click for predictions!'):
11
+ with st.spinner('Generating predictions...'):
12
+
13
+ result = get_single_prediction(feedback)
14
+
15
+ st.success(f'Your text has been predicted to fall under the following labels: {result[:-1]}. This text is {result[-1]}.')
16
+
17
+ st.text('Or... Upload a csv file if you have many texts')
18
+
19
+ uploaded_file = st.file_uploader("Please upload a csv file with only 1 column of texts.")
20
+
21
+ if uploaded_file is not None:
22
+
23
+ with st.spinner('Generating predictions...'):
24
+ results = get_multiple_predictions(uploaded_file)
25
+
26
+ st.download_button(
27
+ label="Download results as CSV",
28
+ data=results,
29
+ file_name='results.csv',
30
+ mime='text/csv',
31
+ )
32
+
33
+
utils.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from gensim.models.keyedvectors import KeyedVectors
3
+ from transformers import pipeline
4
+ import pickle
5
+
6
+ w2v = KeyedVectors.load('models/word2vec')
7
+ w2v_vocab = set(sorted(w2v.index_to_key))
8
+ model = pickle.load(open('models/w2v_ovr_svc.sav', 'rb'))
9
+ classifier = pipeline("zero-shot-classification",
10
+ model="facebook/bart-large-mnli", device=0, framework='pt'
11
+ )
12
+
13
+ labels = [
14
+ 'communication', 'waiting time',
15
+ 'information', 'user interface',
16
+ 'facilities', 'location', 'price'
17
+ ]
18
+
19
+ def get_sentiment_label_facebook(list_of_sent_dicts):
20
+ if list_of_sent_dicts['labels'][0] == 'negative':
21
+ return 'negative'
22
+ else:
23
+ return 'positive'
24
+
25
+ def get_single_prediction(text):
26
+
27
+ # manipulate data into a format that we pass to our model
28
+ text = text.lower() #lower case
29
+ text = re.sub('[^0-9a-zA-Z\s]', '', text) #remove special char, punctuation
30
+
31
+ # Remove OOV words
32
+ text = ' '.join([i for i in text.split() if i in w2v_vocab])
33
+
34
+ # Vectorise text and store in new dataframe. Sentence vector = average of word vectors
35
+ text_vectors = np.mean([w2v[i] for i in text.split()], axis=0)
36
+
37
+ # Make predictions
38
+ results = model.predict(text_vectors)
39
+
40
+ # Get sentiment
41
+ sentiment = get_sentiment_label_facebook(classifier(text,
42
+ candidate_labels=['positive', 'negative'],
43
+ hypothesis_template='The sentiment of this is {}'))
44
+
45
+ # Consolidate results
46
+ pred_labels = [labels[idx] for idx, tag in enumerate(results) if tag == 1]
47
+ pred_labels.append(sentiment)
48
+
49
+ return pred_labels
50
+
51
+ def get_multiple_predictions(csv):
52
+
53
+ df = pd.read_csv(csv)
54
+ df.columns = ['sequence']
55
+
56
+ df['sequence'] = df['sequence'].str.lower() #lower case
57
+ df['sequence'] = df['sequence'].str.replace('[^0-9a-zA-Z\s]','') #remove special char, punctuation
58
+
59
+ # Remove OOV words
60
+ df['sequence'] = df['sequence'].apply(lambda x: ' '.join([i for i in x.split() if i in w2v_vocab]))
61
+
62
+ # Remove rows with blank string
63
+ invalid = df[(pd.isna(df['sequence'])) | (df['sequence'] == '')]
64
+
65
+ df.dropna(inplace=True)
66
+ df = df[df['sequence'] != ''].reset_index(drop=True)
67
+
68
+ # Vectorise text and store in new dataframe. Sentence vector = average of word vectors
69
+ series_text_vectors = pd.DataFrame(df['sequence'].apply(lambda x: np.mean([w2v[i] for i in x.split()], axis=0)).values.tolist())
70
+
71
+ # Get predictions
72
+ pred_results = pd.DataFrame(model.predict(series_text_vectors), columns = labels)
73
+
74
+ # Join back to original sequence
75
+ final_results = df.join(series_text_vectors)
76
+
77
+ # Get sentiment labels
78
+ final_results['sentiment'] = final_results['sequence'].apply(lambda x: get_sentiment_label_facebook(classifier(x,
79
+ candidate_labels=['positive', 'negative'],
80
+ hypothesis_template='The sentiment of this is {}'))
81
+ )
82
+
83
+ # Append invalid rows
84
+ if len(invalid) == 0:
85
+ return final_results
86
+ else:
87
+ return pd.concat([final_results, invalid]).reset_index(drop=True)