Spaces:
Runtime error
Runtime error
update codes
Browse files- app.py +187 -2
- data/climate_change_tweets.csv +0 -0
- data/imdb.csv +0 -0
- data/sentiment_results.csv +0 -0
- data/zero_shot_results.csv +0 -0
- survey_analytics_library.py +36 -120
app.py
CHANGED
@@ -18,6 +18,8 @@ from scipy.stats import zscore
|
|
18 |
|
19 |
# nlp
|
20 |
from bertopic import BERTopic
|
|
|
|
|
21 |
|
22 |
# custom
|
23 |
import survey_analytics_library as LIB
|
@@ -61,6 +63,14 @@ def read_topic_results():
|
|
61 |
return topic_results
|
62 |
topic_results = read_topic_results()
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
# write title of app
|
65 |
st.title('DACoP - Survey Analytics')
|
66 |
st.markdown('''---''')
|
@@ -366,9 +376,184 @@ st.markdown('''---''')
|
|
366 |
|
367 |
|
368 |
st.header('Classifiying Text Responses and Sentiment Analysis')
|
369 |
-
st.write('''
|
370 |
With survey responses, sometimes as a business user, we already have an general idea of what responders are talking about and we want to categorise or classify the responses accordingly.
|
371 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372 |
|
|
|
|
|
|
|
373 |
''')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
st.write('\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# nlp
|
20 |
from bertopic import BERTopic
|
21 |
+
from transformers import pipeline
|
22 |
+
import transformers
|
23 |
|
24 |
# custom
|
25 |
import survey_analytics_library as LIB
|
|
|
63 |
return topic_results
|
64 |
topic_results = read_topic_results()
|
65 |
|
66 |
+
@st.cache
def read_climate_change_results():
    '''
    Load the precomputed climate-change results from csv.

    returns:
        a tuple of two dataframes: the sentiment analysis results
        and the zero-shot classification results
    '''
    # cached by streamlit so the csvs are only read once per session
    sentiment_df = pd.read_csv(data_path + 'sentiment_results.csv')
    zero_shot_df = pd.read_csv(data_path + 'zero_shot_results.csv')
    return sentiment_df, zero_shot_df
sentiment_results, zero_shot_results = read_climate_change_results()
|
72 |
+
|
73 |
+
|
74 |
# write title of app
|
75 |
st.title('DACoP - Survey Analytics')
|
76 |
st.markdown('''---''')
|
|
|
376 |
|
377 |
|
378 |
# section: zero-shot classification and sentiment analysis demo
# (note: the header typo 'Classifiying' is a user-facing string and is kept as-is)
st.header('Classifiying Text Responses and Sentiment Analysis')
# fix: plain string instead of an f-string with no placeholders (ruff F541);
# rendered text is byte-identical
st.write('''
With survey responses, sometimes as a business user, we already have an general idea of what responders are talking about and we want to categorise or classify the responses accordingly.
An an example, within the topic of 'Climate Change', we are interested in finance, politics, technology, and wildlife.
Using **Zero-shot Classification**, we can classify responses into one of these four categories.
As an added bonus, we can also find out how responders feel about the categories using **Sentiment Analysis**.
We'll use a different set of 10,000 tweets related to climate change.
''')
st.write('\n')

# rename column so the displayed table has a friendly header
sentiment_results = sentiment_results.rename(columns={'sequence':'Tweet'})
st.dataframe(sentiment_results[['Tweet']])
|
391 |
+
|
392 |
+
# NOTE(review): function name has a typo ('transfomer') — kept unchanged in case
# it is referenced elsewhere outside this view.
@st.cache(allow_output_mutation=True)
def load_transfomer_pipelines():
    '''
    Load the two transformer pipelines used by this section from local
    model folders under model_path; cached so the models load only once.

    returns:
        a tuple of (zero-shot classification pipeline, sentiment analysis pipeline)
    '''
    # zero-shot classifier; return_all_scores=True so every candidate
    # label gets a score, not just the top one
    classifier_zero_shot = pipeline(
        task='zero-shot-classification',
        model=model_path+'distilbart-mnli-12-1',
        return_all_scores=True
    )
    # binary sentiment classifier (NEGATIVE/POSITIVE scores that sum to 1)
    classifier_sentiment = pipeline(
        task='sentiment-analysis',
        model=model_path+'distilbert-base-uncased-finetuned-sst-2-english',
        return_all_scores=True
    )
    return classifier_zero_shot, classifier_sentiment
classifier_zero_shot, classifier_sentiment = load_transfomer_pipelines()
|
406 |
+
|
407 |
+
# define candidate labels — the default categories offered for zero-shot
# classification; the user can override them in the form below
candidate_labels = [
    'finance',
    'politics',
    'technology',
    'wildlife',
]

# define sample tweet — default row shown before the user picks one
sample_tweet_index = 5000

# define the first and last topic number
# create range of index: used to bound the number_input widget below
tweet_index = sentiment_results.index
first_tweet = tweet_index[0]
last_tweet = tweet_index[-1]

st.write(f'''
As a demonstration, we'll define some categories and pick a tweet to classify and determine its sentiment.
Feel free to add your own categories or even input your own text!
''')

# interactive input for user to define candidate labels and tweet index for analysis
with st.form('classify_tweets'):
    # input for labels; default shows the list joined as 'finance, politics, ...'
    user_defined_labels = st.text_input('Enter categories (separate categories by comma):', ', '.join(candidate_labels))
    # NOTE(review): this replaces the list with a comma-separated *string*;
    # the transformers zero-shot pipeline accepts that form — confirm for the
    # pinned transformers version
    candidate_labels = user_defined_labels
    # input for tweet index, clamped to the valid index range
    user_define_tweet = st.number_input(f'Enter tweet index (from {first_tweet} to {last_tweet}) to classify:', min_value=first_tweet, max_value=last_tweet, value=sample_tweet_index)
    sample_tweet_index = user_define_tweet
    sample_tweet = sentiment_results['Tweet'].iloc[sample_tweet_index]
    # input for user defined text
    user_defined_input = st.text_input('Enter custom text (optional, leave blank to use Tweets):', '')
    # check if user has entered any custom text
    # if user_define_input is not blank, then override sample_tweet
    if user_defined_input:
        sample_tweet = user_defined_input

    # submit form
    submit = st.form_submit_button('Classify Tweet')
|
447 |
+
|
448 |
st.write('\n')
st.write(f'''
Here are the results:
''')
st.write(f'Input Text: *\'{sample_tweet}\'*')

# get predictions from models: zero-shot classification over the user's
# candidate labels, plus binary sentiment for the same text
zero_shot_sample = classifier_zero_shot(sample_tweet, candidate_labels)
sentiment_sample = classifier_sentiment(sample_tweet)

# get sentiment
# NOTE(review): indexing [1] assumes the pipeline returns a flat
# [negative_dict, positive_dict] list for a single input; with
# return_all_scores=True some transformers versions return a *nested*
# list ([[neg, pos]]), which would raise here — TODO confirm against the
# pinned transformers version (this matches the reported runtime error)
sentiment_sample = sentiment_sample[1].get('score')
# positive score >= 0.5 is labelled positive, otherwise negative
sentiment_label = 'positive'
if sentiment_sample < 0.5:
    sentiment_label = 'negative'

st.write(f'''
The main category is: **{zero_shot_sample['labels'][0]}** with a score of {round(zero_shot_sample['scores'][0], 2)}
Main category score ranges from 0 to 1, with 1 being very likely.

The full set of scores are: {dict(zip(zero_shot_sample['labels'], [round(score, 2) for score in zero_shot_sample['scores']]))}
Full set of scores cores add up to 1.

The sentiment is: **{sentiment_label}** with a score of {round(sentiment_sample, 2)}
Sentiment score ranges from 0 to 1, with 1 being very positive.
''')
st.write('\n')
st.write('\n')
|
476 |
+
|
477 |
+
# drop unused columns and rename columns for the precomputed zero-shot results
zero_shot_results = zero_shot_results.drop('labels_scores', axis=1)
zero_shot_results = zero_shot_results.rename(columns={'sequence':'tweet', 'label':'category'})
st.write(f'''
Lets review all the tweets and how they fall into the categories of finance, politics, technology, and wildlife.
''')

st.dataframe(zero_shot_results)

st.write(f'''
We can observe that the model does not have strong confidence in predicting the categories for some of the tweets.
It is likely that the tweet does not natually fall into one of the defined categories.
Before performing further analysis on our results, we can set a score threshold to only keep predictions that we're confident in.
''')
st.write('\n')

# interactive input for user to define candidate labels and tweet index for analysis
with st.form('classification_score_threshold'):
    # minimum classification score a tweet must reach to be kept
    user_defined_threshold = st.number_input('Enter score threshold (between 0.01 and 0.99):', min_value=0.01, max_value=0.99, value=0.7, step=0.05)
    # submit form
    submit = st.form_submit_button('Set Threshold')
st.write('\n')

# filter and keep results with score above defined threshold
# .copy() avoids pandas SettingWithCopy warnings on later mutation
zero_shot_results_clean = zero_shot_results.loc[(zero_shot_results['score'] >= user_defined_threshold)].copy()

# rename columns
# NOTE(review): assumes sentiment_results has exactly two columns at this
# point (sequence/Tweet and the sentiment score) — verify upstream renames
sentiment_results.columns = ['tweet', 'sentiment']

st.write(f'''
The predictions get better with a higher threshold, but reduces the final number of tweets available for further analysis.
Out of the 10,000 tweets, we are now left with {len(zero_shot_results_clean)}.
We also add on the sentiment score for the tweets, the score here ranges from 0 (most negative) to 1 (most positive).
''')

# merge in sentiment score on index
# drop unused columns
# left join on the dataframe index: both result sets are assumed to be
# row-aligned to the same original tweets
classification_sentiment_df = pd.merge(zero_shot_results_clean, sentiment_results[['sentiment']], how='left', left_index=True, right_index=True)
classification_sentiment_df = classification_sentiment_df[['tweet', 'category', 'score', 'sentiment']]
st.dataframe(classification_sentiment_df)

st.write(f'''
The difficult part for zero-shot classification is defining the right set of categories for each business case.
Some trial and error is required to find the appropriate words that can return the optimal results.
''')
st.write('\n')

# group by category, count tweets and get mean of sentiment
classification_sentiment_agg = classification_sentiment_df.groupby(['category']).agg({'tweet':'count', 'sentiment':'mean'}).reset_index()
classification_sentiment_agg = classification_sentiment_agg.rename(columns={'tweet':'count'})

st.write(f'''
Finally, we can visualise the percentage of tweets in each category and the respective average sentiment scores.
''')

# donut chart: share of tweets per category
fig = px.pie(
    classification_sentiment_agg,
    values='count',
    names='category',
    hole=0.35,
    title='Percentage of Tweets in Each Category',
    template='simple_white',
    width=1000,
    height=600
)
fig.update_traces(textposition='inside', textinfo='percent+label')
st.plotly_chart(fig)

# bar chart: mean sentiment per category, with a reference line at the
# neutral 0.5 mark
fig = px.bar(
    classification_sentiment_agg,
    x='category',
    y='sentiment',
    title='Average Sentiment of Tweets in Each Category <br><sup>Overall, the sentiment of the tweets are on the negative side.</sup>',
    template='simple_white',
    width=1000,
    height=600
)
fig.update_yaxes(range=[0, 1])
fig.add_hline(y=0.5, line_width=3, line_color='darkgreen')
st.plotly_chart(fig)

st.write('\n')
st.markdown('''---''')
|
data/climate_change_tweets.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/imdb.csv
DELETED
The diff for this file is too large to render.
See raw diff
|
|
data/sentiment_results.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/zero_shot_results.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
survey_analytics_library.py
CHANGED
@@ -18,126 +18,6 @@ from nltk.corpus import stopwords
|
|
18 |
|
19 |
|
20 |
|
21 |
-
# # create elbow plot with kmeans to find optimal number of clusters
|
22 |
-
# def create_elbow_plot_kmeans(df, num_clusters, init_method='k-means++', n_init=10, random_state=42, plot=True, template='simple_white', save=False):
|
23 |
-
# '''
|
24 |
-
# create elbow plot with kmeans to find optimal number of clusters based on inertia
|
25 |
-
# where the clusters strikes a balance between being not segmented enough and being too fragmented
|
26 |
-
|
27 |
-
# we look for the point of diminishing returns (also known as the 'elbow') in terms of the inertia,
|
28 |
-
# where inertia is how close the data points are to their respective centers or centroids
|
29 |
-
|
30 |
-
# arguments:
|
31 |
-
# df (df): a dataframe of data to cluster
|
32 |
-
# num_clusters (int): number of clusters to plot
|
33 |
-
# init_method (str): default to 'k-means++', other option is 'random'
|
34 |
-
# n_init (int): default to 10, number of times to run model, cost from the best run will be used
|
35 |
-
# random_state (int): default to 42, random seed used to initialise the model
|
36 |
-
# plot (bool): default to True, option to turn off plots
|
37 |
-
# template (str): default to 'simple_white', change as desired
|
38 |
-
# save (bool): default to False, if True save plot as .html file
|
39 |
-
|
40 |
-
# returns:
|
41 |
-
# a list of inertia for each run
|
42 |
-
# '''
|
43 |
-
|
44 |
-
# # create empty list to store inertia for each run
|
45 |
-
# inertia = []
|
46 |
-
# # define range of clusters to try
|
47 |
-
# k = range(2, num_clusters+1)
|
48 |
-
|
49 |
-
# # loop through number of clusters
|
50 |
-
# for num_clusters in tqdm(k):
|
51 |
-
# # define model
|
52 |
-
# kmeans = KMeans(n_clusters=num_clusters, init=init_method, n_init=n_init, random_state=random_state)
|
53 |
-
# # fit and predict data
|
54 |
-
# kmeans.fit_predict(df)
|
55 |
-
# # get predicted labels
|
56 |
-
# predicted_labels = kmeans.labels_
|
57 |
-
# # append score to list of scores
|
58 |
-
# inertia.append(kmeans.inertia_)
|
59 |
-
|
60 |
-
# # plot elbow plot
|
61 |
-
# if plot:
|
62 |
-
# fig = px.line(
|
63 |
-
# pd.DataFrame({'num_clusters':list(k), 'inertia':inertia}),
|
64 |
-
# x='num_clusters',
|
65 |
-
# y='inertia',
|
66 |
-
# title='Elbow Plot for Optimal Number of Clusters with '+init_method,
|
67 |
-
# markers=True,
|
68 |
-
# template=template,
|
69 |
-
# width=800,
|
70 |
-
# height=500,
|
71 |
-
# )
|
72 |
-
# st.plotly_chart(fig, use_container_width=True)
|
73 |
-
# if save:
|
74 |
-
# fig.write_html('Elbow Plot for Optimal Number of Clusters with '+init_method+'.html')
|
75 |
-
|
76 |
-
# # return
|
77 |
-
# return inertia
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
# # create plot of silhouette scores with sklearn model to find optimal number of clusters
|
82 |
-
# def silhouette_score_plot_kmeans(df, num_clusters, init_method='k-means++', n_init=10, random_state=42, plot=True, template='simple_white', save=False):
|
83 |
-
# '''
|
84 |
-
# create plot of silhouette score with kmeans to find optimal number of clusters
|
85 |
-
# where the clusters strikes a balance between being not segmented enough and being too fragmented
|
86 |
-
# the closer the score is to 1, the more easily distinguishable are the clusters from each other
|
87 |
-
|
88 |
-
# arguments:
|
89 |
-
# df (df): a dataframe of data to cluster
|
90 |
-
# num_clusters (int): number of clusters to plot
|
91 |
-
# init_method (str): default to 'k-means++', other option is 'random'
|
92 |
-
# n_init (int): default to 10, number of times to run model, cost from the best run will be used
|
93 |
-
# random_state (int): default to 42, random seed used to initialise the model
|
94 |
-
# plot (bool): default to True, option to turn off plots
|
95 |
-
# template (str): default to 'simple_white', change as desired
|
96 |
-
# save (bool): default to False, if True save plot as .html file
|
97 |
-
|
98 |
-
# returns:
|
99 |
-
# a list of silhouette scores for each run
|
100 |
-
# '''
|
101 |
-
|
102 |
-
# # create empty list to store silhoutte scores for each run
|
103 |
-
# silhouette_scores = []
|
104 |
-
# # define range of clusters to try
|
105 |
-
# k = range(2, num_clusters+1)
|
106 |
-
|
107 |
-
# # loop through number of clusters
|
108 |
-
# for num_clusters in tqdm(k):
|
109 |
-
# # define model
|
110 |
-
# kmeans = KMeans(n_clusters=num_clusters, init=init_method, n_init=n_init, random_state=random_state)
|
111 |
-
# # fit and predict data
|
112 |
-
# kmeans.fit_predict(df)
|
113 |
-
# # get predicted labels
|
114 |
-
# predicted_labels = kmeans.labels_
|
115 |
-
# # get silhoutte score
|
116 |
-
# score = silhouette_score(df, predicted_labels)
|
117 |
-
# # append score to list of scores
|
118 |
-
# silhouette_scores.append(score)
|
119 |
-
|
120 |
-
# # plot silhouette scores
|
121 |
-
# if plot:
|
122 |
-
# fig = px.line(
|
123 |
-
# pd.DataFrame({'num_clusters':list(k), 'silhouette_scores':silhouette_scores}),
|
124 |
-
# x='num_clusters',
|
125 |
-
# y='silhouette_scores',
|
126 |
-
# title='Silhouette Scores for Optimal Number of Clusters with '+init_method,
|
127 |
-
# markers=True,
|
128 |
-
# template=template,
|
129 |
-
# width=800,
|
130 |
-
# height=500,
|
131 |
-
# )
|
132 |
-
# st.plotly_chart(fig, use_container_width=True)
|
133 |
-
# if save:
|
134 |
-
# fig.write_html('Silhouette Scores for Optimal Number of Clusters with '+init_method+'.html')
|
135 |
-
|
136 |
-
# # return
|
137 |
-
# return silhouette_scores
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
# replace text with multiple replacements
|
142 |
def replace_text(string, dict_of_replacements):
|
143 |
'''
|
@@ -379,5 +259,41 @@ def convert_zero_shot_classification_output_to_dataframe(model_output):
|
|
379 |
# drop unused columns
|
380 |
results = results.drop(['labels', 'scores'], axis=1)
|
381 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
# return
|
383 |
return results
|
|
|
18 |
|
19 |
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
# replace text with multiple replacements
|
22 |
def replace_text(string, dict_of_replacements):
|
23 |
'''
|
|
|
259 |
# drop unused columns
|
260 |
results = results.drop(['labels', 'scores'], axis=1)
|
261 |
|
262 |
+
# return
|
263 |
+
return results
|
264 |
+
|
265 |
+
|
266 |
+
# convert transformer model sentiment classification prediction into dataframe
def convert_sentiment_classification_output_to_dataframe(text_input, model_output):
    '''
    Convert sentiment classification output into a dataframe.

    The distilbert-base-uncased-finetuned-sst-2-english model outputs, for
    each input sequence, a two-element list of dictionaries:
    [{'label': 'NEGATIVE', 'score': ...}, {'label': 'POSITIVE', 'score': ...}]
    The two scores sum to 1; only the positive score (index 1) is kept and
    paired with the corresponding input sequence.

    arguments:
        text_input (list): the sequences that were fed to the model
        model_output (list): the per-sequence label/score pairs

    return:
        a dataframe with columns 'sequence' and 'score'
    '''
    # pull the positive-class score (second entry) out of each prediction
    positive_scores = [pair[1].get('score') for pair in model_output]
    # pair each input sequence with its positive score
    results = pd.DataFrame({'sequence': text_input, 'score': positive_scores})
    # return
    return results
|