Paula Leonova committed on
Commit
66a86a3
1 Parent(s): 94c16ba

Remove main call to speed up rendering

Browse files
Files changed (1) hide show
  1. app.py +73 -73
app.py CHANGED
@@ -16,76 +16,76 @@ ex_text, ex_license, ex_labels = examples_load()
16
  ex_long_text = example_long_text_load()
17
 
18
 
19
- if __name__ == '__main__':
20
- st.header("Summzarization & Multi-label Classification for Long Text")
21
- st.write("This app summarizes and then classifies your long text with multiple labels (_Please allow for a minimum of 30secs to load results_).")
22
- st.write("Inputs: User enters their own custom text and labels")
23
- st.write("Outputs: A summary of the text, pre and post summary label likelihood percentages and a downloadable csv of the results")
24
-
25
- with st.form(key='my_form'):
26
- example_text = ex_long_text #ex_text
27
- display_text = "[Excerpt from Project Gutenberg: Frankenstein]\n" + example_text + "\n\n" + ex_license
28
- text_input = st.text_area("Input any text you want to summaryize & classify here (keep in mind very long text will take a while to process):", display_text)
29
-
30
- if text_input == display_text:
31
- text_input = example_text
32
-
33
- labels = st.text_input('Possible labels (comma-separated):',ex_labels, max_chars=1000)
34
- labels = list(set([x.strip() for x in labels.strip().split(',') if len(x.strip()) > 0]))
35
- submit_button = st.form_submit_button(label='Submit')
36
-
37
- if submit_button:
38
- if len(labels) == 0:
39
- st.write('Enter some text and at least one possible topic to see predictions.')
40
-
41
- # For each body of text, create text chunks of a certain token size required for the transformer
42
- nested_sentences = create_nest_sentences(document = text_input, token_max_length = 1024)
43
-
44
- summary = []
45
- st.markdown("### Text Chunk & Summaries")
46
- st.markdown("Breaks up the original text into sections with complete sentences totaling \
47
- less than 1024 tokens, a requirement for the summarizer.")
48
-
49
- # For each chunk of sentences (within the token max), generate a summary
50
- for n in range(0, len(nested_sentences)):
51
- text_chunk = " ".join(map(str, nested_sentences[n]))
52
- st.markdown(f"###### Chunk {n+1}/{len(nested_sentences)}" )
53
- st.markdown(text_chunk)
54
-
55
- chunk_summary = summarizer_gen(summarizer, sequence=text_chunk, maximum_tokens = 300, minimum_tokens = 20)
56
- summary.append(chunk_summary)
57
- st.markdown("###### Partial Summary")
58
- st.markdown(chunk_summary)
59
- # Combine all the summaries into a list and compress into one document, again
60
- final_summary = " \n".join(list(summary))
61
-
62
- # final_summary = summarizer_gen(summarizer, sequence=text_input, maximum_tokens = 30, minimum_tokens = 100)
63
- st.markdown("### Combined Summary")
64
- st.markdown(final_summary)
65
-
66
- topics, scores = classifier_zero(classifier, sequence=final_summary, labels=labels, multi_class=True)
67
- # st.markdown("### Top Label Predictions: Combined Summary")
68
- # plot_result(topics[::-1][:], scores[::-1][:])
69
- # st.markdown("### Download Data")
70
- data = pd.DataFrame({'label': topics, 'scores_from_summary': scores})
71
- # st.dataframe(data)
72
- # coded_data = base64.b64encode(data.to_csv(index = False). encode ()).decode()
73
- # st.markdown(
74
- # f'<a href="data:file/csv;base64, {coded_data}" download = "data.csv">Download Data</a>',
75
- # unsafe_allow_html = True
76
- # )
77
-
78
- st.markdown("### Top Label Predictions: Summary & Full Text")
79
- topics_ex_text, scores_ex_text = classifier_zero(classifier, sequence=example_text, labels=labels, multi_class=True)
80
- plot_dual_bar_chart(topics, scores, topics_ex_text, scores_ex_text)
81
-
82
- data_ex_text = pd.DataFrame({'label': topics_ex_text, 'scores_from_full_text': scores_ex_text})
83
- data2 = pd.merge(data, data_ex_text, on = ['label'])
84
- st.markdown("### Data Table")
85
-
86
- coded_data = base64.b64encode(data2.to_csv(index = False). encode ()).decode()
87
- st.markdown(
88
- f'<a href="data:file/csv;base64, {coded_data}" download = "data.csv">Click here to download the data</a>',
89
- unsafe_allow_html = True
90
- )
91
- st.dataframe(data2)
 
16
  ex_long_text = example_long_text_load()
17
 
18
 
19
+ # if __name__ == '__main__':
20
+ st.header("Summzarization & Multi-label Classification for Long Text")
21
+ st.write("This app summarizes and then classifies your long text with multiple labels (_Please allow for a minimum of 30secs to load results_).")
22
+ st.write("Inputs: User enters their own custom text and labels")
23
+ st.write("Outputs: A summary of the text, pre and post summary label likelihood percentages and a downloadable csv of the results")
24
+
25
+ with st.form(key='my_form'):
26
+ example_text = ex_long_text #ex_text
27
+ display_text = "[Excerpt from Project Gutenberg: Frankenstein]\n" + example_text + "\n\n" + ex_license
28
+ text_input = st.text_area("Input any text you want to summaryize & classify here (keep in mind very long text will take a while to process):", display_text)
29
+
30
+ if text_input == display_text:
31
+ text_input = example_text
32
+
33
+ labels = st.text_input('Possible labels (comma-separated):',ex_labels, max_chars=1000)
34
+ labels = list(set([x.strip() for x in labels.strip().split(',') if len(x.strip()) > 0]))
35
+ submit_button = st.form_submit_button(label='Submit')
36
+
37
+ if submit_button:
38
+ if len(labels) == 0:
39
+ st.write('Enter some text and at least one possible topic to see predictions.')
40
+
41
+ # For each body of text, create text chunks of a certain token size required for the transformer
42
+ nested_sentences = create_nest_sentences(document = text_input, token_max_length = 1024)
43
+
44
+ summary = []
45
+ st.markdown("### Text Chunk & Summaries")
46
+ st.markdown("Breaks up the original text into sections with complete sentences totaling \
47
+ less than 1024 tokens, a requirement for the summarizer.")
48
+
49
+ # For each chunk of sentences (within the token max), generate a summary
50
+ for n in range(0, len(nested_sentences)):
51
+ text_chunk = " ".join(map(str, nested_sentences[n]))
52
+ st.markdown(f"###### Chunk {n+1}/{len(nested_sentences)}" )
53
+ st.markdown(text_chunk)
54
+
55
+ chunk_summary = summarizer_gen(summarizer, sequence=text_chunk, maximum_tokens = 300, minimum_tokens = 20)
56
+ summary.append(chunk_summary)
57
+ st.markdown("###### Partial Summary")
58
+ st.markdown(chunk_summary)
59
+ # Combine all the summaries into a list and compress into one document, again
60
+ final_summary = " \n".join(list(summary))
61
+
62
+ # final_summary = summarizer_gen(summarizer, sequence=text_input, maximum_tokens = 30, minimum_tokens = 100)
63
+ st.markdown("### Combined Summary")
64
+ st.markdown(final_summary)
65
+
66
+ topics, scores = classifier_zero(classifier, sequence=final_summary, labels=labels, multi_class=True)
67
+ # st.markdown("### Top Label Predictions: Combined Summary")
68
+ # plot_result(topics[::-1][:], scores[::-1][:])
69
+ # st.markdown("### Download Data")
70
+ data = pd.DataFrame({'label': topics, 'scores_from_summary': scores})
71
+ # st.dataframe(data)
72
+ # coded_data = base64.b64encode(data.to_csv(index = False). encode ()).decode()
73
+ # st.markdown(
74
+ # f'<a href="data:file/csv;base64, {coded_data}" download = "data.csv">Download Data</a>',
75
+ # unsafe_allow_html = True
76
+ # )
77
+
78
+ st.markdown("### Top Label Predictions: Summary & Full Text")
79
+ topics_ex_text, scores_ex_text = classifier_zero(classifier, sequence=example_text, labels=labels, multi_class=True)
80
+ plot_dual_bar_chart(topics, scores, topics_ex_text, scores_ex_text)
81
+
82
+ data_ex_text = pd.DataFrame({'label': topics_ex_text, 'scores_from_full_text': scores_ex_text})
83
+ data2 = pd.merge(data, data_ex_text, on = ['label'])
84
+ st.markdown("### Data Table")
85
+
86
+ coded_data = base64.b64encode(data2.to_csv(index = False). encode ()).decode()
87
+ st.markdown(
88
+ f'<a href="data:file/csv;base64, {coded_data}" download = "data.csv">Click here to download the data</a>',
89
+ unsafe_allow_html = True
90
+ )
91
+ st.dataframe(data2)