Spaces:
UNIST-Eunchan committed • ccb93ff
Parent(s): d17a7aa
Update app.py
app.py CHANGED
@@ -9,12 +9,14 @@ from sentence_transformers import SentenceTransformer
 
 sentence_transformer_model = SentenceTransformer("sentence-transformers/all-roberta-large-v1")
 
+
 nltk.download('punkt')
 with open('testbook.json') as f:
     test_book = json.load(f)
 
 tokenizer = AutoTokenizer.from_pretrained("UNIST-Eunchan/bart-dnc-booksum")
 
+
 def load_model(model_name):
     model = AutoModelForSeq2SeqLM.from_pretrained("UNIST-Eunchan/bart-dnc-booksum")
     return model
@@ -38,6 +40,7 @@ def infer(input_ids, max_length, temperature, top_k, top_p):
     return output_sequences
 
 
+@st.cache_data
 def chunking(book_text):
     sentences = sent_tokenize(book_text)
     segments = []
@@ -82,14 +85,14 @@ def chunking(book_text):
 
 '''
 '''
-
+book_index = 0
 _book = test_book[book_index]['book']
 
 #prompts
 st.title("Book Summarization 📚")
 st.write("The almighty king of text generation, GPT-2 comes in four available sizes, only three of which have been publicly made available. Feared for its fake news generation capabilities, it currently stands as the most syntactically coherent model. A direct successor to the original GPT, it reinforces the already established pre-training/fine-tuning killer duo. From the paper: Language Models are Unsupervised Multitask Learners by Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei and Ilya Sutskever.")
 
-book_index = st.sidebar.slider("Select Book Example", value = 0,min_value = 0, max_value=4)
+#book_index = st.sidebar.slider("Select Book Example", value = 0,min_value = 0, max_value=4)
 sent = st.text_area("Text", _book[:512], height = 550)
 max_length = st.sidebar.slider("Max Length", value = 512,min_value = 10, max_value=1024)
 temperature = st.sidebar.slider("Temperature", value = 1.0, min_value = 0.0, max_value=1.0, step=0.05)
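The substantive change in this commit is the @st.cache_data decorator added above chunking (along with pinning book_index = 0 and commenting out the book-selection slider). st.cache_data makes Streamlit memoize a function's return value per unique argument, so sentence tokenization and segmentation are not redone on every widget interaction that reruns the script. A minimal sketch of that pattern, where slow_chunking is an illustrative stand-in for the real chunking function in app.py, not part of this commit:

import time
import streamlit as st

@st.cache_data  # memoize the return value per unique book_text across reruns
def slow_chunking(book_text):
    # stand-in for the sentence-tokenize-and-segment logic in app.py
    time.sleep(2)  # simulate the expensive part
    return [book_text[i:i + 512] for i in range(0, len(book_text), 512)]

text = st.text_area("Text", "some long book text ...", height=200)
start = time.time()
segments = slow_chunking(text)  # served from cache when the text is unchanged
st.write(f"{len(segments)} segment(s) in {time.time() - start:.2f}s")

On the first run the call takes the full two seconds; on subsequent reruns triggered by other widgets (e.g. the Max Length or Temperature sliders) it returns immediately from the cache as long as the text is unchanged.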