Spaces:
Sleeping
Sleeping
sahibnanda
committed on
Commit
•
9098297
1
Parent(s):
d9579cb
Paraphrasing, Summarizing Added
Browse files- GrammarSummary/new_model.weights.h5 +3 -0
- {TextSummarizationModel → ModelFiles}/assets/tokenizer/merges.txt +0 -0
- {TextSummarizationModel → ModelFiles}/assets/tokenizer/vocabulary.json +0 -0
- {TextSummarizationModel → ModelFiles}/config.json +0 -0
- {TextSummarizationModel → ModelFiles}/metadata.json +0 -0
- {TextSummarizationModel → ModelFiles}/model.weights.h5 +0 -0
- {TextSummarizationModel → ModelFiles}/tokenizer.json +0 -0
- Paraphrase/new_model.weights.h5 +3 -0
- {TextSummarizationModel → WithoutGrammarSummary}/new_model.weights.h5 +0 -0
- app.py +29 -10
- textFunctionality.py +50 -0
- textSFunctionality.py +0 -27
GrammarSummary/new_model.weights.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e1246b81386e12ce55eb44edc86a97d9d892d7b5adf4222c502fab8b801805e
|
3 |
+
size 1673753584
|
{TextSummarizationModel → ModelFiles}/assets/tokenizer/merges.txt
RENAMED
File without changes
|
{TextSummarizationModel → ModelFiles}/assets/tokenizer/vocabulary.json
RENAMED
File without changes
|
{TextSummarizationModel → ModelFiles}/config.json
RENAMED
File without changes
|
{TextSummarizationModel → ModelFiles}/metadata.json
RENAMED
File without changes
|
{TextSummarizationModel → ModelFiles}/model.weights.h5
RENAMED
File without changes
|
{TextSummarizationModel → ModelFiles}/tokenizer.json
RENAMED
File without changes
|
Paraphrase/new_model.weights.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b6ab0eb6c02bd23771aa31e775c848bf3929ef11c34b8f23f788ec4eb953ae0
|
3 |
+
size 1673753584
|
{TextSummarizationModel → WithoutGrammarSummary}/new_model.weights.h5
RENAMED
File without changes
|
app.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
import streamlit as st
|
2 |
-
from
|
3 |
|
4 |
# Set the page configuration and theme once at the top
|
5 |
-
st.set_page_config(page_title="Text Summarization", page_icon="β")
|
6 |
|
7 |
st.write(
|
8 |
"""
|
@@ -35,19 +35,38 @@ st.write(
|
|
35 |
)
|
36 |
|
37 |
def main():
|
38 |
-
st.title('Text Summarization')
|
|
|
39 |
|
40 |
# Text area for user input
|
41 |
-
user_input = st.text_area("#### **Enter Text To Summarize**:", height=300)
|
42 |
|
43 |
-
# Button to trigger summarization
|
44 |
-
if st.button("Summarize"):
|
45 |
if user_input:
|
46 |
-
summary = generateText(user_input)
|
47 |
-
st.write("#### **Summarized Text**:")
|
48 |
st.write(summary)
|
49 |
else:
|
50 |
-
st.write("Please Enter Some Text To Summarize
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
if __name__ == '__main__':
|
53 |
-
main()
|
|
|
1 |
import streamlit as st
|
2 |
+
from textFunctionality import generateText, modelWG, modelWOG, modelParaphrase
|
3 |
|
4 |
# Set the page configuration and theme once at the top
|
5 |
+
st.set_page_config(page_title="Text Summarization and Paraphrasing", page_icon="β")
|
6 |
|
7 |
st.write(
|
8 |
"""
|
|
|
35 |
)
|
36 |
|
37 |
def main():
|
38 |
+
st.title('Text Summarization And Paraphrasing')
|
39 |
+
st.write("**Summarize Without Grammar Performs Better But Misses Out On Grammar Like Punctuation, Capitalization, etc.**")
|
40 |
|
41 |
# Text area for user input
|
42 |
+
user_input = st.text_area("#### **Enter Text To Summarize or Paraphrase**:", height=300)
|
43 |
|
44 |
+
# Button to trigger summarization with grammar
|
45 |
+
if st.button("Summarize With Grammar"):
|
46 |
if user_input:
|
47 |
+
summary = generateText(user_input, modelWG, 200, False)
|
48 |
+
st.write("#### **Summarized Text (With Grammar)**:")
|
49 |
st.write(summary)
|
50 |
else:
|
51 |
+
st.write("**Please Enter Some Text To Summarize.**")
|
52 |
+
|
53 |
+
# Button to trigger summarization without grammar
|
54 |
+
if st.button("Summarize Without Grammar"):
|
55 |
+
if user_input:
|
56 |
+
summary = generateText(user_input, modelWOG, 200, True)
|
57 |
+
st.write("#### **Summarized Text (Without Grammar)**:")
|
58 |
+
st.write(summary)
|
59 |
+
else:
|
60 |
+
st.write("**Please Enter Some Text To Summarize.**")
|
61 |
+
|
62 |
+
# Button to trigger paraphrasing
|
63 |
+
if st.button("Paraphrase"):
|
64 |
+
if user_input:
|
65 |
+
paraphrase = generateText(user_input, modelParaphrase, 500, False)
|
66 |
+
st.write("#### **Paraphrased Text**:")
|
67 |
+
st.write(paraphrase)
|
68 |
+
else:
|
69 |
+
st.write("**Please Enter Some Text To Paraphrase.**")
|
70 |
|
71 |
if __name__ == '__main__':
|
72 |
+
main()
|
textFunctionality.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import os
|
3 |
+
import tensorflow as tf
|
4 |
+
import keras
|
5 |
+
import keras_nlp
|
6 |
+
|
7 |
+
# Constants
|
8 |
+
MAX_ENCODER_SEQUENCE_LENGTH = 512
|
9 |
+
MAX_DECODER_SEQUENCE_LENGTH_FOR_SUMMARY = 128
|
10 |
+
MAX_DECODER_SEQUENCE_LENGTH_FOR_PARAPHRASE = 512
|
11 |
+
|
12 |
+
|
13 |
+
# Model Files
|
14 |
+
MODEL_PATH = r"ModelFiles"
|
15 |
+
|
16 |
+
|
17 |
+
# Weight files for each fine-tuned model (summary without grammar, summary with grammar, paraphrase)
|
18 |
+
WEIGHT_PATH_WO_G = r"WithoutGrammarSummary/new_model.weights.h5"
|
19 |
+
WEIGHT_PATH_W_G = r"GrammarSummary/new_model.weights.h5"
|
20 |
+
WEIGHT_PATH_PARAPHRASE = r"Paraphrase/new_model.weights.h5"
|
21 |
+
|
22 |
+
def cleanText(text):
|
23 |
+
text = str(text)
|
24 |
+
text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
|
25 |
+
text = text.lower()
|
26 |
+
return text
|
27 |
+
|
28 |
+
# Preprocessor For Summary
|
29 |
+
preprocessorForSummary = keras_nlp.models.BartSeq2SeqLMPreprocessor.from_preset(MODEL_PATH, encoder_sequence_length=MAX_ENCODER_SEQUENCE_LENGTH,decoder_sequence_length=MAX_DECODER_SEQUENCE_LENGTH_FOR_SUMMARY,)
|
30 |
+
preprocessorForParaphrase = keras_nlp.models.BartSeq2SeqLMPreprocessor.from_preset(MODEL_PATH, encoder_sequence_length=MAX_ENCODER_SEQUENCE_LENGTH,decoder_sequence_length=MAX_DECODER_SEQUENCE_LENGTH_FOR_PARAPHRASE,)
|
31 |
+
|
32 |
+
# Load Model
|
33 |
+
|
34 |
+
# "WithoutGrammarSummary" Model
|
35 |
+
modelWOG = keras_nlp.models.BartSeq2SeqLM.from_preset(MODEL_PATH, preprocessor=preprocessorForSummary)
|
36 |
+
modelWOG.load_weights(WEIGHT_PATH_WO_G)
|
37 |
+
|
38 |
+
# "WithGrammarSummary" Model
|
39 |
+
modelWG = keras_nlp.models.BartSeq2SeqLM.from_preset(MODEL_PATH, preprocessor=preprocessorForSummary)
|
40 |
+
modelWG.load_weights(WEIGHT_PATH_W_G)
|
41 |
+
|
42 |
+
# "Paraphrase" Model
|
43 |
+
modelParaphrase = keras_nlp.models.BartSeq2SeqLM.from_preset(MODEL_PATH, preprocessor=preprocessorForParaphrase)
|
44 |
+
# Load the paraphrase-specific weights; WEIGHT_PATH_W_G would make this model
# a duplicate of the "with grammar" summarizer.
modelParaphrase.load_weights(WEIGHT_PATH_PARAPHRASE)
|
45 |
+
|
46 |
+
def generateText(input_text, model, max_length, wo_summary):
|
47 |
+
if wo_summary:
|
48 |
+
input_text = cleanText(input_text)
|
49 |
+
output = model.generate(input_text, max_length=max_length)
|
50 |
+
return output
|
textSFunctionality.py
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
import re
|
2 |
-
import os
|
3 |
-
import tensorflow as tf
|
4 |
-
import keras
|
5 |
-
import keras_nlp
|
6 |
-
|
7 |
-
MAX_ENCODER_SEQUENCE_LENGTH = 512
|
8 |
-
MAX_DECODER_SEQUENCE_LENGTH = 128
|
9 |
-
|
10 |
-
MODEL_PATH = r"TextSummarizationModel"
|
11 |
-
WEIGHT_PATH = r"new_model.weights.h5"
|
12 |
-
WEIGHT_PATH = os.path.join(MODEL_PATH, WEIGHT_PATH)
|
13 |
-
|
14 |
-
def cleanText(text):
|
15 |
-
text = str(text)
|
16 |
-
text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
|
17 |
-
text = text.lower()
|
18 |
-
return text
|
19 |
-
|
20 |
-
preprocessor = keras_nlp.models.BartSeq2SeqLMPreprocessor.from_preset(MODEL_PATH, encoder_sequence_length=MAX_ENCODER_SEQUENCE_LENGTH,decoder_sequence_length=MAX_DECODER_SEQUENCE_LENGTH,)
|
21 |
-
model = keras_nlp.models.BartSeq2SeqLM.from_preset(MODEL_PATH, preprocessor=preprocessor)
|
22 |
-
model.load_weights(WEIGHT_PATH)
|
23 |
-
|
24 |
-
def generateText(input_text, model=model, max_length=200):
|
25 |
-
input_text = cleanText(input_text)
|
26 |
-
output = model.generate(input_text, max_length=max_length)
|
27 |
-
return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|