sahibnanda committed on
Commit
9098297
•
1 Parent(s): d9579cb

Paraphrasing, Summarizing Added

Browse files
GrammarSummary/new_model.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e1246b81386e12ce55eb44edc86a97d9d892d7b5adf4222c502fab8b801805e
3
+ size 1673753584
{TextSummarizationModel → ModelFiles}/assets/tokenizer/merges.txt RENAMED
File without changes
{TextSummarizationModel → ModelFiles}/assets/tokenizer/vocabulary.json RENAMED
File without changes
{TextSummarizationModel → ModelFiles}/config.json RENAMED
File without changes
{TextSummarizationModel → ModelFiles}/metadata.json RENAMED
File without changes
{TextSummarizationModel → ModelFiles}/model.weights.h5 RENAMED
File without changes
{TextSummarizationModel → ModelFiles}/tokenizer.json RENAMED
File without changes
Paraphrase/new_model.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b6ab0eb6c02bd23771aa31e775c848bf3929ef11c34b8f23f788ec4eb953ae0
3
+ size 1673753584
{TextSummarizationModel → WithoutGrammarSummary}/new_model.weights.h5 RENAMED
File without changes
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import streamlit as st
2
- from textSFunctionality import generateText
3
 
4
  # Set the page configuration and theme once at the top
5
- st.set_page_config(page_title="Text Summarization", page_icon="⭐")
6
 
7
  st.write(
8
  """
@@ -35,19 +35,38 @@ st.write(
35
  )
36
 
37
  def main():
38
- st.title('Text Summarization')
 
39
 
40
  # Text area for user input
41
- user_input = st.text_area("#### **Enter Text To Summarize**:", height=300)
42
 
43
- # Button to trigger summarization
44
- if st.button("Summarize"):
45
  if user_input:
46
- summary = generateText(user_input)
47
- st.write("#### **Summarized Text**:")
48
  st.write(summary)
49
  else:
50
- st.write("Please Enter Some Text To Summarize.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  if __name__ == '__main__':
53
- main()
 
1
  import streamlit as st
2
+ from textFunctionality import generateText, modelWG, modelWOG, modelParaphrase
3
 
4
  # Set the page configuration and theme once at the top
5
+ st.set_page_config(page_title="Text Summarization and Paraphrasing", page_icon="⭐")
6
 
7
  st.write(
8
  """
 
35
  )
36
 
37
  def main():
38
+ st.title('Text Summarization And Paraphrasing')
39
+ st.write("**Summarize Without Grammar Performs Better But Misses Out On Grammar Like Punctuation, Capitalization, etc.**")
40
 
41
  # Text area for user input
42
+ user_input = st.text_area("#### **Enter Text To Summarize or Paraphrase**:", height=300)
43
 
44
+ # Button to trigger summarization with grammar
45
+ if st.button("Summarize With Grammar"):
46
  if user_input:
47
+ summary = generateText(user_input, modelWG, 200, False)
48
+ st.write("#### **Summarized Text (With Grammar)**:")
49
  st.write(summary)
50
  else:
51
+ st.write("**Please Enter Some Text To Summarize.**")
52
+
53
+ # Button to trigger summarization without grammar
54
+ if st.button("Summarize Without Grammar"):
55
+ if user_input:
56
+ summary = generateText(user_input, modelWOG, 200, True)
57
+ st.write("#### **Summarized Text (Without Grammar)**:")
58
+ st.write(summary)
59
+ else:
60
+ st.write("**Please Enter Some Text To Summarize.**")
61
+
62
+ # Button to trigger paraphrasing
63
+ if st.button("Paraphrase"):
64
+ if user_input:
65
+ paraphrase = generateText(user_input, modelParaphrase, 500, False)
66
+ st.write("#### **Paraphrased Text**:")
67
+ st.write(paraphrase)
68
+ else:
69
+ st.write("**Please Enter Some Text To Paraphrase.**")
70
 
71
  if __name__ == '__main__':
72
+ main()
textFunctionality.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+ import tensorflow as tf
4
+ import keras
5
+ import keras_nlp
6
+
7
+ # Constants
8
+ MAX_ENCODER_SEQUENCE_LENGTH = 512
9
+ MAX_DECODER_SEQUENCE_LENGTH_FOR_SUMMARY = 128
10
+ MAX_DECODER_SEQUENCE_LENGTH_FOR_PARAPHRASE = 512
11
+
12
+
13
+ # Model Files
14
+ MODEL_PATH = r"ModelFiles"
15
+
16
+
17
+ # Weight files for the "WithoutGrammarSummary", "GrammarSummary" and "Paraphrase" models
18
+ WEIGHT_PATH_WO_G = r"WithoutGrammarSummary/new_model.weights.h5"
19
+ WEIGHT_PATH_W_G = r"GrammarSummary/new_model.weights.h5"
20
+ WEIGHT_PATH_PARAPHRASE = r"Paraphrase/new_model.weights.h5"
21
+
22
+ def cleanText(text):
23
+ text = str(text)
24
+ text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
25
+ text = text.lower()
26
+ return text
27
+
28
+ # Preprocessor For Summary
29
+ preprocessorForSummary = keras_nlp.models.BartSeq2SeqLMPreprocessor.from_preset(MODEL_PATH, encoder_sequence_length=MAX_ENCODER_SEQUENCE_LENGTH,decoder_sequence_length=MAX_DECODER_SEQUENCE_LENGTH_FOR_SUMMARY,)
30
+ preprocessorForParaphrase = keras_nlp.models.BartSeq2SeqLMPreprocessor.from_preset(MODEL_PATH, encoder_sequence_length=MAX_ENCODER_SEQUENCE_LENGTH,decoder_sequence_length=MAX_DECODER_SEQUENCE_LENGTH_FOR_PARAPHRASE,)
31
+
32
+ # Load Model
33
+
34
+ # "WithoutGrammarSummary" Model
35
+ modelWOG = keras_nlp.models.BartSeq2SeqLM.from_preset(MODEL_PATH, preprocessor=preprocessorForSummary)
36
+ modelWOG.load_weights(WEIGHT_PATH_WO_G)
37
+
38
+ # "WithGrammarSummary" Model
39
+ modelWG = keras_nlp.models.BartSeq2SeqLM.from_preset(MODEL_PATH, preprocessor=preprocessorForSummary)
40
+ modelWG.load_weights(WEIGHT_PATH_W_G)
41
+
42
+ # "Paraphrase" Model (uses the paraphrase preprocessor and the paraphrase checkpoint, WEIGHT_PATH_PARAPHRASE)
43
+ modelParaphrase = keras_nlp.models.BartSeq2SeqLM.from_preset(MODEL_PATH, preprocessor=preprocessorForParaphrase)
44
+ modelParaphrase.load_weights(WEIGHT_PATH_PARAPHRASE)
45
+
46
+ def generateText(input_text, model, max_length, wo_summary):
47
+ if wo_summary:
48
+ input_text = cleanText(input_text)
49
+ output = model.generate(input_text, max_length=max_length)
50
+ return output
textSFunctionality.py DELETED
@@ -1,27 +0,0 @@
1
- import re
2
- import os
3
- import tensorflow as tf
4
- import keras
5
- import keras_nlp
6
-
7
- MAX_ENCODER_SEQUENCE_LENGTH = 512
8
- MAX_DECODER_SEQUENCE_LENGTH = 128
9
-
10
- MODEL_PATH = r"TextSummarizationModel"
11
- WEIGHT_PATH = r"new_model.weights.h5"
12
- WEIGHT_PATH = os.path.join(MODEL_PATH, WEIGHT_PATH)
13
-
14
- def cleanText(text):
15
- text = str(text)
16
- text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
17
- text = text.lower()
18
- return text
19
-
20
- preprocessor = keras_nlp.models.BartSeq2SeqLMPreprocessor.from_preset(MODEL_PATH, encoder_sequence_length=MAX_ENCODER_SEQUENCE_LENGTH,decoder_sequence_length=MAX_DECODER_SEQUENCE_LENGTH,)
21
- model = keras_nlp.models.BartSeq2SeqLM.from_preset(MODEL_PATH, preprocessor=preprocessor)
22
- model.load_weights(WEIGHT_PATH)
23
-
24
- def generateText(input_text, model=model, max_length=200):
25
- input_text = cleanText(input_text)
26
- output = model.generate(input_text, max_length=max_length)
27
- return output