pszemraj committed on
Commit: 0cef1e2
Parent: b542f3a

✨ May improvements


Signed-off-by: peter szemraj <peterszemraj@gmail.com>

Files changed (4):
  1. app.py +99 -41
  2. requirements.txt +2 -2
  3. summarize.py +36 -22
  4. utils.py +14 -0
app.py CHANGED
@@ -1,3 +1,7 @@
+"""
+app.py - the main application file for the gradio app
+"""
+import gc
 import logging
 import random
 import re
@@ -6,6 +10,7 @@ from pathlib import Path
 
 import gradio as gr
 import nltk
+import torch
 from cleantext import clean
 
 from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
@@ -13,22 +18,62 @@ from utils import load_example_filenames, truncate_word_count
 
 _here = Path(__file__).parent
 
-nltk.download("stopwords")  # TODO=find where this requirement originates from
+nltk.download("stopwords", quiet=True)
 
 logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+    level=logging.INFO, format="%(asctime)s - [%(levelname)s] %(name)s: %(message)s"
 )
 
+MODEL_OPTIONS = [
+    "pszemraj/led-large-book-summary",
+    "pszemraj/led-large-book-summary-continued",
+    "pszemraj/led-base-book-summary",
+]
+
+
+def predict(
+    input_text: str,
+    model_name: str,
+    token_batch_length: int = 2048,
+    empty_cache: bool = True,
+    **settings,
+) -> list:
+    """
+    predict - helper fn to support multiple models for summarization at once
+    :param str input_text: the input text to summarize
+    :param str model_name: model name to use
+    :param int token_batch_length: the length of the token batches to use
+    :param bool empty_cache: whether to empty the cache before loading a new model
+    :return: list of dicts with keys "summary" and "score"
+    """
+    if torch.cuda.is_available() and empty_cache:
+        torch.cuda.empty_cache()
+
+    model, tokenizer = load_model_and_tokenizer(model_name)
+    summaries = summarize_via_tokenbatches(
+        input_text,
+        model,
+        tokenizer,
+        batch_length=token_batch_length,
+        **settings,
+    )
+
+    del model
+    del tokenizer
+    gc.collect()
+
+    return summaries
+
 
 def proc_submission(
     input_text: str,
-    model_size: str,
-    num_beams,
-    token_batch_length,
-    length_penalty,
-    repetition_penalty,
-    no_repeat_ngram_size,
-    max_input_length: int = 1024,
+    model_name: str,
+    num_beams: int,
+    token_batch_length: int,
+    length_penalty: float,
+    repetition_penalty: float,
+    no_repeat_ngram_size: int,
+    max_input_length: int = 2560,
 ):
     """
     proc_submission - a helper function for the gradio module to process submissions
@@ -41,12 +86,14 @@ def proc_submission(
         length_penalty (float): the length penalty to use
         repetition_penalty (float): the repetition penalty to use
         no_repeat_ngram_size (int): the no-repeat ngram size to use
-        max_input_length (int, optional): the maximum input length to use. Defaults to 1024.
+        max_input_length (int, optional): the maximum input length to use. Defaults to 2560.
 
     Returns:
         str in HTML format, string of the summary, str of score
     """
 
+    logger = logging.getLogger(__name__)
+    logger.info("Processing submission")
     settings = {
         "length_penalty": float(length_penalty),
         "repetition_penalty": float(repetition_penalty),
@@ -58,14 +105,19 @@ def proc_submission(
         "early_stopping": True,
         "do_sample": False,
     }
+
+    if "base" in model_name:
+        logger.info("Updating max_input_length for base model")
+        max_input_length = 4096
+
+    logger.info(f"max_input_length: {max_input_length}")
     st = time.perf_counter()
     history = {}
    clean_text = clean(input_text, lower=False)
-    max_input_length = 2048 if model_size == "base" else max_input_length
    processed = truncate_word_count(clean_text, max_input_length)
 
    if processed["was_truncated"]:
-        tr_in = processed["truncated_text"]
+        truncated_input = processed["truncated_text"]
        # create elaborate HTML warning
        input_wc = re.split(r"\s+", input_text)
        msg = f"""
@@ -77,7 +129,7 @@
        logging.warning(msg)
        history["WARNING"] = msg
    else:
-        tr_in = input_text
+        truncated_input = input_text
        msg = None
 
    if len(input_text) < 50:
@@ -95,24 +147,25 @@
 
        return msg, "", []
 
-    _summaries = summarize_via_tokenbatches(
-        tr_in,
-        model_sm if "base" in model_size.lower() else model,
-        tokenizer_sm if "base" in model_size.lower() else tokenizer,
-        batch_length=token_batch_length,
+    _summaries = predict(
+        input_text=truncated_input,
+        model_name=model_name,
+        token_batch_length=token_batch_length,
        **settings,
    )
-    sum_text = [f"Section {i}: " + s["summary"][0] for i, s in enumerate(_summaries)]
+    sum_text = [
+        f"\nBatch {i}:\n\t" + s["summary"][0] for i, s in enumerate(_summaries, start=1)
+    ]
    sum_scores = [
-        f" - Section {i}: {round(s['summary_score'],4)}"
-        for i, s in enumerate(_summaries)
+        f"\n- Batch {i}:\n\t{round(s['summary_score'],4)}"
+        for i, s in enumerate(_summaries, start=1)
    ]
 
    sum_text_out = "\n".join(sum_text)
    history["Summary Scores"] = "<br><br>"
    scores_out = "\n".join(sum_scores)
    rt = round((time.perf_counter() - st) / 60, 2)
-    print(f"Runtime: {rt} minutes")
+    logger.info(f"Runtime: {rt} minutes")
    html = ""
    html += f"<p>Runtime: {rt} minutes on CPU</p>"
    if msg is not None:
@@ -169,36 +222,38 @@ def load_uploaded_file(file_obj):
 
 
 if __name__ == "__main__":
-
-    model, tokenizer = load_model_and_tokenizer("pszemraj/led-large-book-summary")
-    model_sm, tokenizer_sm = load_model_and_tokenizer("pszemraj/led-base-book-summary")
-
+    logger = logging.getLogger(__name__)
+    logger.info("Starting up app")
    name_to_path = load_example_filenames(_here / "examples")
    logging.info(f"Loaded {len(name_to_path)} examples")
-    demo = gr.Blocks()
+    demo = gr.Blocks(
+        title="Summarize Long-Form Text",
+    )
    _examples = list(name_to_path.keys())
    with demo:
-
        gr.Markdown("# Long-Form Summarization: LED & BookSum")
        gr.Markdown(
            "LED models ([model card](https://huggingface.co/pszemraj/led-large-book-summary)) fine-tuned to summarize long-form text. A [space with other models can be found here](https://huggingface.co/spaces/pszemraj/document-summarization)"
        )
        with gr.Column():
-
            gr.Markdown("## Load Inputs & Select Parameters")
            gr.Markdown(
                "Enter or upload text below, and it will be summarized [using the selected parameters](https://huggingface.co/blog/how-to-generate). "
            )
            with gr.Row():
-                model_size = gr.Radio(
-                    choices=["base", "large"], label="Model Variant", value="large"
+                model_name = gr.Dropdown(
+                    choices=MODEL_OPTIONS,
+                    value=MODEL_OPTIONS[0],
+                    label="Model Name",
                )
                num_beams = gr.Radio(
                    choices=[2, 3, 4],
                    label="Beam Search: # of Beams",
                    value=2,
                )
-            gr.Markdown("Load a a .txt - example or your own (_You may find [this OCR space](https://huggingface.co/spaces/pszemraj/pdf-ocr) useful_)")
+            gr.Markdown(
+                "Load a .txt - example or your own (_You may find [this OCR space](https://huggingface.co/spaces/pszemraj/pdf-ocr) useful_)"
+            )
            with gr.Row():
                example_name = gr.Dropdown(
                    _examples,
@@ -213,7 +268,8 @@ if __name__ == "__main__":
            with gr.Row():
                input_text = gr.Textbox(
                    lines=4,
-                    label="Input Text (for summarization)",
+                    max_lines=12,
+                    label="Text to Summarize",
                    placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
                )
            with gr.Column():
@@ -250,11 +306,11 @@ if __name__ == "__main__":
            with gr.Column():
                gr.Markdown("### Advanced Settings")
                with gr.Row():
-                    length_penalty = gr.inputs.Slider(
+                    length_penalty = gr.Slider(
                        minimum=0.5,
                        maximum=1.0,
                        label="length penalty",
-                        default=0.7,
+                        value=0.7,
                        step=0.05,
                    )
                    token_batch_length = gr.Radio(
@@ -264,11 +320,11 @@ if __name__ == "__main__":
                    )
 
                with gr.Row():
-                    repetition_penalty = gr.inputs.Slider(
+                    repetition_penalty = gr.Slider(
                        minimum=1.0,
                        maximum=5.0,
                        label="repetition penalty",
-                        default=3.5,
+                        value=3.5,
                        step=0.1,
                    )
                    no_repeat_ngram_size = gr.Radio(
@@ -282,10 +338,10 @@ if __name__ == "__main__":
                "- [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned checkpoint of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
            )
            gr.Markdown(
-                "- The two most important parameters-empirically-are the `num_beams` and `token_batch_length`. "
+                "- The model can be used with tag [pszemraj/led-large-book-summary](https://huggingface.co/pszemraj/led-large-book-summary). See the model card for details on usage & a Colab notebook for a tutorial."
            )
            gr.Markdown(
-                "- The model can be used with tag [pszemraj/led-large-book-summary](https://huggingface.co/pszemraj/led-large-book-summary). See the model card for details on usage & a Colab notebook for a tutorial."
+                "- **Update May 1, 2023:** Enabled faster inference via `use_cache=True`; the number of words the model will process has been increased! New [test model](https://huggingface.co/pszemraj/led-large-book-summary-continued) as an extension of `led-large-book-summary`."
            )
            gr.Markdown("---")
 
@@ -301,7 +357,7 @@ if __name__ == "__main__":
        fn=proc_submission,
        inputs=[
            input_text,
-            model_size,
+            model_name,
            num_beams,
            token_batch_length,
            length_penalty,
@@ -311,4 +367,6 @@ if __name__ == "__main__":
        outputs=[output_text, summary_text, summary_scores],
    )
 
-    demo.launch(enable_queue=True, share=True)
+    demo.launch(
+        enable_queue=True,
+    )
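
Note: the new `predict()` helper makes each request self-contained — it loads the requested checkpoint, runs the batched summarization, then releases the weights (`del` + `gc.collect()`, plus `torch.cuda.empty_cache()` when a GPU is present). A rough sketch of calling it outside Gradio; the sample text and generation settings below are illustrative, not part of the commit:

```python
from app import predict  # importing app runs its module-level setup (nltk, logging)

summaries = predict(
    input_text="Very long input text goes here ...",
    model_name="pszemraj/led-base-book-summary",
    token_batch_length=1024,
    # forwarded via **settings to summarize_via_tokenbatches / generation
    num_beams=2,
    length_penalty=0.7,
    repetition_penalty=3.5,
    no_repeat_ngram_size=3,
)
for i, batch in enumerate(summaries, start=1):
    print(f"Batch {i} (score {batch['summary_score']}):\n{batch['summary'][0]}")
```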
requirements.txt CHANGED
@@ -1,8 +1,8 @@
-clean-text[gpl]
+clean-text
 gradio
 natsort
 nltk
 torch
 tqdm
 transformers
-accelerate
+accelerate
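
Note: the first change drops the `[gpl]` extra from `clean-text`; that extra pulls in the GPL-licensed `unidecode` backend, so installing plain `clean-text` keeps the dependency set permissively licensed (the library falls back to a non-GPL transliteration path).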
summarize.py CHANGED
@@ -1,30 +1,40 @@
 import logging
+import pprint as pp
 
+from utils import validate_pytorch2
+
+logging.basicConfig(level=logging.INFO)
 import torch
 from tqdm.auto import tqdm
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 
 
-def load_model_and_tokenizer(model_name):
+def load_model_and_tokenizer(model_name: str) -> tuple:
     """
-    load_model_and_tokenizer - a function that loads a model and tokenizer from huggingface
-
-    Args:
-        model_name (str): the name of the model to load
-    Returns:
-        AutoModelForSeq2SeqLM: the model
-        AutoTokenizer: the tokenizer
+    load_model_and_tokenizer - load a model and tokenizer from a model name/ID on the hub
+    :param str model_name: the model name/ID on the hub
+    :return tuple: a tuple containing the model and tokenizer
     """
-
+    logger = logging.getLogger(__name__)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     model = AutoModelForSeq2SeqLM.from_pretrained(
         model_name,
-        # low_cpu_mem_usage=True,
-        # use_cache=False,
-    )
+    ).to(device)
+    model = model.eval()
+
     tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = model.to("cuda") if torch.cuda.is_available() else model
 
-    logging.info(f"Loaded model {model_name}")
+    logger.info(f"Loaded model {model_name} to {device}")
+
+    if validate_pytorch2():
+        try:
+            logger.info("Compiling model with Torch 2.0")
+            model = torch.compile(model)
+        except Exception as e:
+            logger.warning(f"Could not compile model with Torch 2.0: {e}")
+    else:
+        logger.info("Torch 2.0 not detected, skipping compilation")
+
     return model, tokenizer
 
 
@@ -76,6 +86,7 @@ def summarize_via_tokenbatches(
     tokenizer,
     batch_length=2048,
     batch_stride=16,
+    min_batch_length: int = 512,
     **kwargs,
 ):
     """
@@ -83,7 +94,7 @@
 
     Args:
         input_text (str): the text to summarize
-        model (): the model to use for summarizationz
+        model (): the model to use for summarization
         tokenizer (): the tokenizer to use for summarization
         batch_length (int, optional): the length of each batch. Defaults to 2048.
         batch_stride (int, optional): the stride of each batch. Defaults to 16. The stride is the number of tokens that overlap between batches.
@@ -92,12 +103,16 @@
         str: the summary
     """
     # log all input parameters
-    if batch_length < 512:
-        batch_length = 512
-        print("WARNING: batch_length was set to 512")
-    print(
-        f"input parameters: {kwargs}, batch_length={batch_length}, batch_stride={batch_stride}"
-    )
+    logger = logging.getLogger(__name__)
+    if batch_length < min_batch_length:
+        logger.warning(
+            f"batch_length must be at least {min_batch_length}. Setting batch_length to {min_batch_length}"
+        )
+        batch_length = min_batch_length
+
+    logger.info(f"input parameters:\n{pp.pformat(kwargs)}")
+    logger.info(f"batch_length: {batch_length}, batch_stride: {batch_stride}")
     encoded_input = tokenizer(
         input_text,
         padding="max_length",
@@ -115,7 +130,6 @@
     pbar = tqdm(total=len(in_id_arr))
 
     for _id, _mask in zip(in_id_arr, att_arr):
-
         result, score = summarize_and_score(
             ids=_id,
             mask=_mask,
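
Note: the token batching in `summarize_via_tokenbatches` comes from the tokenizer emitting overlapping windows. A minimal sketch of that mechanism, assuming the standard Hugging Face fast-tokenizer options the function builds on (the checkpoint and sample text are illustrative):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("pszemraj/led-base-book-summary")
encoded = tokenizer(
    "some very long input text ... " * 500,
    max_length=2048,                 # batch_length: tokens per window
    stride=16,                       # batch_stride: tokens shared by consecutive windows
    padding="max_length",
    truncation=True,
    return_overflowing_tokens=True,  # emit every window, not just the first
    return_tensors="pt",
)
print(encoded["input_ids"].shape)  # (num_windows, 2048): one row per token batch
```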
utils.py CHANGED
@@ -2,12 +2,26 @@
 utils.py - Utility functions for the project.
 """
 
+import logging
 import re
 from pathlib import Path
 
+logging.basicConfig(
+    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+    level=logging.INFO,
+)
+import torch
 from natsort import natsorted
 
 
+def validate_pytorch2(torch_version: str = None):
+    torch_version = torch.__version__ if torch_version is None else torch_version
+
+    pattern = r"^2\.\d+(\.\d+)*"
+
+    return True if re.match(pattern, torch_version) else False
+
+
 def truncate_word_count(text, max_words=512):
     """
     truncate_word_count - a helper function for the gradio module
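
Note: `validate_pytorch2` simply regex-matches the installed (or given) torch version string against a `2.x` pattern, which is what `summarize.py` uses to gate `torch.compile`. A quick behavior check (the version strings passed in are examples):

```python
from utils import validate_pytorch2

print(validate_pytorch2("2.0.1"))   # True  -> torch.compile is attempted
print(validate_pytorch2("1.13.1"))  # False -> compilation is skipped
print(validate_pytorch2())          # checks the installed torch.__version__
```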