Spaces:

remzicam
/

ted_talks_summarizer

Running

App Files Files Community

remzicam committed on Dec 27, 2022

Commit

c27e127

1 Parent(s): 5d8a17a

Upload 2 files

Browse files

Updated the summarizer model because the previous one raised an error.

Files changed (2) hide show

app.py +55 -17
requirements.txt +3 -0

app.py CHANGED Viewed

@@ -2,11 +2,23 @@
 from re import sub
-from gradio import Interface, Series, Textbox
 from requests import get
-def clean_text(text):
     """Cleans subtitle text of ted talks.
     Args:
@@ -26,14 +38,14 @@ def clean_text(text):
     return cleaned_text
-def ted_talk_transcriber(link):
     """Creates transcription of ted talks from url.
     Args:
         link (str): url link of ted talks
     Returns:
-        cleaned_transcript (str): transcription of the ted talk
     """
     # request link of the talk
     page = get(link)
@@ -42,29 +54,55 @@ def ted_talk_transcriber(link):
     raw_text = get(
         f"https://hls.ted.com/project_masters/{talk_id}/subtitles/en/full.vtt"
     ).text
-    cleaned_transcript = clean_text(raw_text)
-    return cleaned_transcript
-transcriber = Interface(
-    ted_talk_transcriber,
-    "text",
-    "text",
-)
-summarizer = Interface.load(
-    "huggingface/Shobhank-iiitdwd/long-t5-tglobal-base-16384-book-summary"
-)
 logo = "<center><img src='file/TED.png' width=180px></center>"
-Series(
-    transcriber,
-    summarizer,
     inputs=Textbox(label="Type the TED Talks link"),
     examples=[
     "https://www.ted.com/talks/jen_gunter_the_truth_about_yeast_in_your_body"
              ],
     allow_flagging="never",
     description=logo,
 ).launch()

 from re import sub
+from gradio import Interface, Textbox
 from requests import get
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
+# Hugging Face Hub id of the checkpoint used for summarization.
+repo_id = "pszemraj/led-base-book-summary"
+# Load the seq2seq model once at import time so every request reuses it.
+# NOTE(review): low_cpu_mem_usage presumably reduces peak RAM while loading
+# and is likely why `accelerate` was added to requirements.txt -- confirm.
+model = AutoModelForSeq2SeqLM.from_pretrained(
+    repo_id,
+    low_cpu_mem_usage=True,
+)
+# Tokenizer is loaded from the same repo id as the model.
+tokenizer = AutoTokenizer.from_pretrained(repo_id)
+# Module-level summarization pipeline; text_summarizer() calls it.
+summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
+def clean_text(text: str) -> str:
     """Cleans subtitle text of ted talks.
     Args:
     return cleaned_text
+def ted_talk_transcriber(link: str) -> str:
     """Creates transcription of ted talks from url.
     Args:
         link (str): url link of ted talks
     Returns:
+        raw_text (str): raw transcription of the ted talk
     """
     # request link of the talk
     page = get(link)
     raw_text = get(
         f"https://hls.ted.com/project_masters/{talk_id}/subtitles/en/full.vtt"
     ).text
+    return raw_text
+def text_summarizer(text: str) -> str:
+    """Summarizes given text with the module-level ``summarizer`` pipeline.
+
+    Args:
+        text (str): text to summarize; ``main`` passes the cleaned
+            TED talk transcription here
+
+    Returns:
+        str: the ``summary_text`` field of the first pipeline result
+    """
+    # Generation settings are forwarded to the pipeline call: beam search
+    # with 4 beams and sampling disabled, plus n-gram / repetition controls.
+    # NOTE(review): min_length/max_length are presumably counted in tokens,
+    # not characters -- confirm against the transformers generation docs.
+    result = summarizer(
+        text,
+        min_length=8,
+        max_length=256,
+        no_repeat_ngram_size=3,
+        encoder_no_repeat_ngram_size=3,
+        repetition_penalty=3.5,
+        num_beams=4,
+        do_sample=False,
+        early_stopping=True,
+    )
+    # The result is indexed as a list of dicts; a single input text is
+    # passed, so only the first entry is read.
+    return result[0]["summary_text"]
+def main(link: str) -> str:
+    """Summarizes ted talks given link.
+
+    Chains the three steps of the app: fetch the raw transcript with
+    ``ted_talk_transcriber``, clean it with ``clean_text``, and summarize
+    the cleaned text with ``text_summarizer``.
+
+    Args:
+        link (str): url link of ted talks
+
+    Returns:
+        str: summary
+    """
+    # Step 1: fetch the raw subtitle text for the talk.
+    raw_text = ted_talk_transcriber(link)
+    # Step 2: clean the subtitle text before it reaches the model.
+    cleaned_transcript = clean_text(raw_text)
+    # Step 3: summarize the cleaned transcript.
+    return text_summarizer(cleaned_transcript)
+# HTML snippet passed as the interface description; it references the
+# TED.png image via a 'file/' path.
 logo = "<center><img src='file/TED.png' width=180px></center>"
+# Single Gradio interface wrapping main(): one textbox in (the talk link),
+# one textbox out (the summary). launch() is called at module level.
+Interface(
+    main,
     inputs=Textbox(label="Type the TED Talks link"),
     examples=[
     "https://www.ted.com/talks/jen_gunter_the_truth_about_yeast_in_your_body"
              ],
+    outputs=Textbox(label="Summary"),
     allow_flagging="never",
     description=logo,
 ).launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+accelerate
+torch
+transformers