claytonsamples committed on
Commit
08a92ff
·
1 Parent(s): 1413056

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -4
app.py CHANGED
@@ -34,6 +34,24 @@ def finder(url, soup, media_type):
34
  files.append(file_url)
35
  return files
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def scrapper(url):
38
  try:
39
  response = requests.get(url, timeout=10)
@@ -55,10 +73,7 @@ def scrapper(url):
55
  full_text += line + ' '
56
 
57
  # Initialize the summarization pipeline
58
- summarizer = pipeline('summarization')
59
-
60
- # Summarize the content
61
- summary = summarizer(full_text, max_length=200, min_length=50, do_sample=False)
62
 
63
  # Extract the summary text
64
  summary_text = summary[0]['summary_text']
 
34
  files.append(file_url)
35
  return files
36
 
37
def summarize_long_text(text, chunk_size=1024):
    """Summarize arbitrarily long text by summarizing fixed-size word chunks.

    Parameters
    ----------
    text : str
        The full text to summarize.
    chunk_size : int, optional
        Number of whitespace-separated words per chunk (default 1024).

    Returns
    -------
    str
        The per-chunk summaries joined by single spaces; '' for
        empty or whitespace-only input.
    """
    # Tokenize on whitespace; an empty input yields no chunks, so bail out
    # early and skip the (expensive) pipeline construction entirely.
    words = text.split()
    if not words:
        return ''

    # NOTE(review): building the pipeline on every call re-loads the model;
    # consider caching it at module level if this is called repeatedly.
    summarizer = pipeline('summarization')

    # chunk_size counts *words*, not model tokens — 1024 words usually
    # exceeds a 1024-token model limit, so truncation=True below keeps
    # over-long chunks from raising inside the model.
    chunks = [' '.join(words[i:i + chunk_size])
              for i in range(0, len(words), chunk_size)]

    summarized_chunks = []
    for chunk in chunks:
        # Cap max_length at the chunk's own word count: asking the model for
        # a summary longer than its input triggers warnings and degenerate
        # output.  Keep the original 1024 ceiling and 50 floor.
        n_words = len(chunk.split())
        max_len = max(50, min(1024, n_words))
        result = summarizer(
            chunk,
            max_length=max_len,
            min_length=min(50, max_len),
            do_sample=False,
            truncation=True,
        )
        summarized_chunks.append(result[0]['summary_text'])

    # Combine the summarized chunks into the final summary.
    return ' '.join(summarized_chunks)
54
+
55
  def scrapper(url):
56
  try:
57
  response = requests.get(url, timeout=10)
 
73
  full_text += line + ' '
74
 
75
  # Initialize the summarization pipeline
76
+ summary = summarize_long_text(full_text)
 
 
 
77
 
78
  # Extract the summary text
79
  summary_text = summary[0]['summary_text']