Spaces:
Sleeping
Sleeping
Commit
·
08a92ff
1
Parent(s):
1413056
Update app.py
Browse files
app.py
CHANGED
@@ -34,6 +34,24 @@ def finder(url, soup, media_type):
|
|
34 |
files.append(file_url)
|
35 |
return files
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
def scrapper(url):
|
38 |
try:
|
39 |
response = requests.get(url, timeout=10)
|
@@ -55,10 +73,7 @@ def scrapper(url):
|
|
55 |
full_text += line + ' '
|
56 |
|
57 |
# Initialize the summarization pipeline
|
58 |
-
|
59 |
-
|
60 |
-
# Summarize the content
|
61 |
-
summary = summarizer(full_text, max_length=200, min_length=50, do_sample=False)
|
62 |
|
63 |
# Extract the summary text
|
64 |
summary_text = summary[0]['summary_text']
|
|
|
34 |
files.append(file_url)
|
35 |
return files
|
36 |
|
37 |
+
# Cache the summarization pipeline at module level so the (large) model is
# loaded only once per process instead of on every summarize_long_text call.
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it lazily on first use."""
    global _summarizer
    if _summarizer is None:
        _summarizer = pipeline('summarization')
    return _summarizer


def summarize_long_text(text, chunk_size=1024):
    """Summarize arbitrarily long text by splitting it into word chunks.

    The text is split on whitespace, grouped into chunks of at most
    ``chunk_size`` words, each chunk is summarized independently, and the
    per-chunk summaries are joined with single spaces into one string.

    Args:
        text: The input text to summarize.
        chunk_size: Maximum number of words per chunk (default 1024).
            NOTE(review): 1024 *words* can exceed the underlying model's
            token limit — confirm against the model actually loaded.

    Returns:
        The combined summary as a single string; '' when ``text`` has no words.
    """
    words = text.split()
    # Fast path: empty input yields '' (same as the original ' '.join([]))
    # without paying the cost of loading the model at all.
    if not words:
        return ''

    summarizer = _get_summarizer()

    # Group the words into windows of at most chunk_size words each.
    chunks = [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

    # Summarize each chunk independently; the pipeline returns a list of
    # dicts, and the text lives under the 'summary_text' key.
    summarized_chunks = [
        summarizer(chunk, max_length=1024, min_length=50, do_sample=False)[0]['summary_text']
        for chunk in chunks
    ]

    # Combine the per-chunk summaries into the final summary.
    return ' '.join(summarized_chunks)
|
54 |
+
|
55 |
def scrapper(url):
|
56 |
try:
|
57 |
response = requests.get(url, timeout=10)
|
|
|
73 |
full_text += line + ' '
|
74 |
|
75 |
# Initialize the summarization pipeline
|
76 |
+
summary = summarize_long_text(full_text)
|
|
|
|
|
|
|
77 |
|
78 |
# Extract the summary text
|
79 |
summary_text = summary[0]['summary_text']
|