enstazao committed
Commit cbcc9fd • Parent(s): 6bae4ea

added application files
- __pycache__/app.cpython-311.pyc +0 -0
- __pycache__/main.cpython-311.pyc +0 -0
- app.py +23 -0
- main.py +54 -0
- requirements.txt +3 -0
__pycache__/app.cpython-311.pyc
ADDED
Binary file (1.24 kB)
__pycache__/main.cpython-311.pyc
ADDED
Binary file (3.67 kB)
app.py
ADDED
@@ -0,0 +1,23 @@
+import gradio as gr
+from main import fetch_webpage_content, parse_and_segment_content, summarize_text
+
+def summarize_webpage(url):
+    html_content = fetch_webpage_content(url)
+    if html_content:
+        chunks = parse_and_segment_content(html_content)
+        summary = summarize_text(chunks)
+        if summary:
+            return summary
+        else:
+            return "Failed to generate a summary."
+    else:
+        return "Failed to fetch or process webpage content."
+
+interface = gr.Interface(fn=summarize_webpage,
+                         inputs=gr.Textbox(lines=2, placeholder="Enter URL Here..."),
+                         outputs="text",
+                         title="Webpage Summarizer",
+                         description="Paste the URL of a webpage to get summarized content.")
+
+if __name__ == "__main__":
+    interface.launch()
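
app.py only wires three functions from main.py into a Gradio Interface, so the logic can be smoke-tested without starting the server. A minimal sketch, assuming the files above sit in one directory; the URL is a placeholder, not part of this commit:

# Quick check of the pipeline app.py wires together; the URL is hypothetical.
from main import fetch_webpage_content, parse_and_segment_content, summarize_text

html = fetch_webpage_content("https://example.com")  # placeholder target page
if html:
    chunks = parse_and_segment_content(html)
    print(f"got {len(chunks)} chunk(s)")
    print(summarize_text(chunks))
else:
    print("fetch failed")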
main.py
ADDED
@@ -0,0 +1,54 @@
+from transformers import pipeline
+from bs4 import BeautifulSoup
+import requests
+
+# @desc Get the content of the web page
+def fetch_webpage_content(url):
+    try:
+        response = requests.get(url)
+        response.raise_for_status()  # Raises an HTTPError if the status is 4xx or 5xx
+        return response.text
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching the webpage: {e}")
+        return None
+
+# @desc Split the scraped content into chunks
+def parse_and_segment_content(html_content):
+    soup = BeautifulSoup(html_content, 'html.parser')
+    results = soup.find_all(['h1', 'p'])
+    text = ' '.join([result.text for result in results])
+    text = text.replace('.', '.<eos>').replace('!', '!<eos>').replace('?', '?<eos>')
+    sentences = text.split('<eos>')
+
+    max_chunk = 500
+    chunks = []
+    current_chunk = 0
+
+    for sentence in sentences:
+        if len(sentence) > 0:  # Skip empty sentences
+            if len(chunks) == current_chunk + 1:
+                if len(chunks[current_chunk]) + len(sentence.split(' ')) <= max_chunk:
+                    chunks[current_chunk].extend(sentence.split(' '))
+                else:
+                    current_chunk += 1
+                    chunks.append(sentence.split(' '))
+            else:
+                chunks.append(sentence.split(' '))
+
+    for chunk_id in range(len(chunks)):
+        chunks[chunk_id] = ' '.join(chunks[chunk_id]).strip()
+
+    return chunks
+
+# @desc Summarize each chunk and return the combined summary
+def summarize_text(chunks):
+    summarizer = pipeline("summarization")
+    summaries = []
+    try:
+        for chunk in chunks:
+            summary = summarizer(chunk, max_length=50, min_length=30, do_sample=False)
+            summaries.append(summary[0]['summary_text'])
+        return ' '.join(summaries)
+    except Exception as e:
+        print(f"Error in summarization: {e}")
+        return None
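
One sharp edge in summarize_text above: pipeline("summarization") is rebuilt on every call, and with no model argument transformers selects a default checkpoint at runtime. A hedged variant that builds the pipeline once and pins a checkpoint (sshleifer/distilbart-cnn-12-6 is an illustrative choice, not one this commit makes):

from transformers import pipeline

# Built once at import time; the pinned model is an assumption for
# illustration, not something main.py or requirements.txt specifies.
_summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

def summarize_text(chunks):
    summaries = []
    try:
        for chunk in chunks:
            out = _summarizer(chunk, max_length=50, min_length=30, do_sample=False)
            summaries.append(out[0]['summary_text'])
        return ' '.join(summaries)
    except Exception as e:
        print(f"Error in summarization: {e}")
        return None

Pinning also keeps the Space reproducible if the library's default checkpoint changes.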
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+transformers
+beautifulsoup4
+requests
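
Two dependencies are absent from this list: gradio, which app.py imports (a Gradio Space's SDK image typically provides it), and a deep-learning backend such as torch, which the transformers pipeline requires. A small preflight sketch, assuming torch is the intended backend, that surfaces missing packages before the UI launches:

# Preflight import check; "torch" is an assumption here, since TensorFlow
# would also satisfy the transformers pipeline.
import importlib

for name in ("gradio", "transformers", "bs4", "requests", "torch"):
    try:
        importlib.import_module(name)
        print(f"ok: {name}")
    except ImportError as exc:
        print(f"missing: {name} ({exc})")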