File size: 2,025 Bytes
0f7edc8
de506dc
0f7edc8
de506dc
 
 
 
 
 
edceea4
 
766e234
 
edceea4
de506dc
8993d82
 
de506dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
766e234
de506dc
a0b7f75
3ffc077
de506dc
 
 
766e234
ee9673c
 
de506dc
ee9673c
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
import requests 


import requests
from bs4 import BeautifulSoup
import re


# from huggingface_hub import login
# login()


# Use a pipeline as a high-level helper
from transformers import pipeline
# pipe = pipeline("text-generation", model="openai-community/gpt2-medium")
# NOTE: this is a *text-generation* (causal LM) pipeline, not a summarization
# one — GPT-2 will continue the prompt rather than truly summarize it.
# Loading happens at import time, so the app blocks here while the model
# downloads/loads on first run.
pipe = pipeline("text-generation", model="openai-community/gpt2")     

def get_clean_text(url):
    """Fetch *url* and return its visible text content as one cleaned string.

    Scripts and styles are stripped, whitespace is collapsed to single
    spaces. Returns ``None`` on any failure (non-200 status, network or
    parsing error) — this function is deliberately best-effort.
    """
    try:
        # Send a GET request to the URL.
        # A timeout is essential: without one, requests can block forever
        # on an unresponsive server and hang the whole Gradio worker.
        response = requests.get(url, timeout=10)

        # Check if the request was successful
        if response.status_code != 200:
            return None

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove all script and style elements (non-visible content)
        for element in soup(["script", "style"]):
            element.decompose()

        # Get the text from the HTML content
        text = soup.get_text()

        # Break the text into lines and remove leading and trailing whitespace
        lines = (line.strip() for line in text.splitlines())

        # Break multi-headlines (separated by runs of spaces) into a line each
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))

        # Drop blank chunks and rejoin
        text = '\n'.join(chunk for chunk in chunks if chunk)

        # Collapse all remaining whitespace (including the newlines just
        # inserted) into single spaces, yielding one flat string.
        text = re.sub(r'\s+', ' ', text)

        return text

    except Exception as e:
        # Best-effort contract: swallow any error, report it, signal
        # failure to the caller with None rather than raising.
        print(f"An error occurred: {e}")
        return None



def summarize(link):
    """Fetch the page at *link* and ask the model for a bullet-point summary.

    Returns the raw pipeline output (JSON-serializable list of generations),
    or an error payload when the page text could not be retrieved.
    """
    text = get_clean_text(link)
    if text is None:
        # Previously a failed fetch interpolated the literal string "None"
        # into the prompt and asked the model to "summarize" it; surface
        # the failure to the UI instead.
        return {"error": f"Could not retrieve text from {link}"}
    return pipe(f"summarize the following news into bullet points {text}")


# Wire the summarizer into a simple Gradio UI: one text input (the URL),
# JSON output showing the raw pipeline result.
demo = gr.Interface(summarize, "text", "json")
demo.launch()




# demo = gr.interface(fn=summarize, inputs="textbox", outputs="textbox")
# if __name__ == "__main__":
#     demo.launch()

# # gr.load("models/meta-llama/Llama-3.2-1B").launch()