File size: 2,025 Bytes
0f7edc8
de506dc
0f7edc8
de506dc
 
 
 
 
 
edceea4
 
766e234
 
edceea4
de506dc
8993d82
 
de506dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
766e234
de506dc
a0b7f75
3ffc077
de506dc
 
 
766e234
ee9673c
 
de506dc
ee9673c
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
import requests 


import requests
from bs4 import BeautifulSoup
import re


# from huggingface_hub import login
# login()


# Use a pipeline as a high-level helper
from transformers import pipeline
# pipe = pipeline("text-generation", model="openai-community/gpt2-medium")
# NOTE: this is a *text-generation* (causal LM) pipeline, not a summarization
# one — GPT-2 will continue the prompt rather than truly summarize it.
# Loading happens at import time, so the app blocks here while the model
# downloads/loads on first run.
pipe = pipeline("text-generation", model="openai-community/gpt2")     

def get_clean_text(url):
    """Fetch *url* and return its visible text content as one cleaned string.

    Scripts and styles are stripped, whitespace is collapsed to single
    spaces. Returns ``None`` on any failure (non-200 status, network or
    parsing error) — this function is deliberately best-effort.
    """
    try:
        # Send a GET request to the URL.
        # A timeout is essential: without one, requests can block forever
        # on an unresponsive server and hang the whole Gradio worker.
        response = requests.get(url, timeout=10)

        # Check if the request was successful
        if response.status_code != 200:
            return None

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove all script and style elements (non-visible content)
        for element in soup(["script", "style"]):
            element.decompose()

        # Get the text from the HTML content
        text = soup.get_text()

        # Break the text into lines and remove leading and trailing whitespace
        lines = (line.strip() for line in text.splitlines())

        # Break multi-headlines (separated by runs of spaces) into a line each
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))

        # Drop blank chunks and rejoin
        text = '\n'.join(chunk for chunk in chunks if chunk)

        # Collapse all remaining whitespace (including the newlines just
        # inserted) into single spaces, yielding one flat string.
        text = re.sub(r'\s+', ' ', text)

        return text

    except Exception as e:
        # Best-effort contract: swallow any error, report it, signal
        # failure to the caller with None rather than raising.
        print(f"An error occurred: {e}")
        return None



def summarize(link):
    """Fetch the page at *link* and ask the model for a bullet-point summary.

    Returns the raw pipeline output (JSON-serializable list of generations),
    or an error payload when the page text could not be retrieved.
    """
    text = get_clean_text(link)
    if text is None:
        # Previously a failed fetch interpolated the literal string "None"
        # into the prompt and asked the model to "summarize" it; surface
        # the failure to the UI instead.
        return {"error": f"Could not retrieve text from {link}"}
    return pipe(f"summarize the following news into bullet points {text}")


# Wire the summarizer into a simple Gradio UI: one text input (the URL),
# JSON output showing the raw pipeline result.
demo = gr.Interface(summarize, "text", "json")
demo.launch()




# demo = gr.interface(fn=summarize, inputs="textbox", outputs="textbox")
# if __name__ == "__main__":
#     demo.launch()

# # gr.load("models/meta-llama/Llama-3.2-1B").launch()